| Copyright | (c) CNRS 2017-Present |
|---|---|
| License | AGPL + CECILL v3 |
| Maintainer | team@gargantext.org |
| Stability | experimental |
| Portability | POSIX |
| Safe Haskell | Safe-Inferred |
| Language | Haskell2010 |
Gargantext.Core.Text.Terms.WithList
Description
Here is a longer description of this module, containing some
commentary with some markup.
Synopsis
- data Pattern = Pattern {
  - _pat_table :: !(Table Text)
  - _pat_length :: !Int
  - _pat_terms :: ![Text]
  - }
- type Patterns = [Pattern]
- data ReplaceTerms
- replaceTerms :: ReplaceTerms -> Patterns -> [Text] -> [[Text]]
- buildPatternsWith :: Lang -> [NgramsTerm] -> Patterns
- buildPatterns :: TermList -> Patterns
- type MatchedText = Text
- termsInText :: Lang -> Patterns -> Text -> [(MatchedText, TermsCount)]
- manipulateText :: Lang -> Text -> Text
- extractTermsWithList :: Patterns -> Text -> Corpus [Text]
- extractTermsWithList' :: Patterns -> Text -> [Text]
- addSpaces :: Text -> Text
Documentation
data Pattern #
Constructors
| Pattern | |
Fields
| _pat_table :: !(Table Text) | |
| _pat_length :: !Int | |
| _pat_terms :: ![Text] | |
data ReplaceTerms #
Constructors
| KeepAll | |
| LongestOnly | |
replaceTerms :: ReplaceTerms -> Patterns -> [Text] -> [[Text]] #
buildPatternsWith :: Lang -> [NgramsTerm] -> Patterns #
buildPatterns :: TermList -> Patterns #
type MatchedText = Text #
termsInText :: Lang -> Patterns -> Text -> [(MatchedText, TermsCount)] #
manipulateText :: Lang -> Text -> Text #
Manipulates the input Text before passing it to termsInText.
In particular, if the language is Chinese (ZH), we add spaces.
extractTermsWithList' :: Patterns -> Text -> [Text] #
Extract terms

>>> let termList = [(["chat blanc"], [["chat","blanc"]])] :: TermList
>>> extractTermsWithList' (buildPatterns termList) "Le chat blanc"
["chat blanc"]