Copyright | (c) CNRS 2017-Present |
---|---|
License | AGPL + CECILL v3 |
Maintainer | team@gargantext.org |
Stability | experimental |
Portability | POSIX |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
Here is a longer description of this module, containing some
commentary with some markup.
Synopsis
- data Pattern = Pattern {
- _pat_table :: !(Table Text)
- _pat_length :: !Int
- _pat_terms :: ![Text]
- }
- type Patterns = [Pattern]
- data ReplaceTerms
- replaceTerms :: ReplaceTerms -> Patterns -> [Text] -> [[Text]]
- buildPatternsWith :: Lang -> [NgramsTerm] -> Patterns
- buildPatterns :: TermList -> Patterns
- type MatchedText = Text
- termsInText :: Lang -> Patterns -> Text -> [(MatchedText, TermsCount)]
- manipulateText :: Lang -> Text -> Text
- extractTermsWithList :: Patterns -> Text -> Corpus [Text]
- extractTermsWithList' :: Patterns -> Text -> [Text]
- addSpaces :: Text -> Text
Documentation
Pattern | |
|
replaceTerms :: ReplaceTerms -> Patterns -> [Text] -> [[Text]] #
buildPatternsWith :: Lang -> [NgramsTerm] -> Patterns #
buildPatterns :: TermList -> Patterns #
type MatchedText = Text #
termsInText :: Lang -> Patterns -> Text -> [(MatchedText, TermsCount)] #
manipulateText :: Lang -> Text -> Text #
Manipulates the input Text before passing it to termsInText.
In particular, if the language is Chinese (ZH), we add spaces.
extractTermsWithList' :: Patterns -> Text -> [Text] #
Extract terms.

>>> let termList = [(["chat blanc"], [["chat","blanc"]])] :: TermList
>>> extractTermsWithList' (buildPatterns termList) "Le chat blanc"
["chat blanc"]