gargantext-0.0.7.1.5.3: Search, map, share
Copyright: (c) CNRS 2017 - present
License: AGPL + CECILL v3
Maintainer: team@gargantext.org
Stability: experimental
Portability: POSIX
Safe Haskell: Safe-Inferred
Language: Haskell2010

Gargantext.Core.Text.Terms

Description

An n-gram is a contiguous sequence of n items from a given sample of text. In the Gargantext application, the items are words and n is a non-negative integer.

Using Latin numerical prefixes, an n-gram of size 1 is referred to as a "unigram"; size 2 is a "bigram" (or, less commonly, a "digram"); size 3 is a "trigram". English cardinal numbers are sometimes used, e.g., "four-gram", "five-gram", and so on.

Source: https://en.wikipedia.org/wiki/Ngrams

TODO: group Ngrams -> Tree; compute occurrences by node of Tree; group occurrences according to groups;

compute cooccurrences; compute graph.

Synopsis

Documentation

data TermType lang #

Constructors

Mono 

Fields

Multi 

Fields

MonoMulti 

Fields

Unsupervised 

Fields

Instances

Instances details
Generic (TermType lang) # 
Instance details

Defined in Gargantext.Core.Text.Terms

Associated Types

type Rep (TermType lang) :: Type -> Type #

Methods

from :: TermType lang -> Rep (TermType lang) x #

to :: Rep (TermType lang) x -> TermType lang #

Show lang => Show (TermType lang) # 
Instance details

Defined in Gargantext.Core.Text.Terms

Methods

showsPrec :: Int -> TermType lang -> ShowS #

show :: TermType lang -> String #

showList :: [TermType lang] -> ShowS #

type Rep (TermType lang) # 
Instance details

Defined in Gargantext.Core.Text.Terms

type Rep (TermType lang) = D1 ('MetaData "TermType" "Gargantext.Core.Text.Terms" "gargantext-0.0.7.1.5.3-inplace" 'False) ((C1 ('MetaCons "Mono" 'PrefixI 'True) (S1 ('MetaSel ('Just "_tt_lang") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 lang)) :+: C1 ('MetaCons "Multi" 'PrefixI 'True) (S1 ('MetaSel ('Just "_tt_lang") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 lang))) :+: (C1 ('MetaCons "MonoMulti" 'PrefixI 'True) (S1 ('MetaSel ('Just "_tt_lang") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 lang)) :+: C1 ('MetaCons "Unsupervised" 'PrefixI 'True) ((S1 ('MetaSel ('Just "_tt_lang") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 lang) :*: S1 ('MetaSel ('Just "_tt_windowSize") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 Int)) :*: (S1 ('MetaSel ('Just "_tt_ngramsSize") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 Int) :*: S1 ('MetaSel ('Just "_tt_model") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 (Maybe (Tries Token ())))))))

tt_windowSize :: forall lang. Traversal' (TermType lang) Int #

tt_ngramsSize :: forall lang. Traversal' (TermType lang) Int #

tt_model :: forall lang. Traversal' (TermType lang) (Maybe (Tries Token ())) #

tt_lang :: forall lang lang. Lens (TermType lang) (TermType lang) lang lang #

extractTerms :: NLPServerConfig -> TermType Lang -> [Text] -> IO [[TermsWithCount]] #

Sugar to extract terms from text (hiding mapM from the end user). Signature as written in the original comment: extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])

data ExtractedNgrams #

Instances

Instances details
Generic ExtractedNgrams # 
Instance details

Defined in Gargantext.Core.Text.Terms

Associated Types

type Rep ExtractedNgrams :: Type -> Type #

Show ExtractedNgrams # 
Instance details

Defined in Gargantext.Core.Text.Terms

Eq ExtractedNgrams # 
Instance details

Defined in Gargantext.Core.Text.Terms

Ord ExtractedNgrams # 
Instance details

Defined in Gargantext.Core.Text.Terms

Hashable ExtractedNgrams # 
Instance details

Defined in Gargantext.Core.Text.Terms

type Rep ExtractedNgrams # 
Instance details

Defined in Gargantext.Core.Text.Terms

type Rep ExtractedNgrams = D1 ('MetaData "ExtractedNgrams" "Gargantext.Core.Text.Terms" "gargantext-0.0.7.1.5.3-inplace" 'False) (C1 ('MetaCons "SimpleNgrams" 'PrefixI 'True) (S1 ('MetaSel ('Just "unSimpleNgrams") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 Ngrams)) :+: C1 ('MetaCons "EnrichedNgrams" 'PrefixI 'True) (S1 ('MetaSel ('Just "unEnrichedNgrams") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 NgramsPostag)))

class ExtractNgramsT h where #

A typeclass that represents extracting ngrams from an entity.

Instances

Instances details
ExtractNgramsT HyperdataContact # 
Instance details

Defined in Gargantext.Database.Action.Flow.Extract

ExtractNgramsT HyperdataDocument #

Main ngrams extraction functionality. For NgramsTerms, this calls the NLP server under the hood. For Sources, Institutes, Authors, this uses a simple split on " ".

Instance details

Defined in Gargantext.Database.Action.Flow.Extract

(ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a) # 
Instance details

Defined in Gargantext.Database.Action.Flow.Extract

terms :: NLPServerConfig -> TermType Lang -> Text -> IO [TermsWithCount] #

Terms from Text. Mono: mono terms; Multi: multi terms; MonoMulti: mono and multi. TODO: multi terms should exclude mono (intersection is not empty yet).

termsUnsupervised :: TermType Lang -> Text -> [TermsWithCount] #

Unsupervised ngrams extraction: language-agnostic extraction. TODO: newtype BlockText.

uniText :: Text -> [[Text]] #

TODO: remove long terms (> 24).