gargantext-0.0.7.1.5.3: Search, map, share
Copyright: (c) CNRS 2019-Present
License: AGPL + CECILL v3
Maintainer: team@gargantext.org
Stability: experimental
Portability: POSIX
Safe Haskell: Safe-Inferred
Language: Haskell2010

Gargantext.Core.Text.Terms.Eleve

Description

# Implementation of Unsupervized Word Segmentation

References:

  • Python implementation (Korantin August, Emmanuel Navarro): EleVe
  • Unsupervized Word Segmentation: the case for Mandarin Chinese, Pierre Magistry, Benoît Sagot, Alpage, INRIA & Univ. Paris 7, Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics, pages 383–387. PDF

Notes for current implementation:

  • TODO: extract longer ngrams (see paper above; the Viterbi algorithm can be used)
  • TODO AD TEST: prop (Node c _e f) = c == Map.size f

  • AD: Real ngrams extraction test:

        from Gargantext.Core.Text.Terms import extractTermsUnsupervised
        docs <- runCmdRepl $ selectDocs 1004
        extractTermsUnsupervised 3 $ DT.intercalate " " $ catMaybes $ Gargantext.map _hyperdataDocument_abstract docs
Synopsis

Documentation

nan :: Floating e => e #

noNaNs :: RealFloat e => [e] -> [e] #

updateIfDefined :: RealFloat e => e -> e -> e #

sim :: Entropy e => e -> e -> Bool #

subst :: Entropy e => (e, e) -> e -> e #

type Entropy e = (Fractional e, Floating e, RealFloat e, Show e) #

TODO: the Show instance is only used for debugging

data I e #

Example and tests for development

Constructors

I 

Instances

Instances details
Show e => Show (I e) # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

showsPrec :: Int -> I e -> ShowS #

show :: I e -> String #

showList :: [I e] -> ShowS #

info_entropy_var :: forall e. Lens' (I e) e #

info_entropy :: forall e. Lens' (I e) e #

info_autonomy :: forall e. Lens' (I e) e #

type ModEntropy i o e = (e -> e) -> i -> o #

set_autonomy :: Entropy e => ModEntropy (I e) (I e) e #

set_entropy_var :: Entropy e => Setter e (I e) e e #

data Token #

Instances

Instances details
Show Token # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

showsPrec :: Int -> Token -> ShowS #

show :: Token -> String #

showList :: [Token] -> ShowS #

Eq Token # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

(==) :: Token -> Token -> Bool #

(/=) :: Token -> Token -> Bool #

Ord Token # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

compare :: Token -> Token -> Ordering #

(<) :: Token -> Token -> Bool #

(<=) :: Token -> Token -> Bool #

(>) :: Token -> Token -> Bool #

(>=) :: Token -> Token -> Bool #

max :: Token -> Token -> Token #

min :: Token -> Token -> Token #

toToken :: [Text] -> [Token] #

data Trie k e #

Constructors

Node 

Fields

Leaf 

Fields

Instances

Instances details
IsTrie Trie # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

entropyTrie :: Entropy e => (k -> Bool) -> Trie k () -> Trie k e #

nodeEntropy :: Entropy e => Getting e i e -> Trie k i -> e #

nodeChild :: Ord k => k -> Trie k e -> Trie k e #

findTrie :: Ord k => [k] -> Trie k e -> Trie k e #

printTrie :: (Show i, Entropy e) => Getting e i e -> Trie Token i -> IO () #

evTrie :: Entropy e => Getting e i e -> Setter i o e e -> Trie k i -> Trie k o #

normalizeEntropy :: Entropy e => Getting e i e -> ModEntropy i o e -> Trie k i -> Trie k o #

(Show e, Show k) => Show (Trie k e) # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

showsPrec :: Int -> Trie k e -> ShowS #

show :: Trie k e -> String #

showList :: [Trie k e] -> ShowS #

node_entropy :: forall k e. Traversal' (Trie k e) e #

node_count :: forall k e. Lens' (Trie k e) Int #

node_children :: forall k e k. Traversal (Trie k e) (Trie k e) (Map k (Trie k e)) (Map k (Trie k e)) #

insertTrie :: Ord k => [k] -> Trie k () -> Trie k () #

mkTrie :: Monoid e => Int -> Map k (Trie k e) -> Trie k e #

toTree :: k -> Trie k e -> Tree (k, Int, Maybe e) #

Converts a Trie to a Tree, since Tree has a nice print function

normalizeLevel :: Entropy e => e -> e -> e -> e #

chunkAlongEleve :: Int -> [a] -> [[a]] #

data Direction #

Constructors

Backward 
Forward 

buildTrie :: Direction -> Int -> [[Token]] -> Trie Token () #

class IsTrie trie where #

Methods

entropyTrie :: Entropy e => (k -> Bool) -> trie k () -> trie k e #

nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e #

nodeChild :: Ord k => k -> trie k e -> trie k e #

findTrie :: Ord k => [k] -> trie k e -> trie k e #

printTrie :: (Show i, Entropy e) => Getting e i e -> trie Token i -> IO () #

evTrie :: Entropy e => Getting e i e -> Setter i o e e -> trie k i -> trie k o #

normalizeEntropy :: Entropy e => Getting e i e -> ModEntropy i o e -> trie k i -> trie k o #

Instances

Instances details
IsTrie Trie # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

entropyTrie :: Entropy e => (k -> Bool) -> Trie k () -> Trie k e #

nodeEntropy :: Entropy e => Getting e i e -> Trie k i -> e #

nodeChild :: Ord k => k -> Trie k e -> Trie k e #

findTrie :: Ord k => [k] -> Trie k e -> Trie k e #

printTrie :: (Show i, Entropy e) => Getting e i e -> Trie Token i -> IO () #

evTrie :: Entropy e => Getting e i e -> Setter i o e e -> Trie k i -> Trie k o #

normalizeEntropy :: Entropy e => Getting e i e -> ModEntropy i o e -> Trie k i -> Trie k o #

IsTrie Tries # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

entropyTrie :: Entropy e => (k -> Bool) -> Tries k () -> Tries k e #

nodeEntropy :: Entropy e => Getting e i e -> Tries k i -> e #

nodeChild :: Ord k => k -> Tries k e -> Tries k e #

findTrie :: Ord k => [k] -> Tries k e -> Tries k e #

printTrie :: (Show i, Entropy e) => Getting e i e -> Tries Token i -> IO () #

evTrie :: Entropy e => Getting e i e -> Setter i o e e -> Tries k i -> Tries k o #

normalizeEntropy :: Entropy e => Getting e i e -> ModEntropy i o e -> Tries k i -> Tries k o #

levels :: Trie k e -> [[Trie k e]] #

entropyLevels :: Entropy e => Getting e i e -> Trie k i -> [[e]] #

normalizationLevels :: Entropy e => Getting e i e -> Trie k i -> [(e, e, Int)] #

data Tries k e #

Constructors

Tries 

Fields

Instances

Instances details
IsTrie Tries # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

entropyTrie :: Entropy e => (k -> Bool) -> Tries k () -> Tries k e #

nodeEntropy :: Entropy e => Getting e i e -> Tries k i -> e #

nodeChild :: Ord k => k -> Tries k e -> Tries k e #

findTrie :: Ord k => [k] -> Tries k e -> Tries k e #

printTrie :: (Show i, Entropy e) => Getting e i e -> Tries Token i -> IO () #

evTrie :: Entropy e => Getting e i e -> Setter i o e e -> Tries k i -> Tries k o #

normalizeEntropy :: Entropy e => Getting e i e -> ModEntropy i o e -> Tries k i -> Tries k o #

(Show k, Show e) => Show (Tries k e) # 
Instance details

Defined in Gargantext.Core.Text.Terms.Eleve

Methods

showsPrec :: Int -> Tries k e -> ShowS #

show :: Tries k e -> String #

showList :: [Tries k e] -> ShowS #

fwd :: forall k e. Lens' (Tries k e) (Trie k e) #

bwd :: forall k e. Lens' (Tries k e) (Trie k e) #

buildTries :: Int -> [[Token]] -> Tries Token () #

onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o #

mayCons :: [a] -> [[a]] -> [[a]] #

split :: Entropy e => Int -> Lens' i e -> Tries Token i -> [Token] -> [[Text]] #

mainEleve :: Int -> [[Text]] -> [[[Text]]] #

mainEleve' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]] #

mainEleve'' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]] #

This function should take the longest possible chain among the following:

    mainEleve'' n x y = maxChainSizeOf [ mainEleve' n x y, mainEleve' n x x, mainEleve' n y y ]

mainEleveWith :: Tries Token () -> Int -> [[Text]] -> [[[Text]]] #

type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)] #

testEleve :: e ~ Double => Bool -> Int -> [Text] -> Checks e -> IO Bool #

example0 :: [Text] #

TODO real data is a list of tokenized sentences

example1 :: [Text] #

TODO real data is a list of tokenized sentences

example2 :: [Text] #

TODO real data is a list of tokenized sentences

example3 :: [Text] #

TODO real data is a list of tokenized sentences

example4 :: [Text] #

TODO real data is a list of tokenized sentences

example5 :: [Text] #

TODO real data is a list of tokenized sentences

example6 :: [Text] #

TODO real data is a list of tokenized sentences

example7 :: [Text] #

TODO real data is a list of tokenized sentences

example8 :: [Text] #

TODO real data is a list of tokenized sentences

example9 :: [Text] #

TODO real data is a list of tokenized sentences