module Gargantext.Core.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence, words)
where
import Prelude (String)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.List as L
import qualified Data.Set as S
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
-- | Tokenise a text into lower-cased words.
--
-- This is an alias for 'monoTexts', exported under the more familiar name.
words :: Text -> [Text]
words = monoTexts
-- | Tell whether a character is one of the punctuation marks
-- @, . : ; ? ! ( ) { } [ ] \" '@.
--
-- 'monoTextsBySentence' uses this predicate as its split delimiter.
isSep :: Char -> Bool
isSep = (`elem` (",.:;?!(){}[]\"\'" :: String))
-- | Extract the mono-terms of a text.
--
-- The text is tokenised with 'monoTexts' and every resulting token is turned
-- into a 'Terms' value by 'monoText2term', using the language @l@.
monoTerms :: Lang -> Text -> [Terms]
monoTerms l txt = map (monoText2term l) (monoTexts txt)
-- | Tokenise a text into a flat list of words.
--
-- The per-sentence token lists produced by 'monoTextsBySentence' are
-- concatenated in order, dropping the sentence boundaries.
monoTexts :: Text -> [Text]
monoTexts = L.concat . monoTextsBySentence
-- | Wrap a single token into a 'Terms' value.
--
-- The label is the one-element list @[txt]@ and the stems are the singleton
-- set holding @stem lang txt@.
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton (stem lang txt))
-- | Tokenise a text, keeping one list of words per sentence.
--
-- The pipeline, applied right to left:
--
--   1. lower-case the whole text ('T.toLower');
--   2. cut it at every character accepted by 'isSep' ('T.split');
--   3. break each piece into whitespace-separated words ('T.words').
--
-- For example @"Hello, world. Bye"@ gives @[["hello"],["world"],["bye"]]@.
--
-- 'T.split' yields an empty piece between adjacent separators and after a
-- trailing one, and 'T.words' maps an empty piece to @[]@, so the result may
-- contain empty sentences.
monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence =
  map T.words
    . T.split isSep
    . T.toLower