{-|
Module      : Gargantext.Core.Text.Terms.Mono
Description : Mono Terms module
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Mono-terms are Nterms where n == 1.

-}


module Gargantext.Core.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence, words)
  where

import Prelude (String)

import Data.Text (Text)
import qualified Data.Text as T

import qualified Data.List as L
import qualified Data.Set as S

import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Mono.Stem (stem)

import Gargantext.Prelude
--import Data.Char (isAlphaNum, isSpace)

-- TODO remove Num ?
--isGram  c  = isAlphaNum c

-- | Alias of 'monoTexts': splits a text into its flat list of mono-texts.
words :: Text -> [Text]
words = monoTexts

-- | Sentence split separators.
--
-- 'True' exactly when the character is one of the punctuation marks,
-- brackets or quote characters listed in the literal below; every other
-- character (whitespace included) is not a separator.
isSep :: Char -> Bool
isSep = (`elem` (",.:;?!(){}[]\"\'" :: String))

-- | Split a text into mono-texts with 'monoTexts' and turn each of them
-- into a 'Terms' value with 'monoText2term', using the language @l@.
monoTerms :: Lang -> Text -> [Terms]
monoTerms l txt = map (monoText2term l) $ monoTexts txt

-- | Flat list of the mono-texts of a text: the per-sentence lists produced
-- by 'monoTextsBySentence', concatenated in order.
monoTexts :: Text -> [Text]
monoTexts = L.concat . monoTextsBySentence

-- | Build the 'Terms' of a single mono-text: the label is the singleton
-- list @[txt]@ and the stems are the singleton set holding @stem lang txt@.
--
-- TODO use text2term only
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt)

-- | Mono-texts of a text, grouped by sentence.
--
-- The pipeline (read bottom-up) lower-cases the text, cuts it at every
-- character accepted by 'isSep', then splits each resulting chunk into
-- words with 'T.words'. A chunk that contains no word (for instance the
-- empty chunk between two adjacent separators) yields an empty list.
monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map T.words
                    . T.split isSep
                    . T.toLower