module Gargantext.Core.Text
where
import Data.Text (Text, split)
import Gargantext.Prelude hiding (filter)
import NLP.FullStop (segment)
import qualified Data.Text as DT
class HasText h
where
hasText :: h -> [Text]
newtype Texte = Texte Text
newtype Paragraphe = Paragraphe Text
newtype Phrase = Phrase Text
newtype MultiTerme = MultiTerme Text
newtype Mot = Mot Text
newtype Lettre = Lettre Text
type Titre = Phrase
instance Show Texte where
show :: Texte -> String
show (Texte Text
t) = Text -> String
forall a. Show a => a -> String
show Text
t
instance Show Paragraphe where
show :: Paragraphe -> String
show (Paragraphe Text
p) = Text -> String
forall a. Show a => a -> String
show Text
p
instance Show Phrase where
show :: Phrase -> String
show (Phrase Text
p) = Text -> String
forall a. Show a => a -> String
show Text
p
instance Show MultiTerme where
show :: MultiTerme -> String
show (MultiTerme Text
mt) = Text -> String
forall a. Show a => a -> String
show Text
mt
instance Show Mot where
show :: Mot -> String
show (Mot Text
t) = Text -> String
forall a. Show a => a -> String
show Text
t
instance Show Lettre where
show :: Lettre -> String
show (Lettre Text
l) = Text -> String
forall a. Show a => a -> String
show Text
l
class Collage sup inf where
dec :: sup -> [inf]
inc :: [inf] -> sup
instance Collage Texte Paragraphe where
dec :: Texte -> [Paragraphe]
dec (Texte Text
t) = (Text -> Paragraphe) -> [Text] -> [Paragraphe]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Text -> Paragraphe
Paragraphe ([Text] -> [Paragraphe]) -> [Text] -> [Paragraphe]
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text]
DT.splitOn Text
"\n" Text
t
inc :: [Paragraphe] -> Texte
inc = Text -> Texte
Texte (Text -> Texte) -> ([Paragraphe] -> Text) -> [Paragraphe] -> Texte
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text] -> Text
DT.intercalate Text
"\n" ([Text] -> Text)
-> ([Paragraphe] -> [Text]) -> [Paragraphe] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Paragraphe -> Text) -> [Paragraphe] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Paragraphe Text
t) -> Text
t)
instance Collage Paragraphe Phrase where
dec :: Paragraphe -> [Phrase]
dec (Paragraphe Text
t) = (Text -> Phrase) -> [Text] -> [Phrase]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Text -> Phrase
Phrase ([Text] -> [Phrase]) -> [Text] -> [Phrase]
forall a b. (a -> b) -> a -> b
$ Text -> [Text]
sentences Text
t
inc :: [Phrase] -> Paragraphe
inc = Text -> Paragraphe
Paragraphe (Text -> Paragraphe)
-> ([Phrase] -> Text) -> [Phrase] -> Paragraphe
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
DT.unwords ([Text] -> Text) -> ([Phrase] -> [Text]) -> [Phrase] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Phrase -> Text) -> [Phrase] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Phrase Text
p) -> Text
p)
instance Collage Phrase MultiTerme where
dec :: Phrase -> [MultiTerme]
dec (Phrase Text
t) = (Text -> MultiTerme) -> [Text] -> [MultiTerme]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Text -> MultiTerme
MultiTerme ([Text] -> [MultiTerme]) -> [Text] -> [MultiTerme]
forall a b. (a -> b) -> a -> b
$ Text -> [Text]
DT.words Text
t
inc :: [MultiTerme] -> Phrase
inc = Text -> Phrase
Phrase (Text -> Phrase)
-> ([MultiTerme] -> Text) -> [MultiTerme] -> Phrase
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
DT.unwords ([Text] -> Text)
-> ([MultiTerme] -> [Text]) -> [MultiTerme] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (MultiTerme -> Text) -> [MultiTerme] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(MultiTerme Text
p) -> Text
p)
instance Collage MultiTerme Mot where
dec :: MultiTerme -> [Mot]
dec (MultiTerme Text
mt) = (Text -> Mot) -> [Text] -> [Mot]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Text -> Mot
Mot ([Text] -> [Mot]) -> [Text] -> [Mot]
forall a b. (a -> b) -> a -> b
$ Text -> [Text]
DT.words Text
mt
inc :: [Mot] -> MultiTerme
inc = Text -> MultiTerme
MultiTerme (Text -> MultiTerme) -> ([Mot] -> Text) -> [Mot] -> MultiTerme
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text] -> Text
DT.intercalate Text
" " ([Text] -> Text) -> ([Mot] -> [Text]) -> [Mot] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Mot -> Text) -> [Mot] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Mot Text
m) -> Text
m)
sentences :: Text -> [Text]
sentences :: Text -> [Text]
sentences Text
txt = (String -> Text) -> [String] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map String -> Text
DT.pack ([String] -> [Text]) -> [String] -> [Text]
forall a b. (a -> b) -> a -> b
$ String -> [String]
segment (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ Text -> String
DT.unpack Text
txt
sentences' :: Text -> [Text]
sentences' :: Text -> [Text]
sentences' Text
txt = (Char -> Bool) -> Text -> [Text]
split Char -> Bool
isCharStop Text
txt
isCharStop :: Char -> Bool
isCharStop :: Char -> Bool
isCharStop Char
c = Char
c Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'.',Char
'?',Char
'!']
unsentences :: [Text] -> Text
unsentences :: [Text] -> Text
unsentences [Text]
txts = Text -> [Text] -> Text
DT.intercalate Text
" " [Text]
txts
size :: Text -> Int
size :: Text -> Int
size Text
t = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Text -> Int
DT.count Text
" " Text
t