{-|
Module      : Gargantext.Core.Text.Context
Description : How to manage contexts of texts ?
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Tools for managing contexts of text. The main types are:

- Term
- Multi-term
- Label
- Sentence
- Corpus

How to split contexts is described in this module.

-}


module Gargantext.Core.Text.Context
  where

import Data.Text (Text, pack, unpack)
import Data.String (IsString)

import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
import Gargantext.Core.Text
import Gargantext.Prelude hiding (length)

------------------------------------------------------------------------
-- | A single term, represented as 'Text'.
type Term = Text
-- | A multi-term: a list of 'Term's.
type MultiTerm = [Term]
-- | A label; it has the same representation as a 'MultiTerm'.
type Label = MultiTerm

-- | A list of pairs, each associating a 'Label' with a list of 'MultiTerm's.
type TermList = [(Label, [MultiTerm])]

-- | A sentence (or a nominal group): a list of elements of type @a@.
type Sentence  a = [a] -- or a nominal group
-- | A corpus: a list of 'Sentence's over the same element type @a@.
type Corpus    a = [Sentence a] -- a list of sentences

-- type ConText a = [Sentence a]
-- type Corpus a = [ConText a]
------------------------------------------------------------------------

-- | Granularity used by 'splitBy' to build/unbuild contexts.
--
-- Every constructor carries an 'Int'. For 'Chars' and 'Sentences',
-- 'splitBy' passes that value plus one as the first argument of
-- @chunkAlong@; for 'Paragraphs' the value is ignored by 'splitBy'.
data SplitContext = Chars Int | Sentences Int | Paragraphs Int

-- | Split a 'Text' into contexts of characters, sentences or paragraphs.
--
-- To see some examples at a higher level (sentences and paragraphs), see
-- 'Gargantext.Core.Text.Examples.ex_terms'.
--
-- * @'Chars' n@: the text is unpacked and its characters are handed to
--   @chunkAlong (n + 1) 1@; as the examples below show, this yields
--   overlapping windows of @n + 1@ characters, each shifted by one.
--
-- * @'Sentences' n@: the same windowing is applied to the output of
--   'sentences', and every window is joined back into a single 'Text'
--   with 'unsentences'.
--
-- * @'Paragraphs' _@: the text is parsed with 'parseTags' and the content
--   of every text tag is returned. The 'Int' argument is ignored.
--
-- >>> splitBy (Chars 0) (pack "abcde")
-- ["a","b","c","d","e"]
--
-- >>> splitBy (Chars 1) (pack "abcde")
-- ["ab","bc","cd","de"]
--
-- >>> splitBy (Chars 2) (pack "abcde")
-- ["abc","bcd","cde"]
splitBy :: SplitContext -> Text -> [Text]
splitBy (Chars     n)  = map pack        . chunkAlong (n + 1) 1 . unpack
splitBy (Sentences n)  = map unsentences . chunkAlong (n + 1) 1 . sentences
splitBy (Paragraphs _) = map unTag       . filter isTagText   . parseTags
  where
    -- Extract the payload of a 'TagText'. The fallback equation is not
    -- expected to fire here, since 'filter' 'isTagText' runs first; it
    -- only keeps the helper total.
    unTag :: IsString p => Tag p -> p
    unTag (TagText x) = x
    unTag _           = ""