{-|
Module      : Gargantext.Core.Text.Terms.WithList
Description : 
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Here is a longer description of this module, containing some
commentary with @some markup@.

-}
{-# LANGUAGE BangPatterns      #-}

module Gargantext.Core.Text.Terms.WithList where

import Data.List (null)
import Data.Ord
import Data.Text (Text, concat, unwords)
import Gargantext.Prelude
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence)
import Prelude (error)
import qualified Data.Algorithms.KMP as KMP
import qualified Data.IntMap.Strict  as IntMap
import qualified Data.List           as List
------------------------------------------------------------------------

data Pattern = Pattern
  { Pattern -> Table Text
_pat_table  :: !(KMP.Table Text)
  , Pattern -> Int
_pat_length :: !Int
  , Pattern -> [Text]
_pat_terms  :: ![Text]
  }
type Patterns = [Pattern]

------------------------------------------------------------------------
replaceTerms :: Patterns -> [Text] -> [[Text]]
replaceTerms :: Patterns -> [Text] -> [[Text]]
replaceTerms Patterns
pats [Text]
terms = Int -> [[Text]]
go Int
0
  where
    terms_len :: Int
terms_len = [Text] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
terms

    go :: Int -> [[Text]]
go Int
ix | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
terms_len = []
          | Bool
otherwise =
      case Int -> IntMap (Int, [Text]) -> Maybe (Int, [Text])
forall a. Int -> IntMap a -> Maybe a
IntMap.lookup Int
ix IntMap (Int, [Text])
m of
        Maybe (Int, [Text])
Nothing -> Int -> [[Text]]
go (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        Just (Int
len, [Text]
term) ->
          [Text]
term [Text] -> [[Text]] -> [[Text]]
forall a. a -> [a] -> [a]
: Int -> [[Text]]
go (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len)


    merge :: (a, b) -> (a, b) -> (a, b)
merge (a
len1, b
lab1) (a
len2, b
lab2) =
      if a
len2 a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
len1 then (a
len1, b
lab1) else (a
len2, b
lab2)

    m :: IntMap (Int, [Text])
m =
      ((Int, [Text]) -> (Int, [Text]) -> (Int, [Text]))
-> [(Int, (Int, [Text]))] -> IntMap (Int, [Text])
forall a. (a -> a -> a) -> [(Int, a)] -> IntMap a
IntMap.fromListWith (Int, [Text]) -> (Int, [Text]) -> (Int, [Text])
forall a b. Ord a => (a, b) -> (a, b) -> (a, b)
merge
        [ (Int
ix, (Int
len, [Text]
term))
        | Pattern Table Text
pat Int
len [Text]
term <- Patterns
pats, Int
ix <- Table Text -> [Text] -> [Int]
forall a. Eq a => Table a -> [a] -> [Int]
KMP.match Table Text
pat [Text]
terms ]

buildPatterns :: TermList -> Patterns
buildPatterns :: TermList -> Patterns
buildPatterns = (Pattern -> Down Int) -> Patterns -> Patterns
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortWith (Int -> Down Int
forall a. a -> Down a
Down (Int -> Down Int) -> (Pattern -> Int) -> Pattern -> Down Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Pattern -> Int
_pat_length) (Patterns -> Patterns)
-> (TermList -> Patterns) -> TermList -> Patterns
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (([Text], [[Text]]) -> Patterns) -> TermList -> Patterns
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ([Text], [[Text]]) -> Patterns
buildPattern
  where
    buildPattern :: ([Text], [[Text]]) -> Patterns
buildPattern ([Text]
label, [[Text]]
alts) = ([Text] -> Pattern) -> [[Text]] -> Patterns
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map [Text] -> Pattern
f ([Text]
label [Text] -> [[Text]] -> [[Text]]
forall a. a -> [a] -> [a]
: [[Text]]
alts)
      where
        f :: [Text] -> Pattern
f [Text]
alt | Text
"" Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
alt = [Char] -> Pattern
forall a. HasCallStack => [Char] -> a
error [Char]
"buildPatterns: ERR1"
              | [Text] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Text]
alt      = [Char] -> Pattern
forall a. HasCallStack => [Char] -> a
error [Char]
"buildPatterns: ERR2"
              | Bool
otherwise     =
                Table Text -> Int -> [Text] -> Pattern
Pattern ([Text] -> Table Text
forall a. Eq a => [a] -> Table a
KMP.build [Text]
alt) ([Text] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
alt) [Text]
label
                        --(Terms label $ Set.empty) -- TODO check stems


--------------------------------------------------------------------------
-- Utils
type BlockText   = Text
type MatchedText = Text
termsInText :: Patterns -> BlockText -> [MatchedText]
termsInText :: Patterns -> Text -> [Text]
termsInText Patterns
pats Text
txt = [Text] -> [Text]
forall a. Eq a => [a] -> [a]
List.nub
                     ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ [[Text]] -> [Text]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
List.concat
                     ([[Text]] -> [Text]) -> [[Text]] -> [Text]
forall a b. (a -> b) -> a -> b
$ ([[Text]] -> [Text]) -> [[[Text]]] -> [[Text]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (([Text] -> Text) -> [[Text]] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map [Text] -> Text
unwords)
                     ([[[Text]]] -> [[Text]]) -> [[[Text]]] -> [[Text]]
forall a b. (a -> b) -> a -> b
$ Patterns -> Text -> [[[Text]]]
extractTermsWithList Patterns
pats Text
txt

--------------------------------------------------------------------------

extractTermsWithList :: Patterns -> Text -> Corpus [Text]
extractTermsWithList :: Patterns -> Text -> [[[Text]]]
extractTermsWithList Patterns
pats = ([Text] -> [[Text]]) -> [[Text]] -> [[[Text]]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Patterns -> [Text] -> [[Text]]
replaceTerms Patterns
pats) ([[Text]] -> [[[Text]]])
-> (Text -> [[Text]]) -> Text -> [[[Text]]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [[Text]]
monoTextsBySentence

-- | Extract terms
-- >>> let termList = [(["chat blanc"], [["chat","blanc"]])] :: TermList
-- extractTermsWithList' (buildPatterns termList) "Le chat blanc"["chat blanc"]
-- ["chat blanc"]
extractTermsWithList' :: Patterns -> Text -> [Text]
extractTermsWithList' :: Patterns -> Text -> [Text]
extractTermsWithList' Patterns
pats = ([Text] -> Text) -> [[Text]] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ([Text] -> Text
concat ([Text] -> Text) -> ([Text] -> [Text]) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Text] -> Text) -> [[Text]] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map [Text] -> Text
concat ([[Text]] -> [Text]) -> ([Text] -> [[Text]]) -> [Text] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Patterns -> [Text] -> [[Text]]
replaceTerms Patterns
pats)
                           ([[Text]] -> [Text]) -> (Text -> [[Text]]) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [[Text]]
monoTextsBySentence

--------------------------------------------------------------------------

{- | Not used
filterWith :: TermList
           -> (a -> Text)
           -> [a] 
           -> [(a, [Text])]
filterWith termList f xs = filterWith' termList f zip xs


filterWith' :: TermList
           -> (a -> Text)
           -> ([a] -> [[Text]] -> [b])
           -> [a] 
           -> [b]
filterWith' termList f f' xs = f' xs
                            $ map (extractTermsWithList' pats)
                            $ map f xs
    where
      pats = buildPatterns termList
-}