{-|
Module      : Gargantext.Core.Viz.Phylo.PhyloMaker
Description : Maker engine for rebuilding a Phylo
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}


module Gargantext.Core.Viz.Phylo.PhyloMaker where

import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Vector (Vector)
import Data.Text (Text)

import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)


import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Control.Lens hiding (Level)

import qualified Data.Vector as Vector
import qualified Data.Set as Set

------------------
-- | To Phylo | --
------------------

{-
-- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
            | PhyloN    { _phylo'_phylo1    :: Phylo}


toPhylo' :: Phylo' -> [Document] -> TermList -> Config -> Phylo
toPhylo' (PhyloN    phylo) = toPhylo' 
toPhylo' (PhyloBase phylo) = toPhylo 
-}


toPhylo :: Phylo -> Phylo
toPhylo :: Phylo -> Phylo
toPhylo Phylo
phyloStep = String -> Phylo -> Phylo
forall a. String -> a -> a
trace (String
"# phylo1 groups " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show([PhyloGroup] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([PhyloGroup] -> Int) -> [PhyloGroup] -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Phylo -> [PhyloGroup]
getGroupsFromLevel Int
1 Phylo
phylo1))
                      (Phylo -> Phylo) -> Phylo -> Phylo
forall a b. (a -> b) -> a -> b
$ Int -> Phylo -> Phylo
traceToPhylo (Config -> Int
phyloLevel (Config -> Int) -> Config -> Int
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phyloStep) (Phylo -> Phylo) -> Phylo -> Phylo
forall a b. (a -> b) -> a -> b
$
    if (Config -> Int
phyloLevel (Config -> Int) -> Config -> Int
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phyloStep) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1
      then (Phylo -> Int -> Phylo) -> Phylo -> [Int] -> Phylo
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Phylo
phylo' Int
_ -> Phylo -> Phylo
synchronicClustering Phylo
phylo') Phylo
phyloAncestors [Int
2..(Config -> Int
phyloLevel (Config -> Int) -> Config -> Int
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phyloStep)]
      else Phylo
phylo1 
    where
        --------------------------------------
        phyloAncestors :: Phylo
        phyloAncestors :: Phylo
phyloAncestors = 
            if (Config -> Bool
findAncestors (Config -> Bool) -> Config -> Bool
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phyloStep)
              then Phylo -> Phylo
toHorizon Phylo
phylo1
              else Phylo
phylo1
        --------------------------------------
        phylo1 :: Phylo
        phylo1 :: Phylo
phylo1 = Phylo -> Phylo
toPhylo1 Phylo
phyloStep
        -- > AD to db here
        --------------------------------------



--------------------
-- | To Phylo 1 | --
--------------------

toGroupsProxi :: Level -> Phylo -> Phylo
toGroupsProxi :: Int -> Phylo -> Phylo
toGroupsProxi Int
lvl Phylo
phylo = 
  let proximity :: Proximity
proximity = Config -> Proximity
phyloProximity (Config -> Proximity) -> Config -> Proximity
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phylo
      groupsProxi :: [((PhyloGroupId, PhyloGroupId), Double)]
groupsProxi = ([((PhyloGroupId, PhyloGroupId), Double)]
 -> PhyloPeriodId
 -> PhyloPeriod
 -> [((PhyloGroupId, PhyloGroupId), Double)])
-> [((PhyloGroupId, PhyloGroupId), Double)]
-> Map PhyloPeriodId PhyloPeriod
-> [((PhyloGroupId, PhyloGroupId), Double)]
forall a k b. (a -> k -> b -> a) -> a -> Map k b -> a
foldlWithKey (\[((PhyloGroupId, PhyloGroupId), Double)]
acc PhyloPeriodId
pId PhyloPeriod
pds -> 
                      -- 1) process period by period
                      let egos :: [(PhyloGroupId, [Int])]
egos = (PhyloGroup -> (PhyloGroupId, [Int]))
-> [PhyloGroup] -> [(PhyloGroupId, [Int])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\PhyloGroup
g -> (PhyloGroup -> PhyloGroupId
getGroupId PhyloGroup
g, PhyloGroup
g PhyloGroup -> Getting [Int] PhyloGroup [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloGroup [Int]
Lens' PhyloGroup [Int]
phylo_groupNgrams))
                               ([PhyloGroup] -> [(PhyloGroupId, [Int])])
-> [PhyloGroup] -> [(PhyloGroupId, [Int])]
forall a b. (a -> b) -> a -> b
$ Map PhyloGroupId PhyloGroup -> [PhyloGroup]
forall k a. Map k a -> [a]
elems 
                               (Map PhyloGroupId PhyloGroup -> [PhyloGroup])
-> Map PhyloGroupId PhyloGroup -> [PhyloGroup]
forall a b. (a -> b) -> a -> b
$ Getting
  (Map PhyloGroupId PhyloGroup)
  PhyloPeriod
  (Map PhyloGroupId PhyloGroup)
-> PhyloPeriod -> Map PhyloGroupId PhyloGroup
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view ( (Map PhyloLevelId PhyloLevel
 -> Const
      (Map PhyloGroupId PhyloGroup) (Map PhyloLevelId PhyloLevel))
-> PhyloPeriod -> Const (Map PhyloGroupId PhyloGroup) PhyloPeriod
Lens' PhyloPeriod (Map PhyloLevelId PhyloLevel)
phylo_periodLevels 
                                      ((Map PhyloLevelId PhyloLevel
  -> Const
       (Map PhyloGroupId PhyloGroup) (Map PhyloLevelId PhyloLevel))
 -> PhyloPeriod -> Const (Map PhyloGroupId PhyloGroup) PhyloPeriod)
-> ((Map PhyloGroupId PhyloGroup
     -> Const
          (Map PhyloGroupId PhyloGroup) (Map PhyloGroupId PhyloGroup))
    -> Map PhyloLevelId PhyloLevel
    -> Const
         (Map PhyloGroupId PhyloGroup) (Map PhyloLevelId PhyloLevel))
-> Getting
     (Map PhyloGroupId PhyloGroup)
     PhyloPeriod
     (Map PhyloGroupId PhyloGroup)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (PhyloLevel -> Const (Map PhyloGroupId PhyloGroup) PhyloLevel)
-> Map PhyloLevelId PhyloLevel
-> Const
     (Map PhyloGroupId PhyloGroup) (Map PhyloLevelId PhyloLevel)
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse ((PhyloLevel -> Const (Map PhyloGroupId PhyloGroup) PhyloLevel)
 -> Map PhyloLevelId PhyloLevel
 -> Const
      (Map PhyloGroupId PhyloGroup) (Map PhyloLevelId PhyloLevel))
-> ((Map PhyloGroupId PhyloGroup
     -> Const
          (Map PhyloGroupId PhyloGroup) (Map PhyloGroupId PhyloGroup))
    -> PhyloLevel -> Const (Map PhyloGroupId PhyloGroup) PhyloLevel)
-> (Map PhyloGroupId PhyloGroup
    -> Const
         (Map PhyloGroupId PhyloGroup) (Map PhyloGroupId PhyloGroup))
-> Map PhyloLevelId PhyloLevel
-> Const
     (Map PhyloGroupId PhyloGroup) (Map PhyloLevelId PhyloLevel)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (PhyloLevel -> Bool)
-> Optic'
     (->) (Const (Map PhyloGroupId PhyloGroup)) PhyloLevel PhyloLevel
forall (p :: * -> * -> *) (f :: * -> *) a.
(Choice p, Applicative f) =>
(a -> Bool) -> Optic' p f a a
filtered (\PhyloLevel
phyloLvl -> PhyloLevel
phyloLvl PhyloLevel -> Getting Int PhyloLevel Int -> Int
forall s a. s -> Getting a s a -> a
^. Getting Int PhyloLevel Int
Lens' PhyloLevel Int
phylo_levelLevel Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
lvl) 
                                      Optic'
  (->) (Const (Map PhyloGroupId PhyloGroup)) PhyloLevel PhyloLevel
-> ((Map PhyloGroupId PhyloGroup
     -> Const
          (Map PhyloGroupId PhyloGroup) (Map PhyloGroupId PhyloGroup))
    -> PhyloLevel -> Const (Map PhyloGroupId PhyloGroup) PhyloLevel)
-> (Map PhyloGroupId PhyloGroup
    -> Const
         (Map PhyloGroupId PhyloGroup) (Map PhyloGroupId PhyloGroup))
-> PhyloLevel
-> Const (Map PhyloGroupId PhyloGroup) PhyloLevel
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Map PhyloGroupId PhyloGroup
 -> Const
      (Map PhyloGroupId PhyloGroup) (Map PhyloGroupId PhyloGroup))
-> PhyloLevel -> Const (Map PhyloGroupId PhyloGroup) PhyloLevel
Lens' PhyloLevel (Map PhyloGroupId PhyloGroup)
phylo_levelGroups ) PhyloPeriod
pds
                          next :: [PhyloPeriodId]
next    = Filiation
-> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods Filiation
ToParents (TimeUnit -> Int
getTimeFrame (TimeUnit -> Int) -> TimeUnit -> Int
forall a b. (a -> b) -> a -> b
$ Config -> TimeUnit
timeUnit (Config -> TimeUnit) -> Config -> TimeUnit
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phylo) PhyloPeriodId
pId (Map PhyloPeriodId PhyloPeriod -> [PhyloPeriodId]
forall k a. Map k a -> [k]
keys (Map PhyloPeriodId PhyloPeriod -> [PhyloPeriodId])
-> Map PhyloPeriodId PhyloPeriod -> [PhyloPeriodId]
forall a b. (a -> b) -> a -> b
$ Phylo
phylo Phylo
-> Getting
     (Map PhyloPeriodId PhyloPeriod)
     Phylo
     (Map PhyloPeriodId PhyloPeriod)
-> Map PhyloPeriodId PhyloPeriod
forall s a. s -> Getting a s a -> a
^. Getting
  (Map PhyloPeriodId PhyloPeriod)
  Phylo
  (Map PhyloPeriodId PhyloPeriod)
Lens' Phylo (Map PhyloPeriodId PhyloPeriod)
phylo_periods)
                          targets :: [(PhyloGroupId, [Int])]
targets = (PhyloGroup -> (PhyloGroupId, [Int]))
-> [PhyloGroup] -> [(PhyloGroupId, [Int])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\PhyloGroup
g ->  (PhyloGroup -> PhyloGroupId
getGroupId PhyloGroup
g, PhyloGroup
g PhyloGroup -> Getting [Int] PhyloGroup [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloGroup [Int]
Lens' PhyloGroup [Int]
phylo_groupNgrams)) ([PhyloGroup] -> [(PhyloGroupId, [Int])])
-> [PhyloGroup] -> [(PhyloGroupId, [Int])]
forall a b. (a -> b) -> a -> b
$ Int -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods Int
lvl [PhyloPeriodId]
next Phylo
phylo
                          docs :: Map Int Double
docs    = Map Int Double -> [PhyloPeriodId] -> Map Int Double
filterDocs  (Phylo
phylo Phylo
-> Getting (Map Int Double) Phylo (Map Int Double)
-> Map Int Double
forall s a. s -> Getting a s a -> a
^. Getting (Map Int Double) Phylo (Map Int Double)
Lens' Phylo (Map Int Double)
phylo_timeDocs) ([PhyloPeriodId
pId] [PhyloPeriodId] -> [PhyloPeriodId] -> [PhyloPeriodId]
forall a. [a] -> [a] -> [a]
++ [PhyloPeriodId]
next)
                          diagos :: Map Int Cooc
diagos  = Map Int Cooc -> [PhyloPeriodId] -> Map Int Cooc
filterDiago (Phylo
phylo Phylo
-> Getting (Map Int Cooc) Phylo (Map Int Cooc) -> Map Int Cooc
forall s a. s -> Getting a s a -> a
^. Getting (Map Int Cooc) Phylo (Map Int Cooc)
Lens' Phylo (Map Int Cooc)
phylo_timeCooc) ([PhyloPeriodId
pId] [PhyloPeriodId] -> [PhyloPeriodId] -> [PhyloPeriodId]
forall a. [a] -> [a] -> [a]
++ [PhyloPeriodId]
next)
                          -- 2) compute the pairs in parallel
                          pairs :: [[((PhyloGroupId, PhyloGroupId), Double)]]
pairs  = ((PhyloGroupId, [Int]) -> [((PhyloGroupId, PhyloGroupId), Double)])
-> [(PhyloGroupId, [Int])]
-> [[((PhyloGroupId, PhyloGroupId), Double)]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(PhyloGroupId
id,[Int]
ngrams) -> 
                                        ((PhyloGroupId, [Int]) -> ((PhyloGroupId, PhyloGroupId), Double))
-> [(PhyloGroupId, [Int])]
-> [((PhyloGroupId, PhyloGroupId), Double)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(PhyloGroupId
id',[Int]
ngrams') -> 
                                            let nbDocs :: Double
nbDocs = ([Double] -> Double
forall (f :: * -> *) a. (Foldable f, Num a) => f a -> a
sum ([Double] -> Double)
-> (Map Int Double -> [Double]) -> Map Int Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Int Double -> [Double]
forall k a. Map k a -> [a]
elems) (Map Int Double -> Double) -> Map Int Double -> Double
forall a b. (a -> b) -> a -> b
$ Map Int Double -> [PhyloPeriodId] -> Map Int Double
filterDocs Map Int Double
docs    ([PhyloGroupId -> PhyloPeriodId
idToPrd PhyloGroupId
id, PhyloGroupId -> PhyloPeriodId
idToPrd PhyloGroupId
id'])
                                                diago :: Map Int Double
diago  = Map Int Cooc -> Map Int Double
reduceDiagos  (Map Int Cooc -> Map Int Double) -> Map Int Cooc -> Map Int Double
forall a b. (a -> b) -> a -> b
$ Map Int Cooc -> [PhyloPeriodId] -> Map Int Cooc
filterDiago Map Int Cooc
diagos ([PhyloGroupId -> PhyloPeriodId
idToPrd PhyloGroupId
id, PhyloGroupId -> PhyloPeriodId
idToPrd PhyloGroupId
id'])
                                             in ((PhyloGroupId
id,PhyloGroupId
id'),Double
-> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
toProximity Double
nbDocs Map Int Double
diago Proximity
proximity [Int]
ngrams [Int]
ngrams' [Int]
ngrams')
                                        ) ([(PhyloGroupId, [Int])]
 -> [((PhyloGroupId, PhyloGroupId), Double)])
-> [(PhyloGroupId, [Int])]
-> [((PhyloGroupId, PhyloGroupId), Double)]
forall a b. (a -> b) -> a -> b
$ ((PhyloGroupId, [Int]) -> Bool)
-> [(PhyloGroupId, [Int])] -> [(PhyloGroupId, [Int])]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(PhyloGroupId
_,[Int]
ngrams') -> (Bool -> Bool
not (Bool -> Bool) -> ([Int] -> Bool) -> [Int] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null) ([Int] -> Bool) -> [Int] -> Bool
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int] -> [Int]
forall a. Eq a => [a] -> [a] -> [a]
intersect [Int]
ngrams [Int]
ngrams') [(PhyloGroupId, [Int])]
targets 
                                 ) [(PhyloGroupId, [Int])]
egos
                          pairs' :: [[((PhyloGroupId, PhyloGroupId), Double)]]
pairs' = [[((PhyloGroupId, PhyloGroupId), Double)]]
pairs [[((PhyloGroupId, PhyloGroupId), Double)]]
-> Strategy [[((PhyloGroupId, PhyloGroupId), Double)]]
-> [[((PhyloGroupId, PhyloGroupId), Double)]]
forall a. a -> Strategy a -> a
`using` Strategy [((PhyloGroupId, PhyloGroupId), Double)]
-> Strategy [[((PhyloGroupId, PhyloGroupId), Double)]]
forall a. Strategy a -> Strategy [a]
parList Strategy [((PhyloGroupId, PhyloGroupId), Double)]
forall a. NFData a => Strategy a
rdeepseq
                       in [((PhyloGroupId, PhyloGroupId), Double)]
acc [((PhyloGroupId, PhyloGroupId), Double)]
-> [((PhyloGroupId, PhyloGroupId), Double)]
-> [((PhyloGroupId, PhyloGroupId), Double)]
forall a. [a] -> [a] -> [a]
++ ([[((PhyloGroupId, PhyloGroupId), Double)]]
-> [((PhyloGroupId, PhyloGroupId), Double)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[((PhyloGroupId, PhyloGroupId), Double)]]
pairs')
                    ) [] (Map PhyloPeriodId PhyloPeriod
 -> [((PhyloGroupId, PhyloGroupId), Double)])
-> Map PhyloPeriodId PhyloPeriod
-> [((PhyloGroupId, PhyloGroupId), Double)]
forall a b. (a -> b) -> a -> b
$ Phylo
phylo Phylo
-> Getting
     (Map PhyloPeriodId PhyloPeriod)
     Phylo
     (Map PhyloPeriodId PhyloPeriod)
-> Map PhyloPeriodId PhyloPeriod
forall s a. s -> Getting a s a -> a
^. Getting
  (Map PhyloPeriodId PhyloPeriod)
  Phylo
  (Map PhyloPeriodId PhyloPeriod)
Lens' Phylo (Map PhyloPeriodId PhyloPeriod)
phylo_periods
   in Phylo
phylo Phylo -> (Phylo -> Phylo) -> Phylo
forall a b. a -> (a -> b) -> b
& (Map (PhyloGroupId, PhyloGroupId) Double
 -> Identity (Map (PhyloGroupId, PhyloGroupId) Double))
-> Phylo -> Identity Phylo
Lens' Phylo (Map (PhyloGroupId, PhyloGroupId) Double)
phylo_groupsProxi ((Map (PhyloGroupId, PhyloGroupId) Double
  -> Identity (Map (PhyloGroupId, PhyloGroupId) Double))
 -> Phylo -> Identity Phylo)
-> Map (PhyloGroupId, PhyloGroupId) Double -> Phylo -> Phylo
forall s t a b. ASetter s t a b -> b -> s -> t
.~ ((Map (PhyloGroupId, PhyloGroupId) Double
-> Map (PhyloGroupId, PhyloGroupId) Double
traceGroupsProxi (Map (PhyloGroupId, PhyloGroupId) Double
 -> Map (PhyloGroupId, PhyloGroupId) Double)
-> ([((PhyloGroupId, PhyloGroupId), Double)]
    -> Map (PhyloGroupId, PhyloGroupId) Double)
-> [((PhyloGroupId, PhyloGroupId), Double)]
-> Map (PhyloGroupId, PhyloGroupId) Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [((PhyloGroupId, PhyloGroupId), Double)]
-> Map (PhyloGroupId, PhyloGroupId) Double
forall k a. Ord k => [(k, a)] -> Map k a
fromList) [((PhyloGroupId, PhyloGroupId), Double)]
groupsProxi) 


appendGroups :: (a -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups :: (a
 -> PhyloPeriodId
 -> (Text, Text)
 -> Int
 -> Int
 -> [Cooc]
 -> PhyloGroup)
-> Int -> Map PhyloPeriodId [a] -> Phylo -> Phylo
appendGroups a
-> PhyloPeriodId
-> (Text, Text)
-> Int
-> Int
-> [Cooc]
-> PhyloGroup
f Int
lvl Map PhyloPeriodId [a]
m Phylo
phylo =  String -> Phylo -> Phylo
forall a. String -> a -> a
trace (String
"\n" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"-- | Append " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([a] -> Int) -> [a] -> Int
forall a b. (a -> b) -> a -> b
$ [[a]] -> [a]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[a]] -> [a]) -> [[a]] -> [a]
forall a b. (a -> b) -> a -> b
$ Map PhyloPeriodId [a] -> [[a]]
forall k a. Map k a -> [a]
elems Map PhyloPeriodId [a]
m) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" groups to Level " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show (Int
lvl) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\n")
    (Phylo -> Phylo) -> Phylo -> Phylo
forall a b. (a -> b) -> a -> b
$ ASetter Phylo Phylo PhyloLevel PhyloLevel
-> (PhyloLevel -> PhyloLevel) -> Phylo -> Phylo
forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
over ( (Map PhyloPeriodId PhyloPeriod
 -> Identity (Map PhyloPeriodId PhyloPeriod))
-> Phylo -> Identity Phylo
Lens' Phylo (Map PhyloPeriodId PhyloPeriod)
phylo_periods
           ((Map PhyloPeriodId PhyloPeriod
  -> Identity (Map PhyloPeriodId PhyloPeriod))
 -> Phylo -> Identity Phylo)
-> ((PhyloLevel -> Identity PhyloLevel)
    -> Map PhyloPeriodId PhyloPeriod
    -> Identity (Map PhyloPeriodId PhyloPeriod))
-> ASetter Phylo Phylo PhyloLevel PhyloLevel
forall b c a. (b -> c) -> (a -> b) -> a -> c
.  (PhyloPeriod -> Identity PhyloPeriod)
-> Map PhyloPeriodId PhyloPeriod
-> Identity (Map PhyloPeriodId PhyloPeriod)
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse
           ((PhyloPeriod -> Identity PhyloPeriod)
 -> Map PhyloPeriodId PhyloPeriod
 -> Identity (Map PhyloPeriodId PhyloPeriod))
-> ((PhyloLevel -> Identity PhyloLevel)
    -> PhyloPeriod -> Identity PhyloPeriod)
-> (PhyloLevel -> Identity PhyloLevel)
-> Map PhyloPeriodId PhyloPeriod
-> Identity (Map PhyloPeriodId PhyloPeriod)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Map PhyloLevelId PhyloLevel
 -> Identity (Map PhyloLevelId PhyloLevel))
-> PhyloPeriod -> Identity PhyloPeriod
Lens' PhyloPeriod (Map PhyloLevelId PhyloLevel)
phylo_periodLevels
           ((Map PhyloLevelId PhyloLevel
  -> Identity (Map PhyloLevelId PhyloLevel))
 -> PhyloPeriod -> Identity PhyloPeriod)
-> ((PhyloLevel -> Identity PhyloLevel)
    -> Map PhyloLevelId PhyloLevel
    -> Identity (Map PhyloLevelId PhyloLevel))
-> (PhyloLevel -> Identity PhyloLevel)
-> PhyloPeriod
-> Identity PhyloPeriod
forall b c a. (b -> c) -> (a -> b) -> a -> c
.  (PhyloLevel -> Identity PhyloLevel)
-> Map PhyloLevelId PhyloLevel
-> Identity (Map PhyloLevelId PhyloLevel)
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse)
           (\PhyloLevel
phyloLvl -> if Int
lvl Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== (PhyloLevel
phyloLvl PhyloLevel -> Getting Int PhyloLevel Int -> Int
forall s a. s -> Getting a s a -> a
^. Getting Int PhyloLevel Int
Lens' PhyloLevel Int
phylo_levelLevel)
                         then
                            let pId :: PhyloPeriodId
pId  = PhyloLevel
phyloLvl PhyloLevel
-> Getting PhyloPeriodId PhyloLevel PhyloPeriodId -> PhyloPeriodId
forall s a. s -> Getting a s a -> a
^. Getting PhyloPeriodId PhyloLevel PhyloPeriodId
Lens' PhyloLevel PhyloPeriodId
phylo_levelPeriod
                                pId' :: (Text, Text)
pId' = PhyloLevel
phyloLvl PhyloLevel
-> Getting (Text, Text) PhyloLevel (Text, Text) -> (Text, Text)
forall s a. s -> Getting a s a -> a
^. Getting (Text, Text) PhyloLevel (Text, Text)
Lens' PhyloLevel (Text, Text)
phylo_levelPeriod' 
                                phyloCUnit :: [a]
phyloCUnit = Map PhyloPeriodId [a]
m Map PhyloPeriodId [a] -> PhyloPeriodId -> [a]
forall k a. Ord k => Map k a -> k -> a
! PhyloPeriodId
pId
                            in  PhyloLevel
phyloLvl 
                              PhyloLevel -> (PhyloLevel -> PhyloLevel) -> PhyloLevel
forall a b. a -> (a -> b) -> b
& (Map PhyloGroupId PhyloGroup
 -> Identity (Map PhyloGroupId PhyloGroup))
-> PhyloLevel -> Identity PhyloLevel
Lens' PhyloLevel (Map PhyloGroupId PhyloGroup)
phylo_levelGroups ((Map PhyloGroupId PhyloGroup
  -> Identity (Map PhyloGroupId PhyloGroup))
 -> PhyloLevel -> Identity PhyloLevel)
-> Map PhyloGroupId PhyloGroup -> PhyloLevel -> PhyloLevel
forall s t a b. ASetter s t a b -> b -> s -> t
.~ ([(PhyloGroupId, PhyloGroup)] -> Map PhyloGroupId PhyloGroup
forall k a. Ord k => [(k, a)] -> Map k a
fromList ([(PhyloGroupId, PhyloGroup)] -> Map PhyloGroupId PhyloGroup)
-> [(PhyloGroupId, PhyloGroup)] -> Map PhyloGroupId PhyloGroup
forall a b. (a -> b) -> a -> b
$ ([(PhyloGroupId, PhyloGroup)] -> a -> [(PhyloGroupId, PhyloGroup)])
-> [(PhyloGroupId, PhyloGroup)]
-> [a]
-> [(PhyloGroupId, PhyloGroup)]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\[(PhyloGroupId, PhyloGroup)]
groups a
obj ->
                                    [(PhyloGroupId, PhyloGroup)]
groups [(PhyloGroupId, PhyloGroup)]
-> [(PhyloGroupId, PhyloGroup)] -> [(PhyloGroupId, PhyloGroup)]
forall a. [a] -> [a] -> [a]
++ [ (((PhyloPeriodId
pId,Int
lvl),[(PhyloGroupId, PhyloGroup)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(PhyloGroupId, PhyloGroup)]
groups)
                                              , a
-> PhyloPeriodId
-> (Text, Text)
-> Int
-> Int
-> [Cooc]
-> PhyloGroup
f a
obj PhyloPeriodId
pId (Text, Text)
pId' Int
lvl ([(PhyloGroupId, PhyloGroup)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(PhyloGroupId, PhyloGroup)]
groups)
                                                  (Map Int Cooc -> [Cooc]
forall k a. Map k a -> [a]
elems (Map Int Cooc -> [Cooc]) -> Map Int Cooc -> [Cooc]
forall a b. (a -> b) -> a -> b
$ Map Int Cooc -> Set Int -> Map Int Cooc
forall k a. Ord k => Map k a -> Set k -> Map k a
restrictKeys (Phylo
phylo Phylo
-> Getting (Map Int Cooc) Phylo (Map Int Cooc) -> Map Int Cooc
forall s a. s -> Getting a s a -> a
^. Getting (Map Int Cooc) Phylo (Map Int Cooc)
Lens' Phylo (Map Int Cooc)
phylo_timeCooc) (Set Int -> Map Int Cooc) -> Set Int -> Map Int Cooc
forall a b. (a -> b) -> a -> b
$ [PhyloPeriodId] -> Set Int
periodsToYears [PhyloPeriodId
pId]))
                                              ] ) [] [a]
phyloCUnit)
                         else 
                            PhyloLevel
phyloLvl )
           Phylo
phylo  


cliqueToGroup :: PhyloClique -> PhyloPeriodId -> (Text,Text) -> Level ->  Int -> [Cooc] -> PhyloGroup
cliqueToGroup :: PhyloClique
-> PhyloPeriodId
-> (Text, Text)
-> Int
-> Int
-> [Cooc]
-> PhyloGroup
cliqueToGroup PhyloClique
fis PhyloPeriodId
pId (Text, Text)
pId' Int
lvl Int
idx [Cooc]
coocs = PhyloPeriodId
-> (Text, Text)
-> Int
-> Int
-> Text
-> Int
-> Maybe Double
-> [Int]
-> [Int]
-> Cooc
-> PhyloBranchId
-> Map Text [Double]
-> [Pointer]
-> [Pointer]
-> [Pointer]
-> [Pointer]
-> [Pointer]
-> PhyloGroup
PhyloGroup PhyloPeriodId
pId (Text, Text)
pId' Int
lvl Int
idx Text
""
                   (PhyloClique
fis PhyloClique -> Getting Int PhyloClique Int -> Int
forall s a. s -> Getting a s a -> a
^. Getting Int PhyloClique Int
Lens' PhyloClique Int
phyloClique_support)
                   (PhyloClique
fis PhyloClique
-> Getting (Maybe Double) PhyloClique (Maybe Double)
-> Maybe Double
forall s a. s -> Getting a s a -> a
^. Getting (Maybe Double) PhyloClique (Maybe Double)
Lens' PhyloClique (Maybe Double)
phyloClique_weight)
                   (PhyloClique
fis PhyloClique -> Getting [Int] PhyloClique [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloClique [Int]
Lens' PhyloClique [Int]
phyloClique_sources)
                   (PhyloClique
fis PhyloClique -> Getting [Int] PhyloClique [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloClique [Int]
Lens' PhyloClique [Int]
phyloClique_nodes)
                   ([Int] -> [Cooc] -> Cooc
ngramsToCooc (PhyloClique
fis PhyloClique -> Getting [Int] PhyloClique [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloClique [Int]
Lens' PhyloClique [Int]
phyloClique_nodes) [Cooc]
coocs)
                   (Int
1,[Int
0]) -- branchid (lvl,[path in the branching tree])
                   ([(Text, [Double])] -> Map Text [Double]
forall k a. Ord k => [(k, a)] -> Map k a
fromList [(Text
"breaks",[Double
0]),(Text
"seaLevels",[Double
0])])
                   [] [] [] [] []


toPhylo1 :: Phylo -> Phylo
toPhylo1 :: Phylo -> Phylo
toPhylo1 Phylo
phyloStep = case (Phylo -> SeaElevation
getSeaElevation Phylo
phyloStep) of 
    Constante Double
start Double
gap -> Double -> Double -> Phylo -> Phylo
constanteTemporalMatching  Double
start Double
gap Phylo
phyloStep
    Adaptative Double
steps    -> Double -> Phylo -> Phylo
adaptativeTemporalMatching Double
steps Phylo
phyloStep

-----------------------
-- | To Phylo Step | --
-----------------------    


indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)  
indexDates' :: Map PhyloPeriodId [Document] -> Map PhyloPeriodId (Text, Text)
indexDates' Map PhyloPeriodId [Document]
m = ([Document] -> (Text, Text))
-> Map PhyloPeriodId [Document] -> Map PhyloPeriodId (Text, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\[Document]
docs -> 
  let ds :: [Text]
ds = (Document -> Text) -> [Document] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Document
d -> Document -> Text
date' Document
d) [Document]
docs
      f :: Text
f = if ([Text] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Text]
ds)
            then Text
""
            else [Text] -> Text
toFstDate [Text]
ds
      l :: Text
l = if ([Text] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Text]
ds) 
            then Text
""
            else [Text] -> Text
toLstDate [Text]
ds
   in (Text
f,Text
l)) Map PhyloPeriodId [Document]
m


-- To build the first phylo step from docs and terms
toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep [Document]
docs TermList
lst Config
conf = case (Phylo -> SeaElevation
getSeaElevation Phylo
phyloBase) of 
    Constante  Double
_ Double
_ -> (PhyloClique
 -> PhyloPeriodId
 -> (Text, Text)
 -> Int
 -> Int
 -> [Cooc]
 -> PhyloGroup)
-> Int -> Map PhyloPeriodId [PhyloClique] -> Phylo -> Phylo
forall a.
(a
 -> PhyloPeriodId
 -> (Text, Text)
 -> Int
 -> Int
 -> [Cooc]
 -> PhyloGroup)
-> Int -> Map PhyloPeriodId [a] -> Phylo -> Phylo
appendGroups PhyloClique
-> PhyloPeriodId
-> (Text, Text)
-> Int
-> Int
-> [Cooc]
-> PhyloGroup
cliqueToGroup Int
1 Map PhyloPeriodId [PhyloClique]
phyloClique (Map PhyloPeriodId (Text, Text) -> Phylo -> Phylo
updatePeriods (Map PhyloPeriodId [Document] -> Map PhyloPeriodId (Text, Text)
indexDates' Map PhyloPeriodId [Document]
docs') Phylo
phyloBase)
    Adaptative Double
_   -> Int -> Phylo -> Phylo
toGroupsProxi Int
1 
                    (Phylo -> Phylo) -> Phylo -> Phylo
forall a b. (a -> b) -> a -> b
$ (PhyloClique
 -> PhyloPeriodId
 -> (Text, Text)
 -> Int
 -> Int
 -> [Cooc]
 -> PhyloGroup)
-> Int -> Map PhyloPeriodId [PhyloClique] -> Phylo -> Phylo
forall a.
(a
 -> PhyloPeriodId
 -> (Text, Text)
 -> Int
 -> Int
 -> [Cooc]
 -> PhyloGroup)
-> Int -> Map PhyloPeriodId [a] -> Phylo -> Phylo
appendGroups PhyloClique
-> PhyloPeriodId
-> (Text, Text)
-> Int
-> Int
-> [Cooc]
-> PhyloGroup
cliqueToGroup Int
1 Map PhyloPeriodId [PhyloClique]
phyloClique (Map PhyloPeriodId (Text, Text) -> Phylo -> Phylo
updatePeriods (Map PhyloPeriodId [Document] -> Map PhyloPeriodId (Text, Text)
indexDates' Map PhyloPeriodId [Document]
docs') Phylo
phyloBase)
    where
        --------------------------------------
        phyloClique :: Map (Date,Date) [PhyloClique]
        phyloClique :: Map PhyloPeriodId [PhyloClique]
phyloClique =  Phylo
-> Map PhyloPeriodId [Document] -> Map PhyloPeriodId [PhyloClique]
toPhyloClique Phylo
phyloBase Map PhyloPeriodId [Document]
docs'
        --------------------------------------
        docs' :: Map (Date,Date) [Document]
        docs' :: Map PhyloPeriodId [Document]
docs' =  (Document -> Int)
-> [PhyloPeriodId]
-> [Document]
-> Map PhyloPeriodId [Document]
-> Map PhyloPeriodId [Document]
forall doc date.
(NFData doc, Ord date, Enum date) =>
(doc -> date)
-> [(date, date)]
-> [doc]
-> Map (date, date) [doc]
-> Map (date, date) [doc]
groupDocsByPeriodRec Document -> Int
date (Phylo -> [PhyloPeriodId]
getPeriodIds Phylo
phyloBase) ((Document -> Int) -> [Document] -> [Document]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn Document -> Int
date [Document]
docs) Map PhyloPeriodId [Document]
forall k a. Map k a
empty
        --------------------------------------
        phyloBase :: Phylo
        phyloBase :: Phylo
phyloBase = [Document] -> TermList -> Config -> Phylo
toPhyloBase [Document]
docs TermList
lst Config
conf
        --------------------------------------

---------------------------
-- | Frequent Item Set | --
---------------------------


--  To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterClique :: Bool
-> Int
-> (Int -> [PhyloClique] -> [PhyloClique])
-> Map PhyloPeriodId [PhyloClique]
-> Map PhyloPeriodId [PhyloClique]
filterClique Bool
keep Int
thr Int -> [PhyloClique] -> [PhyloClique]
f Map PhyloPeriodId [PhyloClique]
m = case Bool
keep of
  Bool
False -> ([PhyloClique] -> [PhyloClique])
-> Map PhyloPeriodId [PhyloClique]
-> Map PhyloPeriodId [PhyloClique]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\[PhyloClique]
l -> Int -> [PhyloClique] -> [PhyloClique]
f Int
thr [PhyloClique]
l) Map PhyloPeriodId [PhyloClique]
m
  Bool
True  -> ([PhyloClique] -> [PhyloClique])
-> Map PhyloPeriodId [PhyloClique]
-> Map PhyloPeriodId [PhyloClique]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\[PhyloClique]
l -> (Int -> [PhyloClique] -> [PhyloClique])
-> Int -> [PhyloClique] -> [PhyloClique]
forall a. (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilled (Int -> [PhyloClique] -> [PhyloClique]
f) Int
thr [PhyloClique]
l) Map PhyloPeriodId [PhyloClique]
m


--  To filter Fis with small Support
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport Int
thr [PhyloClique]
l = (PhyloClique -> Bool) -> [PhyloClique] -> [PhyloClique]
forall a. (a -> Bool) -> [a] -> [a]
filter (\PhyloClique
clq -> (PhyloClique
clq PhyloClique -> Getting Int PhyloClique Int -> Int
forall s a. s -> Getting a s a -> a
^. Getting Int PhyloClique Int
Lens' PhyloClique Int
phyloClique_support) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
thr) [PhyloClique]
l


--  To filter Fis with small Clique size
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize Int
thr [PhyloClique]
l = (PhyloClique -> Bool) -> [PhyloClique] -> [PhyloClique]
forall a. (a -> Bool) -> [a] -> [a]
filter (\PhyloClique
clq -> ([Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ PhyloClique
clq PhyloClique -> Getting [Int] PhyloClique [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloClique [Int]
Lens' PhyloClique [Int]
phyloClique_nodes) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
thr) [PhyloClique]
l


--  To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested :: Map PhyloPeriodId [PhyloClique] -> Map PhyloPeriodId [PhyloClique]
filterCliqueByNested Map PhyloPeriodId [PhyloClique]
m = 
  let clq :: [[PhyloClique]]
clq  = ([PhyloClique] -> [PhyloClique])
-> [[PhyloClique]] -> [[PhyloClique]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\[PhyloClique]
l -> 
                ([PhyloClique] -> PhyloClique -> [PhyloClique])
-> [PhyloClique] -> [PhyloClique] -> [PhyloClique]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\[PhyloClique]
mem PhyloClique
f -> if ((PhyloClique -> Bool) -> [PhyloClique] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\PhyloClique
f' -> [Int] -> [Int] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isNested (PhyloClique
f' PhyloClique -> Getting [Int] PhyloClique [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloClique [Int]
Lens' PhyloClique [Int]
phyloClique_nodes) (PhyloClique
f PhyloClique -> Getting [Int] PhyloClique [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloClique [Int]
Lens' PhyloClique [Int]
phyloClique_nodes)) [PhyloClique]
mem)
                                 then [PhyloClique]
mem
                                 else 
                                    let fMax :: [PhyloClique]
fMax = (PhyloClique -> Bool) -> [PhyloClique] -> [PhyloClique]
forall a. (a -> Bool) -> [a] -> [a]
filter (\PhyloClique
f' -> Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isNested (PhyloClique
f PhyloClique -> Getting [Int] PhyloClique [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloClique [Int]
Lens' PhyloClique [Int]
phyloClique_nodes) (PhyloClique
f' PhyloClique -> Getting [Int] PhyloClique [Int] -> [Int]
forall s a. s -> Getting a s a -> a
^. Getting [Int] PhyloClique [Int]
Lens' PhyloClique [Int]
phyloClique_nodes)) [PhyloClique]
mem
                                    in  [PhyloClique]
fMax [PhyloClique] -> [PhyloClique] -> [PhyloClique]
forall a. [a] -> [a] -> [a]
++ [PhyloClique
f] ) [] [PhyloClique]
l)
           ([[PhyloClique]] -> [[PhyloClique]])
-> [[PhyloClique]] -> [[PhyloClique]]
forall a b. (a -> b) -> a -> b
$ Map PhyloPeriodId [PhyloClique] -> [[PhyloClique]]
forall k a. Map k a -> [a]
elems Map PhyloPeriodId [PhyloClique]
m 
      clq' :: [[PhyloClique]]
clq' = [[PhyloClique]]
clq [[PhyloClique]] -> Strategy [[PhyloClique]] -> [[PhyloClique]]
forall a. a -> Strategy a -> a
`using` Strategy [PhyloClique] -> Strategy [[PhyloClique]]
forall a. Strategy a -> Strategy [a]
parList Strategy [PhyloClique]
forall a. NFData a => Strategy a
rdeepseq
  in  [(PhyloPeriodId, [PhyloClique])] -> Map PhyloPeriodId [PhyloClique]
forall k a. Ord k => [(k, a)] -> Map k a
fromList ([(PhyloPeriodId, [PhyloClique])]
 -> Map PhyloPeriodId [PhyloClique])
-> [(PhyloPeriodId, [PhyloClique])]
-> Map PhyloPeriodId [PhyloClique]
forall a b. (a -> b) -> a -> b
$ [PhyloPeriodId]
-> [[PhyloClique]] -> [(PhyloPeriodId, [PhyloClique])]
forall a b. [a] -> [b] -> [(a, b)]
zip (Map PhyloPeriodId [PhyloClique] -> [PhyloPeriodId]
forall k a. Map k a -> [k]
keys Map PhyloPeriodId [PhyloClique]
m) [[PhyloClique]]
clq' 


-- | To transform a time map of docs into a time map of Fis with some filters
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique :: Phylo
-> Map PhyloPeriodId [Document] -> Map PhyloPeriodId [PhyloClique]
toPhyloClique Phylo
phylo Map PhyloPeriodId [Document]
phyloDocs = case (Config -> Clique
clique (Config -> Clique) -> Config -> Clique
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phylo) of 
    Fis Int
s Int
s'    -> -- traceFis "Filtered Fis"
                   Map PhyloPeriodId [PhyloClique] -> Map PhyloPeriodId [PhyloClique]
filterCliqueByNested 
                 {- \$ traceFis "Filtered by clique size" -}
                 (Map PhyloPeriodId [PhyloClique]
 -> Map PhyloPeriodId [PhyloClique])
-> Map PhyloPeriodId [PhyloClique]
-> Map PhyloPeriodId [PhyloClique]
forall a b. (a -> b) -> a -> b
$ Bool
-> Int
-> (Int -> [PhyloClique] -> [PhyloClique])
-> Map PhyloPeriodId [PhyloClique]
-> Map PhyloPeriodId [PhyloClique]
filterClique Bool
True Int
s' (Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize)
                 {- \$ traceFis "Filtered by support" -}
                 (Map PhyloPeriodId [PhyloClique]
 -> Map PhyloPeriodId [PhyloClique])
-> Map PhyloPeriodId [PhyloClique]
-> Map PhyloPeriodId [PhyloClique]
forall a b. (a -> b) -> a -> b
$ Bool
-> Int
-> (Int -> [PhyloClique] -> [PhyloClique])
-> Map PhyloPeriodId [PhyloClique]
-> Map PhyloPeriodId [PhyloClique]
filterClique Bool
True Int
s (Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport)
                 {- \$ traceFis "Unfiltered Fis" -}
                 Map PhyloPeriodId [PhyloClique]
phyloClique
    MaxClique Int
s Double
_ CliqueFilter
_ -> Bool
-> Int
-> (Int -> [PhyloClique] -> [PhyloClique])
-> Map PhyloPeriodId [PhyloClique]
-> Map PhyloPeriodId [PhyloClique]
filterClique Bool
True Int
s (Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize)
                       Map PhyloPeriodId [PhyloClique]
phyloClique
    where
        -------------------------------------- 
        phyloClique :: Map (Date,Date) [PhyloClique]
        phyloClique :: Map PhyloPeriodId [PhyloClique]
phyloClique = case (Config -> Clique
clique (Config -> Clique) -> Config -> Clique
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phylo) of 
          Fis Int
_ Int
_     ->  
                      let fis :: [(PhyloPeriodId, [PhyloClique])]
fis  = ((PhyloPeriodId, [Document]) -> (PhyloPeriodId, [PhyloClique]))
-> [(PhyloPeriodId, [Document])]
-> [(PhyloPeriodId, [PhyloClique])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(PhyloPeriodId
prd,[Document]
docs) -> 
                                      case (Config -> CorpusParser
corpusParser (Config -> CorpusParser) -> Config -> CorpusParser
forall a b. (a -> b) -> a -> b
$ Phylo -> Config
getConfig Phylo
phylo) of
                                        Csv' Int
_  -> let lst :: [(Set Int, (Int, (Maybe Double, [Int])))]
lst = Map (Set Int) (Int, (Maybe Double, [Int]))
-> [(Set Int, (Int, (Maybe Double, [Int])))]
forall k a. Map k a -> [(k, a)]
toList 
                                                                  (Map (Set Int) (Int, (Maybe Double, [Int]))
 -> [(Set Int, (Int, (Maybe Double, [Int])))])
-> Map (Set Int) (Int, (Maybe Double, [Int]))
-> [(Set Int, (Int, (Maybe Double, [Int])))]
forall a b. (a -> b) -> a -> b
$ Size
-> Int
-> [([Int], (Maybe Double, [Int]))]
-> Map (Set Int) (Int, (Maybe Double, [Int]))
forall a.
Ord a =>
Size
-> Int
-> [([a], (Maybe Double, [Int]))]
-> Map (Set a) (Int, (Maybe Double, [Int]))
fisWithSizePolyMap' (Int -> Int -> Size
Segment Int
1 Int
20) Int
1 ((Document -> ([Int], (Maybe Double, [Int])))
-> [Document] -> [([Int], (Maybe Double, [Int]))]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Document
d -> ([Text] -> Vector Text -> [Int]
ngramsToIdx (Document -> [Text]
text Document
d) (Phylo -> Vector Text
getRoots Phylo
phylo), (Document -> Maybe Double
weight Document
d, ([Text] -> Vector Text -> [Int]
sourcesToIdx (Document -> [Text]
sources Document
d) (Phylo -> Vector Text
getSources Phylo
phylo))))) [Document]
docs)
                                                           in (PhyloPeriodId
prd, ((Set Int, (Int, (Maybe Double, [Int]))) -> PhyloClique)
-> [(Set Int, (Int, (Maybe Double, [Int])))] -> [PhyloClique]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Set Int, (Int, (Maybe Double, [Int])))
f -> [Int]
-> Int -> PhyloPeriodId -> Maybe Double -> [Int] -> PhyloClique
PhyloClique (Set Int -> [Int]
forall a. Set a -> [a]
Set.toList (Set Int -> [Int]) -> Set Int -> [Int]
forall a b. (a -> b) -> a -> b
$ (Set Int, (Int, (Maybe Double, [Int]))) -> Set Int
forall a b. (a, b) -> a
fst (Set Int, (Int, (Maybe Double, [Int])))
f) (((Int, (Maybe Double, [Int])) -> Int
forall a b. (a, b) -> a
fst ((Int, (Maybe Double, [Int])) -> Int)
-> ((Set Int, (Int, (Maybe Double, [Int])))
    -> (Int, (Maybe Double, [Int])))
-> (Set Int, (Int, (Maybe Double, [Int])))
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Set Int, (Int, (Maybe Double, [Int])))
-> (Int, (Maybe Double, [Int]))
forall a b. (a, b) -> b
snd) (Set Int, (Int, (Maybe Double, [Int])))
f) PhyloPeriodId
prd (((Maybe Double, [Int]) -> Maybe Double
forall a b. (a, b) -> a
fst ((Maybe Double, [Int]) -> Maybe Double)
-> ((Set Int, (Int, (Maybe Double, [Int])))
    -> (Maybe Double, [Int]))
-> (Set Int, (Int, (Maybe Double, [Int])))
-> Maybe Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, (Maybe Double, [Int])) -> (Maybe Double, [Int])
forall a b. (a, b) -> b
snd ((Int, (Maybe Double, [Int])) -> (Maybe Double, [Int]))
-> ((Set Int, (Int, (Maybe Double, [Int])))
    -> (Int, (Maybe Double, [Int])))
-> (Set Int, (Int, (Maybe Double, [Int])))
-> (Maybe Double, [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Set Int, (Int, (Maybe Double, [Int])))
-> (Int, (Maybe Double, [Int]))
forall a b. (a, b) -> b
snd) (Set Int, (Int, (Maybe Double, [Int])))
f) ((((Maybe Double, [Int]) -> [Int]
forall a b. (a, b) -> b
snd ((Maybe Double, [Int]) -> [Int])
-> ((Set Int, (Int, (Maybe Double, [Int])))
    -> (Maybe Double, [Int]))
-> (Set Int, (Int, (Maybe Double, [Int])))
-> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, (Maybe Double, [Int])) -> (Maybe Double, [Int])
forall a b. (a, b) -> b
snd ((Int, (Maybe Double, [Int])) -> (Maybe Double, [Int]))
-> ((Set Int, (Int, (Maybe Double, [Int])))
    -> (Int, (Maybe Double, [Int])))
-> (Set Int, (Int, (Maybe Double, [Int])))
-> (Maybe Double, [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Set Int, (Int, (Maybe Double, [Int])))
-> (Int, (Maybe Double, [Int]))
forall a b. (a, b) -> b
snd) (Set Int, (Int, (Maybe Double, [Int])))
f))) [(Set Int, (Int, (Maybe Double, [Int])))]
lst)
                                        CorpusParser
_  -> let lst :: [(Set Int, Int)]
lst = Map (Set Int) Int -> [(Set Int, Int)]
forall k a. Map k a -> [(k, a)]
toList 
                                                      (Map (Set Int) Int -> [(Set Int, Int)])
-> Map (Set Int) Int -> [(Set Int, Int)]
forall a b. (a -> b) -> a -> b
$ Size -> Int -> [[Int]] -> Map (Set Int) Int
forall a. Ord a => Size -> Int -> [[a]] -> Map (Set a) Int
fisWithSizePolyMap (Int -> Int -> Size
Segment Int
1 Int
20) Int
1 ((Document -> [Int]) -> [Document] -> [[Int]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Document
d -> [Text] -> Vector Text -> [Int]
ngramsToIdx (Document -> [Text]
text Document
d) (Phylo -> Vector Text
getRoots Phylo
phylo)) [Document]
docs)
                                              in (PhyloPeriodId
prd, ((Set Int, Int) -> PhyloClique)
-> [(Set Int, Int)] -> [PhyloClique]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Set Int, Int)
f -> [Int]
-> Int -> PhyloPeriodId -> Maybe Double -> [Int] -> PhyloClique
PhyloClique (Set Int -> [Int]
forall a. Set a -> [a]
Set.toList (Set Int -> [Int]) -> Set Int -> [Int]
forall a b. (a -> b) -> a -> b
$ (Set Int, Int) -> Set Int
forall a b. (a, b) -> a
fst (Set Int, Int)
f) ((Set Int, Int) -> Int
forall a b. (a, b) -> b
snd (Set Int, Int)
f) PhyloPeriodId
prd Maybe Double
forall a. Maybe a
Nothing []) [(Set Int, Int)]
lst)
                                      )
                               ([(PhyloPeriodId, [Document])] -> [(PhyloPeriodId, [PhyloClique])])
-> [(PhyloPeriodId, [Document])]
-> [(PhyloPeriodId, [PhyloClique])]
forall a b. (a -> b) -> a -> b
$ Map PhyloPeriodId [Document] -> [(PhyloPeriodId, [Document])]
forall k a. Map k a -> [(k, a)]
toList Map PhyloPeriodId [Document]
phyloDocs
                          fis' :: [(PhyloPeriodId, [PhyloClique])]
fis' = [(PhyloPeriodId, [PhyloClique])]
fis [(PhyloPeriodId, [PhyloClique])]
-> Strategy [(PhyloPeriodId, [PhyloClique])]
-> [(PhyloPeriodId, [PhyloClique])]
forall a. a -> Strategy a -> a
`using` Strategy (PhyloPeriodId, [PhyloClique])
-> Strategy [(PhyloPeriodId, [PhyloClique])]
forall a. Strategy a -> Strategy [a]
parList Strategy (PhyloPeriodId, [PhyloClique])
forall a. NFData a => Strategy a
rdeepseq
                       in [(PhyloPeriodId, [PhyloClique])] -> Map PhyloPeriodId [PhyloClique]
forall k a. Ord k => [(k, a)] -> Map k a
fromList [(PhyloPeriodId, [PhyloClique])]
fis'
          MaxClique Int
_ Double
thr CliqueFilter
filterType -> 
                      let mcl :: [(PhyloPeriodId, [PhyloClique])]
mcl  = ((PhyloPeriodId, [Document]) -> (PhyloPeriodId, [PhyloClique]))
-> [(PhyloPeriodId, [Document])]
-> [(PhyloPeriodId, [PhyloClique])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(PhyloPeriodId
prd,[Document]
docs) -> 
                                    let cooc :: Map PhyloPeriodId Int
cooc = (Double -> Int) -> Cooc -> Map PhyloPeriodId Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round
                                             (Cooc -> Map PhyloPeriodId Int) -> Cooc -> Map PhyloPeriodId Int
forall a b. (a -> b) -> a -> b
$ (Cooc -> Cooc -> Cooc) -> Cooc -> [Cooc] -> Cooc
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Cooc -> Cooc -> Cooc
sumCooc Cooc
forall k a. Map k a
empty
                                             ([Cooc] -> Cooc) -> [Cooc] -> Cooc
forall a b. (a -> b) -> a -> b
$ ([Int] -> Cooc) -> [[Int]] -> [Cooc]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map [Int] -> Cooc
listToMatrix 
                                             ([[Int]] -> [Cooc]) -> [[Int]] -> [Cooc]
forall a b. (a -> b) -> a -> b
$ (Document -> [Int]) -> [Document] -> [[Int]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Document
d -> [Text] -> Vector Text -> [Int]
ngramsToIdx (Document -> [Text]
text Document
d) (Phylo -> Vector Text
getRoots Phylo
phylo)) [Document]
docs
                                     in (PhyloPeriodId
prd, ([Int] -> PhyloClique) -> [[Int]] -> [PhyloClique]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\[Int]
cl -> [Int]
-> Int -> PhyloPeriodId -> Maybe Double -> [Int] -> PhyloClique
PhyloClique [Int]
cl Int
0 PhyloPeriodId
prd Maybe Double
forall a. Maybe a
Nothing []) ([[Int]] -> [PhyloClique]) -> [[Int]] -> [PhyloClique]
forall a b. (a -> b) -> a -> b
$ CliqueFilter
-> Distance -> Double -> Map PhyloPeriodId Int -> [[Int]]
forall a.
Ord a =>
CliqueFilter -> Distance -> Double -> Map (a, a) Int -> [[a]]
getMaxCliques CliqueFilter
filterType Distance
Conditional Double
thr Map PhyloPeriodId Int
cooc)) 
                               ([(PhyloPeriodId, [Document])] -> [(PhyloPeriodId, [PhyloClique])])
-> [(PhyloPeriodId, [Document])]
-> [(PhyloPeriodId, [PhyloClique])]
forall a b. (a -> b) -> a -> b
$ Map PhyloPeriodId [Document] -> [(PhyloPeriodId, [Document])]
forall k a. Map k a -> [(k, a)]
toList Map PhyloPeriodId [Document]
phyloDocs
                          mcl' :: [(PhyloPeriodId, [PhyloClique])]
mcl' = [(PhyloPeriodId, [PhyloClique])]
mcl [(PhyloPeriodId, [PhyloClique])]
-> Strategy [(PhyloPeriodId, [PhyloClique])]
-> [(PhyloPeriodId, [PhyloClique])]
forall a. a -> Strategy a -> a
`using` Strategy (PhyloPeriodId, [PhyloClique])
-> Strategy [(PhyloPeriodId, [PhyloClique])]
forall a. Strategy a -> Strategy [a]
parList Strategy (PhyloPeriodId, [PhyloClique])
forall a. NFData a => Strategy a
rdeepseq                               
                       in [(PhyloPeriodId, [PhyloClique])] -> Map PhyloPeriodId [PhyloClique]
forall k a. Ord k => [(k, a)] -> Map k a
fromList [(PhyloPeriodId, [PhyloClique])]
mcl' 
        -------------------------------------- 

        -- dev viz graph maxClique getMaxClique


--------------------
-- | Coocurency | --
--------------------


--  To transform the docs into a time map of coocurency matrix 
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc :: [Document] -> Vector Text -> Map Int Cooc
docsToTimeScaleCooc [Document]
docs Vector Text
fdt = 
    let mCooc :: Map Int Cooc
mCooc  = (Cooc -> Cooc -> Cooc) -> [(Int, Cooc)] -> Map Int Cooc
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
fromListWith Cooc -> Cooc -> Cooc
sumCooc
               ([(Int, Cooc)] -> Map Int Cooc) -> [(Int, Cooc)] -> Map Int Cooc
forall a b. (a -> b) -> a -> b
$ (PhyloBranchId -> (Int, Cooc)) -> [PhyloBranchId] -> [(Int, Cooc)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Int
_d,[Int]
l) -> (Int
_d, [Int] -> Cooc
listToMatrix [Int]
l))
               ([PhyloBranchId] -> [(Int, Cooc)])
-> [PhyloBranchId] -> [(Int, Cooc)]
forall a b. (a -> b) -> a -> b
$ (Document -> PhyloBranchId) -> [Document] -> [PhyloBranchId]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Document
doc -> (Document -> Int
date Document
doc, [Int] -> [Int]
forall a. Ord a => [a] -> [a]
sort ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ [Text] -> Vector Text -> [Int]
ngramsToIdx (Document -> [Text]
text Document
doc) Vector Text
fdt)) [Document]
docs
        mCooc' :: Map Int (Map k a)
mCooc' = [(Int, Map k a)] -> Map Int (Map k a)
forall k a. Ord k => [(k, a)] -> Map k a
fromList
               ([(Int, Map k a)] -> Map Int (Map k a))
-> [(Int, Map k a)] -> Map Int (Map k a)
forall a b. (a -> b) -> a -> b
$ (Int -> (Int, Map k a)) -> [Int] -> [(Int, Map k a)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Int
t -> (Int
t,Map k a
forall k a. Map k a
empty))
               ([Int] -> [(Int, Map k a)]) -> [Int] -> [(Int, Map k a)]
forall a b. (a -> b) -> a -> b
$ [Int] -> Int -> [Int]
toTimeScale ((Document -> Int) -> [Document] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Document -> Int
date [Document]
docs) Int
1
    in  String -> Map Int Cooc -> Map Int Cooc
forall a. String -> a -> a
trace (String
"\n" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"-- | Build the coocurency matrix for " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ Map Int (Map Any Any) -> [Int]
forall k a. Map k a -> [k]
keys Map Int (Map Any Any)
forall k a. Map Int (Map k a)
mCooc') String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" unit of time" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\n")
       (Map Int Cooc -> Map Int Cooc) -> Map Int Cooc -> Map Int Cooc
forall a b. (a -> b) -> a -> b
$ (Cooc -> Cooc -> Cooc)
-> Map Int Cooc -> Map Int Cooc -> Map Int Cooc
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
unionWith Cooc -> Cooc -> Cooc
sumCooc Map Int Cooc
mCooc Map Int Cooc
forall k a. Map Int (Map k a)
mCooc'


-----------------------
-- | to Phylo Base | --
-----------------------
-- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec :: (doc -> date)
-> [(date, date)]
-> [doc]
-> Map (date, date) [doc]
-> Map (date, date) [doc]
groupDocsByPeriodRec doc -> date
f [(date, date)]
prds [doc]
docs Map (date, date) [doc]
acc =
    if (([(date, date)] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [(date, date)]
prds) Bool -> Bool -> Bool
|| ([doc] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [doc]
docs))
      then Map (date, date) [doc]
acc 
      else 
        let prd :: (date, date)
prd = Text -> [(date, date)] -> (date, date)
forall a. Text -> [a] -> a
head' Text
"groupBy" [(date, date)]
prds
            docs' :: ([doc], [doc])
docs' = (doc -> Bool) -> [doc] -> ([doc], [doc])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition (\doc
d -> (doc -> date
f doc
d date -> date -> Bool
forall a. Ord a => a -> a -> Bool
>= (date, date) -> date
forall a b. (a, b) -> a
fst (date, date)
prd) Bool -> Bool -> Bool
&& (doc -> date
f doc
d date -> date -> Bool
forall a. Ord a => a -> a -> Bool
<= (date, date) -> date
forall a b. (a, b) -> b
snd (date, date)
prd)) [doc]
docs
         in (doc -> date)
-> [(date, date)]
-> [doc]
-> Map (date, date) [doc]
-> Map (date, date) [doc]
forall doc date.
(NFData doc, Ord date, Enum date) =>
(doc -> date)
-> [(date, date)]
-> [doc]
-> Map (date, date) [doc]
-> Map (date, date) [doc]
groupDocsByPeriodRec doc -> date
f ([(date, date)] -> [(date, date)]
forall a. [a] -> [a]
tail [(date, date)]
prds) (([doc], [doc]) -> [doc]
forall a b. (a, b) -> b
snd ([doc], [doc])
docs') ((date, date)
-> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
forall k a. Ord k => k -> a -> Map k a -> Map k a
insert (date, date)
prd (([doc], [doc]) -> [doc]
forall a b. (a, b) -> a
fst ([doc], [doc])
docs') Map (date, date) [doc]
acc)


--  To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' :: (doc -> date) -> [(date, date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' doc -> date
f [(date, date)]
pds [doc]
docs =
  let docs' :: [[doc]]
docs'    = (doc -> doc -> Bool) -> [doc] -> [[doc]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (\doc
d doc
d' -> doc -> date
f doc
d date -> date -> Bool
forall a. Eq a => a -> a -> Bool
== doc -> date
f doc
d') ([doc] -> [[doc]]) -> [doc] -> [[doc]]
forall a b. (a -> b) -> a -> b
$ (doc -> date) -> [doc] -> [doc]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn doc -> date
f [doc]
docs
      periods :: [[doc]]
periods  = ((date, date) -> [doc]) -> [(date, date)] -> [[doc]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ((doc -> date) -> [[doc]] -> (date, date) -> [doc]
forall b t. Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
inPeriode doc -> date
f [[doc]]
docs') [(date, date)]
pds
      periods' :: [[doc]]
periods' = [[doc]]
periods [[doc]] -> Strategy [[doc]] -> [[doc]]
forall a. a -> Strategy a -> a
`using` Strategy [doc] -> Strategy [[doc]]
forall a. Strategy a -> Strategy [a]
parList Strategy [doc]
forall a. NFData a => Strategy a
rdeepseq
   in String -> Map (date, date) [doc] -> Map (date, date) [doc]
forall a. String -> a -> a
trace (String
"\n" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"-- | Group " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show([doc] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [doc]
docs) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" docs by " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show([(date, date)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(date, date)]
pds) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" periods" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\n") 
    (Map (date, date) [doc] -> Map (date, date) [doc])
-> Map (date, date) [doc] -> Map (date, date) [doc]
forall a b. (a -> b) -> a -> b
$ [((date, date), [doc])] -> Map (date, date) [doc]
forall k a. Ord k => [(k, a)] -> Map k a
fromList ([((date, date), [doc])] -> Map (date, date) [doc])
-> [((date, date), [doc])] -> Map (date, date) [doc]
forall a b. (a -> b) -> a -> b
$ [(date, date)] -> [[doc]] -> [((date, date), [doc])]
forall a b. [a] -> [b] -> [(a, b)]
zip [(date, date)]
pds [[doc]]
periods'
  where
    --------------------------------------
    inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
    inPeriode :: (t -> b) -> [[t]] -> (b, b) -> [t]
inPeriode t -> b
f' [[t]]
h (b
start,b
end) =
      [[t]] -> [t]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[t]] -> [t]) -> [[t]] -> [t]
forall a b. (a -> b) -> a -> b
$ ([[t]], [[t]]) -> [[t]]
forall a b. (a, b) -> a
fst (([[t]], [[t]]) -> [[t]]) -> ([[t]], [[t]]) -> [[t]]
forall a b. (a -> b) -> a -> b
$ ([t] -> Bool) -> [[t]] -> ([[t]], [[t]])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition (\[t]
d -> t -> b
f' (Text -> [t] -> t
forall a. Text -> [a] -> a
head' Text
"inPeriode" [t]
d) b -> b -> Bool
forall a. Ord a => a -> a -> Bool
>= b
start Bool -> Bool -> Bool
&& t -> b
f' (Text -> [t] -> t
forall a. Text -> [a] -> a
head' Text
"inPeriode" [t]
d) b -> b -> Bool
forall a. Ord a => a -> a -> Bool
<= b
end) [[t]]
h



--  To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod :: (doc -> date) -> [(date, date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod doc -> date
_ [(date, date)]
_   [] = Text -> Map (date, date) [doc]
forall a. HasCallStack => Text -> a
panic Text
"[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod doc -> date
f [(date, date)]
pds [doc]
es =
  let periods :: [[doc]]
periods  = ((date, date) -> [doc]) -> [(date, date)] -> [[doc]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ((doc -> date) -> [doc] -> (date, date) -> [doc]
forall b t. Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode doc -> date
f [doc]
es) [(date, date)]
pds
      periods' :: [[doc]]
periods' = [[doc]]
periods [[doc]] -> Strategy [[doc]] -> [[doc]]
forall a. a -> Strategy a -> a
`using` Strategy [doc] -> Strategy [[doc]]
forall a. Strategy a -> Strategy [a]
parList Strategy [doc]
forall a. NFData a => Strategy a
rdeepseq

  in  String -> Map (date, date) [doc] -> Map (date, date) [doc]
forall a. String -> a -> a
trace (String
"\n" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"-- | Group " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show([doc] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [doc]
es) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" docs by " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show([(date, date)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(date, date)]
pds) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" periods" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\n") 
    (Map (date, date) [doc] -> Map (date, date) [doc])
-> Map (date, date) [doc] -> Map (date, date) [doc]
forall a b. (a -> b) -> a -> b
$ [((date, date), [doc])] -> Map (date, date) [doc]
forall k a. Ord k => [(k, a)] -> Map k a
fromList ([((date, date), [doc])] -> Map (date, date) [doc])
-> [((date, date), [doc])] -> Map (date, date) [doc]
forall a b. (a -> b) -> a -> b
$ [(date, date)] -> [[doc]] -> [((date, date), [doc])]
forall a b. [a] -> [b] -> [(a, b)]
zip [(date, date)]
pds [[doc]]
periods'
  where
    --------------------------------------
    inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
    inPeriode :: (t -> b) -> [t] -> (b, b) -> [t]
inPeriode t -> b
f' [t]
h (b
start,b
end) =
      ([t], [t]) -> [t]
forall a b. (a, b) -> a
fst (([t], [t]) -> [t]) -> ([t], [t]) -> [t]
forall a b. (a -> b) -> a -> b
$ (t -> Bool) -> [t] -> ([t], [t])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition (\t
d -> t -> b
f' t
d b -> b -> Bool
forall a. Ord a => a -> a -> Bool
>= b
start Bool -> Bool -> Bool
&& t -> b
f' t
d b -> b -> Bool
forall a. Ord a => a -> a -> Bool
<= b
end) [t]
h
    --------------------------------------   


docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermFreq :: [Document] -> Vector Text -> Map Int Double
docsToTermFreq [Document]
docs Vector Text
fdt =
  let nbDocs :: Double
nbDocs = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ [Document] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Document]
docs
      freqs :: Map Int Double
freqs = (Double -> Double) -> Map Int Double -> Map Int Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/(Double
nbDocs))
             (Map Int Double -> Map Int Double)
-> Map Int Double -> Map Int Double
forall a b. (a -> b) -> a -> b
$ [(Int, Double)] -> Map Int Double
forall k a. Ord k => [(k, a)] -> Map k a
fromList
             ([(Int, Double)] -> Map Int Double)
-> [(Int, Double)] -> Map Int Double
forall a b. (a -> b) -> a -> b
$ ([Int] -> (Int, Double)) -> [[Int]] -> [(Int, Double)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\[Int]
lst -> (Text -> [Int] -> Int
forall a. Text -> [a] -> a
head' Text
"docsToTermFreq" [Int]
lst, Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
lst)) 
             ([[Int]] -> [(Int, Double)]) -> [[Int]] -> [(Int, Double)]
forall a b. (a -> b) -> a -> b
$ [Int] -> [[Int]]
forall a. Eq a => [a] -> [[a]]
group ([Int] -> [[Int]]) -> [Int] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int]
forall a. Ord a => [a] -> [a]
sort ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ [[Int]] -> [Int]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int]] -> [Int]) -> [[Int]] -> [Int]
forall a b. (a -> b) -> a -> b
$ (Document -> [Int]) -> [Document] -> [[Int]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Document
d -> [Int] -> [Int]
forall a. Eq a => [a] -> [a]
nub ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ [Text] -> Vector Text -> [Int]
ngramsToIdx (Document -> [Text]
text Document
d) Vector Text
fdt) [Document]
docs
      sumFreqs :: Double
sumFreqs = [Double] -> Double
forall (f :: * -> *) a. (Foldable f, Num a) => f a -> a
sum ([Double] -> Double) -> [Double] -> Double
forall a b. (a -> b) -> a -> b
$ Map Int Double -> [Double]
forall k a. Map k a -> [a]
elems Map Int Double
freqs
   in (Double -> Double) -> Map Int Double -> Map Int Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
sumFreqs) Map Int Double
freqs

docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq :: Int -> [Document] -> Vector Text -> Map Int Double
docsToLastTermFreq Int
n [Document]
docs Vector Text
fdt = 
  let last :: [Int]
last   = Int -> [Int] -> [Int]
forall a. Int -> [a] -> [a]
take Int
n ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int]
forall a. [a] -> [a]
reverse ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int]
forall a. Ord a => [a] -> [a]
sort ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ (Document -> Int) -> [Document] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Document -> Int
date [Document]
docs
      nbDocs :: Double
nbDocs = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ [Document] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Document] -> Int) -> [Document] -> Int
forall a b. (a -> b) -> a -> b
$ (Document -> Bool) -> [Document] -> [Document]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Document
d -> Int -> [Int] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem (Document -> Int
date Document
d) [Int]
last) [Document]
docs
      freqs :: Map Int Double
freqs  = (Double -> Double) -> Map Int Double -> Map Int Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/(Double
nbDocs))
             (Map Int Double -> Map Int Double)
-> Map Int Double -> Map Int Double
forall a b. (a -> b) -> a -> b
$ [(Int, Double)] -> Map Int Double
forall k a. Ord k => [(k, a)] -> Map k a
fromList
             ([(Int, Double)] -> Map Int Double)
-> [(Int, Double)] -> Map Int Double
forall a b. (a -> b) -> a -> b
$ ([Int] -> (Int, Double)) -> [[Int]] -> [(Int, Double)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\[Int]
lst -> (Text -> [Int] -> Int
forall a. Text -> [a] -> a
head' Text
"docsToLastTermFreq" [Int]
lst, Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
lst)) 
             ([[Int]] -> [(Int, Double)]) -> [[Int]] -> [(Int, Double)]
forall a b. (a -> b) -> a -> b
$ [Int] -> [[Int]]
forall a. Eq a => [a] -> [[a]]
group ([Int] -> [[Int]]) -> [Int] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int]
forall a. Ord a => [a] -> [a]
sort ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ [[Int]] -> [Int]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int]] -> [Int]) -> [[Int]] -> [Int]
forall a b. (a -> b) -> a -> b
$ (Document -> [Int]) -> [Document] -> [[Int]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Document
d -> [Int] -> [Int]
forall a. Eq a => [a] -> [a]
nub ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ [Text] -> Vector Text -> [Int]
ngramsToIdx (Document -> [Text]
text Document
d) Vector Text
fdt) ([Document] -> [[Int]]) -> [Document] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ (Document -> Bool) -> [Document] -> [Document]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Document
d -> Int -> [Int] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem (Document -> Int
date Document
d) [Int]
last) [Document]
docs
      sumFreqs :: Double
sumFreqs = [Double] -> Double
forall (f :: * -> *) a. (Foldable f, Num a) => f a -> a
sum ([Double] -> Double) -> [Double] -> Double
forall a b. (a -> b) -> a -> b
$ Map Int Double -> [Double]
forall k a. Map k a -> [a]
elems Map Int Double
freqs
   in (Double -> Double) -> Map Int Double -> Map Int Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
sumFreqs) Map Int Double
freqs  


--  To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb :: [Document] -> Map Int Double
docsToTimeScaleNb [Document]
docs = 
    let docs' :: Map Int Double
docs' = (Double -> Double -> Double) -> [(Int, Double)] -> Map Int Double
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
fromListWith Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) ([(Int, Double)] -> Map Int Double)
-> [(Int, Double)] -> Map Int Double
forall a b. (a -> b) -> a -> b
$ (Document -> (Int, Double)) -> [Document] -> [(Int, Double)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Document
d -> (Document -> Int
date Document
d,Double
1)) [Document]
docs
        time :: Map Int Double
time  = [(Int, Double)] -> Map Int Double
forall k a. Ord k => [(k, a)] -> Map k a
fromList ([(Int, Double)] -> Map Int Double)
-> [(Int, Double)] -> Map Int Double
forall a b. (a -> b) -> a -> b
$ (Int -> (Int, Double)) -> [Int] -> [(Int, Double)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Int
t -> (Int
t,Double
0)) ([Int] -> [(Int, Double)]) -> [Int] -> [(Int, Double)]
forall a b. (a -> b) -> a -> b
$ [Int] -> Int -> [Int]
toTimeScale (Map Int Double -> [Int]
forall k a. Map k a -> [k]
keys Map Int Double
docs') Int
1
    in  String -> Map Int Double -> Map Int Double
forall a. String -> a -> a
trace (String
"\n" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"-- | Group " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show([Document] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Document]
docs) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" docs by " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show(Map Int Double -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Map Int Double
time) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" unit of time" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\n") 
      (Map Int Double -> Map Int Double)
-> Map Int Double -> Map Int Double
forall a b. (a -> b) -> a -> b
$ (Double -> Double -> Double)
-> Map Int Double -> Map Int Double -> Map Int Double
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
unionWith Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) Map Int Double
time Map Int Double
docs'


initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels Int
lvlMax PhyloPeriodId
pId = 
    [(PhyloLevelId, PhyloLevel)] -> Map PhyloLevelId PhyloLevel
forall k a. Ord k => [(k, a)] -> Map k a
fromList ([(PhyloLevelId, PhyloLevel)] -> Map PhyloLevelId PhyloLevel)
-> [(PhyloLevelId, PhyloLevel)] -> Map PhyloLevelId PhyloLevel
forall a b. (a -> b) -> a -> b
$ (Int -> (PhyloLevelId, PhyloLevel))
-> [Int] -> [(PhyloLevelId, PhyloLevel)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Int
lvl -> ((PhyloPeriodId
pId,Int
lvl),PhyloPeriodId
-> (Text, Text) -> Int -> Map PhyloGroupId PhyloGroup -> PhyloLevel
PhyloLevel PhyloPeriodId
pId (Text
"",Text
"") Int
lvl Map PhyloGroupId PhyloGroup
forall k a. Map k a
empty)) [Int
1..Int
lvlMax]



--  To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase [Document]
docs TermList
lst Config
conf = 
    let foundations :: PhyloFoundations
foundations  = Vector Text -> TermList -> PhyloFoundations
PhyloFoundations ([Text] -> Vector Text
forall a. [a] -> Vector a
Vector.fromList ([Text] -> Vector Text) -> [Text] -> Vector Text
forall a b. (a -> b) -> a -> b
$ [Text] -> [Text]
forall a. Eq a => [a] -> [a]
nub ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ [[Text]] -> [Text]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Text]] -> [Text]) -> [[Text]] -> [Text]
forall a b. (a -> b) -> a -> b
$ (Document -> [Text]) -> [Document] -> [[Text]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Document -> [Text]
text [Document]
docs) TermList
lst
        docsSources :: PhyloSources
docsSources  = Vector Text -> PhyloSources
PhyloSources     ([Text] -> Vector Text
forall a. [a] -> Vector a
Vector.fromList ([Text] -> Vector Text) -> [Text] -> Vector Text
forall a b. (a -> b) -> a -> b
$ [Text] -> [Text]
forall a. Eq a => [a] -> [a]
nub ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ [[Text]] -> [Text]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Text]] -> [Text]) -> [[Text]] -> [Text]
forall a b. (a -> b) -> a -> b
$ (Document -> [Text]) -> [Document] -> [[Text]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Document -> [Text]
sources [Document]
docs)
        params :: PhyloParam
params = PhyloParam
defaultPhyloParam { _phyloParam_config :: Config
_phyloParam_config = Config
conf }
        periods :: [PhyloPeriodId]
periods = [Int] -> Int -> Int -> [PhyloPeriodId]
toPeriods ([Int] -> [Int]
forall a. Ord a => [a] -> [a]
sort ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int]
forall a. Eq a => [a] -> [a]
nub ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ (Document -> Int) -> [Document] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Document -> Int
date [Document]
docs) (TimeUnit -> Int
getTimePeriod (TimeUnit -> Int) -> TimeUnit -> Int
forall a b. (a -> b) -> a -> b
$ Config -> TimeUnit
timeUnit Config
conf) (TimeUnit -> Int
getTimeStep (TimeUnit -> Int) -> TimeUnit -> Int
forall a b. (a -> b) -> a -> b
$ Config -> TimeUnit
timeUnit Config
conf)
    in String -> Phylo -> Phylo
forall a. String -> a -> a
trace (String
"\n" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"-- | Create PhyloBase out of " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show([Document] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Document]
docs) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" docs \n") 
       (Phylo -> Phylo) -> Phylo -> Phylo
forall a b. (a -> b) -> a -> b
$ PhyloFoundations
-> PhyloSources
-> Map Int Cooc
-> Map Int Double
-> Map Int Double
-> Map Int Double
-> Map (PhyloGroupId, PhyloGroupId) Double
-> Map (PhyloGroupId, PhyloGroupId) Double
-> PhyloParam
-> Map PhyloPeriodId PhyloPeriod
-> Phylo
Phylo PhyloFoundations
foundations
               PhyloSources
docsSources
               ([Document] -> Vector Text -> Map Int Cooc
docsToTimeScaleCooc [Document]
docs (PhyloFoundations
foundations PhyloFoundations
-> Getting (Vector Text) PhyloFoundations (Vector Text)
-> Vector Text
forall s a. s -> Getting a s a -> a
^. Getting (Vector Text) PhyloFoundations (Vector Text)
Lens' PhyloFoundations (Vector Text)
foundations_roots))
               ([Document] -> Map Int Double
docsToTimeScaleNb [Document]
docs)
               ([Document] -> Vector Text -> Map Int Double
docsToTermFreq [Document]
docs (PhyloFoundations
foundations PhyloFoundations
-> Getting (Vector Text) PhyloFoundations (Vector Text)
-> Vector Text
forall s a. s -> Getting a s a -> a
^. Getting (Vector Text) PhyloFoundations (Vector Text)
Lens' PhyloFoundations (Vector Text)
foundations_roots))
               (Int -> [Document] -> Vector Text -> Map Int Double
docsToLastTermFreq (TimeUnit -> Int
getTimePeriod (TimeUnit -> Int) -> TimeUnit -> Int
forall a b. (a -> b) -> a -> b
$ Config -> TimeUnit
timeUnit Config
conf) [Document]
docs (PhyloFoundations
foundations PhyloFoundations
-> Getting (Vector Text) PhyloFoundations (Vector Text)
-> Vector Text
forall s a. s -> Getting a s a -> a
^. Getting (Vector Text) PhyloFoundations (Vector Text)
Lens' PhyloFoundations (Vector Text)
foundations_roots))
               Map (PhyloGroupId, PhyloGroupId) Double
forall k a. Map k a
empty
               Map (PhyloGroupId, PhyloGroupId) Double
forall k a. Map k a
empty
               PhyloParam
params
               ([(PhyloPeriodId, PhyloPeriod)] -> Map PhyloPeriodId PhyloPeriod
forall k a. Ord k => [(k, a)] -> Map k a
fromList ([(PhyloPeriodId, PhyloPeriod)] -> Map PhyloPeriodId PhyloPeriod)
-> [(PhyloPeriodId, PhyloPeriod)] -> Map PhyloPeriodId PhyloPeriod
forall a b. (a -> b) -> a -> b
$ (PhyloPeriodId -> (PhyloPeriodId, PhyloPeriod))
-> [PhyloPeriodId] -> [(PhyloPeriodId, PhyloPeriod)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\PhyloPeriodId
prd -> (PhyloPeriodId
prd, PhyloPeriodId
-> (Text, Text) -> Map PhyloLevelId PhyloLevel -> PhyloPeriod
PhyloPeriod PhyloPeriodId
prd (Text
"",Text
"") (Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels Int
1 PhyloPeriodId
prd))) [PhyloPeriodId]
periods)