{-|
Module      : Gargantext.Core.Text.Metrics
Description : All parsers of Gargantext in one file.
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Mainly reexport functions in @Data.Text.Metrics@

-}

{-# LANGUAGE TemplateHaskell   #-}

module Gargantext.Core.Text.Metrics
  where

--import Data.Array.Accelerate ((:.)(..), Z(..))
--import Math.KMeans (kmeans, euclidSq, elements)
import Control.Lens (makeLenses)
import Data.Map (Map)
import Data.Monoid (Monoid, mempty)
import Data.HashMap.Strict (HashMap)
import Data.Semigroup (Semigroup)
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.Map  as Map
import qualified Data.Vector as V
import qualified Data.Vector.Storable as Vec
import qualified Data.HashMap.Strict as HashMap


type MapListSize = Int
type InclusionSize = Int

scored :: Ord t => HashMap (t,t) Int -> V.Vector (Scored t)
scored :: HashMap (t, t) Int -> Vector (Scored t)
scored = Map t (Vector Double) -> Vector (Scored t)
forall t. Ord t => Map t (Vector Double) -> Vector (Scored t)
map2scored (Map t (Vector Double) -> Vector (Scored t))
-> (HashMap (t, t) Int -> Map t (Vector Double))
-> HashMap (t, t) Int
-> Vector (Scored t)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Dimension -> Map t (Vector Double) -> Map t (Vector Double)
forall t.
Ord t =>
Dimension -> Map t (Vector Double) -> Map t (Vector Double)
pcaReduceTo (Int -> Dimension
Dimension Int
2)) (Map t (Vector Double) -> Map t (Vector Double))
-> (HashMap (t, t) Int -> Map t (Vector Double))
-> HashMap (t, t) Int
-> Map t (Vector Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map (t, t) Int -> Map t (Vector Double)
forall t. Ord t => Map (t, t) Int -> Map t (Vector Double)
scored2map (Map (t, t) Int -> Map t (Vector Double))
-> (HashMap (t, t) Int -> Map (t, t) Int)
-> HashMap (t, t) Int
-> Map t (Vector Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [((t, t), Int)] -> Map (t, t) Int
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([((t, t), Int)] -> Map (t, t) Int)
-> (HashMap (t, t) Int -> [((t, t), Int)])
-> HashMap (t, t) Int
-> Map (t, t) Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HashMap (t, t) Int -> [((t, t), Int)]
forall k v. HashMap k v -> [(k, v)]
HashMap.toList
  where
    scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
    scored2map :: Map (t, t) Int -> Map t (Vector Double)
scored2map Map (t, t) Int
m = [(t, Vector Double)] -> Map t (Vector Double)
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(t, Vector Double)] -> Map t (Vector Double))
-> [(t, Vector Double)] -> Map t (Vector Double)
forall a b. (a -> b) -> a -> b
$ (Scored t -> (t, Vector Double))
-> [Scored t] -> [(t, Vector Double)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Scored t
t Double
i Double
s) -> (t
t, [Double] -> Vector Double
forall a. Storable a => [a] -> Vector a
Vec.fromList [Double
i,Double
s])) ([Scored t] -> [(t, Vector Double)])
-> [Scored t] -> [(t, Vector Double)]
forall a b. (a -> b) -> a -> b
$ Map (t, t) Int -> [Scored t]
forall t. Ord t => Map (t, t) Int -> [Scored t]
scored' Map (t, t) Int
m

    map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
    map2scored :: Map t (Vector Double) -> Vector (Scored t)
map2scored = ((t, Vector Double) -> Scored t)
-> Vector (t, Vector Double) -> Vector (Scored t)
forall a b. (a -> b) -> Vector a -> Vector b
V.map (\(t
t, Vector Double
ds) -> t -> Double -> Double -> Scored t
forall ts. ts -> Double -> Double -> Scored ts
Scored t
t (Vector Double -> Double
forall a. Storable a => Vector a -> a
Vec.head Vector Double
ds) (Vector Double -> Double
forall a. Storable a => Vector a -> a
Vec.last Vector Double
ds)) (Vector (t, Vector Double) -> Vector (Scored t))
-> (Map t (Vector Double) -> Vector (t, Vector Double))
-> Map t (Vector Double)
-> Vector (Scored t)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(t, Vector Double)] -> Vector (t, Vector Double)
forall a. [a] -> Vector a
V.fromList ([(t, Vector Double)] -> Vector (t, Vector Double))
-> (Map t (Vector Double) -> [(t, Vector Double)])
-> Map t (Vector Double)
-> Vector (t, Vector Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map t (Vector Double) -> [(t, Vector Double)]
forall k a. Map k a -> [(k, a)]
Map.toList

-- TODO change type with (x,y)
data Scored ts = Scored
  { Scored ts -> ts
_scored_terms  :: !ts
  , Scored ts -> Double
_scored_genInc :: !GenericityInclusion
  , Scored ts -> Double
_scored_speExc :: !SpecificityExclusion
  } deriving (Int -> Scored ts -> ShowS
[Scored ts] -> ShowS
Scored ts -> String
(Int -> Scored ts -> ShowS)
-> (Scored ts -> String)
-> ([Scored ts] -> ShowS)
-> Show (Scored ts)
forall ts. Show ts => Int -> Scored ts -> ShowS
forall ts. Show ts => [Scored ts] -> ShowS
forall ts. Show ts => Scored ts -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Scored ts] -> ShowS
$cshowList :: forall ts. Show ts => [Scored ts] -> ShowS
show :: Scored ts -> String
$cshow :: forall ts. Show ts => Scored ts -> String
showsPrec :: Int -> Scored ts -> ShowS
$cshowsPrec :: forall ts. Show ts => Int -> Scored ts -> ShowS
Show, Scored ts -> Scored ts -> Bool
(Scored ts -> Scored ts -> Bool)
-> (Scored ts -> Scored ts -> Bool) -> Eq (Scored ts)
forall ts. Eq ts => Scored ts -> Scored ts -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Scored ts -> Scored ts -> Bool
$c/= :: forall ts. Eq ts => Scored ts -> Scored ts -> Bool
== :: Scored ts -> Scored ts -> Bool
$c== :: forall ts. Eq ts => Scored ts -> Scored ts -> Bool
Eq, Eq (Scored ts)
Eq (Scored ts)
-> (Scored ts -> Scored ts -> Ordering)
-> (Scored ts -> Scored ts -> Bool)
-> (Scored ts -> Scored ts -> Bool)
-> (Scored ts -> Scored ts -> Bool)
-> (Scored ts -> Scored ts -> Bool)
-> (Scored ts -> Scored ts -> Scored ts)
-> (Scored ts -> Scored ts -> Scored ts)
-> Ord (Scored ts)
Scored ts -> Scored ts -> Bool
Scored ts -> Scored ts -> Ordering
Scored ts -> Scored ts -> Scored ts
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall ts. Ord ts => Eq (Scored ts)
forall ts. Ord ts => Scored ts -> Scored ts -> Bool
forall ts. Ord ts => Scored ts -> Scored ts -> Ordering
forall ts. Ord ts => Scored ts -> Scored ts -> Scored ts
min :: Scored ts -> Scored ts -> Scored ts
$cmin :: forall ts. Ord ts => Scored ts -> Scored ts -> Scored ts
max :: Scored ts -> Scored ts -> Scored ts
$cmax :: forall ts. Ord ts => Scored ts -> Scored ts -> Scored ts
>= :: Scored ts -> Scored ts -> Bool
$c>= :: forall ts. Ord ts => Scored ts -> Scored ts -> Bool
> :: Scored ts -> Scored ts -> Bool
$c> :: forall ts. Ord ts => Scored ts -> Scored ts -> Bool
<= :: Scored ts -> Scored ts -> Bool
$c<= :: forall ts. Ord ts => Scored ts -> Scored ts -> Bool
< :: Scored ts -> Scored ts -> Bool
$c< :: forall ts. Ord ts => Scored ts -> Scored ts -> Bool
compare :: Scored ts -> Scored ts -> Ordering
$ccompare :: forall ts. Ord ts => Scored ts -> Scored ts -> Ordering
$cp1Ord :: forall ts. Ord ts => Eq (Scored ts)
Ord)

instance Monoid a => Monoid (Scored a) where
  mempty :: Scored a
mempty = a -> Double -> Double -> Scored a
forall ts. ts -> Double -> Double -> Scored ts
Scored a
forall a. Monoid a => a
mempty Double
forall a. Monoid a => a
mempty Double
forall a. Monoid a => a
mempty

instance Semigroup a => Semigroup (Scored a) where
  <> :: Scored a -> Scored a -> Scored a
(<>) (Scored a
a  Double
b  Double
c )
       (Scored a
_a' Double
b' Double
c')
      = a -> Double -> Double -> Scored a
forall ts. ts -> Double -> Double -> Scored ts
Scored (a
a {-<> a'-})
               (Double
b Double -> Double -> Double
forall a. Semigroup a => a -> a -> a
<> Double
b')
               (Double
c Double -> Double -> Double
forall a. Semigroup a => a -> a -> a
<> Double
c')

localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics' :: Map (t, t) Int -> Map t (Vector Double)
localMetrics' Map (t, t) Int
m = [(t, Vector Double)] -> Map t (Vector Double)
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(t, Vector Double)] -> Map t (Vector Double))
-> [(t, Vector Double)] -> Map t (Vector Double)
forall a b. (a -> b) -> a -> b
$ ((Int, t) -> (Double, Double) -> (t, Vector Double))
-> [(Int, t)] -> [(Double, Double)] -> [(t, Vector Double)]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\(Int
_,t
t) (Double
inc,Double
spe) -> (t
t, [Double] -> Vector Double
forall a. Storable a => [a] -> Vector a
Vec.fromList [Double
inc,Double
spe]))
                                         (Map Int t -> [(Int, t)]
forall k a. Map k a -> [(k, a)]
Map.toList Map Int t
fi)
                                          [(Double, Double)]
scores
  where
    (Map t Int
ti, Map Int t
fi) = Map (t, t) Int -> (Map t Int, Map Int t)
forall t b. Ord t => Map (t, t) b -> (Map t Int, Map Int t)
createIndices Map (t, t) Int
m
    (Vector Double
is, Vector Double
ss) = Matrix Int -> (Vector Double, Vector Double)
incExcSpeGen (Matrix Int -> (Vector Double, Vector Double))
-> Matrix Int -> (Vector Double, Vector Double)
forall a b. (a -> b) -> a -> b
$ MatrixShape -> Map t Int -> Map (t, t) Int -> Matrix Int
forall t.
Ord t =>
MatrixShape -> Map t Int -> Map (t, t) Int -> Matrix Int
cooc2mat MatrixShape
Triangle Map t Int
ti Map (t, t) Int
m
    scores :: [(Double, Double)]
scores   = Array DIM1 (Double, Double) -> [(Double, Double)]
forall sh e. Array sh e -> [e]
DAA.toList
             (Array DIM1 (Double, Double) -> [(Double, Double)])
-> Array DIM1 (Double, Double) -> [(Double, Double)]
forall a b. (a -> b) -> a -> b
$ Acc (Array DIM1 (Double, Double)) -> Array DIM1 (Double, Double)
forall a. Arrays a => Acc a -> a
DAA.run
             (Acc (Array DIM1 (Double, Double)) -> Array DIM1 (Double, Double))
-> Acc (Array DIM1 (Double, Double)) -> Array DIM1 (Double, Double)
forall a b. (a -> b) -> a -> b
$ Acc (Vector Double)
-> Acc (Vector Double) -> Acc (Array DIM1 (Double, Double))
forall sh a b.
(Shape sh, Elt a, Elt b) =>
Acc (Array sh a) -> Acc (Array sh b) -> Acc (Array sh (a, b))
DAA.zip (Vector Double -> Acc (Vector Double)
forall arrays. Arrays arrays => arrays -> Acc arrays
DAA.use Vector Double
is) (Vector Double -> Acc (Vector Double)
forall arrays. Arrays arrays => arrays -> Acc arrays
DAA.use Vector Double
ss)

-- TODO Code to be removed below
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
scored' :: Ord t => Map (t,t) Int -> [Scored t]
scored' :: Map (t, t) Int -> [Scored t]
scored' Map (t, t) Int
m = ((Int, t) -> (Double, Double) -> Scored t)
-> [(Int, t)] -> [(Double, Double)] -> [Scored t]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\(Int
_,t
t) (Double
inc,Double
spe) -> t -> Double -> Double -> Scored t
forall ts. ts -> Double -> Double -> Scored ts
Scored t
t Double
inc Double
spe) (Map Int t -> [(Int, t)]
forall k a. Map k a -> [(k, a)]
Map.toList Map Int t
fi) [(Double, Double)]
scores
  where
    (Map t Int
ti, Map Int t
fi) = Map (t, t) Int -> (Map t Int, Map Int t)
forall t b. Ord t => Map (t, t) b -> (Map t Int, Map Int t)
createIndices Map (t, t) Int
m
    (Vector Double
is, Vector Double
ss) = Matrix Int -> (Vector Double, Vector Double)
incExcSpeGen (Matrix Int -> (Vector Double, Vector Double))
-> Matrix Int -> (Vector Double, Vector Double)
forall a b. (a -> b) -> a -> b
$ MatrixShape -> Map t Int -> Map (t, t) Int -> Matrix Int
forall t.
Ord t =>
MatrixShape -> Map t Int -> Map (t, t) Int -> Matrix Int
cooc2mat MatrixShape
Triangle Map t Int
ti Map (t, t) Int
m
    scores :: [(Double, Double)]
scores   = Array DIM1 (Double, Double) -> [(Double, Double)]
forall sh e. Array sh e -> [e]
DAA.toList
             (Array DIM1 (Double, Double) -> [(Double, Double)])
-> Array DIM1 (Double, Double) -> [(Double, Double)]
forall a b. (a -> b) -> a -> b
$ Acc (Array DIM1 (Double, Double)) -> Array DIM1 (Double, Double)
forall a. Arrays a => Acc a -> a
DAA.run
             (Acc (Array DIM1 (Double, Double)) -> Array DIM1 (Double, Double))
-> Acc (Array DIM1 (Double, Double)) -> Array DIM1 (Double, Double)
forall a b. (a -> b) -> a -> b
$ Acc (Vector Double)
-> Acc (Vector Double) -> Acc (Array DIM1 (Double, Double))
forall sh a b.
(Shape sh, Elt a, Elt b) =>
Acc (Array sh a) -> Acc (Array sh b) -> Acc (Array sh (a, b))
DAA.zip (Vector Double -> Acc (Vector Double)
forall arrays. Arrays arrays => arrays -> Acc arrays
DAA.use Vector Double
is) (Vector Double -> Acc (Vector Double)
forall arrays. Arrays arrays => arrays -> Acc arrays
DAA.use Vector Double
ss)


normalizeGlobal :: [Scored a] -> [Scored a]
normalizeGlobal :: [Scored a] -> [Scored a]
normalizeGlobal [Scored a]
ss = (Scored a -> Scored a) -> [Scored a] -> [Scored a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Scored a
t Double
s1 Double
s2)
                     -> a -> Double -> Double -> Scored a
forall ts. ts -> Double -> Double -> Scored ts
Scored a
t ((Double
s1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
s1min) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
s1max)
                                 ((Double
s2 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
s2min) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
s2max)) [Scored a]
ss
  where
    ss1 :: [Double]
ss1 = (Scored a -> Double) -> [Scored a] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Scored a -> Double
forall ts. Scored ts -> Double
_scored_genInc [Scored a]
ss
    ss2 :: [Double]
ss2 = (Scored a -> Double) -> [Scored a] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Scored a -> Double
forall ts. Scored ts -> Double
_scored_speExc [Scored a]
ss

    s1min :: Double
s1min = [Double] -> Double
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [Double]
ss1
    s1max :: Double
s1max = [Double] -> Double
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Double]
ss1

    s2min :: Double
s2min = [Double] -> Double
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [Double]
ss2
    s2max :: Double
s2max = [Double] -> Double
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Double]
ss2



normalizeLocal :: Scored a -> Scored a
normalizeLocal :: Scored a -> Scored a
normalizeLocal (Scored a
t Double
s1 Double
s2) = a -> Double -> Double -> Scored a
forall ts. ts -> Double -> Double -> Scored ts
Scored a
t (Int -> Double -> Double
forall a. (Ord a, Floating a) => Int -> a -> a
log' Int
5 Double
s1) (Int -> Double -> Double
forall a. (Ord a, Floating a) => Int -> a -> a
log' Int
2 Double
s2)
  where
    log' :: Int -> a -> a
log' Int
n' a
x = a
1 a -> a -> a
forall a. Num a => a -> a -> a
+ (if a
x a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0 then a
0 else a -> a
forall a. Floating a => a -> a
log (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ (a
10a -> Int -> a
forall a b. (Num a, Integral b) => a -> b -> a
^(Int
n'::Int)) a -> a -> a
forall a. Num a => a -> a -> a
* a
x)



-- | Type Instances
makeLenses 'Scored