{-|
Module      : Gargantext.Core.Text.Metrics.TFICF
Description : TFICF Ngrams tools
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Definition of TFICF : Term Frequency - Inverse of Context Frequency

TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).

-}


module Gargantext.Core.Text.Metrics.TFICF ( TFICF
                                     , TficfContext(..)
                                     , Total(..)
                                     , Count(..)
                                     , tficf
                                     , sortTficf
                                     )
  where

import Data.Map.Strict (Map, toList)
import Data.Text (Text)
import Gargantext.Core.Types (Ordering(..))
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Ord as DO (Down(..))

-- | Module tag spliced into the panic messages raised by 'tficf', so that a
-- failure can be traced back to this module.
path :: Text
path = "[G.T.Metrics.TFICF]"

-- | Score produced by 'tficf'; a plain alias for 'Double'.
type TFICF = Double

-- | A pair of values tagged as belonging to either the infra or the supra
-- context.
--
-- 'tficf' instantiates @n@ with 'Count' and @m@ with 'Total', and expects an
-- infra context as its first argument and a supra context as its second.
data TficfContext n m
  = TficfInfra n m -- ^ Values measured in the infra context.
  | TficfSupra n m -- ^ Values measured in the supra context.
  deriving (Show)

-- | Strict 'Double' wrapper holding the total of a context.
--
-- 'tficf' requires it to be greater than or equal to the 'Count' it is
-- paired with.
data Total = Total { unTotal :: !Double }
-- | Strict 'Double' wrapper holding the count observed in a context.
--
-- 'tficf' divides by this value, see the notes there about zero counts.
data Count = Count { unCount :: !Double }

-- | Compute the TFICF score of an infra context against a supra context.
--
-- With @ic@ / @it@ the count and total of the infra context and @sc@ / @st@
-- the count and total of the supra context, the result is
--
-- > (it / ic) * log (st / sc)
--
-- The first argument must be built with 'TficfInfra' and the second with
-- 'TficfSupra'; any other combination panics.
--
-- The call also panics ("Frequency impossible") unless all of the following
-- hold: @it >= ic@, @st >= sc@ and @it <= st@. A NaN in any of these
-- comparisons makes the guard fail, so it panics as well.
--
-- NOTE(review): a zero count is not rejected by the guard, so @ic == 0@ or
-- @sc == 0@ produces Infinity (or NaN when the matching total is 0 too)
-- instead of a panic -- confirm callers never pass a zero count.
tficf :: TficfContext Count Total
      -> TficfContext Count Total
      -> TFICF
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc && it <= st = (it / ic) * log (st / sc)
  | otherwise                        = panic
                                     $ "[ERR]"
                                     <> path
                                     <> " Frequency impossible"
-- NOTE(review): unlike the message above, there is no space between the
-- module tag and the text here; the literal is kept as-is on purpose.
tficf _ _ = panic $ "[ERR]" <> path <> "Undefined for these contexts"


-- | Flatten a map of scores into a list of @(key, score)@ pairs sorted by
-- score.
--
-- 'Down' yields the highest score first, 'Up' the lowest score first.
-- The 'Ordering' here is the one imported from "Gargantext.Core.Types", not
-- the Prelude's. 'List.sortOn' is stable and 'toList' enumerates keys in
-- ascending order, so pairs with equal scores stay in ascending key order
-- for both directions.
sortTficf :: Ordering
          -> Map Text Double
          -> [(Text, Double)]
sortTficf Down = List.sortOn (DO.Down . snd) . toList
sortTficf Up   = List.sortOn snd . toList