{-|
Module      : Gargantext.Database.Query.Table.NgramsPostag
Description : Deal with in Gargantext Database.
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

TODO use Opaleye for the queries.

-}

{-# LANGUAGE Arrows            #-}
{-# LANGUAGE QuasiQuotes       #-}
{-# LANGUAGE TemplateHaskell   #-}

module Gargantext.Database.Query.Table.NgramsPostag
    where

import Control.Lens (view, (^.))
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Text (Text)
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd, runPGSQuery, runPGSQuery_)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Types
import Gargantext.Prelude
import qualified Data.HashMap.Strict        as HashMap
import qualified Data.List                  as List
import qualified Database.PostgreSQL.Simple as PGS

data NgramsPostag = NgramsPostag { NgramsPostag -> Lang
_np_lang   :: !Lang
                                 , NgramsPostag -> PosTagAlgo
_np_algo   :: !PosTagAlgo
                                 , NgramsPostag -> POS
_np_postag :: !POS
                                 , NgramsPostag -> Ngrams
_np_form   :: !Ngrams
                                 , NgramsPostag -> Ngrams
_np_lem    :: !Ngrams
                                 }
  deriving (NgramsPostag -> NgramsPostag -> Bool
(NgramsPostag -> NgramsPostag -> Bool)
-> (NgramsPostag -> NgramsPostag -> Bool) -> Eq NgramsPostag
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: NgramsPostag -> NgramsPostag -> Bool
$c/= :: NgramsPostag -> NgramsPostag -> Bool
== :: NgramsPostag -> NgramsPostag -> Bool
$c== :: NgramsPostag -> NgramsPostag -> Bool
Eq, Eq NgramsPostag
Eq NgramsPostag
-> (NgramsPostag -> NgramsPostag -> Ordering)
-> (NgramsPostag -> NgramsPostag -> Bool)
-> (NgramsPostag -> NgramsPostag -> Bool)
-> (NgramsPostag -> NgramsPostag -> Bool)
-> (NgramsPostag -> NgramsPostag -> Bool)
-> (NgramsPostag -> NgramsPostag -> NgramsPostag)
-> (NgramsPostag -> NgramsPostag -> NgramsPostag)
-> Ord NgramsPostag
NgramsPostag -> NgramsPostag -> Bool
NgramsPostag -> NgramsPostag -> Ordering
NgramsPostag -> NgramsPostag -> NgramsPostag
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: NgramsPostag -> NgramsPostag -> NgramsPostag
$cmin :: NgramsPostag -> NgramsPostag -> NgramsPostag
max :: NgramsPostag -> NgramsPostag -> NgramsPostag
$cmax :: NgramsPostag -> NgramsPostag -> NgramsPostag
>= :: NgramsPostag -> NgramsPostag -> Bool
$c>= :: NgramsPostag -> NgramsPostag -> Bool
> :: NgramsPostag -> NgramsPostag -> Bool
$c> :: NgramsPostag -> NgramsPostag -> Bool
<= :: NgramsPostag -> NgramsPostag -> Bool
$c<= :: NgramsPostag -> NgramsPostag -> Bool
< :: NgramsPostag -> NgramsPostag -> Bool
$c< :: NgramsPostag -> NgramsPostag -> Bool
compare :: NgramsPostag -> NgramsPostag -> Ordering
$ccompare :: NgramsPostag -> NgramsPostag -> Ordering
$cp1Ord :: Eq NgramsPostag
Ord, (forall x. NgramsPostag -> Rep NgramsPostag x)
-> (forall x. Rep NgramsPostag x -> NgramsPostag)
-> Generic NgramsPostag
forall x. Rep NgramsPostag x -> NgramsPostag
forall x. NgramsPostag -> Rep NgramsPostag x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep NgramsPostag x -> NgramsPostag
$cfrom :: forall x. NgramsPostag -> Rep NgramsPostag x
Generic, Int -> NgramsPostag -> ShowS
[NgramsPostag] -> ShowS
NgramsPostag -> String
(Int -> NgramsPostag -> ShowS)
-> (NgramsPostag -> String)
-> ([NgramsPostag] -> ShowS)
-> Show NgramsPostag
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [NgramsPostag] -> ShowS
$cshowList :: [NgramsPostag] -> ShowS
show :: NgramsPostag -> String
$cshow :: NgramsPostag -> String
showsPrec :: Int -> NgramsPostag -> ShowS
$cshowsPrec :: Int -> NgramsPostag -> ShowS
Show)
makeLenses ''NgramsPostag
instance Hashable NgramsPostag


type NgramsPostagInsert = ( Int
                          , Int
                          , Text
                          , Text
                          , Int
                          , Text
                          , Int
                          )

toInsert :: NgramsPostag -> NgramsPostagInsert
toInsert :: NgramsPostag -> NgramsPostagInsert
toInsert (NgramsPostag Lang
l PosTagAlgo
a POS
p Ngrams
form Ngrams
lem) =
  ( Lang -> Int
forall a. HasDBid a => a -> Int
toDBid Lang
l
  , PosTagAlgo -> Int
forall a. HasDBid a => a -> Int
toDBid PosTagAlgo
a
  , String -> Text
forall a b. ConvertibleStrings a b => a -> b
cs (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ POS -> String
forall a. Show a => a -> String
show POS
p
  , Getting Text Ngrams Text -> Ngrams -> Text
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view Getting Text Ngrams Text
Lens' Ngrams Text
ngramsTerms Ngrams
form
  , Getting Int Ngrams Int -> Ngrams -> Int
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view Getting Int Ngrams Int
Lens' Ngrams Int
ngramsSize  Ngrams
form
  , Getting Text Ngrams Text -> Ngrams -> Text
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view Getting Text Ngrams Text
Lens' Ngrams Text
ngramsTerms Ngrams
lem
  , Getting Int Ngrams Int -> Ngrams -> Int
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view Getting Int Ngrams Int
Lens' Ngrams Int
ngramsSize  Ngrams
lem
  )

insertNgramsPostag :: [NgramsPostag] -> Cmd err (HashMap Text NgramsId)
insertNgramsPostag :: [NgramsPostag] -> Cmd err (HashMap Text Int)
insertNgramsPostag [NgramsPostag]
xs =
  if [NgramsPostag] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
List.null [NgramsPostag]
xs
     then HashMap Text Int -> m (HashMap Text Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure HashMap Text Int
forall k v. HashMap k v
HashMap.empty
     else do
        -- We do not store the lem if it equals to its self form
       let
          ([NgramsPostag]
ns, [NgramsPostag]
nps) =
            (NgramsPostag -> Bool)
-> [NgramsPostag] -> ([NgramsPostag], [NgramsPostag])
forall a. (a -> Bool) -> [a] -> ([a], [a])
List.partition (\NgramsPostag
np -> NgramsPostag
np NgramsPostag -> Getting Text NgramsPostag Text -> Text
forall s a. s -> Getting a s a -> a
^. (Ngrams -> Const Text Ngrams)
-> NgramsPostag -> Const Text NgramsPostag
Lens' NgramsPostag Ngrams
np_form ((Ngrams -> Const Text Ngrams)
 -> NgramsPostag -> Const Text NgramsPostag)
-> Getting Text Ngrams Text -> Getting Text NgramsPostag Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Getting Text Ngrams Text
Lens' Ngrams Text
ngramsTerms
                                Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== NgramsPostag
np NgramsPostag -> Getting Text NgramsPostag Text -> Text
forall s a. s -> Getting a s a -> a
^. (Ngrams -> Const Text Ngrams)
-> NgramsPostag -> Const Text NgramsPostag
Lens' NgramsPostag Ngrams
np_lem  ((Ngrams -> Const Text Ngrams)
 -> NgramsPostag -> Const Text NgramsPostag)
-> Getting Text Ngrams Text -> Getting Text NgramsPostag Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Getting Text Ngrams Text
Lens' Ngrams Text
ngramsTerms
                           ) [NgramsPostag]
xs

       HashMap Text Int
ns' <- [Ngrams] -> Cmd err (HashMap Text Int)
forall err. [Ngrams] -> Cmd err (HashMap Text Int)
insertNgrams ((NgramsPostag -> Ngrams) -> [NgramsPostag] -> [Ngrams]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Getting Ngrams NgramsPostag Ngrams -> NgramsPostag -> Ngrams
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view Getting Ngrams NgramsPostag Ngrams
Lens' NgramsPostag Ngrams
np_form) [NgramsPostag]
ns)

       HashMap Text Int
nps' <- [(Text, Int)] -> HashMap Text Int
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList
           ([(Text, Int)] -> HashMap Text Int)
-> ([Indexed Text Int] -> [(Text, Int)])
-> [Indexed Text Int]
-> HashMap Text Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Indexed Text Int -> (Text, Int))
-> [Indexed Text Int] -> [(Text, Int)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(Indexed Text
t Int
i) -> (Text
t,Int
i))
           ([Indexed Text Int] -> HashMap Text Int)
-> m [Indexed Text Int] -> m (HashMap Text Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [NgramsPostagInsert] -> Cmd err [Indexed Text Int]
forall err. [NgramsPostagInsert] -> Cmd err [Indexed Text Int]
insertNgramsPostag' ((NgramsPostag -> NgramsPostagInsert)
-> [NgramsPostag] -> [NgramsPostagInsert]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map NgramsPostag -> NgramsPostagInsert
toInsert [NgramsPostag]
nps)

       HashMap Text Int -> m (HashMap Text Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (HashMap Text Int -> m (HashMap Text Int))
-> HashMap Text Int -> m (HashMap Text Int)
forall a b. (a -> b) -> a -> b
$ HashMap Text Int -> HashMap Text Int -> HashMap Text Int
forall k v.
(Eq k, Hashable k) =>
HashMap k v -> HashMap k v -> HashMap k v
HashMap.union HashMap Text Int
ns' HashMap Text Int
nps'


insertNgramsPostag' :: [NgramsPostagInsert] -> Cmd err [Indexed Text Int]
insertNgramsPostag' :: [NgramsPostagInsert] -> Cmd err [Indexed Text Int]
insertNgramsPostag' [NgramsPostagInsert]
ns = Query -> Only (Values NgramsPostagInsert) -> m [Indexed Text Int]
forall env err (m :: * -> *) r q.
(CmdM env err m, FromRow r, ToRow q) =>
Query -> q -> m [r]
runPGSQuery Query
queryInsertNgramsPostag (Values NgramsPostagInsert -> Only (Values NgramsPostagInsert)
forall a. a -> Only a
PGS.Only (Values NgramsPostagInsert -> Only (Values NgramsPostagInsert))
-> Values NgramsPostagInsert -> Only (Values NgramsPostagInsert)
forall a b. (a -> b) -> a -> b
$ [QualifiedIdentifier]
-> [NgramsPostagInsert] -> Values NgramsPostagInsert
forall a. [QualifiedIdentifier] -> [a] -> Values a
Values [QualifiedIdentifier]
fields [NgramsPostagInsert]
ns)
  where

    fields :: [QualifiedIdentifier]
fields = (Text -> QualifiedIdentifier) -> [Text] -> [QualifiedIdentifier]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Text
t -> Maybe Text -> Text -> QualifiedIdentifier
QualifiedIdentifier Maybe Text
forall a. Maybe a
Nothing Text
t) ([Text] -> [QualifiedIdentifier])
-> [Text] -> [QualifiedIdentifier]
forall a b. (a -> b) -> a -> b
$ ([Text], [Text]) -> [Text]
forall a b. (a, b) -> b
snd ([Text], [Text])
fields_name

    fields_name :: ( [Text], [Text])
    fields_name :: ([Text], [Text])
fields_name = ( [Text
"lang_id", Text
"algo_id", Text
"postag", Text
"form", Text
"form_n", Text
"lem" , Text
"lem_n"]
                  , [Text
"int4"   , Text
"int4"   , Text
"text"  , Text
"text", Text
"int4"  , Text
"text", Text
"int4" ]
                  )

----------------------
queryInsertNgramsPostag :: PGS.Query
queryInsertNgramsPostag :: Query
queryInsertNgramsPostag = [sql|
  WITH input_rows(lang_id,algo_id,postag,form,form_n, lem, lem_n)
   AS (?)
   -- ((VALUES (1::"int4",2::"int4",'VB'::"text",'dansaient'::"text",1::"int4",'danser'::"text",1::"int4")))
  ------------------------------------------------
  , ins_form AS (INSERT INTO ngrams (terms,n)
    SELECT ir1.form, ir1.form_n
      FROM input_rows as ir1
      UNION ALL
      SELECT ir2.lem, ir2.lem_n
      FROM input_rows as ir2
      ON CONFLICT (terms)
        DO NOTHING
        RETURNING id,terms
      )
  ------------------------------------------------
  , ins_form_ret AS (
      SELECT id, terms
      FROM   ins_form
      UNION  ALL
      SELECT n.id, ir.form
      FROM   input_rows ir
      JOIN   ngrams n ON n.terms = ir.form
    )

  , ins_lem_ret AS (
      SELECT id, terms
      FROM   ins_form
      UNION  ALL
      SELECT n.id, ir.lem
      FROM   input_rows ir
      JOIN   ngrams n ON n.terms = ir.lem
    )
  ------------------------------------------------
  ------------------------------------------------
  , ins_postag AS (
    INSERT INTO ngrams_postag (lang_id, algo_id, postag, ngrams_id, lemm_id,score)
    SELECT ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id,1 -- count(*) as s
    FROM input_rows ir
      JOIN ins_form_ret  form ON form.terms = ir.form
      JOIN ins_lem_ret   lem  ON lem.terms  = ir.lem
       -- GROUP BY ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id
       -- ORDER BY s DESC
       -- LIMIT 1
      ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id)
        DO NOTHING -- acceptable for now since we are using NP mainly
        -- DO UPDATE SET score = ngrams_postag.score + 1
    )

SELECT terms,id FROM ins_form_ret
 INNER JOIN input_rows ir ON ins_form_ret.terms = ir.form

  |]

-- TODO add lang and postag algo
-- TODO remove when form == lem in insert
selectLems :: Lang -> PosTagAlgo -> [Ngrams] -> Cmd err [(Form, Lem)]
selectLems :: Lang -> PosTagAlgo -> [Ngrams] -> Cmd err [(Text, Text)]
selectLems Lang
l PosTagAlgo
a [Ngrams]
ns = Query -> Only (Values [Action]) -> m [(Text, Text)]
forall env err (m :: * -> *) r q.
(CmdM env err m, FromRow r, ToRow q) =>
Query -> q -> m [r]
runPGSQuery Query
querySelectLems (Values [Action] -> Only (Values [Action])
forall a. a -> Only a
PGS.Only (Values [Action] -> Only (Values [Action]))
-> Values [Action] -> Only (Values [Action])
forall a b. (a -> b) -> a -> b
$ [QualifiedIdentifier] -> [[Action]] -> Values [Action]
forall a. [QualifiedIdentifier] -> [a] -> Values a
Values [QualifiedIdentifier]
fields [[Action]]
datas)
  where
    fields :: [QualifiedIdentifier]
fields = (Text -> QualifiedIdentifier) -> [Text] -> [QualifiedIdentifier]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Text
t -> Maybe Text -> Text -> QualifiedIdentifier
QualifiedIdentifier Maybe Text
forall a. Maybe a
Nothing Text
t) [Text
"int4",Text
"int4",Text
"text", Text
"int4"]
    datas :: [[Action]]
datas  = (Ngrams -> [Action]) -> [Ngrams] -> [[Action]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Ngrams
d -> [Int -> Action
forall a. ToField a => a -> Action
toField (Int -> Action) -> Int -> Action
forall a b. (a -> b) -> a -> b
$ Lang -> Int
forall a. HasDBid a => a -> Int
toDBid Lang
l, Int -> Action
forall a. ToField a => a -> Action
toField (Int -> Action) -> Int -> Action
forall a b. (a -> b) -> a -> b
$ PosTagAlgo -> Int
forall a. HasDBid a => a -> Int
toDBid PosTagAlgo
a] [Action] -> [Action] -> [Action]
forall a. Semigroup a => a -> a -> a
<> Ngrams -> [Action]
forall a. ToRow a => a -> [Action]
toRow Ngrams
d) [Ngrams]
ns 

----------------------
querySelectLems :: PGS.Query
querySelectLems :: Query
querySelectLems = [sql|
  WITH input_rows(lang_id, algo_id, terms,n)
    AS (?) -- ((VALUES ('automata' :: "text")))
    , lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
    JOIN ngrams        n1 ON ir.terms = n1.terms
    JOIN ngrams_postag np ON np.ngrams_id = n1.id
    JOIN ngrams        n2 ON n2.id    = np.lemm_id
    WHERE np.lang_id = ir.lang_id
      AND np.algo_id = ir.algo_id
    GROUP BY n1.terms, n2.terms
    ORDER BY score DESC
  )

  SELECT t1,t2 from lems
  |]

-- | Insert Table
createTable_NgramsPostag :: Cmd err [Int]
createTable_NgramsPostag :: m [Int]
createTable_NgramsPostag = (Only Int -> Int) -> [Only Int] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\(PGS.Only Int
a) -> Int
a)
                        ([Only Int] -> [Int]) -> m [Only Int] -> m [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Query -> m [Only Int]
forall env err (m :: * -> *) r.
(CmdM env err m, FromRow r) =>
Query -> m [r]
runPGSQuery_ Query
queryCreateTable
  where
    queryCreateTable :: PGS.Query
    queryCreateTable :: Query
queryCreateTable = [sql|

    CREATE TABLE public.ngrams_postag (
        id SERIAL,
        lang_id INTEGER,
        algo_id INTEGER,
        postag CHARACTER varying(5),
        ngrams_id INTEGER NOT NULL,
        lemm_id   INTEGER NOT NULL,
        score     INTEGER DEFAULT 1 ::integer NOT NULL,
        FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
        FOREIGN KEY (lemm_id)   REFERENCES public.ngrams(id) ON DELETE CASCADE
    )  ;
    -- ALTER TABLE public.ngrams_postag OWNER TO gargantua;

    CREATE UNIQUE INDEX ON public.ngrams_postag (lang_id,algo_id,postag,ngrams_id,lemm_id);

      |]