{-|
Module      : Gargantext.Database.Flow
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-- TODO-ACCESS:
--   check userId       CanFillUserCorpus   userCorpusId
--   check masterUserId CanFillMasterCorpus masterCorpusId

-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
-}

{-# OPTIONS_GHC -fno-warn-orphans #-}

{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds         #-}
{-# LANGUAGE InstanceSigs            #-}
{-# LANGUAGE ScopedTypeVariables     #-}
{-# LANGUAGE TemplateHaskell         #-}

module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
  ( DataText(..)
  , getDataText
  , flowDataText
  , flow

  , flowCorpusFile
  , flowCorpus
  , flowAnnuaire
  , insertMasterDocs
  , saveDocNgramsWith

  , getOrMkRoot
  , getOrMk_RootWithCorpus
  , TermType(..)
  , DataOrigin(..)
  , allDataOrigins

  , do_api
  , indexAllDocumentsWithPosTag
  )
    where

import Control.Lens ((^.), view, _Just, makeLenses)
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.List (concat)
import Data.Map (Map, lookup)
import Data.Maybe (catMaybes)
import Data.Monoid
import Data.Swagger
import qualified Data.Text as T
import Data.Traversable (traverse)
import Data.Tuple.Extra (first, second)
import GHC.Generics (Generic)
import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map

import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
import Gargantext.Core (Lang(..), PosTagAlgo(..))
import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (POS(NP))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Flow.List
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, DocumentIdWithNgrams(..))
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Table.NodeNodeNgrams2
import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Schema.Node (NodePoly(..), node_id)
import Gargantext.Database.Types
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Database.Query.Table.Node.Document.Add  as Doc  (add)

------------------------------------------------------------------------
-- Imports for upgrade function
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Database.Query.Tree (findNodesId)
import qualified Data.List as List
------------------------------------------------------------------------
-- TODO use internal with API name (could be old data)
data DataOrigin = InternalOrigin { DataOrigin -> ExternalAPIs
_do_api :: API.ExternalAPIs }
                | ExternalOrigin { _do_api :: API.ExternalAPIs }
               -- TODO Web
  deriving ((forall x. DataOrigin -> Rep DataOrigin x)
-> (forall x. Rep DataOrigin x -> DataOrigin) -> Generic DataOrigin
forall x. Rep DataOrigin x -> DataOrigin
forall x. DataOrigin -> Rep DataOrigin x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep DataOrigin x -> DataOrigin
$cfrom :: forall x. DataOrigin -> Rep DataOrigin x
Generic, DataOrigin -> DataOrigin -> Bool
(DataOrigin -> DataOrigin -> Bool)
-> (DataOrigin -> DataOrigin -> Bool) -> Eq DataOrigin
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DataOrigin -> DataOrigin -> Bool
$c/= :: DataOrigin -> DataOrigin -> Bool
== :: DataOrigin -> DataOrigin -> Bool
$c== :: DataOrigin -> DataOrigin -> Bool
Eq)

makeLenses ''DataOrigin
deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where
  declareNamedSchema :: Proxy DataOrigin -> Declare (Definitions Schema) NamedSchema
declareNamedSchema = SchemaOptions
-> Proxy DataOrigin -> Declare (Definitions Schema) NamedSchema
forall a.
(Generic a, GToSchema (Rep a),
 TypeHasSimpleShape a "genericDeclareNamedSchemaUnrestricted") =>
SchemaOptions
-> Proxy a -> Declare (Definitions Schema) NamedSchema
genericDeclareNamedSchema (String -> SchemaOptions
unPrefixSwagger String
"_do_")

allDataOrigins :: [DataOrigin]
allDataOrigins :: [DataOrigin]
allDataOrigins = (ExternalAPIs -> DataOrigin) -> [ExternalAPIs] -> [DataOrigin]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ExternalAPIs -> DataOrigin
InternalOrigin [ExternalAPIs]
API.externalAPIs
              [DataOrigin] -> [DataOrigin] -> [DataOrigin]
forall a. Semigroup a => a -> a -> a
<> (ExternalAPIs -> DataOrigin) -> [ExternalAPIs] -> [DataOrigin]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ExternalAPIs -> DataOrigin
ExternalOrigin [ExternalAPIs]
API.externalAPIs

---------------
data DataText = DataOld ![NodeId]
              | DataNew ![[HyperdataDocument]]

-- TODO use the split parameter in config file
getDataText :: FlowCmdM env err m
            => DataOrigin
            -> TermType Lang
            -> API.Query
            -> Maybe API.Limit
            -> m DataText
getDataText :: DataOrigin -> TermType Lang -> Query -> Maybe Limit -> m DataText
getDataText (ExternalOrigin ExternalAPIs
api) TermType Lang
la Query
q Maybe Limit
li = IO DataText -> m DataText
forall (b :: * -> *) (m :: * -> *) α. MonadBase b m => b α -> m α
liftBase (IO DataText -> m DataText) -> IO DataText -> m DataText
forall a b. (a -> b) -> a -> b
$ [[HyperdataDocument]] -> DataText
DataNew
                                  ([[HyperdataDocument]] -> DataText)
-> ([HyperdataDocument] -> [[HyperdataDocument]])
-> [HyperdataDocument]
-> DataText
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> [HyperdataDocument] -> [[HyperdataDocument]]
forall a. Int -> [a] -> [[a]]
splitEvery Int
500
                                  ([HyperdataDocument] -> DataText)
-> IO [HyperdataDocument] -> IO DataText
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ExternalAPIs
-> Lang -> Query -> Maybe Limit -> IO [HyperdataDocument]
API.get ExternalAPIs
api (TermType Lang -> Lang
forall lang. TermType lang -> lang
_tt_lang TermType Lang
la) Query
q Maybe Limit
li

getDataText (InternalOrigin ExternalAPIs
_) TermType Lang
_la Query
q Maybe Limit
_li = do
  (Int
_masterUserId, RootId
_masterRootId, RootId
cId) <- User
-> Either Query [RootId]
-> Maybe HyperdataCorpus
-> Cmd err (Int, RootId, RootId)
forall err a.
(HasNodeError err, MkCorpus a) =>
User
-> Either Query [RootId]
-> Maybe a
-> Cmd err (Int, RootId, RootId)
getOrMk_RootWithCorpus
                                           (Query -> User
UserName Query
userMaster)
                                           (Query -> Either Query [RootId]
forall a b. a -> Either a b
Left Query
"")
                                           (Maybe HyperdataCorpus
forall a. Maybe a
Nothing :: Maybe HyperdataCorpus)
  [RootId]
ids <-  ((RootId, HyperdataDocument) -> RootId)
-> [(RootId, HyperdataDocument)] -> [RootId]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (RootId, HyperdataDocument) -> RootId
forall a b. (a, b) -> a
fst ([(RootId, HyperdataDocument)] -> [RootId])
-> m [(RootId, HyperdataDocument)] -> m [RootId]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RootId -> Query -> Cmd err [(RootId, HyperdataDocument)]
forall err.
HasDBid NodeType =>
RootId -> Query -> Cmd err [(RootId, HyperdataDocument)]
searchDocInDatabase RootId
cId (Query -> Query
stemIt Query
q)
  DataText -> m DataText
forall (f :: * -> *) a. Applicative f => a -> f a
pure (DataText -> m DataText) -> DataText -> m DataText
forall a b. (a -> b) -> a -> b
$ [RootId] -> DataText
DataOld [RootId]
ids

-------------------------------------------------------------------------------
flowDataText :: ( FlowCmdM env err m
                )
                => User
                -> DataText
                -> TermType Lang
                -> CorpusId
                -> Maybe FlowSocialListWith
                -> (JobLog -> m ())
                -> m CorpusId
flowDataText :: User
-> DataText
-> TermType Lang
-> RootId
-> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m RootId
flowDataText User
u (DataOld [RootId]
ids) TermType Lang
tt RootId
cid Maybe FlowSocialListWith
mfslw JobLog -> m ()
_ = Lang
-> User
-> Either Query [RootId]
-> Maybe HyperdataCorpus
-> [RootId]
-> Maybe FlowSocialListWith
-> m RootId
forall env err (m :: * -> *) c.
(FlowCmdM env err m, MkCorpus c) =>
Lang
-> User
-> Either Query [RootId]
-> Maybe c
-> [RootId]
-> Maybe FlowSocialListWith
-> m RootId
flowCorpusUser (TermType Lang -> Lang
forall lang. TermType lang -> lang
_tt_lang TermType Lang
tt) User
u ([RootId] -> Either Query [RootId]
forall a b. b -> Either a b
Right [RootId
cid]) Maybe HyperdataCorpus
corpusType [RootId]
ids Maybe FlowSocialListWith
mfslw
  where
    corpusType :: Maybe HyperdataCorpus
corpusType = (Maybe HyperdataCorpus
forall a. Maybe a
Nothing :: Maybe HyperdataCorpus)
flowDataText User
u (DataNew [[HyperdataDocument]]
txt) TermType Lang
tt RootId
cid Maybe FlowSocialListWith
mfslw JobLog -> m ()
logStatus = User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[HyperdataDocument]]
-> (JobLog -> m ())
-> m RootId
forall env err (m :: * -> *) a.
(FlowCmdM env err m, FlowCorpus a) =>
User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m RootId
flowCorpus User
u ([RootId] -> Either Query [RootId]
forall a b. b -> Either a b
Right [RootId
cid]) TermType Lang
tt Maybe FlowSocialListWith
mfslw [[HyperdataDocument]]
txt JobLog -> m ()
logStatus

------------------------------------------------------------------------
-- TODO use proxy
flowAnnuaire :: (FlowCmdM env err m)
             => User
             -> Either CorpusName [CorpusId]
             -> (TermType Lang)
             -> FilePath
             -> (JobLog -> m ())
             -> m AnnuaireId
flowAnnuaire :: User
-> Either Query [RootId]
-> TermType Lang
-> String
-> (JobLog -> m ())
-> m RootId
flowAnnuaire User
u Either Query [RootId]
n TermType Lang
l String
filePath JobLog -> m ()
logStatus = do
  [[HyperdataContact]]
docs <- IO [[HyperdataContact]] -> m [[HyperdataContact]]
forall (b :: * -> *) (m :: * -> *) α. MonadBase b m => b α -> m α
liftBase (IO [[HyperdataContact]] -> m [[HyperdataContact]])
-> IO [[HyperdataContact]] -> m [[HyperdataContact]]
forall a b. (a -> b) -> a -> b
$ (( Int -> [HyperdataContact] -> [[HyperdataContact]]
forall a. Int -> [a] -> [[a]]
splitEvery Int
500 ([HyperdataContact] -> [[HyperdataContact]])
-> IO [HyperdataContact] -> IO [[HyperdataContact]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO [HyperdataContact]
readFile_Annuaire String
filePath) :: IO [[HyperdataContact]])
  Maybe HyperdataAnnuaire
-> User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[HyperdataContact]]
-> (JobLog -> m ())
-> m RootId
forall env err (m :: * -> *) a c.
(FlowCmdM env err m, FlowCorpus a, MkCorpus c) =>
Maybe c
-> User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m RootId
flow (Maybe HyperdataAnnuaire
forall a. Maybe a
Nothing :: Maybe HyperdataAnnuaire) User
u Either Query [RootId]
n TermType Lang
l Maybe FlowSocialListWith
forall a. Maybe a
Nothing [[HyperdataContact]]
docs JobLog -> m ()
logStatus

------------------------------------------------------------------------
flowCorpusFile :: (FlowCmdM env err m)
           => User
           -> Either CorpusName [CorpusId]
           -> Limit -- Limit the number of docs (for dev purpose)
           -> TermType Lang -> FileFormat -> FilePath
           -> Maybe FlowSocialListWith
           -> (JobLog -> m ())
           -> m CorpusId
flowCorpusFile :: User
-> Either Query [RootId]
-> Int
-> TermType Lang
-> FileFormat
-> String
-> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m RootId
flowCorpusFile User
u Either Query [RootId]
n Int
l TermType Lang
la FileFormat
ff String
fp Maybe FlowSocialListWith
mfslw JobLog -> m ()
logStatus = do
  Either String [HyperdataDocument]
eParsed <- IO (Either String [HyperdataDocument])
-> m (Either String [HyperdataDocument])
forall (b :: * -> *) (m :: * -> *) α. MonadBase b m => b α -> m α
liftBase (IO (Either String [HyperdataDocument])
 -> m (Either String [HyperdataDocument]))
-> IO (Either String [HyperdataDocument])
-> m (Either String [HyperdataDocument])
forall a b. (a -> b) -> a -> b
$ FileFormat -> String -> IO (Either String [HyperdataDocument])
parseFile FileFormat
ff String
fp
  case Either String [HyperdataDocument]
eParsed of
    Right [HyperdataDocument]
parsed -> do
      let docs :: [[HyperdataDocument]]
docs = Int -> [HyperdataDocument] -> [[HyperdataDocument]]
forall a. Int -> [a] -> [[a]]
splitEvery Int
500 ([HyperdataDocument] -> [[HyperdataDocument]])
-> [HyperdataDocument] -> [[HyperdataDocument]]
forall a b. (a -> b) -> a -> b
$ Int -> [HyperdataDocument] -> [HyperdataDocument]
forall a. Int -> [a] -> [a]
take Int
l [HyperdataDocument]
parsed
      User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[HyperdataDocument]]
-> (JobLog -> m ())
-> m RootId
forall env err (m :: * -> *) a.
(FlowCmdM env err m, FlowCorpus a) =>
User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m RootId
flowCorpus User
u Either Query [RootId]
n TermType Lang
la Maybe FlowSocialListWith
mfslw (([HyperdataDocument] -> [HyperdataDocument])
-> [[HyperdataDocument]] -> [[HyperdataDocument]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ((HyperdataDocument -> HyperdataDocument)
-> [HyperdataDocument] -> [HyperdataDocument]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map HyperdataDocument -> HyperdataDocument
forall a. ToHyperdataDocument a => a -> HyperdataDocument
toHyperdataDocument) [[HyperdataDocument]]
docs) JobLog -> m ()
logStatus
    Left String
e       -> Query -> m RootId
forall a. HasCallStack => Query -> a
panic (Query -> m RootId) -> Query -> m RootId
forall a b. (a -> b) -> a -> b
$ Query
"Error: " Query -> Query -> Query
forall a. Semigroup a => a -> a -> a
<> (String -> Query
T.pack String
e)

------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
-- (For now, Either is enough)
flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
           => User
           -> Either CorpusName [CorpusId]
           -> TermType Lang
           -> Maybe FlowSocialListWith
           -> [[a]]
           -> (JobLog -> m ())
           -> m CorpusId
flowCorpus :: User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m RootId
flowCorpus = Maybe HyperdataCorpus
-> User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m RootId
forall env err (m :: * -> *) a c.
(FlowCmdM env err m, FlowCorpus a, MkCorpus c) =>
Maybe c
-> User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m RootId
flow (Maybe HyperdataCorpus
forall a. Maybe a
Nothing :: Maybe HyperdataCorpus)


flow :: ( FlowCmdM env err m
        , FlowCorpus a
        , MkCorpus c
        )
        => Maybe c
        -> User
        -> Either CorpusName [CorpusId]
        -> TermType Lang
        -> Maybe FlowSocialListWith
        -> [[a]]
        -> (JobLog -> m ())
        -> m CorpusId
flow :: Maybe c
-> User
-> Either Query [RootId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (JobLog -> m ())
-> m RootId
flow Maybe c
c User
u Either Query [RootId]
cn TermType Lang
la Maybe FlowSocialListWith
mfslw [[a]]
docs JobLog -> m ()
logStatus = do
  -- TODO if public insertMasterDocs else insertUserDocs
  [[RootId]]
ids <- ((Int, [a]) -> m [RootId]) -> [(Int, [a])] -> m [[RootId]]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse (\(Int
idx, [a]
doc) -> do
                      [RootId]
id <- Maybe c -> TermType Lang -> [a] -> m [RootId]
forall env err (m :: * -> *) a c.
(FlowCmdM env err m, FlowCorpus a, MkCorpus c) =>
Maybe c -> TermType Lang -> [a] -> m [RootId]
insertMasterDocs Maybe c
c TermType Lang
la [a]
doc
                      JobLog -> m ()
logStatus JobLog :: Maybe Int
-> Maybe Int -> Maybe Int -> Maybe [ScraperEvent] -> JobLog
JobLog { _scst_succeeded :: Maybe Int
_scst_succeeded = Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Maybe Int) -> Int -> Maybe Int
forall a b. (a -> b) -> a -> b
$ Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
idx
                                       , _scst_failed :: Maybe Int
_scst_failed    = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
                                       , _scst_remaining :: Maybe Int
_scst_remaining = Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Maybe Int) -> Int -> Maybe Int
forall a b. (a -> b) -> a -> b
$ [[a]] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [[a]]
docs Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
idx
                                       , _scst_events :: Maybe [ScraperEvent]
_scst_events    = [ScraperEvent] -> Maybe [ScraperEvent]
forall a. a -> Maybe a
Just []
                                       }
                      [RootId] -> m [RootId]
forall (f :: * -> *) a. Applicative f => a -> f a
pure [RootId]
id
                  ) ([Int] -> [[a]] -> [(Int, [a])]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1..] [[a]]
docs)
  Lang
-> User
-> Either Query [RootId]
-> Maybe c
-> [RootId]
-> Maybe FlowSocialListWith
-> m RootId
forall env err (m :: * -> *) c.
(FlowCmdM env err m, MkCorpus c) =>
Lang
-> User
-> Either Query [RootId]
-> Maybe c
-> [RootId]
-> Maybe FlowSocialListWith
-> m RootId
flowCorpusUser (TermType Lang
la TermType Lang -> Getting Lang (TermType Lang) Lang -> Lang
forall s a. s -> Getting a s a -> a
^. Getting Lang (TermType Lang) Lang
forall lang lang2. Lens (TermType lang) (TermType lang2) lang lang2
tt_lang) User
u Either Query [RootId]
cn Maybe c
c ([[RootId]] -> [RootId]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[RootId]]
ids) Maybe FlowSocialListWith
mfslw

------------------------------------------------------------------------
flowCorpusUser :: ( FlowCmdM env err m
                  , MkCorpus c
                  )
               => Lang
               -> User
               -> Either CorpusName [CorpusId]
               -> Maybe c
               -> [NodeId]
               -> Maybe FlowSocialListWith
               -> m CorpusId
flowCorpusUser :: Lang
-> User
-> Either Query [RootId]
-> Maybe c
-> [RootId]
-> Maybe FlowSocialListWith
-> m RootId
flowCorpusUser Lang
l User
user Either Query [RootId]
corpusName Maybe c
ctype [RootId]
ids Maybe FlowSocialListWith
mfslw = do
  -- User Flow
  (Int
userId, RootId
_rootId, RootId
userCorpusId) <- User
-> Either Query [RootId]
-> Maybe c
-> Cmd err (Int, RootId, RootId)
forall err a.
(HasNodeError err, MkCorpus a) =>
User
-> Either Query [RootId]
-> Maybe a
-> Cmd err (Int, RootId, RootId)
getOrMk_RootWithCorpus User
user Either Query [RootId]
corpusName Maybe c
ctype
  -- NodeTexts is first
  [RootId]
_tId <- NodeType -> RootId -> Int -> Cmd err [RootId]
forall err.
HasDBid NodeType =>
NodeType -> RootId -> Int -> Cmd err [RootId]
insertDefaultNode NodeType
NodeTexts RootId
userCorpusId Int
userId
  -- printDebug "NodeTexts: " tId

  -- NodeList is second
  RootId
listId <- RootId -> Int -> Cmd err RootId
forall err.
(HasNodeError err, HasDBid NodeType) =>
RootId -> Int -> Cmd err RootId
getOrMkList RootId
userCorpusId Int
userId
  -- _cooc  <- insertDefaultNode NodeListCooc listId userId
  -- TODO: check if present already, ignore
  [Only Int]
_ <- RootId -> [RootId] -> Cmd err [Only Int]
forall err. RootId -> [RootId] -> Cmd err [Only Int]
Doc.add RootId
userCorpusId [RootId]
ids

  -- printDebug "Node Text Ids:" tId

  -- User List Flow
  (Int
masterUserId, RootId
_masterRootId, RootId
masterCorpusId)
    <- User
-> Either Query [RootId]
-> Maybe c
-> Cmd err (Int, RootId, RootId)
forall err a.
(HasNodeError err, MkCorpus a) =>
User
-> Either Query [RootId]
-> Maybe a
-> Cmd err (Int, RootId, RootId)
getOrMk_RootWithCorpus (Query -> User
UserName Query
userMaster) (Query -> Either Query [RootId]
forall a b. a -> Either a b
Left Query
"") Maybe c
ctype

  --let gp = (GroupParams l 2 3 (StopSize 3)) 
  let gp :: GroupParams
gp = Lang -> PosTagAlgo -> HashMap Query Query -> GroupParams
GroupWithPosTag Lang
l PosTagAlgo
CoreNLP HashMap Query Query
forall k v. HashMap k v
HashMap.empty 
  Map NgramsType [NgramsElement]
ngs         <- User
-> RootId
-> RootId
-> Maybe FlowSocialListWith
-> GroupParams
-> m (Map NgramsType [NgramsElement])
forall env err (m :: * -> *).
(HasNodeStory env err m, CmdM env err m, HasTreeError err,
 HasNodeError err) =>
User
-> RootId
-> RootId
-> Maybe FlowSocialListWith
-> GroupParams
-> m (Map NgramsType [NgramsElement])
buildNgramsLists User
user RootId
userCorpusId RootId
masterCorpusId Maybe FlowSocialListWith
mfslw GroupParams
gp

  RootId
_userListId <- RootId -> Map NgramsType [NgramsElement] -> m RootId
forall env err (m :: * -> *).
FlowCmdM env err m =>
RootId -> Map NgramsType [NgramsElement] -> m RootId
flowList_DbRepo RootId
listId Map NgramsType [NgramsElement]
ngs
  RootId
_mastListId <- RootId -> Int -> Cmd err RootId
forall err.
(HasNodeError err, HasDBid NodeType) =>
RootId -> Int -> Cmd err RootId
getOrMkList RootId
masterCorpusId Int
masterUserId
  -- _ <- insertOccsUpdates userCorpusId mastListId
  -- printDebug "userListId" userListId
  -- User Graph Flow
  [RootId]
_ <- NodeType -> RootId -> Int -> Cmd err [RootId]
forall err.
HasDBid NodeType =>
NodeType -> RootId -> Int -> Cmd err [RootId]
insertDefaultNode NodeType
NodeDashboard RootId
userCorpusId Int
userId
  [RootId]
_ <- NodeType -> RootId -> Int -> Cmd err [RootId]
forall err.
HasDBid NodeType =>
NodeType -> RootId -> Int -> Cmd err [RootId]
insertDefaultNode NodeType
NodeGraph     RootId
userCorpusId Int
userId
  --_ <- mkPhylo  userCorpusId userId
  -- Annuaire Flow
  -- _ <- mkAnnuaire  rootUserId userId
  RootId -> m RootId
forall (f :: * -> *) a. Applicative f => a -> f a
pure RootId
userCorpusId


insertMasterDocs :: ( FlowCmdM env err m
                    , FlowCorpus a
                    , MkCorpus   c
                    )
                 => Maybe c
                 -> TermType Lang
                 -> [a]
                 -> m [DocId]
insertMasterDocs :: Maybe c -> TermType Lang -> [a] -> m [RootId]
insertMasterDocs Maybe c
c TermType Lang
lang [a]
hs  =  do
  (Int
masterUserId, RootId
_, RootId
masterCorpusId) <- User
-> Either Query [RootId]
-> Maybe c
-> Cmd err (Int, RootId, RootId)
forall err a.
(HasNodeError err, MkCorpus a) =>
User
-> Either Query [RootId]
-> Maybe a
-> Cmd err (Int, RootId, RootId)
getOrMk_RootWithCorpus (Query -> User
UserName Query
userMaster) (Query -> Either Query [RootId]
forall a b. a -> Either a b
Left Query
corpusMasterName) Maybe c
c
  ([RootId]
ids', [Indexed RootId (Node a)]
documentsWithId) <- Int
-> RootId -> [Node a] -> m ([RootId], [Indexed RootId (Node a)])
forall env err (m :: * -> *) a.
(FlowCmdM env err m, FlowInsertDB a) =>
Int -> RootId -> [a] -> m ([RootId], [Indexed RootId a])
insertDocs Int
masterUserId RootId
masterCorpusId ((a -> Node a) -> [a] -> [Node a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Int -> RootId -> a -> Node a
forall a.
(ToNode a, HasDBid NodeType) =>
Int -> RootId -> a -> Node a
toNode Int
masterUserId RootId
masterCorpusId) [a]
hs )
  [Only Int]
_ <- RootId -> [RootId] -> Cmd err [Only Int]
forall err. RootId -> [RootId] -> Cmd err [Only Int]
Doc.add RootId
masterCorpusId [RootId]
ids'
  -- TODO
  -- create a corpus with database name (CSV or PubMed)
  -- add documents to the corpus (create node_node link)
  -- this will enable global database monitoring

  -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
  HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
                <- [DocumentIdWithNgrams (Node a) ExtractedNgrams]
-> HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
forall b a.
(Ord b, Hashable b) =>
[DocumentIdWithNgrams a b]
-> HashMap b (Map NgramsType (Map RootId Int))
mapNodeIdNgrams
                ([DocumentIdWithNgrams (Node a) ExtractedNgrams]
 -> HashMap ExtractedNgrams (Map NgramsType (Map RootId Int)))
-> m [DocumentIdWithNgrams (Node a) ExtractedNgrams]
-> m (HashMap ExtractedNgrams (Map NgramsType (Map RootId Int)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Node a -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)))
-> [Indexed RootId (Node a)]
-> Cmd err [DocumentIdWithNgrams (Node a) ExtractedNgrams]
forall err a b.
HasNodeError err =>
(a -> Cmd err (HashMap b (Map NgramsType Int)))
-> [Indexed RootId a] -> Cmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams
                    (TermType Lang
-> Node a -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
forall h err.
(ExtractNgramsT h, HasText h) =>
TermType Lang
-> h -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT (TermType Lang
 -> Node a
 -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)))
-> TermType Lang
-> Node a
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
forall a b. (a -> b) -> a -> b
$ TermType Lang -> [Indexed RootId (Node a)] -> TermType Lang
forall (t :: * -> *) h.
(Foldable t, Functor t, HasText h) =>
TermType Lang -> t h -> TermType Lang
withLang TermType Lang
lang [Indexed RootId (Node a)]
documentsWithId)
                    [Indexed RootId (Node a)]
documentsWithId

  RootId
lId      <- RootId -> Int -> Cmd err RootId
forall err.
(HasNodeError err, HasDBid NodeType) =>
RootId -> Int -> Cmd err RootId
getOrMkList RootId
masterCorpusId Int
masterUserId
  ()
_ <- RootId
-> HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
-> m ()
forall env err (m :: * -> *).
FlowCmdM env err m =>
RootId
-> HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
-> m ()
saveDocNgramsWith RootId
lId HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
mapNgramsDocs'

  -- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
  [RootId] -> m [RootId]
forall (f :: * -> *) a. Applicative f => a -> f a
pure [RootId]
ids'

saveDocNgramsWith :: ( FlowCmdM env err m)
                  => ListId
                  -> HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
                  -> m ()
saveDocNgramsWith :: RootId
-> HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
-> m ()
saveDocNgramsWith RootId
lId HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
mapNgramsDocs' = do
  HashMap Query Int
terms2id <- [ExtractedNgrams] -> Cmd err (HashMap Query Int)
forall err. [ExtractedNgrams] -> Cmd err (HashMap Query Int)
insertExtractedNgrams ([ExtractedNgrams] -> Cmd err (HashMap Query Int))
-> [ExtractedNgrams] -> Cmd err (HashMap Query Int)
forall a b. (a -> b) -> a -> b
$ HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
-> [ExtractedNgrams]
forall k v. HashMap k v -> [k]
HashMap.keys HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
mapNgramsDocs'
  let mapNgramsDocs :: HashMap Ngrams (Map NgramsType (Map RootId Int))
mapNgramsDocs = (ExtractedNgrams -> Ngrams)
-> HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
-> HashMap Ngrams (Map NgramsType (Map RootId Int))
forall k2 k1 v.
(Eq k2, Hashable k2) =>
(k1 -> k2) -> HashMap k1 v -> HashMap k2 v
HashMap.mapKeys ExtractedNgrams -> Ngrams
extracted2ngrams HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
mapNgramsDocs'

  -- to be removed
  let indexedNgrams :: HashMap (Indexed Int Ngrams) (Map NgramsType (Map RootId Int))
indexedNgrams = (Ngrams -> Indexed Int Ngrams)
-> HashMap Ngrams (Map NgramsType (Map RootId Int))
-> HashMap (Indexed Int Ngrams) (Map NgramsType (Map RootId Int))
forall k2 k1 v.
(Eq k2, Hashable k2) =>
(k1 -> k2) -> HashMap k1 v -> HashMap k2 v
HashMap.mapKeys (HashMap Query Int -> Ngrams -> Indexed Int Ngrams
indexNgrams HashMap Query Int
terms2id) HashMap Ngrams (Map NgramsType (Map RootId Int))
mapNgramsDocs

  -- new
  Map NgramsType (Map Query Int)
mapCgramsId <- RootId
-> (RootId -> [(Query, [NgramsType])] -> [NodeNgramsW])
-> [(Query, [NgramsType])]
-> Cmd err (Map NgramsType (Map Query Int))
forall a err.
Show a =>
RootId
-> (RootId -> a -> [NodeNgramsW])
-> a
-> Cmd err (Map NgramsType (Map Query Int))
listInsertDb RootId
lId RootId -> [(Query, [NgramsType])] -> [NodeNgramsW]
toNodeNgramsW'
               ([(Query, [NgramsType])]
 -> Cmd err (Map NgramsType (Map Query Int)))
-> [(Query, [NgramsType])]
-> Cmd err (Map NgramsType (Map Query Int))
forall a b. (a -> b) -> a -> b
$ ((Ngrams, Map NgramsType (Map RootId Int))
 -> (Query, [NgramsType]))
-> [(Ngrams, Map NgramsType (Map RootId Int))]
-> [(Query, [NgramsType])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ((Ngrams -> Query)
-> (Ngrams, [NgramsType]) -> (Query, [NgramsType])
forall a a' b. (a -> a') -> (a, b) -> (a', b)
first Ngrams -> Query
_ngramsTerms ((Ngrams, [NgramsType]) -> (Query, [NgramsType]))
-> ((Ngrams, Map NgramsType (Map RootId Int))
    -> (Ngrams, [NgramsType]))
-> (Ngrams, Map NgramsType (Map RootId Int))
-> (Query, [NgramsType])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Map NgramsType (Map RootId Int) -> [NgramsType])
-> (Ngrams, Map NgramsType (Map RootId Int))
-> (Ngrams, [NgramsType])
forall b b' a. (b -> b') -> (a, b) -> (a, b')
second Map NgramsType (Map RootId Int) -> [NgramsType]
forall k a. Map k a -> [k]
Map.keys)
               ([(Ngrams, Map NgramsType (Map RootId Int))]
 -> [(Query, [NgramsType])])
-> [(Ngrams, Map NgramsType (Map RootId Int))]
-> [(Query, [NgramsType])]
forall a b. (a -> b) -> a -> b
$ HashMap Ngrams (Map NgramsType (Map RootId Int))
-> [(Ngrams, Map NgramsType (Map RootId Int))]
forall k v. HashMap k v -> [(k, v)]
HashMap.toList HashMap Ngrams (Map NgramsType (Map RootId Int))
mapNgramsDocs

  -- insertDocNgrams
  Int
_return <- [NodeNodeNgrams2] -> Cmd err Int
forall err. [NodeNodeNgrams2] -> Cmd err Int
insertNodeNodeNgrams2
           ([NodeNodeNgrams2] -> Cmd err Int)
-> [NodeNodeNgrams2] -> Cmd err Int
forall a b. (a -> b) -> a -> b
$ [Maybe NodeNodeNgrams2] -> [NodeNodeNgrams2]
forall a. [Maybe a] -> [a]
catMaybes [ RootId -> Int -> Double -> NodeNodeNgrams2
forall node_id nodengrams_id w.
node_id
-> nodengrams_id
-> w
-> NodeNodeNgrams2Poly node_id nodengrams_id w
NodeNodeNgrams2 (RootId -> Int -> Double -> NodeNodeNgrams2)
-> Maybe RootId -> Maybe (Int -> Double -> NodeNodeNgrams2)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RootId -> Maybe RootId
forall a. a -> Maybe a
Just RootId
nId
                                         Maybe (Int -> Double -> NodeNodeNgrams2)
-> Maybe Int -> Maybe (Double -> NodeNodeNgrams2)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Map NgramsType (Map Query Int) -> NgramsType -> Query -> Maybe Int
getCgramsId Map NgramsType (Map Query Int)
mapCgramsId NgramsType
ngrams_type (Ngrams -> Query
_ngramsTerms Ngrams
terms'')
                                         Maybe (Double -> NodeNodeNgrams2)
-> Maybe Double -> Maybe NodeNodeNgrams2
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Double -> Maybe Double
forall a. a -> Maybe a
Just (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
w :: Double)
                       | (Ngrams
terms'', Map NgramsType (Map RootId Int)
mapNgramsTypes)      <- HashMap Ngrams (Map NgramsType (Map RootId Int))
-> [(Ngrams, Map NgramsType (Map RootId Int))]
forall k v. HashMap k v -> [(k, v)]
HashMap.toList HashMap Ngrams (Map NgramsType (Map RootId Int))
mapNgramsDocs
                       , (NgramsType
ngrams_type, Map RootId Int
mapNodeIdWeight) <- Map NgramsType (Map RootId Int) -> [(NgramsType, Map RootId Int)]
forall k a. Map k a -> [(k, a)]
Map.toList Map NgramsType (Map RootId Int)
mapNgramsTypes
                       , (RootId
nId, Int
w)                       <- Map RootId Int -> [(RootId, Int)]
forall k a. Map k a -> [(k, a)]
Map.toList Map RootId Int
mapNodeIdWeight
                       ]
  -- to be removed
  Int
_   <- RootId
-> HashMap (Indexed Int Ngrams) (Map NgramsType (Map RootId Int))
-> Cmd err Int
forall err.
RootId
-> HashMap (Indexed Int Ngrams) (Map NgramsType (Map RootId Int))
-> Cmd err Int
insertDocNgrams RootId
lId HashMap (Indexed Int Ngrams) (Map NgramsType (Map RootId Int))
indexedNgrams

  () -> m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()


------------------------------------------------------------------------
-- TODO Type NodeDocumentUnicised
insertDocs :: ( FlowCmdM env err m
              -- , FlowCorpus a
              , FlowInsertDB a
              )
              => UserId
              -> CorpusId
              -> [a]
              -> m ([DocId], [Indexed NodeId a])
insertDocs :: Int -> RootId -> [a] -> m ([RootId], [Indexed RootId a])
insertDocs Int
uId RootId
cId [a]
hs = do
  let docs :: [a]
docs = (a -> a) -> [a] -> [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map a -> a
forall a. AddUniqId a => a -> a
addUniqId [a]
hs
  [ReturnId]
newIds <- Int -> RootId -> [a] -> Cmd err [ReturnId]
forall a err.
(InsertDb a, HasDBid NodeType) =>
Int -> RootId -> [a] -> Cmd err [ReturnId]
insertDb Int
uId RootId
cId [a]
docs
  -- printDebug "newIds" newIds
  let
    newIds' :: [RootId]
newIds' = (ReturnId -> RootId) -> [ReturnId] -> [RootId]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map ReturnId -> RootId
reId [ReturnId]
newIds
    documentsWithId :: [Indexed RootId a]
documentsWithId = Map Query ReturnId -> Map Query a -> [Indexed RootId a]
forall a. Map Query ReturnId -> Map Query a -> [Indexed RootId a]
mergeData ([ReturnId] -> Map Query ReturnId
toInserted [ReturnId]
newIds) ([(Query, a)] -> Map Query a
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(Query, a)] -> Map Query a) -> [(Query, a)] -> Map Query a
forall a b. (a -> b) -> a -> b
$ (a -> (Query, a)) -> [a] -> [(Query, a)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map a -> (Query, a)
forall a. UniqId a => a -> (Query, a)
viewUniqId' [a]
docs)
  [Only Int]
_ <- RootId -> [RootId] -> Cmd err [Only Int]
forall err. RootId -> [RootId] -> Cmd err [Only Int]
Doc.add RootId
cId [RootId]
newIds'
  ([RootId], [Indexed RootId a]) -> m ([RootId], [Indexed RootId a])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([RootId]
newIds', [Indexed RootId a]
documentsWithId)


------------------------------------------------------------------------
viewUniqId' :: UniqId a
            => a
            -> (Hash, a)
viewUniqId' :: a -> (Query, a)
viewUniqId' a
d = (Query, a) -> (Query -> (Query, a)) -> Maybe Query -> (Query, a)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Query, a)
forall a. a
err (\Query
h -> (Query
h,a
d)) (Getting (Maybe Query) a (Maybe Query) -> a -> Maybe Query
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view Getting (Maybe Query) a (Maybe Query)
forall a. UniqId a => Lens' a (Maybe Query)
uniqId a
d)
      where
        err :: a
err = Query -> a
forall a. HasCallStack => Query -> a
panic Query
"[ERROR] Database.Flow.toInsert"


toInserted :: [ReturnId]
           -> Map Hash ReturnId
toInserted :: [ReturnId] -> Map Query ReturnId
toInserted =
  [(Query, ReturnId)] -> Map Query ReturnId
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(Query, ReturnId)] -> Map Query ReturnId)
-> ([ReturnId] -> [(Query, ReturnId)])
-> [ReturnId]
-> Map Query ReturnId
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ReturnId -> (Query, ReturnId))
-> [ReturnId] -> [(Query, ReturnId)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map    (\ReturnId
r -> (ReturnId -> Query
reUniqId ReturnId
r, ReturnId
r)     )
               ([ReturnId] -> [(Query, ReturnId)])
-> ([ReturnId] -> [ReturnId]) -> [ReturnId] -> [(Query, ReturnId)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ReturnId -> Bool) -> [ReturnId] -> [ReturnId]
forall a. (a -> Bool) -> [a] -> [a]
filter (\ReturnId
r -> ReturnId -> Bool
reInserted ReturnId
r Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool
True)

mergeData :: Map Hash ReturnId
          -> Map Hash a
          -> [Indexed NodeId a]
mergeData :: Map Query ReturnId -> Map Query a -> [Indexed RootId a]
mergeData Map Query ReturnId
rs = [Maybe (Indexed RootId a)] -> [Indexed RootId a]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Indexed RootId a)] -> [Indexed RootId a])
-> (Map Query a -> [Maybe (Indexed RootId a)])
-> Map Query a
-> [Indexed RootId a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Query, a) -> Maybe (Indexed RootId a))
-> [(Query, a)] -> [Maybe (Indexed RootId a)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Query, a) -> Maybe (Indexed RootId a)
forall a. (Query, a) -> Maybe (Indexed RootId a)
toDocumentWithId ([(Query, a)] -> [Maybe (Indexed RootId a)])
-> (Map Query a -> [(Query, a)])
-> Map Query a
-> [Maybe (Indexed RootId a)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Query a -> [(Query, a)]
forall k a. Map k a -> [(k, a)]
Map.toList
  where
    toDocumentWithId :: (Query, a) -> Maybe (Indexed RootId a)
toDocumentWithId (Query
sha,a
hpd) =
      RootId -> a -> Indexed RootId a
forall i a. i -> a -> Indexed i a
Indexed (RootId -> a -> Indexed RootId a)
-> Maybe RootId -> Maybe (a -> Indexed RootId a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ReturnId -> RootId) -> Maybe ReturnId -> Maybe RootId
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ReturnId -> RootId
reId (Query -> Map Query ReturnId -> Maybe ReturnId
forall k a. Ord k => k -> Map k a -> Maybe a
lookup Query
sha Map Query ReturnId
rs)
              Maybe (a -> Indexed RootId a)
-> Maybe a -> Maybe (Indexed RootId a)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> a -> Maybe a
forall a. a -> Maybe a
Just a
hpd

------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
documentIdWithNgrams :: HasNodeError err
                     => (a
                     -> Cmd err (HashMap b (Map NgramsType Int)))
                     -> [Indexed NodeId a]
                     -> Cmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams :: (a -> Cmd err (HashMap b (Map NgramsType Int)))
-> [Indexed RootId a] -> Cmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams a -> Cmd err (HashMap b (Map NgramsType Int))
f = (Indexed RootId a -> m (DocumentIdWithNgrams a b))
-> [Indexed RootId a] -> m [DocumentIdWithNgrams a b]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Indexed RootId a -> m (DocumentIdWithNgrams a b)
forall (m :: * -> *) env.
(MonadReader env m, MonadError err m, MonadBaseControl IO m,
 HasConnectionPool env, HasConfig env, HasMail env) =>
Indexed RootId a -> m (DocumentIdWithNgrams a b)
toDocumentIdWithNgrams
  where
    toDocumentIdWithNgrams :: Indexed RootId a -> m (DocumentIdWithNgrams a b)
toDocumentIdWithNgrams Indexed RootId a
d = do
      HashMap b (Map NgramsType Int)
e <- a -> Cmd err (HashMap b (Map NgramsType Int))
f (a -> Cmd err (HashMap b (Map NgramsType Int)))
-> a -> Cmd err (HashMap b (Map NgramsType Int))
forall a b. (a -> b) -> a -> b
$ Indexed RootId a -> a
forall i a. Indexed i a -> a
_unIndex         Indexed RootId a
d
      DocumentIdWithNgrams a b -> m (DocumentIdWithNgrams a b)
forall (f :: * -> *) a. Applicative f => a -> f a
pure   (DocumentIdWithNgrams a b -> m (DocumentIdWithNgrams a b))
-> DocumentIdWithNgrams a b -> m (DocumentIdWithNgrams a b)
forall a b. (a -> b) -> a -> b
$ Indexed RootId a
-> HashMap b (Map NgramsType Int) -> DocumentIdWithNgrams a b
forall a b.
Indexed RootId a
-> HashMap b (Map NgramsType Int) -> DocumentIdWithNgrams a b
DocumentIdWithNgrams Indexed RootId a
d HashMap b (Map NgramsType Int)
e


-- | TODO check optimization
mapNodeIdNgrams :: (Ord b, Hashable b)
                => [DocumentIdWithNgrams a b]
                -> HashMap b
                       (Map NgramsType 
                            (Map NodeId Int)
                       )
mapNodeIdNgrams :: [DocumentIdWithNgrams a b]
-> HashMap b (Map NgramsType (Map RootId Int))
mapNodeIdNgrams = (Map NgramsType (Map RootId Int)
 -> Map NgramsType (Map RootId Int)
 -> Map NgramsType (Map RootId Int))
-> [HashMap b (Map NgramsType (Map RootId Int))]
-> HashMap b (Map NgramsType (Map RootId Int))
forall (f :: * -> *) k a.
(Foldable f, Eq k, Hashable k) =>
(a -> a -> a) -> f (HashMap k a) -> HashMap k a
HashMap.unionsWith ((Map RootId Int -> Map RootId Int -> Map RootId Int)
-> Map NgramsType (Map RootId Int)
-> Map NgramsType (Map RootId Int)
-> Map NgramsType (Map RootId Int)
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
Map.unionWith ((Int -> Int -> Int)
-> Map RootId Int -> Map RootId Int -> Map RootId Int
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
Map.unionWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+))) ([HashMap b (Map NgramsType (Map RootId Int))]
 -> HashMap b (Map NgramsType (Map RootId Int)))
-> ([DocumentIdWithNgrams a b]
    -> [HashMap b (Map NgramsType (Map RootId Int))])
-> [DocumentIdWithNgrams a b]
-> HashMap b (Map NgramsType (Map RootId Int))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DocumentIdWithNgrams a b
 -> HashMap b (Map NgramsType (Map RootId Int)))
-> [DocumentIdWithNgrams a b]
-> [HashMap b (Map NgramsType (Map RootId Int))]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap DocumentIdWithNgrams a b
-> HashMap b (Map NgramsType (Map RootId Int))
forall a b.
DocumentIdWithNgrams a b
-> HashMap b (Map NgramsType (Map RootId Int))
f
  where
    f :: DocumentIdWithNgrams a b
      -> HashMap b (Map NgramsType (Map NodeId Int))
    f :: DocumentIdWithNgrams a b
-> HashMap b (Map NgramsType (Map RootId Int))
f DocumentIdWithNgrams a b
d = (Map NgramsType Int -> Map NgramsType (Map RootId Int))
-> HashMap b (Map NgramsType Int)
-> HashMap b (Map NgramsType (Map RootId Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Int -> Map RootId Int)
-> Map NgramsType Int -> Map NgramsType (Map RootId Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (RootId -> Int -> Map RootId Int
forall k a. k -> a -> Map k a
Map.singleton RootId
nId)) (HashMap b (Map NgramsType Int)
 -> HashMap b (Map NgramsType (Map RootId Int)))
-> HashMap b (Map NgramsType Int)
-> HashMap b (Map NgramsType (Map RootId Int))
forall a b. (a -> b) -> a -> b
$ DocumentIdWithNgrams a b -> HashMap b (Map NgramsType Int)
forall a b.
DocumentIdWithNgrams a b -> HashMap b (Map NgramsType Int)
documentNgrams DocumentIdWithNgrams a b
d
      where
        nId :: RootId
nId = Indexed RootId a -> RootId
forall i a. Indexed i a -> i
_index (Indexed RootId a -> RootId) -> Indexed RootId a -> RootId
forall a b. (a -> b) -> a -> b
$ DocumentIdWithNgrams a b -> Indexed RootId a
forall a b. DocumentIdWithNgrams a b -> Indexed RootId a
documentWithId DocumentIdWithNgrams a b
d


------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact
  where
    extractNgramsT :: TermType Lang
-> HyperdataContact
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT TermType Lang
l HyperdataContact
hc = (ExtractedNgrams -> ExtractedNgrams)
-> HashMap ExtractedNgrams (Map NgramsType Int)
-> HashMap ExtractedNgrams (Map NgramsType Int)
forall k2 k1 v.
(Eq k2, Hashable k2) =>
(k1 -> k2) -> HashMap k1 v -> HashMap k2 v
HashMap.mapKeys (Int -> ExtractedNgrams -> ExtractedNgrams
cleanExtractedNgrams Int
255) (HashMap ExtractedNgrams (Map NgramsType Int)
 -> HashMap ExtractedNgrams (Map NgramsType Int))
-> m (HashMap ExtractedNgrams (Map NgramsType Int))
-> m (HashMap ExtractedNgrams (Map NgramsType Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TermType Lang
-> HyperdataContact
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
forall err.
TermType Lang
-> HyperdataContact
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extract TermType Lang
l HyperdataContact
hc
      where
        extract :: TermType Lang -> HyperdataContact
                -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
        extract :: TermType Lang
-> HyperdataContact
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extract TermType Lang
_l HyperdataContact
hc' = do
          let authors :: [Ngrams]
authors = (Query -> Ngrams) -> [Query] -> [Ngrams]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Query -> Ngrams
text2ngrams
                      ([Query] -> [Ngrams]) -> [Query] -> [Ngrams]
forall a b. (a -> b) -> a -> b
$ [Query] -> (Query -> [Query]) -> Maybe Query -> [Query]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Query
"Nothing"] (\Query
a -> [Query
a])
                      (Maybe Query -> [Query]) -> Maybe Query -> [Query]
forall a b. (a -> b) -> a -> b
$ Getting (Maybe Query) HyperdataContact (Maybe Query)
-> HyperdataContact -> Maybe Query
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view ((Maybe ContactWho -> Const (Maybe Query) (Maybe ContactWho))
-> HyperdataContact -> Const (Maybe Query) HyperdataContact
Lens' HyperdataContact (Maybe ContactWho)
hc_who ((Maybe ContactWho -> Const (Maybe Query) (Maybe ContactWho))
 -> HyperdataContact -> Const (Maybe Query) HyperdataContact)
-> ((Maybe Query -> Const (Maybe Query) (Maybe Query))
    -> Maybe ContactWho -> Const (Maybe Query) (Maybe ContactWho))
-> Getting (Maybe Query) HyperdataContact (Maybe Query)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ContactWho -> Const (Maybe Query) ContactWho)
-> Maybe ContactWho -> Const (Maybe Query) (Maybe ContactWho)
forall a b. Prism (Maybe a) (Maybe b) a b
_Just ((ContactWho -> Const (Maybe Query) ContactWho)
 -> Maybe ContactWho -> Const (Maybe Query) (Maybe ContactWho))
-> ((Maybe Query -> Const (Maybe Query) (Maybe Query))
    -> ContactWho -> Const (Maybe Query) ContactWho)
-> (Maybe Query -> Const (Maybe Query) (Maybe Query))
-> Maybe ContactWho
-> Const (Maybe Query) (Maybe ContactWho)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Maybe Query -> Const (Maybe Query) (Maybe Query))
-> ContactWho -> Const (Maybe Query) ContactWho
Lens' ContactWho (Maybe Query)
cw_lastName) HyperdataContact
hc'

          HashMap ExtractedNgrams (Map NgramsType Int)
-> m (HashMap ExtractedNgrams (Map NgramsType Int))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (HashMap ExtractedNgrams (Map NgramsType Int)
 -> m (HashMap ExtractedNgrams (Map NgramsType Int)))
-> HashMap ExtractedNgrams (Map NgramsType Int)
-> m (HashMap ExtractedNgrams (Map NgramsType Int))
forall a b. (a -> b) -> a -> b
$ [(ExtractedNgrams, Map NgramsType Int)]
-> HashMap ExtractedNgrams (Map NgramsType Int)
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList ([(ExtractedNgrams, Map NgramsType Int)]
 -> HashMap ExtractedNgrams (Map NgramsType Int))
-> [(ExtractedNgrams, Map NgramsType Int)]
-> HashMap ExtractedNgrams (Map NgramsType Int)
forall a b. (a -> b) -> a -> b
$ [(Ngrams -> ExtractedNgrams
SimpleNgrams Ngrams
a', NgramsType -> Int -> Map NgramsType Int
forall k a. k -> a -> Map k a
Map.singleton NgramsType
Authors Int
1) | Ngrams
a' <- [Ngrams]
authors ]


instance ExtractNgramsT HyperdataDocument
  where
    extractNgramsT :: TermType Lang
                   -> HyperdataDocument
                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
    extractNgramsT :: TermType Lang
-> HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT TermType Lang
lang HyperdataDocument
hd = (ExtractedNgrams -> ExtractedNgrams)
-> HashMap ExtractedNgrams (Map NgramsType Int)
-> HashMap ExtractedNgrams (Map NgramsType Int)
forall k2 k1 v.
(Eq k2, Hashable k2) =>
(k1 -> k2) -> HashMap k1 v -> HashMap k2 v
HashMap.mapKeys (Int -> ExtractedNgrams -> ExtractedNgrams
cleanExtractedNgrams Int
255) (HashMap ExtractedNgrams (Map NgramsType Int)
 -> HashMap ExtractedNgrams (Map NgramsType Int))
-> m (HashMap ExtractedNgrams (Map NgramsType Int))
-> m (HashMap ExtractedNgrams (Map NgramsType Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TermType Lang
-> HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
forall err.
TermType Lang
-> HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT' TermType Lang
lang HyperdataDocument
hd
      where
        extractNgramsT' :: TermType Lang
                        -> HyperdataDocument
                       -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
        extractNgramsT' :: TermType Lang
-> HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT' TermType Lang
lang' HyperdataDocument
doc = do
          let source :: Ngrams
source    = Query -> Ngrams
text2ngrams
                        (Query -> Ngrams) -> Query -> Ngrams
forall a b. (a -> b) -> a -> b
$ Query -> (Query -> Query) -> Maybe Query -> Query
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Query
"Nothing" Query -> Query
forall a. a -> a
identity
                        (Maybe Query -> Query) -> Maybe Query -> Query
forall a b. (a -> b) -> a -> b
$ HyperdataDocument -> Maybe Query
_hd_source HyperdataDocument
doc

              institutes :: [Ngrams]
institutes = (Query -> Ngrams) -> [Query] -> [Ngrams]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Query -> Ngrams
text2ngrams
                         ([Query] -> [Ngrams]) -> [Query] -> [Ngrams]
forall a b. (a -> b) -> a -> b
$ [Query] -> (Query -> [Query]) -> Maybe Query -> [Query]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Query
"Nothing"] ((Query -> Query) -> [Query] -> [Query]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Query -> Query
toSchoolName ([Query] -> [Query]) -> (Query -> [Query]) -> Query -> [Query]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Query -> Query -> [Query]
T.splitOn Query
", "))
                         (Maybe Query -> [Query]) -> Maybe Query -> [Query]
forall a b. (a -> b) -> a -> b
$ HyperdataDocument -> Maybe Query
_hd_institutes HyperdataDocument
doc

              authors :: [Ngrams]
authors    = (Query -> Ngrams) -> [Query] -> [Ngrams]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Query -> Ngrams
text2ngrams
                         ([Query] -> [Ngrams]) -> [Query] -> [Ngrams]
forall a b. (a -> b) -> a -> b
$ [Query] -> (Query -> [Query]) -> Maybe Query -> [Query]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Query
"Nothing"] (Query -> Query -> [Query]
T.splitOn Query
", ")
                         (Maybe Query -> [Query]) -> Maybe Query -> [Query]
forall a b. (a -> b) -> a -> b
$ HyperdataDocument -> Maybe Query
_hd_authors HyperdataDocument
doc

          [NgramsPostag]
terms' <- (Terms -> NgramsPostag) -> [Terms] -> [NgramsPostag]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms (TermType Lang
lang' TermType Lang -> Getting Lang (TermType Lang) Lang -> Lang
forall s a. s -> Getting a s a -> a
^. Getting Lang (TermType Lang) Lang
forall lang lang2. Lens (TermType lang) (TermType lang2) lang lang2
tt_lang) PosTagAlgo
CoreNLP POS
NP)
                 ([Terms] -> [NgramsPostag])
-> ([[Terms]] -> [Terms]) -> [[Terms]] -> [NgramsPostag]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[Terms]] -> [Terms]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
                 ([[Terms]] -> [NgramsPostag]) -> m [[Terms]] -> m [NgramsPostag]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO [[Terms]] -> m [[Terms]]
forall (b :: * -> *) (m :: * -> *) α. MonadBase b m => b α -> m α
liftBase (TermType Lang -> [Query] -> IO [[Terms]]
extractTerms TermType Lang
lang' ([Query] -> IO [[Terms]]) -> [Query] -> IO [[Terms]]
forall a b. (a -> b) -> a -> b
$ HyperdataDocument -> [Query]
forall h. HasText h => h -> [Query]
hasText HyperdataDocument
doc)

          HashMap ExtractedNgrams (Map NgramsType Int)
-> m (HashMap ExtractedNgrams (Map NgramsType Int))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (HashMap ExtractedNgrams (Map NgramsType Int)
 -> m (HashMap ExtractedNgrams (Map NgramsType Int)))
-> HashMap ExtractedNgrams (Map NgramsType Int)
-> m (HashMap ExtractedNgrams (Map NgramsType Int))
forall a b. (a -> b) -> a -> b
$ [(ExtractedNgrams, Map NgramsType Int)]
-> HashMap ExtractedNgrams (Map NgramsType Int)
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList
               ([(ExtractedNgrams, Map NgramsType Int)]
 -> HashMap ExtractedNgrams (Map NgramsType Int))
-> [(ExtractedNgrams, Map NgramsType Int)]
-> HashMap ExtractedNgrams (Map NgramsType Int)
forall a b. (a -> b) -> a -> b
$  [(Ngrams -> ExtractedNgrams
SimpleNgrams Ngrams
source, NgramsType -> Int -> Map NgramsType Int
forall k a. k -> a -> Map k a
Map.singleton NgramsType
Sources     Int
1)                    ]
               [(ExtractedNgrams, Map NgramsType Int)]
-> [(ExtractedNgrams, Map NgramsType Int)]
-> [(ExtractedNgrams, Map NgramsType Int)]
forall a. Semigroup a => a -> a -> a
<> [(Ngrams -> ExtractedNgrams
SimpleNgrams     Ngrams
i', NgramsType -> Int -> Map NgramsType Int
forall k a. k -> a -> Map k a
Map.singleton NgramsType
Institutes  Int
1) | Ngrams
i' <- [Ngrams]
institutes ]
               [(ExtractedNgrams, Map NgramsType Int)]
-> [(ExtractedNgrams, Map NgramsType Int)]
-> [(ExtractedNgrams, Map NgramsType Int)]
forall a. Semigroup a => a -> a -> a
<> [(Ngrams -> ExtractedNgrams
SimpleNgrams     Ngrams
a', NgramsType -> Int -> Map NgramsType Int
forall k a. k -> a -> Map k a
Map.singleton NgramsType
Authors     Int
1) | Ngrams
a' <- [Ngrams]
authors    ]
               [(ExtractedNgrams, Map NgramsType Int)]
-> [(ExtractedNgrams, Map NgramsType Int)]
-> [(ExtractedNgrams, Map NgramsType Int)]
forall a. Semigroup a => a -> a -> a
<> [(NgramsPostag -> ExtractedNgrams
EnrichedNgrams   NgramsPostag
t', NgramsType -> Int -> Map NgramsType Int
forall k a. k -> a -> Map k a
Map.singleton NgramsType
NgramsTerms Int
1) | NgramsPostag
t' <- [NgramsPostag]
terms'     ]

instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
  where
    extractNgramsT :: TermType Lang
-> Node a -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT TermType Lang
l (Node RootId
_ Maybe Query
_ Int
_ Int
_ Maybe RootId
_ Query
_ UTCTime
_ a
h) = TermType Lang
-> a -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
forall h err.
(ExtractNgramsT h, HasText h) =>
TermType Lang
-> h -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT TermType Lang
l a
h

instance HasText a => HasText (Node a)
  where
    hasText :: Node a -> [Query]
hasText (Node RootId
_ Maybe Query
_ Int
_ Int
_ Maybe RootId
_ Query
_ UTCTime
_ a
h) = a -> [Query]
forall h. HasText h => h -> [Query]
hasText a
h



-- | TODO putelsewhere
-- | Upgrade function
-- Suppose all documents are English (this is the case actually)
indexAllDocumentsWithPosTag :: FlowCmdM env err m => m ()
indexAllDocumentsWithPosTag :: m ()
indexAllDocumentsWithPosTag = do
  RootId
rootId    <- User -> Cmd err RootId
forall err. HasNodeError err => User -> Cmd err RootId
getRootId (Query -> User
UserName Query
userMaster)
  [RootId]
corpusIds <- RootId -> [NodeType] -> Cmd err [RootId]
forall err. RootId -> [NodeType] -> Cmd err [RootId]
findNodesId RootId
rootId [NodeType
NodeCorpus]
  [Node HyperdataDocument]
docs      <- [[Node HyperdataDocument]] -> [Node HyperdataDocument]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
List.concat ([[Node HyperdataDocument]] -> [Node HyperdataDocument])
-> m [[Node HyperdataDocument]] -> m [Node HyperdataDocument]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (RootId -> m [Node HyperdataDocument])
-> [RootId] -> m [[Node HyperdataDocument]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM RootId -> m [Node HyperdataDocument]
forall err.
HasDBid NodeType =>
RootId -> Cmd err [Node HyperdataDocument]
getDocumentsWithParentId [RootId]
corpusIds

  [()]
_ <- ([Node HyperdataDocument] -> m ())
-> [[Node HyperdataDocument]] -> m [()]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM [Node HyperdataDocument] -> m ()
forall env err (m :: * -> *).
FlowCmdM env err m =>
[Node HyperdataDocument] -> m ()
extractInsert (Int -> [Node HyperdataDocument] -> [[Node HyperdataDocument]]
forall a. Int -> [a] -> [[a]]
splitEvery Int
1000 [Node HyperdataDocument]
docs)

  () -> m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

extractInsert :: FlowCmdM env err m => [Node HyperdataDocument] -> m ()
extractInsert :: [Node HyperdataDocument] -> m ()
extractInsert [Node HyperdataDocument]
docs = do
  let documentsWithId :: [Indexed RootId (Node HyperdataDocument)]
documentsWithId = (Node HyperdataDocument -> Indexed RootId (Node HyperdataDocument))
-> [Node HyperdataDocument]
-> [Indexed RootId (Node HyperdataDocument)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map (\Node HyperdataDocument
doc -> RootId
-> Node HyperdataDocument
-> Indexed RootId (Node HyperdataDocument)
forall i a. i -> a -> Indexed i a
Indexed (Node HyperdataDocument
doc Node HyperdataDocument
-> Getting RootId (Node HyperdataDocument) RootId -> RootId
forall s a. s -> Getting a s a -> a
^. Getting RootId (Node HyperdataDocument) RootId
forall id1 hash_id typename user_id parent_id name date hyperdata
       id2.
Lens
  (NodePoly
     id1 hash_id typename user_id parent_id name date hyperdata)
  (NodePoly
     id2 hash_id typename user_id parent_id name date hyperdata)
  id1
  id2
node_id) Node HyperdataDocument
doc) [Node HyperdataDocument]
docs

  HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
mapNgramsDocs' <- [DocumentIdWithNgrams (Node HyperdataDocument) ExtractedNgrams]
-> HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
forall b a.
(Ord b, Hashable b) =>
[DocumentIdWithNgrams a b]
-> HashMap b (Map NgramsType (Map RootId Int))
mapNodeIdNgrams
                ([DocumentIdWithNgrams (Node HyperdataDocument) ExtractedNgrams]
 -> HashMap ExtractedNgrams (Map NgramsType (Map RootId Int)))
-> m [DocumentIdWithNgrams
        (Node HyperdataDocument) ExtractedNgrams]
-> m (HashMap ExtractedNgrams (Map NgramsType (Map RootId Int)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Node HyperdataDocument
 -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)))
-> [Indexed RootId (Node HyperdataDocument)]
-> Cmd
     err [DocumentIdWithNgrams (Node HyperdataDocument) ExtractedNgrams]
forall err a b.
HasNodeError err =>
(a -> Cmd err (HashMap b (Map NgramsType Int)))
-> [Indexed RootId a] -> Cmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams
                    (TermType Lang
-> Node HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
forall h err.
(ExtractNgramsT h, HasText h) =>
TermType Lang
-> h -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT (TermType Lang
 -> Node HyperdataDocument
 -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)))
-> TermType Lang
-> Node HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
forall a b. (a -> b) -> a -> b
$ TermType Lang
-> [Indexed RootId (Node HyperdataDocument)] -> TermType Lang
forall (t :: * -> *) h.
(Foldable t, Functor t, HasText h) =>
TermType Lang -> t h -> TermType Lang
withLang (Lang -> TermType Lang
forall lang. lang -> TermType lang
Multi Lang
EN) [Indexed RootId (Node HyperdataDocument)]
documentsWithId)
                    [Indexed RootId (Node HyperdataDocument)]
documentsWithId

  HashMap Query Int
_ <- [ExtractedNgrams] -> Cmd err (HashMap Query Int)
forall err. [ExtractedNgrams] -> Cmd err (HashMap Query Int)
insertExtractedNgrams ([ExtractedNgrams] -> Cmd err (HashMap Query Int))
-> [ExtractedNgrams] -> Cmd err (HashMap Query Int)
forall a b. (a -> b) -> a -> b
$ HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
-> [ExtractedNgrams]
forall k v. HashMap k v -> [k]
HashMap.keys HashMap ExtractedNgrams (Map NgramsType (Map RootId Int))
mapNgramsDocs'

  () -> m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()