{-|
Module      : Gargantext.Core.Text.List.Formats.CSV
Description : 
Copyright   : (c) CNRS, 2018-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

CSV parser for Gargantext corpus files.

-}


module Gargantext.Core.Text.List.Formats.CSV where

import Control.Applicative
import Control.Monad (mzero)
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(Left, Right))
import Data.List (null)
import Data.Text (Text, pack)
import Data.Vector (Vector)
import GHC.IO (FilePath)
import Gargantext.Core.Text.Context
import Gargantext.Prelude hiding (length)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as DT
import qualified Data.Vector as V

------------------------------------------------------------------------
csvMapTermList :: FilePath -> IO TermList
csvMapTermList :: FilePath -> IO TermList
csvMapTermList FilePath
fp = CsvListType -> Vector CsvList -> TermList
csv2list CsvListType
CsvMap (Vector CsvList -> TermList)
-> ((Header, Vector CsvList) -> Vector CsvList)
-> (Header, Vector CsvList)
-> TermList
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Header, Vector CsvList) -> Vector CsvList
forall a b. (a, b) -> b
snd ((Header, Vector CsvList) -> TermList)
-> IO (Header, Vector CsvList) -> IO TermList
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>  FilePath -> IO (Header, Vector CsvList)
fromCsvListFile FilePath
fp

csv2list :: CsvListType -> Vector CsvList -> TermList
csv2list :: CsvListType -> Vector CsvList -> TermList
csv2list CsvListType
lt Vector CsvList
vs = Vector ([Text], [[Text]]) -> TermList
forall a. Vector a -> [a]
V.toList (Vector ([Text], [[Text]]) -> TermList)
-> Vector ([Text], [[Text]]) -> TermList
forall a b. (a -> b) -> a -> b
$ (CsvList -> ([Text], [[Text]]))
-> Vector CsvList -> Vector ([Text], [[Text]])
forall a b. (a -> b) -> Vector a -> Vector b
V.map (\(CsvList CsvListType
_ Text
label Text
forms)
                           -> (Text -> [Text]
DT.words Text
label, [Text -> [Text]
DT.words Text
label] [[Text]] -> [[Text]] -> [[Text]]
forall a. Semigroup a => a -> a -> a
<> (([Text] -> Bool) -> [[Text]] -> [[Text]]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> ([Text] -> Bool) -> [Text] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null) ([[Text]] -> [[Text]])
-> ([Text] -> [[Text]]) -> [Text] -> [[Text]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> [Text]) -> [Text] -> [[Text]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map Text -> [Text]
DT.words ([Text] -> [[Text]]) -> [Text] -> [[Text]]
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text]
DT.splitOn Text
csvListFormsDelimiter Text
forms)))
                         (Vector CsvList -> Vector ([Text], [[Text]]))
-> Vector CsvList -> Vector ([Text], [[Text]])
forall a b. (a -> b) -> a -> b
$ (CsvList -> Bool) -> Vector CsvList -> Vector CsvList
forall a. (a -> Bool) -> Vector a -> Vector a
V.filter (\CsvList
l -> CsvList -> CsvListType
csvList_status CsvList
l CsvListType -> CsvListType -> Bool
forall a. Eq a => a -> a -> Bool
== CsvListType
lt ) Vector CsvList
vs

------------------------------------------------------------------------
data CsvListType = CsvMap | CsvStop | CsvCandidate
  deriving (ReadPrec [CsvListType]
ReadPrec CsvListType
Int -> ReadS CsvListType
ReadS [CsvListType]
(Int -> ReadS CsvListType)
-> ReadS [CsvListType]
-> ReadPrec CsvListType
-> ReadPrec [CsvListType]
-> Read CsvListType
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [CsvListType]
$creadListPrec :: ReadPrec [CsvListType]
readPrec :: ReadPrec CsvListType
$creadPrec :: ReadPrec CsvListType
readList :: ReadS [CsvListType]
$creadList :: ReadS [CsvListType]
readsPrec :: Int -> ReadS CsvListType
$creadsPrec :: Int -> ReadS CsvListType
Read, Int -> CsvListType -> ShowS
[CsvListType] -> ShowS
CsvListType -> FilePath
(Int -> CsvListType -> ShowS)
-> (CsvListType -> FilePath)
-> ([CsvListType] -> ShowS)
-> Show CsvListType
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [CsvListType] -> ShowS
$cshowList :: [CsvListType] -> ShowS
show :: CsvListType -> FilePath
$cshow :: CsvListType -> FilePath
showsPrec :: Int -> CsvListType -> ShowS
$cshowsPrec :: Int -> CsvListType -> ShowS
Show, CsvListType -> CsvListType -> Bool
(CsvListType -> CsvListType -> Bool)
-> (CsvListType -> CsvListType -> Bool) -> Eq CsvListType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CsvListType -> CsvListType -> Bool
$c/= :: CsvListType -> CsvListType -> Bool
== :: CsvListType -> CsvListType -> Bool
$c== :: CsvListType -> CsvListType -> Bool
Eq)
------------------------------------------------------------------------
-- CSV List Main Configuration
csvListFieldDelimiter :: Char
csvListFieldDelimiter :: Char
csvListFieldDelimiter = Char
'\t'

csvListFormsDelimiter :: Text
csvListFormsDelimiter :: Text
csvListFormsDelimiter = Text
"|&|"
------------------------------------------------------------------------
data CsvList = CsvList
    { CsvList -> CsvListType
csvList_status :: !CsvListType
    , CsvList -> Text
csvList_label  :: !Text
    , CsvList -> Text
csvList_forms  :: !Text
    }
    deriving (Int -> CsvList -> ShowS
[CsvList] -> ShowS
CsvList -> FilePath
(Int -> CsvList -> ShowS)
-> (CsvList -> FilePath) -> ([CsvList] -> ShowS) -> Show CsvList
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [CsvList] -> ShowS
$cshowList :: [CsvList] -> ShowS
show :: CsvList -> FilePath
$cshow :: CsvList -> FilePath
showsPrec :: Int -> CsvList -> ShowS
$cshowsPrec :: Int -> CsvList -> ShowS
Show)
------------------------------------------------------------------------
instance FromNamedRecord CsvList where
  parseNamedRecord :: NamedRecord -> Parser CsvList
parseNamedRecord NamedRecord
r = CsvListType -> Text -> Text -> CsvList
CsvList (CsvListType -> Text -> Text -> CsvList)
-> Parser CsvListType -> Parser (Text -> Text -> CsvList)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> NamedRecord
r NamedRecord -> ByteString -> Parser CsvListType
forall a. FromField a => NamedRecord -> ByteString -> Parser a
.: ByteString
"status"
                               Parser (Text -> Text -> CsvList)
-> Parser Text -> Parser (Text -> CsvList)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> NamedRecord
r NamedRecord -> ByteString -> Parser Text
forall a. FromField a => NamedRecord -> ByteString -> Parser a
.: ByteString
"label"
                               Parser (Text -> CsvList) -> Parser Text -> Parser CsvList
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> NamedRecord
r NamedRecord -> ByteString -> Parser Text
forall a. FromField a => NamedRecord -> ByteString -> Parser a
.: ByteString
"forms"

instance ToNamedRecord CsvList where
  toNamedRecord :: CsvList -> NamedRecord
toNamedRecord (CsvList CsvListType
s Text
l Text
f) =
    [(ByteString, ByteString)] -> NamedRecord
namedRecord [ ByteString
"status" ByteString -> CsvListType -> (ByteString, ByteString)
forall a. ToField a => ByteString -> a -> (ByteString, ByteString)
.= CsvListType
s
                , ByteString
"label"  ByteString -> Text -> (ByteString, ByteString)
forall a. ToField a => ByteString -> a -> (ByteString, ByteString)
.= Text
l
                , ByteString
"forms"  ByteString -> Text -> (ByteString, ByteString)
forall a. ToField a => ByteString -> a -> (ByteString, ByteString)
.= Text
f
                ]
------------------------------------------------------------------------
instance FromField CsvListType where
    parseField :: ByteString -> Parser CsvListType
parseField ByteString
"map"  = CsvListType -> Parser CsvListType
forall (f :: * -> *) a. Applicative f => a -> f a
pure CsvListType
CsvMap
    parseField ByteString
"main" = CsvListType -> Parser CsvListType
forall (f :: * -> *) a. Applicative f => a -> f a
pure CsvListType
CsvCandidate
    parseField ByteString
"stop" = CsvListType -> Parser CsvListType
forall (f :: * -> *) a. Applicative f => a -> f a
pure CsvListType
CsvStop
    parseField ByteString
_      = Parser CsvListType
forall (m :: * -> *) a. MonadPlus m => m a
mzero

instance ToField CsvListType where
    toField :: CsvListType -> ByteString
toField CsvListType
CsvMap       = ByteString
"map"
    toField CsvListType
CsvCandidate = ByteString
"main"
    toField CsvListType
CsvStop      = ByteString
"stop"
------------------------------------------------------------------------
csvDecodeOptions :: DecodeOptions
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = (DecodeOptions
defaultDecodeOptions
                      {decDelimiter :: Word8
decDelimiter = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
csvListFieldDelimiter}
                    )

csvEncodeOptions :: EncodeOptions
csvEncodeOptions :: EncodeOptions
csvEncodeOptions = ( EncodeOptions
defaultEncodeOptions 
                      {encDelimiter :: Word8
encDelimiter = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
csvListFieldDelimiter}
                    )
------------------------------------------------------------------------
fromCsvListFile :: FilePath -> IO (Header, Vector CsvList)
fromCsvListFile :: FilePath -> IO (Header, Vector CsvList)
fromCsvListFile FilePath
fp = do
    ByteString
csvData <- FilePath -> IO ByteString
BL.readFile FilePath
fp
    case DecodeOptions
-> ByteString -> Either FilePath (Header, Vector CsvList)
forall a.
FromNamedRecord a =>
DecodeOptions -> ByteString -> Either FilePath (Header, Vector a)
decodeByNameWith DecodeOptions
csvDecodeOptions ByteString
csvData of
      Left FilePath
e        -> Text -> IO (Header, Vector CsvList)
forall a. HasCallStack => Text -> a
panic (FilePath -> Text
pack FilePath
e)
      Right (Header, Vector CsvList)
csvList -> (Header, Vector CsvList) -> IO (Header, Vector CsvList)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Header, Vector CsvList)
csvList
------------------------------------------------------------------------
toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO ()
toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO ()
toCsvListFile FilePath
fp (Header
h, Vector CsvList
vs) = FilePath -> ByteString -> IO ()
BL.writeFile FilePath
fp (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$
                      EncodeOptions -> Header -> [CsvList] -> ByteString
forall a.
ToNamedRecord a =>
EncodeOptions -> Header -> [a] -> ByteString
encodeByNameWith EncodeOptions
csvEncodeOptions Header
h (Vector CsvList -> [CsvList]
forall a. Vector a -> [a]
V.toList Vector CsvList
vs)
------------------------------------------------------------------------