{-|
Module      : Gargantext.Core.Text.Corpus.Parsers.CSV
Description : CSV parsing and serialisation of Gargantext documents
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

CSV parser for Gargantext corpus files.

-}


module Gargantext.Core.Text.Corpus.Parsers.CSV where

import Control.Applicative
import qualified Data.ByteString      as BS
import qualified Data.ByteString.Lazy as BL
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(..))
import Data.Maybe (fromMaybe)
import Data.Text (Text, pack, length, intercalate)
import Data.Time.Segment (jour)
import qualified Data.Vector          as V
import Data.Vector (Vector)
import GHC.IO (FilePath)
import GHC.Word (Word8)

import qualified Prelude as Prelude

import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (length)
import Gargantext.Core.Text
import Gargantext.Core.Text.Context

---------------------------------------------------------------
-- | Column names of the Gargantext V3 CSV format, in the order in which
-- they are listed here.
headerCsvGargV3 :: Header
headerCsvGargV3 =
  header
    [ "title"
    , "source"
    , "publication_year"
    , "publication_month"
    , "publication_day"
    , "abstract"
    , "authors"
    ]
---------------------------------------------------------------
-- | A fully populated CSV row: unlike 'CsvDoc', the publication date parts
-- are plain 'Int's (no 'Maybe') and the row carries a document id.
data CsvGargV3 = CsvGargV3
    { d_docId             :: !Int   -- ^ document identifier
    , d_title             :: !Text  -- ^ title
    , d_source            :: !Text  -- ^ source
    , d_publication_year  :: !Int   -- ^ publication year
    , d_publication_month :: !Int   -- ^ publication month
    , d_publication_day   :: !Int   -- ^ publication day
    , d_abstract          :: !Text  -- ^ abstract
    , d_authors           :: !Text  -- ^ authors
    }
    deriving (Show)
---------------------------------------------------------------
-- | Convert a 'CsvGargV3' row into a 'HyperdataDocument'.
--
-- The document is tagged with the @"CSV"@ database name and the numeric
-- document id is stored, rendered as text, in '_hd_doi'.  Title, source,
-- abstract, authors and the publication year\/month\/day are copied from
-- the row; every other field is left as 'Nothing'.
--
-- Each CSV column is mapped onto the hyperdata field of the same name, so
-- that the mapping agrees with 'hyperdataDocument2csvDoc' (authors go to
-- '_hd_authors', the abstract to '_hd_abstract', the source to
-- '_hd_source').
toDoc :: CsvGargV3 -> HyperdataDocument
toDoc (CsvGargV3 {..}) =
  HyperdataDocument { _hd_bdd                = Just "CSV"
                    , _hd_doi                = Just . pack . show $ d_docId
                    , _hd_url                = Nothing
                    , _hd_uniqId             = Nothing
                    , _hd_uniqIdBdd          = Nothing
                    , _hd_page               = Nothing
                    , _hd_title              = Just d_title
                    , _hd_authors            = Just d_authors
                    , _hd_institutes         = Nothing
                    , _hd_source             = Just d_source
                    , _hd_abstract           = Just d_abstract
                    , _hd_publication_date   = Nothing
                    , _hd_publication_year   = Just d_publication_year
                    , _hd_publication_month  = Just d_publication_month
                    , _hd_publication_day    = Just d_publication_day
                    , _hd_publication_hour   = Nothing
                    , _hd_publication_minute = Nothing
                    , _hd_publication_second = Nothing
                    , _hd_language_iso2      = Nothing }

---------------------------------------------------------------
-- | Convert parsed 'CsvDoc' rows into 'CsvGargV3' rows.
--
-- The input is first passed through 'splitDoc' once per split context
-- (paragraphs, then sentences, then characters); the mean abstract size
-- handed to 'splitDoc' is recomputed before each pass.  The resulting
-- documents are then numbered from 1, and missing publication date parts
-- are replaced by 'defaultYear', 'defaultMonth' and 'defaultDay'.
toDocs :: Vector CsvDoc -> [CsvGargV3]
toDocs v = V.toList $ V.zipWith toGargV3 docIds chunks
  where
    -- Split contexts, applied in this order.
    seps = V.fromList [Paragraphs 1, Sentences 3, Chars 3]

    -- One splitting pass over the whole corpus for a given context.
    splitAll docs sep = V.concatMap (splitDoc (docsSize docs) sep) docs

    -- Strict left fold: each pass works on the output of the previous one.
    chunks = V.foldl' splitAll v seps

    -- Identifiers 1..n, one per document obtained after splitting.
    docIds = V.enumFromN 1 (V.length chunks)

    toGargV3 nId (CsvDoc {..}) =
      CsvGargV3 { d_docId             = nId
                , d_title             = csv_title
                , d_source            = csv_source
                , d_publication_year  = fromMIntOrDec defaultYear csv_publication_year
                , d_publication_month = fromMaybe defaultMonth csv_publication_month
                , d_publication_day   = fromMaybe defaultDay csv_publication_day
                , d_abstract          = csv_abstract
                , d_authors           = csv_authors }

---------------------------------------------------------------
-- | Convert 'CsvGargV3' rows back into 'CsvDoc' rows.
--
-- The document id is dropped and every publication date part is wrapped in
-- 'Just' (the year additionally in 'IntOrDec').
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs = V.map toCsvDoc
  where
    toCsvDoc (CsvGargV3 {..}) =
      CsvDoc { csv_title             = d_title
             , csv_source            = d_source
             , csv_publication_year  = Just (IntOrDec d_publication_year)
             , csv_publication_month = Just d_publication_month
             , csv_publication_day   = Just d_publication_day
             , csv_abstract          = d_abstract
             , csv_authors           = d_authors }

---------------------------------------------------------------
-- | Split a document into several documents according to a split context.
--
-- A document is only split when its abstract is longer than 1000
-- characters and @mod (round m) docSize >= 10@, where @m@ is the mean
-- abstract size passed by the caller; otherwise it is returned unchanged
-- as a one-element vector.
--
-- NOTE(review): the second condition takes the mean modulo the document
-- size; confirm that this is the intended heuristic.
--
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc
  | docSize > 1000, mod (round m) docSize >= 10 = splitDoc' splt doc
  | otherwise                                   = V.singleton doc
  where
    docSize = length (csv_abstract doc)

    -- The abstract is cut into chunks with 'splitBy'.  The first chunk
    -- replaces the abstract of the original document.  Every following
    -- chunk becomes a new document whose title is the chunk's first
    -- sentence and whose abstract is the remaining sentences; all other
    -- fields are copied from the original document.
    --
    -- NOTE(review): 'head'' and 'tail'' take a label and presumably fail
    -- with it on an empty list - confirm in Gargantext.Prelude.
    splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
    splitDoc' contextSize (CsvDoc {..}) = V.fromList (firstDoc : nextDocs)
      where
        abstracts     = splitBy contextSize csv_abstract

        firstAbstract = head' "splitDoc'1" abstracts
        firstDoc      = CsvDoc { csv_abstract = firstAbstract, .. }

        nextDocs      = map chunkToDoc (tail' "splitDoc'2" abstracts)

        chunkToDoc txt =
          CsvDoc { csv_title    = head' "splitDoc'2" $ sentences txt
                 , csv_abstract = unsentences $ tail' "splitDoc'1" $ sentences txt
                 , ..
                 }

---------------------------------------------------------------
---------------------------------------------------------------
-- | Mean size of the abstracts of a set of documents (see 'docsSize').
type Mean = Double

-- | Mean length, in characters, of the abstracts of the given documents.
docsSize :: Vector CsvDoc -> Mean
docsSize csvDoc = mean abstractLengths
  where
    abstractLengths =
      V.toList $ V.map (fromIntegral . length . csv_abstract) csvDoc


---------------------------------------------------------------
-- | An 'Int' that may be written in a CSV field either as an integer or as
-- a decimal number (see the 'FromField' instance).
newtype IntOrDec = IntOrDec Int
  deriving (Show, Eq, Read)
-- | Unwrap an 'IntOrDec'.
unIntOrDec :: IntOrDec -> Int
unIntOrDec (IntOrDec i) = i
-- | Parse the field as an 'Int'; if that fails, parse it as a 'Double' and
-- round it down.  When both attempts fail, the error is the one reported
-- by the 'Double' parser.
instance FromField IntOrDec where
  parseField s = asInt <|> asFlooredDouble
    where
      asInt           = IntOrDec <$> (parseField s :: Parser Int)
      asFlooredDouble = IntOrDec . Prelude.floor <$> (parseField s :: Parser Double)
-- | An 'IntOrDec' is written exactly like the 'Int' it wraps.
instance ToField IntOrDec where
  toField (IntOrDec i) = toField i

-- | Unwrap an optional 'IntOrDec', returning the given default when the
-- value is 'Nothing'.
fromMIntOrDec :: Int -> Maybe IntOrDec -> Int
fromMIntOrDec default' mVal = maybe default' unIntOrDec mVal
-- | Publication year used by 'toDocs' when a row has none.
defaultYear :: Int
defaultYear = 1973
-- | Publication month used by 'toDocs' when a row has none.
defaultMonth :: Int
defaultMonth = 1
-- | Publication day used by 'toDocs' when a row has none.
defaultDay :: Int
defaultDay = 1

-- | A document as read from, or written to, a CSV file.  The publication
-- date parts are optional.
data CsvDoc = CsvDoc
    { csv_title             :: !Text              -- ^ title
    , csv_source            :: !Text              -- ^ source
    , csv_publication_year  :: !(Maybe IntOrDec)  -- ^ publication year, integer or decimal in the file
    , csv_publication_month :: !(Maybe Int)       -- ^ publication month
    , csv_publication_day   :: !(Maybe Int)       -- ^ publication day
    , csv_abstract          :: !Text              -- ^ abstract
    , csv_authors           :: !Text              -- ^ authors
    }
    deriving (Show)

-- | Decode a 'CsvDoc' from a named record.  Each column is looked up first
-- under its lower-case, underscore-separated name (e.g.
-- @publication_year@) and then under its capitalised, space-separated name
-- (e.g. @Publication Year@).
instance FromNamedRecord CsvDoc where
  parseNamedRecord r = do
    csv_title             <- column "title"             "Title"
    csv_source            <- column "source"            "Source"
    csv_publication_year  <- column "publication_year"  "Publication Year"
    csv_publication_month <- column "publication_month" "Publication Month"
    csv_publication_day   <- column "publication_day"   "Publication Day"
    csv_abstract          <- column "abstract"          "Abstract"
    csv_authors           <- column "authors"           "Authors"
    pure $ CsvDoc { .. }
    where
      -- Try the first column name, then fall back to the second one.
      column :: FromField a => BS.ByteString -> BS.ByteString -> Parser a
      column name altName = r .: name <|> r .: altName

-- | Encode a 'CsvDoc' as a named record, using the lower-case column names
-- of 'headerCsvGargV3'.
instance ToNamedRecord CsvDoc where
  toNamedRecord (CsvDoc {..}) =
    namedRecord
      [ "title"             .= csv_title
      , "source"            .= csv_source
      , "publication_year"  .= csv_publication_year
      , "publication_month" .= csv_publication_month
      , "publication_day"   .= csv_publication_day
      , "abstract"          .= csv_abstract
      , "authors"           .= csv_authors
      ]

-- | Convert a 'HyperdataDocument' into a 'CsvDoc'.
--
-- Missing text fields become the empty text and missing publication date
-- parts become @0@.
--
-- NOTE(review): because missing date parts are turned into @Just 0@, the
-- defaults applied by 'toDocs' never kick in for documents produced here;
-- confirm that this is intended.
hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc
hyperdataDocument2csvDoc h =
  CsvDoc { csv_title             = orEmpty (_hd_title h)
         , csv_source            = orEmpty (_hd_source h)
         , csv_publication_year  = Just (IntOrDec (orZero (_hd_publication_year h)))
         , csv_publication_month = Just (orZero (_hd_publication_month h))
         , csv_publication_day   = Just (orZero (_hd_publication_day h))
         , csv_abstract          = orEmpty (_hd_abstract h)
         , csv_authors           = orEmpty (_hd_authors h) }
  where
    orEmpty :: Maybe Text -> Text
    orEmpty = fromMaybe ""

    orZero :: Maybe Int -> Int
    orZero = fromMaybe 0


-- | Field separators supported when decoding or encoding CSV data.
data Delimiter = Tab | Comma

-- | Default decoding options with the field delimiter set to the given
-- 'Delimiter'.
csvDecodeOptions :: Delimiter -> DecodeOptions
csvDecodeOptions d = defaultDecodeOptions { decDelimiter = delimiter d }

-- | Default encoding options with the field delimiter set to the given
-- 'Delimiter'.
csvEncodeOptions :: Delimiter -> EncodeOptions
csvEncodeOptions d = defaultEncodeOptions { encDelimiter = delimiter d }

-- | Byte value of a 'Delimiter': the tab character for 'Tab', the comma
-- character for 'Comma'.
delimiter :: Delimiter -> Word8
delimiter d = fromIntegral (ord sepChar)
  where
    sepChar = case d of
      Tab   -> '\t'
      Comma -> ','
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Read a CSV file and, for every row, join the values of the selected
-- fields with a single space.
--
-- The file is read with this module's 'readFile'; a decoding failure is
-- returned as 'Left'.
readCsvOn' :: [CsvDoc -> Text] -> FilePath -> IO (Either Prelude.String [Text])
readCsvOn' fields fp = fmap rowsToTexts <$> readFile fp
  where
    -- Drop the header and render every document.
    rowsToTexts = V.toList . V.map joinFields . snd

    -- Apply every field accessor to the document and join the results.
    joinFields doc = intercalate (pack " ") $ map (\field -> field doc) fields

------------------------------------------------------------------------

-- | Read a file as a lazy 'BL.ByteString' and decode it with
-- 'readByteStringLazy'.  The proxy argument only selects the record type.
readFileLazy :: (FromNamedRecord a) => proxy a -> Delimiter -> FilePath -> IO (Either Prelude.String (Header, Vector a))
readFileLazy proxy delim =
  fmap (readByteStringLazy proxy delim) . BL.readFile

-- | Read a file as a strict 'BS.ByteString' and decode it by column name
-- with 'readByteStringStrict'.
--
-- The proxy and 'Delimiter' arguments are forwarded unchanged.
readFileStrict
  :: (FromNamedRecord a)
  => proxy a
  -> Delimiter
  -> FilePath
  -> IO (Either Prelude.String (Header, Vector a))
readFileStrict proxy delim path =
  readByteStringStrict proxy delim <$> BS.readFile path

-- | Decode a lazy 'BL.ByteString' by column name, using the decode
-- options that 'csvDecodeOptions' builds for the given 'Delimiter'.
--
-- The proxy argument is ignored at the value level; the row type is
-- fixed by the result type.
readByteStringLazy
  :: (FromNamedRecord a)
  => proxy a
  -> Delimiter
  -> BL.ByteString
  -> Either Prelude.String (Header, Vector a)
readByteStringLazy _proxy delim = decodeByNameWith options
  where
    options = csvDecodeOptions delim

-- | Strict-'BS.ByteString' variant of 'readByteStringLazy': the input is
-- converted with 'BL.fromStrict' and then decoded by the lazy version.
readByteStringStrict
  :: (FromNamedRecord a)
  => proxy a
  -> Delimiter
  -> BS.ByteString
  -> Either Prelude.String (Header, Vector a)
readByteStringStrict proxy delim strictBytes =
  readByteStringLazy proxy delim (BL.fromStrict strictBytes)

------------------------------------------------------------------------
-- | Read a 'CsvDoc' file, trying the 'Comma' delimiter first and falling
-- back to 'Tab' when comma decoding fails.
--
-- The file is read once and the same bytes are given to both decoding
-- attempts, so the fallback always sees exactly the content the first
-- attempt saw (no second open of the file). When both attempts fail, the
-- error returned is the one from the 'Tab' attempt; the 'Comma' error is
-- discarded.
--
-- TODO use readFileLazy
readFile :: FilePath -> IO (Either Prelude.String (Header, Vector CsvDoc))
readFile fp = do
  bs <- BL.readFile fp
  case readCsvLazyBS Comma bs of
    -- Comma decoding failed: retry on the very same bytes with Tab.
    Left _commaErr -> pure $ readCsvLazyBS Tab bs
    Right res      -> pure $ Right res



-- | Decode a lazy 'BL.ByteString' into a header and a vector of 'CsvDoc'
-- rows, with the decode options derived from the given 'Delimiter'.
--
-- TODO use readByteStringLazy
readCsvLazyBS :: Delimiter -> BL.ByteString -> Either Prelude.String (Header, Vector CsvDoc)
readCsvLazyBS delim = decodeByNameWith (csvDecodeOptions delim)

------------------------------------------------------------------------
-- | Read a file as a lazy 'BL.ByteString' and decode it as 'CsvHal' rows
-- via 'readCsvHalLazyBS'.
--
-- TODO use readFileLazy
readCsvHal :: FilePath -> IO (Either Prelude.String (Header, Vector CsvHal))
readCsvHal path = readCsvHalLazyBS <$> BL.readFile path

-- | Decode a lazy 'BL.ByteString' into a header and a vector of 'CsvHal'
-- rows. The decode options are always those built from 'Tab'.
--
-- TODO use readByteStringLazy
readCsvHalLazyBS :: BL.ByteString -> Either Prelude.String (Header, Vector CsvHal)
readCsvHalLazyBS = decodeByNameWith halOptions
  where
    halOptions = csvDecodeOptions Tab

-- | Strict-'BS.ByteString' variant of 'readCsvHalLazyBS': converts the
-- input with 'BL.fromStrict' and decodes it with the lazy version.
readCsvHalBSStrict :: BS.ByteString -> Either Prelude.String (Header, Vector CsvHal)
readCsvHalBSStrict strictBytes = readCsvHalLazyBS (BL.fromStrict strictBytes)

------------------------------------------------------------------------
-- | Encode the given header and 'CsvDoc' rows by column name and write
-- the result to the file. Encoding always uses the options built from
-- 'Tab'.
writeFile :: FilePath -> (Header, Vector CsvDoc) -> IO ()
writeFile fp (hdr, docs) = BL.writeFile fp encoded
  where
    encoded = encodeByNameWith (csvEncodeOptions Tab) hdr (V.toList docs)

-- | Serialise the documents with 'hyperdataDocument2csv' and write the
-- resulting bytes to the given file.
writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO ()
writeDocs2Csv fp = BL.writeFile fp . hyperdataDocument2csv

-- | Convert each document with 'hyperdataDocument2csvDoc' and encode the
-- rows by column name under 'headerCsvGargV3', using the encode options
-- built from 'Tab'.
hyperdataDocument2csv :: [HyperdataDocument] -> BL.ByteString
hyperdataDocument2csv docs = encodeByNameWith options headerCsvGargV3 rows
  where
    options = csvEncodeOptions Tab
    rows    = map hyperdataDocument2csvDoc docs

------------------------------------------------------------------------
-- Hal Format
-- | One row of a Hal CSV export. Every field is strict; each one is
-- read from / written to the CSV column named in its comment by the
-- 'FromNamedRecord' and 'ToNamedRecord' instances of this type.
data CsvHal = CsvHal
    { csvHal_title              :: !Text     -- ^ column @title@
    , csvHal_source             :: !Text     -- ^ column @source@
    , csvHal_publication_year   :: !Integer  -- ^ column @publication_year@
    , csvHal_publication_month  :: !Int      -- ^ column @publication_month@
    , csvHal_publication_day    :: !Int      -- ^ column @publication_day@
    , csvHal_abstract           :: !Text     -- ^ column @abstract@
    , csvHal_authors            :: !Text     -- ^ column @authors@

    , csvHal_url                :: !Text     -- ^ column @url@
    , csvHal_isbn_s             :: !Text     -- ^ column @isbn_s@
    , csvHal_issue_s            :: !Text     -- ^ column @issue_s@
    , csvHal_journalPublisher_s :: !Text     -- ^ column @journalPublisher_s@
    , csvHal_language_s         :: !Text     -- ^ column @language_s@

    , csvHal_doiId_s            :: !Text     -- ^ column @doiId_s@
    , csvHal_authId_i           :: !Text     -- ^ column @authId_i@
    , csvHal_instStructId_i     :: !Text     -- ^ column @instStructId_i@
    , csvHal_deptStructId_i     :: !Text     -- ^ column @deptStructId_i@
    , csvHal_labStructId_i      :: !Text     -- ^ column @labStructId_i@

    , csvHal_rteamStructId_i    :: !Text     -- ^ column @rteamStructId_i@
    , csvHal_docType_s          :: !Text     -- ^ column @docType_s@
    }
    deriving (Show)

-- | Build a 'CsvHal' from a named record by looking up one column per
-- field.
instance FromNamedRecord CsvHal where
  -- The lookups are applied positionally, so their order must follow the
  -- field order of the 'CsvHal' constructor exactly.
  parseNamedRecord r =
    CsvHal
      <$> r .: "title"
      <*> r .: "source"
      <*> r .: "publication_year"
      <*> r .: "publication_month"
      <*> r .: "publication_day"
      <*> r .: "abstract"
      <*> r .: "authors"
      <*> r .: "url"
      <*> r .: "isbn_s"
      <*> r .: "issue_s"
      <*> r .: "journalPublisher_s"
      <*> r .: "language_s"
      <*> r .: "doiId_s"
      <*> r .: "authId_i"
      <*> r .: "instStructId_i"
      <*> r .: "deptStructId_i"
      <*> r .: "labStructId_i"
      <*> r .: "rteamStructId_i"
      <*> r .: "docType_s"

-- | Turn a 'CsvHal' into a named record with one entry per field, keyed
-- by the same column names the 'FromNamedRecord' instance reads.
instance ToNamedRecord CsvHal where
  toNamedRecord hal =
    namedRecord
      [ "title"              .= csvHal_title hal
      , "source"             .= csvHal_source hal

      , "publication_year"   .= csvHal_publication_year hal
      , "publication_month"  .= csvHal_publication_month hal
      , "publication_day"    .= csvHal_publication_day hal

      , "abstract"           .= csvHal_abstract hal
      , "authors"            .= csvHal_authors hal

      , "url"                .= csvHal_url hal
      , "isbn_s"             .= csvHal_isbn_s hal
      , "issue_s"            .= csvHal_issue_s hal
      , "journalPublisher_s" .= csvHal_journalPublisher_s hal
      , "language_s"         .= csvHal_language_s hal

      , "doiId_s"            .= csvHal_doiId_s hal
      , "authId_i"           .= csvHal_authId_i hal
      , "instStructId_i"     .= csvHal_instStructId_i hal
      , "deptStructId_i"     .= csvHal_deptStructId_i hal
      , "labStructId_i"      .= csvHal_labStructId_i hal

      , "rteamStructId_i"    .= csvHal_rteamStructId_i hal
      , "docType_s"          .= csvHal_docType_s hal
      ]

-- | Convert a Hal CSV row into a 'HyperdataDocument'.
--
-- Text columns are copied into the matching @Just@ fields, the source
-- database is labelled @"CsvHal"@, and the publication date is the
-- 'show'n result of 'jour' applied to the row's year, month and day.
-- Fields with no Hal counterpart here (unique ids, page, time of day,
-- language) are left as 'Nothing'.
csvHal2doc :: CsvHal -> HyperdataDocument
csvHal2doc hal =
  HyperdataDocument
    { _hd_bdd                = Just "CsvHal"
    , _hd_doi                = Just (csvHal_doiId_s hal)
    , _hd_url                = Just (csvHal_url hal)
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = Just (csvHal_title hal)
    , _hd_authors            = Just (csvHal_authors hal)
    , _hd_institutes         = Just (csvHal_instStructId_i hal)
    , _hd_source             = Just (csvHal_source hal)
    , _hd_abstract           = Just (csvHal_abstract hal)
    , _hd_publication_date   = Just (pack (show pubDate))
    , _hd_publication_year   = Just (fromIntegral year)
    , _hd_publication_month  = Just month
    , _hd_publication_day    = Just day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Nothing
    }
  where
    year    = csvHal_publication_year hal
    month   = csvHal_publication_month hal
    day     = csvHal_publication_day hal
    pubDate = jour year month day


-- | Convert a generic 'CsvDoc' row into a 'HyperdataDocument'.
--
-- Missing date parts are replaced by 'defaultYear', 'defaultMonth' and
-- 'defaultDay'; the publication date is the 'show'n result of 'jour' on
-- the resolved year, month and day. Fields a 'CsvDoc' does not carry
-- (doi, url, unique ids, page, institutes, time of day, language) are
-- left as 'Nothing'.
csv2doc :: CsvDoc -> HyperdataDocument
csv2doc doc =
  HyperdataDocument
    { -- NOTE(review): the label is "CsvHal" even though this converts a
      -- plain CsvDoc; looks like a copy-paste from csvHal2doc. Confirm
      -- before changing, since the value ends up in the stored document.
      _hd_bdd                = Just "CsvHal"
    , _hd_doi                = Nothing
    , _hd_url                = Nothing
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = Just (csv_title doc)
    , _hd_authors            = Just (csv_authors doc)
    , _hd_institutes         = Nothing
    , _hd_source             = Just (csv_source doc)
    , _hd_abstract           = Just (csv_abstract doc)
    , _hd_publication_date   = Just (pack (show pubDate))
    , _hd_publication_year   = Just year
    , _hd_publication_month  = Just month
    , _hd_publication_day    = Just day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Nothing
    }
  where
    year    = fromMIntOrDec defaultYear (csv_publication_year doc)
    month   = fromMaybe defaultMonth (csv_publication_month doc)
    day     = fromMaybe defaultDay (csv_publication_day doc)
    pubDate = jour (fromIntegral year) month day

------------------------------------------------------------------------
-- | Read a Hal CSV file with 'readCsvHal' and convert every row to a
-- 'HyperdataDocument' with 'csvHal2doc'. The header is dropped; a decode
-- error is passed through unchanged as 'Left'.
parseHal :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseHal fp = fmap toDocs <$> readCsvHal fp
  where
    toDocs (_header, rows) = V.toList (V.map csvHal2doc rows)

-- | In-memory variant of 'parseHal': decode the bytes with
-- 'readCsvHalLazyBS', drop the header and convert every row with
-- 'csvHal2doc'.
parseHal' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseHal' bs = fmap toDocs (readCsvHalLazyBS bs)
  where
    toDocs (_header, rows) = V.toList (V.map csvHal2doc rows)

------------------------------------------------------------------------

-- | Read a CSV file with this module's 'readFile' and convert every row
-- to a 'HyperdataDocument' with 'csv2doc'. The header is dropped; a
-- decode error is passed through unchanged as 'Left'.
parseCsv :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseCsv fp = do
  parsed <- readFile fp
  pure (toDocs <$> parsed)
  where
    toDocs (_header, rows) = V.toList (V.map csv2doc rows)

{-
parseCsv' ::  BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseCsv' bs = (V.toList . V.map csv2doc . snd) <$> readCsvLazyBS Comma bs
-}

-- | Parse an in-memory CSV corpus into documents.
--
-- The input is first decoded as comma-separated; if that fails it is decoded
-- again as tab-separated. Every decoded row is converted with 'csv2doc'.
--
-- When both attempts fail, the returned 'Left' carries both error messages.
-- Reporting only the tab-separated error would hide the real problem whenever
-- the input is a comma-separated file that is genuinely malformed.
parseCsv' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseCsv' bs = fmap rowsToDocs decoded
  where
    decoded = case readCsvLazyBS Comma bs of
      Right res     -> Right res
      Left commaErr -> case readCsvLazyBS Tab bs of
        Right res   -> Right res
        Left tabErr -> Left (bothFailed commaErr tabErr)

    -- Keep the two messages distinguishable so the caller can tell which
    -- delimiter produced which error.
    bothFailed :: Prelude.String -> Prelude.String -> Prelude.String
    bothFailed commaErr tabErr =
      Prelude.concat
        [ "comma-separated: ", commaErr
        , "; tab-separated: ", tabErr
        ]

    -- The header half of the decoded pair is not needed, only the rows.
    rowsToDocs parsed = V.toList (V.map csv2doc (snd parsed))

------------------------------------------------------------------------
-- Csv v3 weighted for phylo

-- | One row of a weighted CSV corpus: title, source, abstract and authors as
-- text, the publication date split into year, month and day, and a numeric
-- weight. All fields are strict.
data Csv' = Csv'
  { csv'_title             :: !Text
  , csv'_source            :: !Text
  , csv'_publication_year  :: !Int
  , csv'_publication_month :: !Int
  , csv'_publication_day   :: !Int
  , csv'_abstract          :: !Text
  , csv'_authors           :: !Text
  , csv'_weight            :: !Double
  }
  deriving (Show)


-- | Decode a 'Csv'' from a named record. The columns are looked up by name,
-- in constructor field order: @title@, @source@, @publication_year@,
-- @publication_month@, @publication_day@, @abstract@, @authors@, @weight@.
instance FromNamedRecord Csv' where
  parseNamedRecord r =
    Csv'
      <$> r .: "title"
      <*> r .: "source"
      <*> r .: "publication_year"
      <*> r .: "publication_month"
      <*> r .: "publication_day"
      <*> r .: "abstract"
      <*> r .: "authors"
      <*> r .: "weight"

-- | Read the file at the given path with 'BL.readFile' and decode it by
-- column name into 'Csv'' rows, using 'csvDecodeOptions' with the 'Tab'
-- delimiter.
--
-- There is no error channel in the result type: if decoding fails, the
-- decoder's message is handed to 'panic'.
readWeightedCsv :: FilePath -> IO (Header, Vector Csv')
readWeightedCsv fp = decodeOrPanic <$> BL.readFile fp
  where
    decodeOrPanic contents =
      case decodeByNameWith (csvDecodeOptions Tab) contents of
        Right corpus -> corpus
        Left  err    -> panic (pack err)