Copyright | (c) CNRS 2017-Present |
---|---|
License | AGPL + CECILL v3 |
Maintainer | team@gargantext.org |
Stability | experimental |
Portability | POSIX |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
CSV parser for Gargantext corpus files.
Synopsis
- headerCsvGargV3 :: Header
- data CsvGargV3 = CsvGargV3 {
- d_docId :: !Int
- d_title :: !Text
- d_source :: !Text
- d_publication_year :: !Int
- d_publication_month :: !Int
- d_publication_day :: !Int
- d_abstract :: !Text
- d_authors :: !Text
- toDoc :: CsvGargV3 -> HyperdataDocument
- toDocs :: Vector CsvDoc -> [CsvGargV3]
- fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
- splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
- type Mean = Double
- docsSize :: Vector CsvDoc -> Mean
- newtype IntOrDec = IntOrDec Int
- unIntOrDec :: IntOrDec -> Int
- fromMIntOrDec :: Int -> Maybe IntOrDec -> Int
- defaultYear :: Int
- defaultMonth :: Int
- defaultDay :: Int
- data CsvDoc = CsvDoc {
- csv_title :: !Text
- csv_source :: !Text
- csv_publication_year :: !(Maybe IntOrDec)
- csv_publication_month :: !(Maybe Int)
- csv_publication_day :: !(Maybe Int)
- csv_abstract :: !Text
- csv_authors :: !Text
- hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc
- data Delimiter
- csvDecodeOptions :: Delimiter -> DecodeOptions
- csvEncodeOptions :: Delimiter -> EncodeOptions
- delimiter :: Delimiter -> Word8
- readFileLazy :: FromNamedRecord a => proxy a -> Delimiter -> FilePath -> IO (Either Text (Header, Vector a))
- readFileStrict :: FromNamedRecord a => proxy a -> Delimiter -> FilePath -> IO (Either Text (Header, Vector a))
- readByteStringLazy :: FromNamedRecord a => proxy a -> Delimiter -> ByteString -> Either Text (Header, Vector a)
- readByteStringStrict :: FromNamedRecord a => proxy a -> Delimiter -> ByteString -> Either Text (Header, Vector a)
- readCSVFile :: FilePath -> IO (Either Text (Header, Vector CsvDoc))
- readCsvLazyBS :: Delimiter -> ByteString -> Either Text (Header, Vector CsvDoc)
- readCsvHal :: FilePath -> IO (Either Text (Header, Vector CsvHal))
- readCsvHalLazyBS :: ByteString -> Either Text (Header, Vector CsvHal)
- readCsvHalBSStrict :: ByteString -> Either Text (Header, Vector CsvHal)
- writeFile :: FilePath -> (Header, Vector CsvDoc) -> IO ()
- writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO ()
- hyperdataDocument2csv :: [HyperdataDocument] -> ByteString
- data CsvHal = CsvHal {
- csvHal_title :: !Text
- csvHal_source :: !Text
- csvHal_publication_year :: !Integer
- csvHal_publication_month :: !Int
- csvHal_publication_day :: !Int
- csvHal_abstract :: !Text
- csvHal_authors :: !Text
- csvHal_url :: !Text
- csvHal_isbn_s :: !Text
- csvHal_issue_s :: !Text
- csvHal_journalPublisher_s :: !Text
- csvHal_language_s :: !Text
- csvHal_doiId_s :: !Text
- csvHal_authId_i :: !Text
- csvHal_instStructId_i :: !Text
- csvHal_deptStructId_i :: !Text
- csvHal_labStructId_i :: !Text
- csvHal_rteamStructId_i :: !Text
- csvHal_docType_s :: !Text
- csvHal2doc :: CsvHal -> HyperdataDocument
- csv2doc :: CsvDoc -> HyperdataDocument
- parseHal :: FilePath -> IO (Either Text [HyperdataDocument])
- parseHal' :: ByteString -> Either Text [HyperdataDocument]
- parseCsv :: FilePath -> IO (Either Text [HyperdataDocument])
- parseCsv' :: ByteString -> Either Text [HyperdataDocument]
- parseCsvC :: ByteString -> Either Text (Integer, ConduitT () HyperdataDocument Identity ())
- data Csv' = Csv' {
- csv'_title :: !Text
- csv'_source :: !Text
- csv'_publication_year :: !Int
- csv'_publication_month :: !Int
- csv'_publication_day :: !Int
- csv'_abstract :: !Text
- csv'_authors :: !Text
- csv'_weight :: !Double
- readWeightedCsv :: FilePath -> IO (Header, Vector Csv')
Documentation
CsvGargV3 | |
|
toDoc :: CsvGargV3 -> HyperdataDocument #
Convert a CsvGargV3 document to a HyperdataDocument.
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc #
Split a document in its context. TODO: adapt the size of the paragraph according to the corpus average.
unIntOrDec :: IntOrDec -> Int #
defaultYear :: Int #
defaultMonth :: Int #
defaultDay :: Int #
CsvDoc | |
|
Instances
Show CsvDoc # | |
FromNamedRecord CsvDoc # | |
Defined in Gargantext.Core.Text.Corpus.Parsers.CSV parseNamedRecord :: NamedRecord -> Parser CsvDoc # | |
ToNamedRecord CsvDoc # | |
Defined in Gargantext.Core.Text.Corpus.Parsers.CSV toNamedRecord :: CsvDoc -> NamedRecord # |
readFileLazy :: FromNamedRecord a => proxy a -> Delimiter -> FilePath -> IO (Either Text (Header, Vector a)) #
readFileStrict :: FromNamedRecord a => proxy a -> Delimiter -> FilePath -> IO (Either Text (Header, Vector a)) #
readByteStringLazy :: FromNamedRecord a => proxy a -> Delimiter -> ByteString -> Either Text (Header, Vector a) #
readByteStringStrict :: FromNamedRecord a => proxy a -> Delimiter -> ByteString -> Either Text (Header, Vector a) #
readCsvLazyBS :: Delimiter -> ByteString -> Either Text (Header, Vector CsvDoc) #
TODO: reimplement readCsvLazyBS using readByteStringLazy.
readCsvHalLazyBS :: ByteString -> Either Text (Header, Vector CsvHal) #
TODO: reimplement readCsvHalLazyBS using readByteStringLazy.
readCsvHalBSStrict :: ByteString -> Either Text (Header, Vector CsvHal) #
writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO () #
CsvHal | |
|
Instances
Show CsvHal # | |
FromNamedRecord CsvHal # | |
Defined in Gargantext.Core.Text.Corpus.Parsers.CSV parseNamedRecord :: NamedRecord -> Parser CsvHal # | |
ToNamedRecord CsvHal # | |
Defined in Gargantext.Core.Text.Corpus.Parsers.CSV toNamedRecord :: CsvHal -> NamedRecord # |
csvHal2doc :: CsvHal -> HyperdataDocument #
csv2doc :: CsvDoc -> HyperdataDocument #
parseHal' :: ByteString -> Either Text [HyperdataDocument] #
parseCsv' :: ByteString -> Either Text [HyperdataDocument] #
parseCsvC :: ByteString -> Either Text (Integer, ConduitT () HyperdataDocument Identity ()) #
Csv' | |
|