Copyright	(c) CNRS 2017
License	AGPL + CECILL v3
Maintainer	team@gargantext.org
Stability	experimental
Portability	POSIX
Safe Haskell	Safe-Inferred
Language	Haskell2010

Gargantext.Core.Text.Corpus.Parsers

Description

Gargantext enables the analysis of semi-structured text, which must be parsed before it can be analyzed.

The parsers assume that the format of the text is known (see the FileType data type); the right parser is then chosen accordingly from the list of available parsers.

This module mainly describes how to add a new parser to Gargantext; please follow the types.

Synopsis

data FileFormat
- = Plain
- | ZIP
data FileType
- = WOS
- | RIS
- | RisPresse
- | CsvGargV3
- | CsvHal
- | Iramuteq
- | JSON
- | Istex
newtype ParseFormatError = ParseFormatError {
- _ParseFormatError :: Text
}
clean :: ByteString -> ByteString
parseFile :: FileType -> FileFormat -> FilePath -> IO (Either Text [HyperdataDocument])
cleanText :: Text -> Text
parseFormatC :: forall m. MonadBaseControl IO m => FileType -> FileFormat -> ByteString -> m (Either ParseFormatError (Integer, ConduitT () HyperdataDocument IO ()))
splitOn :: NgramsType -> Maybe Text -> Text -> [Text]
etale :: [HyperdataDocument] -> [HyperdataDocument]

Documentation

data FileFormat #

Constructors

Plain
ZIP

Instances

Instances details

Arbitrary FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods arbitrary :: Gen FileFormat # shrink :: FileFormat -> [FileFormat] #
FromJSON FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods parseJSON :: Value -> Parser FileFormat # parseJSONList :: Value -> Parser [FileFormat] #
ToJSON FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods toJSON :: FileFormat -> Value # toEncoding :: FileFormat -> Encoding # toJSONList :: [FileFormat] -> Value # toEncodingList :: [FileFormat] -> Encoding #
Generic FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Associated Types type Rep FileFormat :: Type -> Type # Methods from :: FileFormat -> Rep FileFormat x # to :: Rep FileFormat x -> FileFormat #
Show FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods showsPrec :: Int -> FileFormat -> ShowS # show :: FileFormat -> String # showList :: [FileFormat] -> ShowS #
Eq FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods (==) :: FileFormat -> FileFormat -> Bool # (/=) :: FileFormat -> FileFormat -> Bool #
FromHttpApiData FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods parseUrlPiece :: Text -> Either Text FileFormat # parseHeader :: ByteString -> Either Text FileFormat # parseQueryParam :: Text -> Either Text FileFormat #
ToHttpApiData FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods toUrlPiece :: FileFormat -> Text # toEncodedUrlPiece :: FileFormat -> Builder # toHeader :: FileFormat -> ByteString # toQueryParam :: FileFormat -> Text #
ToParamSchema FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods toParamSchema :: forall (t :: SwaggerKind Type). Proxy FileFormat -> ParamSchema t #
ToSchema FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types Methods declareNamedSchema :: Proxy FileFormat -> Declare (Definitions Schema) NamedSchema #
type Rep FileFormat #
Instance details Defined in Gargantext.API.Node.Corpus.New.Types type Rep FileFormat = D1 ('MetaData "FileFormat" "Gargantext.API.Node.Corpus.New.Types" "gargantext-0.0.7.1.5.3-inplace" 'False) (C1 ('MetaCons "Plain" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "ZIP" 'PrefixI 'False) (U1 :: Type -> Type))

data FileType #

Depending on the format of the input file, different parsers are available.

Constructors

WOS
RIS
RisPresse
CsvGargV3
CsvHal
Iramuteq
JSON
Istex

Instances

Instances details

Show FileType #
Instance details Defined in Gargantext.Core.Text.Corpus.Parsers Methods showsPrec :: Int -> FileType -> ShowS # show :: FileType -> String # showList :: [FileType] -> ShowS #
Eq FileType #
Instance details Defined in Gargantext.Core.Text.Corpus.Parsers Methods (==) :: FileType -> FileType -> Bool # (/=) :: FileType -> FileType -> Bool #

newtype ParseFormatError #

Constructors

ParseFormatError
Fields _ParseFormatError :: Text

Instances

Instances details

IsString ParseFormatError #
Instance details Defined in Gargantext.Core.Text.Corpus.Parsers Methods fromString :: String -> ParseFormatError #
Show ParseFormatError #
Instance details Defined in Gargantext.Core.Text.Corpus.Parsers Methods showsPrec :: Int -> ParseFormatError -> ShowS # show :: ParseFormatError -> String # showList :: [ParseFormatError] -> ShowS #
ToHumanFriendlyError ParseFormatError #
Instance details Defined in Gargantext.Core.Text.Corpus.Parsers Methods mkHumanFriendly :: ParseFormatError -> Text #
Eq ParseFormatError #
Instance details Defined in Gargantext.Core.Text.Corpus.Parsers Methods (==) :: ParseFormatError -> ParseFormatError -> Bool # (/=) :: ParseFormatError -> ParseFormatError -> Bool #
Ord ParseFormatError #
Instance details Defined in Gargantext.Core.Text.Corpus.Parsers Methods compare :: ParseFormatError -> ParseFormatError -> Ordering # (<) :: ParseFormatError -> ParseFormatError -> Bool # (<=) :: ParseFormatError -> ParseFormatError -> Bool # (>) :: ParseFormatError -> ParseFormatError -> Bool # (>=) :: ParseFormatError -> ParseFormatError -> Bool # max :: ParseFormatError -> ParseFormatError -> ParseFormatError # min :: ParseFormatError -> ParseFormatError -> ParseFormatError #

clean :: ByteString -> ByteString #

parseFile :: FileType -> FileFormat -> FilePath -> IO (Either Text [HyperdataDocument]) #

Parse a file into documents. TODO: manage errors here. TODO: to ease debugging, maybe add the file path to the error message.

cleanText :: Text -> Text #

parseFormatC :: forall m. MonadBaseControl IO m => FileType -> FileFormat -> ByteString -> m (Either ParseFormatError (Integer, ConduitT () HyperdataDocument IO ())) #

splitOn :: NgramsType -> Maybe Text -> Text -> [Text] #

etale :: [HyperdataDocument] -> [HyperdataDocument] #