module Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
where
import Data.Text (Text)
import NLP.RAKE.Text
import Gargantext.Core.Text.Samples.EN (stopList)
import Gargantext.Prelude
-- | Keep a leading portion of a list.
--
-- @select part ns@ returns the first @n@ elements of @ns@, where
-- @n = round (part * length ns)@.
--
-- * @part@ is used as a multiplier on the list length; it is not
--   validated. Because the result is handed to 'take', a product that
--   rounds to zero or below yields @[]@ and one that exceeds the length
--   yields the whole list.
-- * 'round' at 'Double' rounds halves to the nearest even integer, so
--   @select 0.5@ on a five-element list keeps two elements, not three.
select :: Double -> [a] -> [a]
select part ns = take n ns
  where
    -- Number of elements to keep, derived from the full list length.
    n :: Int
    n = round (part * fromIntegral (length ns))
-- | Score candidate terms of a text with the RAKE library.
--
-- The input is first passed through 'pSplitter'; the result is then given
-- to 'candidates' together with 'hardStopList' and the library defaults
-- 'defaultNosplit' and 'defaultNolist'.
multiterms_rake :: Text -> [WordScore]
multiterms_rake =
  candidates hardStopList defaultNosplit defaultNolist . pSplitter
-- | Stop-word map used by 'multiterms_rake', built with 'mkStopwordsStr'
-- from the 'stopList' imported from "Gargantext.Core.Text.Samples.EN".
hardStopList :: StopwordsMap
hardStopList = mkStopwordsStr stopList