61 lines
1.6 KiB
Haskell
61 lines
1.6 KiB
Haskell
|
{-# OPTIONS_GHC -fdefer-typed-holes #-}
|
||
|
|
||
|
module Split (split) where
|
||
|
|
||
|
import Glyphs
|
||
|
import Data.Text (Text)
|
||
|
import qualified Data.Text as Text
|
||
|
import qualified Data.Char as Char
|
||
|
import Data.Map (Map, (!))
|
||
|
import qualified Data.Map.Strict as Map
|
||
|
import Text.Megaparsec
|
||
|
import Text.Megaparsec.Char
|
||
|
import Data.Void
|
||
|
|
||
|
|
||
|
type P = Parsec Void Text
|
||
|
|
||
|
|
||
|
longestWith :: String -> (Text -> Maybe a) -> P a
|
||
|
longestWith name p = try $ go . Text.singleton =<< anySingle where
|
||
|
go acc = case p acc of
|
||
|
Nothing -> fail $ "longestWith " <> name <> ": " <> show acc
|
||
|
Just x -> try (do c <- anySingle; go $ Text.snoc acc c) <|> pure x
|
||
|
|
||
|
maxFrom :: String -> Map Text a -> P a
|
||
|
maxFrom name i = longestWith name \x -> Map.lookup x i
|
||
|
|
||
|
initial :: P Glyph
|
||
|
initial = maxFrom "initial" initials
|
||
|
|
||
|
medial :: P Segs
|
||
|
medial = maxFrom "medial" medials
|
||
|
|
||
|
final :: P Glyph
|
||
|
final = maxFrom "final" finals
|
||
|
|
||
|
ivowel :: P Glyph
|
||
|
ivowel = maxFrom "vowel" vowels
|
||
|
|
||
|
word :: P [Piece]
|
||
|
word = (<>) <$> some initMed <*> fin where
|
||
|
initMed = try $
|
||
|
[(i, ms) | i <- initial, ms <- some medial] <|>
|
||
|
[(v, []) | v <- ivowel]
|
||
|
fin = maybe [] (\x -> [(x, [])]) <$> optional final
|
||
|
|
||
|
number :: P [Piece]
|
||
|
number = [[h1] <> ns <> [h2] | h1 <- hash, ns <- some digit, h2 <- hash] where
|
||
|
hash = (num, []) <$ chunk "#"
|
||
|
digit = [(numbers ! Char.digitToInt i, []) | i <- digitChar]
|
||
|
|
||
|
punct :: P [Piece]
|
||
|
punct = [[(p, [])] | p <- maxFrom "punctuation" punctuation]
|
||
|
|
||
|
text :: P [[Piece]]
|
||
|
text = space *> many (segment <* space) <* eof where
|
||
|
segment = punct <|> number <|> word
|
||
|
|
||
|
split :: Text -> [[Piece]]
|
||
|
split = either (error . errorBundlePretty) id . parse text ""
|