lang/laantas-script/Split.hs

60 lines
1.6 KiB
Haskell

{-# OPTIONS_GHC -fdefer-typed-holes #-}
module Split (split) where
import Glyphs
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Char as Char
import Data.Map (Map, (!))
import qualified Data.Map.Strict as Map
import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Void
type P = Parsec Void Text
longestWith :: String -> (Text -> Maybe a) -> P a
longestWith name p = try $ go . Text.singleton =<< anySingle where
go acc = case p acc of
Nothing -> fail $ "longestWith " <> name <> ": " <> show acc
Just x -> try (do c <- anySingle; go $ Text.snoc acc c) <|> pure x
maxFrom :: String -> Map Text a -> P a
maxFrom name i = longestWith name \x -> Map.lookup x i
initial :: P Glyph
initial = maxFrom "initial" initials
medial :: P Segs
medial = maxFrom "medial" medials
final :: P Glyph
final = maxFrom "final" finals
ivowel :: P Glyph
ivowel = maxFrom "vowel" vowels
word :: P [Piece]
word = (<>) <$> some initMed <*> fin where
initMed = try $
[(i, ms) | i <- initial, ms <- some medial] <|>
[(v, []) | v <- ivowel]
fin = maybe [] (\x -> [(x, [])]) <$> optional final
number :: P [Piece]
number = [[h1] <> ns <> [h2] | h1 <- hash, ns <- some digit, h2 <- hash] where
hash = (num, []) <$ chunk "#"
digit = [(numbers ! Char.digitToInt i, []) | i <- digitChar]
punct :: P [Piece]
punct = [[(p, [])] | p <- maxFrom "punctuation" punctuation]
text :: P [[Piece]]
text = space *> many (segment <* space) <* eof where
segment = punct <|> number <|> word
split :: Text -> [[Piece]]
split = either (error . errorBundlePretty) id . parse text ""