module Split (split) where import Glyphs import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Char as Char import Data.Map (Map, (!)) import qualified Data.Map.Strict as Map import Text.Megaparsec import Text.Megaparsec.Char import Data.Void type P = Parsec Void Text lcChar :: P Char lcChar = Char.toLower <$> anySingle longestWith :: String -> (Text -> Maybe a) -> P a longestWith name p = try $ go . Text.singleton =<< lcChar where go acc = case p acc of Nothing -> fail $ "longestWith " <> name <> ": " <> show acc Just x -> try (do c <- lcChar; go $ Text.snoc acc c) <|> pure x maxFrom :: String -> Map Text a -> P a maxFrom name i = longestWith name \x -> Map.lookup x i initial :: P Glyph initial = maxFrom "initial" initials medial :: P Segs medial = P [] <$ chunk "\\" <|> maxFrom "medial" medials final :: P Glyph final = maxFrom "final" finals ivowel :: P Glyph ivowel = maxFrom "vowel" vowels word :: P [Piece] word = (<>) <$> some initMed <*> fin where initMed = try $ [(i, [m]) | i <- initial, m <- medial] <|> [(v, []) | v <- ivowel] <|> dash fin = maybe [] (\x -> [(x, [])]) <$> optional final number :: P [Piece] number = some (digit <|> hash) where hash = (num, []) <$ chunk "#" digit = [(numbers ! Char.digitToInt i, []) | i <- digitChar] punct :: P [Piece] punct = [[(p, [])] | p <- maxFrom "punctuation" punctuation] dash :: P Piece dash = (wave, []) <$ chunk "–" text :: P [[Piece]] text = space *> many (segment <* space) <* eof where segment = punct <|> number <|> word split :: Text -> [[Piece]] split = either (error . errorBundlePretty) id . parse text ""