module Split (split) where import Glyphs import Text.Megaparsec import Text.Megaparsec.Char import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Char as Char import Data.Map (Map, (!)) import qualified Data.Map.Strict as Map import Data.Void type P = Parsec Void Text lcChar :: P Char lcChar = Char.toLower <$> anySingle longestWith :: String -> (Text -> Maybe a) -> P a longestWith name p = try $ go . Text.singleton =<< lcChar where go acc = case p acc of Nothing -> fail $ "longestWith " <> name <> ": " <> show acc Just x -> try (do c <- lcChar; go $ Text.snoc acc c) <|> pure x unthorn :: Text -> Text unthorn = Text.map \case 'þ' -> 'ƶ'; 'ð' -> 'ƶ'; c -> c maxFrom :: String -> Map Text a -> P a maxFrom name i = longestWith name \x -> Map.lookup (unthorn x) i initial :: P Glyph initial = maxFrom "initial" initials medial :: P [Diacritic] medial = [] <$ chunk "\\" <|> maxFrom "medial" medials final :: P Glyph final = maxFrom "final" finals ivowel :: P Glyph ivowel = maxFrom "vowel" vowels word :: P [EGlyph] word = [is <> f <> concat p | is <- some initMed, f <- fin, p <- many punct] where initMed = try $ (,) <$> initial <*> medial <|> [(v, []) | v <- ivowel] <|> dash fin = maybe [] (\x -> [(x, [])]) <$> optional final number :: P [EGlyph] number = some (digit <|> hash) where hash = (num, []) <$ chunk "#" digit = [(numbers ! Char.digitToInt i, []) | i <- digitChar] punct :: P [EGlyph] punct = [[(p, [])] | p <- maxFrom "punctuation" punctuation] <* space dash :: P EGlyph dash = (wave, []) <$ chunk "–" text :: P [[EGlyph]] text = space *> many (segment <* space) <* eof where segment = punct <|> number <|> word split :: Text -> [[EGlyph]] split = either (error . errorBundlePretty) id . parse text ""