lang/laantas-script/Split.hs

module Split (split) where

import Glyphs
import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Char as Char
import Data.Map (Map, (!))
import qualified Data.Map.Strict as Map
import Data.Void


type P = Parsec Void Text


lcChar :: P Char
lcChar = Char.toLower <$> anySingle

longestWith :: String -> (Text -> Maybe a) -> P a
longestWith name p = try $ go . Text.singleton =<< lcChar where
  go acc = case p acc of
    Nothing -> fail $ "longestWith " <> name <> ": " <> show acc
    Just x  -> try (do c <- lcChar; go $ Text.snoc acc c) <|> pure x

unthorn :: Text -> Text
unthorn = Text.map \case 'þ' -> 'ƶ'; 'ð' -> 'ƶ'; c -> c

maxFrom :: String -> Map Text a -> P a
maxFrom name i = longestWith name \x -> Map.lookup (unthorn x) i

initial :: P Glyph
initial = maxFrom "initial" initials

medial :: P [Diacritic]
medial = [] <$ chunk "\\" <|> maxFrom "medial" medials

final :: P Glyph
final = maxFrom "final" finals

ivowel :: P Glyph
ivowel = maxFrom "vowel" vowels

word :: P [EGlyph]
word = [is <> f <> concat p | is <- some initMed, f <- fin, p <- many punct]
 where
  initMed = try $
    (,) <$> initial <*> medial <|>
    [(v, []) | v <- ivowel] <|>
    dash
  fin = maybe [] (\x -> [(x, [])]) <$> optional final

number :: P [EGlyph]
number = some (digit <|> hash) where
  hash = (num, []) <$ chunk "#"
  digit = [(numbers ! Char.digitToInt i, []) | i <- digitChar]

punct :: P [EGlyph]
punct = [[(p, [])] | p <- maxFrom "punctuation" punctuation] <* space

dash :: P EGlyph
dash = (wave, []) <$ chunk "–"

text :: P [[EGlyph]]
text = space *> many (segment <* space) <* eof where
  segment = punct <|> number <|> word

split :: Text -> [[EGlyph]]
split = either (error . errorBundlePretty) id . parse text ""