69 lines
1.8 KiB
Haskell
69 lines
1.8 KiB
Haskell
module Split (split) where
|
||
|
||
import Glyphs
|
||
import Text.Megaparsec
|
||
import Text.Megaparsec.Char
|
||
import Data.Text (Text)
|
||
import qualified Data.Text as Text
|
||
import qualified Data.Char as Char
|
||
import Data.Map (Map, (!))
|
||
import qualified Data.Map.Strict as Map
|
||
import Data.Void
|
||
|
||
|
||
type P = Parsec Void Text
|
||
|
||
|
||
lcChar :: P Char
|
||
lcChar = Char.toLower <$> anySingle
|
||
|
||
longestWith :: String -> (Text -> Maybe a) -> P a
|
||
longestWith name p = try $ go . Text.singleton =<< lcChar where
|
||
go acc = case p acc of
|
||
Nothing -> fail $ "longestWith " <> name <> ": " <> show acc
|
||
Just x -> try (do c <- lcChar; go $ Text.snoc acc c) <|> pure x
|
||
|
||
unthorn :: Text -> Text
|
||
unthorn = Text.map \case 'þ' -> 'ƶ'; 'ð' -> 'ƶ'; c -> c
|
||
|
||
maxFrom :: String -> Map Text a -> P a
|
||
maxFrom name i = longestWith name \x -> Map.lookup (unthorn x) i
|
||
|
||
initial :: P Glyph
|
||
initial = maxFrom "initial" initials
|
||
|
||
medial :: P [Diacritic]
|
||
medial = [] <$ chunk "\\" <|> maxFrom "medial" medials
|
||
|
||
final :: P Glyph
|
||
final = maxFrom "final" finals
|
||
|
||
ivowel :: P Glyph
|
||
ivowel = maxFrom "vowel" vowels
|
||
|
||
word :: P [EGlyph]
|
||
word = [is <> f <> concat p | is <- some initMed, f <- fin, p <- many punct]
|
||
where
|
||
initMed = try $
|
||
(,) <$> initial <*> medial <|>
|
||
[(v, []) | v <- ivowel] <|>
|
||
dash
|
||
fin = maybe [] (\x -> [(x, [])]) <$> optional final
|
||
|
||
number :: P [EGlyph]
|
||
number = some (digit <|> hash) where
|
||
hash = (num, []) <$ chunk "#"
|
||
digit = [(numbers ! Char.digitToInt i, []) | i <- digitChar]
|
||
|
||
punct :: P [EGlyph]
|
||
punct = [[(p, [])] | p <- maxFrom "punctuation" punctuation] <* space
|
||
|
||
dash :: P EGlyph
|
||
dash = (wave, []) <$ chunk "–"
|
||
|
||
text :: P [[EGlyph]]
|
||
text = space *> many (segment <* space) <* eof where
|
||
segment = punct <|> number <|> word
|
||
|
||
split :: Text -> [[EGlyph]]
|
||
split = either (error . errorBundlePretty) id . parse text ""
|