lang/laantas-script/Split.hs

70 lines
1.8 KiB
Haskell
Raw Normal View History

2021-04-28 06:29:21 -04:00
module Split (split) where
import Glyphs
2021-04-29 13:01:42 -04:00
import Text.Megaparsec
import Text.Megaparsec.Char
2021-04-28 06:29:21 -04:00
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Char as Char
import Data.Map (Map, (!))
import qualified Data.Map.Strict as Map
import Data.Void
type P = Parsec Void Text
2021-04-29 05:55:54 -04:00
lcChar :: P Char
lcChar = Char.toLower <$> anySingle
2021-04-28 06:29:21 -04:00
longestWith :: String -> (Text -> Maybe a) -> P a
2021-04-29 05:55:54 -04:00
longestWith name p = try $ go . Text.singleton =<< lcChar where
2021-04-28 06:29:21 -04:00
go acc = case p acc of
Nothing -> fail $ "longestWith " <> name <> ": " <> show acc
2021-04-29 05:55:54 -04:00
Just x -> try (do c <- lcChar; go $ Text.snoc acc c) <|> pure x
2021-04-28 06:29:21 -04:00
2023-12-25 15:44:48 -05:00
unthorn :: Text -> Text
unthorn = Text.map \case 'þ' -> 'ƶ'; 'ð' -> 'ƶ'; c -> c
2021-04-28 06:29:21 -04:00
maxFrom :: String -> Map Text a -> P a
2023-12-25 15:44:48 -05:00
maxFrom name i = longestWith name \x -> Map.lookup (unthorn x) i
2021-04-28 06:29:21 -04:00
initial :: P Glyph
initial = maxFrom "initial" initials
2021-05-10 18:52:17 -04:00
medial :: P [Diacritic]
medial = [] <$ chunk "\\" <|> maxFrom "medial" medials
2021-04-28 06:29:21 -04:00
final :: P Glyph
final = maxFrom "final" finals
ivowel :: P Glyph
ivowel = maxFrom "vowel" vowels
2021-05-10 18:52:17 -04:00
word :: P [EGlyph]
2021-04-29 13:01:42 -04:00
word = [is <> f <> concat p | is <- some initMed, f <- fin, p <- many punct]
where
2021-04-28 06:29:21 -04:00
initMed = try $
2021-05-10 18:52:17 -04:00
(,) <$> initial <*> medial <|>
2021-04-29 05:55:54 -04:00
[(v, []) | v <- ivowel] <|>
dash
2021-04-28 06:29:21 -04:00
fin = maybe [] (\x -> [(x, [])]) <$> optional final
2021-05-10 18:52:17 -04:00
number :: P [EGlyph]
2021-04-29 05:55:54 -04:00
number = some (digit <|> hash) where
2021-04-28 06:29:21 -04:00
hash = (num, []) <$ chunk "#"
digit = [(numbers ! Char.digitToInt i, []) | i <- digitChar]
2021-05-10 18:52:17 -04:00
punct :: P [EGlyph]
2021-04-29 13:01:42 -04:00
punct = [[(p, [])] | p <- maxFrom "punctuation" punctuation] <* space
2021-04-28 06:29:21 -04:00
2021-05-10 18:52:17 -04:00
dash :: P EGlyph
2021-04-29 05:55:54 -04:00
dash = (wave, []) <$ chunk ""
2021-05-10 18:52:17 -04:00
text :: P [[EGlyph]]
2021-04-28 06:29:21 -04:00
text = space *> many (segment <* space) <* eof where
segment = punct <|> number <|> word
2021-05-10 18:52:17 -04:00
split :: Text -> [[EGlyph]]
2021-04-28 06:29:21 -04:00
split = either (error . errorBundlePretty) id . parse text ""