From ebde4773e693895ec77855d42eff9306a964fec5 Mon Sep 17 00:00:00 2001 From: Rhiannon Morris Date: Fri, 4 Jun 2021 05:21:47 +0200 Subject: [PATCH] use megaparsec for ebnf --- langfilter/Ebnf.hs | 72 ++++++++++++++++++++++++------------- langfilter/langfilter.cabal | 1 + 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/langfilter/Ebnf.hs b/langfilter/Ebnf.hs index 254b31f..41ed72e 100644 --- a/langfilter/Ebnf.hs +++ b/langfilter/Ebnf.hs @@ -1,12 +1,15 @@ module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where import Prelude hiding (span) -import Data.Maybe (fromMaybe) import Data.List (intercalate) import Data.Char (isAlphaNum) -import Text.ParserCombinators.ReadP +import Text.Megaparsec hiding (parse) +import Text.Megaparsec.Char hiding (char') +import qualified Text.Megaparsec as MP import Text.Pandoc.Definition -import Data.Text (Text, pack, unpack) +import Data.Text (Text) +import qualified Data.Text as Text +import Data.Void data Rule = Rule Text Def @@ -28,7 +31,7 @@ data Def = makeEbnf :: Block -> Block makeEbnf (CodeBlock (_, cs, _) txt) | "ebnf" `elem` cs - = render $ fromMaybe (error "invalid ebnf") $ parse $ unpack txt + = render $ either (error . errorBundlePretty) id $ parse txt makeEbnf b = b render :: [Rule] -> Block @@ -85,64 +88,85 @@ renderDefAt p = \case Com txt -> [span "ebnf-com" txt] +type P = Parsec Void Text -parse :: String -> Maybe [Rule] -parse str = - case readP_to_S (parse' <* eof) str of - [(res, _)] -> Just res - _ -> Nothing +parse :: Text -> Either (ParseErrorBundle Text Void) [Rule] +parse str = MP.parse (parse' <* eof) "" str -parse' :: ReadP [Rule] +parse' :: Parsec Void Text [Rule] parse' = many rule +rule :: P Rule rule = choice [Rule <$> nt <* sym "=" <*> def <* sym ";", RCom <$> comment] -nt = pack . unwords <$> many1 word - where word = munch1 isWordChar <* skipSpaces - isWordChar c = c == '_' || c == '-' || isAlphaNum c +nt :: P Text +nt = Text.unwords <$> some (word <* space) where + word = Text.cons <$> first <*> takeWhileP Nothing isWordChar + first = letterChar + isWordChar c = c == '_' || c == '-' || isAlphaNum c +def :: P Def def = ors +ors :: P Def ors = list Or <$> seqs `sepBy1` (sym "|") +seqs :: P Def seqs = list Seq <$> sub `sepBy1` (sym ",") +sub :: P Def sub = do lhs <- adef rhs <- optMaybe $ sym "-" *> adef pure $ maybe lhs (Sub lhs) rhs +adef :: P Def adef = choice $ [N <$> nt, T <$> term, S <$> special, + Com <$> comment, bracketed id '(' ')', bracketed Opt '[' ']', - bracketed Many '{' '}', - Com <$> comment] + bracketed Many '{' '}'] -term = pack <$> choice [str '\'', str '"'] +term :: P Text +term = choice [str '\'', str '"'] -special = pack <$> str '?' +special :: P Text +special = str '?' -str c = lexeme $ between (char c) (char c) (munch1 (/= c)) +str :: Char -> P Text +str c = lexeme $ between (char c) (char c) (takeWhileP Nothing (/= c)) -comment = pack <$> lexeme go where - go = concat <$> sequence - [string "(*", - concat <$> many (choice [go, munch1 \c -> c /= '(' && c /= '*']), - string "*)"] +comment :: P Text +comment = do try (string_ "(*"); go ["(*"] 1 where + go :: [Text] -> Int -> P Text + go acc 0 = pure $ mconcat $ reverse acc + go acc i = choice + [fragment (string "(*") (+ 1) acc i, + fragment (string "*)") (subtract 1) acc i, + fragment (takeWhileP Nothing notComChar) id acc i] + fragment p f acc i = do str <- p; go (str : acc) (f i) + notComChar c = c /= '(' && c /= '*' + string_ str = () <$ string str +bracketed :: (Def -> a) -> Char -> Char -> P a bracketed f o c = f <$> between (char' o) (char' c) def +list :: ([a] -> a) -> [a] -> a list _ [x] = x list f xs = f xs +sym :: Text -> P Text sym str = lexeme $ string str +char' :: Char -> P Char char' c = lexeme $ char c -lexeme p = p <* skipSpaces +lexeme :: P a -> P a +lexeme p = p <* space +optMaybe :: P a -> P (Maybe a) optMaybe = option Nothing . fmap Just diff --git a/langfilter/langfilter.cabal b/langfilter/langfilter.cabal index 9173881..3ee25cf 100644 --- a/langfilter/langfilter.cabal +++ b/langfilter/langfilter.cabal @@ -36,6 +36,7 @@ executable langfilter base ^>= 4.14.0.0, containers ^>= 0.6.2.1, filepath ^>= 1.4.2.1, + megaparsec ^>= 9.0.1, process ^>= 1.6.11.0, pandoc-types ^>= 1.22, text,