use megaparsec for ebnf

2021-06-04 05:21:47 +02:00 · 2021-06-04 05:21:47 +02:00 · ebde4773e6
commit ebde4773e6
parent 8cd6985cd2
2 changed files with 49 additions and 24 deletions
--- a/langfilter/Ebnf.hs
+++ b/langfilter/Ebnf.hs
@ -1,12 +1,15 @@
 module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where
 import Prelude hiding (span)
 import Data.Maybe (fromMaybe)
 import Data.List (intercalate)
 import Data.Char (isAlphaNum)
-import Text.ParserCombinators.ReadP
+import Text.Megaparsec hiding (parse)
 import Text.Megaparsec.Char hiding (char')
 import qualified Text.Megaparsec as MP
 import Text.Pandoc.Definition
-import Data.Text (Text, pack, unpack)
+import Data.Text (Text)
 import qualified Data.Text as Text
 import Data.Void
 data Rule =
    Rule Text Def
@ -28,7 +31,7 @@ data Def =
 makeEbnf :: Block -> Block
 makeEbnf (CodeBlock (_, cs, _) txt)
  | "ebnf" `elem` cs
-  = render $ fromMaybe (error "invalid ebnf") $ parse $ unpack txt
+  = render $ either (error . errorBundlePretty) id $ parse txt
 makeEbnf b = b
 render :: [Rule] -> Block
@ -85,64 +88,85 @@ renderDefAt p = \case
  Com txt -> [span "ebnf-com" txt]
 type P = Parsec Void Text
-parse :: String -> Maybe [Rule]
+parse :: Text -> Either (ParseErrorBundle Text Void) [Rule]
-parse str =
+parse str = MP.parse (parse' <* eof) "<ebnf>" str
  case readP_to_S (parse' <* eof) str of
    [(res, _)] -> Just res
    _          -> Nothing
-parse' :: ReadP [Rule]
+parse' :: Parsec Void Text [Rule]
 parse' = many rule
 rule :: P Rule
 rule = choice
  [Rule <$> nt <* sym "=" <*> def <* sym ";",
   RCom <$> comment]
-nt = pack . unwords <$> many1 word
+nt :: P Text
-  where word = munch1 isWordChar <* skipSpaces
+nt = Text.unwords <$> some (word <* space) where
  word = Text.cons <$> first <*> takeWhileP Nothing isWordChar
  first = letterChar
  isWordChar c = c == '_' || c == '-' || isAlphaNum c
 def :: P Def
 def = ors
 ors :: P Def
 ors = list Or <$> seqs `sepBy1` (sym "|")
 seqs :: P Def
 seqs = list Seq <$> sub `sepBy1` (sym ",")
 sub :: P Def
 sub = do
  lhs <- adef
  rhs <- optMaybe $ sym "-" *> adef
  pure $ maybe lhs (Sub lhs) rhs
 adef :: P Def
 adef = choice $
  [N <$> nt, T <$> term, S <$> special,
   Com <$> comment,
   bracketed id '(' ')',
   bracketed Opt '[' ']',
-   bracketed Many '{' '}',
+   bracketed Many '{' '}']
   Com <$> comment]
-term = pack <$> choice [str '\'', str '"']
+term :: P Text
 term = choice [str '\'', str '"']
-special = pack <$> str '?'
+special :: P Text
 special = str '?'
-str c = lexeme $ between (char c) (char c) (munch1 (/= c))
+str :: Char -> P Text
 str c = lexeme $ between (char c) (char c) (takeWhileP Nothing (/= c))
-comment = pack <$> lexeme go where
+comment :: P Text
-  go = concat <$> sequence
+comment = do try (string_ "(*"); go ["(*"] 1 where
-    [string "(*",
+  go :: [Text] -> Int -> P Text
-     concat <$> many (choice [go, munch1 \c -> c /= '(' && c /= '*']),
+  go acc 0 = pure $ mconcat $ reverse acc
-     string "*)"]
+  go acc i = choice
    [fragment (string "(*") (+ 1) acc i,
     fragment (string "*)") (subtract 1) acc i,
     fragment (takeWhileP Nothing notComChar) id acc i]
  fragment p f acc i = do str <- p; go (str : acc) (f i)
  notComChar c = c /= '(' && c /= '*'
  string_ str = () <$ string str
 bracketed :: (Def -> a) -> Char -> Char -> P a
 bracketed f o c = f <$> between (char' o) (char' c) def
 list :: ([a] -> a) -> [a] -> a
 list _ [x] = x
 list f xs  = f xs
 sym :: Text -> P Text
 sym str = lexeme $ string str
 char' :: Char -> P Char
 char' c = lexeme $ char c
-lexeme p = p <* skipSpaces
+lexeme :: P a -> P a
 lexeme p = p <* space
 optMaybe :: P a -> P (Maybe a)
 optMaybe = option Nothing . fmap Just
--- a/langfilter/langfilter.cabal
+++ b/langfilter/langfilter.cabal
@ -36,6 +36,7 @@ executable langfilter
    base ^>= 4.14.0.0,
    containers ^>= 0.6.2.1,
    filepath ^>= 1.4.2.1,
    megaparsec ^>= 9.0.1,
    process ^>= 1.6.11.0,
    pandoc-types ^>= 1.22,
    text,