use megaparsec for ebnf

2021-06-04 05:21:47 +02:00 · 2021-06-04 05:21:47 +02:00 · ebde4773e6
commit ebde4773e6
parent 8cd6985cd2
2 changed files with 49 additions and 24 deletions
--- a/langfilter/Ebnf.hs
+++ b/langfilter/Ebnf.hs
@ -1,12 +1,15 @@
 module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where

 import Prelude hiding (span)
-import Data.Maybe (fromMaybe)
 import Data.List (intercalate)
 import Data.Char (isAlphaNum)
-import Text.ParserCombinators.ReadP
+import Text.Megaparsec hiding (parse)
+import Text.Megaparsec.Char hiding (char')
+import qualified Text.Megaparsec as MP
 import Text.Pandoc.Definition
-import Data.Text (Text, pack, unpack)
+import Data.Text (Text)
+import qualified Data.Text as Text
+import Data.Void

 data Rule =
    Rule Text Def
@ -28,7 +31,7 @@ data Def =
 makeEbnf :: Block -> Block
 makeEbnf (CodeBlock (_, cs, _) txt)
  | "ebnf" `elem` cs
-  = render $ fromMaybe (error "invalid ebnf") $ parse $ unpack txt
+  = render $ either (error . errorBundlePretty) id $ parse txt
 makeEbnf b = b

 render :: [Rule] -> Block
@ -85,64 +88,85 @@ renderDefAt p = \case
  Com txt -> [span "ebnf-com" txt]


+type P = Parsec Void Text

-parse :: String -> Maybe [Rule]
-parse str =
-  case readP_to_S (parse' <* eof) str of
-    [(res, _)] -> Just res
-    _          -> Nothing
+parse :: Text -> Either (ParseErrorBundle Text Void) [Rule]
+parse str = MP.parse (parse' <* eof) "<ebnf>" str

-parse' :: ReadP [Rule]
+parse' :: Parsec Void Text [Rule]
 parse' = many rule

+rule :: P Rule
 rule = choice
  [Rule <$> nt <* sym "=" <*> def <* sym ";",
   RCom <$> comment]

-nt = pack . unwords <$> many1 word
-  where word = munch1 isWordChar <* skipSpaces
+nt :: P Text
+nt = Text.unwords <$> some (word <* space) where
+  word = Text.cons <$> first <*> takeWhileP Nothing isWordChar
+  first = letterChar
  isWordChar c = c == '_' || c == '-' || isAlphaNum c

+def :: P Def
 def = ors

+ors :: P Def
 ors = list Or <$> seqs `sepBy1` (sym "|")

+seqs :: P Def
 seqs = list Seq <$> sub `sepBy1` (sym ",")

+sub :: P Def
 sub = do
  lhs <- adef
  rhs <- optMaybe $ sym "-" *> adef
  pure $ maybe lhs (Sub lhs) rhs

+adef :: P Def
 adef = choice $
  [N <$> nt, T <$> term, S <$> special,
+   Com <$> comment,
   bracketed id '(' ')',
   bracketed Opt '[' ']',
-   bracketed Many '{' '}',
-   Com <$> comment]
+   bracketed Many '{' '}']

-term = pack <$> choice [str '\'', str '"']
+term :: P Text
+term = choice [str '\'', str '"']

-special = pack <$> str '?'
+special :: P Text
+special = str '?'

-str c = lexeme $ between (char c) (char c) (munch1 (/= c))
+str :: Char -> P Text
+str c = lexeme $ between (char c) (char c) (takeWhileP Nothing (/= c))

-comment = pack <$> lexeme go where
-  go = concat <$> sequence
-    [string "(*",
-     concat <$> many (choice [go, munch1 \c -> c /= '(' && c /= '*']),
-     string "*)"]
+comment :: P Text
+comment = do try (string_ "(*"); go ["(*"] 1 where
+  go :: [Text] -> Int -> P Text
+  go acc 0 = pure $ mconcat $ reverse acc
+  go acc i = choice
+    [fragment (string "(*") (+ 1) acc i,
+     fragment (string "*)") (subtract 1) acc i,
+     fragment (takeWhileP Nothing notComChar) id acc i]
+  fragment p f acc i = do str <- p; go (str : acc) (f i)
+  notComChar c = c /= '(' && c /= '*'
+  string_ str = () <$ string str

+bracketed :: (Def -> a) -> Char -> Char -> P a
 bracketed f o c = f <$> between (char' o) (char' c) def

+list :: ([a] -> a) -> [a] -> a
 list _ [x] = x
 list f xs  = f xs


+sym :: Text -> P Text
 sym str = lexeme $ string str

+char' :: Char -> P Char
 char' c = lexeme $ char c

-lexeme p = p <* skipSpaces
+lexeme :: P a -> P a
+lexeme p = p <* space

+optMaybe :: P a -> P (Maybe a)
 optMaybe = option Nothing . fmap Just
--- a/langfilter/langfilter.cabal
+++ b/langfilter/langfilter.cabal
@ -36,6 +36,7 @@ executable langfilter
    base ^>= 4.14.0.0,
    containers ^>= 0.6.2.1,
    filepath ^>= 1.4.2.1,
+    megaparsec ^>= 9.0.1,
    process ^>= 1.6.11.0,
    pandoc-types ^>= 1.22,
    text,