use megaparsec for ebnf

This commit is contained in:
Rhiannon Morris 2021-06-04 05:21:47 +02:00
parent 8cd6985cd2
commit ebde4773e6
2 changed files with 49 additions and 24 deletions

View file

@ -1,12 +1,15 @@
module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where
import Prelude hiding (span) import Prelude hiding (span)
import Data.Maybe (fromMaybe)
import Data.List (intercalate) import Data.List (intercalate)
import Data.Char (isAlphaNum) import Data.Char (isAlphaNum)
import Text.ParserCombinators.ReadP import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char hiding (char')
import qualified Text.Megaparsec as MP
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Data.Text (Text, pack, unpack) import Data.Text (Text)
import qualified Data.Text as Text
import Data.Void
data Rule = data Rule =
Rule Text Def Rule Text Def
@ -28,7 +31,7 @@ data Def =
makeEbnf :: Block -> Block makeEbnf :: Block -> Block
makeEbnf (CodeBlock (_, cs, _) txt) makeEbnf (CodeBlock (_, cs, _) txt)
| "ebnf" `elem` cs | "ebnf" `elem` cs
= render $ fromMaybe (error "invalid ebnf") $ parse $ unpack txt = render $ either (error . errorBundlePretty) id $ parse txt
makeEbnf b = b makeEbnf b = b
render :: [Rule] -> Block render :: [Rule] -> Block
@ -85,64 +88,85 @@ renderDefAt p = \case
Com txt -> [span "ebnf-com" txt] Com txt -> [span "ebnf-com" txt]
type P = Parsec Void Text
parse :: String -> Maybe [Rule] parse :: Text -> Either (ParseErrorBundle Text Void) [Rule]
parse str = parse str = MP.parse (parse' <* eof) "<ebnf>" str
case readP_to_S (parse' <* eof) str of
[(res, _)] -> Just res
_ -> Nothing
parse' :: ReadP [Rule] parse' :: Parsec Void Text [Rule]
parse' = many rule parse' = many rule
rule :: P Rule
rule = choice rule = choice
[Rule <$> nt <* sym "=" <*> def <* sym ";", [Rule <$> nt <* sym "=" <*> def <* sym ";",
RCom <$> comment] RCom <$> comment]
nt = pack . unwords <$> many1 word nt :: P Text
where word = munch1 isWordChar <* skipSpaces nt = Text.unwords <$> some (word <* space) where
isWordChar c = c == '_' || c == '-' || isAlphaNum c word = Text.cons <$> first <*> takeWhileP Nothing isWordChar
first = letterChar
isWordChar c = c == '_' || c == '-' || isAlphaNum c
def :: P Def
def = ors def = ors
ors :: P Def
ors = list Or <$> seqs `sepBy1` (sym "|") ors = list Or <$> seqs `sepBy1` (sym "|")
seqs :: P Def
seqs = list Seq <$> sub `sepBy1` (sym ",") seqs = list Seq <$> sub `sepBy1` (sym ",")
sub :: P Def
sub = do sub = do
lhs <- adef lhs <- adef
rhs <- optMaybe $ sym "-" *> adef rhs <- optMaybe $ sym "-" *> adef
pure $ maybe lhs (Sub lhs) rhs pure $ maybe lhs (Sub lhs) rhs
adef :: P Def
adef = choice $ adef = choice $
[N <$> nt, T <$> term, S <$> special, [N <$> nt, T <$> term, S <$> special,
Com <$> comment,
bracketed id '(' ')', bracketed id '(' ')',
bracketed Opt '[' ']', bracketed Opt '[' ']',
bracketed Many '{' '}', bracketed Many '{' '}']
Com <$> comment]
term = pack <$> choice [str '\'', str '"'] term :: P Text
term = choice [str '\'', str '"']
special = pack <$> str '?' special :: P Text
special = str '?'
str c = lexeme $ between (char c) (char c) (munch1 (/= c)) str :: Char -> P Text
str c = lexeme $ between (char c) (char c) (takeWhileP Nothing (/= c))
comment = pack <$> lexeme go where comment :: P Text
go = concat <$> sequence comment = do try (string_ "(*"); go ["(*"] 1 where
[string "(*", go :: [Text] -> Int -> P Text
concat <$> many (choice [go, munch1 \c -> c /= '(' && c /= '*']), go acc 0 = pure $ mconcat $ reverse acc
string "*)"] go acc i = choice
[fragment (string "(*") (+ 1) acc i,
fragment (string "*)") (subtract 1) acc i,
fragment (takeWhileP Nothing notComChar) id acc i]
fragment p f acc i = do str <- p; go (str : acc) (f i)
notComChar c = c /= '(' && c /= '*'
string_ str = () <$ string str
bracketed :: (Def -> a) -> Char -> Char -> P a
bracketed f o c = f <$> between (char' o) (char' c) def bracketed f o c = f <$> between (char' o) (char' c) def
list :: ([a] -> a) -> [a] -> a
list _ [x] = x list _ [x] = x
list f xs = f xs list f xs = f xs
sym :: Text -> P Text
sym str = lexeme $ string str sym str = lexeme $ string str
char' :: Char -> P Char
char' c = lexeme $ char c char' c = lexeme $ char c
lexeme p = p <* skipSpaces lexeme :: P a -> P a
lexeme p = p <* space
optMaybe :: P a -> P (Maybe a)
optMaybe = option Nothing . fmap Just optMaybe = option Nothing . fmap Just

View file

@ -36,6 +36,7 @@ executable langfilter
base ^>= 4.14.0.0, base ^>= 4.14.0.0,
containers ^>= 0.6.2.1, containers ^>= 0.6.2.1,
filepath ^>= 1.4.2.1, filepath ^>= 1.4.2.1,
megaparsec ^>= 9.0.1,
process ^>= 1.6.11.0, process ^>= 1.6.11.0,
pandoc-types ^>= 1.22, pandoc-types ^>= 1.22,
text, text,