use megaparsec for ebnf

This commit is contained in:
Rhiannon Morris 2021-06-04 05:21:47 +02:00
parent 8cd6985cd2
commit ebde4773e6
2 changed files with 49 additions and 24 deletions

View File

@ -1,12 +1,15 @@
module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where
import Prelude hiding (span)
import Data.Maybe (fromMaybe)
import Data.List (intercalate)
import Data.Char (isAlphaNum)
import Text.ParserCombinators.ReadP
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char hiding (char')
import qualified Text.Megaparsec as MP
import Text.Pandoc.Definition
import Data.Text (Text, pack, unpack)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Void
data Rule =
Rule Text Def
@ -28,7 +31,7 @@ data Def =
makeEbnf :: Block -> Block
makeEbnf (CodeBlock (_, cs, _) txt)
| "ebnf" `elem` cs
= render $ fromMaybe (error "invalid ebnf") $ parse $ unpack txt
= render $ either (error . errorBundlePretty) id $ parse txt
makeEbnf b = b
render :: [Rule] -> Block
@ -85,64 +88,85 @@ renderDefAt p = \case
Com txt -> [span "ebnf-com" txt]
type P = Parsec Void Text
parse :: String -> Maybe [Rule]
parse str =
case readP_to_S (parse' <* eof) str of
[(res, _)] -> Just res
_ -> Nothing
parse :: Text -> Either (ParseErrorBundle Text Void) [Rule]
parse str = MP.parse (parse' <* eof) "<ebnf>" str
parse' :: ReadP [Rule]
parse' :: Parsec Void Text [Rule]
parse' = many rule
rule :: P Rule
rule = choice
[Rule <$> nt <* sym "=" <*> def <* sym ";",
RCom <$> comment]
nt = pack . unwords <$> many1 word
where word = munch1 isWordChar <* skipSpaces
isWordChar c = c == '_' || c == '-' || isAlphaNum c
nt :: P Text
nt = Text.unwords <$> some (word <* space) where
word = Text.cons <$> first <*> takeWhileP Nothing isWordChar
first = letterChar
isWordChar c = c == '_' || c == '-' || isAlphaNum c
def :: P Def
def = ors
ors :: P Def
ors = list Or <$> seqs `sepBy1` (sym "|")
seqs :: P Def
seqs = list Seq <$> sub `sepBy1` (sym ",")
sub :: P Def
sub = do
lhs <- adef
rhs <- optMaybe $ sym "-" *> adef
pure $ maybe lhs (Sub lhs) rhs
adef :: P Def
adef = choice $
[N <$> nt, T <$> term, S <$> special,
Com <$> comment,
bracketed id '(' ')',
bracketed Opt '[' ']',
bracketed Many '{' '}',
Com <$> comment]
bracketed Many '{' '}']
term = pack <$> choice [str '\'', str '"']
term :: P Text
term = choice [str '\'', str '"']
special = pack <$> str '?'
special :: P Text
special = str '?'
str c = lexeme $ between (char c) (char c) (munch1 (/= c))
str :: Char -> P Text
str c = lexeme $ between (char c) (char c) (takeWhileP Nothing (/= c))
comment = pack <$> lexeme go where
go = concat <$> sequence
[string "(*",
concat <$> many (choice [go, munch1 \c -> c /= '(' && c /= '*']),
string "*)"]
comment :: P Text
comment = do try (string_ "(*"); go ["(*"] 1 where
go :: [Text] -> Int -> P Text
go acc 0 = pure $ mconcat $ reverse acc
go acc i = choice
[fragment (string "(*") (+ 1) acc i,
fragment (string "*)") (subtract 1) acc i,
fragment (takeWhileP Nothing notComChar) id acc i]
fragment p f acc i = do str <- p; go (str : acc) (f i)
notComChar c = c /= '(' && c /= '*'
string_ str = () <$ string str
bracketed :: (Def -> a) -> Char -> Char -> P a
bracketed f o c = f <$> between (char' o) (char' c) def
list :: ([a] -> a) -> [a] -> a
list _ [x] = x
list f xs = f xs
sym :: Text -> P Text
sym str = lexeme $ string str
char' :: Char -> P Char
char' c = lexeme $ char c
lexeme p = p <* skipSpaces
lexeme :: P a -> P a
lexeme p = p <* space
optMaybe :: P a -> P (Maybe a)
optMaybe = option Nothing . fmap Just

View File

@ -36,6 +36,7 @@ executable langfilter
base ^>= 4.14.0.0,
containers ^>= 0.6.2.1,
filepath ^>= 1.4.2.1,
megaparsec ^>= 9.0.1,
process ^>= 1.6.11.0,
pandoc-types ^>= 1.22,
text,