lang/langfilter/Ebnf.hs

172 lines
4.6 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where
import Prelude hiding (span)
import Data.List (intercalate)
import Data.Char (isAlphaNum)
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char hiding (char')
import qualified Text.Megaparsec as MP
import Text.Pandoc.Definition
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Void
data Rule =
Rule Text Def
| RCom Text -- ^ @(* comment *)@
deriving (Eq, Show)
data Def =
N Text -- ^ @nonterminal@
| T Text -- ^ @\'terminal\'@ or @\"terminal\"@
| S Text -- ^ @?special?@
| Or [Def] -- ^ choice @a | b | c@
| Seq [Def] -- ^ sequence @a, b, c@
| Sub Def Def -- ^ difference @a - b@
| Opt Def -- ^ opt @[a]@
| Many Def -- ^ repetition @{a}@
| Com Text -- ^ comment
deriving (Eq, Show)
makeEbnf :: Block -> Block
makeEbnf (CodeBlock (_, cs, _) txt)
| "ebnf" `elem` cs
= render $ either (error . errorBundlePretty) id $ parse txt
makeEbnf b = b
render :: [Rule] -> Block
render rs =
Table ("", ["ebnf"], [])
(Caption Nothing [])
[(AlignRight, ColWidthDefault),
(AlignCenter, ColWidthDefault),
(AlignLeft, ColWidthDefault)]
(TableHead mempty [])
[TableBody mempty (RowHeadColumns 0) [] (concatMap render1 rs)]
(TableFoot mempty [])
render1 :: Rule -> [Row]
render1 (RCom txt) =
[Row mempty [Cell mempty AlignLeft (RowSpan 1) (ColSpan 3) [Plain [Str txt]]]]
render1 (Rule name def) =
row' [span "ebnf-nt" name] "=" d : map (row' [] "|") ds
where
d:ds = splitOrs def
splitOrs (Or ds) = ds
splitOrs d = [d]
row' c1 p d = Row mempty [cell c1, cell [punc p], cell (renderDef d)]
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
span c str = Span ("", [c], []) [Str str]
punc = span "ebnf-punc"
brack = span "ebnf-brack"
data Prec = OUTER | OR | SEQ | SUB deriving (Eq, Ord)
renderParens :: Bool -> [Inline] -> [Inline]
renderParens False is = is
renderParens True is = [punc "("] <> is <> [punc ")"]
renderDef :: Def -> [Inline]
renderDef = renderDefAt OUTER
renderDefAt :: Prec -> Def -> [Inline]
renderDefAt p = \case
N txt -> [span "ebnf-nt" txt]
T txt -> [span "ebnf-t" txt]
S txt -> [span "ebnf-s" txt]
Or ds -> renderParens (p > OR) $
intercalate [Space, punc "|", Space] $ renderDefAt OR <$> ds
Seq ds -> renderParens (p > SEQ) $
intercalate [punc ",", Space] $ renderDefAt SEQ <$> ds
Sub d e -> renderParens (p >= SUB) $
renderDefAt SUB d <>
[Space, span "ebnf-sub" "", Space] <>
renderDefAt SUB e
Opt d -> [brack "["] <> renderDef d <> [brack "]"]
Many d -> [brack "{"] <> renderDef d <> [brack "}"]
Com txt -> [span "ebnf-com" txt]
type P = Parsec Void Text
parse :: Text -> Either (ParseErrorBundle Text Void) [Rule]
parse str = MP.parse (parse' <* eof) "<ebnf>" str
parse' :: Parsec Void Text [Rule]
parse' = many rule
rule :: P Rule
rule = choice
[Rule <$> nt <* sym "=" <*> def <* sym ";",
RCom <$> comment]
nt :: P Text
nt = Text.unwords <$> some (word <* space) where
word = Text.cons <$> first <*> takeWhileP Nothing isWordChar
first = letterChar
isWordChar c = c == '_' || c == '-' || isAlphaNum c
def :: P Def
def = ors
ors :: P Def
ors = list Or <$> seqs `sepBy1` (sym "|")
seqs :: P Def
seqs = list Seq <$> sub `sepBy1` (sym ",")
sub :: P Def
sub = do
lhs <- adef
rhs <- optMaybe $ sym "-" *> adef
pure $ maybe lhs (Sub lhs) rhs
adef :: P Def
adef = choice $
[N <$> nt, T <$> term, S <$> special,
Com <$> comment,
bracketed id '(' ')',
bracketed Opt '[' ']',
bracketed Many '{' '}']
term :: P Text
term = choice [str '\'', str '"']
special :: P Text
special = str '?'
str :: Char -> P Text
str c = lexeme $ between (char c) (char c) (takeWhileP Nothing (/= c))
comment :: P Text
comment = do try (string_ "(*"); go ["(*"] 1 where
go :: [Text] -> Int -> P Text
go acc 0 = pure $ mconcat $ reverse acc
go acc i = choice
[fragment (string "(*") (+ 1) acc i,
fragment (string "*)") (subtract 1) acc i,
fragment (takeWhileP Nothing notComChar) id acc i]
fragment p f acc i = do str <- p; go (str : acc) (f i)
notComChar c = c /= '(' && c /= '*'
string_ str = () <$ string str
bracketed :: (Def -> a) -> Char -> Char -> P a
bracketed f o c = f <$> between (char' o) (char' c) def
list :: ([a] -> a) -> [a] -> a
list _ [x] = x
list f xs = f xs
sym :: Text -> P Text
sym str = lexeme $ string str
char' :: Char -> P Char
char' c = lexeme $ char c
lexeme :: P a -> P a
lexeme p = p <* space
optMaybe :: P a -> P (Maybe a)
optMaybe = option Nothing . fmap Just