lang/langfilter/Ebnf.hs

149 lines
3.8 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where
import Prelude hiding (span)
import Data.Maybe (fromMaybe)
import Data.List (intercalate)
import Data.Char (isAlphaNum)
import Text.ParserCombinators.ReadP
import Text.Pandoc.Definition
import Data.Text (Text, pack, unpack)
data Rule =
Rule Text Def
| RCom Text -- ^ @(* comment *)@
deriving (Eq, Show)
data Def =
N Text -- ^ @nonterminal@
| T Text -- ^ @\'terminal\'@ or @\"terminal\"@
| S Text -- ^ @?special?@
| Or [Def] -- ^ choice @a | b | c@
| Seq [Def] -- ^ sequence @a, b, c@
| Sub Def Def -- ^ difference @a - b@
| Opt Def -- ^ opt @[a]@
| Many Def -- ^ repetition @{a}@
| Com Text -- ^ comment
deriving (Eq, Show)
makeEbnf :: Block -> Block
makeEbnf (CodeBlock (_, cs, _) txt)
| "ebnf" `elem` cs
= render $ fromMaybe (error "invalid ebnf") $ parse $ unpack txt
makeEbnf b = b
render :: [Rule] -> Block
render rs =
Table ("", ["ebnf"], [])
(Caption Nothing [])
[(AlignRight, ColWidthDefault),
(AlignCenter, ColWidthDefault),
(AlignLeft, ColWidthDefault)]
(TableHead mempty [])
[TableBody mempty (RowHeadColumns 0) [] (concatMap render1 rs)]
(TableFoot mempty [])
render1 :: Rule -> [Row]
render1 (RCom txt) =
[Row mempty [Cell mempty AlignLeft (RowSpan 1) (ColSpan 3) [Plain [Str txt]]]]
render1 (Rule name def) =
row' [span "ebnf-nt" name] "=" d : map (row' [] "|") ds
where
d:ds = splitOrs def
splitOrs (Or ds) = ds
splitOrs d = [d]
row' c1 p d = Row mempty [cell c1, cell [punc p], cell (renderDef d)]
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
span c str = Span ("", [c], []) [Str str]
punc = span "ebnf-punc"
brack = span "ebnf-brack"
data Prec = OUTER | OR | SEQ | SUB deriving (Eq, Ord)
renderParens :: Bool -> [Inline] -> [Inline]
renderParens False is = is
renderParens True is = [punc "("] <> is <> [punc ")"]
renderDef :: Def -> [Inline]
renderDef = renderDefAt OUTER
renderDefAt :: Prec -> Def -> [Inline]
renderDefAt p = \case
N txt -> [span "ebnf-nt" txt]
T txt -> [span "ebnf-t" txt]
S txt -> [span "ebnf-s" txt]
Or ds -> renderParens (p > OR) $
intercalate [Space, punc "|", Space] $ renderDefAt OR <$> ds
Seq ds -> renderParens (p > SEQ) $
intercalate [punc ",", Space] $ renderDefAt SEQ <$> ds
Sub d e -> renderParens (p >= SUB) $
renderDefAt SUB d <>
[Space, span "ebnf-sub" "", Space] <>
renderDefAt SUB e
Opt d -> [brack "["] <> renderDef d <> [brack "]"]
Many d -> [brack "{"] <> renderDef d <> [brack "}"]
Com txt -> [span "ebnf-com" txt]
parse :: String -> Maybe [Rule]
parse str =
case readP_to_S (parse' <* eof) str of
[(res, _)] -> Just res
_ -> Nothing
parse' :: ReadP [Rule]
parse' = many rule
rule = choice
[Rule <$> nt <* sym "=" <*> def <* sym ";",
RCom <$> comment]
nt = pack . unwords <$> many1 word
where word = munch1 isWordChar <* skipSpaces
isWordChar c = c == '_' || c == '-' || isAlphaNum c
def = ors
ors = list Or <$> seqs `sepBy1` (sym "|")
seqs = list Seq <$> sub `sepBy1` (sym ",")
sub = do
lhs <- adef
rhs <- optMaybe $ sym "-" *> adef
pure $ maybe lhs (Sub lhs) rhs
adef = choice $
[N <$> nt, T <$> term, S <$> special,
bracketed id '(' ')',
bracketed Opt '[' ']',
bracketed Many '{' '}',
Com <$> comment]
term = pack <$> choice [str '\'', str '"']
special = pack <$> str '?'
str c = lexeme $ between (char c) (char c) (munch1 (/= c))
comment = pack <$> lexeme go where
go = concat <$> sequence
[string "(*",
concat <$> many (choice [go, munch1 \c -> c /= '(' && c /= '*']),
string "*)"]
bracketed f o c = f <$> between (char' o) (char' c) def
list _ [x] = x
list f xs = f xs
sym str = lexeme $ string str
char' c = lexeme $ char c
lexeme p = p <* skipSpaces
optMaybe = option Nothing . fmap Just