librarify langfilter too

This commit is contained in:
Rhiannon Morris 2024-11-28 02:04:06 +01:00
parent 1098cbdc1b
commit 89270a82fb
9 changed files with 35 additions and 23 deletions

199
langfilter/lib/Ebnf.hs Normal file
View file

@ -0,0 +1,199 @@
module Ebnf (makeEbnf, Rule (..), Def (..), render, parse) where
import Prelude hiding (span)
import Data.List (intercalate)
import Data.Char (isAlphaNum)
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char hiding (char')
import qualified Text.Megaparsec as MP
import Text.Pandoc.Definition
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Void
import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.List.NonEmpty as NonEmpty
data Rule =
Rule Text [Text] Def
| RCom Text -- ^ @(* comment *)@
deriving (Eq, Show)
data Def =
N Text -- ^ @nonterminal@
| T Text -- ^ @\'terminal\'@ or @\"terminal\"@
| S Text -- ^ @?special?@
| C Text (NonEmpty Def) -- ^ @param(X, Y)@
| Or (NonEmpty Def) -- ^ choice @a | b | c@
| Seq (NonEmpty Def) -- ^ sequence @a, b, c@
| Sub Def Def -- ^ difference @a - b@
| Opt Def -- ^ opt @[a]@
| Many Def -- ^ repetition @{a}@
| Com Text -- ^ comment
deriving (Eq, Show)
makeEbnf :: Block -> Block
makeEbnf (CodeBlock (_, cs, _) txt)
| "ebnf" `elem` cs
= render $ either (error . errorBundlePretty) id $ parse txt
makeEbnf b = b
render :: [Rule] -> Block
render rs =
Table ("", ["ebnf"], [])
(Caption Nothing [])
[(AlignRight, ColWidthDefault),
(AlignCenter, ColWidthDefault),
(AlignLeft, ColWidthDefault)]
(TableHead mempty [])
[TableBody mempty (RowHeadColumns 0) [] (concatMap render1 rs)]
(TableFoot mempty [])
render1 :: Rule -> [Row]
render1 (RCom txt) =
[Row mempty [Cell mempty AlignLeft (RowSpan 1) (ColSpan 3)
[Plain [span "ebnf-com" txt]]]]
render1 (Rule name args def) =
row' lhs "=" d : map (row' [] "|") ds
where
lhs = case args of
[] -> [span "ebnf-nt" name]
_ -> [span "ebnf-f" name, punc "("] ++
intercalate [punc ";", Space] [[span "ebnf-nt" x] | x <- args] ++
[punc ")"]
d :| ds = splitOrs def
splitOrs (Or ds) = ds
splitOrs d = NonEmpty.singleton d
row' c1 p d = Row mempty [cell c1, cell [punc p], cell (renderDef d)]
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
span c str = Span ("", [c], []) [Str str]
punc = span "ebnf-punc"
brack = span "ebnf-brack"
data Prec = OUTER | OR | SEQ | SUB deriving (Eq, Ord)
renderParens :: Bool -> [Inline] -> [Inline]
renderParens False is = is
renderParens True is = [punc "("] <> is <> [punc ")"]
renderDef :: Def -> [Inline]
renderDef = renderDefAt OUTER
renderDefAt :: Prec -> Def -> [Inline]
renderDefAt p = \case
N txt -> [span "ebnf-nt" txt]
T txt -> [span "ebnf-t" txt]
S txt -> [span "ebnf-s" txt]
C f xs ->
[span "ebnf-f" f, punc "("] ++
intercalate [punc ";", Space] (renderDefAt OUTER <$> NonEmpty.toList xs) ++
[punc ")"]
Or ds -> renderParens (p > OR) $
intercalate [Space, punc "|", Space] $ renderDefAt OR <$> NonEmpty.toList ds
Seq ds -> renderParens (p > SEQ) $
intercalate [punc ",", Space] $ renderDefAt SEQ <$> NonEmpty.toList ds
Sub d e -> renderParens (p >= SUB) $
renderDefAt SUB d <>
[Space, span "ebnf-sub" "", Space] <>
renderDefAt SUB e
Opt d -> [brack "["] <> renderDef d <> [brack "]"]
Many d -> [brack "{"] <> renderDef d <> [brack "}"]
Com txt -> [span "ebnf-com" txt]
type P = Parsec Void Text
parse :: Text -> Either (ParseErrorBundle Text Void) [Rule]
parse str = MP.parse (parse' <* eof) "<ebnf>" str
parse' :: Parsec Void Text [Rule]
parse' = many rule
rule :: P Rule
rule = choice
[Rule <$> nt <*> lhsArgs <* sym "=" <*> def <* sym ";",
RCom <$> comment]
lhsArgs :: P [Text]
lhsArgs =
sym "(" *> nt `sepBy1` sym ";" <* sym ")"
<|> pure []
nt :: P Text
nt = Text.unwords <$> some (word <* space) where
word = Text.cons <$> first <*> takeWhileP Nothing isWordChar
first = letterChar
isWordChar c = c == '_' || c == '-' || isAlphaNum c
def :: P Def
def = ors
ors :: P Def
ors = list1 Or <$> seqs `sepBy1'` sym "|"
seqs :: P Def
seqs = list1 Seq <$> sub `sepBy1'` sym ","
sepBy1' :: P a -> P z -> P (NonEmpty a)
sepBy1' a b = NonEmpty.fromList <$> sepBy1 a b
sub :: P Def
sub = do
lhs <- adef
rhs <- optMaybe $ sym "-" *> adef
pure $ maybe lhs (Sub lhs) rhs
adef :: P Def
adef = choice $
[call, T <$> term, S <$> special,
Com <$> comment,
bracketed id '(' ')',
bracketed Opt '[' ']',
bracketed Many '{' '}']
call :: P Def
call = do
f <- nt
args <- optional $ sym "(" *> def `sepBy1` (sym ";") <* sym ")"
pure $ maybe (N f) (C f . NonEmpty.fromList) args
term :: P Text
term = choice [str '\'', str '"']
special :: P Text
special = str '?'
str :: Char -> P Text
str c = lexeme $ between (char c) (char c) (takeWhileP Nothing (/= c))
comment :: P Text
comment = do try (string_ "(*"); go ["(*"] 1 where
go :: [Text] -> Int -> P Text
go acc 0 = mconcat (reverse acc) <$ space
go acc i = choice
[fragment (string "(*") (+ 1) acc i,
fragment (string "*)") (subtract 1) acc i,
fragment (takeWhileP Nothing notComChar) id acc i]
fragment p f acc i = do str <- p; go (str : acc) (f i)
notComChar c = c /= '(' && c /= '*'
string_ str = () <$ string str
bracketed :: (Def -> a) -> Char -> Char -> P a
bracketed f o c = f <$> between (char' o) (char' c) def
list1 :: (NonEmpty a -> a) -> NonEmpty a -> a
list1 _ (x :| []) = x
list1 f xs = f xs
sym :: Text -> P Text
sym str = lexeme $ string str
char' :: Char -> P Char
char' c = lexeme $ char c
lexeme :: P a -> P a
lexeme p = try $ p <* space
optMaybe :: P a -> P (Maybe a)
optMaybe = option Nothing . fmap Just

96
langfilter/lib/Glosses.hs Normal file
View file

@ -0,0 +1,96 @@
module Glosses (glosses) where
import Lang
import LaantasImage
import Spans (abbrs)
import qualified Spans
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.Pandoc.Walk
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Data (toConstr)
glosses :: Vars => Block -> [Block]
glosses = \case
Div (i, cs, _) blocks | "glosses" `elem` cs ->
let tables = map glossTable blocks in
[RawBlock (Format "html") $ "<figure" <> id <> classes <> ">"] ++
catMaybes tables ++
[RawBlock (Format "html") "</figure>"]
where
id = if i == "" then "" else " id=\"" <> i <> "\""
classes = " class=\"" <> Text.unwords cs <> "\""
b -> [b]
pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]]
pattern PGloss l p g w t =
BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]]
pattern PNGloss l b n g w t =
BulletList [[Plain l], [Plain b], [Plain n], [Plain g], [Plain w], [Plain t]]
glossTable :: Vars => Block -> Maybe Block
glossTable = \case
Gloss l s g t -> Just $ make l Nothing Nothing s g t
PGloss l p s g t -> Just $ make l (Just p) Nothing s g t
PNGloss l b n s g t -> Just $ make l (Just b) (Just n) s g t
HorizontalRule -> Nothing
BulletList xs | let = length xs, < 4 || > 6 ->
fail $ "found list of length " ++ show ++
" in gloss section (missing `---`?)"
b -> fail $ "found unexpected " ++ show (toConstr b) ++ " in gloss section"
where
make l b n s g t =
let = length $ splitInlines s
colspecs = replicate (AlignDefault, ColWidthDefault)
l' = cell1 $ underlines $ noHash l
b' = cell1 <$> b; n' = cell1 <$> n
ss = cells s; gs = cells g; t' = cell1 t
img = case ?lang of
Just Lántas -> Just $ cell1 [makeItem $ splitItem' $ stripInlines l]
Nothing -> Nothing
in
Table ("", ["gloss"], []) (Caption Nothing []) colspecs
(TableHead mempty [])
[TableBody mempty (RowHeadColumns 0) [] $ concat
[[row ["gloss-scr", "scr"] [i] | Just i <- [img]],
[row ["gloss-lang", "lang"] [l']],
[row ["gloss-pron", "ipa"] [b] | Just b <- [b']],
[row ["gloss-pron", "ipa"] [n] | Just n <- [n']],
[row ["gloss-split", "lang"] ss],
[row ["gloss-gloss"] gs],
[row ["gloss-trans"] [t']]]]
(TableFoot mempty [])
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
cell1 is = Cell mempty AlignDefault (RowSpan 1) (ColSpan ) [Plain is]
cells = map (cell . concatMap abbrs) . splitInlines
row c = Row ("", c, [])
stripInlines :: [Inline] -> Text
stripInlines = query \case
Str s -> s
Space -> " "
SoftBreak -> " "
LineBreak -> " "
_ -> ""
splitInlines :: [Inline] -> [Inlines]
splitInlines is = filter (not . null) $ go is where
go [] = []
go is =
let (is1, is') = break (== Space) is in
fromList is1 : splitInlines (dropWhile (== Space) is')
underlines :: [Inline] -> [Inline]
underlines = concatMap underlineStr . takeWhile (/= Str "|") where
underlineStr = \case
Str txt -> Spans.underlines txt
i -> [i]
noHash :: [Inline] -> [Inline]
noHash = walk \case
Str str -> Str $ Text.filter (/= '#') str
i -> i

View file

@ -0,0 +1,94 @@
module LaantasImage
(Item (..), splitItem, splitItem', makeItem)
where
import Lang
import Text.Pandoc.Definition
import Data.Bifunctor
import Data.Function
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as Lazy
import qualified Laantas
data Item =
Item {
text, title :: Text,
size, stroke :: Double,
width :: Double,
color :: Text,
showText :: Bool
} deriving (Eq, Show)
splitItem :: Vars => Text -> Maybe Item
splitItem (Text.uncons -> Just (c, txt))
| c == '!' = Just $ splitItem' txt
| c == '#' = Just $ (splitItem' txt) {showText = False}
splitItem _ = Nothing
splitItem' :: Vars => Text -> Item
splitItem' txt =
case imageOpts txt of
Just (txt, opts) -> defaultItem txt ?defColor
& withOpt opts "size" (\s i -> i {size = readt s})
& withOpt opts "stroke" (\k i -> i {stroke = readt k})
& withOpt opts "width" (\w i -> i {width = readt w})
& withOpt opts "color" (\c i -> i {color = c})
Nothing -> defaultItem txt ?defColor
where readt x = read $ Text.unpack x
withOpt :: Ord k => Map k v -> k -> (v -> a -> a) -> (a -> a)
withOpt m k f =
case Map.lookup k m of
Just v -> f v
Nothing -> id
defaultItem :: Text -> Text -> Item
defaultItem txt color =
Item {
text = Text.filter notPunc txt,
title = toTitle txt,
size = 2,
stroke = 1.25,
width = 600,
color = color,
showText = True
}
split1 :: Text -> Text -> Maybe (Text, Text)
split1 s txt =
let (a, b) = Text.breakOn s txt in
if Text.null b then
Nothing
else
Just (Text.strip a, Text.strip $ Text.drop (Text.length s) b)
type Opts = Map Text Text
imageOpts :: Text -> Maybe (Text, Opts)
imageOpts = fmap (second splitOpts) . getOpts
getOpts :: Text -> Maybe (Text, Text)
getOpts = split1 "|"
splitOpts :: Text -> Map Text Text
splitOpts = Map.fromList . map splitOpt . Text.splitOn ";" where
splitOpt txt = fromMaybe ("file", txt) $ split1 "=" txt
toTitle :: Text -> Text
toTitle = Text.filter \c -> c /= '\\' && c /= '#'
makeItem :: Item -> Inline
makeItem (Item {..}) =
let env = Laantas.E {..}
words = Laantas.split text in
RawInline "html" $ Lazy.toStrict $ Laantas.prettyText $
Laantas.doGlyphsNoDoctype words env `Laantas.with`
[Laantas.Class_ Laantas.<<- "scr"]
notPunc :: Char -> Bool
notPunc c = c `notElem` ("{}·" :: String)

31
langfilter/lib/Lang.hs Normal file
View file

@ -0,0 +1,31 @@
module Lang where
import Text.Pandoc.Definition
import Data.Char (toLower)
import qualified Data.Text as Text
import System.IO
import Data.Text (Text)
data Lang = Lántas deriving (Eq, Show)
type Vars = (?lang :: Maybe Lang, ?defColor :: Text)
toText :: Maybe MetaValue -> IO (Maybe Text)
toText (Just (MetaInlines [Str s])) = toText (Just (MetaString s)) -- ugh
toText (Just (MetaString s)) = pure $ Just s
toText Nothing = pure Nothing
toText (Just ) = do
hPutStrLn stderr $ "[WARN] expected a string, got: " <> show
pure Nothing
toLang :: Maybe MetaValue -> IO (Maybe Lang)
toLang m = do
mres <- fmap (Text.map toLower) <$> toText m
case mres of
Just res -> do
if res `elem` ["laantas", "lántas"] then
pure $ Just Lántas
else do
hPutStrLn stderr $ "[WARN] unknown language: " <> show res
pure Nothing
Nothing -> pure Nothing

View file

@ -0,0 +1,90 @@
module LangFilter where
import Lang
import Ebnf
import Spans
import Glosses
import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as Text
import Control.Applicative
import Control.Monad
import System.Environment
langFilter :: Pandoc -> IO Pandoc
langFilter p@(Pandoc (Meta m) _) = do
lang <- toLang $ Map.lookup "conlang" m
defColor <- getDefColor m
let ?lang = lang
let ?defColor = defColor
let f = map (walk spans . fixFigureClass . makeEbnf .
makeQuotes . letterList) .
concatMap (makeBlocks <=< glosses)
pure $ walk f p
getDefColor :: Map Text MetaValue -> IO Text
getDefColor m = do
res <- toText $ Map.lookup "lang-color" m
case res of
Just col -> pure col
Nothing -> do
env <- lookupEnv "LANG_COLOR"
pure $ maybe "black" Text.pack env
pluck :: Eq a => a -> [a] -> Maybe [a]
pluck _ [] = Nothing
pluck x (y:ys) | x == y = Just ys
| otherwise = (x :) <$> pluck x ys
pluck1 :: Eq a => [a] -> [a] -> Maybe (a, [a])
pluck1 [] _ = Nothing
pluck1 (x:xs) ys = (x,) <$> pluck x ys <|> pluck1 xs ys
fixFigureClass :: Block -> Block
fixFigureClass (Figure (_, c1, a1) cap [Plain [Image (i, c2, a2) alt path]]) =
Figure (i, c1 ++ c2, a1 ++ a2) cap [Plain [Image ("", [], []) alt path]]
fixFigureClass b = b
makeBlocks :: Block -> [Block]
makeBlocks (Div ("", clss, []) blks)
| Just (cls, rest) <- pluck1 ["figure", "aside"] clss =
let html = RawBlock $ Format "html"
open = if null rest then
html $ "<" <> cls <> ">"
else
html $ "<" <> cls <> " class='" <> Text.unwords rest <> "'>"
close = html $ "</" <> cls <> ">"
in
[open] ++ blks ++ [close]
makeBlocks b = [b]
makeQuotes :: Block -> Block
makeQuotes para@(Para b) = fromMaybe para $ do
inner <- split b
return (BlockQuote [Para inner])
where
isDelim str = Text.length str == 3 && Text.all isQuote str
isQuote c = c == '"' || c == '“' || c == '”'
split (Str begin:SoftBreak:rest) = guard (isDelim begin) *> checkEnd rest
split _ = empty
checkEnd [SoftBreak, Str end] = [] <$ guard (isDelim end)
checkEnd (start:rest) = (start :) <$> checkEnd rest
checkEnd _ = empty
makeQuotes other = other
letterList :: Block -> Block
letterList (Div a@(_, cs, _) blks)
| "letter-list" `elem` cs = Div a (walk go blks)
where
go (Para xs) = Plain xs
go b = b
letterList b = b

92
langfilter/lib/Spans.hs Normal file
View file

@ -0,0 +1,92 @@
module Spans (spans, ipaB, ipaN, abbr, abbrs, underlines) where
import LaantasImage hiding (text)
import Lang
import Text.Pandoc.Definition hiding (Image)
import Text.Pandoc.Builder hiding (Image)
import Data.Char (isUpper, isDigit)
import Data.Text (Text)
import qualified Data.Text as Text
spans :: Vars => Inline -> Inline
spans = \case
Code attrs txt
| Just ('\\', txt') <- Text.uncons txt -> Code attrs txt'
| Just txt' <- enclosed "" "" txt -> ipaA txt'
| Just txt' <- enclosed "//" "//" txt -> ipaA txt'
| Just _ <- enclosed "/" "/" txt -> ipaB txt
| Just _ <- enclosed "[" "]" txt -> ipaN txt
| Just txt' <- enclosed "{" "}" txt -> lang txt'
| Just txt' <- enclosed "!" "!" txt -> abbr txt'
| Just txt' <- enclosed "*" "*" txt -> mark txt'
| Just txt' <- enclosed "@" "@" txt -> dfn txt'
i -> i
ipaA, ipaB, ipaN, abbr, mark :: Text -> Inline
ipaA = Span (cls ["ipa", "ipa-arch"]) . text' . surround ""
ipaB = Span (cls ["ipa", "ipa-broad"]) . text'
ipaN = Span (cls ["ipa", "ipa-narrow"]) . text'
abbr = Span (cls ["abbr"]) . text' . endash
mark txt = RawInline "html" $ "<mark>" <> txt <> "</mark>"
dfn txt = RawInline "html" $ "<dfn>" <> txt <> "</dfn>"
surround :: Text -> Text -> Text
surround s txt = s <> txt <> s
text' :: Text -> [Inline]
text' = toList . text
lang :: Vars => Text -> Inline
lang = Span (cls ["lang"]) . lang'
lang' :: Vars => Text -> [Inline]
lang' txt = case ?lang of
Just Lántas
| Just li@(Item {..}) <- splitItem txt,
let label = Span (cls ["text"]) $ underlines title ->
if showText then [makeItem li, label] else [makeItem li]
_ -> underlines txt
notBrace :: Char -> Bool
notBrace c = c /= '{' && c /= '}'
underlines :: Text -> [Inline]
underlines txt = case Text.uncons txt of
Nothing -> []
Just ('{', txt') -> RawInline "html" "<u>" : underlines txt'
Just ('}', txt') -> RawInline "html" "</u>" : underlines txt'
_ -> Str a : underlines b
where (a, b) = Text.span notBrace txt
cls :: [Text] -> Attr
cls cs = ("", cs, [])
enclosed :: Text -> Text -> Text -> Maybe Text
enclosed o c txt
| Text.length txt >= o + c,
Text.take o txt == o,
Text.takeEnd c txt == c
= Just $ Text.drop o $ Text.dropEnd c txt
where o = Text.length o; c = Text.length c
enclosed _ _ _ = Nothing
endash :: Text -> Text
endash = Text.map \case '-' -> ''; '_' -> ' '; c -> c
abbrs :: Inline -> [Inline]
abbrs (Str txt) = go $ endash txt where
go "" = []
go txt
| (l, r) <- Text.span isAbbr txt,
not $ Text.null l
= abbr' l : go r
| (l, r) <- Text.break isAbbr txt
= Str l : go r
abbr' txt = if Text.any (not . isDigit) txt then abbr txt else Str txt
isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String)
abbrs i = [i]