lang/langfilter/langfilter.hs

127 lines
3.9 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import Ebnf
import Text.Pandoc.Definition
import Text.Pandoc.JSON
import Text.Pandoc.Builder
import Text.Pandoc.Walk
import Data.Char (isUpper, isDigit)
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
main :: IO ()
main = toJSONFilter (filter :: Pandoc -> Pandoc) where
filter =
walk makeEbnf .
walk (concatMap makeFigures) .
walk spans .
walk (concatMap glosses)
spans :: Inline -> Inline
spans = \case
Code attrs txt
| Just ('\\', txt') <- Text.uncons txt -> Code attrs txt'
| Just _ <- enclosed '/' '/' txt -> ipaB txt
| Just _ <- enclosed '[' ']' txt -> ipaN txt
| Just txt' <- enclosed '{' '}' txt -> lang txt'
| Just txt' <- enclosed '!' '!' txt -> abbr txt'
i -> i
ipaB, ipaN, lang, abbr :: Text -> Inline
ipaB = Span (cls ["ipa", "ipa-broad"]) . text'
ipaN = Span (cls ["ipa", "ipa-narrow"]) . text'
lang = Span (cls ["lang"]) . lang'
abbr = Span (cls ["abbr"]) . text' . endash
text' :: Text -> [Inline]
text' = toList . text
lang' :: Text -> [Inline]
lang' txt = case Text.uncons txt of
Nothing -> []
Just ('{', txt') -> RawInline "html" "<u>" : lang' txt'
Just ('}', txt') -> RawInline "html" "</u>" : lang' txt'
_ -> Str a : lang' b
where (a, b) = Text.span (\c -> c /= '{' && c /= '}') txt
cls :: [Text] -> Attr
cls cs = ("", cs, [])
glosses :: Block -> [Block]
glosses = \case
Div (_, cs, _) blocks | "glosses" `elem` cs ->
[RawBlock (Format "html") "<figure class=glosses>"] ++
mapMaybe glossTable blocks ++
[RawBlock (Format "html") "</figure>"]
b -> [b]
pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]]
pattern PGloss l p g w t =
BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]]
glossTable :: Block -> Maybe Block
glossTable = \case
Gloss l s g t -> Just $ make l Nothing s g t
PGloss l p s g t -> Just $ make l (Just p) s g t
HorizontalRule -> Nothing
b -> error $ "found " ++ show b ++ " in gloss section"
where
make l p s g t =
let n = length $ splitInlines s
colspecs = replicate n (AlignDefault, ColWidthDefault)
l' = cell1 n l; p' = cell1 n <$> p
ss = cells s; gs = cells g; t' = cell1 n t
in
Table ("", ["gloss"], []) (Caption Nothing []) colspecs
(TableHead mempty [])
[TableBody mempty (RowHeadColumns 0) [] $ concat
[[row ["gloss-lang", "lang"] [l']],
maybe [] (pure . row ["gloss-pron", "ipa"] . pure) p',
[row ["gloss-split", "lang"] ss],
[row ["gloss-gloss"] gs],
[row ["gloss-trans"] [t']]]]
(TableFoot mempty [])
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is]
cells = map (cell . concatMap abbrs) . splitInlines
row c = Row ("", c, [])
endash :: Text -> Text
endash = Text.map \case '-' -> ''; '_' -> ' '; c -> c
abbrs :: Inline -> [Inline]
abbrs (Str txt) = go $ endash txt where
go "" = []
go txt
| (l, r) <- Text.span isAbbr txt,
not $ Text.null l
= abbr' l : go r
| (l, r) <- Text.break isAbbr txt
= Str l : go r
abbr' txt = if Text.length txt == 1 then Str txt else abbr txt
isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String)
abbrs i = [i]
splitInlines :: [Inline] -> [Inlines]
splitInlines is = filter (not . null) $ go is where
go [] = []
go is =
let (is1, is') = break (== Space) is in
fromList is1 : splitInlines (dropWhile (== Space) is')
makeFigures :: Block -> [Block]
makeFigures (Div ("", ["figure"], []) blks) =
[html "<figure>"] ++ blks ++ [html "</figure>"]
where html = RawBlock (Format "html")
makeFigures b = [b]
enclosed :: Char -> Char -> Text -> Maybe Text
enclosed o c txt
| Text.length txt >= 2,
Text.head txt == o,
Text.last txt == c
= Just $ Text.init $ Text.tail txt
enclosed _ _ _ = Nothing