import Text.Pandoc.Definition import Text.Pandoc.JSON import Text.Pandoc.Builder import Text.Pandoc.Walk import Data.Char (isUpper, isDigit) import Data.Maybe import Data.Text (Text) import qualified Data.Text as Text main :: IO () main = toJSONFilter (filter :: Pandoc -> Pandoc) where filter = walk (concatMap makeFigures) . walk spans . walk (concatMap glosses) spans :: Inline -> Inline spans = \case Code attrs txt | Just ('\\', txt') <- Text.uncons txt -> Code attrs txt' | Just _ <- enclosed '/' '/' txt -> ipaB txt | Just _ <- enclosed '[' ']' txt -> ipaN txt | Just txt' <- enclosed '{' '}' txt -> lang txt' | Just txt' <- enclosed '!' '!' txt -> abbr txt' i -> i ipaB, ipaN, lang, abbr :: Text -> Inline ipaB = Span (cls ["ipa", "ipa-broad"]) . text' ipaN = Span (cls ["ipa", "ipa-narrow"]) . text' lang = Span (cls ["lang"]) . lang' abbr = Span (cls ["abbr"]) . text' . endash text' :: Text -> [Inline] text' = toList . text lang' :: Text -> [Inline] lang' txt = case Text.uncons txt of Nothing -> [] Just ('{', txt') -> RawInline "html" "" : lang' txt' Just ('}', txt') -> RawInline "html" "" : lang' txt' _ -> Str a : lang' b where (a, b) = Text.span (\c -> c /= '{' && c /= '}') txt cls :: [Text] -> Attr cls cs = ("", cs, []) glosses :: Block -> [Block] glosses = \case Div (_, cs, _) blocks | "glosses" `elem` cs -> [RawBlock (Format "html") "
"] ++ mapMaybe glossTable blocks ++ [RawBlock (Format "html") "
"] b -> [b] glossTable :: Block -> Maybe Block glossTable = \case Gloss l s g t -> Just $ make l Nothing s g t PGloss l p s g t -> Just $ make l (Just p) s g t HorizontalRule -> Nothing b -> error $ "found " ++ show b ++ " in gloss section" where make l p s g t = let n = length $ splitInlines s colspecs = replicate n (AlignDefault, ColWidthDefault) l' = cell1 n l; p' = cell1 n <$> p ss = cells s; gs = cells g; t' = cell1 n t in Table ("", ["gloss"], []) (Caption Nothing []) colspecs (TableHead mempty []) [TableBody mempty (RowHeadColumns 0) [] $ concat [[row "gloss-lang" [l']], maybe [] (pure . row "gloss-pron" . pure) p', [row "gloss-split" ss], [row "gloss-gloss" gs], [row "gloss-trans" [t']]]] (TableFoot mempty []) cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is] cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is] cells = map (cell . concatMap abbrs) . splitInlines row c = Row ("", [c], []) endash :: Text -> Text endash = Text.map \case '-' -> '–'; '_' -> ' '; c -> c abbrs :: Inline -> [Inline] abbrs (Str txt) = go $ endash txt where go "" = [] go txt | (l, r) <- Text.span isAbbr txt, not $ Text.null l = abbr' l : go r | (l, r) <- Text.break isAbbr txt = Str l : go r abbr' txt = if Text.length txt == 1 then Str txt else abbr txt isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String) abbrs i = [i] splitInlines :: [Inline] -> [Inlines] splitInlines is = filter (not . null) $ go is where go [] = [] go is = let (is1, is') = break (== Space) is in fromList is1 : splitInlines (dropWhile (== Space) is') pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]] pattern PGloss l p g w t = BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]] makeFigures :: Block -> [Block] makeFigures (Div ("", ["figure"], []) blks) = [html "
"] ++ blks ++ [html "
"] where html = RawBlock (Format "html") makeFigures b = [b] enclosed :: Char -> Char -> Text -> Maybe Text enclosed o c txt | Text.length txt >= 2, Text.head txt == o, Text.last txt == c = Just $ Text.init $ Text.tail txt enclosed _ _ _ = Nothing