123 lines
3.9 KiB
Haskell
123 lines
3.9 KiB
Haskell
import Text.Pandoc.Definition
|
||
import Text.Pandoc.JSON
|
||
import Text.Pandoc.Builder
|
||
import Text.Pandoc.Walk
|
||
import Data.Char (isUpper, isDigit)
|
||
import Data.Maybe
|
||
import Data.Text (Text)
|
||
import qualified Data.Text as Text
|
||
|
||
main :: IO ()
|
||
main = toJSONFilter (filter :: Pandoc -> Pandoc) where
|
||
filter =
|
||
walk (concatMap makeFigures) .
|
||
walk spans .
|
||
walk (concatMap glosses)
|
||
|
||
spans :: Inline -> Inline
|
||
spans = \case
|
||
Code attrs txt
|
||
| Just ('\\', txt') <- Text.uncons txt -> Code attrs txt'
|
||
| Just _ <- enclosed '/' '/' txt -> ipaB txt
|
||
| Just _ <- enclosed '[' ']' txt -> ipaN txt
|
||
| Just txt' <- enclosed '{' '}' txt -> lang txt'
|
||
| Just txt' <- enclosed '!' '!' txt -> abbr txt'
|
||
i -> i
|
||
|
||
ipaB, ipaN, lang, abbr :: Text -> Inline
|
||
ipaB = Span (cls ["ipa", "ipa-broad"]) . text'
|
||
ipaN = Span (cls ["ipa", "ipa-narrow"]) . text'
|
||
lang = Span (cls ["lang"]) . lang'
|
||
abbr = Span (cls ["abbr"]) . text' . endash
|
||
|
||
text' :: Text -> [Inline]
|
||
text' = toList . text
|
||
|
||
lang' :: Text -> [Inline]
|
||
lang' txt = case Text.uncons txt of
|
||
Nothing -> []
|
||
Just ('{', txt') -> RawInline "html" "<u>" : lang' txt'
|
||
Just ('}', txt') -> RawInline "html" "</u>" : lang' txt'
|
||
_ -> Str a : lang' b
|
||
where (a, b) = Text.span (\c -> c /= '{' && c /= '}') txt
|
||
|
||
cls :: [Text] -> Attr
|
||
cls cs = ("", cs, [])
|
||
|
||
glosses :: Block -> [Block]
|
||
glosses = \case
|
||
Div (_, cs, _) blocks | "glosses" `elem` cs ->
|
||
[RawBlock (Format "html") "<figure class=glosses>"] ++
|
||
mapMaybe glossTable blocks ++
|
||
[RawBlock (Format "html") "</figure>"]
|
||
b -> [b]
|
||
|
||
pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]]
|
||
pattern PGloss l p g w t =
|
||
BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]]
|
||
|
||
glossTable :: Block -> Maybe Block
|
||
glossTable = \case
|
||
Gloss l s g t -> Just $ make l Nothing s g t
|
||
PGloss l p s g t -> Just $ make l (Just p) s g t
|
||
HorizontalRule -> Nothing
|
||
b -> error $ "found " ++ show b ++ " in gloss section"
|
||
where
|
||
make l p s g t =
|
||
let n = length $ splitInlines s
|
||
colspecs = replicate n (AlignDefault, ColWidthDefault)
|
||
l' = cell1 n l; p' = cell1 n <$> p
|
||
ss = cells s; gs = cells g; t' = cell1 n t
|
||
in
|
||
Table ("", ["gloss"], []) (Caption Nothing []) colspecs
|
||
(TableHead mempty [])
|
||
[TableBody mempty (RowHeadColumns 0) [] $ concat
|
||
[[row ["gloss-lang", "lang"] [l']],
|
||
maybe [] (pure . row ["gloss-pron", "ipa"] . pure) p',
|
||
[row ["gloss-split", "lang"] ss],
|
||
[row ["gloss-gloss"] gs],
|
||
[row ["gloss-trans"] [t']]]]
|
||
(TableFoot mempty [])
|
||
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
|
||
cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is]
|
||
cells = map (cell . concatMap abbrs) . splitInlines
|
||
row c = Row ("", c, [])
|
||
|
||
endash :: Text -> Text
|
||
endash = Text.map \case '-' -> '–'; '_' -> ' '; c -> c
|
||
|
||
abbrs :: Inline -> [Inline]
|
||
abbrs (Str txt) = go $ endash txt where
|
||
go "" = []
|
||
go txt
|
||
| (l, r) <- Text.span isAbbr txt,
|
||
not $ Text.null l
|
||
= abbr' l : go r
|
||
| (l, r) <- Text.break isAbbr txt
|
||
= Str l : go r
|
||
abbr' txt = if Text.length txt == 1 then Str txt else abbr txt
|
||
isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String)
|
||
abbrs i = [i]
|
||
|
||
splitInlines :: [Inline] -> [Inlines]
|
||
splitInlines is = filter (not . null) $ go is where
|
||
go [] = []
|
||
go is =
|
||
let (is1, is') = break (== Space) is in
|
||
fromList is1 : splitInlines (dropWhile (== Space) is')
|
||
|
||
|
||
makeFigures :: Block -> [Block]
|
||
makeFigures (Div ("", ["figure"], []) blks) =
|
||
[html "<figure>"] ++ blks ++ [html "</figure>"]
|
||
where html = RawBlock (Format "html")
|
||
makeFigures b = [b]
|
||
|
||
|
||
enclosed :: Char -> Char -> Text -> Maybe Text
|
||
enclosed o c txt
|
||
| Text.length txt >= 2,
|
||
Text.head txt == o,
|
||
Text.last txt == c
|
||
= Just $ Text.init $ Text.tail txt
|
||
enclosed _ _ _ = Nothing
|