improve langfilter & add glosses
This commit is contained in:
parent
776ae2ccee
commit
d61d7c791f
2 changed files with 111 additions and 16 deletions
|
@ -11,13 +11,18 @@ executable langfilter
|
||||||
hs-source-dirs: .
|
hs-source-dirs: .
|
||||||
main-is: langfilter.hs
|
main-is: langfilter.hs
|
||||||
ghc-options:
|
ghc-options:
|
||||||
-Wall -threaded -rtsopts -with-rtsopts=-N
|
-Wall -Wno-missing-signatures -Wno-missing-pattern-synonym-signatures
|
||||||
|
-Wno-name-shadowing
|
||||||
|
-threaded -rtsopts -with-rtsopts=-N
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
default-extensions:
|
default-extensions:
|
||||||
BlockArguments,
|
BlockArguments,
|
||||||
LambdaCase,
|
LambdaCase,
|
||||||
|
OverloadedStrings,
|
||||||
PatternSynonyms,
|
PatternSynonyms,
|
||||||
ViewPatterns
|
ViewPatterns
|
||||||
build-depends:
|
build-depends:
|
||||||
base ^>= 4.14.0.0,
|
base ^>= 4.14.0.0,
|
||||||
pandoc-types == 1.17.*
|
pandoc-types ^>= 1.22,
|
||||||
|
text,
|
||||||
|
pretty-show ^>= 1.10
|
||||||
|
|
|
@ -1,27 +1,117 @@
|
||||||
|
{-# OPTIONS_GHC -fdefer-typed-holes #-}
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.JSON
|
import Text.Pandoc.JSON
|
||||||
import Text.Pandoc.Builder
|
import Text.Pandoc.Builder
|
||||||
|
import Text.Pandoc.Walk
|
||||||
|
import Data.Char (isUpper, isDigit)
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = toJSONFilter \case
|
main = toJSONFilter filter where
|
||||||
Code _ txt
|
filter :: Pandoc -> Pandoc
|
||||||
| Just _ <- enclosed '/' '/' txt ->
|
filter =
|
||||||
Span (cls ["ipa", "ipa-broad"]) $ text' txt
|
walk spans .
|
||||||
| Just _ <- enclosed '[' ']' txt ->
|
walk (concatMap makeFigures) .
|
||||||
Span (cls ["ipa", "ipa-narrow"]) $ text' txt
|
walk (concatMap glosses)
|
||||||
| Just txt' <- enclosed '{' '}' txt ->
|
|
||||||
Span (cls ["lang"]) $ text' txt'
|
spans :: Inline -> Inline
|
||||||
|
spans = \case
|
||||||
|
Code attrs txt
|
||||||
|
| Just ('\\', txt') <- Text.uncons txt -> Code attrs txt'
|
||||||
|
| Just _ <- enclosed '/' '/' txt -> ipaB txt
|
||||||
|
| Just _ <- enclosed '[' ']' txt -> ipaN txt
|
||||||
|
| Just txt' <- enclosed '{' '}' txt -> lang txt'
|
||||||
|
| Just txt' <- enclosed '!' '!' txt -> abbr txt'
|
||||||
i -> i
|
i -> i
|
||||||
|
|
||||||
cls :: [String] -> Attr
|
ipaB, ipaN, lang, abbr :: Text -> Inline
|
||||||
cls cs = ("", cs, [])
|
ipaB = Span (cls ["ipa", "ipa-broad"]) . text'
|
||||||
|
ipaN = Span (cls ["ipa", "ipa-narrow"]) . text'
|
||||||
|
lang = Span (cls ["lang"]) . text'
|
||||||
|
abbr = Span (cls ["abbr"]) . text' . endash
|
||||||
|
|
||||||
text' :: String -> [Inline]
|
text' :: Text -> [Inline]
|
||||||
text' = toList . text
|
text' = toList . text
|
||||||
|
|
||||||
|
cls :: [Text] -> Attr
|
||||||
|
cls cs = ("", cs, [])
|
||||||
|
|
||||||
enclosed :: Char -> Char -> String -> Maybe String
|
glosses :: Block -> [Block]
|
||||||
|
glosses = \case
|
||||||
|
Div (_, cs, _) blocks | "glosses" `elem` cs ->
|
||||||
|
[RawBlock (Format "html") "<figure class=glosses>"] ++
|
||||||
|
mapMaybe glossTable blocks ++
|
||||||
|
[RawBlock (Format "html") "</figure>"]
|
||||||
|
b -> [b]
|
||||||
|
|
||||||
|
glossTable :: Block -> Maybe Block
|
||||||
|
glossTable = \case
|
||||||
|
Gloss l s g t -> Just $ make l Nothing s g t
|
||||||
|
PGloss l p s g t -> Just $ make l (Just p) s g t
|
||||||
|
HorizontalRule -> Nothing
|
||||||
|
b -> error $ "found " ++ show b ++ " in gloss section"
|
||||||
|
where
|
||||||
|
make l p s g t =
|
||||||
|
let n = length $ splitInlines s
|
||||||
|
colspecs = replicate n (AlignDefault, ColWidthDefault)
|
||||||
|
l' = cell1 n l; p' = cell1 n <$> p
|
||||||
|
ss = cells s; gs = cells g; t' = cell1 n t
|
||||||
|
in
|
||||||
|
Table ("", ["gloss"], []) (Caption Nothing []) colspecs
|
||||||
|
(TableHead mempty [])
|
||||||
|
[TableBody mempty (RowHeadColumns 0) [] $ concat
|
||||||
|
[[row "gloss-lang" [l']],
|
||||||
|
maybe [] (pure . row "gloss-pron" . pure) p',
|
||||||
|
[row "gloss-split" ss],
|
||||||
|
[row "gloss-gloss" gs],
|
||||||
|
[row "gloss-trans" [t']]]]
|
||||||
|
(TableFoot mempty [])
|
||||||
|
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
|
||||||
|
cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is]
|
||||||
|
cells = map (cell . concatMap abbrs) . splitInlines
|
||||||
|
row c = Row ("", [c], [])
|
||||||
|
|
||||||
|
endash :: Text -> Text
|
||||||
|
endash = Text.map \case '-' -> '–'; '_' -> ' '; c -> c
|
||||||
|
|
||||||
|
abbrs :: Inline -> [Inline]
|
||||||
|
abbrs (Str txt) = go $ endash txt where
|
||||||
|
go "" = []
|
||||||
|
go txt
|
||||||
|
| (l, r) <- Text.span isAbbr txt,
|
||||||
|
not $ Text.null l
|
||||||
|
= abbr' l : go r
|
||||||
|
| (l, r) <- Text.break isAbbr txt
|
||||||
|
= Str l : go r
|
||||||
|
abbr' txt = if Text.length txt == 1 then Str txt else abbr txt
|
||||||
|
isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String)
|
||||||
|
abbrs i = [i]
|
||||||
|
|
||||||
|
splitInlines :: [Inline] -> [Inlines]
|
||||||
|
splitInlines is = filter (not . null) $ go is where
|
||||||
|
go [] = []
|
||||||
|
go is =
|
||||||
|
let (is1, is') = break (== Space) is in
|
||||||
|
fromList is1 : splitInlines (dropWhile (== Space) is')
|
||||||
|
|
||||||
|
pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]]
|
||||||
|
pattern PGloss l p g w t =
|
||||||
|
BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]]
|
||||||
|
|
||||||
|
|
||||||
|
makeFigures :: Block -> [Block]
|
||||||
|
makeFigures (Div ("", ["figure"], []) blks) =
|
||||||
|
[html "<figure>"] ++ blks ++ [html "</figure>"]
|
||||||
|
where html = RawBlock (Format "html")
|
||||||
|
makeFigures b = [b]
|
||||||
|
|
||||||
|
|
||||||
|
enclosed :: Char -> Char -> Text -> Maybe Text
|
||||||
enclosed o c txt
|
enclosed o c txt
|
||||||
| length txt >= 2, head txt == o, last txt == c
|
| Text.length txt >= 2,
|
||||||
= Just $ init $ tail txt
|
Text.head txt == o,
|
||||||
|
Text.last txt == c
|
||||||
|
= Just $ Text.init $ Text.tail txt
|
||||||
enclosed _ _ _ = Nothing
|
enclosed _ _ _ = Nothing
|
||||||
|
|
Loading…
Reference in a new issue