improve langfilter & add glosses

This commit is contained in:
Rhiannon Morris 2020-11-04 19:04:55 +01:00
parent 776ae2ccee
commit d61d7c791f
2 changed files with 111 additions and 16 deletions

View file

@ -11,13 +11,18 @@ executable langfilter
hs-source-dirs: . hs-source-dirs: .
main-is: langfilter.hs main-is: langfilter.hs
ghc-options: ghc-options:
-Wall -threaded -rtsopts -with-rtsopts=-N -Wall -Wno-missing-signatures -Wno-missing-pattern-synonym-signatures
-Wno-name-shadowing
-threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010 default-language: Haskell2010
default-extensions: default-extensions:
BlockArguments, BlockArguments,
LambdaCase, LambdaCase,
OverloadedStrings,
PatternSynonyms, PatternSynonyms,
ViewPatterns ViewPatterns
build-depends: build-depends:
base ^>= 4.14.0.0, base ^>= 4.14.0.0,
pandoc-types == 1.17.* pandoc-types ^>= 1.22,
text,
pretty-show ^>= 1.10

View file

@ -1,27 +1,117 @@
{-# OPTIONS_GHC -fdefer-typed-holes #-}
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.JSON import Text.Pandoc.JSON
import Text.Pandoc.Builder import Text.Pandoc.Builder
import Text.Pandoc.Walk
import Data.Char (isUpper, isDigit)
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
main :: IO () main :: IO ()
main = toJSONFilter \case main = toJSONFilter filter where
Code _ txt filter :: Pandoc -> Pandoc
| Just _ <- enclosed '/' '/' txt -> filter =
Span (cls ["ipa", "ipa-broad"]) $ text' txt walk spans .
| Just _ <- enclosed '[' ']' txt -> walk (concatMap makeFigures) .
Span (cls ["ipa", "ipa-narrow"]) $ text' txt walk (concatMap glosses)
| Just txt' <- enclosed '{' '}' txt ->
Span (cls ["lang"]) $ text' txt' spans :: Inline -> Inline
spans = \case
Code attrs txt
| Just ('\\', txt') <- Text.uncons txt -> Code attrs txt'
| Just _ <- enclosed '/' '/' txt -> ipaB txt
| Just _ <- enclosed '[' ']' txt -> ipaN txt
| Just txt' <- enclosed '{' '}' txt -> lang txt'
| Just txt' <- enclosed '!' '!' txt -> abbr txt'
i -> i i -> i
cls :: [String] -> Attr ipaB, ipaN, lang, abbr :: Text -> Inline
cls cs = ("", cs, []) ipaB = Span (cls ["ipa", "ipa-broad"]) . text'
ipaN = Span (cls ["ipa", "ipa-narrow"]) . text'
lang = Span (cls ["lang"]) . text'
abbr = Span (cls ["abbr"]) . text' . endash
text' :: String -> [Inline] text' :: Text -> [Inline]
text' = toList . text text' = toList . text
cls :: [Text] -> Attr
cls cs = ("", cs, [])
enclosed :: Char -> Char -> String -> Maybe String glosses :: Block -> [Block]
glosses = \case
Div (_, cs, _) blocks | "glosses" `elem` cs ->
[RawBlock (Format "html") "<figure class=glosses>"] ++
mapMaybe glossTable blocks ++
[RawBlock (Format "html") "</figure>"]
b -> [b]
glossTable :: Block -> Maybe Block
glossTable = \case
Gloss l s g t -> Just $ make l Nothing s g t
PGloss l p s g t -> Just $ make l (Just p) s g t
HorizontalRule -> Nothing
b -> error $ "found " ++ show b ++ " in gloss section"
where
make l p s g t =
let n = length $ splitInlines s
colspecs = replicate n (AlignDefault, ColWidthDefault)
l' = cell1 n l; p' = cell1 n <$> p
ss = cells s; gs = cells g; t' = cell1 n t
in
Table ("", ["gloss"], []) (Caption Nothing []) colspecs
(TableHead mempty [])
[TableBody mempty (RowHeadColumns 0) [] $ concat
[[row "gloss-lang" [l']],
maybe [] (pure . row "gloss-pron" . pure) p',
[row "gloss-split" ss],
[row "gloss-gloss" gs],
[row "gloss-trans" [t']]]]
(TableFoot mempty [])
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is]
cells = map (cell . concatMap abbrs) . splitInlines
row c = Row ("", [c], [])
endash :: Text -> Text
endash = Text.map \case '-' -> ''; '_' -> ' '; c -> c
abbrs :: Inline -> [Inline]
abbrs (Str txt) = go $ endash txt where
go "" = []
go txt
| (l, r) <- Text.span isAbbr txt,
not $ Text.null l
= abbr' l : go r
| (l, r) <- Text.break isAbbr txt
= Str l : go r
abbr' txt = if Text.length txt == 1 then Str txt else abbr txt
isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String)
abbrs i = [i]
splitInlines :: [Inline] -> [Inlines]
splitInlines is = filter (not . null) $ go is where
go [] = []
go is =
let (is1, is') = break (== Space) is in
fromList is1 : splitInlines (dropWhile (== Space) is')
pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]]
pattern PGloss l p g w t =
BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]]
makeFigures :: Block -> [Block]
makeFigures (Div ("", ["figure"], []) blks) =
[html "<figure>"] ++ blks ++ [html "</figure>"]
where html = RawBlock (Format "html")
makeFigures b = [b]
enclosed :: Char -> Char -> Text -> Maybe Text
enclosed o c txt enclosed o c txt
| length txt >= 2, head txt == o, last txt == c | Text.length txt >= 2,
= Just $ init $ tail txt Text.head txt == o,
Text.last txt == c
= Just $ Text.init $ Text.tail txt
enclosed _ _ _ = Nothing enclosed _ _ _ = Nothing