diff --git a/langfilter/langfilter.cabal b/langfilter/langfilter.cabal
index 4c18973..3440006 100644
--- a/langfilter/langfilter.cabal
+++ b/langfilter/langfilter.cabal
@@ -11,13 +11,18 @@ executable langfilter
hs-source-dirs: .
main-is: langfilter.hs
ghc-options:
- -Wall -threaded -rtsopts -with-rtsopts=-N
+ -Wall -Wno-missing-signatures -Wno-missing-pattern-synonym-signatures
+ -Wno-name-shadowing
+ -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010
default-extensions:
BlockArguments,
LambdaCase,
+ OverloadedStrings,
PatternSynonyms,
ViewPatterns
build-depends:
base ^>= 4.14.0.0,
- pandoc-types == 1.17.*
+ pandoc-types ^>= 1.22,
+ text,
+ pretty-show ^>= 1.10
diff --git a/langfilter/langfilter.hs b/langfilter/langfilter.hs
index 090c47e..07cf42d 100644
--- a/langfilter/langfilter.hs
+++ b/langfilter/langfilter.hs
@@ -1,27 +1,117 @@
+{-# OPTIONS_GHC -fdefer-typed-holes #-}
import Text.Pandoc.Definition
import Text.Pandoc.JSON
import Text.Pandoc.Builder
+import Text.Pandoc.Walk
+import Data.Char (isUpper, isDigit)
+import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as Text
main :: IO ()
-main = toJSONFilter \case
- Code _ txt
- | Just _ <- enclosed '/' '/' txt ->
- Span (cls ["ipa", "ipa-broad"]) $ text' txt
- | Just _ <- enclosed '[' ']' txt ->
- Span (cls ["ipa", "ipa-narrow"]) $ text' txt
- | Just txt' <- enclosed '{' '}' txt ->
- Span (cls ["lang"]) $ text' txt'
+main = toJSONFilter filter where
+ filter :: Pandoc -> Pandoc
+ filter =
+ walk spans .
+ walk (concatMap makeFigures) .
+ walk (concatMap glosses)
+
+spans :: Inline -> Inline
+spans = \case
+ Code attrs txt
+ | Just ('\\', txt') <- Text.uncons txt -> Code attrs txt'
+ | Just _ <- enclosed '/' '/' txt -> ipaB txt
+ | Just _ <- enclosed '[' ']' txt -> ipaN txt
+ | Just txt' <- enclosed '{' '}' txt -> lang txt'
+ | Just txt' <- enclosed '!' '!' txt -> abbr txt'
i -> i
-cls :: [String] -> Attr
-cls cs = ("", cs, [])
+ipaB, ipaN, lang, abbr :: Text -> Inline
+ipaB = Span (cls ["ipa", "ipa-broad"]) . text'
+ipaN = Span (cls ["ipa", "ipa-narrow"]) . text'
+lang = Span (cls ["lang"]) . text'
+abbr = Span (cls ["abbr"]) . text' . endash
-text' :: String -> [Inline]
+text' :: Text -> [Inline]
text' = toList . text
+cls :: [Text] -> Attr
+cls cs = ("", cs, [])
-enclosed :: Char -> Char -> String -> Maybe String
+glosses :: Block -> [Block]
+glosses = \case
+ Div (_, cs, _) blocks | "glosses" `elem` cs ->
+ [RawBlock (Format "html") ""]
+ b -> [b]
+
+glossTable :: Block -> Maybe Block
+glossTable = \case
+ Gloss l s g t -> Just $ make l Nothing s g t
+ PGloss l p s g t -> Just $ make l (Just p) s g t
+ HorizontalRule -> Nothing
+ b -> error $ "found " ++ show b ++ " in gloss section"
+ where
+ make l p s g t =
+ let n = length $ splitInlines s
+ colspecs = replicate n (AlignDefault, ColWidthDefault)
+ l' = cell1 n l; p' = cell1 n <$> p
+ ss = cells s; gs = cells g; t' = cell1 n t
+ in
+ Table ("", ["gloss"], []) (Caption Nothing []) colspecs
+ (TableHead mempty [])
+ [TableBody mempty (RowHeadColumns 0) [] $ concat
+ [[row "gloss-lang" [l']],
+ maybe [] (pure . row "gloss-pron" . pure) p',
+ [row "gloss-split" ss],
+ [row "gloss-gloss" gs],
+ [row "gloss-trans" [t']]]]
+ (TableFoot mempty [])
+ cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
+ cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is]
+ cells = map (cell . concatMap abbrs) . splitInlines
+ row c = Row ("", [c], [])
+
+endash :: Text -> Text
+endash = Text.map \case '-' -> '–'; '_' -> ' '; c -> c
+
+abbrs :: Inline -> [Inline]
+abbrs (Str txt) = go $ endash txt where
+ go "" = []
+ go txt
+ | (l, r) <- Text.span isAbbr txt,
+ not $ Text.null l
+ = abbr' l : go r
+ | (l, r) <- Text.break isAbbr txt
+ = Str l : go r
+ abbr' txt = if Text.length txt == 1 then Str txt else abbr txt
+ isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String)
+abbrs i = [i]
+
+splitInlines :: [Inline] -> [Inlines]
+splitInlines is = filter (not . null) $ go is where
+ go [] = []
+ go is =
+ let (is1, is') = break (== Space) is in
+ fromList is1 : splitInlines (dropWhile (== Space) is')
+
+pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]]
+pattern PGloss l p g w t =
+ BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]]
+
+
+makeFigures :: Block -> [Block]
+makeFigures (Div ("", ["figure"], []) blks) =
+ [html ""]
+ where html = RawBlock (Format "html")
+makeFigures b = [b]
+
+
+enclosed :: Char -> Char -> Text -> Maybe Text
enclosed o c txt
- | length txt >= 2, head txt == o, last txt == c
- = Just $ init $ tail txt
+ | Text.length txt >= 2,
+ Text.head txt == o,
+ Text.last txt == c
+ = Just $ Text.init $ Text.tail txt
enclosed _ _ _ = Nothing