diff --git a/langfilter/langfilter.cabal b/langfilter/langfilter.cabal index 4c18973..3440006 100644 --- a/langfilter/langfilter.cabal +++ b/langfilter/langfilter.cabal @@ -11,13 +11,18 @@ executable langfilter hs-source-dirs: . main-is: langfilter.hs ghc-options: - -Wall -threaded -rtsopts -with-rtsopts=-N + -Wall -Wno-missing-signatures -Wno-missing-pattern-synonym-signatures + -Wno-name-shadowing + -threaded -rtsopts -with-rtsopts=-N default-language: Haskell2010 default-extensions: BlockArguments, LambdaCase, + OverloadedStrings, PatternSynonyms, ViewPatterns build-depends: base ^>= 4.14.0.0, - pandoc-types == 1.17.* + pandoc-types ^>= 1.22, + text, + pretty-show ^>= 1.10 diff --git a/langfilter/langfilter.hs b/langfilter/langfilter.hs index 090c47e..07cf42d 100644 --- a/langfilter/langfilter.hs +++ b/langfilter/langfilter.hs @@ -1,27 +1,117 @@ +{-# OPTIONS_GHC -fdefer-typed-holes #-} import Text.Pandoc.Definition import Text.Pandoc.JSON import Text.Pandoc.Builder +import Text.Pandoc.Walk +import Data.Char (isUpper, isDigit) +import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as Text main :: IO () -main = toJSONFilter \case - Code _ txt - | Just _ <- enclosed '/' '/' txt -> - Span (cls ["ipa", "ipa-broad"]) $ text' txt - | Just _ <- enclosed '[' ']' txt -> - Span (cls ["ipa", "ipa-narrow"]) $ text' txt - | Just txt' <- enclosed '{' '}' txt -> - Span (cls ["lang"]) $ text' txt' +main = toJSONFilter filter where + filter :: Pandoc -> Pandoc + filter = + walk spans . + walk (concatMap makeFigures) . + walk (concatMap glosses) + +spans :: Inline -> Inline +spans = \case + Code attrs txt + | Just ('\\', txt') <- Text.uncons txt -> Code attrs txt' + | Just _ <- enclosed '/' '/' txt -> ipaB txt + | Just _ <- enclosed '[' ']' txt -> ipaN txt + | Just txt' <- enclosed '{' '}' txt -> lang txt' + | Just txt' <- enclosed '!' '!' txt -> abbr txt' i -> i -cls :: [String] -> Attr -cls cs = ("", cs, []) +ipaB, ipaN, lang, abbr :: Text -> Inline +ipaB = Span (cls ["ipa", "ipa-broad"]) . text' +ipaN = Span (cls ["ipa", "ipa-narrow"]) . text' +lang = Span (cls ["lang"]) . text' +abbr = Span (cls ["abbr"]) . text' . endash -text' :: String -> [Inline] +text' :: Text -> [Inline] text' = toList . text +cls :: [Text] -> Attr +cls cs = ("", cs, []) -enclosed :: Char -> Char -> String -> Maybe String +glosses :: Block -> [Block] +glosses = \case + Div (_, cs, _) blocks | "glosses" `elem` cs -> + [RawBlock (Format "html") "
"] ++ + mapMaybe glossTable blocks ++ + [RawBlock (Format "html") "
"] + b -> [b] + +glossTable :: Block -> Maybe Block +glossTable = \case + Gloss l s g t -> Just $ make l Nothing s g t + PGloss l p s g t -> Just $ make l (Just p) s g t + HorizontalRule -> Nothing + b -> error $ "found " ++ show b ++ " in gloss section" + where + make l p s g t = + let n = length $ splitInlines s + colspecs = replicate n (AlignDefault, ColWidthDefault) + l' = cell1 n l; p' = cell1 n <$> p + ss = cells s; gs = cells g; t' = cell1 n t + in + Table ("", ["gloss"], []) (Caption Nothing []) colspecs + (TableHead mempty []) + [TableBody mempty (RowHeadColumns 0) [] $ concat + [[row "gloss-lang" [l']], + maybe [] (pure . row "gloss-pron" . pure) p', + [row "gloss-split" ss], + [row "gloss-gloss" gs], + [row "gloss-trans" [t']]]] + (TableFoot mempty []) + cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is] + cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is] + cells = map (cell . concatMap abbrs) . splitInlines + row c = Row ("", [c], []) + +endash :: Text -> Text +endash = Text.map \case '-' -> '–'; '_' -> ' '; c -> c + +abbrs :: Inline -> [Inline] +abbrs (Str txt) = go $ endash txt where + go "" = [] + go txt + | (l, r) <- Text.span isAbbr txt, + not $ Text.null l + = abbr' l : go r + | (l, r) <- Text.break isAbbr txt + = Str l : go r + abbr' txt = if Text.length txt == 1 then Str txt else abbr txt + isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String) +abbrs i = [i] + +splitInlines :: [Inline] -> [Inlines] +splitInlines is = filter (not . null) $ go is where + go [] = [] + go is = + let (is1, is') = break (== Space) is in + fromList is1 : splitInlines (dropWhile (== Space) is') + +pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]] +pattern PGloss l p g w t = + BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]] + + +makeFigures :: Block -> [Block] +makeFigures (Div ("", ["figure"], []) blks) = + [html "
"] ++ blks ++ [html "
"] + where html = RawBlock (Format "html") +makeFigures b = [b] + + +enclosed :: Char -> Char -> Text -> Maybe Text enclosed o c txt - | length txt >= 2, head txt == o, last txt == c - = Just $ init $ tail txt + | Text.length txt >= 2, + Text.head txt == o, + Text.last txt == c + = Just $ Text.init $ Text.tail txt enclosed _ _ _ = Nothing