From ba5522187c3b07f20c81d845ac674a3da66c8c88 Mon Sep 17 00:00:00 2001 From: Rhiannon Morris Date: Thu, 29 Apr 2021 11:52:44 +0200 Subject: [PATCH] =?UTF-8?q?lots=20of=20langfilter=20stuff,=20mostly=20l?= =?UTF-8?q?=C3=A1ntas=20script?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 19 ++++-- langfilter/Glosses.hs | 72 +++++++++++++++++++++ langfilter/LaantasImage.hs | 106 ++++++++++++++++++++++++++++++ langfilter/Lang.hs | 19 ++++++ langfilter/Main.hs | 37 +++++++++++ langfilter/Spans.hs | 85 ++++++++++++++++++++++++ langfilter/langfilter.cabal | 16 ++++- langfilter/langfilter.hs | 126 ------------------------------------ 8 files changed, 348 insertions(+), 132 deletions(-) create mode 100644 langfilter/Glosses.hs create mode 100644 langfilter/LaantasImage.hs create mode 100644 langfilter/Lang.hs create mode 100644 langfilter/Main.hs create mode 100644 langfilter/Spans.hs delete mode 100644 langfilter/langfilter.hs diff --git a/Makefile b/Makefile index 2a51ee4..d98ea1f 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,7 @@ OUTPUTSTYLE = $(patsubst %,$(BUILDDIR)/%,$(STYLE)) OUTPUT = $(OUTPUTPAGES) $(OUTPUTSTYLE) LANGFILTER = $(TMPDIR)/langfilter +LAANTAS_SCRIPT = $(TMPDIR)/laantas-script EXECS = $(LANGFILTER) .PHONY: all @@ -26,10 +27,12 @@ all: build .PHONY: build build: $(OUTPUT) -$(BUILDDIR)/%.html: $(PAGESDIR)/%.md $(TEMPLATE) $(LANGFILTER) +$(BUILDDIR)/%.html: $(PAGESDIR)/%.md $(TEMPLATE) $(LANGFILTER) $(LAANTAS_SCRIPT) mkdir -p $(dir $@) - pandoc -s --toc --template $(TEMPLATE) \ - --filter $(LANGFILTER) -o $@ $< + LAANTAS_SCRIPT="$(LAANTAS_SCRIPT)" \ + DIRNAME="$(subst $(PAGESDIR),$(BUILDDIR),$(dir $<))" \ + pandoc -s --toc --template $(TEMPLATE) -o $@ $< \ + --filter $(LANGFILTER) $(BUILDDIR)/%: $(PAGESDIR)/% mkdir -p $(dir $@) @@ -40,14 +43,22 @@ $(BUILDDIR)/%: % cp $< $@ $(LANGFILTER): langfilter/*.hs langfilter/langfilter.cabal - cabal build -v0 all + cabal build langfilter mkdir -p $(dir $@) find dist-newstyle -name $(notdir $@) \ -print -type f -exec cp {} $(TMPDIR) \; +$(LAANTAS_SCRIPT): laantas-script/*.hs laantas-script/laantas-script.cabal + cabal build laantas-script + mkdir -p $(dir $@) + find dist-newstyle -name $(notdir $@) \ + -type f -exec cp {} $(TMPDIR) \; + + .PHONY: clean distclean clean: rm -rf $(BUILDDIR) distclean: clean rm -rf $(TMPDIR) + cabal clean diff --git a/langfilter/Glosses.hs b/langfilter/Glosses.hs new file mode 100644 index 0000000..424cf18 --- /dev/null +++ b/langfilter/Glosses.hs @@ -0,0 +1,72 @@ +module Glosses (glosses) where + +import Lang +import LaantasImage +import Spans + +import Text.Pandoc.Definition +import Text.Pandoc.Builder +import Text.Pandoc.Walk +import Data.Maybe +import Data.Text (Text) + + +glosses :: Vars => Block -> IO [Block] +glosses = \case + Div (_, cs, _) blocks | "glosses" `elem` cs -> do + tables <- traverse glossTable blocks + pure $ + [RawBlock (Format "html") "
"] ++ + catMaybes tables ++ + [RawBlock (Format "html") "
"] + b -> pure [b] + +pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]] +pattern PGloss l p g w t = + BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]] + +glossTable :: Vars => Block -> IO (Maybe Block) +glossTable = \case + Gloss l s g t -> Just <$> make l Nothing s g t + PGloss l p s g t -> Just <$> make l (Just p) s g t + HorizontalRule -> pure Nothing + b -> error $ "found " ++ show b ++ " in gloss section" + where + make l p s g t = do + let n = length $ splitInlines s + let colspecs = replicate n (AlignDefault, ColWidthDefault) + let l' = cell1 n l; p' = cell1 n <$> p + let ss = cells s; gs = cells g; t' = cell1 n t + img <- case ?lang of + Just Lántas -> + [Just $ cell1 n [img] | img <- makeImage $ splitImage' $ stripInlines l] + Nothing -> pure Nothing + pure $ Table ("", ["gloss"], []) (Caption Nothing []) colspecs + (TableHead mempty []) + [TableBody mempty (RowHeadColumns 0) [] $ concat + [[row ["gloss-scr", "scr"] [i] | Just i <- [img]], + [row ["gloss-lang", "lang"] [l']], + [row ["gloss-pron", "ipa"] [p] | Just p <- [p']], + [row ["gloss-split", "lang"] ss], + [row ["gloss-gloss"] gs], + [row ["gloss-trans"] [t']]]] + (TableFoot mempty []) + cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is] + cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is] + cells = map (cell . concatMap abbrs) . splitInlines + row c = Row ("", c, []) + +stripInlines :: [Inline] -> Text +stripInlines = query \case + Str s -> s + Space -> " " + SoftBreak -> " " + LineBreak -> " " + _ -> "" + +splitInlines :: [Inline] -> [Inlines] +splitInlines is = filter (not . null) $ go is where + go [] = [] + go is = + let (is1, is') = break (== Space) is in + fromList is1 : splitInlines (dropWhile (== Space) is') diff --git a/langfilter/LaantasImage.hs b/langfilter/LaantasImage.hs new file mode 100644 index 0000000..48c10c1 --- /dev/null +++ b/langfilter/LaantasImage.hs @@ -0,0 +1,106 @@ +module LaantasImage + (Image (..), splitImage, splitImage', makeImage) +where + +import Text.Pandoc.Definition hiding (Image) +import qualified Text.Pandoc.Definition as Pandoc +import Data.Bifunctor +import Data.Function +import Data.Maybe +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Text (Text) +import qualified Data.Text as Text +import System.Environment +import System.FilePath +import System.Process + + +data Image = + Image { + text, title :: Text, + file :: FilePath, + size, stroke :: Double, + width :: Int, + color :: Text, + showText :: Bool + } deriving (Eq, Show) + +splitImage :: Text -> Maybe Image +splitImage (Text.uncons -> Just (c, txt)) + | c == '!' = Just $ splitImage' txt + | c == '#' = Just $ (splitImage' txt) {showText = False} +splitImage _ = Nothing + +splitImage' :: Text -> Image +splitImage' txt₀ = + case imageOpts txt₀ of + Just (txt, opts) -> defaultImage txt + & withOpt opts "file" (\f i -> i {file = makeFile f}) + & withOpt opts "size" (\s i -> i {size = readt s}) + & withOpt opts "stroke" (\k i -> i {stroke = readt k}) + & withOpt opts "width" (\w i -> i {width = readt w}) + & withOpt opts "color" (\c i -> i {color = c}) + Nothing -> defaultImage txt₀ + where readt x = read $ Text.unpack x + +withOpt :: Ord k => Map k v -> k -> (v -> a -> a) -> (a -> a) +withOpt m k f = + case Map.lookup k m of + Just v -> f v + Nothing -> id + +defaultImage :: Text -> Image +defaultImage txt = + Image { + text = Text.filter notPunc txt, + title = toTitle txt, + file = makeFile txt, + size = 20, + stroke = 0.75, + width = 600, + color = "hsl(340deg, 50%, 35%)", + showText = True + } + +split1 :: Text -> Text -> Maybe (Text, Text) +split1 s txt = + let (a, b) = Text.breakOn s txt in + if Text.null b then + Nothing + else + Just (Text.strip a, Text.strip $ Text.drop (Text.length s) b) + +type Opts = Map Text Text + +imageOpts :: Text -> Maybe (Text, Opts) +imageOpts = fmap (second splitOpts) . getOpts + +getOpts :: Text -> Maybe (Text, Text) +getOpts = split1 "|" + +splitOpts :: Text -> Map Text Text +splitOpts = Map.fromList . map splitOpt . Text.splitOn ";" where + splitOpt txt = fromMaybe ("file", txt) $ split1 "=" txt + +makeFile :: Text -> FilePath +makeFile txt = map stripWeird (Text.unpack txt) <.> "svg" + where stripWeird c = if weirdUrl c then '_' else c + +toTitle :: Text -> Text +toTitle = Text.filter \c -> c /= '\\' && c /= '#' + +makeImage :: Image -> IO Inline +makeImage (Image {..}) = do + exe <- getEnv "LAANTAS_SCRIPT" + dir <- getEnv "DIRNAME" + callProcess exe + ["-S", show size, "-K", show stroke, "-W", show width, + "-C", Text.unpack color, "-t", Text.unpack text, "-o", dir file] + pure $ Pandoc.Image ("", ["scr","laantas"], []) [] (Text.pack file, title) + +weirdUrl :: Char -> Bool +weirdUrl c = c `elem` ("#\\?&_/.·,{} " :: String) + +notPunc :: Char -> Bool +notPunc c = c `notElem` ("{}·" :: String) diff --git a/langfilter/Lang.hs b/langfilter/Lang.hs new file mode 100644 index 0000000..6459142 --- /dev/null +++ b/langfilter/Lang.hs @@ -0,0 +1,19 @@ +module Lang where + +import Text.Pandoc.Definition +import Data.Char (toLower) +import qualified Data.Text as Text +import System.IO + +data Lang = Lántas deriving (Eq, Show) + +type Vars = (?lang :: Maybe Lang) + +toLang :: Maybe MetaValue -> IO (Maybe Lang) +toLang (Just (MetaInlines [Str s])) = toLang (Just (MetaString s)) -- ugh +toLang (Just (MetaString (Text.map toLower -> s))) + | s == "lántas" || s == "laantas" = pure $ Just Lántas +toLang Nothing = pure Nothing +toLang (Just ℓ) = do + hPutStrLn stderr $ "[WARN] unknown language: " <> show ℓ + pure Nothing diff --git a/langfilter/Main.hs b/langfilter/Main.hs new file mode 100644 index 0000000..51dfb8c --- /dev/null +++ b/langfilter/Main.hs @@ -0,0 +1,37 @@ +import Lang +import Ebnf +import Spans +import Glosses + +import Text.Pandoc.Definition +import Text.Pandoc.JSON +import Text.Pandoc.Walk +import qualified Data.Map as Map + + +main :: IO () +main = toJSONFilter filter where + filter p@(Pandoc (Meta m) _) = do + lang' <- toLang $ Map.lookup "lang" m + let ?lang = lang' + fmap (walk makeEbnf . + walk (concatMap makeFigures) . + walk inlineLetterList) $ + walkM spans =<< + walkM (fmap concat . traverse glosses) p + + +makeFigures :: Block -> [Block] +makeFigures (Div ("", ["figure"], []) blks) = + [html "
"] ++ blks ++ [html "
"] + where html = RawBlock (Format "html") +makeFigures b = [b] + + +inlineLetterList :: Block -> Block +inlineLetterList (Div a@(_, cs, _) blks) + | "letter-list" `elem` cs = Div a (walk go blks) + where + go (Para xs) = Plain xs + go b = b +inlineLetterList b = b diff --git a/langfilter/Spans.hs b/langfilter/Spans.hs new file mode 100644 index 0000000..698a6f8 --- /dev/null +++ b/langfilter/Spans.hs @@ -0,0 +1,85 @@ +module Spans (spans, ipaB, ipaN, abbr, abbrs) where + +import LaantasImage hiding (text) +import Lang + +import Text.Pandoc.Definition hiding (Image) +import Text.Pandoc.Builder hiding (Image) +import Data.Char (isUpper, isDigit) +import Data.Text (Text) +import qualified Data.Text as Text + + +spans :: Vars => Inline -> IO Inline +spans = \case + Code attrs txt + | Just ('\\', txt') <- Text.uncons txt -> pure $ Code attrs txt' + | Just _ <- enclosed '/' '/' txt -> pure $ ipaB txt + | Just _ <- enclosed '[' ']' txt -> pure $ ipaN txt + | Just txt' <- enclosed '{' '}' txt -> lang txt' + | Just txt' <- enclosed '!' '!' txt -> pure $ abbr txt' + i -> pure i + +ipaB, ipaN, abbr :: Text -> Inline +ipaB = Span (cls ["ipa", "ipa-broad"]) . text' +ipaN = Span (cls ["ipa", "ipa-narrow"]) . text' +abbr = Span (cls ["abbr"]) . text' . endash + +text' :: Text -> [Inline] +text' = toList . text + +lang :: Vars => Text -> IO Inline +lang = fmap (Span (cls ["lang"])) . lang' + +lang' :: Vars => Text -> IO [Inline] +lang' txt₀ = case ?lang of + Just Lántas + | Just li@(Image {..}) <- splitImage txt₀ -> + if showText then do + img <- makeImage li + pure $ [img, Span (cls ["text"]) $ underlines title] + else + pure <$> makeImage li + _ -> + pure $ underlines txt₀ + +notBrace :: Char -> Bool +notBrace c = c /= '{' && c /= '}' + +underlines :: Text -> [Inline] +underlines txt = case Text.uncons txt of + Nothing -> [] + Just ('{', txt') -> RawInline "html" "" : underlines txt' + Just ('}', txt') -> RawInline "html" "" : underlines txt' + _ -> Str a : underlines b + where (a, b) = Text.span notBrace txt + +cls :: [Text] -> Attr +cls cs = ("", cs, []) + + +enclosed :: Char -> Char -> Text -> Maybe Text +enclosed o c txt + | Text.length txt >= 2, + Text.head txt == o, + Text.last txt == c + = Just $ Text.init $ Text.tail txt +enclosed _ _ _ = Nothing + + +endash :: Text -> Text +endash = Text.map \case '-' -> '–'; '_' -> ' '; c -> c + + +abbrs :: Inline -> [Inline] +abbrs (Str txt) = go $ endash txt where + go "" = [] + go txt + | (l, r) <- Text.span isAbbr txt, + not $ Text.null l + = abbr' l : go r + | (l, r) <- Text.break isAbbr txt + = Str l : go r + abbr' txt = if Text.length txt == 1 then Str txt else abbr txt + isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String) +abbrs i = [i] diff --git a/langfilter/langfilter.cabal b/langfilter/langfilter.cabal index 70a32a4..9173881 100644 --- a/langfilter/langfilter.cabal +++ b/langfilter/langfilter.cabal @@ -9,9 +9,13 @@ maintainer: Rhiannon Morris executable langfilter hs-source-dirs: . - main-is: langfilter.hs + main-is: Main.hs other-modules: - Ebnf + Lang, + Ebnf, + Spans, + LaantasImage, + Glosses ghc-options: -Wall -Wno-missing-signatures -Wno-missing-pattern-synonym-signatures -Wno-name-shadowing @@ -19,12 +23,20 @@ executable langfilter default-language: Haskell2010 default-extensions: BlockArguments, + ConstraintKinds, LambdaCase, + FlexibleContexts, + ImplicitParams, + MonadComprehensions, OverloadedStrings, PatternSynonyms, + RecordWildCards, ViewPatterns build-depends: base ^>= 4.14.0.0, + containers ^>= 0.6.2.1, + filepath ^>= 1.4.2.1, + process ^>= 1.6.11.0, pandoc-types ^>= 1.22, text, pretty-show ^>= 1.10 diff --git a/langfilter/langfilter.hs b/langfilter/langfilter.hs deleted file mode 100644 index 4646524..0000000 --- a/langfilter/langfilter.hs +++ /dev/null @@ -1,126 +0,0 @@ -import Ebnf - -import Text.Pandoc.Definition -import Text.Pandoc.JSON -import Text.Pandoc.Builder -import Text.Pandoc.Walk -import Data.Char (isUpper, isDigit) -import Data.Maybe -import Data.Text (Text) -import qualified Data.Text as Text - -main :: IO () -main = toJSONFilter (filter :: Pandoc -> Pandoc) where - filter = - walk makeEbnf . - walk (concatMap makeFigures) . - walk spans . - walk (concatMap glosses) - -spans :: Inline -> Inline -spans = \case - Code attrs txt - | Just ('\\', txt') <- Text.uncons txt -> Code attrs txt' - | Just _ <- enclosed '/' '/' txt -> ipaB txt - | Just _ <- enclosed '[' ']' txt -> ipaN txt - | Just txt' <- enclosed '{' '}' txt -> lang txt' - | Just txt' <- enclosed '!' '!' txt -> abbr txt' - i -> i - -ipaB, ipaN, lang, abbr :: Text -> Inline -ipaB = Span (cls ["ipa", "ipa-broad"]) . text' -ipaN = Span (cls ["ipa", "ipa-narrow"]) . text' -lang = Span (cls ["lang"]) . lang' -abbr = Span (cls ["abbr"]) . text' . endash - -text' :: Text -> [Inline] -text' = toList . text - -lang' :: Text -> [Inline] -lang' txt = case Text.uncons txt of - Nothing -> [] - Just ('{', txt') -> RawInline "html" "" : lang' txt' - Just ('}', txt') -> RawInline "html" "" : lang' txt' - _ -> Str a : lang' b - where (a, b) = Text.span (\c -> c /= '{' && c /= '}') txt - -cls :: [Text] -> Attr -cls cs = ("", cs, []) - -glosses :: Block -> [Block] -glosses = \case - Div (_, cs, _) blocks | "glosses" `elem` cs -> - [RawBlock (Format "html") "
"] ++ - mapMaybe glossTable blocks ++ - [RawBlock (Format "html") "
"] - b -> [b] - -pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]] -pattern PGloss l p g w t = - BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]] - -glossTable :: Block -> Maybe Block -glossTable = \case - Gloss l s g t -> Just $ make l Nothing s g t - PGloss l p s g t -> Just $ make l (Just p) s g t - HorizontalRule -> Nothing - b -> error $ "found " ++ show b ++ " in gloss section" - where - make l p s g t = - let n = length $ splitInlines s - colspecs = replicate n (AlignDefault, ColWidthDefault) - l' = cell1 n l; p' = cell1 n <$> p - ss = cells s; gs = cells g; t' = cell1 n t - in - Table ("", ["gloss"], []) (Caption Nothing []) colspecs - (TableHead mempty []) - [TableBody mempty (RowHeadColumns 0) [] $ concat - [[row ["gloss-lang", "lang"] [l']], - maybe [] (pure . row ["gloss-pron", "ipa"] . pure) p', - [row ["gloss-split", "lang"] ss], - [row ["gloss-gloss"] gs], - [row ["gloss-trans"] [t']]]] - (TableFoot mempty []) - cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is] - cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is] - cells = map (cell . concatMap abbrs) . splitInlines - row c = Row ("", c, []) - -endash :: Text -> Text -endash = Text.map \case '-' -> '–'; '_' -> ' '; c -> c - -abbrs :: Inline -> [Inline] -abbrs (Str txt) = go $ endash txt where - go "" = [] - go txt - | (l, r) <- Text.span isAbbr txt, - not $ Text.null l - = abbr' l : go r - | (l, r) <- Text.break isAbbr txt - = Str l : go r - abbr' txt = if Text.length txt == 1 then Str txt else abbr txt - isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String) -abbrs i = [i] - -splitInlines :: [Inline] -> [Inlines] -splitInlines is = filter (not . null) $ go is where - go [] = [] - go is = - let (is1, is') = break (== Space) is in - fromList is1 : splitInlines (dropWhile (== Space) is') - - -makeFigures :: Block -> [Block] -makeFigures (Div ("", ["figure"], []) blks) = - [html "
"] ++ blks ++ [html "
"] - where html = RawBlock (Format "html") -makeFigures b = [b] - - -enclosed :: Char -> Char -> Text -> Maybe Text -enclosed o c txt - | Text.length txt >= 2, - Text.head txt == o, - Text.last txt == c - = Just $ Text.init $ Text.tail txt -enclosed _ _ _ = Nothing