lots of langfilter stuff, mostly lántas script

This commit is contained in:
Rhiannon Morris 2021-04-29 11:52:44 +02:00
parent 4a177d7828
commit ba5522187c
8 changed files with 348 additions and 132 deletions

View file

@ -18,6 +18,7 @@ OUTPUTSTYLE = $(patsubst %,$(BUILDDIR)/%,$(STYLE))
OUTPUT = $(OUTPUTPAGES) $(OUTPUTSTYLE)
LANGFILTER = $(TMPDIR)/langfilter
LAANTAS_SCRIPT = $(TMPDIR)/laantas-script
EXECS = $(LANGFILTER)
.PHONY: all
@ -26,10 +27,12 @@ all: build
.PHONY: build
build: $(OUTPUT)
$(BUILDDIR)/%.html: $(PAGESDIR)/%.md $(TEMPLATE) $(LANGFILTER)
$(BUILDDIR)/%.html: $(PAGESDIR)/%.md $(TEMPLATE) $(LANGFILTER) $(LAANTAS_SCRIPT)
mkdir -p $(dir $@)
pandoc -s --toc --template $(TEMPLATE) \
--filter $(LANGFILTER) -o $@ $<
LAANTAS_SCRIPT="$(LAANTAS_SCRIPT)" \
DIRNAME="$(subst $(PAGESDIR),$(BUILDDIR),$(dir $<))" \
pandoc -s --toc --template $(TEMPLATE) -o $@ $< \
--filter $(LANGFILTER)
$(BUILDDIR)/%: $(PAGESDIR)/%
mkdir -p $(dir $@)
@ -40,14 +43,22 @@ $(BUILDDIR)/%: %
cp $< $@
$(LANGFILTER): langfilter/*.hs langfilter/langfilter.cabal
cabal build -v0 all
cabal build langfilter
mkdir -p $(dir $@)
find dist-newstyle -name $(notdir $@) \
-print -type f -exec cp {} $(TMPDIR) \;
$(LAANTAS_SCRIPT): laantas-script/*.hs laantas-script/laantas-script.cabal
cabal build laantas-script
mkdir -p $(dir $@)
find dist-newstyle -name $(notdir $@) \
-type f -exec cp {} $(TMPDIR) \;
.PHONY: clean distclean
clean:
rm -rf $(BUILDDIR)
distclean: clean
rm -rf $(TMPDIR)
cabal clean

72
langfilter/Glosses.hs Normal file
View file

@ -0,0 +1,72 @@
module Glosses (glosses) where
import Lang
import LaantasImage
import Spans
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.Pandoc.Walk
import Data.Maybe
import Data.Text (Text)
glosses :: Vars => Block -> IO [Block]
glosses = \case
Div (_, cs, _) blocks | "glosses" `elem` cs -> do
tables <- traverse glossTable blocks
pure $
[RawBlock (Format "html") "<figure class=glosses>"] ++
catMaybes tables ++
[RawBlock (Format "html") "</figure>"]
b -> pure [b]
pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]]
pattern PGloss l p g w t =
BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]]
glossTable :: Vars => Block -> IO (Maybe Block)
glossTable = \case
Gloss l s g t -> Just <$> make l Nothing s g t
PGloss l p s g t -> Just <$> make l (Just p) s g t
HorizontalRule -> pure Nothing
b -> error $ "found " ++ show b ++ " in gloss section"
where
make l p s g t = do
let n = length $ splitInlines s
let colspecs = replicate n (AlignDefault, ColWidthDefault)
let l' = cell1 n l; p' = cell1 n <$> p
let ss = cells s; gs = cells g; t' = cell1 n t
img <- case ?lang of
Just Lántas ->
[Just $ cell1 n [img] | img <- makeImage $ splitImage' $ stripInlines l]
Nothing -> pure Nothing
pure $ Table ("", ["gloss"], []) (Caption Nothing []) colspecs
(TableHead mempty [])
[TableBody mempty (RowHeadColumns 0) [] $ concat
[[row ["gloss-scr", "scr"] [i] | Just i <- [img]],
[row ["gloss-lang", "lang"] [l']],
[row ["gloss-pron", "ipa"] [p] | Just p <- [p']],
[row ["gloss-split", "lang"] ss],
[row ["gloss-gloss"] gs],
[row ["gloss-trans"] [t']]]]
(TableFoot mempty [])
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is]
cells = map (cell . concatMap abbrs) . splitInlines
row c = Row ("", c, [])
stripInlines :: [Inline] -> Text
stripInlines = query \case
Str s -> s
Space -> " "
SoftBreak -> " "
LineBreak -> " "
_ -> ""
splitInlines :: [Inline] -> [Inlines]
splitInlines is = filter (not . null) $ go is where
go [] = []
go is =
let (is1, is') = break (== Space) is in
fromList is1 : splitInlines (dropWhile (== Space) is')

106
langfilter/LaantasImage.hs Normal file
View file

@ -0,0 +1,106 @@
module LaantasImage
(Image (..), splitImage, splitImage', makeImage)
where
import Text.Pandoc.Definition hiding (Image)
import qualified Text.Pandoc.Definition as Pandoc
import Data.Bifunctor
import Data.Function
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as Text
import System.Environment
import System.FilePath
import System.Process
data Image =
Image {
text, title :: Text,
file :: FilePath,
size, stroke :: Double,
width :: Int,
color :: Text,
showText :: Bool
} deriving (Eq, Show)
splitImage :: Text -> Maybe Image
splitImage (Text.uncons -> Just (c, txt))
| c == '!' = Just $ splitImage' txt
| c == '#' = Just $ (splitImage' txt) {showText = False}
splitImage _ = Nothing
splitImage' :: Text -> Image
splitImage' txt =
case imageOpts txt of
Just (txt, opts) -> defaultImage txt
& withOpt opts "file" (\f i -> i {file = makeFile f})
& withOpt opts "size" (\s i -> i {size = readt s})
& withOpt opts "stroke" (\k i -> i {stroke = readt k})
& withOpt opts "width" (\w i -> i {width = readt w})
& withOpt opts "color" (\c i -> i {color = c})
Nothing -> defaultImage txt
where readt x = read $ Text.unpack x
withOpt :: Ord k => Map k v -> k -> (v -> a -> a) -> (a -> a)
withOpt m k f =
case Map.lookup k m of
Just v -> f v
Nothing -> id
defaultImage :: Text -> Image
defaultImage txt =
Image {
text = Text.filter notPunc txt,
title = toTitle txt,
file = makeFile txt,
size = 20,
stroke = 0.75,
width = 600,
color = "hsl(340deg, 50%, 35%)",
showText = True
}
split1 :: Text -> Text -> Maybe (Text, Text)
split1 s txt =
let (a, b) = Text.breakOn s txt in
if Text.null b then
Nothing
else
Just (Text.strip a, Text.strip $ Text.drop (Text.length s) b)
type Opts = Map Text Text
imageOpts :: Text -> Maybe (Text, Opts)
imageOpts = fmap (second splitOpts) . getOpts
getOpts :: Text -> Maybe (Text, Text)
getOpts = split1 "|"
splitOpts :: Text -> Map Text Text
splitOpts = Map.fromList . map splitOpt . Text.splitOn ";" where
splitOpt txt = fromMaybe ("file", txt) $ split1 "=" txt
makeFile :: Text -> FilePath
makeFile txt = map stripWeird (Text.unpack txt) <.> "svg"
where stripWeird c = if weirdUrl c then '_' else c
toTitle :: Text -> Text
toTitle = Text.filter \c -> c /= '\\' && c /= '#'
makeImage :: Image -> IO Inline
makeImage (Image {..}) = do
exe <- getEnv "LAANTAS_SCRIPT"
dir <- getEnv "DIRNAME"
callProcess exe
["-S", show size, "-K", show stroke, "-W", show width,
"-C", Text.unpack color, "-t", Text.unpack text, "-o", dir </> file]
pure $ Pandoc.Image ("", ["scr","laantas"], []) [] (Text.pack file, title)
weirdUrl :: Char -> Bool
weirdUrl c = c `elem` ("#\\?&_/.·,{} " :: String)
notPunc :: Char -> Bool
notPunc c = c `notElem` ("{}·" :: String)

19
langfilter/Lang.hs Normal file
View file

@ -0,0 +1,19 @@
module Lang where
import Text.Pandoc.Definition
import Data.Char (toLower)
import qualified Data.Text as Text
import System.IO
data Lang = Lántas deriving (Eq, Show)
type Vars = (?lang :: Maybe Lang)
toLang :: Maybe MetaValue -> IO (Maybe Lang)
toLang (Just (MetaInlines [Str s])) = toLang (Just (MetaString s)) -- ugh
toLang (Just (MetaString (Text.map toLower -> s)))
| s == "lántas" || s == "laantas" = pure $ Just Lántas
toLang Nothing = pure Nothing
toLang (Just ) = do
hPutStrLn stderr $ "[WARN] unknown language: " <> show
pure Nothing

37
langfilter/Main.hs Normal file
View file

@ -0,0 +1,37 @@
import Lang
import Ebnf
import Spans
import Glosses
import Text.Pandoc.Definition
import Text.Pandoc.JSON
import Text.Pandoc.Walk
import qualified Data.Map as Map
main :: IO ()
main = toJSONFilter filter where
filter p@(Pandoc (Meta m) _) = do
lang' <- toLang $ Map.lookup "lang" m
let ?lang = lang'
fmap (walk makeEbnf .
walk (concatMap makeFigures) .
walk inlineLetterList) $
walkM spans =<<
walkM (fmap concat . traverse glosses) p
makeFigures :: Block -> [Block]
makeFigures (Div ("", ["figure"], []) blks) =
[html "<figure>"] ++ blks ++ [html "</figure>"]
where html = RawBlock (Format "html")
makeFigures b = [b]
inlineLetterList :: Block -> Block
inlineLetterList (Div a@(_, cs, _) blks)
| "letter-list" `elem` cs = Div a (walk go blks)
where
go (Para xs) = Plain xs
go b = b
inlineLetterList b = b

85
langfilter/Spans.hs Normal file
View file

@ -0,0 +1,85 @@
module Spans (spans, ipaB, ipaN, abbr, abbrs) where
import LaantasImage hiding (text)
import Lang
import Text.Pandoc.Definition hiding (Image)
import Text.Pandoc.Builder hiding (Image)
import Data.Char (isUpper, isDigit)
import Data.Text (Text)
import qualified Data.Text as Text
spans :: Vars => Inline -> IO Inline
spans = \case
Code attrs txt
| Just ('\\', txt') <- Text.uncons txt -> pure $ Code attrs txt'
| Just _ <- enclosed '/' '/' txt -> pure $ ipaB txt
| Just _ <- enclosed '[' ']' txt -> pure $ ipaN txt
| Just txt' <- enclosed '{' '}' txt -> lang txt'
| Just txt' <- enclosed '!' '!' txt -> pure $ abbr txt'
i -> pure i
ipaB, ipaN, abbr :: Text -> Inline
ipaB = Span (cls ["ipa", "ipa-broad"]) . text'
ipaN = Span (cls ["ipa", "ipa-narrow"]) . text'
abbr = Span (cls ["abbr"]) . text' . endash
text' :: Text -> [Inline]
text' = toList . text
lang :: Vars => Text -> IO Inline
lang = fmap (Span (cls ["lang"])) . lang'
lang' :: Vars => Text -> IO [Inline]
lang' txt = case ?lang of
Just Lántas
| Just li@(Image {..}) <- splitImage txt ->
if showText then do
img <- makeImage li
pure $ [img, Span (cls ["text"]) $ underlines title]
else
pure <$> makeImage li
_ ->
pure $ underlines txt
notBrace :: Char -> Bool
notBrace c = c /= '{' && c /= '}'
underlines :: Text -> [Inline]
underlines txt = case Text.uncons txt of
Nothing -> []
Just ('{', txt') -> RawInline "html" "<u>" : underlines txt'
Just ('}', txt') -> RawInline "html" "</u>" : underlines txt'
_ -> Str a : underlines b
where (a, b) = Text.span notBrace txt
cls :: [Text] -> Attr
cls cs = ("", cs, [])
enclosed :: Char -> Char -> Text -> Maybe Text
enclosed o c txt
| Text.length txt >= 2,
Text.head txt == o,
Text.last txt == c
= Just $ Text.init $ Text.tail txt
enclosed _ _ _ = Nothing
endash :: Text -> Text
endash = Text.map \case '-' -> ''; '_' -> ' '; c -> c
abbrs :: Inline -> [Inline]
abbrs (Str txt) = go $ endash txt where
go "" = []
go txt
| (l, r) <- Text.span isAbbr txt,
not $ Text.null l
= abbr' l : go r
| (l, r) <- Text.break isAbbr txt
= Str l : go r
abbr' txt = if Text.length txt == 1 then Str txt else abbr txt
isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String)
abbrs i = [i]

View file

@ -9,9 +9,13 @@ maintainer: Rhiannon Morris <rhi@rhiannon.website>
executable langfilter
hs-source-dirs: .
main-is: langfilter.hs
main-is: Main.hs
other-modules:
Ebnf
Lang,
Ebnf,
Spans,
LaantasImage,
Glosses
ghc-options:
-Wall -Wno-missing-signatures -Wno-missing-pattern-synonym-signatures
-Wno-name-shadowing
@ -19,12 +23,20 @@ executable langfilter
default-language: Haskell2010
default-extensions:
BlockArguments,
ConstraintKinds,
LambdaCase,
FlexibleContexts,
ImplicitParams,
MonadComprehensions,
OverloadedStrings,
PatternSynonyms,
RecordWildCards,
ViewPatterns
build-depends:
base ^>= 4.14.0.0,
containers ^>= 0.6.2.1,
filepath ^>= 1.4.2.1,
process ^>= 1.6.11.0,
pandoc-types ^>= 1.22,
text,
pretty-show ^>= 1.10

View file

@ -1,126 +0,0 @@
import Ebnf
import Text.Pandoc.Definition
import Text.Pandoc.JSON
import Text.Pandoc.Builder
import Text.Pandoc.Walk
import Data.Char (isUpper, isDigit)
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
main :: IO ()
main = toJSONFilter (filter :: Pandoc -> Pandoc) where
filter =
walk makeEbnf .
walk (concatMap makeFigures) .
walk spans .
walk (concatMap glosses)
spans :: Inline -> Inline
spans = \case
Code attrs txt
| Just ('\\', txt') <- Text.uncons txt -> Code attrs txt'
| Just _ <- enclosed '/' '/' txt -> ipaB txt
| Just _ <- enclosed '[' ']' txt -> ipaN txt
| Just txt' <- enclosed '{' '}' txt -> lang txt'
| Just txt' <- enclosed '!' '!' txt -> abbr txt'
i -> i
ipaB, ipaN, lang, abbr :: Text -> Inline
ipaB = Span (cls ["ipa", "ipa-broad"]) . text'
ipaN = Span (cls ["ipa", "ipa-narrow"]) . text'
lang = Span (cls ["lang"]) . lang'
abbr = Span (cls ["abbr"]) . text' . endash
text' :: Text -> [Inline]
text' = toList . text
lang' :: Text -> [Inline]
lang' txt = case Text.uncons txt of
Nothing -> []
Just ('{', txt') -> RawInline "html" "<u>" : lang' txt'
Just ('}', txt') -> RawInline "html" "</u>" : lang' txt'
_ -> Str a : lang' b
where (a, b) = Text.span (\c -> c /= '{' && c /= '}') txt
cls :: [Text] -> Attr
cls cs = ("", cs, [])
glosses :: Block -> [Block]
glosses = \case
Div (_, cs, _) blocks | "glosses" `elem` cs ->
[RawBlock (Format "html") "<figure class=glosses>"] ++
mapMaybe glossTable blocks ++
[RawBlock (Format "html") "</figure>"]
b -> [b]
pattern Gloss l g w t = BulletList [[Plain l], [Plain g], [Plain w], [Plain t]]
pattern PGloss l p g w t =
BulletList [[Plain l], [Plain p], [Plain g], [Plain w], [Plain t]]
glossTable :: Block -> Maybe Block
glossTable = \case
Gloss l s g t -> Just $ make l Nothing s g t
PGloss l p s g t -> Just $ make l (Just p) s g t
HorizontalRule -> Nothing
b -> error $ "found " ++ show b ++ " in gloss section"
where
make l p s g t =
let n = length $ splitInlines s
colspecs = replicate n (AlignDefault, ColWidthDefault)
l' = cell1 n l; p' = cell1 n <$> p
ss = cells s; gs = cells g; t' = cell1 n t
in
Table ("", ["gloss"], []) (Caption Nothing []) colspecs
(TableHead mempty [])
[TableBody mempty (RowHeadColumns 0) [] $ concat
[[row ["gloss-lang", "lang"] [l']],
maybe [] (pure . row ["gloss-pron", "ipa"] . pure) p',
[row ["gloss-split", "lang"] ss],
[row ["gloss-gloss"] gs],
[row ["gloss-trans"] [t']]]]
(TableFoot mempty [])
cell is = Cell mempty AlignDefault (RowSpan 1) (ColSpan 1) [Plain is]
cell1 n is = Cell mempty AlignDefault (RowSpan 1) (ColSpan n) [Plain is]
cells = map (cell . concatMap abbrs) . splitInlines
row c = Row ("", c, [])
endash :: Text -> Text
endash = Text.map \case '-' -> ''; '_' -> ' '; c -> c
abbrs :: Inline -> [Inline]
abbrs (Str txt) = go $ endash txt where
go "" = []
go txt
| (l, r) <- Text.span isAbbr txt,
not $ Text.null l
= abbr' l : go r
| (l, r) <- Text.break isAbbr txt
= Str l : go r
abbr' txt = if Text.length txt == 1 then Str txt else abbr txt
isAbbr c = isUpper c || isDigit c || c `elem` (",.;\\[]" :: String)
abbrs i = [i]
splitInlines :: [Inline] -> [Inlines]
splitInlines is = filter (not . null) $ go is where
go [] = []
go is =
let (is1, is') = break (== Space) is in
fromList is1 : splitInlines (dropWhile (== Space) is')
makeFigures :: Block -> [Block]
makeFigures (Div ("", ["figure"], []) blks) =
[html "<figure>"] ++ blks ++ [html "</figure>"]
where html = RawBlock (Format "html")
makeFigures b = [b]
enclosed :: Char -> Char -> Text -> Maybe Text
enclosed o c txt
| Text.length txt >= 2,
Text.head txt == o,
Text.last txt == c
= Just $ Text.init $ Text.tail txt
enclosed _ _ _ = Nothing