www
aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYestin L. Harrison <yestin@ylh.io>2022-07-03 15:08:32 -0700
committerYestin L. Harrison <yestin@ylh.io>2022-07-03 15:08:32 -0700
commit992bb78ecd33f16e454565417f003a714016692f (patch)
tree9dd841ca5655239a7845bee56cb0890958f4a6c6
parent96e5b0469d3dfa1a19ef1247f73043e9281661db (diff)
downloadperc-992bb78ecd33f16e454565417f003a714016692f.tar.gz
perc-992bb78ecd33f16e454565417f003a714016692f.tar.xz
perc-992bb78ecd33f16e454565417f003a714016692f.zip
add -t and table of contents generation
-rw-r--r--Main.hs147
-rw-r--r--README.md14
-rw-r--r--perc.cabal6
-rw-r--r--perc.nix10
4 files changed, 126 insertions, 51 deletions
diff --git a/Main.hs b/Main.hs
index d5a1a3d..36569ea 100644
--- a/Main.hs
+++ b/Main.hs
@@ -1,50 +1,117 @@
module Main where
-import Control.Arrow
+import Prelude hiding (putStr, lines, unlines, log)
+import Control.Monad (join, (>=>))
+import Data.Bool
+import Data.Char (toLower)
import Data.ByteString (hGetContents)
import Data.Default
-import Data.Either
-import Data.Functor
+import Data.Functor ((<&>))
+import Data.Maybe (maybeToList)
+import Data.Monoid (Sum(..), Alt(..))
+import Data.String (fromString)
+import Data.Text (Text, pack, lines, unlines)
import Data.Text.Encoding
-import Prelude hiding (hGetContents, putStr, lines, unlines, concat)
-import Data.Text (pack, concat, lines, unlines, Text)
import Data.Text.IO (putStr)
+import Data.Text.Lazy (toStrict)
+import System.Console.GetOpt
import System.Environment
import System.IO (openFile, IOMode(..), stdin)
-import Text.Pandoc.Class
-import Text.Pandoc.Error
-import Text.Pandoc.Logging
-import Text.Pandoc.Options
-import Text.Pandoc.Readers.Markdown
-import Text.Pandoc.Writers.HTML
-
-mkSource name = (>>= hGetContents) >>> fmap (decodeUtf8 >>> (name,))
-
-setSource (path:_) = mkSource path (openFile path ReadMode)
-setSource _ = mkSource "<standard input>" (return stdin)
-
-shouldReport NoTranslation{} = False
-shouldReport CouldNotLoadTranslations{} = False
-shouldReport msg = messageVerbosity msg == WARNING
-
-logPretty msg =
- case lines (showLogMessage msg) of
- [] -> []
- l:ls -> concat ["[", verbosity msg, "] ", l] : map ("\t"<>) ls
- where verbosity = pack . show . messageVerbosity
-
-doPandoc :: (FilePath, Text) -> PandocPure Text
-doPandoc source =
- readMarkdown def{readerExtensions = pandocExtensions} [source]
- >>= writeHtml5String def{writerHTMLMathMethod = MathML}
- >>= \t -> getLog <&> filter shouldReport <&> \case
- [] -> t
- ms ->
- unlines (t:"<pre class=\"warning\">":concatMap logPretty ms)
- <> "</pre>\n"
-
-doError = flip either id $
- renderError >>> (:["</pre>"]) >>> ("<pre class=\"error\">":) >>> unlines
+import Text.Blaze.Html5 (pre, toHtml, (!))
+import Text.Blaze.Html5.Attributes (class_)
+import Text.Blaze.Html.Renderer.Text (renderHtml)
+import Text.Pandoc
+import Text.Pandoc.Shared
+import Text.Pandoc.Sources (toSources)
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Shared
+
+(.>) = flip (.)
+
+(..>) :: (a -> b -> c) -> (c -> d) -> a -> b -> d
+op ..> f = op .> (.> f)
+
+(>&>) :: Monad m => (a -> m b) -> (b -> c) -> a -> m c
+k >&> f = k >=> f .> return
+
+(>^>) :: Monad m => m a -> (a -> b -> c) -> b -> m c
+m >^> op = (<$> m) . flip op
+
+infixl 2 >&>, >^>
+
+readMd = readCommonMark def{
+ readerExtensions = extensionsFromList [
+ Ext_raw_html,
+ Ext_gfm_auto_identifiers, Ext_implicit_header_references, Ext_footnotes,
+ Ext_autolink_bare_uris,
+ Ext_pipe_tables,
+ Ext_tex_math_dollars
+ ],
+ readerAbbreviations = mempty
+}
+
+writeHtml = writeHtml5String def{
+ writerHTMLMathMethod = MathML,
+ writerHighlightStyle = Nothing,
+ writerSyntaxMap = mempty
+}
+
+getMd :: [String] -> IO (PandocPure Pandoc)
+getMd = getSource >&> (:[]) .> toSources .> readMd where
+ getSource (path:_) = mkSource path =<< openFile path ReadMode
+ getSource [] = mkSource "standard input" stdin
+ mkSource = (,) ..> mapM (hGetContents >&> decodeUtf8)
+
+mapBlocks :: ([Block] -> [Block]) -> Pandoc -> Pandoc
+mapBlocks f (Pandoc meta bs) = Pandoc meta (f bs)
+
+worthToc :: [Block] -> Bool
+worthToc = query countHeaders .> \(h1s, h2s) -> h1s > 1 || h2s > 3 where
+ countHeaders (Header 1 _ _) = (1, 0)
+ countHeaders (Header 2 _ _) = (0, 1)
+ countHeaders _ = mempty :: (Sum Int, Sum Int)
+
+mkToc :: [Block] -> Block
+mkToc = toTableOfContents def .> \case
+ BulletList [[Plain _, bl@(BulletList _)]] -> bl -- don't link to a solitary h1
+ other -> other
+
+addToc :: Pandoc -> Pandoc
+addToc = mapBlocks $ join (worthToc .> bool id (mkToc >>= (:)))
+
+care :: LogMessage -> Bool
+care NoTranslation{} = False
+care CouldNotLoadTranslations{} = False
+care msg = messageVerbosity msg >= WARNING
+
+addLog :: [LogMessage] -> Text -> Text
+addLog [] = id
+addLog ms = (<> htmlLog WARNING showLogMessage ms)
+
+htmlLog :: Verbosity -> (msg -> Text) -> [msg] -> Text
+htmlLog level render = concatMap (render .> lines .> pretty) .> unlines .> inPre
+ where pretty [] = []
+ pretty (l:ls) = "[" <> tshow level <> "] " <> l : map ("\t"<>) ls
+ inPre = toHtml .> (pre ! class__ level) .> renderHtml .> toStrict
+ class__ = show .> map toLower .> fromString .> class_
+
+fmtHtml :: PandocPure Pandoc -> PandocPure Text
+fmtHtml = (=<<) $ addToc .> writeHtml >=> getLog >^> filter care .> addLog
+
+htmlOut :: PandocPure Pandoc -> Text
+htmlOut = fmtHtml .> runPure .> either ((:[]) .> htmlLog ERROR renderError) id
+
+-- write title as plain text to strip `code` etc
+titleOut :: PandocPure Pandoc -> Text
+titleOut = (>>= toTitle .> writePlain def) .> runPure .> either (const "") id
+ where toTitle = mapBlocks (query (join $ h1 ..> Alt) .> getAlt .> maybeToList)
+ h1 = \case Header 1 _ _ -> Just; _ -> const Nothing
+
+dispatch :: [String] -> IO Text
+dispatch = getOpt RequireOrder [t] .> \case
+ ( _, _, es@(_:_)) -> return $ htmlLog ERROR pack es
+ (os, as, _) -> getMd as <&> case os of [] -> htmlOut; _ -> titleOut
+ where t = Option "t" [] (NoArg ()) "extract title instead of formatting"
main :: IO ()
-main = getArgs >>= setSource <&> (doPandoc >>> runPure >>> doError) >>= putStr
+main = getArgs >>= dispatch >>= putStr
diff --git a/README.md b/README.md
index 6e1682c..a9e2ba7 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,14 @@
perc - pandoc for werc
======================
-perc is a slimmed-down Pandoc-markdown-to-HTML converter suitable for use with
-werc.
+Synopsis: `perc [-t] [file.md]`
-It accepts an optional argument to read; otherwise, it reads standard input. It
-deals strictly with (extended) markdown input and HTML output; when all
-transitive dependencies are built with `-split-sections`, `perc` is about half
-the size of the full `pandoc` executable. In addition, it runs in the
+perc is a slimmed-down Pandoc-markdown-to-HTML converter suitable for use with
+werc. Passing `-t` extracts a title as plain text stripped of any formatting;
+otherwise, it will format its argument or standard input to HTML on standard
+outout. It deals strictly with (extended) markdown input and HTML output; when
+all transitive dependencies are built with `-split-sections`, `perc` is about
+half the size of the full `pandoc` executable. In addition, it runs in the
`PandocPure` monad, ensuring no bizarre edge cases that reach out to the
outside world.
@@ -20,4 +21,3 @@ Licensing
While the source code in this repository is permissively (ISC) licensed, keep
in mind that builds will be statically linked against Pandoc and will therefore
be under GPL v2 or (at your option) higher, the same as Pandoc.
-
diff --git a/perc.cabal b/perc.cabal
index 18138cd..588a260 100644
--- a/perc.cabal
+++ b/perc.cabal
@@ -14,6 +14,8 @@ extra-source-files: README.md
executable perc
hs-source-dirs: .
main-is: Main.hs
- build-depends: base, pandoc, bytestring, text, data-default
+ ghc-options: -Wall -Wno-missing-signatures -Wno-orphans
+ build-depends: base, text, bytestring, containers,
+ pandoc, pandoc-types, blaze-html, data-default
default-language: Haskell2010
- default-extensions: OverloadedStrings, LambdaCase, TupleSections
+ default-extensions: OverloadedStrings, LambdaCase
diff --git a/perc.nix b/perc.nix
index 91a1e33..f0e0a2d 100644
--- a/perc.nix
+++ b/perc.nix
@@ -1,11 +1,17 @@
# haskellPackages.callPackage
-{ lib, mkDerivation, base, bytestring, text, pandoc, data-default }:
+{ lib, mkDerivation
+, base, text, bytestring, containers
+, pandoc, pandoc-types, blaze-html, data-default
+}:
mkDerivation {
pname = "perc";
version = "1.0";
src = ./.;
- libraryHaskellDepends = [ base bytestring text pandoc data-default ];
+ libraryHaskellDepends = [
+ base text bytestring containers
+ pandoc pandoc-types blaze-html data-default
+ ];
isLibrary = false;
license = lib.licenses.isc;