From 133e579dec17a6d0ae256a30717ead327eeef2f0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 20 Mar 2023 18:12:59 +0100 Subject: [PATCH] T.P.Format: add new function `formatFromFilePaths` [API Change] The module `Text.Pandoc.App.FormatHeuristics` is removed. --- pandoc.cabal | 1 - src/Text/Pandoc/App.hs | 27 ++++---- src/Text/Pandoc/App/FormatHeuristics.hs | 92 ------------------------- src/Text/Pandoc/App/OutputSettings.hs | 67 +++++++++--------- src/Text/Pandoc/Format.hs | 86 ++++++++++++++++++++++- 5 files changed, 130 insertions(+), 143 deletions(-) delete mode 100644 src/Text/Pandoc/App/FormatHeuristics.hs diff --git a/pandoc.cabal b/pandoc.cabal index 19fa2b5555222..f1ffcc6ae40e5 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -532,7 +532,6 @@ library exposed-modules: Text.Pandoc, Text.Pandoc.App, - Text.Pandoc.App.FormatHeuristics, Text.Pandoc.Data, Text.Pandoc.Options, Text.Pandoc.Extensions, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 5df6d76682d59..76828e9d8aa5d 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -51,7 +51,6 @@ import Text.Pandoc import Text.Pandoc.Builder (setMeta) import Text.Pandoc.MediaBag (mediaItems) import Text.Pandoc.Image (svgToPng) -import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, IpynbOutput (..), OptInfo(..)) import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, @@ -142,20 +141,20 @@ convertWithOpts' scriptingEngine istty datadir opts = do Just xs | not (optIgnoreArgs opts) -> xs _ -> ["-"] + let defFlavor fmt = Format.FlavoredFormat fmt mempty -- assign reader and writer based on options and filenames - readerName <- case optFrom opts of - Just f -> return f - Nothing -> case formatFromFilePaths sources of - Just f' -> return f' - Nothing | sources == ["-"] -> return "markdown" - | any (isURI . T.pack) sources -> return "html" - | otherwise -> do - report $ CouldNotDeduceFormat - (map (T.pack . takeExtension) sources) "markdown" - return "markdown" - flvrd@(Format.FlavoredFormat readerNameBase _extsDiff) <- - Format.parseFlavoredFormat readerName + case optFrom opts of + Just f -> Format.parseFlavoredFormat f + Nothing -> case Format.formatFromFilePaths sources of + Just f' -> return f' + Nothing | sources == ["-"] -> return $ defFlavor "markdown" + | any (isURI . T.pack) sources -> return $ defFlavor "html" + | otherwise -> do + report $ CouldNotDeduceFormat + (map (T.pack . takeExtension) sources) "markdown" + return $ defFlavor "markdown" + let makeSandboxed pureReader = let files = maybe id (:) (optReferenceDoc opts) . maybe id (:) (optEpubMetadata opts) . @@ -176,7 +175,7 @@ convertWithOpts' scriptingEngine istty datadir opts = do components <- engineLoadCustom scriptingEngine scriptPath r <- case customReader components of Nothing -> throwError $ PandocAppError $ - readerName <> " does not contain a custom reader" + readerNameBase <> " does not contain a custom reader" Just r -> return r let extsConf = fromMaybe mempty (customExtensions components) rexts <- Format.applyExtensionsDiff extsConf flvrd diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs deleted file mode 100644 index 376726f9887e7..0000000000000 --- a/src/Text/Pandoc/App/FormatHeuristics.hs +++ /dev/null @@ -1,92 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.App.FormatHeuristics - Copyright : Copyright (C) 2006-2023 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Guess the format of a file from its name. --} -module Text.Pandoc.App.FormatHeuristics - ( formatFromFilePaths - ) where - -import Data.Char (toLower) -import Data.Foldable (asum) -import Data.Text (Text) -import System.FilePath (takeExtension) - --- | Determines default format based on file extensions; uses the format --- of the first extension that's associated with a format. --- --- Examples: --- --- > formatFromFilePaths ["text.unknown", "no-extension"] --- Nothing --- --- > formatFromFilePaths ["my.md", "other.rst"] --- Just "markdown" -formatFromFilePaths :: [FilePath] -> Maybe Text -formatFromFilePaths = asum . map formatFromFilePath - --- | Determines format based on file extension. -formatFromFilePath :: FilePath -> Maybe Text -formatFromFilePath x = - case takeExtension (map toLower x) of - ".adoc" -> Just "asciidoc" - ".asciidoc" -> Just "asciidoc" - ".context" -> Just "context" - ".ctx" -> Just "context" - ".db" -> Just "docbook" - ".doc" -> Just "doc" -- so we get an "unknown reader" error - ".docx" -> Just "docx" - ".dokuwiki" -> Just "dokuwiki" - ".epub" -> Just "epub" - ".fb2" -> Just "fb2" - ".htm" -> Just "html" - ".html" -> Just "html" - ".icml" -> Just "icml" - ".json" -> Just "json" - ".latex" -> Just "latex" - ".lhs" -> Just "markdown+lhs" - ".ltx" -> Just "latex" - ".markdown" -> Just "markdown" - ".markua" -> Just "markua" - ".mkdn" -> Just "markdown" - ".mkd" -> Just "markdown" - ".mdwn" -> Just "markdown" - ".mdown" -> Just "markdown" - ".Rmd" -> Just "markdown" - ".md" -> Just "markdown" - ".ms" -> Just "ms" - ".muse" -> Just "muse" - ".native" -> Just "native" - ".odt" -> Just "odt" - ".opml" -> Just "opml" - ".org" -> Just "org" - ".pdf" -> Just "pdf" -- so we get an "unknown reader" error - ".pptx" -> Just "pptx" - ".ris" -> Just "ris" - ".roff" -> Just "ms" - ".rst" -> Just "rst" - ".rtf" -> Just "rtf" - ".s5" -> Just "s5" - ".t2t" -> Just "t2t" - ".tei" -> Just "tei" - ".tex" -> Just "latex" - ".texi" -> Just "texinfo" - ".texinfo" -> Just "texinfo" - ".text" -> Just "markdown" - ".textile" -> Just "textile" - ".txt" -> Just "markdown" - ".wiki" -> Just "mediawiki" - ".xhtml" -> Just "html" - ".ipynb" -> Just "ipynb" - ".csv" -> Just "csv" - ".tsv" -> Just "tsv" - ".bib" -> Just "biblatex" - ['.',y] | y `elem` ['1'..'9'] -> Just "man" - _ -> Nothing diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 525055d72eef6..af5c0ddbcf58b 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -37,10 +37,10 @@ import System.FilePath import System.IO (stdout) import Text.Pandoc.Chunks (PathTemplate(..)) import Text.Pandoc -import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..)) import Text.Pandoc.App.CommandLineOptions (engines) -import qualified Text.Pandoc.Format as Format +import Text.Pandoc.Format (FlavoredFormat (..), applyExtensionsDiff, + parseFlavoredFormat, formatFromFilePaths) import Text.Pandoc.Highlighting (lookupHighlightingStyle) import Text.Pandoc.Scripting (ScriptingEngine (engineLoadCustom), CustomComponents(..)) @@ -72,24 +72,26 @@ optToOutputSettings scriptingEngine opts = do let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" || optTo opts == Just "pdf" - (writerName, maybePdfProg) <- + let defaultOutput = "html" + defaultOutputFlavor <- parseFlavoredFormat defaultOutput + (flvrd@(FlavoredFormat format _extsDiff), maybePdfProg) <- if pdfOutput - then liftIO $ pdfWriterAndProg - (case optTo opts of - Just "pdf" -> Nothing - x -> x) - (optPdfEngine opts) + then do + outflavor <- case optTo opts of + Just x | x /= "pdf" -> Just <$> parseFlavoredFormat x + _ -> pure Nothing + liftIO $ pdfWriterAndProg outflavor (optPdfEngine opts) else case optTo opts of - Just f -> return (f, Nothing) + Just f -> (, Nothing) <$> parseFlavoredFormat f Nothing - | outputFile == "-" -> return ("html", Nothing) - | otherwise -> - case formatFromFilePaths [outputFile] of - Nothing -> do - report $ CouldNotDeduceFormat - [T.pack $ takeExtension outputFile] "html" - return ("html", Nothing) - Just f -> return (f, Nothing) + | outputFile == "-" -> + return (defaultOutputFlavor, Nothing) + | otherwise -> case formatFromFilePaths [outputFile] of + Nothing -> do + report $ CouldNotDeduceFormat + [T.pack $ takeExtension outputFile] defaultOutput + return (defaultOutputFlavor,Nothing) + Just f -> return (f, Nothing) let makeSandboxed pureWriter = let files = maybe id (:) (optReferenceDoc opts) . @@ -104,9 +106,6 @@ optToOutputSettings scriptingEngine opts = do ByteStringWriter w -> ByteStringWriter $ \o d -> sandbox files (w o d) - flvrd@(Format.FlavoredFormat format _extsDiff) <- - Format.parseFlavoredFormat writerName - let standalone = optStandalone opts || isBinaryFormat format || pdfOutput let templateOrThrow = \case Left e -> throwError $ PandocTemplateError (T.pack e) @@ -134,7 +133,7 @@ optToOutputSettings scriptingEngine opts = do format <> " does not contain a custom writer" Just w -> return w let extsConf = fromMaybe mempty $ customExtensions components - wexts <- Format.applyExtensionsDiff extsConf flvrd + wexts <- applyExtensionsDiff extsConf flvrd templ <- processCustomTemplate $ case customTemplate components of Nothing -> throwError $ PandocNoTemplateError format @@ -272,18 +271,16 @@ setVariableM key val (Context ctx) = return $ Context $ M.alter go key ctx where go Nothing = Just $ toVal val go (Just x) = Just x -baseWriterName :: T.Text -> T.Text -baseWriterName = T.takeWhile (\c -> c /= '+' && c /= '-') - -pdfWriterAndProg :: Maybe T.Text -- ^ user-specified writer name +pdfWriterAndProg :: Maybe FlavoredFormat -- ^ user-specified format -> Maybe String -- ^ user-specified pdf-engine - -> IO (T.Text, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) + -> IO (FlavoredFormat, Maybe String) -- ^ format, pdf-engine pdfWriterAndProg mWriter mEngine = case go mWriter mEngine of Right (writ, prog) -> return (writ, Just prog) Left err -> liftIO $ E.throwIO $ PandocAppError err where - go Nothing Nothing = Right ("latex", "pdflatex") + go Nothing Nothing = Right + (FlavoredFormat "latex" mempty, "pdflatex") go (Just writer) Nothing = (writer,) <$> engineForWriter writer go Nothing (Just engine) = (,engine) <$> writerForEngine (takeBaseName engine) go (Just writer) (Just engine) | isCustomWriter writer = @@ -291,23 +288,25 @@ pdfWriterAndProg mWriter mEngine = -- what they are doing. Right (writer, engine) go (Just writer) (Just engine) = - case find (== (baseWriterName writer, takeBaseName engine)) engines of + case find (== (formatName writer, takeBaseName engine)) engines of Just _ -> Right (writer, engine) Nothing -> Left $ "pdf-engine " <> T.pack engine <> - " is not compatible with output format " <> writer + " is not compatible with output format " <> + formatName writer writerForEngine eng = case [f | (f,e) <- engines, e == eng] of - fmt : _ -> Right fmt + fmt : _ -> Right (FlavoredFormat fmt mempty) [] -> Left $ "pdf-engine " <> T.pack eng <> " not known" - engineForWriter "pdf" = Left "pdf writer" - engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of + engineForWriter (FlavoredFormat "pdf" _) = Left "pdf writer" + engineForWriter w = case [e | (f,e) <- engines, f == formatName w] of eng : _ -> Right eng [] -> Left $ - "cannot produce pdf output from " <> w + "cannot produce pdf output from " <> + formatName w - isCustomWriter w = ".lua" `T.isSuffixOf` w + isCustomWriter w = ".lua" `T.isSuffixOf` formatName w isBinaryFormat :: T.Text -> Bool isBinaryFormat s = diff --git a/src/Text/Pandoc/Format.hs b/src/Text/Pandoc/Format.hs index 0cc64036276f8..f79742d3fcb11 100644 --- a/src/Text/Pandoc/Format.hs +++ b/src/Text/Pandoc/Format.hs @@ -16,17 +16,22 @@ module Text.Pandoc.Format , parseFlavoredFormat , applyExtensionsDiff , getExtensionsConfig + , formatFromFilePaths ) where import Control.Monad.Except (throwError) +import Data.Char (toLower) +import Data.Foldable (asum) import Data.List (foldl') -import System.FilePath (splitExtension) +import System.FilePath (splitExtension, takeExtension) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Extensions - ( Extensions + ( Extension (Ext_literate_haskell) + , Extensions , disableExtensions , enableExtension + , extensionsFromList , extensionsToList , getAllExtensions , getDefaultExtensions @@ -148,3 +153,80 @@ pExtensionsDiff = foldl' (flip ($)) mempty <$> many extMod extsToEnable extsDiff} _ -> extsDiff{extsToDisable = enableExtension ext $ extsToDisable extsDiff} + +-- | Determines default format based on file extensions; uses the format +-- of the first extension that's associated with a format. +-- +-- Examples: +-- +-- > formatFromFilePaths ["text.unknown", "no-extension"] +-- Nothing +-- +-- > formatFromFilePaths ["my.md", "other.rst"] +-- Just "markdown" +formatFromFilePaths :: [FilePath] -> (Maybe FlavoredFormat) +formatFromFilePaths = asum . map formatFromFilePath + +-- | Determines format based on file extension. +formatFromFilePath :: FilePath -> Maybe FlavoredFormat +formatFromFilePath x = + case takeExtension (map toLower x) of + ".Rmd" -> defFlavor "markdown" + ".adoc" -> defFlavor "asciidoc" + ".asciidoc" -> defFlavor "asciidoc" + ".bib" -> defFlavor "biblatex" + ".context" -> defFlavor "context" + ".csv" -> defFlavor "csv" + ".ctx" -> defFlavor "context" + ".db" -> defFlavor "docbook" + ".doc" -> defFlavor "doc" -- so we get an "unknown reader" error + ".docx" -> defFlavor "docx" + ".dokuwiki" -> defFlavor "dokuwiki" + ".epub" -> defFlavor "epub" + ".fb2" -> defFlavor "fb2" + ".htm" -> defFlavor "html" + ".html" -> defFlavor "html" + ".icml" -> defFlavor "icml" + ".ipynb" -> defFlavor "ipynb" + ".json" -> defFlavor "json" + ".latex" -> defFlavor "latex" + ".lhs" -> defFlavor "markdown" `withExtension` Ext_literate_haskell + ".ltx" -> defFlavor "latex" + ".markdown" -> defFlavor "markdown" + ".markua" -> defFlavor "markua" + ".md" -> defFlavor "markdown" + ".mdown" -> defFlavor "markdown" + ".mdwn" -> defFlavor "markdown" + ".mkd" -> defFlavor "markdown" + ".mkdn" -> defFlavor "markdown" + ".ms" -> defFlavor "ms" + ".muse" -> defFlavor "muse" + ".native" -> defFlavor "native" + ".odt" -> defFlavor "odt" + ".opml" -> defFlavor "opml" + ".org" -> defFlavor "org" + ".pdf" -> defFlavor "pdf" -- so we get an "unknown reader" error + ".pptx" -> defFlavor "pptx" + ".ris" -> defFlavor "ris" + ".roff" -> defFlavor "ms" + ".rst" -> defFlavor "rst" + ".rtf" -> defFlavor "rtf" + ".s5" -> defFlavor "s5" + ".t2t" -> defFlavor "t2t" + ".tei" -> defFlavor "tei" + ".tex" -> defFlavor "latex" + ".texi" -> defFlavor "texinfo" + ".texinfo" -> defFlavor "texinfo" + ".text" -> defFlavor "markdown" + ".textile" -> defFlavor "textile" + ".tsv" -> defFlavor "tsv" + ".txt" -> defFlavor "markdown" + ".wiki" -> defFlavor "mediawiki" + ".xhtml" -> defFlavor "html" + ['.',y] | y `elem` ['1'..'9'] -> defFlavor "man" + _ -> Nothing + where + defFlavor f = Just (FlavoredFormat f mempty) + withExtension Nothing _ = Nothing + withExtension (Just (FlavoredFormat f ed)) ext = Just $ + FlavoredFormat f (ed <> ExtensionsDiff (extensionsFromList [ext]) mempty)