Skip to content

Commit

Permalink
T.P.Format: add new function formatFromFilePaths [API Change]
Browse files Browse the repository at this point in the history
The module `Text.Pandoc.App.FormatHeuristics` is removed.
  • Loading branch information
tarleb committed Mar 20, 2023
1 parent 7051169 commit 133e579
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 143 deletions.
1 change: 0 additions & 1 deletion pandoc.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,6 @@ library

exposed-modules: Text.Pandoc,
Text.Pandoc.App,
Text.Pandoc.App.FormatHeuristics,
Text.Pandoc.Data,
Text.Pandoc.Options,
Text.Pandoc.Extensions,
Expand Down
27 changes: 13 additions & 14 deletions src/Text/Pandoc/App.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ import Text.Pandoc
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.MediaBag (mediaItems)
import Text.Pandoc.Image (svgToPng)
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
IpynbOutput (..), OptInfo(..))
import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
Expand Down Expand Up @@ -142,20 +141,20 @@ convertWithOpts' scriptingEngine istty datadir opts = do
Just xs | not (optIgnoreArgs opts) -> xs
_ -> ["-"]

let defFlavor fmt = Format.FlavoredFormat fmt mempty
-- assign reader and writer based on options and filenames
readerName <- case optFrom opts of
Just f -> return f
Nothing -> case formatFromFilePaths sources of
Just f' -> return f'
Nothing | sources == ["-"] -> return "markdown"
| any (isURI . T.pack) sources -> return "html"
| otherwise -> do
report $ CouldNotDeduceFormat
(map (T.pack . takeExtension) sources) "markdown"
return "markdown"

flvrd@(Format.FlavoredFormat readerNameBase _extsDiff) <-
Format.parseFlavoredFormat readerName
case optFrom opts of
Just f -> Format.parseFlavoredFormat f
Nothing -> case Format.formatFromFilePaths sources of
Just f' -> return f'
Nothing | sources == ["-"] -> return $ defFlavor "markdown"
| any (isURI . T.pack) sources -> return $ defFlavor "html"
| otherwise -> do
report $ CouldNotDeduceFormat
(map (T.pack . takeExtension) sources) "markdown"
return $ defFlavor "markdown"

let makeSandboxed pureReader =
let files = maybe id (:) (optReferenceDoc opts) .
maybe id (:) (optEpubMetadata opts) .
Expand All @@ -176,7 +175,7 @@ convertWithOpts' scriptingEngine istty datadir opts = do
components <- engineLoadCustom scriptingEngine scriptPath
r <- case customReader components of
Nothing -> throwError $ PandocAppError $
readerName <> " does not contain a custom reader"
readerNameBase <> " does not contain a custom reader"
Just r -> return r
let extsConf = fromMaybe mempty (customExtensions components)
rexts <- Format.applyExtensionsDiff extsConf flvrd
Expand Down
92 changes: 0 additions & 92 deletions src/Text/Pandoc/App/FormatHeuristics.hs

This file was deleted.

67 changes: 33 additions & 34 deletions src/Text/Pandoc/App/OutputSettings.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ import System.FilePath
import System.IO (stdout)
import Text.Pandoc.Chunks (PathTemplate(..))
import Text.Pandoc
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
import Text.Pandoc.App.Opt (Opt (..))
import Text.Pandoc.App.CommandLineOptions (engines)
import qualified Text.Pandoc.Format as Format
import Text.Pandoc.Format (FlavoredFormat (..), applyExtensionsDiff,
parseFlavoredFormat, formatFromFilePaths)
import Text.Pandoc.Highlighting (lookupHighlightingStyle)
import Text.Pandoc.Scripting (ScriptingEngine (engineLoadCustom),
CustomComponents(..))
Expand Down Expand Up @@ -72,24 +72,26 @@ optToOutputSettings scriptingEngine opts = do

let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" ||
optTo opts == Just "pdf"
(writerName, maybePdfProg) <-
let defaultOutput = "html"
defaultOutputFlavor <- parseFlavoredFormat defaultOutput
(flvrd@(FlavoredFormat format _extsDiff), maybePdfProg) <-
if pdfOutput
then liftIO $ pdfWriterAndProg
(case optTo opts of
Just "pdf" -> Nothing
x -> x)
(optPdfEngine opts)
then do
outflavor <- case optTo opts of
Just x | x /= "pdf" -> Just <$> parseFlavoredFormat x
_ -> pure Nothing
liftIO $ pdfWriterAndProg outflavor (optPdfEngine opts)
else case optTo opts of
Just f -> return (f, Nothing)
Just f -> (, Nothing) <$> parseFlavoredFormat f
Nothing
| outputFile == "-" -> return ("html", Nothing)
| otherwise ->
case formatFromFilePaths [outputFile] of
Nothing -> do
report $ CouldNotDeduceFormat
[T.pack $ takeExtension outputFile] "html"
return ("html", Nothing)
Just f -> return (f, Nothing)
| outputFile == "-" ->
return (defaultOutputFlavor, Nothing)
| otherwise -> case formatFromFilePaths [outputFile] of
Nothing -> do
report $ CouldNotDeduceFormat
[T.pack $ takeExtension outputFile] defaultOutput
return (defaultOutputFlavor,Nothing)
Just f -> return (f, Nothing)

let makeSandboxed pureWriter =
let files = maybe id (:) (optReferenceDoc opts) .
Expand All @@ -104,9 +106,6 @@ optToOutputSettings scriptingEngine opts = do
ByteStringWriter w ->
ByteStringWriter $ \o d -> sandbox files (w o d)

flvrd@(Format.FlavoredFormat format _extsDiff) <-
Format.parseFlavoredFormat writerName

let standalone = optStandalone opts || isBinaryFormat format || pdfOutput
let templateOrThrow = \case
Left e -> throwError $ PandocTemplateError (T.pack e)
Expand Down Expand Up @@ -134,7 +133,7 @@ optToOutputSettings scriptingEngine opts = do
format <> " does not contain a custom writer"
Just w -> return w
let extsConf = fromMaybe mempty $ customExtensions components
wexts <- Format.applyExtensionsDiff extsConf flvrd
wexts <- applyExtensionsDiff extsConf flvrd
templ <- processCustomTemplate $
case customTemplate components of
Nothing -> throwError $ PandocNoTemplateError format
Expand Down Expand Up @@ -272,42 +271,42 @@ setVariableM key val (Context ctx) = return $ Context $ M.alter go key ctx
where go Nothing = Just $ toVal val
go (Just x) = Just x

baseWriterName :: T.Text -> T.Text
baseWriterName = T.takeWhile (\c -> c /= '+' && c /= '-')

pdfWriterAndProg :: Maybe T.Text -- ^ user-specified writer name
pdfWriterAndProg :: Maybe FlavoredFormat -- ^ user-specified format
-> Maybe String -- ^ user-specified pdf-engine
-> IO (T.Text, Maybe String) -- ^ IO (writerName, maybePdfEngineProg)
-> IO (FlavoredFormat, Maybe String) -- ^ format, pdf-engine
pdfWriterAndProg mWriter mEngine =
case go mWriter mEngine of
Right (writ, prog) -> return (writ, Just prog)
Left err -> liftIO $ E.throwIO $ PandocAppError err
where
go Nothing Nothing = Right ("latex", "pdflatex")
go Nothing Nothing = Right
(FlavoredFormat "latex" mempty, "pdflatex")
go (Just writer) Nothing = (writer,) <$> engineForWriter writer
go Nothing (Just engine) = (,engine) <$> writerForEngine (takeBaseName engine)
go (Just writer) (Just engine) | isCustomWriter writer =
-- custom writers can produce any format, so assume the user knows
-- what they are doing.
Right (writer, engine)
go (Just writer) (Just engine) =
case find (== (baseWriterName writer, takeBaseName engine)) engines of
case find (== (formatName writer, takeBaseName engine)) engines of
Just _ -> Right (writer, engine)
Nothing -> Left $ "pdf-engine " <> T.pack engine <>
" is not compatible with output format " <> writer
" is not compatible with output format " <>
formatName writer

writerForEngine eng = case [f | (f,e) <- engines, e == eng] of
fmt : _ -> Right fmt
fmt : _ -> Right (FlavoredFormat fmt mempty)
[] -> Left $
"pdf-engine " <> T.pack eng <> " not known"

engineForWriter "pdf" = Left "pdf writer"
engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of
engineForWriter (FlavoredFormat "pdf" _) = Left "pdf writer"
engineForWriter w = case [e | (f,e) <- engines, f == formatName w] of
eng : _ -> Right eng
[] -> Left $
"cannot produce pdf output from " <> w
"cannot produce pdf output from " <>
formatName w

isCustomWriter w = ".lua" `T.isSuffixOf` w
isCustomWriter w = ".lua" `T.isSuffixOf` formatName w

isBinaryFormat :: T.Text -> Bool
isBinaryFormat s =
Expand Down
86 changes: 84 additions & 2 deletions src/Text/Pandoc/Format.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,22 @@ module Text.Pandoc.Format
, parseFlavoredFormat
, applyExtensionsDiff
, getExtensionsConfig
, formatFromFilePaths
) where

import Control.Monad.Except (throwError)
import Data.Char (toLower)
import Data.Foldable (asum)
import Data.List (foldl')
import System.FilePath (splitExtension)
import System.FilePath (splitExtension, takeExtension)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Error (PandocError (..))
import Text.Pandoc.Extensions
( Extensions
( Extension (Ext_literate_haskell)
, Extensions
, disableExtensions
, enableExtension
, extensionsFromList
, extensionsToList
, getAllExtensions
, getDefaultExtensions
Expand Down Expand Up @@ -148,3 +153,80 @@ pExtensionsDiff = foldl' (flip ($)) mempty <$> many extMod
extsToEnable extsDiff}
_ -> extsDiff{extsToDisable = enableExtension ext $
extsToDisable extsDiff}

-- | Determines default format based on file extensions; uses the format
-- of the first extension that's associated with a format.
--
-- Examples:
--
-- > formatFromFilePaths ["text.unknown", "no-extension"]
-- Nothing
--
-- > formatFromFilePaths ["my.md", "other.rst"]
-- Just "markdown"
formatFromFilePaths :: [FilePath] -> (Maybe FlavoredFormat)
formatFromFilePaths = asum . map formatFromFilePath

-- | Determines format based on file extension.
formatFromFilePath :: FilePath -> Maybe FlavoredFormat
formatFromFilePath x =
case takeExtension (map toLower x) of
".Rmd" -> defFlavor "markdown"
".adoc" -> defFlavor "asciidoc"
".asciidoc" -> defFlavor "asciidoc"
".bib" -> defFlavor "biblatex"
".context" -> defFlavor "context"
".csv" -> defFlavor "csv"
".ctx" -> defFlavor "context"
".db" -> defFlavor "docbook"
".doc" -> defFlavor "doc" -- so we get an "unknown reader" error
".docx" -> defFlavor "docx"
".dokuwiki" -> defFlavor "dokuwiki"
".epub" -> defFlavor "epub"
".fb2" -> defFlavor "fb2"
".htm" -> defFlavor "html"
".html" -> defFlavor "html"
".icml" -> defFlavor "icml"
".ipynb" -> defFlavor "ipynb"
".json" -> defFlavor "json"
".latex" -> defFlavor "latex"
".lhs" -> defFlavor "markdown" `withExtension` Ext_literate_haskell
".ltx" -> defFlavor "latex"
".markdown" -> defFlavor "markdown"
".markua" -> defFlavor "markua"
".md" -> defFlavor "markdown"
".mdown" -> defFlavor "markdown"
".mdwn" -> defFlavor "markdown"
".mkd" -> defFlavor "markdown"
".mkdn" -> defFlavor "markdown"
".ms" -> defFlavor "ms"
".muse" -> defFlavor "muse"
".native" -> defFlavor "native"
".odt" -> defFlavor "odt"
".opml" -> defFlavor "opml"
".org" -> defFlavor "org"
".pdf" -> defFlavor "pdf" -- so we get an "unknown reader" error
".pptx" -> defFlavor "pptx"
".ris" -> defFlavor "ris"
".roff" -> defFlavor "ms"
".rst" -> defFlavor "rst"
".rtf" -> defFlavor "rtf"
".s5" -> defFlavor "s5"
".t2t" -> defFlavor "t2t"
".tei" -> defFlavor "tei"
".tex" -> defFlavor "latex"
".texi" -> defFlavor "texinfo"
".texinfo" -> defFlavor "texinfo"
".text" -> defFlavor "markdown"
".textile" -> defFlavor "textile"
".tsv" -> defFlavor "tsv"
".txt" -> defFlavor "markdown"
".wiki" -> defFlavor "mediawiki"
".xhtml" -> defFlavor "html"
['.',y] | y `elem` ['1'..'9'] -> defFlavor "man"
_ -> Nothing
where
defFlavor f = Just (FlavoredFormat f mempty)
withExtension Nothing _ = Nothing
withExtension (Just (FlavoredFormat f ed)) ext = Just $
FlavoredFormat f (ed <> ExtensionsDiff (extensionsFromList [ext]) mempty)

0 comments on commit 133e579

Please sign in to comment.