Skip to content

Commit

Permalink
add mutool as PDF image conversion backend
Browse files Browse the repository at this point in the history
  • Loading branch information
liao961120 committed Nov 28, 2024
1 parent 2c9878f commit 3983b40
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 15 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export(logit)
export(lst2message)
export(min_max)
export(month)
export(mutool_convert)
export(n_samples)
export(pandoc_html)
export(pandoc_pdf)
Expand Down
132 changes: 117 additions & 15 deletions R/image_format_conversion.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,20 @@
#'
#' Convert a PDF/SVG file to images of other formats with mutool or Inkscape.
#'
#' To use this function, `mutool` (v1.24.0) or `inkscape` (v1.2.2) has to be
#' available from the command line. Alternatively, users can specify the
#' absolute path to inkscape by the option `stom.inkscape`
#' (e.g., `options(stom.inkscape = "/home/stom/bin/inkscape")`).
#'
#' @param infile String. Path to input PDF file.
#' @param dpi Integer. "Dots Per Inch" for PNG outputs with `pdf2png()`.
#' @param outfile Character vector. Output file paths without file extensions.
#' File extensions will be set to `.png` for `*2png()`; `.svg` for
#' `pdf2svg()`; and `.pdf` for `svg2pdf()`. The length of `outfile`
#' should be `1` or corresponds to the length of `page`. If there is a
#' mismatch, only the first element in `outfile` will be taken as the
#' file stem, and the page numbers will be appended to the file stem as
#' suffixes. By default, `outfile` is set to `NULL`, in which the
#' @param outfile String. By default, `outfile` is set to `NULL`, in which the
#' input PDF file stem is used.
#' @param page Integer vector. The pages (starting with 1) to extract from the
#' PDF file, defaults to `NULL`, in which every page is converted to
#' an image.
#' @param backend String. The tool for converting images from PDFs. Defaults to `"mutool"`.
#' Currently supported options: `"mutool"` and `"inkscape"`
#'
#' @examples
#' \dontrun{
Expand All @@ -33,8 +29,116 @@
pdf2png = function(infile,
                   outfile = NULL,
                   page = NULL,
                   dpi = 400,
                   white = FALSE,
                   backend = c("mutool", "inkscape")) {
  # Only the first element of `backend` is consulted (so the default vector
  # selects "mutool"); match.arg() turns an unknown backend into an error
  # instead of silently converting nothing.
  backend = match.arg(backend[1], c("mutool", "inkscape"))

  if (backend == "inkscape") {
    return(invisible(pdf2png_inkscape(infile, outfile, page, dpi, white)))
  }

  # mutool backend: force a `.png` extension on a user-supplied output path so
  # that mutool_convert() picks PNG as the output format.
  if (!is.null(outfile)) outfile = xfun::with_ext(outfile, "png")
  mutool_convert(infile, outfile, format = "png", page = page, dpi = dpi, white = white)
}


#' @export
#' @rdname pdf2png
pdf2svg = function(infile,
                   outfile = NULL,
                   page = NULL,
                   white = FALSE,
                   backend = c("mutool", "inkscape")) {
  # Only the first element of `backend` is consulted (so the default vector
  # selects "mutool"); match.arg() turns an unknown backend into an error
  # instead of silently converting nothing.
  backend = match.arg(backend[1], c("mutool", "inkscape"))

  if (backend == "inkscape") {
    return(invisible(pdf2svg_inkscape(infile, outfile, page, white)))
  }

  # mutool backend: force a `.svg` extension on a user-supplied output path so
  # that mutool_convert() picks SVG as the output format.
  if (!is.null(outfile)) outfile = xfun::with_ext(outfile, "svg")
  mutool_convert(infile, outfile, format = "svg", page = page, white = white)
}


#' Convert PDF to images by mutool
#'
#' Runs `mutool convert` on `infile`. Pages are first written to a private
#' temporary directory and then copied to the directory of `outfile` (or of
#' `infile` when `outfile` is `NULL`). `mutool` has to be available from the
#' command line.
#'
#' @param infile String. Path to input PDF file.
#' @param outfile String. Output file path. Defaults to `NULL`, and is inferred
#'        from `infile`. Only the first element is used.
#' @param format String. Output format (`"png"` or `"svg"`). Ignored when
#'        `outfile` is given with one of these file extensions.
#' @param page Integer vector. The pages (starting with 1) to extract from the
#'        PDF file, defaults to `NULL`, in which every page is converted to
#'        an image.
#' @param dpi Integer. "Dots Per Inch" for PNG outputs.
#' @param white Logical. If `FALSE` (the default), mutool's `alpha` option is
#'        passed to request a transparent background; if `TRUE`, the option is
#'        omitted.
#' @return Invisibly, a character vector with the path(s) of the output file(s).
#' @export
mutool_convert = function(infile,
                          outfile = NULL,
                          format = c("png", "svg"),
                          page = NULL,
                          dpi = 300,
                          white = FALSE) {
  # Set output format: a recognised extension on `outfile` wins over `format`
  if (!is.null(outfile)) {
    outfile = outfile[1]
    fmt = xfun::file_ext(outfile)
    format = if (fmt %in% format) fmt else format[1]
  } else {
    format = format[1]
  }

  # Set output directory & file name
  target = if (is.null(outfile)) infile else outfile
  outdir = dirname(target)
  outfn = basename(xfun::with_ext(target, format))

  # Set page ("1-N" is mutool's syntax for "all pages")
  page = if (is.null(page)) "1-N" else paste(page, collapse = ",")

  # Convert pages into a fresh, private temp directory. Using the session-wide
  # tempdir() directly would make the list.files() call below pick up (and
  # later delete) unrelated temporary files that happen to share the extension.
  tmpdir = tempfile("mutool_")
  dir.create(tmpdir)
  on.exit(unlink(tmpdir, recursive = TRUE), add = TRUE)

  # `alpha` asks mutool for a transparent background, so it is only passed
  # when a white background was NOT requested.
  opts = c(
    paste0("resolution=", dpi),
    if (!isTRUE(white)) "alpha"
  )
  # Paths are quoted because system2() does not quote arguments itself.
  args = c(
    "convert", "-F", format,
    "-o", shQuote(file.path(tmpdir, outfn)),
    "-O", paste(opts, collapse = ","),
    shQuote(infile), page
  )
  # mutool convert -F png -o dir/.png -O resolution=300,alpha dag-dawid.pdf 1-2
  cat("\n mutool", args, "\n")
  status = system2("mutool", args)
  if (!isTRUE(status == 0)) {
    warning("mutool exited with status ", status, call. = FALSE)
  }

  # Collect what mutool produced; the files carry page-number suffixes.
  fps = list.files(tmpdir, full.names = TRUE,
                   pattern = paste0("\\.", format, "$"))
  if (length(fps) == 0) {
    stop("mutool produced no ", format, " output for ", infile, call. = FALSE)
  }

  # Copy files to output directory
  # Rename according to the number of files generated
  if (length(fps) == 1) {
    # A single page is stored under the requested name.
    outfp = file.path(outdir, outfn)
    file.copy(fps, outfp, overwrite = TRUE)
  } else {
    # Multiple pages keep the file names generated by mutool.
    outfp = file.path(outdir, basename(fps))
    file.copy(fps, outdir, overwrite = TRUE)
  }
  message("\nOutput file(s):\n ", paste(basename(outfp), collapse = ", "))
  invisible(outfp)
}


pdf2png_inkscape = function(infile,
outfile = NULL,
page = NULL,
dpi = 1200,
white = FALSE) {
base_cmd = c(
"--export-background-opacity=0"[!white],
"--export-background=white"[white],
Expand All @@ -51,9 +155,7 @@ pdf2png = function(infile,
}


#' @export
#' @rdname pdf2png
pdf2svg = function(infile,
pdf2svg_inkscape = function(infile,
outfile = NULL,
page = NULL,
white = FALSE) {
Expand Down

0 comments on commit 3983b40

Please sign in to comment.