-
Notifications
You must be signed in to change notification settings - Fork 71
/
Copy pathmake_thumbnails.R
77 lines (74 loc) · 3.16 KB
/
make_thumbnails.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#' @title make_thumbnails
#' @description Convert Pages to Image Thumbnails
#' @param file A character string specifying the path or URL to a PDF file.
#' @param outdir An optional character string specifying a directory into which
#' to save the resulting image files. If \code{NULL}, the \code{outdir} is
#' \code{tempdir()}, so for a URL \code{file} both the file and the thumbnails
#' are stored in the R session's temporary directory.
#' @param pages An optional integer vector specifying pages to extract from.
#' If \code{NULL} (the default), every page of the document is rendered.
#' @param format A character string specifying an image file format. One of
#' \code{"png"} (the default), \code{"jpeg"}, \code{"bmp"} or \code{"gif"}.
#' @param resolution A numeric value specifying the image resolution in DPI.
#' Defaults to 72.
#' @param password Optionally, a character string containing a user password
#' to access a secured PDF.
#' @param copy Specifies whether the original local file(s) should be copied to
#' \code{tempdir()} before processing. \code{FALSE} by default. The argument is
#' ignored if \code{file} is URL.
#' @details This function saves each (specified) page of a document as an image
#' rendered at \code{resolution} DPI (72 by default). Images are saved in
#' \code{outdir}, with file names built from the original file name, a
#' zero-padded page number, and the corresponding file format extension.
#' An error is raised if \code{pages} contains missing values or page numbers
#' outside the document.
#' @note This may generate Java \dQuote{INFO} messages in the console,
#' which can be safely ignored.
#' @return A character vector of file paths, one per requested page, with
#' \code{NA} for any page whose image file does not exist after rendering.
#' @references \href{https://tabula.technology/}{Tabula}
#' @author Thomas J. Leeper <thosjleeper@gmail.com>
#' @examples
#' # simple demo file
#' f <- system.file("examples", "mtcars.pdf", package = "tabulapdf")
#'
#' # extract thumbnails from the first page
#' make_thumbnails(f, pages = 1)
#' @importFrom tools file_path_sans_ext
#' @importFrom rJava J new .jfloat
#' @seealso \code{\link{extract_tables}}, \code{\link{extract_text}},
#' \code{\link{make_thumbnails}}
#' @export
make_thumbnails <- function(file,
                            outdir = NULL,
                            pages = NULL,
                            format = c("png", "jpeg", "bmp", "gif"),
                            resolution = 72,
                            password = NULL,
                            copy = FALSE) {
  # Resolve the cheap arguments first so a bad `format` fails before any
  # file is localized or any document is opened.
  format <- match.arg(format)
  if (is.null(outdir)) {
    outdir <- tempdir()
  }

  file <- localize_file(file, copy = copy)
  pdfDocument <- load_doc(file, password = password, copy = copy)
  # add = TRUE so this handler never replaces another registered exit handler.
  on.exit(pdfDocument$close(), add = TRUE)

  n_pages <- get_n_pages(doc = pdfDocument)
  if (is.null(pages)) {
    # seq_len() yields an empty vector (not c(1, 0)) for a zero-page document.
    pages <- seq_len(n_pages)
  } else {
    pages <- as.integer(pages)
    # Fail with a readable R error instead of a Java exception from the
    # renderer when a page number is missing or outside the document.
    if (anyNA(pages) || any(pages < 1L | pages > n_pages)) {
      stop(
        "'pages' must contain page numbers between 1 and ", n_pages,
        call. = FALSE
      )
    }
  }
  # Nothing to render: keep the documented return type (character vector).
  if (length(pages) == 0L) {
    return(character(0))
  }

  # Zero-pad page numbers to a common width so output files sort naturally.
  fileseq <- formatC(pages, width = max(nchar(pages)), flag = "0")
  filename <- paste0(file_path_sans_ext(basename(file)), fileseq, ".", format)
  outfile <- normalizePath(file.path(outdir, filename), mustWork = FALSE)

  # Loop-invariant Java objects are created once and reused for every page.
  renderer <- new(J("org.apache.pdfbox.rendering.PDFRenderer"),
    document = pdfDocument
  )
  image_io <- J("javax.imageio.ImageIO")
  dpi <- .jfloat(resolution)

  for (i in seq_along(pages)) {
    # The renderer is called with a zero-based page index; `pages` is
    # one-based.
    page_index <- pages[i] - 1L
    buffered_image <- renderer$renderImageWithDPI(page_index, scale = dpi)
    java_file <- new(J("java.io.File"), pathname = outfile[i])
    image_io$write(buffered_image, format, java_file)
  }

  # Report NA for any page whose image file was not actually produced.
  outfile[!file.exists(outfile)] <- NA_character_
  outfile
}