-
Notifications
You must be signed in to change notification settings - Fork 71
/
get_page_dims.R
52 lines (49 loc) · 2.24 KB
/
get_page_dims.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#' @rdname get_page_dims
#' @title Page length and dimensions
#' @description Get Page Length and Dimensions
#' @param file A character string specifying the path or URL to a PDF file.
#' @param pages An optional integer vector specifying pages to extract from.
#' @param doc Optionally, in lieu of \code{file}, an rJava reference to a PDDocument Java object.
#' @param password Optionally, a character string containing a user password to access a secured PDF.
#' @param copy Specifies whether the original local file(s) should be copied to
#' \code{tempdir()} before processing. \code{FALSE} by default. The argument is
#' ignored if \code{file} is a URL.
#' @details \code{get_n_pages} returns the page length of a PDF document. \code{get_page_dims} extracts the dimensions of specified pages in a PDF document. This can be useful for figuring out how to specify the \code{area} argument in \code{\link{extract_tables}}.
#' @return For \code{get_n_pages}, an integer. For \code{get_page_dims}, a list of two-element numeric vectors specifying the width and height of each page, respectively.
#' @references \href{https://tabula.technology/}{Tabula}
#' @author Thomas J. Leeper <thosjleeper@gmail.com>
#' @examples
#' # simple demo file
#' f <- system.file("examples", "mtcars.pdf", package = "tabulapdf")
#'
#' get_n_pages(file = f)
#' get_page_dims(f)
#' @importFrom tools file_path_sans_ext
#' @importFrom rJava J new
#' @seealso \code{\link{extract_tables}}, \code{\link{extract_text}}, \code{\link{make_thumbnails}}
#' @export
get_page_dims <- function(file, doc, pages = NULL, password = NULL, copy = FALSE) {
  if (!missing(file)) {
    # The document is opened here, so it is also closed here on exit; a
    # caller-supplied `doc` is never closed by this function.
    doc <- load_doc(file, password = password, copy = copy)
    on.exit(doc$close(), add = TRUE)
  } else if (missing(doc)) {
    stop("Either 'file' or 'doc' must be supplied.", call. = FALSE)
  }

  n_pages <- doc$getNumberOfPages()
  if (is.null(pages)) {
    # seq_len() gives an empty vector for a zero-page document, whereas
    # 1L:n would give c(1L, 0L) and request pages that do not exist.
    pages <- seq_len(n_pages)
  } else {
    pages <- as.integer(pages)
    # Fail with a readable R error before an invalid index reaches Java.
    if (anyNA(pages) || any(pages < 1L) || any(pages > n_pages)) {
      stop("'pages' must be integers between 1 and ", n_pages, ".",
           call. = FALSE)
    }
  }

  allpages <- doc$getDocumentCatalog()$getPages()
  # One c(width, height) vector per requested page, in the order requested.
  lapply(pages, function(page_number) {
    # `pages` is 1-based; the page tree's get() is called with a 0-based index.
    media_box <- allpages$get(page_number - 1L)$getMediaBox()
    c(media_box$getWidth(), media_box$getHeight())
  })
}
#' @rdname get_page_dims
#' @export
get_n_pages <- function(file, doc, password = NULL, copy = FALSE) {
  if (!missing(file)) {
    # The document is opened here, so it is also closed here on exit; a
    # caller-supplied `doc` is never closed by this function.
    doc <- load_doc(file, password = password, copy = copy)
    on.exit(doc$close(), add = TRUE)
  } else if (missing(doc)) {
    # Without this check the failure surfaces later as an opaque
    # "argument "doc" is missing" error.
    stop("Either 'file' or 'doc' must be supplied.", call. = FALSE)
  }
  # Page count as reported by the document object itself.
  doc$getNumberOfPages()
}