Skip to content

Commit

Permalink
Merge pull request #21 from muschellij2/master
Browse files Browse the repository at this point in the history
ENH: Function to Convert PPTX to PDF
  • Loading branch information
hrbrmstr authored Jul 30, 2019
2 parents b85cb19 + b1d7798 commit 215d8bc
Show file tree
Hide file tree
Showing 12 changed files with 124 additions and 7 deletions.
6 changes: 5 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
language: R
sudo: false
sudo: required
cache: packages

before_install:
# LibreOffice provides the soffice binary used by the conversion tests.
- if [[ "${TRAVIS_OS_NAME}" = "osx" ]]; then brew cask install libreoffice ; fi
# -y keeps apt-get from waiting on a confirmation prompt in the CI job.
- if [[ "${TRAVIS_OS_NAME}" = "linux" ]]; then sudo apt-get update -y && sudo apt-get install -y libreoffice; fi

after_success:
- Rscript -e 'covr::codecov()'
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
S3method(print,docx)
export("%>%")
export(assign_colnames)
export(convert_to_pdf)
export(docx_cmnt_count)
export(docx_describe_cmnts)
export(docx_describe_tbls)
Expand Down
41 changes: 41 additions & 0 deletions R/convert_pptx_to_pdf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Convert a Document (usually PowerPoint) to a PDF
#'
#' Copies the input document to a temporary file, runs the LibreOffice
#' `soffice` binary on that copy, and copies the resulting PDF to `pdf_file`.
#' This functionality requires the use of LibreOffice and the `soffice`
#' binary it contains. See [set_libreoffice_path] for more information.
#'
#' @md
#' @param path path to the document, can be PowerPoint (`.pptx`) or
#'   Word (`.docx` / `.doc`)
#' @param pdf_file output PDF file name. By default, creates a PDF in the
#'   same directory as the `path` file, with the extension replaced by
#'   `.pdf`. An existing file at this location is overwritten.
#' @return `pdf_file`, the path of the PDF that was written.
#' @export
#' @examples
#' \dontrun{
#' path <- system.file("examples/ex.pptx", package="docxtractr")
#' pdf <- convert_to_pdf(path, pdf_file = tempfile(fileext = ".pdf"))
#' path <- system.file("examples/data.docx", package="docxtractr")
#' pdf_doc <- convert_to_pdf(path, pdf_file = tempfile(fileext = ".pdf"))
#' }
convert_to_pdf <- function(path,
                           pdf_file = sub("[.](pptx|docx|doc)$", ".pdf", path,
                                          ignore.case = TRUE)) {
  stopifnot(
    is.character(path),
    length(path) == 1L,
    is_pptx(path) || is_doc(path) || is_docx(path)
  )

  # Never let the output clobber the input document.
  same_file <- identical(
    normalizePath(pdf_file, mustWork = FALSE),
    normalizePath(path, mustWork = FALSE)
  )
  if (same_file) {
    stop("`pdf_file` must differ from `path`", call. = FALSE)
  }

  lo_assert()
  lo_path <- getOption("path_to_libreoffice")

  # soffice writes "<input basename>.pdf" into the output directory, so the
  # conversion runs on a temporary copy and the result is moved to `pdf_file`
  # afterwards. The copy keeps the original extension.
  tmp_base <- tempfile()
  cp_path <- paste0(tmp_base, ".", tolower(tools::file_ext(path)))
  cp_pdf <- paste0(tmp_base, ".pdf")
  on.exit(unlink(c(cp_path, cp_pdf)), add = TRUE)

  if (!file.copy(path, cp_path)) {
    stop("Could not copy '", path, "' to a temporary file", call. = FALSE)
  }

  if (Sys.info()[["sysname"]] == "Windows") {
    convert_win(lo_path, dirname(cp_path), cp_path, convert_to = "pdf")
  } else {
    convert_osx(lo_path, dirname(cp_path), cp_path, convert_to = "pdf")
  }

  if (!file.exists(cp_pdf)) {
    stop("Conversion of '", path, "' to PDF did not succeed", call. = FALSE)
  }

  # overwrite = TRUE: otherwise a pre-existing `pdf_file` would be left
  # untouched and returned as if it were the fresh conversion.
  if (!file.copy(cp_pdf, pdf_file, overwrite = TRUE)) {
    stop("Could not write PDF to '", pdf_file, "'", call. = FALSE)
  }

  pdf_file
}
4 changes: 2 additions & 2 deletions R/read_docs.r
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
#'
#' \dontrun{
#' # from a URL
# budget <- read_docx(
# "http://rud.is/dl/1.DOCX")
#' budget <- read_docx(
#' "http://rud.is/dl/1.DOCX")
#' }
read_docx <- function(path, track_changes=NULL) {

Expand Down
14 changes: 10 additions & 4 deletions R/utils.r
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ is_url <- function(path) { grepl("^(http|ftp)s?://", path) }

# TRUE for each element of `path` whose file extension is "docx",
# compared case-insensitively.
is_docx <- function(path) {
  ext <- tools::file_ext(path)
  tolower(ext) == "docx"
}

# TRUE for each element of `path` whose file extension is "pptx",
# compared case-insensitively.
is_pptx <- function(path) {
  ext <- tools::file_ext(path)
  tolower(ext) == "pptx"
}

# TRUE for each element of `path` whose file extension is "doc",
# compared case-insensitively.
is_doc <- function(path) {
  ext <- tools::file_ext(path)
  tolower(ext) == "doc"
}

# Copy a file to a new location, throw an error if the copy fails.
Expand All @@ -56,18 +58,22 @@ convert_doc_to_docx <- function(docx_dir, doc_file) {
}

# .doc to .docx conversion (or to another format via `convert_to`) for Windows
convert_win <- function(lo_path, docx_dir, doc_file) {
cmd <- sprintf('"%s" -convert-to docx:"MS Word 2007 XML" -headless -outdir "%s" "%s"',
# Run LibreOffice headlessly to convert `doc_file`, writing the result into
# the directory `docx_dir`.
#
# lo_path    path to the LibreOffice executable
# docx_dir   output directory (passed to --outdir)
# doc_file   file to convert
# convert_to target format passed to --convert-to; defaults to DOCX
#
# Returns the value of system(), i.e. the exit status of the command.
convert_win <- function(lo_path, docx_dir, doc_file,
                        convert_to = 'docx:"MS Word 2007 XML"') {
  # Use the double-dash option spelling throughout; the single-dash long
  # options (-headless, -outdir) are the deprecated form.
  cmd <- sprintf(
    '"%s" --convert-to %s --headless --outdir "%s" "%s"',
    lo_path,
    convert_to,
    docx_dir,
    doc_file
  )
  system(cmd, show.output.on.console = FALSE)
}

# .doc to .docx conversion (or to another format via `convert_to`) for OSX
convert_osx <- function(lo_path, docx_dir, doc_file) {
cmd <- sprintf('"%s" --convert-to docx:"MS Word 2007 XML" --headless --outdir "%s" "%s"',
convert_osx <- function(lo_path, docx_dir, doc_file,
convert_to = 'docx:"MS Word 2007 XML"') {
cmd <- sprintf('"%s" --convert-to %s --headless --outdir "%s" "%s"',
lo_path,
convert_to,
docx_dir,
doc_file)
res <- system(cmd, intern = TRUE)
Expand Down
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ cache:
# Adapt as necessary starting from here

build_script:
- choco install libreoffice-fresh & exit 0
- travis-tool.sh install_deps

test_script:
Expand Down
Binary file added inst/examples/ex.pptx
Binary file not shown.
Binary file added inst/examples/preserve.doc
Binary file not shown.
28 changes: 28 additions & 0 deletions man/convert_to_pdf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/read_docx.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/testthat/test-doc-conversion.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
context("DOC conversion works")

test_that("we can convert a DOC to DOCX if LibreOffice Installed", {
  # The conversion needs LibreOffice; skip explicitly (with a reason) rather
  # than passing with zero expectations when it cannot be located.
  lp <- try(docxtractr:::lo_find(), silent = TRUE)
  if (inherits(lp, "try-error")) {
    skip("LibreOffice not found")
  }

  path <- system.file("examples/preserve.doc", package = "docxtractr")
  doc <- read_docx(path)
  expect_s3_class(doc, "docx")
})
23 changes: 23 additions & 0 deletions tests/testthat/test-pptx-conversion.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
context("PPTX conversion works")

test_that("we can convert a PPTX if LibreOffice Installed", {
  # The conversion needs LibreOffice; skip explicitly (with a reason) rather
  # than passing with zero expectations when it cannot be located.
  lp <- try(docxtractr:::lo_find(), silent = TRUE)
  if (inherits(lp, "try-error")) {
    skip("LibreOffice not found")
  }

  path <- system.file("examples/ex.pptx", package = "docxtractr")
  pdf <- convert_to_pdf(path, pdf_file = tempfile(fileext = ".pdf"))
  # file.size() is NA for a missing file, so check existence first.
  expect_true(file.exists(pdf))
  expect_true(file.size(pdf) > 0)
})

test_that("we can convert a DOCX to PDF if LibreOffice Installed", {
  # The conversion needs LibreOffice; skip explicitly (with a reason) rather
  # than passing with zero expectations when it cannot be located.
  lp <- try(docxtractr:::lo_find(), silent = TRUE)
  if (inherits(lp, "try-error")) {
    skip("LibreOffice not found")
  }

  path <- system.file("examples/data.docx", package = "docxtractr")
  pdf <- convert_to_pdf(path, pdf_file = tempfile(fileext = ".pdf"))
  # file.size() is NA for a missing file, so check existence first.
  expect_true(file.exists(pdf))
  expect_true(file.size(pdf) > 0)
})

0 comments on commit 215d8bc

Please sign in to comment.