diff --git a/DESCRIPTION b/DESCRIPTION index 38915ecc..ee7f6d37 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,7 +44,7 @@ Suggests: later, readr, yaml, - feather, + arrow, future, rstudioapi, spelling, diff --git a/NAMESPACE b/NAMESPACE index d12ec054..fbef9ad9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ export(parser_json) export(parser_multi) export(parser_none) export(parser_octet) +export(parser_parquet) export(parser_rds) export(parser_read_file) export(parser_text) @@ -83,6 +84,7 @@ export(serializer_html) export(serializer_htmlwidget) export(serializer_jpeg) export(serializer_json) +export(serializer_parquet) export(serializer_pdf) export(serializer_png) export(serializer_print) diff --git a/NEWS.md b/NEWS.md index f86cd9ec..417d09e2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ ## New features * Introduces new GeoJSON serializer and parser. GeoJSON objects are parsed into `sf` objects and `sf` or `sfc` objects will be serialized into GeoJSON. (@josiahparry, #830) +* Update feather serializer to use the arrow package. The new default feather MIME type is `application/vnd.apache.arrow.file`. 
(@pachadotdev #849) +* Add parquet serializer and parser by using the arrow package (@pachadotdev #849) ## Bug fixes diff --git a/R/content-types.R b/R/content-types.R index c60d8087..91d1fc7a 100644 --- a/R/content-types.R +++ b/R/content-types.R @@ -42,7 +42,8 @@ knownContentTypes <- c( dotx = "application/vnd.openxmlformats-officedocument.wordprocessingml.template", xlam = "application/vnd.ms-excel.addin.macroEnabled.12", xlsb = "application/vnd.ms-excel.sheet.binary.macroEnabled.12", - feather = "application/feather", + feather = "application/vnd.apache.arrow.file", + parquet = "application/vnd.apache.parquet", rds = "application/rds", tsv = "application/tab-separated-values", csv = "application/csv", diff --git a/R/parse-body.R b/R/parse-body.R index d734badc..6ca067d8 100644 --- a/R/parse-body.R +++ b/R/parse-body.R @@ -480,18 +480,27 @@ parser_rds <- function(...) { }) } -#' @describeIn parsers feather parser. See [feather::read_feather()] for more details. +#' @describeIn parsers feather parser. See [arrow::read_feather()] for more details. #' @export parser_feather <- function(...) { parser_read_file(function(tmpfile) { - if (!requireNamespace("feather", quietly = TRUE)) { - stop("`feather` must be installed for `parser_feather` to work") + if (!requireNamespace("arrow", quietly = TRUE)) { + stop("`arrow` must be installed for `parser_feather` to work") } - feather::read_feather(tmpfile, ...) + arrow::read_feather(tmpfile, ...) }) } - +#' @describeIn parsers parquet parser. See [arrow::read_parquet()] for more details. +#' @export +parser_parquet <- function(...) { + parser_read_file(function(tmpfile) { + if (!requireNamespace("arrow", quietly = TRUE)) { + stop("`arrow` must be installed for `parser_parquet` to work") + } + arrow::read_parquet(tmpfile, ...) + }) +} #' @describeIn parsers Octet stream parser. Returns the raw content. 
#' @export @@ -568,7 +577,8 @@ register_parsers_onLoad <- function() { register_parser("octet", parser_octet, fixed = "application/octet-stream") register_parser("form", parser_form, fixed = "application/x-www-form-urlencoded") register_parser("rds", parser_rds, fixed = "application/rds") - register_parser("feather", parser_feather, fixed = "application/feather") + register_parser("feather", parser_feather, fixed = c("application/vnd.apache.arrow.file", "application/feather")) + register_parser("parquet", parser_parquet, fixed = "application/vnd.apache.parquet") register_parser("text", parser_text, fixed = "text/plain", regex = "^text/") register_parser("tsv", parser_tsv, fixed = c("application/tab-separated-values", "text/tab-separated-values")) # yaml types: https://stackoverflow.com/a/38000954/591574 diff --git a/R/serializer.R b/R/serializer.R index 4a6b81b8..76ef759c 100644 --- a/R/serializer.R +++ b/R/serializer.R @@ -263,17 +263,32 @@ serializer_rds <- function(version = "2", ascii = FALSE, ..., type = "applicatio }) } -#' @describeIn serializers feather serializer. See also: [feather::write_feather()] +#' @describeIn serializers feather serializer. See also: [arrow::write_feather()] #' @export -serializer_feather <- function(type = "application/feather") { - if (!requireNamespace("feather", quietly = TRUE)) { - stop("`feather` must be installed for `serializer_feather` to work") +serializer_feather <- function(type = "application/vnd.apache.arrow.file") { + if (!requireNamespace("arrow", quietly = TRUE)) { + stop("`arrow` must be installed for `serializer_feather` to work") } serializer_write_file( fileext = ".feather", type = type, write_fn = function(val, tmpfile) { - feather::write_feather(val, tmpfile) + arrow::write_feather(val, tmpfile) + } + ) +} + +#' @describeIn serializers parquet serializer. 
See also: [arrow::write_parquet()] +#' @export +serializer_parquet <- function(type = "application/vnd.apache.parquet") { + if (!requireNamespace("arrow", quietly = TRUE)) { + stop("`arrow` must be installed for `serializer_parquet` to work") + } + serializer_write_file( + fileext = ".parquet", + type = type, + write_fn = function(val, tmpfile) { + arrow::write_parquet(val, tmpfile) } ) } @@ -614,6 +629,7 @@ add_serializers_onLoad <- function() { register_serializer("csv", serializer_csv) register_serializer("tsv", serializer_tsv) register_serializer("feather", serializer_feather) + register_serializer("parquet", serializer_parquet) register_serializer("yaml", serializer_yaml) register_serializer("geojson", serializer_geojson) diff --git a/man/parsers.Rd b/man/parsers.Rd index d4be32d6..64c7c81f 100644 --- a/man/parsers.Rd +++ b/man/parsers.Rd @@ -11,6 +11,7 @@ \alias{parser_read_file} \alias{parser_rds} \alias{parser_feather} +\alias{parser_parquet} \alias{parser_octet} \alias{parser_multi} \alias{parser_none} @@ -36,6 +37,8 @@ parser_rds(...) parser_feather(...) +parser_parquet(...) + parser_octet() parser_multi() @@ -86,7 +89,9 @@ This parser should be used when reading from a file is required. \item \code{parser_rds}: RDS parser. See \code{\link[=readRDS]{readRDS()}} for more details. -\item \code{parser_feather}: feather parser. See \code{\link[feather:read_feather]{feather::read_feather()}} for more details. +\item \code{parser_feather}: feather parser. See \code{\link[arrow:read_feather]{arrow::read_feather()}} for more details. + +\item \code{parser_parquet}: parquet parser. See \code{\link[arrow:read_parquet]{arrow::read_parquet()}} for more details. \item \code{parser_octet}: Octet stream parser. Returns the raw content. 
diff --git a/man/serializers.Rd b/man/serializers.Rd index eee54968..4f13eca4 100644 --- a/man/serializers.Rd +++ b/man/serializers.Rd @@ -11,6 +11,7 @@ \alias{serializer_geojson} \alias{serializer_rds} \alias{serializer_feather} +\alias{serializer_parquet} \alias{serializer_yaml} \alias{serializer_text} \alias{serializer_format} @@ -45,7 +46,9 @@ serializer_geojson(..., type = "application/geo+json") serializer_rds(version = "2", ascii = FALSE, ..., type = "application/rds") -serializer_feather(type = "application/feather") +serializer_feather(type = "application/vnd.apache.arrow.file") + +serializer_parquet(type = "application/vnd.apache.parquet") serializer_yaml(..., type = "text/x-yaml; charset=UTF-8") @@ -135,7 +138,9 @@ more details on Plumber serializers and how to customize their behavior. \item \code{serializer_rds}: RDS serializer. See also: \code{\link[base:serialize]{base::serialize()}} -\item \code{serializer_feather}: feather serializer. See also: \code{\link[feather:read_feather]{feather::write_feather()}} +\item \code{serializer_feather}: feather serializer. See also: \code{\link[arrow:write_feather]{arrow::write_feather()}} + +\item \code{serializer_parquet}: parquet serializer. See also: \code{\link[arrow:write_parquet]{arrow::write_parquet()}} \item \code{serializer_yaml}: YAML serializer. 
See also: \code{\link[yaml:as.yaml]{yaml::as.yaml()}} diff --git a/tests/testthat/test-parse-body.R b/tests/testthat/test-parse-body.R index d1f89434..ec2b3770 100644 --- a/tests/testthat/test-parse-body.R +++ b/tests/testthat/test-parse-body.R @@ -90,7 +90,7 @@ test_that("Test tsv parser", { }) test_that("Test feather parser", { - skip_if_not_installed("feather") + skip_if_not_installed("arrow") tmp <- tempfile() on.exit({ @@ -98,10 +98,10 @@ test_that("Test feather parser", { }, add = TRUE) r_object <- iris - feather::write_feather(r_object, tmp) + arrow::write_feather(r_object, tmp) val <- readBin(tmp, "raw", 10000) - parsed <- parse_body(val, "application/feather", make_parser("feather")) + parsed <- parse_body(val, "application/vnd.apache.arrow.file", make_parser("feather")) # convert from feather tibble to data.frame parsed <- as.data.frame(parsed, stringsAsFactors = FALSE) attr(parsed, "spec") <- NULL @@ -109,6 +109,26 @@ test_that("Test feather parser", { expect_equal(parsed, r_object) }) +test_that("Test parquet parser", { + skip_if_not_installed("arrow") + + tmp <- tempfile() + on.exit({ + file.remove(tmp) + }, add = TRUE) + + r_object <- iris + arrow::write_parquet(r_object, tmp) + val <- readBin(tmp, "raw", 10000) + + parsed <- parse_body(val, "application/vnd.apache.parquet", make_parser("parquet")) + # convert from parquet tibble to data.frame + parsed <- as.data.frame(parsed, stringsAsFactors = FALSE) + attr(parsed, "spec") <- NULL + + expect_equal(parsed, r_object) +}) + test_that("Test geojson parser", { skip_if_not_installed("geojsonsf") skip_if_not_installed("sf") diff --git a/tests/testthat/test-serializer-feather.R b/tests/testthat/test-serializer-feather.R index c191997d..cfa4bc59 100644 --- a/tests/testthat/test-serializer-feather.R +++ b/tests/testthat/test-serializer-feather.R @@ -1,15 +1,15 @@ context("feather serializer") test_that("feather serializes properly", { - skip_if_not_installed("feather") + skip_if_not_installed("arrow") d <- 
data.frame(a=1, b=2, c="hi") val <- serializer_feather()(d, data.frame(), PlumberResponse$new(), stop) expect_equal(val$status, 200L) - expect_equal(val$headers$`Content-Type`, "application/feather") + expect_equal(val$headers$`Content-Type`, "application/vnd.apache.arrow.file") # can test by doing a full round trip if we believe the parser works via `test-parse-body.R` - parsed <- parse_body(val$body, "application/feather", make_parser("feather")) + parsed <- parse_body(val$body, "application/vnd.apache.arrow.file", make_parser("feather")) # convert from feather tibble to data.frame parsed <- as.data.frame(parsed, stringsAsFactors = FALSE) attr(parsed, "spec") <- NULL @@ -18,7 +18,7 @@ test_that("feather serializes properly", { }) test_that("Errors call error handler", { - skip_if_not_installed("feather") + skip_if_not_installed("arrow") errors <- 0 errHandler <- function(req, res, err){ @@ -31,7 +31,7 @@ test_that("Errors call error handler", { }) test_that("Errors are rendered correctly with debug TRUE", { - skip_if_not_installed("feather") + skip_if_not_installed("arrow") pr <- pr() %>% pr_get("/", function() stop("myerror"), serializer = serializer_feather()) %>% pr_set_debug(TRUE) capture.output(res <- pr$serve(make_req(pr = pr), PlumberResponse$new("csv"))) diff --git a/vignettes/rendering-output.Rmd b/vignettes/rendering-output.Rmd index bda83f92..2ef0f9d3 100644 --- a/vignettes/rendering-output.Rmd +++ b/vignettes/rendering-output.Rmd @@ -59,7 +59,8 @@ Annotation | Content Type | Description/References `@serializer rds` | `application/rds` | Object processed with `base::serialize()` `@serializer csv` | `text/csv` | Object processed with `readr::format_csv()` `@serializer tsv` | `text/tab-separated-values` | Object processed with `readr::format_tsv()` -`@serializer feather` | `application/feather` | Object processed with `feather::write_feather()` +`@serializer feather` | `application/vnd.apache.arrow.file` | Object processed with `arrow::write_feather()` 
+`@serializer parquet` | `application/vnd.apache.parquet` | Object processed with `arrow::write_parquet()` `@serializer yaml` | `text/x-yaml` | Object processed with `yaml::as_yaml()` `@serializer htmlwidget` | `text/html; charset=utf-8` | `htmlwidgets::saveWidget()` `@serializer text` | `text/plain` | Text output processed by `as.character()`