diff --git a/R/content-types.R b/R/content-types.R
index 9da895b67..9bb55800b 100644
--- a/R/content-types.R
+++ b/R/content-types.R
@@ -50,3 +50,38 @@ getContentType <- function(ext, defaultType='application/octet-stream') {
   }
   return(ct)
 }
+
+#' Extract the character set declared in a Content-Type header value
+#'
+#' @param contentType Content-Type header value (e.g.
+#'   `"text/html; charset=latin1"`), or `NULL` when the header is absent.
+#' @return The declared charset as a single string, or `"UTF-8"` when no
+#'   usable `charset` parameter is present.
+#' @noRd
+getCharacterSet <- function(contentType){
+  default <- "UTF-8"
+  # Missing, empty or NA headers carry no charset information.
+  if (is.null(contentType) || length(contentType) == 0 || is.na(contentType[[1]])) {
+    return(default)
+  }
+
+  # Match the parameter name case-insensitively, tolerate optional quotes and
+  # stop at the next `;`, quote or whitespace so that trailing parameters
+  # (e.g. `; boundary=...`) are not swallowed into the charset.
+  pattern <- "charset[[:space:]]*=[[:space:]]*\"?([^;\"[:space:]]+)"
+  found <- regmatches(
+    contentType[[1]],
+    regexec(pattern, contentType[[1]], ignore.case = TRUE)
+  )[[1]]
+  if (length(found) < 2) {
+    return(default)
+  }
+
+  charset <- found[[2]]
+  # `Encoding<-` only recognises the exact spelling "UTF-8", so canonicalise
+  # the common lowercase/hyphenless variants sent by HTTP clients.
+  if (toupper(charset) %in% c("UTF-8", "UTF8")) {
+    charset <- default
+  }
+  charset
+}
diff --git a/R/post-body.R b/R/post-body.R
index ae13a1488..08187d279 100644
--- a/R/post-body.R
+++ b/R/post-body.R
@@ -2,7 +2,8 @@ postBodyFilter <- function(req){
   handled <- req$.internal$postBodyHandled
   if (is.null(handled) || handled != TRUE){
     body <- req$rook.input$read_lines()
-    args <- parseBody(body)
+    charset <- getCharacterSet(req$HTTP_CONTENT_TYPE)
+    args <- parseBody(body, charset)
     req$postBody <- body
     req$args <- c(req$args, args)
     req$.internal$postBodyHandled <- TRUE
@@ -12,12 +13,20 @@ postBodyFilter <- function(req){
 
 #' @importFrom utils URLdecode
 #' @noRd
-parseBody <- function(body){
+parseBody <- function(body, charset="UTF-8"){
   # The body in a curl call can also include querystring formatted data
   # Is there data in the request?
   if (is.null(body) || length(body) == 0 || body == "") {
     return(list())
   }
+
+  # Mark the body with the charset declared by the client. `Encoding<-` only
+  # recognises "UTF-8", "latin1" and "bytes"; any other value leaves the
+  # string marked as "unknown" (native encoding).
+  if (is.character(body)) {
+    Encoding(body) <- charset
+  }
+
   # Is it JSON data?
   if (stri_startswith_fixed(body, "{")) {
     # Handle JSON with jsonlite
diff --git a/R/response.R b/R/response.R
index 4b5259fec..d7508d103 100644
--- a/R/response.R
+++ b/R/response.R
@@ -27,6 +27,13 @@ PlumberResponse <- R6Class(
         body <- ""
       }
 
+      # Response headers are keyed by their HTTP name ("Content-Type"), not by
+      # the Rook-style HTTP_CONTENT_TYPE variable used on incoming requests.
+      charset <- getCharacterSet(h[["Content-Type"]])
+      if (is.character(body)) {
+        Encoding(body) <- charset
+      }
+
       list(
         status = self$status,
         headers = h,
diff --git a/tests/testthat/test-content-type.R b/tests/testthat/test-content-type.R
index 8d59e7f46..3d93e4eb9 100644
--- a/tests/testthat/test-content-type.R
+++ b/tests/testthat/test-content-type.R
@@ -20,3 +20,24 @@ test_that("contentType works in files", {
   val <- r$serve(make_req("GET", "/"), res)
   expect_equal(val$headers$`Content-Type`, "text/plain")
 })
+
+test_that('Parses charset properly', {
+  charset <- getCharacterSet("Content-Type: text/html; charset=latin1")
+  expect_equal(charset, "latin1")
+  charset <- getCharacterSet("Content-Type: text/html; charset=greek8")
+  expect_equal(charset, "greek8")
+})
+
+test_that('Ignores parameters and quotes around the charset', {
+  charset <- getCharacterSet("multipart/form-data; charset=latin1; boundary=xyz")
+  expect_equal(charset, "latin1")
+  charset <- getCharacterSet("text/html; Charset=\"utf-8\"")
+  expect_equal(charset, "UTF-8")
+})
+
+test_that('Defaults charset when not there', {
+  charset <- getCharacterSet("Content-Type: text/html")
+  expect_equal(charset, "UTF-8")
+  charset <- getCharacterSet(NULL)
+  expect_equal(charset, "UTF-8")
+})
diff --git a/tests/testthat/test-postbody.R b/tests/testthat/test-postbody.R
index 51dc8c0cb..15e6a71dc 100644
--- a/tests/testthat/test-postbody.R
+++ b/tests/testthat/test-postbody.R
@@ -8,3 +8,25 @@ test_that("Query strings on post are handled correctly", {
   expect_equivalent(parseBody("a="), list()) # It's technically a named list()
   expect_equal(parseBody("a=1&b=&c&d=1"), list(a="1", d="1"))
 })
+
+test_that("Able to handle UTF-8", {
+  expect_equal(parseBody('{"text":"élise"}', 'UTF-8')$text, "élise")
+})
+
+test_that("filter passes on charset", {
+  req <- list(.internal=list(postBodyHandled=FALSE),
+              rook.input=list(read_lines=function(){
+                return("this is a body")}),
+              HTTP_CONTENT_TYPE="text/html; charset=testset",
+              args = c()
+  )
+  # The mock prints the charset it receives so expect_output() can check it.
+  with_mock(
+    parseBody = function(body, charset="UTF-8"){
+      print(charset)
+      body
+    },
+    expect_output(postBodyFilter(req), "testset"),
+    .env="plumber"
+  )
+})