From a22aff3ffa6bcdd03f924c6a21c54d32173779af Mon Sep 17 00:00:00 2001 From: elimillera Date: Tue, 8 Nov 2022 21:52:36 +0000 Subject: [PATCH 1/9] Update default character_types to include dates and datetime classes. #65 --- R/zzz.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index b2541120..f88a5169 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -14,7 +14,8 @@ xportr.label_verbose = "none", xportr.length_verbose = "none", xportr.type_verbose = "none", - xportr.character_types = c("character", "Char", "text"), + xportr.character_types = c("character", "Char", "text", "Date", "POSIXct", + "POSIXt"), xportr.order_name = "order" ) toset <- !(names(op.devtools) %in% names(op)) From 8bc10ca69361522ef78447319d452d722d5f55ad Mon Sep 17 00:00:00 2001 From: elimillera Date: Tue, 8 Nov 2022 22:01:52 +0000 Subject: [PATCH 2/9] Add in date9., and datetime20. --- R/zzz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index f88a5169..58bb4042 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -15,7 +15,7 @@ xportr.length_verbose = "none", xportr.type_verbose = "none", xportr.character_types = c("character", "Char", "text", "Date", "POSIXct", - "POSIXt"), + "POSIXt", "date9.", "datetime20."), xportr.order_name = "order" ) toset <- !(names(op.devtools) %in% names(op)) From c3c7fbd545892608623aef61c6aba62d97c506d2 Mon Sep 17 00:00:00 2001 From: elimillera Date: Tue, 14 Feb 2023 17:37:52 +0000 Subject: [PATCH 3/9] Add tolower for class types --- R/utils-xportr.R | 2 +- R/zzz.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/utils-xportr.R b/R/utils-xportr.R index d787b2d1..78a29071 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -148,7 +148,7 @@ get_pipe_call <- function() { # Helper function to get the first class attribute first_class <- function(x) { characterTypes <- getOption("xportr.character_types") - class_ <- class(x)[1] + class_ <- tolower(class(x)[1]) if (class_ %in% characterTypes) "character" else class_ } diff --git a/R/zzz.R b/R/zzz.R index 58bb4042..2aa1d2ce 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -14,8 +14,8 @@ xportr.label_verbose = "none", xportr.length_verbose = "none", xportr.type_verbose = "none", - xportr.character_types = c("character", "Char", "text", "Date", "POSIXct", - "POSIXt", "date9.", "datetime20."), + xportr.character_types = c("character", "char", "text", "date", "posixct", + "posixt", "date9.", "datetime20."), xportr.order_name = "order" ) toset <- !(names(op.devtools) %in% names(op)) From dd3caaccc98820a935ea87800773272938168331 Mon Sep 17 00:00:00 2001 From: elimillera Date: Tue, 14 Feb 2023 18:25:42 +0000 Subject: [PATCH 4/9] Add time5 and time8 --- R/zzz.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index 2aa1d2ce..5493b06e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -15,7 +15,8 @@ xportr.length_verbose = "none", xportr.type_verbose = "none", xportr.character_types = c("character", "char", "text", "date", "posixct", - "posixt", "date9.", "datetime20."), + "posixt", "date9.", "datetime20.", "time5.", + "time8."), xportr.order_name = "order" ) toset <- !(names(op.devtools) %in% names(op)) From 38f80d7a0863e137db6afaade73785fd0dc19c02 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Tue, 14 Feb 2023 18:54:30 +0000 Subject: [PATCH 5/9] add time formats --- R/utils-xportr.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/utils-xportr.R b/R/utils-xportr.R index 5db31e9b..2c39698b 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -221,6 +221,7 @@ xpt_validate <- function(data) { '', paste("$", 1:200, ".", sep = ""), paste("date", 5:11, ".", sep = ""), + paste("time", 2:20, ".", sep = ""), paste("datetime", 7:40, ".", sep = ""), paste("yymmdd", 2:10, ".", sep = ""), paste("mmddyy", 2:10, ".", sep = ""), From 85761fefbd8b1bcf55cd647591814b8b9299278e Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Tue, 14 Feb 2023 19:48:23 +0000 Subject: [PATCH 6/9] date, datetime, time are formats not types --- R/zzz.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 5493b06e..35eca269 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -14,9 +14,7 @@ xportr.label_verbose = "none", xportr.length_verbose = "none", xportr.type_verbose = "none", - xportr.character_types = c("character", "char", "text", "date", "posixct", - "posixt", "date9.", "datetime20.", "time5.", - "time8."), + xportr.character_types = c("character", "char", "text", "date", "posixct", "posixt"), xportr.order_name = "order" ) toset <- !(names(op.devtools) %in% names(op)) From 863c1c0f858edab03e2b8435b5e085cf28455a0c Mon Sep 17 00:00:00 2001 From: elimillera Date: Mon, 20 Feb 2023 20:03:53 +0000 Subject: [PATCH 7/9] Add logic to convert any null or NA formats to "" for haven. --- R/format.R | 6 ++++++ tests/testthat/test-metadata.R | 20 +++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/R/format.R b/R/format.R index c2ee7551..ba96259e 100644 --- a/R/format.R +++ b/R/format.R @@ -74,5 +74,11 @@ xportr_format <- function(.df, metacore, domain = NULL, verbose = getOption("xpo attr(.df[[i]], "format.sas") <- format[[i]] } + # Convert NA formats to "" for haven + for (i in seq_len(ncol(.df))) { + if (is.na(attr(.df[[i]], "format.sas")) || is.null(attr(.df[[i]], "format.sas"))) + attr(.df[[i]], "format.sas") <- "" + } + .df } \ No newline at end of file diff --git a/tests/testthat/test-metadata.R b/tests/testthat/test-metadata.R index eae69f54..b50b1502 100644 --- a/tests/testthat/test-metadata.R +++ b/tests/testthat/test-metadata.R @@ -54,20 +54,26 @@ test_that("Expect error if any label exceeds 40 character", { }) test_that("SAS format", { - df <- data.frame(x = 1, y = 2) - varmeta <- data.frame(dataset = rep("df", 2), - variable = c("x", "y"), - format = c("date9.", "datetime20.")) + df <- data.frame(x = 1, y = 2, z = 3) + varmeta <- data.frame(dataset = rep("df", 3), + variable = c("x", "y", "z"), + format = c("date9.", "datetime20.", NA)) extract_format <- function(.x) { - vapply(.x, function(.x) attr(.x, "format.sas"), character(1), USE.NAMES = FALSE) + format_ <- character(3) + for (i in 1:3) { + print(attr(.x[[i]], "format.sas")) + format_[i] <- attr(.x[[i]], "format.sas") + } + format_ } out <- xportr_format(df, varmeta) - expect_equal(extract_format(out), c("DATE9.", "DATETIME20.")) + expect_equal(extract_format(out), c("DATE9.", "DATETIME20.", "")) expect_equal(dput(out), structure(list(x = structure(1, format.sas = "DATE9."), - y = structure(2, format.sas = "DATETIME20.")), + y = structure(2, format.sas = "DATETIME20."), + z = structure(3, format.sas = "")), row.names = c(NA, -1L), class = "data.frame")) }) From 131357c96a025afad21e6d3f3e9627e71160ea44 Mon Sep 17 00:00:00 2001 From: elimillera Date: Tue, 21 Feb 2023 21:39:36 +0000 Subject: [PATCH 8/9] Update xportr_format to not crash with unexpected variables from a metacore object. --- NEWS.md | 1 + R/format.R | 11 +++++++---- tests/testthat/test-metadata.R | 6 +++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index effd281d..1525ffa5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # xportr 0.2.0 * Added a new validation test that errors when users pass invalid formats (#60 #64). Thanks to @zdz2101! +* Fixed an issue where xportr_format could pass invalid formats to haven::write_xpt. # xportr 0.1.0 diff --git a/R/format.R b/R/format.R index ba96259e..89941701 100644 --- a/R/format.R +++ b/R/format.R @@ -61,14 +61,17 @@ xportr_format <- function(.df, metacore, domain = NULL, verbose = getOption("xpo } else { metadata <- metacore } + + filtered_metadata <- metadata %>% + filter(!!sym(variable_name) %in% names(.df)) - - format <- metadata %>% + + format <- filtered_metadata %>% select(!!sym(format_name)) %>% unlist() %>% toupper() - - names(format) <- metadata[[variable_name]] + + names(format) <- filtered_metadata[[variable_name]] for (i in names(format)) { attr(.df[[i]], "format.sas") <- format[[i]] diff --git a/tests/testthat/test-metadata.R b/tests/testthat/test-metadata.R index b50b1502..500f1a29 100644 --- a/tests/testthat/test-metadata.R +++ b/tests/testthat/test-metadata.R @@ -55,9 +55,9 @@ test_that("Expect error if any label exceeds 40 character", { test_that("SAS format", { df <- data.frame(x = 1, y = 2, z = 3) - varmeta <- data.frame(dataset = rep("df", 3), - variable = c("x", "y", "z"), - format = c("date9.", "datetime20.", NA)) + varmeta <- data.frame(dataset = rep("df", 4), + variable = c("x", "y", "z", "abc"), + format = c("date9.", "datetime20.", NA, "text")) extract_format <- function(.x) { format_ <- character(3) From 93cacb85a3f6bd8ab003d45e2790b4d8cdc26b9f Mon Sep 17 00:00:00 2001 From: elimillera Date: Thu, 23 Feb 2023 15:01:47 +0000 Subject: [PATCH 9/9] Split out test --- tests/testthat/test-metadata.R | 43 ++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/tests/testthat/test-metadata.R b/tests/testthat/test-metadata.R index 500f1a29..264cc657 100644 --- a/tests/testthat/test-metadata.R +++ b/tests/testthat/test-metadata.R @@ -1,3 +1,12 @@ + +extract_format <- function(.x) { + format_ <- character(length(.x)) + for (i in 1:length(.x)) { + format_[i] <- attr(.x[[i]], "format.sas") + } + format_ +} + test_that("Variable label", { df <- data.frame(x = "a", y = "b") varmeta <- data.frame(dataset = rep("df", 2), @@ -53,27 +62,35 @@ test_that("Expect error if any label exceeds 40 character", { "dataset label must be 40 characters or less") }) -test_that("SAS format", { - df <- data.frame(x = 1, y = 2, z = 3) +test_that("xportr_format will set formats as expected", { + df <- data.frame(x = 1, y = 2) + varmeta <- data.frame(dataset = rep("df", 2), + variable = c("x", "y"), + format = c("date9.", "datetime20.")) + + + + out <- xportr_format(df, varmeta) + + expect_equal(extract_format(out), c("DATE9.", "DATETIME20.")) + expect_equal(dput(out), structure(list(x = structure(1, format.sas = "DATE9."), + y = structure(2, format.sas = "DATETIME20.")), + row.names = c(NA, -1L), class = "data.frame")) +}) + +test_that("xportr_format will handle NA values and won't error", { + df <- data.frame(x = 1, y = 2, z = 3, a = 4) varmeta <- data.frame(dataset = rep("df", 4), variable = c("x", "y", "z", "abc"), format = c("date9.", "datetime20.", NA, "text")) - extract_format <- function(.x) { - format_ <- character(3) - for (i in 1:3) { - print(attr(.x[[i]], "format.sas")) - format_[i] <- attr(.x[[i]], "format.sas") - } - format_ - } - out <- xportr_format(df, varmeta) - expect_equal(extract_format(out), c("DATE9.", "DATETIME20.", "")) + expect_equal(extract_format(out), c("DATE9.", "DATETIME20.", "", "")) expect_equal(dput(out), structure(list(x = structure(1, format.sas = "DATE9."), y = structure(2, format.sas = "DATETIME20."), - z = structure(3, format.sas = "")), + z = structure(3, format.sas = ""), + a = structure(4, format.sas = "")), row.names = c(NA, -1L), class = "data.frame")) })