[R/wb_to_df] enable reading POSIXct dates and tests

JanMarvin · Feb 28, 2022 · 38854bf · JanMarvin · Feb 28, 2022 · JanMarvin
1 parent c4c0fea
commit 38854bf
Show file tree

Hide file tree

Showing 2 changed files with 106 additions and 69 deletions.
diff --git a/R/wb_functions.R b/R/wb_functions.R
@@ -68,9 +68,13 @@ guess_col_type <- function(tt) {
   col_dte <- vapply(tt[!col_num], function(x) all(x == "d", na.rm = TRUE), NA)
   types[names(col_dte[col_dte])] <- 2
 
+  # or even posix
+  col_dte <- vapply(tt[!col_num], function(x) all(x == "p", na.rm = TRUE), NA)
+  types[names(col_dte[col_dte])] <- 3
+
   # there are bools as well
   col_log <- vapply(tt[!col_num], function(x) any(x == "b", na.rm = TRUE), NA)
-  types[names(col_log[col_log])] <- 3
+  types[names(col_log[col_log])] <- 4
 
   types
 }
@@ -92,9 +96,7 @@ numfmt_is_date <- function(numFmt) {
   date_fmts <- c(
     "yy", "yyyy",
     "m", "mm", "mmm", "mmmm", "mmmmm",
-    "d", "dd", "ddd", "dddd",
-    "h", "hh", "m", "mm", "s", "ss",
-    "AM", "PM", "A", "P"
+    "d", "dd", "ddd", "dddd"
   )
   date_or_fmt <- paste0(date_fmts, collapse = "|")
   maybe_dates <- grepl(pattern = date_or_fmt, x = numFmt_df$formatCode)
@@ -104,14 +106,59 @@ numfmt_is_date <- function(numFmt) {
   z
 }
 
+#' check if numFmt is posix. internal function
+#' @param numFmt numFmt xml nodes
+numfmt_is_posix <- function(numFmt) {
+
+  # if numFmt is character(0)
+  if (length(numFmt) ==0) return(z <- NULL)
+
+  numFmt_df <- read_numfmt(read_xml(numFmt))
+  num_fmts <- c(
+    "#", as.character(0:9)
+  )
+  num_or_fmt <- paste0(num_fmts, collapse = "|")
+  maybe_num <- grepl(pattern = num_or_fmt, x = numFmt_df$formatCode)
+
+  posix_fmts <- c(
+    "yy", "yyyy",
+    "m", "mm", "mmm", "mmmm", "mmmmm",
+    "d", "dd", "ddd", "dddd",
+    "h", "hh", "m", "mm", "s", "ss",
+    "AM", "PM", "A", "P"
+  )
+  posix_or_fmt <- paste0(posix_fmts, collapse = "|")
+  maybe_posix <- grepl(pattern = posix_or_fmt, x = numFmt_df$formatCode)
+
+  z <- numFmt_df$numFmtId[maybe_posix & !maybe_num]
+  if (length(z)==0) z <- NULL
+  z
+}
+
 #' check if style is date. internal function
 #'
 #' @param cellXfs cellXfs xml nodes
 #' @param numfmt_date custom numFmtId dates
 style_is_date <- function(cellXfs, numfmt_date) {
 
   # numfmt_date: some basic date formats and custom formats
-  date_numfmts <- as.character(14:22)
+  date_numfmts <- as.character(14:17)
+  numfmt_date <- c(numfmt_date, date_numfmts)
+
+  cellXfs_df <- read_xf(read_xml(cellXfs))
+  z <- rownames(cellXfs_df[cellXfs_df$numFmtId %in% numfmt_date,])
+  if (length(z)==0) z <- NA
+  z
+}
+
+#' check if style is posix. internal function
+#'
+#' @param cellXfs cellXfs xml nodes
+#' @param numfmt_date custom numFmtId dates
+style_is_posix <- function(cellXfs, numfmt_date) {
+
+  # numfmt_date: some basic date formats and custom formats
+  date_numfmts <- as.character(18:22)
   numfmt_date <- c(numfmt_date, date_numfmts)
 
   cellXfs_df <- read_xf(read_xml(cellXfs))
@@ -340,13 +387,13 @@ wb_to_df <- function(
   numfmt_date <- numfmt_is_date(wb$styles$numFmts)
   xlsx_date_style <- style_is_date(wb$styles$cellXfs, numfmt_date)
 
+  numfmt_posix <- numfmt_is_posix(wb$styles$numFmts)
+  xlsx_posix_style <- style_is_posix(wb$styles$cellXfs, numfmt_posix)
+
   # create temporary data frame. hard copy required
   z  <- dims_to_dataframe(dims)
   tt <- dims_to_dataframe(dims)
 
-
-
-
   # tt <- data.frame(matrix(0, nrow = 4, ncol = ncol(z)))
   # names(tt) <- names(z)
   # rownames(tt) <- c("b", "s", "d", "n")
@@ -412,6 +459,7 @@ wb_to_df <- function(
     cc$val[sel] <- "NA"
     cc$typ[sel] <- "na_string"
   }
+
   # dates
   if (!is.null(cc$c_s)) {
     # if a cell is t="s" the content is a sst and not da date
@@ -421,10 +469,15 @@ wb_to_df <- function(
 
     if (detectDates) {
       sel <- (cc$c_s %in% xlsx_date_style) & !cc$is_string & cc$v != "_openxlsx_NA_"
-      cc$val[sel] <- as.character(convertToDate(cc$v[sel]))
+      cc$val[sel] <- suppressWarnings(as.character(convertToDate(cc$v[sel])))
       cc$typ[sel]  <- "d"
+
+      sel <- (cc$c_s %in% xlsx_posix_style) & !cc$is_string & cc$v != "_openxlsx_NA_"
+      cc$val[sel] <- suppressWarnings(as.character(convertToDateTime(cc$v[sel])))
+      cc$typ[sel]  <- "p"
     }
   }
+
   # remaining values are numeric?
   sel <- is.na(cc$typ)
   cc$val[sel] <- cc$v[sel]
@@ -509,11 +562,13 @@ wb_to_df <- function(
     if (any(sel)) {
       nums <- names( which(types[sel] == 1) )
       dtes <- names( which(types[sel] == 2) )
-      logs <- names( which(types[sel] == 3) )
+      poxs <- names( which(types[sel] == 3) )
+      logs <- names( which(types[sel] == 4) )
       # convert "#NUM!" to "NaN" -- then converts to NaN
       # maybe consider this an option to instead return NA?
       z[nums] <- lapply(z[nums], function(i) as.numeric(replace(i, i == "#NUM!", "NaN")))
       z[dtes] <- lapply(z[dtes], as.Date)
+      z[poxs] <- lapply(z[poxs], as.POSIXct)
       z[logs] <- lapply(z[logs], as.logical)
     } else {
       warning("could not convert. All missing in row used for variable names")

diff --git a/tests/testthat/test-writing_posixct.R b/tests/testthat/test-writing_posixct.R
@@ -23,62 +23,44 @@ test_that("Writing Posixct with writeData & writeDataTable", {
   options("openxlsx.datetimeFormat" = "yyyy-mm-dd hh:mm:ss")
 })
 
-# # missing datetime is not yet implemented
-# test_that("Writing mixed EDT/EST Posixct with writeData & writeDataTable", {
-#   options("openxlsx.datetimeFormat" = "dd/mm/yy hh:mm")
-#
-#   tstart1 <- as.POSIXct("12/03/2018 08:30", format = "%d/%m/%Y %H:%M")
-#   tstart2 <- as.POSIXct("10/03/2018 08:30", format = "%d/%m/%Y %H:%M")
-#   TimeDT1 <- c(NA, 0, 10, 30, 60, 120, 240, 720, 1440) * 60 + tstart1
-#   TimeDT2 <- c(0, 10, 30, 60, 120, 240, 720, NA, 1440) * 60 + tstart2
-#
-#   df <- data.frame(
-#     timeval = c(TimeDT1, TimeDT2),
-#     timetxt = format(c(TimeDT1, TimeDT2), "%Y-%m-%d %H:%M")
-#   )
-#
-#   wb <- createWorkbook()
-#   addWorksheet(wb, "writeData")
-#   addWorksheet(wb, "writeDataTable")
-#
-#   writeData(wb, "writeData", df, startCol = 2, startRow = 3, rowNames = FALSE)
-#   writeDataTable(wb, "writeDataTable", df, startCol = 2, startRow = 3)
-#
-#   # xlsx file is broken‚ <NA> where some missing value is expected.
-#   wb_open(wb)
-#
-#   wd <- as.numeric(wb$worksheets[[1]]$sheet_data$v)
-#   wdt <- as.numeric(wb$worksheets[[2]]$sheet_data$v)
-#   wd <- wd[wb$worksheets[[1]]$sheet_data$cols == 2]
-#   wdt <- wdt[wb$worksheets[[2]]$sheet_data$cols == 2]
-#
-#   # drop any integer indexes introduced in write
-#   wd <- wd[wd != 0 | is.na(wd)]
-#   wdt <- wdt[wdt  != 0 | is.na(wdt)]
-#
-#   # sort everything
-#   wd <- convertToDateTime(wd[order(wd)])
-#   wdt <- convertToDateTime(wdt[order(wdt)])
-#   expected <- df$timeval[order(df$timeval)]
-#
-#   # compare
-#   expect_equal(
-#     object = wd,
-#     expected = expected,
-#     tolerance = 10 ^ -10,
-#     check.tzone = FALSE
-#   )
-#   expect_equal(
-#     object = wdt,
-#     expected = expected,
-#     tolerance = 10 ^ -10,
-#     check.tzone = TRUE
-#   )
-#   expect_equal(
-#     object = wd,
-#     expected = wdt,
-#     check.tzone = TRUE
-#   )
-#
-#   options("openxlsx.datetimeFormat" = "yyyy-mm-dd hh:mm:ss")
-# })
+# missing datetime is not yet implemented
+test_that("Writing mixed EDT/EST Posixct with writeData & writeDataTable", {
+  options("openxlsx.datetimeFormat" = "dd/mm/yy hh:mm")
+
+  tstart1 <- as.POSIXct("12/03/2018 08:30", format = "%d/%m/%Y %H:%M")
+  tstart2 <- as.POSIXct("10/03/2018 08:30", format = "%d/%m/%Y %H:%M")
+  TimeDT1 <- c(NA, 0, 10, 30, 60, 120, 240, 720, 1440) * 60 + tstart1
+  TimeDT2 <- c(0, 10, 30, 60, 120, 240, 720, NA, 1440) * 60 + tstart2
+
+  df <- data.frame(
+    timeval = c(TimeDT1, TimeDT2),
+    timetxt = format(c(TimeDT1, TimeDT2), "%Y-%m-%d %H:%M")
+  )
+
+  wb <- createWorkbook()
+  addWorksheet(wb, "writeData")
+  addWorksheet(wb, "writeDataTable")
+
+  writeData(wb, "writeData", df, startCol = 2, startRow = 3, rowNames = FALSE)
+  writeDataTable(wb, "writeDataTable", df, startCol = 2, startRow = 3)
+
+  # xlsx file is broken‚ <NA> where some missing value is expected.
+  # TODO check: looks alright in LibreOffice
+  # wb_open(wb)
+  xlsxFile <- temp_xlsx()
+  saveWorkbook(wb, xlsxFile, TRUE)
+  wb <- wb_to_df(xlsxFile)
+
+  exp <- df$timeval
+  got <- wb$timeval
+
+  # compare
+  expect_equal(
+    exp,
+    got,
+    tolerance = 10 ^ -10,
+    check.tzone = FALSE
+  )
+
+  options("openxlsx.datetimeFormat" = "yyyy-mm-dd hh:mm:ss")
+})