simplify PHB functionality to just read and merge #68

wadpac · Sep 30, 2024 · 3eab460 · 3eab460
1 parent 2ceb92c
commit 3eab460
Show file tree

Hide file tree

Showing 5 changed files with 55 additions and 333 deletions.
diff --git a/R/mergePHBfilePairs.R b/R/mergePHBfilePairs.R
@@ -1,57 +1,40 @@
-mergePHBfilePairs = function(inputPath = ".", outputPath = ".",
+mergePHBfilePairs = function(filenames = NULL,  # filenames: vector of exactly two file paths (checked below)
                              timeformat = "%m/%d/%Y %H:%M:%S",
                              desiredtz = "", configtz = NULL,
                              timeformatName = "timeformat") {
   # merges Philips Health Band xlsx files per participant
   # as there can be multiple files per participant.
-  fnames = dir(inputPath, recursive = FALSE, full.names = TRUE, pattern = "[.]xlsx")
-  fileOverview = data.frame(filename = fnames)
-  extractID = function(x) {
-    x = basename(x)
-    x = gsub(pattern = "sleep_wake", replacement = "sleepwake", x = tolower(x))
-    ID = unlist(strsplit(x, "_"))[2]
-    return(ID)
+  if (length(filenames) != 2) {  # any other number of filenames is rejected
+    stop("Provide two filenames")
   }
-  fileOverview$ID = unlist(lapply(fileOverview$filename, FUN = extractID))
 
-  uids = unique(fileOverview$ID)
-  for (uid in uids) {
-    filesForThisPerson = fileOverview$filename[which(fileOverview$ID == uid)]
-    # Identify both file
-    file1 = grep(pattern = "datalist", x = filesForThisPerson, ignore.case = TRUE)
-    file2 = grep(pattern = "sleep_wake", x = filesForThisPerson, ignore.case = TRUE)
-    if (length(file1) == 0 && length(file2) == 0) {
-      next
-    }
-    # Data
-    deviceSN = NULL
+  # Identify both files
+  file1 = grep(pattern = "datalist", x = filenames, ignore.case = TRUE)  # index of the name containing "datalist" (case-insensitive)
+  file2 = grep(pattern = "sleep_wake", x = filenames, ignore.case = TRUE)  # index of the name containing "sleep_wake" (case-insensitive)
+
+  # Datalist file (with all variables except sleep/wake scores)
+  deviceSN = NULL
+  if (length(file1) > 0) {
+    data1 = readPHBCount(filename = filenames[file1], timeformat = timeformat,
+                         desiredtz = desiredtz, configtz = configtz,
+                         timeformatName = timeformatName)
+    deviceSN = data1$deviceSN  # deviceSN is only taken from the datalist file
+  }
+  # Sleep wake scores file
+  if (length(file2) > 0) {
+    data2 = readPHBCount(filename = filenames[file2], timeformat = timeformat,
+                         desiredtz = desiredtz, configtz = configtz,
+                         timeformatName = timeformatName)
+  }
+  if (length(file1) > 0 && length(file2) > 0) {  # both file types found: combine them
+    data2$data = data2$data[, which(colnames(data2$data) != "sleepEventMarker")]  # drop sleepEventMarker before merging
+    data = merge(data1$data, data2$data, by = "timestamp")  # merge() default keeps only timestamps present in both
+  } else {
     if (length(file1) > 0) {
-      data1 = readPHBCount(filename = filesForThisPerson[file1], timeformat = timeformat,
-                   desiredtz = desiredtz, configtz = configtz,
-                   timeformatName = timeformatName)
-      deviceSN = data1$deviceSN
-    }
-    # Sleep wake scores
-    if (length(file2) > 0) {
-      data2 = readPHBCount(filename = filesForThisPerson[file2], timeformat = timeformat,
-                           desiredtz = desiredtz, configtz = configtz,
-                           timeformatName = timeformatName)
-    }
-    if (length(file1) > 0 && length(file2) > 0) {
-      data2$data = data2$data[, which(colnames(data2$data) != "sleepEventMarker")]
-      data = merge(data1$data, data2$data, by = "timestamp")
+      data = data1$data  # only datalist file(s) matched: use that data unmerged
     } else {
-      if (length(file1) > 0) {
-        data = data1$data
-      } else {
-        data = data2$data
-      }
+      data = data2$data  # NOTE(review): data2 is never assigned if neither pattern matched - confirm callers guarantee a match
     }
-    colnames(data)[grep(pattern = "timestamp", x = colnames(data))] = "timestamp"
-    newName = gsub(pattern = "Sleep_Wake", replacement = "def", x =  basename(filesForThisPerson[file2]), ignore.case = TRUE)
-    newName = paste0(unlist(strsplit(newName, "[.]")) , collapse = paste0("_", deviceSN, "."))
-    newName = gsub(pattern = "xlsx", replacement = "csv", x = newName)
-    outputfile = paste0(outputPath, "/", newName)
-    write.csv(x = data, file = outputfile, row.names = FALSE)
   }
+  invisible(list(data = data, deviceSN = deviceSN))  # result is returned invisibly to the caller instead of being written to csv
 }
diff --git a/R/readPHBCount.R b/R/readPHBCount.R
@@ -1,12 +1,13 @@
 readPHBCount = function(filename = NULL, timeformat = "%m/%d/%Y %H:%M:%S",
                         desiredtz = "", configtz = NULL,
                         timeformatName = "timeformat") {
+
   if (length(configtz) == 0) configtz = desiredtz
   deviceSN = NULL
   if (length(grep(pattern = "datalist", x = filename, ignore.case = TRUE)) > 0) {
     data = as.data.frame(readxl::read_excel(path = filename, 
-                                             col_types = "text", skip = 8),
-                          row.names = FALSE)
+                                            col_types = "text", skip = 8),
+                         row.names = FALSE)
     header = as.data.frame(readxl::read_excel(path = filename, 
                                               col_types = "text", n_max = 8,
                                               .name_repair = "unique_quiet"),
@@ -15,11 +16,17 @@ readPHBCount = function(filename = NULL, timeformat = "%m/%d/%Y %H:%M:%S",
     if (length(SNlocation) > 0) {
       deviceSN = unlist(strsplit(header[grep(pattern = "deviceSN", x = header)], " "))
       deviceSN = deviceSN[length(deviceSN)]
-   }
+    }
     colnames(data)[grep(pattern = "counts", x = colnames(data), ignore.case = TRUE)] = "counts"
     colnames(data)[grep(pattern = "offWrist", x = colnames(data), ignore.case = TRUE)] = "nonwear"
+    for (varname in c("counts", "steps", "nonwear")) {
+      if (varname %in% colnames(data) == FALSE) {
+        stop(paste0("Expected column ", varname, " not found in file ", filename), call. = TRUE)
+      }
+    }
     data$counts = as.numeric(data$counts)
     data$nonwear = as.numeric(data$counts)
+    data$steps = as.numeric(data$steps)
   } else {
     data = as.data.frame(readxl::read_excel(path = filename, col_types = "text", skip = 8), row.names = FALSE)
     colnames(data)[grep(pattern = "sleepWake", x = colnames(data), ignore.case = TRUE)] = "sleep"
@@ -37,7 +44,7 @@ readPHBCount = function(filename = NULL, timeformat = "%m/%d/%Y %H:%M:%S",
   # Establish starttime in the correct timezone
   if (configtz != desiredtz) {
     data$timestamp = as.POSIXct(x = as.numeric(data$timestamp), tz = desiredtz,
-                                 origin = "1970-01-01")
+                                origin = "1970-01-01")
   }
   invisible(list(data = data, deviceSN = deviceSN))
 }