Skip to content

Commit

Permalink
simplify PHB functionality to just read and merge #68
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentvanhees committed Sep 30, 2024
1 parent 2ceb92c commit 3eab460
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 333 deletions.
73 changes: 28 additions & 45 deletions R/mergePHBfilePairs.R
Original file line number Diff line number Diff line change
@@ -1,57 +1,40 @@
mergePHBfilePairs = function(inputPath = ".", outputPath = ".",
mergePHBfilePairs = function(filenames = NULL,
timeformat = "%m/%d/%Y %H:%M:%S",
desiredtz = "", configtz = NULL,
timeformatName = "timeformat") {
# merges Philips Health Band xlsx files per participant
# as there can be multiple files per participant.
fnames = dir(inputPath, recursive = FALSE, full.names = TRUE, pattern = "[.]xlsx")
fileOverview = data.frame(filename = fnames)
extractID = function(x) {
x = basename(x)
x = gsub(pattern = "sleep_wake", replacement = "sleepwake", x = tolower(x))
ID = unlist(strsplit(x, "_"))[2]
return(ID)
if (length(filenames) != 2) {
stop("Provide two filenames")
}
fileOverview$ID = unlist(lapply(fileOverview$filename, FUN = extractID))

uids = unique(fileOverview$ID)
for (uid in uids) {
filesForThisPerson = fileOverview$filename[which(fileOverview$ID == uid)]
# Identify both files
file1 = grep(pattern = "datalist", x = filesForThisPerson, ignore.case = TRUE)
file2 = grep(pattern = "sleep_wake", x = filesForThisPerson, ignore.case = TRUE)
if (length(file1) == 0 && length(file2) == 0) {
next
}
# Data
deviceSN = NULL
# Identify both files
file1 = grep(pattern = "datalist", x = filenames, ignore.case = TRUE)
file2 = grep(pattern = "sleep_wake", x = filenames, ignore.case = TRUE)

# Datalist file (with all variables except sleep/wake scores)
deviceSN = NULL
if (length(file1) > 0) {
data1 = readPHBCount(filename = filenames[file1], timeformat = timeformat,
desiredtz = desiredtz, configtz = configtz,
timeformatName = timeformatName)
deviceSN = data1$deviceSN
}
# Sleep wake scores file
if (length(file2) > 0) {
data2 = readPHBCount(filename = filenames[file2], timeformat = timeformat,
desiredtz = desiredtz, configtz = configtz,
timeformatName = timeformatName)
}
if (length(file1) > 0 && length(file2) > 0) {
data2$data = data2$data[, which(colnames(data2$data) != "sleepEventMarker")]
data = merge(data1$data, data2$data, by = "timestamp")
} else {
if (length(file1) > 0) {
data1 = readPHBCount(filename = filesForThisPerson[file1], timeformat = timeformat,
desiredtz = desiredtz, configtz = configtz,
timeformatName = timeformatName)
deviceSN = data1$deviceSN
}
# Sleep wake scores
if (length(file2) > 0) {
data2 = readPHBCount(filename = filesForThisPerson[file2], timeformat = timeformat,
desiredtz = desiredtz, configtz = configtz,
timeformatName = timeformatName)
}
if (length(file1) > 0 && length(file2) > 0) {
data2$data = data2$data[, which(colnames(data2$data) != "sleepEventMarker")]
data = merge(data1$data, data2$data, by = "timestamp")
data = data1$data
} else {
if (length(file1) > 0) {
data = data1$data
} else {
data = data2$data
}
data = data2$data
}
colnames(data)[grep(pattern = "timestamp", x = colnames(data))] = "timestamp"
newName = gsub(pattern = "Sleep_Wake", replacement = "def", x = basename(filesForThisPerson[file2]), ignore.case = TRUE)
newName = paste0(unlist(strsplit(newName, "[.]")) , collapse = paste0("_", deviceSN, "."))
newName = gsub(pattern = "xlsx", replacement = "csv", x = newName)
outputfile = paste0(outputPath, "/", newName)
write.csv(x = data, file = outputfile, row.names = FALSE)
}
invisible(list(data = data, deviceSN = deviceSN))
}
15 changes: 11 additions & 4 deletions R/readPHBCount.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
readPHBCount = function(filename = NULL, timeformat = "%m/%d/%Y %H:%M:%S",
desiredtz = "", configtz = NULL,
timeformatName = "timeformat") {

if (length(configtz) == 0) configtz = desiredtz
deviceSN = NULL
if (length(grep(pattern = "datalist", x = filename, ignore.case = TRUE)) > 0) {
data = as.data.frame(readxl::read_excel(path = filename,
col_types = "text", skip = 8),
row.names = FALSE)
col_types = "text", skip = 8),
row.names = FALSE)
header = as.data.frame(readxl::read_excel(path = filename,
col_types = "text", n_max = 8,
.name_repair = "unique_quiet"),
Expand All @@ -15,11 +16,17 @@ readPHBCount = function(filename = NULL, timeformat = "%m/%d/%Y %H:%M:%S",
if (length(SNlocation) > 0) {
deviceSN = unlist(strsplit(header[grep(pattern = "deviceSN", x = header)], " "))
deviceSN = deviceSN[length(deviceSN)]
}
}
colnames(data)[grep(pattern = "counts", x = colnames(data), ignore.case = TRUE)] = "counts"
colnames(data)[grep(pattern = "offWrist", x = colnames(data), ignore.case = TRUE)] = "nonwear"
for (varname in c("counts", "steps", "nonwear")) {
if (varname %in% colnames(data) == FALSE) {
stop(paste0("Expected column ", varname, " not found in file ", filename), call. = TRUE)
}
}
data$counts = as.numeric(data$counts)
data$nonwear = as.numeric(data$counts)
data$steps = as.numeric(data$steps)
} else {
data = as.data.frame(readxl::read_excel(path = filename, col_types = "text", skip = 8), row.names = FALSE)
colnames(data)[grep(pattern = "sleepWake", x = colnames(data), ignore.case = TRUE)] = "sleep"
Expand All @@ -37,7 +44,7 @@ readPHBCount = function(filename = NULL, timeformat = "%m/%d/%Y %H:%M:%S",
# Establish starttime in the correct timezone
if (configtz != desiredtz) {
data$timestamp = as.POSIXct(x = as.numeric(data$timestamp), tz = desiredtz,
origin = "1970-01-01")
origin = "1970-01-01")
}
invisible(list(data = data, deviceSN = deviceSN))
}
Loading

0 comments on commit 3eab460

Please sign in to comment.