-
Notifications
You must be signed in to change notification settings - Fork 0
/
1_merge-mulitvis.R
43 lines (31 loc) · 1.33 KB
/
1_merge-mulitvis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Merge the two "panuc-0110-*.csv" exports found in an input directory (the
# UPDRS file and the main file) into one multi-visit CSV.
#
# Arguments (command line, trailing): exactly one, the input directory. It is
#   resolved relative to ~/UdallR/data because of the setwd() below.
# Output: ~/UdallR/data/panuc_multivis_<date>.csv, where <date> is whatever
#   follows "panuc-0110-" in the name of the second input file.
# Errors: stops if the argument count is wrong, the directory does not exist,
#   there are not exactly two matching files, or a merge column is missing.

# Kept on purpose: the directory argument is interpreted relative to this path.
setwd("~/UdallR/data")

args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 1) {
  stop("Supply (1) input directory")
}
# Named input.dir rather than dir so base::dir() is not shadowed.
input.dir <- args[1]

if (!dir.exists(input.dir)) {
  stop("No such directory")
}

# Read input files. The pattern is anchored and its dots escaped so that only
# names of the form panuc-0110-<anything>.csv match (not e.g. *.csv.bak).
files <- list.files(input.dir, pattern = "^panuc-0110-.*\\.csv$",
                    full.names = TRUE)
if (length(files) != 2) {
  stop("There must be two and only two CSV files in data/", input.dir)
}

# NOTE(review): this relies on list.files() returning the UPDRS file first and
# the main file second (alphabetical order) -- confirm against real file names.
updrs <- read.csv(files[1])
main <- read.csv(files[2])

# Columns the two data frames are merged on.
merge.keys <- c("summary_id", "visit_number")
shared <- intersect(colnames(updrs), colnames(main))
missing.keys <- setdiff(merge.keys, shared)
if (length(missing.keys) > 0) {
  stop("Merge column(s) missing from input files: ",
       paste(missing.keys, collapse = ", "))
}

# These are the columns shared by both data frames, minus the ones we're
# actually going to merge on. The keys are excluded by name, not by position,
# so the result does not depend on column order in the input files.
in.common <- setdiff(shared, merge.keys)

# Drop those columns since the data already exists and merging on it just
# creates more problems. drop = FALSE keeps a data frame even if only one
# column is left.
main.merge <- main[, !(colnames(main) %in% in.common), drop = FALSE]

# Actually merge the data. We want to keep everyone from the main file, even
# if they don't have UPDRS data.
merged <- merge(updrs, main.merge, by = merge.keys, all.y = TRUE)

# Output file name, extract date from original file, not today. Work on the
# base name with anchored patterns so directory names cannot interfere.
output.date <- sub("^panuc-0110-", "",
                   sub("\\.csv$", "", basename(files[2])))
output.file <- paste0("~/UdallR/data/panuc_multivis_", output.date, ".csv")

write.csv(merged, file = output.file, row.names = FALSE)
message("Saved to: ", output.file)
# The next step is given the date without its first two characters
# (presumably the century digits -- confirm against 2_csv2rda.sh).
message("Now run: ./2_csv2rda.sh ", substring(output.date, 3))