-
Notifications
You must be signed in to change notification settings - Fork 7
/
preprocess_RXQ_RX_new.py
72 lines (69 loc) · 3.25 KB
/
preprocess_RXQ_RX_new.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import csv
import glob
# Directory scanned for raw input files and directory receiving the outputs.
input_dir = "data/raw/"
output_dir = "data/"

# Maps the single-letter code taken from the input file name to the year
# range embedded in the output file names.
year_codes = {
    'J': "2017-2018",
    'I': "2015-2016",
    'H': "2013-2014",
}

# Header line written (without a trailing newline) at the top of every output
# file; each data line is then written with a *leading* newline, so the files
# never end with a newline.
OUTPUT_HEADER = "SEQN,RXDUSE,RXDDRUG,RXDDRGID,RXQSEEN,RXDDAYS,RXDRSC,RXDRSD,RXDCOUNT"

# Each input row carries three numbered code/description column pairs:
# RXDRSC1..RXDRSC3 and RXDRSD1..RXDRSD3.
REASON_SLOTS = range(1, 4)

# A row is copied to the "-100" sample files when its SEQN is less than this
# far above the SEQN of the first row of the input file.
SAMPLE_SEQN_SPAN = 100


def classify_reason_code(code):
    """Decide which output a raw RXDRSC value belongs to.

    Args:
        code: The raw string read from an ``RXDRSC<n>`` column.

    Returns:
        ``(cleaned_code, "P")`` when *code* ends with ``".P"`` or ``"P"``
        (the suffix is stripped), ``(code, "T")`` for any other non-empty
        value, and ``None`` when *code* is empty.
    """
    # Order matters: ".P" must be tested before the bare "P" suffix so that
    # the dot is stripped as well.
    if code.endswith(".P"):
        return code[:-2], "P"
    if code.endswith("P"):
        return code[:-1], "P"
    if code:
        return code, "T"
    return None


def format_record(row, slot):
    """Build the output line for one code/description pair of an input row.

    Args:
        row: Mapping of input column name to string value (a ``DictReader``
            row).
        slot: Pair number (1-3) selecting ``RXDRSC<slot>`` / ``RXDRSD<slot>``.

    Returns:
        ``(target, line)`` where *target* is ``"P"`` or ``"T"`` and *line* is
        the comma-joined record prefixed with ``"\\n"``; ``None`` when the
        code column for this slot is empty.
    """
    classified = classify_reason_code(row["RXDRSC" + str(slot)])
    if classified is None:
        return None
    code, target = classified
    # NOTE(review): fields are joined verbatim; only the description is
    # wrapped in double quotes and embedded quotes are not escaped. Kept
    # as-is so the output stays byte-compatible with earlier runs.
    fields = [
        row["SEQN"],
        row["RXDUSE"],
        row["RXDDRUG"],
        row["RXDDRGID"],
        row["RXQSEEN"],
        row["RXDDAYS"],
        code,
        "\"" + row["RXDRSD" + str(slot)] + "\"",
        row["RXDCOUNT"],
    ]
    return target, "\n" + ",".join(fields)


def convert_file(input_filename, output_prefix):
    """Split one input CSV into the "-P", "-T", "-P-100" and "-T-100" files.

    Only rows whose ``RXDUSE`` equals ``"1"`` are considered. Every non-empty
    code slot of such a row yields one output line in either the "P" or the
    "T" file, and additionally in the matching "-100" file while the row's
    SEQN is within ``SAMPLE_SEQN_SPAN`` of the first SEQN in the input.

    Args:
        input_filename: Path of the CSV file to read.
        output_prefix: Path prefix of the outputs; ``"-P.csv"``, ``"-T.csv"``,
            ``"-P-100.csv"`` and ``"-T-100.csv"`` are appended to it.

    Raises:
        OSError: If a file cannot be opened.
        KeyError: If an expected column is missing from the input.
        ValueError: If a SEQN value is not an integer literal.
    """
    # A single ``with`` guarantees that every output is flushed and closed,
    # even when a malformed row raises part-way through. ``newline=''`` on the
    # input is what the csv module documents for files handed to a reader.
    with open(input_filename, 'r', newline='') as input_file, \
            open(output_prefix + "-P.csv", "w") as out_p, \
            open(output_prefix + "-T.csv", "w") as out_t, \
            open(output_prefix + "-P-100.csv", "w") as out_p_100, \
            open(output_prefix + "-T-100.csv", "w") as out_t_100:
        full_outputs = {"P": out_p, "T": out_t}
        sample_outputs = {"P": out_p_100, "T": out_t_100}
        for handle in (out_p, out_t, out_p_100, out_t_100):
            handle.write(OUTPUT_HEADER)

        first_seqn = None
        for row in csv.DictReader(input_file):
            # The sample window is anchored at the very first row of the
            # file, whether or not that row passes the RXDUSE filter.
            if first_seqn is None:
                first_seqn = int(row["SEQN"])
            if row["RXDUSE"] != "1":
                continue
            for slot in REASON_SLOTS:
                record = format_record(row, slot)
                if record is None:
                    continue
                target, line = record
                full_outputs[target].write(line)
                if int(row["SEQN"]) - first_seqn < SAMPLE_SEQN_SPAN:
                    sample_outputs[target].write(line)


for input_filename in glob.glob(input_dir + 'RXQ_RX*'):
    print(input_filename)
    # The year code is the single character right before the last four
    # characters of the name (e.g. the "J" in "..._J.csv").
    year_code = input_filename[-5:-4]
    if year_code not in year_codes:
        continue
    output_prefix = output_dir + "DA-NHANES-" + year_codes[year_code] + "-RXQ_RX_" + year_code
    print(output_prefix + "-T.csv")
    print(output_prefix + "-P.csv")
    print(output_prefix + "-T-100.csv")
    print(output_prefix + "-P-100.csv")
    convert_file(input_filename, output_prefix)