-
Notifications
You must be signed in to change notification settings - Fork 0
/
SAS-Code-For-Cardiovascular-Disease-Analysis.sas
295 lines (242 loc) · 7.75 KB
/
SAS-Code-For-Cardiovascular-Disease-Analysis.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
/*William-Elijah Clark Project B Code*/
/*I hereby certify that the following SAS code is my own original work*/
/*Problem 1*/
/*Here's a snippet for my csv file*/
/** FOR CSV Files uploaded from Windows **/
/* Fileref for the uploaded CSV; TERMSTR=CRLF tells SAS the records end in
   carriage-return + line-feed, as in files uploaded from Windows. */
FILENAME CSV "/home/u49665201/sasuser.v94/STA3064/heart.csv" TERMSTR=CRLF;
/** Import the CSV file into the Heart data set (REPLACE overwrites any earlier copy). **/
/* GUESSINGROWS=MAX makes PROC IMPORT scan every row before it decides each
   column's type and length, instead of only the leading rows, so a longer or
   non-numeric value further down the file is not truncated or set to missing. */
PROC IMPORT DATAFILE=CSV
    OUT=Heart
    DBMS=CSV
    REPLACE;
    GUESSINGROWS=MAX;
RUN;
/****************************************/
/*Some Scatterplots by Physiological Sex*/
/****************************************/
/*Oooh, this is interesting! I'm not sure how to interpret those outliers that are closer to zero cholesterol just lined up at the
bottom of the scatterplot, but seeing male cholesterol decrease with age and female cholesterol increase with age was NOT something I
expected!*/
/* Scatter plot of cholesterol against age with a separate fitted regression
   line per sex; CLM adds confidence limits for the mean at alpha=0.05. */
proc sgplot data=heart;
    title 'Regression Fit Scatterplot of Cholesterol as a Function of Age and Sex';
    reg x=age y=cholesterol / group=sex clm alpha=0.05;
    yaxis grid;
    xaxis grid;
run;
/*This one is not a good correlation*/
/* Resting blood pressure against age, one regression fit per sex, with
   confidence limits for the mean (alpha=0.05). */
proc sgplot data=heart;
    title 'Regression Fit Scatterplot of Resting Blood Pressure as a Function of Age and Sex';
    reg x=age y=RestingBP / group=sex clm alpha=0.05;
    yaxis grid;
    xaxis grid;
run;
/*This looks like a good correlation of decreased maximum heart rates as a person ages*/
/* Maximum heart rate against age, one regression fit per sex, with
   confidence limits for the mean (alpha=0.05). */
proc sgplot data=heart;
    title 'Regression Fit Scatterplot of Maximum Heart Rate as a Function of Age and Sex';
    reg x=age y=MaxHR / group=sex clm alpha=0.05;
    yaxis grid;
    xaxis grid;
run;
/************************************/
/*Some Scatterplots by Heart Disease*/
/************************************/
/*This scatterplot isn't very useful*/
/* Cholesterol against age with one regression fit per HeartDisease group and
   confidence limits for the mean (alpha=0.05). The title now names the
   grouping variable actually used (HeartDisease, not Sex). */
proc sgplot data=heart;
    reg x=age y=cholesterol / group=HeartDisease clm alpha=0.05;
    xaxis grid;
    yaxis grid;
    title 'Regression Fit Scatterplot of Cholesterol as a Function of Age and Heart Disease';
run;
/*This scatterplot shows another correlation that's mediocre at best.*/
/* Resting blood pressure against age with one regression fit per
   HeartDisease group and confidence limits for the mean (alpha=0.05).
   The title now names the grouping variable actually used. */
proc sgplot data=heart;
    reg x=age y=RestingBP / group=HeartDisease clm alpha=0.05;
    xaxis grid;
    yaxis grid;
    title 'Regression Fit Scatterplot of Resting Blood Pressure as a Function of Age and Heart Disease';
run;
/*This model is more promising*/
/* Maximum heart rate against age with one regression fit per HeartDisease
   group and confidence limits for the mean (alpha=0.05). The title now
   names the grouping variable actually used. */
proc sgplot data=heart;
    reg x=age y=MaxHR / group=HeartDisease clm alpha=0.05;
    xaxis grid;
    yaxis grid;
    title 'Regression Fit Scatterplot of Maximum Heart Rate as a Function of Age and Heart Disease';
run;
/*I think the above are nifty, but let's do a bit more dabbling.*/
/*Some scatterplots by ChestPain*/
/*This one isn't very useful*/
/* Cholesterol against age, one regression fit per chest pain type, with
   confidence limits for the mean (alpha=0.05). */
proc sgplot data=heart;
    title 'Regression Fit Scatterplot of Cholesterol as a Function of Age and Chest Pain Type';
    reg x=age y=cholesterol / group=ChestPainType clm alpha=0.05;
    yaxis grid;
    xaxis grid;
run;
/*This model looks like a dead end*/
/* Sex appears to hold character values (later steps use Sex(ref='M')), and
   the REG statement needs a numeric X variable, so a regression fit on sex
   cannot be drawn. Grouped box plots show the same comparison: cholesterol
   by sex, with one box per chest pain type inside each sex. */
proc sgplot data=heart;
    vbox cholesterol / category=sex group=ChestPainType;
    yaxis grid;
    title 'Box Plot of Cholesterol by Sex and Chest Pain Type';
run;
/*There is a correlation, but there's not much of a difference between groups*/
/* Resting blood pressure against age with one regression fit per
   ExerciseAngina group and confidence limits for the mean (alpha=0.05).
   The title now matches the plotted variables (Age on X, grouped by
   exercise-induced angina; Sex is not used in this plot). */
proc sgplot data=heart;
    reg x=age y=RestingBP / group=ExerciseAngina clm alpha=0.05;
    xaxis grid;
    yaxis grid;
    title 'Regression Fit Scatterplot of Resting Blood Pressure as a Function of Age and Exercise Induced Angina';
run;
/*This seems to be correlated by a decent amount*/
/* Maximum heart rate against age, one regression fit per ExerciseAngina
   group, with confidence limits for the mean (alpha=0.05). */
proc sgplot data=heart;
    title 'Regression Fit Scatterplot of Maximum Heart Rate as a Function of Age and Exercise Induced Angina';
    reg x=age y=MaxHR / group=ExerciseAngina clm alpha=0.05;
    yaxis grid;
    xaxis grid;
run;
/*This one doesn't seem to show a very strong correlation*/
/* Cholesterol against age, one regression fit per ExerciseAngina group,
   with confidence limits for the mean (alpha=0.05). Title typo fixed
   ("Exercised Induced" -> "Exercise Induced"). */
proc sgplot data=heart;
    reg x=age y=cholesterol / group=ExerciseAngina clm alpha=0.05;
    xaxis grid;
    yaxis grid;
    title 'Regression Fit Scatterplot of Cholesterol as a Function of Age and Exercise Induced Angina';
run;
/*I would do more if I understood the importance of Oldpeak and STSlope, and I don't know what to expect with fasting blood sugar.
This seems to call for medical knowledge that I simply don't have.*/
/*ANOVA*/
/*This one is OK*/
/* TITLE is a global statement: without this reset, the title set by the last
   SGPLOT step would be printed above the ANOVA output that follows. */
title;
/* One-way ANOVA of cholesterol on HeartDisease with Type III sums of squares;
   MEANS prints the group means and HOVTEST adds a homogeneity-of-variance test. */
proc glm data=Heart;
    class HeartDisease;
    model cholesterol = HeartDisease / ss3;
    means HeartDisease / hovtest;
run;
/*Meh*/
/* One-way ANOVA of resting blood pressure on HeartDisease (Type III SS),
   with group means and a homogeneity-of-variance test. */
proc glm data=Heart;
    class HeartDisease;
    model RestingBP = HeartDisease / ss3;
    means HeartDisease / hovtest;
run;
/*This one has a decent amount of statistical significance*/
/* One-way ANOVA of maximum heart rate on HeartDisease (Type III SS),
   with group means and a homogeneity-of-variance test. */
proc glm data=Heart;
    class HeartDisease;
    model MaxHR = HeartDisease / ss3;
    means HeartDisease / hovtest;
run;
/*This one seems to show the strongest statistical significance!*/
/* Same one-way model of maximum heart rate on HeartDisease (Type III SS),
   this time requesting Tukey comparisons of the group means. */
proc glm data=Heart;
    class HeartDisease;
    model MaxHR = HeartDisease / ss3;
    means HeartDisease / tukey;
run;
/*There's also a statistical correlation here*/
/* One-way ANOVA of cholesterol on Sex (Type III SS) with group means and a
   homogeneity-of-variance test. MEANS now names Sex: the original asked for
   HeartDisease, which is neither a CLASS variable nor a MODEL effect in this
   step, so the request did not match the model being fitted. */
proc glm data=Heart;
    class Sex;
    model cholesterol = Sex / ss3;
    means Sex / hovtest;
run;
/*This one is not a very strong correlation*/
/* One-way ANOVA of cholesterol on Sex (Type III SS) with Tukey comparisons
   of the group means. MEANS now names Sex: the original asked for
   HeartDisease, which is neither a CLASS variable nor a MODEL effect here. */
proc glm data=Heart;
    class Sex;
    model cholesterol = Sex / ss3;
    means Sex / tukey;
run;
/*This one has very little correlation*/
/* One-way ANOVA of resting blood pressure on Sex (Type III SS) with group
   means and a homogeneity-of-variance test. MEANS now names Sex: the original
   asked for HeartDisease, which is neither a CLASS variable nor a MODEL
   effect here. */
proc glm data=Heart;
    class Sex;
    model RestingBP = Sex / ss3;
    means Sex / hovtest;
run;
/*There's some use for this model, but the F-test statistic could be better*/
/* One-way ANOVA of maximum heart rate on Sex (Type III SS) with group means
   and a homogeneity-of-variance test. MEANS now names Sex: the original asked
   for HeartDisease, which is neither a CLASS variable nor a MODEL effect here. */
proc glm data=Heart;
    class Sex;
    model MaxHR = Sex / ss3;
    means Sex / hovtest;
run;
/*This model doesn't work very well.*/
/* One-way ANOVA of maximum heart rate on Sex (Type III SS) with Tukey
   comparisons of the group means. MEANS now names Sex: the original asked
   for HeartDisease, which is neither a CLASS variable nor a MODEL effect here. */
proc glm data=Heart;
    class Sex;
    model MaxHR = Sex / ss3;
    means Sex / tukey;
run;
/*The model residuals look terrible! Sure, there's a correlation, but it's not worth it with those residuals!*/
/* Two-factor main-effects model of cholesterol on HeartDisease and Sex, with
   reference levels '0' and 'M'. SOLUTION prints the parameter estimates, SS3
   the Type III tests, and PLOTS=DIAGNOSTICS the residual diagnostics panel.
   Tukey comparisons are requested for the HeartDisease means. */
proc glm data=Heart plots=diagnostics;
    class HeartDisease(ref='0') Sex(ref='M');
    model cholesterol = HeartDisease Sex / ss3 solution;
    means HeartDisease / tukey;
run;
/*Well, the residuals are OK, but the correlation just isn't there*/
/* Two-factor main-effects model of resting blood pressure on HeartDisease
   and Sex (reference levels '0' and 'M'), with parameter estimates, Type III
   tests and the residual diagnostics panel. */
proc glm data=Heart plots=diagnostics;
    class HeartDisease(ref='0') Sex(ref='M');
    model RestingBP = HeartDisease Sex / ss3 solution;
run;
/*Ahh, this looks better! I think I'll go with this model.*/
/* Two-factor main-effects model of maximum heart rate on HeartDisease and Sex
   (reference levels '0' and 'M'), with parameter estimates, Type III tests
   and the residual diagnostics panel.
   The original fitted this identical model twice, only to request Tukey
   comparisons for a different factor each time; a single fit with both
   effects on the MEANS statement gives the same comparisons without
   repeating the model output.
   NOTE(review): MEANS reports unadjusted group means; if the cells are
   unbalanced, LSMEANS with ADJUST=TUKEY may be the better comparison. */
proc glm data=Heart plots=diagnostics;
    class HeartDisease(ref='0') Sex(ref='M');
    model MaxHR = HeartDisease Sex / solution ss3;
    means HeartDisease Sex / tukey;
run;
/*This model doesn't work very well.*/
/* Cholesterol modelled on HeartDisease, Sex and their interaction (the bar
   operator expands to both main effects plus HeartDisease*Sex), with
   parameter estimates, Type III tests and the residual diagnostics panel. */
proc glm data=Heart plots=diagnostics;
    class HeartDisease(ref='0') Sex(ref='M');
    model cholesterol = HeartDisease|Sex / ss3 solution;
run;
/*This model also doesn't work very well.*/
/* Resting blood pressure modelled on HeartDisease, Sex and their interaction,
   with parameter estimates, Type III tests and the residual diagnostics panel. */
proc glm data=Heart plots=diagnostics;
    class HeartDisease(ref='0') Sex(ref='M');
    model RestingBP = HeartDisease|Sex / ss3 solution;
run;
/*The model below is OK.*/
/* Maximum heart rate modelled on HeartDisease, Sex and their interaction,
   with parameter estimates, Type III tests and the residual diagnostics panel.
   The original ran this step twice with byte-identical statements, so the
   second run added nothing. By analogy with the main-effects MaxHR model,
   the second run was presumably meant to compare the Sex means; both factors
   are now requested from a single fit.
   NOTE(review): MEANS reports unadjusted marginal means; with an interaction
   in the model, LSMEANS may be the more appropriate comparison. */
proc glm data=Heart plots=diagnostics;
    class HeartDisease(ref='0') Sex(ref='M');
    model MaxHR = HeartDisease|Sex / solution ss3;
    means HeartDisease Sex / tukey;
run;
/*Unsurprisingly, predicting age based on heart disease and sex doesn't work very well*/
/* Age modelled on the main effects of HeartDisease and Sex (reference levels
   '0' and 'M'), with parameter estimates, Type III tests and the residual
   diagnostics panel. */
proc glm data=Heart plots=diagnostics;
    class HeartDisease(ref='0') Sex(ref='M');
    model Age = HeartDisease Sex / ss3 solution;
run;
/*ANCOVA*/
/*Age seems to be the odd variable that doesn't show much of a difference here, but the rest show statistically significant differences.*/
/* ANCOVA: cholesterol on the continuous covariate Age (not listed in CLASS)
   plus the classification effects Sex and HeartDisease, with parameter
   estimates, Type III tests and the residual diagnostics panel. */
proc glm data=heart plots=diagnostics;
    class sex(ref='M') HeartDisease(ref='0');
    model Cholesterol = Age Sex HeartDisease / ss3 solution;
run;
/*Once again, RestingBP is tricky to pin down. However, we can say that it's specifically physiological sex that doesn't show any difference.*/
/* ANCOVA: resting blood pressure on the continuous covariate Age plus the
   classification effects Sex and HeartDisease, with parameter estimates,
   Type III tests and the residual diagnostics panel. */
proc glm data=heart plots=diagnostics;
    class sex(ref='M') HeartDisease(ref='0');
    model RestingBP = Age Sex HeartDisease / ss3 solution;
run;
/*There seems to be a decent amount of statistical difference here.*/
/* ANCOVA: maximum heart rate on the continuous covariate Age plus the
   classification effects Sex and HeartDisease, with parameter estimates,
   Type III tests and the residual diagnostics panel. */
proc glm data=heart plots=diagnostics;
    class sex(ref='M') HeartDisease(ref='0');
    model MaxHR = Age Sex HeartDisease / solution ss3;
run;
/* PROC GLM is interactive and stays active after RUN. No later step is
   visible here to end it, so terminate it explicitly. */
quit;