-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path14_End_of_life_analysis.R
217 lines (160 loc) · 11.9 KB
/
14_End_of_life_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Project: English results for the International Collaborative on Costs, Outcomes & Needs in Care (ICCONIC)
# Purpose: Additional work on end of life utilisation and spending
# Author: Dr Will Parry
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Setup - load packages and read in data ----
pkgs <- c('tidyverse', 'readxl', 'here')
sapply(pkgs, require, character.only = TRUE)
#_____________________________________________________________
#Create function to tally patients in end of life cohorts ----
#Prepare data for hip cohort
hip_cats <- c('hip_op_total','hip_op_partial','hip_op_osteopin')
hipdeaths <- readRDS(here('../Data', 'hipdeaths.rds')) %>% #read in all hip deaths data
filter(inhip, hip_cat %in% hip_cats) #filter out inhip and hip ops
deathN <- function(deathflag = NULL){ #function expects death flags from mortality outcomes analysis
temp <- if(is.null(deathflag)){hipdeaths} else {hipdeaths %>% filter(!!sym(deathflag))} #filter for death flag
x1 <- temp %>% group_by(gender, hip_agegroup) %>% tally() #count patients by gender and age group
x2 <- temp %>% group_by(hip_agegroup) %>% tally() %>% #count patients just by age group
bind_rows(x1) %>% #bind to previous results
pivot_wider(id_cols = hip_agegroup, values_from = n, names_from = gender) #pivot wide by gender
x3 <- x2 %>% summarise(across(c('NA', '1', '2'), sum, na.rm = TRUE)) #add totals by gender
return(bind_rows(x2,x3) %>% select(hip_agegroup, All = 'NA', Female = '2', Male = '1')) #rename columns and return results
}
write.csv(deathN(deathflag = NULL), here('../Outputs', 'death_hip_all.csv'))
write.csv(deathN(deathflag = 'death365days'), here('../Outputs', 'death_hip_365.csv'))
write.csv(deathN(deathflag = 'hospitaldeath365'), here('../Outputs', 'death_hip_hosp365.csv'), na = '0')
#________________________________________________________
#Creating statistics for various end of life periods ----
source(here('R', 'Output_functions.R')) #source summary stats functions
#Amend the death cohort to within 365 days only and add eol time period identifiers
hipdeaths365 <- hipdeaths %>% filter(death365days) %>% #filter cohort to deaths within 365 days of index admission
rename(hipcips = cips) %>% #rename index cips identifier
select(!(cipstart:cipdur)) %>% #remove index cips info (cips info from all costed cips is needed later)
mutate(eol_start30 = dod - 30, #add end of life period start dates...
eol_start90 = dod - 90,
eol_start180 = dod - 180)
eol_start_vars <- c(eol30 = 'eol_start30', eol90 = 'eol_start90', eol180 = 'eol_start180') #store the eol start date var names
#_________________
#APC analysis ----
#Read in cost and utilisation data
APCcosts <- readRDS(here('../Data', 'APCcosts.rds')) %>% ungroup() %>% #read in APC data and remove grouping
select(!(hippre:dempost)) #remove pre/post variables
#Create function to prepare APC data
APCeol_prep <- function(eol_start){
temp <- hipdeaths365 %>% #get hip cohort deaths within 365 days with end of life period indicators
inner_join(APCcosts, by = c('patid', 'gender', 'dod', 'hip_cat', 'hip_agegroup')) %>% #join on APC costs using inner join
filter(cipend >= !!sym(eol_start) & cipstart <= dod) %>% #remove records not overlapping end of life period
mutate(t_start = ifelse(cipstart < !!sym(eol_start), !!sym(eol_start), cipstart), #adjust start dates so they are not before eol_start date
t_end = ifelse(dod < cipend, dod, cipend), #adjust end dates so they are not after death
t_days = t_end - t_start + 1, #add new days in hospital for cips within eol period
percent_of_cips = t_days / cipdays, #work out percentage of original cips days within eol period
eol_cost = totcost * percent_of_cips) %>% #multiply cost of cips by percentage within eol period
ungroup() %>% arrange(patid) #ungroup and sort data
}
#NB: check that joining fields used above are constant within-patient
#Function to create summary statistics for various eol periods
APC_eol <- function(sum_var){ #sum_var is the variable being summarised
map_dfr(eol_start_vars, function(.x) { #for each of the eol period start variables...
APCeol_prep(.x) %>% #prepare the APC data then send that to the summary statistics output function
stats_func(cohort_in = hipdeaths365, cost_data = ., cost_var = sum_var, pre_post = NULL,
age_group = 'hip_agegroup', hip_cats = hip_cats, extra_cat = NULL)}, .id = 'eol')}
#NB: when using function for days in hospital, the days are the 'cost_var'
APC_mean_eol_costs <- APC_eol('eol_cost') %>% select(eol:Male_NA) #APC costs at end of life
APC_mean_eol_days <- APC_eol('t_days') %>% select(eol:Male_NA) #APC days in hospital at end of life
#NB: in the eol_APC_days output the 'meancost' column is actually the mean days in this case
write.csv(APC_mean_eol_costs, here('../Outputs', 'eol_APC_costs.csv'))
write.csv(APC_mean_eol_days, here('../Outputs', 'eol_APC_days.csv'))
#_____________________________________________________________________________________________________
#Create function to prepare data for any types of activity that occur same-day (i.e. A&E, OP, PC) ----
eol_func <- function(data_in, event_date, eol_start){ #this version is simpler than the APCeol_prep function
hipdeaths365 %>% #get hip cohort deaths within 365 days with end of life period indicators
inner_join(data_in, by = c('patid', 'gender', 'dod', 'hip_cat', 'hip_agegroup')) %>% #join on costs using inner join
filter(!!sym(event_date) >= !!sym(eol_start) & !!sym(event_date) <= dod) #remove records not in end of life period
}
#_________________
#A&E analysis ----
#Read in cost and utilisation data
AEcosts_no_hosp <- readRDS(here('../Data', 'AEcosts.rds')) %>% #read AE costs data
select(patid, aekey, gender, dod, hip_cat, hip_agegroup, arrivaldate, aeattenddisp, totcost) %>% #select necessary cols
filter(aeattenddisp != 1) #remove admitted patients
#Call stats_func on data returned by eolfunc for each end of life time period
AE_eol <- map_dfr(eol_start_vars, function(.x) { #for each of the eol period start variables...
eol_func(AEcosts_no_hosp, 'arrivaldate', .x) %>% #prepare data using eol_func then send to the summary statistics output function
stats_func(cohort_in = hipdeaths365, cost_data = ., cost_var = 'totcost', pre_post = NULL,
age_group = 'hip_agegroup', hip_cats = hip_cats, extra_cat = NULL)}, .id = 'eol') %>%
select(eol:Male_NA) #reduce to required cols
write.csv(AE_eol, here('../Outputs', 'eol_AE_no_hosp.csv')) #write to file
#________________
#OP analysis ----
#Read in specialty categorisations
OPbuckets <- read_excel(here('Reference_data', 'Outpatient specialty code buckets.xlsx')) #categories based on 'mainspef'
#Read in cost and utilisation data
OPcosts <- readRDS(here('../Data', 'OPcosts.rds')) %>% #read OP costs data
left_join(OPbuckets) %>% #join on specialty 'category'
select(patid, attendkey, gender, dod, hip_cat, hip_agegroup, apptdate, totcost, category) #select necessary cols
#Call stats_func on data returned by eol_func for each end of life time period
OP_eol <- map_dfr(eol_start_vars, function(.x) { #for each of the eol period start variables...
eol_func(OPcosts, 'apptdate', .x) %>% #prepare data using eol_func then send to the summary statistics output function
stats_func(cohort_in = hipdeaths365, cost_data = ., cost_var = 'totcost', pre_post = NULL,
age_group = 'hip_agegroup', hip_cats = hip_cats, extra_cat = 'category')}, .id = 'eol') %>%
select(eol:Male_NA) #reduce to required cols
write.csv(OP_eol, here('../Outputs', 'eol_OP_by_category.csv')) #write to file
#________________
#PC analysis ----
GPcosts <- readRDS(here('../Data', 'PCvisits.rds')) %>% #read in costed primary care visits
select(patid, gender, dod, hip_cat, hip_agegroup, eventdate, role_cat, cost) %>% #reduce to required columns
filter(role_cat == 'GP') #restrict to GP consultations and costs
#Call statsfunc on data returned by eol_func for each end of life time period
GP_eol <- map_dfr(eol_start_vars, function(.x) { #for each of the eol period start variables...
eol_func(GPcosts, 'eventdate', .x) %>% #prepare data using eol_func then send to the summary statistics output function
stats_func(cohort_in = hipdeaths365, cost_data = ., cost_var = 'cost', pre_post = NULL,
age_group = 'hip_agegroup', hip_cats = hip_cats, extra_cat = NULL)}, .id = 'eol') %>%
select(eol:Male_NA) #reduce to required cols
write.csv(GP_eol, here('../Outputs', 'eol_GP_primary_care.csv')) #write to file
#___________________
#Drugs analysis ----
#Drugs are slightly problematic, as in the 'Costing_primary_care' script they were grouped by 'prepost' and
#drug substance. However, for this analysis, we need the date of the prescription to see which eol period
#they are for using the 'eol_func'
PCdrugs_recs <- readRDS(here('../Data', 'PCdrugs_recs.rds')) %>% #read in costed drugs
select(patid, gender, dod, hip_cat, hip_agegroup, eventdate, drugsubstance, cost) #reduce to required columns
PCdrugscosts <- map(eol_start_vars, function(.x){ #for each of the eol period start variables...
eol_func(PCdrugs_recs, 'eventdate', .x) %>% #prepare data using eol_func
select(patid, gender, dod, hip_cat, hip_agegroup, drugsubstance, cost) %>% #select required columns
group_by(across(-cost)) %>% #group by everything except the cost
summarise(countrecs = n(), drugcost = sum(cost, na.rm = TRUE), .groups = 'keep')}) #summarise (by patient and drug substance, essentially)
drugs_eol <- map_dfr(PCdrugscosts, function(.x) { #send summarised data to summary statistics output function
stats_func(cohort_in = hipdeaths365, cost_data = .x, cost_var = 'drugcost', pre_post = NULL,
age_group = 'hip_agegroup', hip_cats = hip_cats, extra_cat = NULL)}, .id = 'eol') %>%
select(eol:Male_NA) #reduce to required cols
write.csv(drugs_eol, here('../Outputs', 'eol_Drugs_primary_care.csv')) #write to file
#____________________
#Survival tables ----
tab365 <- hipdeaths365 %>% group_by(hip_agegroup, admission_to_death) %>% tally() %>% #summarise hip deaths within year follow-up
ungroup() %>% mutate(result = NA) #ungroup and add a column for storing the result of the algorithm below
age_groups <- c('65-69', '70-74', '75-79', '80-84', '85-89', '90-94', '95+') #store age groups used in hip analysis
#Statistical Disclosure Control (SDC) survival table function
SDC_counter <- function(.g) { #function expects an age group
x <- tab365 %>% filter(hip_agegroup == .g) #filter the summary table of counts (tab365) by age group (.g)
y <- 0 #initialise counter y to zero
L <- nrow(x) #store last row number of table x
for (i in 1:L){ #for each row...
y <- y + x$n[i] #add the value in the tally column (n) to y
if (y >= 10) { #if the value is >=10
x$result[i] <- y #store in the result column
z <- i #remember which row this occurred on
y <- 0 #re-initialise y to zero
}
if (i == L & i != z){ #if you've reached the last row and it is not the last row >= 10
x$result[i] <- y + x$result[z] #add y to the last row >= 10 and store in result column
x$result[z] <- NA #remove the value that was last >= 10 from the results
}
}
return(x)
}
base_N <- hipdeaths %>% group_by(hip_agegroup) %>% tally(n = 'base') #create a base column by age group
survival <- map_dfr(age_groups, SDC_counter) %>% filter(!is.na(result)) %>% #send age groups to SDC_counter and filter out results
group_by(hip_agegroup) %>% mutate(cumsum = cumsum(result)) %>% #create a cumulative sum column by age group
inner_join(base_N) %>% mutate(survival = (base - cumsum) / base) #join on the bases and create a cumulative percentage survival
write.csv(survival, here('../Outputs', 'eol_survival_table.csv')) #write to file