-
Notifications
You must be signed in to change notification settings - Fork 0
/
cond_pmed_2020.R
335 lines (220 loc) · 10.8 KB
/
cond_pmed_2020.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# -----------------------------------------------------------------------------
#
# MEPS-HC: Prescribed medicine utilization and expenditures for
# the treatment of hyperlipidemia
#
# This example code shows how to link the MEPS-HC Medical Conditions file
# to the Prescribed Medicines file for data year 2020 in order to estimate
# the following:
#
# Event-level estimates:
# - Total rx fills for the treatment of hyperlipidemia
# - Total rx expenditures for the treatment of hyperlipidemia
#
# Person-level estimates
# - Total number of people treated for hyperlipidemia (any health care)
# - Number of people treated for hyperlipidemia with prescribed medicines
# - Mean number of rx fills and drugs per person for hyperlipidemia
# treatment (among those with any rx fills for hyperlipidemia)
# - Mean rx expenditures per person for the treatment of hyperlipidemia
# (among those with any rx fills for hyperlipidemia)
#
# Input files:
# - h220a.dta (2020 Prescribed Medicines file)
# - h222.dta (2020 Conditions file)
# - h220if1.dta (2020 CLNK: Condition-Event Link file)
# - h224.dta (2020 Full-Year Consolidated file)
#
# Resources:
# - CCSR codes:
# https://github.com/HHS-AHRQ/MEPS/blob/master/Quick_Reference_Guides/meps_ccsr_conditions.csv
#
# - MEPS-HC Public Use Files:
# https://meps.ahrq.gov/mepsweb/data_stats/download_data_files.jsp
#
# - MEPS-HC online data tools:
# https://datatools.ahrq.gov/meps-hc
#
# -----------------------------------------------------------------------------
# Install/load packages and set global options --------------------------------
# For each package that you don't already have installed, un-comment
# and run. Skip this step if all packages below are already installed.
# install.packages("survey") # for survey analysis
# install.packages("haven") # for loading Stata (.dta) files
# install.packages("tidyverse") # for data manipulation
# install.packages("devtools") # for loading "MEPS" package from GitHub
# If MEPS package is not installed, un-comment and run to install
# library(devtools)
# install_github("e-mitchell/meps_r_pkg/MEPS")
# Load libraries
library(MEPS)
library(survey)
library(tidyverse)
library(haven)
# Set survey option for lonely PSUs
options(survey.lonely.psu="adjust")
# Load datasets ---------------------------------------------------------------
# RX = Prescribed medicines (PMED) file (record = rx fill or refill)
# Conditions = Medical conditions file (record = medical condition)
# CLNK = Conditions-event link file (crosswalk between conditions and
# events, including PMED events)
# FYC = Full year consolidated file (record = MEPS sample person)
### Option 1 - load data files using read_MEPS from the MEPS package
# For PMED file, rename LINKIDX to EVNTIDX to merge with Conditions
pmed20 <- read_MEPS(year = 2020, type = "RX") %>% rename(EVNTIDX=LINKIDX)
cond20 <- read_MEPS(year = 2020, type = "Conditions")
clnk20 <- read_MEPS(year = 2020, type = "CLNK")
fyc20 <- read_MEPS(year = 2020, type = "FYC")
### Option 2 - load Stata data files using read_dta from the haven package
# Replace "C:/MEPS" below with the directory you saved the files to.
# For PMED file, rename LINKIDX to EVNTIDX to merge with Conditions
# pmed20 <- read_dta("C:/MEPS/h220a.dta") %>% rename(EVNTIDX=LINKIDX)
# cond20 <- read_dta("C:/MEPS/h222.dta")
# clnk20 <- read_dta("C:/MEPS/h220if1.dta")
# fyc20 <- read_dta("C:/MEPS/h224.dta")
# Select only needed variables ------------------------------------------------
pmed20x <- pmed20 %>% select(DUPERSID, DRUGIDX, RXRECIDX, EVNTIDX,
RXDRGNAM, RXXP20X)
cond20x <- cond20 %>% select(DUPERSID, CONDIDX, ICD10CDX, CCSR1X:CCSR3X)
fyc20x <- fyc20 %>% select(DUPERSID, SEX, CHOLDX, VARSTR, VARPSU,
PERWT20F)
# OPTIONAL: Look at table of ICD10s and CCSRs.
cond_counts <- cond20x %>%
count(ICD10CDX, CCSR1X, CCSR2X, CCSR3X)
View(cond_counts)
# Prepare data for estimation -------------------------------------------------
# Subset condition records to hyperlipidemia (any CCSR = "END010")
hl <- cond20x %>%
filter(CCSR1X == "END010" | CCSR2X == "END010" | CCSR3X == "END010")
# Example to show someone with 'duplicate' hyperlipidemia conditions with
# different CONDIDXs. This usually happens when the collapsed 3-digit
# ICD10s are the same but the fully-specified ICD10s are different
# (e.g., one person has different condition records for both E78.1 and
# E78.5, which both map to END010 collapse to E78 on the PUF).
dup_hl <- hl[duplicated(hl$DUPERSID), ]
hl %>% filter(DUPERSID=='2320134102')
# Get de-duplicated list of people (unique DUPERSIDs) for people with
# treated hyperlipidemia and create a person-level flag for any
# hyperlipidemia treatment (hl_flag)
hl_ppl <- hl %>%
select(DUPERSID) %>%
distinct %>%
mutate(hl_flag = 1)
# Merge this flag onto the FYC (person-level file) and set it to 0 for
# people without hyperlipidemia
fyc_hl <- fyc20x %>%
left_join(hl_ppl, by = "DUPERSID") %>%
replace_na(list(hl_flag = 0))
# QC: count of hl_flag=1 on FYC should be equal to the number of rows
# in hl_ppl and there should be no NAs
table(fyc_hl$hl_flag, useNA="always")
# QC: compare people *ever* diagnosed with hyperlipidemia (CHOLDX == 1) with
# those who were treated for hyperlipidemia in 2020 (hl_flag == 1)
# Note: there will always be noise in self-reported survey responses!
fyc_hl %>%
filter(CHOLDX >= 0) %>% # remove missing and inapplicable
count(CHOLDX, hl_flag)
# Merge hyperlipidemia conditions with PMED file, using CLNK as crosswalk
hl_merged <- hl %>%
inner_join(clnk20, by = c("DUPERSID", "CONDIDX")) %>%
inner_join(pmed20x, by = c("DUPERSID", "EVNTIDX"))
# QC: View top PMEDS for hyperlipidemia to see if they make sense
hl_merged %>%
count(ICD10CDX, RXDRGNAM) %>%
arrange(-n)
# Because some people can have multiple CONDIDX values for hyperlipidemia
# as shown in the example above, and each of these different CONDIDX IDs
# can link to the same rx fills, it is necessary to de-duplicate on
# the unique fill identifier RXRECIDX within a person who has
# hyperlipidemia.
# For example, the same drug/fill can link to both E78.1 and E78.5.
# An example illustrating the above issue. Note that there are 'duplicate'
# RXRECIDX (fill IDs) repeated across two different CONDIDX values for the
# same person because this person has 'duplicate' hyperlipidemia records
# and some fills are linked to both CONDIDX values for hyperlipidemia.
hl_merged %>%
filter(DUPERSID == "2320134102") %>%
select(DUPERSID, CONDIDX, RXRECIDX, RXDRGNAM, ICD10CDX, CCSR1X)
# De-duplicate 'duplicate' fills
hl_dedup <- hl_merged %>%
distinct(DUPERSID, RXRECIDX, .keep_all=T)
# Revisiting the example to show effect of de-duplicating
hl_dedup %>%
filter(DUPERSID == "2320134102") %>%
select(DUPERSID, CONDIDX, RXRECIDX)
# For each person, count the number of unique drugs, number of fills, and
# sum expenditures for treating hyperlipidemia.
# Make a flag for people with a PMED purchase (hl_pmed_flag)
drugs_by_pers <- hl_dedup %>%
group_by(DUPERSID) %>%
summarize(
number_hl_drugs = n_distinct(DRUGIDX),
number_hl_fills = n_distinct(RXRECIDX),
hl_drug_exp = sum(RXXP20X)) %>%
mutate(hl_pmed_flag = 1)
# Revisiting 'duplicate' fill example at the person level to show
# that we counted their fills and expenses only once
drugs_by_pers %>%
filter(DUPERSID == "2320134102")
# Merge onto FYC file to capture all Strata (VARSTR) and PSUs (VARPSU) for
# all MEPS sample persons for correct variance estimation
fyc_hl_merged <- fyc_hl %>%
left_join(drugs_by_pers, by="DUPERSID") %>%
replace_na(
list(number_hl_drugs = 0,
number_hl_fills = 0,
hl_pmed_flag = 0,
hl_drug_exp = 0))
# QC: check counts of hl_pmed_flag=1 and compare to the number of rows in
# drugs_by_pers. Confirm all NAs were overwritten to zeroes.
table(fyc_hl_merged$hl_pmed_flag, useNA="always")
# QC: There should be no records where number of drugs > number of fills
fyc_hl_merged %>% filter(number_hl_drugs > number_hl_fills)
# QC: There should be no records where hl_flag=0 and hl_pmed_flag=1
fyc_hl_merged %>% filter(hl_flag==0 & hl_pmed_flag==1)
# QC: There should be no records where hl_pmed_flag=0 and
# (hl_drug_exp > 0 or number_hl_drugs > 0 or number_hl_fills > 0)
fyc_hl_merged %>%
filter(hl_pmed_flag==0 &
(hl_drug_exp > 0 | number_hl_drugs > 0 | number_hl_fills > 0))
# Define survey design object ------------------------------------------
meps_dsgn <- svydesign(
id = ~VARPSU,
strata = ~VARSTR,
weights = ~PERWT20F,
data = fyc_hl_merged,
nest = TRUE)
# ESTIMATION ------------------------------------------------------------
### National Totals:
# !!Note: The total number of people treated for hyperlipidemia
# does not exactly match the online data tools estimate because the online
# data tools exclude informal home health visits from its estimate
# (we did not exclude them here).
svytotal(~hl_flag + # Total people treated for hyperlipidemia
hl_pmed_flag + # Total people treated for HL w/ rx drugs
number_hl_fills + # Total rx fills for hyperlipidemia
hl_drug_exp, # Total rx expenditures for hyperlipidemia
design=meps_dsgn)
# Total number of people treated for hyperlipidemia with prescribed
# medicines, BY SEX
svyby(~hl_pmed_flag, ~factor(SEX), design=meps_dsgn, svytotal)
### Per-person averages for people with at least one PMED fill for
### hyperlipidemia (hl_pmed_flag = 1)
# Subset survey design object to those with at least one PMED fill
# for hyperlipidemia
hl_pmed_dsgn <- subset(meps_dsgn, hl_pmed_flag == 1)
# Estimation of means among people with at least one PMED fill for
# hyperlipidemia
svymean(~number_hl_fills + # Avg # of fills for HL per person w/ HL fills
number_hl_drugs + # Avg # of drugs for HL per person w/ HL fills
hl_drug_exp, # Avg PMED exp for HL per person w/ HL fills
design = hl_pmed_dsgn)
# Average PMED expenditures for hyperlipidemia among those with any rx fills
# for hyperlipidemia, BY SEX
svyby(~hl_drug_exp, ~factor(SEX), design=hl_pmed_dsgn, svymean)
# t-test of male vs. female for above estimates
# Note: The warning message is because some individuals have zero
# person weight.
svyttest(hl_drug_exp ~ factor(SEX), design=hl_pmed_dsgn)
# Alternate t-test method using regression
summary(svyglm(hl_drug_exp ~ factor(SEX), design=hl_pmed_dsgn))