-
Notifications
You must be signed in to change notification settings - Fork 0
/
File1_UK_LLC_CohortDataDerivationndividualStudyDerivation.do
2675 lines (1887 loc) · 86.1 KB
/
File1_UK_LLC_CohortDataDerivationndividualStudyDerivation.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
*** Stata code deriving harmonised variables; the outputs are combined in R and
*** then analysed in Stata.
*** Compiled by Richard Shaw (richard.shaw@glasgow.ac.uk). Other contributors to
*** the work include Jingmin Zhu (ELSA) & Rebecca Rhead (NCDS, BCS70, Next Steps),
*** and it has undoubtedly been informed by the work of many others.

* File locations: -source- holds the supplied files, -destination- receives the
* reassembled and derived files. (Files are reassembled in their original form.)
global destination "S:\LLC_0010\data\flow_chart"
global source "S:\LLC_0010\data\stata_w_labs"

* Start from an empty session and allow wide datasets
clear all
set maxvar 30000
********************************************************************************
*** ELSA: COVID sub-study waves 1 and 2
* Every variable receives a wave suffix so both waves can sit side by side after
* merging; the person identifier is then restored to its unsuffixed name because
* it is the merge key. A membership flag records presence in each wave.

* COVID wave 1 (memory is already empty, so -use- needs no clear option)
use "$source\ELSA_elsa_covid_w1_eul_v0001_20211101.dta"
rename * *_w1
rename llc_0010_stud_id_w1 llc_0010_stud_id
generate inCVWave1 = 1
save "$destination\tempCV_w1.dta", replace

* COVID wave 2
use "$source\ELSA_elsa_covid_w2_eul_v0001_20211101.dta", clear
rename * *_w2
rename llc_0010_stud_id_w2 llc_0010_stud_id
generate inCVWave2 = 1
save "$destination\tempCV_w2.dta", replace
*** Assembling ELSA wave 9, which is supplied split into many component files.
* The first component ("ca") is the base file.
use "$source\elsa_wave_9_elsa_data_eul_ca_v0001_20211101.dta", clear

* Every other component is matched one-to-one on the person id.
* -nogenerate- stops _merge being created, so nothing has to be dropped afterwards.
local w9_parts cf dhdi er ex fq heps ho1 ho2 ia1 ia2 ia3 spsc wp
foreach part of local w9_parts {
    merge 1:1 llc_0010_stud_id using "$source\elsa_wave_9_elsa_data_eul_`part'_v0001_20211101.dta", nogenerate
}

* Only the derived variables that are needed are pulled from the two derived files.
merge 1:1 llc_0010_stud_id using "$source\ELSA_wave_9_financial_derived_variables_v0001_20211101.dta", keepusing(eqtotinc_bu_s nettotnhw_bu_s tnhwq5_bu_s) nogenerate
merge 1:1 llc_0010_stud_id using "$source\ELSA_wave_9_ifs_derived_variables_v0001_20211101.dta", keepusing(smoker* edqual difjobm) nogenerate

* Membership flag for wave 9
generate inWave9 = 1
save "$destination\temp_w9.dta", replace
********************************************************************************
* Assembling the ELSA analysis file: COVID wave 1 is the master, COVID wave 2 is
* added in full, and wave 9 is added only for people already present in a COVID
* wave (wave-9-only records are not kept).
use "$destination\tempCV_w1", clear
merge 1:1 llc_0010_stud_id using "$destination\tempCV_w2", nogenerate
merge 1:1 llc_0010_stud_id using "$destination\temp_w9", keep(master match) nogenerate

* Wave-participation pattern as a three-character string in the order
* wave 9, COVID wave 1, COVID wave 2 ("1" = present, "." = absent)
egen ELSA_inwave = concat(inWave9 inCVWave1 inCVWave2)
tabulate ELSA_inwave
********************************************************************************
*** Sample-selection flags (1 = retained at that step, missing = excluded)

* Study selection: flagged as core member/partner in either COVID wave ...
gen study_selection = 1 if corepartner_w1==1 | corepartner_w2==1
* ... and present in wave 9 (the three patterns below are those with no wave 9 record)
replace study_selection = . if ELSA_inwave == "..1" | ELSA_inwave == ".1." | ELSA_inwave == ".11"

* Age selection: exclude those aged 66 or over, using the COVID wave 2 age and
* falling back to the COVID wave 1 age only when the wave 2 age is missing.
* Stata treats missing as larger than any number, so the wave 2 test must exclude
* missing explicitly; otherwise everyone without a wave 2 age would be dropped
* and the wave 1 fallback would never apply.
gen age_selection = 1
replace age_selection = . if age_arch_w2 >= 66 & !missing(age_arch_w2)
* When both ages are missing the comparison below is true, so the case is excluded.
replace age_selection = . if age_arch_w1 >= 66 & missing(age_arch_w2)

* Employment selection: cvpred_w1 equal to 2 or 3
* NOTE(review): presumably these codes mean employed/self-employed before the
* pandemic - confirm against the ELSA COVID wave 1 codebook.
gen employment_selection = 1 if cvpred_w1 == 2 | cvpred_w1== 3
******************************************************************************
***** Outcome variables
*** Employment status
* Following discussions with Daniel Kopasker (Economist):
* the self-employed but not currently working have been classified as
* economically active but unemployed.
* employ is a three-category collapse of cvpstd_w2; -9/-8 become missing.
gen employ = cvpstd_w2
recode employ (1=2)(2=3)(3=1)(4 =3)(5/8=2)(-9/-8=.)
label define employ 1"1:furloygh" 2"2:unemployed" 3"3:employed/self-employed",replace
label values employ employ

*** Economic activity (0 = active, 1 = inactive)
* Each recode only fills values that are still missing, so earlier rules win.
label define econ_act 0 "Active" 1 "In active"
gen econ_act = .
recode econ_act . = 0 if inlist(cvpstd_w2, 2, 3, 4, 5)
recode econ_act . = 1 if inlist(cvpstd_w2, 1, 7 , 8)
* Code 6 is split by whether the respondent reports searching (cvsearch_w2)
recode econ_act . = 0 if cvpstd_w2 == 6 & cvsearch_w2 == 1
recode econ_act . = 1 if cvpstd_w2 == 6 & cvsearch_w2 == 2
* Attach the value label defined above (it was previously defined but never applied)
label values econ_act econ_act
* Financial difficulty
* subfin copies cvpostfn_w2 and carries the five-point labels below.
gen subfin=cvpostfn_w2
label define subfin 1"1:much worse" 2"2:little worse" 3"3:same" 4"4:little better" 5"5:much better",replace
label val subfin subfin
* Binary version: 1 = same or better (3-5), 0 = worse (1-2).
* It is only defined for valid answers 1-5. Without the -if-, missing or
* negative-coded responses would be set to 0 and so counted as "worse".
gen subfin_bi = inlist(subfin, 3, 4, 5) if inrange(subfin, 1, 5)
*** Change in working time
* Number of hours worked: negative codes are blanked before use
recode cvpsth_w2 cvpreh_w1 (-9/-1=.)
* Hours at COVID wave 2
gen workhour_w2 = cvpsth_w2
* Pre-pandemic working hours (reported at COVID wave 1)
gen workhour_pre=cvpreh_w1
* Tolerance band: within +/-10% of pre-pandemic hours counts as "same"
gen workhour_low = workhour_pre*0.9
gen workhour_high = workhour_pre*1.1

label define change 1"1:increase" 2"2:decrease" 3"3:same" 4"4:non-working furlough" 5"5:non-working non-furlough",replace

* workhour_change and worktime_change are built with the same rules.
* Three Stata pitfalls are avoided here:
*  - "a <= b <= c" is evaluated as "(a <= b) <= c", i.e. 0/1 compared with c, so
*    the band test is written as two explicit comparisons;
*  - missing is larger than any number, so every comparison excludes missing
*    operands, otherwise missing hours would count as an increase;
*  - the not-equal operator must be written "!=" with no space.
foreach v in workhour_change worktime_change {
    gen `v' = .
    replace `v' = 1 if workhour_w2 > workhour_high & !missing(workhour_w2, workhour_high)
    replace `v' = 2 if workhour_w2 < workhour_low & !missing(workhour_w2, workhour_low)
    replace `v' = 3 if workhour_w2 >= workhour_low & workhour_w2 <= workhour_high & !missing(workhour_w2, workhour_pre)
    * Non-working categories override the hours comparison
    replace `v' = 4 if employ==1
    * NOTE(review): cvpsth_w2 holds hours worked, so "cvpsth_w2==5" classes people
    * working exactly 5 hours as non-working; this may have been meant to be
    * cvpstd_w2 (status). Left unchanged - confirm with the author.
    replace `v' = 5 if employ==2 | cvpsth_w2==5
    label values `v' change
}
*** Exposure
* Self-reported COVID
** Definition 1: positive COVID test / hospitalisation / one of three core symptoms
** COVID wave 1
* Count of the three core symptom indicators (rowtotal treats missing as 0)
egen srcovid_w1 = rowtotal(cvsymp01_w1 cvsymp02_w1 cvsymp05_w1)
tabulate srcovid_w1, missing
* Any core symptom reported
generate srcovid1_w1 = (srcovid_w1 >= 1) if srcovid_w1 != .
tabulate srcovid1_w1
tabulate cvtestb_w1
tabulate cvhosp_w1, nolab
recode cvhosp_w1 (-9/-8=.)
* Case if any of test / hospital / symptoms indicates COVID; left missing only
* when all three sources are missing
generate covcase_w1 = (cvtestb_w1 == 1 | cvhosp_w1 == 1 | srcovid1_w1 == 1) if !(cvtestb_w1 == . & cvhosp_w1 == . & srcovid1_w1 == .)
tabulate covcase_w1, missing
* Note: all those who had positive tests had been hospitalised.
* Note: the core set of symptoms is used because the ELSA question is slightly
* different and may prompt people to report fatigue irrespective of whether they
* thought it was due to COVID.

* Three-level status, filled in priority order: test result first, then symptoms
generate covid_confirmed_w1 = 2 if cvtestb_w1 == 1
replace covid_confirmed_w1 = 0 if covid_confirmed_w1 == . & cvtestb_w1 == 2
replace covid_confirmed_w1 = 1 if covid_confirmed_w1 == . & srcovid_w1 > 0
replace covid_confirmed_w1 = 0 if covid_confirmed_w1 == . & srcovid_w1 == 0
label define covid_confirmed_w1 0 "No" 1 "Suspected" 2 "Confirmed"
label values covid_confirmed_w1 covid_confirmed_w1
** COVID wave 2
* Case if tested positive or hospitalised, or if tested because of symptoms and
* the result was inconclusive (3) or still awaited (4); everyone else is 0
generate covcase_w2 = (cvtestb_w2 == 1 | cvhosp_w2 == 1) | (cvtestwhy_final001_w2 == 1 & inlist(cvtestb_w2, 3, 4))
tabulate covcase_w2
* Tested because they had COVID symptoms
tabulate cvtestwhy_final001_w2
* Note: no hospital stays in the analytic sample

* Three-level status in priority order: positive test (2), negative test (0),
* tested because of symptoms (1), remainder not suspected (0)
generate covid_confirmed_w2 = cond(cvtestb_w2 == 1, 2, cond(cvtestb_w2 == 2, 0, cond(cvtestwhy_final001_w2 == 1, 1, 0)))
* People whose test results are not being reported
replace covid_confirmed_w2 = . if cvtestb_w2 == -9
label define covid_confirmed_w2 0 "No" 1 "Suspected" 2 "Confirmed"
label values covid_confirmed_w2 covid_confirmed_w2

* Combined status across both waves: the highest level reported in either wave;
* all remaining cases are coded 0
generate covid_sr = cond(covid_confirmed_w1 == 2 | covid_confirmed_w2 == 2, 2, cond(covid_confirmed_w1 == 1 | covid_confirmed_w2 == 1, 1, 0))
label define covid_sr 0 "No" 1 "Suspected" 2 "Confirmed"
label values covid_sr covid_sr

* What was the result of the COVID test
tabulate cvtestb_w2
*  -9 prefer not to answer
*  -1 not applicable
*   1 positive
*   2 negative
*   3 inconclusive
*   4 waiting

* Timing of COVID infection: 0 neither wave, 1 wave 1 only, 2 wave 2 only, 3 both.
* Defined only when both case indicators are 0/1.
generate covtime = covcase_w1 + 2*covcase_w2 if inlist(covcase_w1, 0, 1) & inlist(covcase_w2, 0, 1)
*** MAIN COVARIATES
* Sex: Sex_w2
* Age
* Two-band age group built from the COVID wave 2 age. Values outside 52-66 are
* left as the raw age by the recode.
* NOTE(review): the first band recodes ages 52-54 but is labelled "45-54" - confirm
* the label is intended.
gen age_gr_w2=age_arch_w2
recode age_gr_w2 (52/54=1) (55/66=2)
label define age 1"1:45-54" 2"2:55-66",replace
label values age_gr_w2 age
* Ethnicity
* Five broad groups derived from fqethnm; negative codes become missing.
* NOTE(review): a value label called "ethnicity" is defined here and the recode
* below also supplies labels for a new variable called ethnicity - confirm the
* two do not clash when run.
label define ethnicity 1 "White" 2 "Asian" 3 "Black" 4 "Mixed" 5 "Other"
recode fqethnm (-9/-1 = . ) (1 = 1 "White") (2 = 4 "Mixed") (3/4 = 3 "Black") (5/6 = 2 "Asian") (95 = 5 "Other") , gen(ethnicity)
label variable ethnicity "Ethnicity Broad"
* Placeholder, always missing in this script (it is kept in the final file).
gen ethnicity1 = .
* Binary ethnicity: White (0) versus all other groups (1)
recode ethnicity (1 = 0 "White") (2/5 = 1 "Non White") , gen(ethnicity_bin)
label variable ethnicity_bin "Ethnicity Binary"
* Education
* Four-level NVQ-equivalent grouping of w9edqual; negative codes become missing.
label define education 0 "NVQ 4 or 5" 1 "NVQ 3" 2 "NVQ 2 & 1" 3 "None"
recode w9edqual (-9/-1=.) (1/2 = 0 ) (3 = 1) (4/5 = 2) (6/7 = 3), gen(education)
label variable education "Education NVQ eqvs"
label values education education
* Binary education: NVQ 4/5 (0) versus NVQ 3 or below (1)
recode education (0 = 0 "NVQ 4 or 5") (1/3 = 1 "NVQ3 or less") , gen(education_bin)
*Soc 2000
/*Commenting out Soc2000 as data is not available.
* Soc2000 - for this variable, we need to gather info from previous waves (issues with w7!)
* Note1 -- I created in ELSAw1 a soc2000 var, and for w7 I derived it from nssec
* Note2 -- we also gather number of room for 'overcrowding'
* In ELSA w6, for the loop to work, you first need to rename HoRoom as horoom
cd "/Users/jm/OneDrive - University College London/ELSA_data/loop"
foreach h in 1 2 3 4 5 6 7 8 {
merge 1:1 idauniq using "wave_`h'_elsa_data", keepusing(w`h'soc2000 horoom)
rename horoom horoom_w`h'
drop if _merge==2
drop _merge
}
recode w*soc2000* (-9/-1=.)
generate occupation_1d=floor(w9soc2000r/10)
foreach h in 8 6 5 4 3 2 1 7 {
replace occupation_1d=floor(w`h'soc2000/10) if occupation_1d==.
}
gen occupation_2d=w9soc2000r_w9
foreach h in 8 6 5 4 3 2 1 7 {
replace occupation_2d=w`h'soc2000 if occupation_2d==.
}
* industry - SIC2003-2 digits -for this variable, we need to gather info from previous waves
foreach h in 4 5 6 7 8 {
merge 1:1 idauniq using "wave_`h'_elsa_data", keepusing(w`h'sic2003)
drop if _merge==2
drop _merge
}
recode w9sic2003r_w9 w8sic2003 w7sic2003 w6sic2003 w5sic2003 w4sic2003 (-3/-1=.)(-8=.)
rename w9sic2003r_w9 w9sic2003
gen sic2003_2d=.
foreach h in 9 8 7 6 5 4 {
replace sic2003_2d=w`h'sic2003 if sic2003_2d==.
}
merge m:m sic2003_2d using /Users/jasmine/Documents/UCL/NCS_7HS_P4_employment/sic2003_2007.dta
drop if _merge==2
drop _merge
*/
* Household composition
* Note that cvnump_w1 / cvnump_w2 indicate the number of people in the household.
* Target variable will have the following groups: alone, partner, partner & children, lone parent, other person.
* Note: to be consistent with CLS and USoc this has been amended to include grandchildren as offspring, not just children.
* Flags are 1 if any household member's relationship code (demographics_*_cvrelp)
* takes one of the listed values, else 0: values 2/3/4 mark offspring, value 1 a partner.
egen offspring_w1=anymatch(demographics_*_cvrelp_w1), values(2 3 4)
egen offspring_w2=anymatch(demographics_*_cvrelp_w2), values(2 3 4)
egen partinhh_w1=anymatch(demographics_*_cvrelp_w1), values(1)
egen partinhh_w2=anymatch(demographics_*_cvrelp_w2), values(1)
* NOTE(review): anymatch only produces 0/1, so this recode of -1 looks like a no-op - confirm.
recode partinhh_w1 -1=0
* Per-wave composition. Later replace statements override earlier ones, so the
* order of these lines matters.
forvalues k=1/2 {
generate cvhhcomp_w`k'=.
* 1 = lives alone
replace cvhhcomp_w`k'=1 if cvnump_w`k'==1
* 2 = two-person household with partner
replace cvhhcomp_w`k'=2 if cvnump_w`k'==2 & partinhh_w`k'==1
* 3 = partner and offspring (overrides 2 when the second person is flagged as both)
replace cvhhcomp_w`k'=3 if cvnump_w`k'>=2 & partinhh_w`k'==1 & offspring_w`k'==1
* 4 = offspring but no partner
replace cvhhcomp_w`k'=4 if cvnump_w`k'>=2 & partinhh_w`k'==0 & offspring_w`k'==1
* 5 = household size coded -1, or other arrangements without offspring
replace cvhhcomp_w`k'=5 if cvnump_w`k'==-1 | (cvnump_w`k'>=2 & partinhh_w`k'==0 & offspring_w`k'==0) | (cvnump_w`k'>=3 & partinhh_w`k'==1 & offspring_w`k'==0)
}
* Collapse to four groups; "alone" and "other" are merged into category 4.
* The label is redefined (with replace) and attached on each pass of the loop.
forvalues k=1/2 {
recode cvhhcomp_w`k' (2=1)(3=2)(4=3)(1=4)(5=4)
label define cvhhcomp 1"1:only partner" 2"2:partner+kids" 3"3:single parent" 4"4:alone or other", replace
label values cvhhcomp_w* cvhhcomp
}
* New variable hh_comp with categories as used for the CLS studies.
* Built from COVID wave 1 only; statements are applied in sequence, so a later
* line can overwrite an earlier assignment.
label define hh_comp 0 "alone" 1 "Partner" 2 "Partner & children" 3 "lone parent" 4 "other person"
gen hh_comp =.
replace hh_comp = 0 if cvnump_w1 == 1
replace hh_comp = 1 if partinhh_w1 == 1
* Partner households with offspring move from 1 to 2
recode hh_comp 1 = 2 if offspring_w1 == 1
replace hh_comp = 3 if offspring_w1 == 1 & partinhh_w1 == 0
* Anyone still unclassified in a household of more than one person is "other"
recode hh_comp . = 4 if cvnump_w1 >1 & cvnump_w1 !=.
label values hh_comp hh_comp
* Self-rated health: copy of hehelf with code -1 set to missing
tabulate hehelf
clonevar sr_health = hehelf
replace sr_health = . if sr_health == -1

* Limiting illness: 1 when helim is 1; 0 when heill or helim is 2; otherwise missing
generate lim_ill = cond(helim == 1, 1, cond(heill == 2 | helim == 2, 0, .))
label define lim_ill 0 "No" 1 "Yes"
label values lim_ill lim_ill

* Key worker: the COVID wave 1 answer is used, falling back to wave 2 when the
* wave 1 answer is missing; "2" and anything still missing are coded 0 (No)
recode cvkey_w* (-9/-1=.)
generate keyworker = cond(cvkey_w1 == ., cvkey_w2, cvkey_w1)
replace keyworker = 0 if keyworker == 2 | keyworker == .
label define keyworker 0 "No" 1 "Yes"
label values keyworker keyworker

* Pre-pandemic mental health - CES-D in wave 9
* Items pscedd and pscedf are reverse-scored relative to the other six
recode psceda pscedb pscedc pscede pscedg pscedh (2=0) (-9/-1=.)
recode pscedd pscedf (2=1) (1=0) (-9/-1=.)
* Item sum; the -missing- option leaves the total missing when every item is missing
egen depression_w9 = rowtotal(psced*), missing
* Depressed if the score is 4 or more; undefined when the score is missing
generate depressed_w9 = (depression_w9 >= 4) if depression_w9 != .

* lli
*generate lli=(helim_w9==1)
*label define lli 0"no long-standing limiting illness" 1"lli", replace
*label values lli lli

* Shielding at first pandemic wave: 1 when cvvuln_w1 is 1, 0 for other valid answers
recode cvvuln_w1 (-9/-1=.)
generate shielding = (cvvuln_w1 == 1) if cvvuln_w1 != .
*** Five health conditions: pre-pandemic wave combined with first pandemic wave
** Note: if other pre-COVID waves are added, the suffix _w9 will need to be added
** to some variables, or another way of selecting variables found.

* Cancer: wave 9 report (hedbsca, with -1 and 2 coded 0 and -8 missing), or
* hedibca, or COVID wave 1 condition 09
gen cancer=hedbsca
recode cancer (-8=.)(-1=0)(2=0)
replace cancer=1 if hedibca ==1
replace cancer=1 if cvhecond09_w1==1

* Heart disease and high blood pressure: 1 if any listed indicator equals 1
recode hedas95 hedasar hedashf hedashm heyra hediahf hediahm hedim85 hedim86 (-9/-1=.)
gen heartd=0
foreach k in hedas95 hedasar hedashf hedashm heyra hediahf hediahm hedim85 hedim86 {
    replace heartd=1 if `k'==1
}
replace heartd=1 if cvhecond02_w1==1
replace heartd=1 if cvhecond03_w1==1
* High blood pressure part
replace heartd=1 if hedasbp==1
replace heartd=1 if hediabp==1
replace heartd=1 if cvhecond01_w1==1

* Obesity at first pandemic wave
recode wtimperial_wtstones_w1 wtimperial_wtpounds_w1 wtkilos_w1 dvheight_w2 (-9/-1=.)
* Imperial weight converted to kg using 6.35 kg per stone and 0.45 kg per pound
gen cvweight_w1=6.35*wtimperial_wtstones_w1
replace cvweight_w1=cvweight_w1+0.45*wtimperial_wtpounds_w1 if wtimperial_wtpounds_w1!=.
* Prefer the weight reported in kilos; fall back to the converted imperial weight
gen cvestwt_w1=wtkilos_w1
replace cvestwt_w1=cvweight_w1 if cvestwt_w1==. & cvweight_w1!=.
* BMI = kg / m^2, rounded to one decimal place (dvheight_w2 is divided by 100 to
* give metres). Parentheses are used for grouping; square brackets are reserved
* for subscripts and coefficient references in Stata expressions.
gen cvbmi_w1 = round(cvestwt_w1/((dvheight_w2/100)^2), .1)
gen obesity=(cvbmi_w1>=30) if cvbmi_w1!=.

* Respiratory disorders: 1 if any listed indicator equals 1
recode hedblu hediblu hedbsas hedibas (-9/-1=.)
gen respiratory=0
foreach k in hedblu hediblu hedbsas hedibas {
    replace respiratory=1 if `k'==1
}
replace respiratory=1 if cvhecond06_w1==1
replace respiratory=1 if cvhecond07_w1==1
********************************************************************************
***** Variables for analysis
*** Study management
* Person id
*rename llc_0010_stud_id LLC_0010_stud_id
* Cohort id
gen cohort_id = "ELSA"

*** Outcomes
* Employment status: employ -> employment_status
* (employ 3 "employed/self-employed" becomes 0; 1 and 2 keep their codes)
tab employ
label define employment_status 0 "Employed" 1 "Furloughed" 2 "Not in employment"
recode employ 3 = 0 , gen(employment_status)
label values employment_status employment_status

* Financial difficulties: subfin subfin_bi -> finance_change finance_change_bin
clonevar finance_change = subfin
* Reverse the binary so that 1 = worse, 0 = same or better
recode subfin_bi 0 = 1 1 = 0, gen(finance_change_bin)
label define finance_change_bi 0 "Same - better" 1 "Much worse - little worse"
* Attach the label to the variable it describes (it was previously defined but
* never applied, because the label and variable names differ)
label values finance_change_bin finance_change_bi

* Change in working hours: worktime_change -> worktime_change
* Renumbered so that "stayed the same" is the 0 reference category
tab worktime_change
label define worktime_change 0 "Stayed the same" 1 "Increased" 2 "Decreased" 3 "Furloughed" 4 "Not in employed"
recode worktime_change 3 = 0 4 = 3 5 =4
label values worktime_change worktime_change

*** Exposures
* Any COVID symptoms: covtime -> covid_ever
recode covtime 0 = 0 1/3 = 1 , gen(covid_ever)
label define covid_ever 0 "No" 1 "Yes"
label values covid_ever covid_ever

* Timing of COVID: covtime -> covid_timing
clonevar covid_timing = covtime
label define covid_timing 0 "No Covid" 1 "First Wave" 2 "Second Wave" 3 "Both Waves"
label values covid_timing covid_timing

* Long COVID: long_covid_out -> long_covid
* Left as a missing placeholder for now as there are problems
gen long_covid = .
*** Control variables
* A country variable does not appear to be available, so it is derived from the
* region variable: COVID wave 2 region first, COVID wave 1 region as fallback.
gen country = rgn_arch_w2
recode country -1 = .
* The fallback tests country rather than rgn_arch_w2, so that cases whose wave 2
* region was -1 (just set to missing above) also pick up the wave 1 region.
replace country = rgn_arch_w1 if country == .
* Region codes 1-9 -> 1, 10 -> 2, 11 -> 3; a -1 taken from wave 1 becomes missing
recode country -1 =. 1/9 = 1 10 = 2 11 = 3
label define country 1 "England" 2 "Scotland" 3 "Wales" 4 "Northern Ireland"
label values country country

* Age: age_arch_w2 -> age, age_group
clonevar age = age_arch_w2
recode age_arch_w2 25/39 = 0 40/54 = 1 55/66 = 2, gen(age_group)
label define age_group 0 "25 to 39" 1 "40 to 54" 2 "55 +"
label values age_group age_group

* Sex: sex_w2 -> sex, shifted down by one so the codes match the 0/1 label below
gen sex = sex_w2 - 1
* Remove any existing value label called sex before redefining it; -capture-
* keeps the script running when no such label exists
capture label drop sex
label define sex 0 "Male" 1 "Female"
label values sex sex
* Household composition: hh_comp is derived earlier in this script.
* Will need recoding when looking at other cohorts.
* Note: COVID wave 1 is used as it won't be a consequence of the outcome.
*clonevar hh_comp = cvhhcomp_w1
* NS-SEC "Seven category" to be consistent across the studies.
* Using seven categories as the ELSA 8 categories are different from the other cohorts.
* Negative codes and 99 are grouped as 9 "Other Unclassifiable".
* NOTE(review): the recode keeps codes 1-8 plus 9, so nssec7 can hold more than
* seven categories despite its name - confirm this is intended.
recode w9nssec8 (-9/-1 99 = 9 "Other Unclassifiable") (1 = 1 "Higher management or professional") (2 = 2 "Lower management and professional" ) (3 = 3 "Intermediate") (4 = 4 "Small Employer") (5 = 5 "Lower supervisory") (6 = 6 "Semi-routine") (7 = 7 "Routine") (8 =8 "longterm unemployed" ) , gen(nssec7)
label variable nssec7 "NS-SEC 7 Categories"
* NS-SEC "Five category" to be consistent.
* Same treatment of negative codes and 99 as above.
recode w9nssec5 (-9/-1 99 = 9 "Other Unclassifiable") (1 = 1 "Management & professional ") (2 = 2 "Intermediate") (3 = 3 "Small Employer") (4 = 4 "Lower supervisory & Technical") (5 = 5 "Semi-routine & Routine") , gen(nssec5)
label variable nssec5 "NS-SEC 5 Categories"
* SOC 2010/2007 pre-pandemic: soc_2d soc_designation -> soc_2d soc_designation
* Created as all-missing placeholders here.
gen soc_2d = .
gen soc_designation = .
* SIC 2007: lob_sic07_sec -> sic_1d (all-missing placeholder here)
gen sic_1d = .
* Key worker status
tabulate keyworker
* Mental health: depressed_w9 -> mental_health
clonevar mental_health = depressed_w9
* Shielding: already named shielding
tabulate shielding
*clonevar shielding = shield
* Cancer: already named cancer
tabulate cancer
* Heart disease
generate heart = heartd
* Depression (all-missing placeholder)
generate depression = .
* Obesity
generate obese = obesity
* Respiratory
tabulate respiratory

* The analysis variables are listed once so that the tabulations and the final
* -keep- always refer to exactly the same set.
local analysis_vars employment_status econ_act covid_sr age sex education education_bin ethnicity ethnicity_bin ethnicity1 country hh_comp nssec7 sr_health keyworker mental_health shielding
tab1 `analysis_vars'

* Retain identifiers, COVID wave 2 interview date parts, selection flags and the
* analysis variables, then save the ELSA flow-chart sample
keep llc_0010_stud_id cohort_id cintdaty_w2 cintdatm_w2 cintdatd_w2 study_selection age_selection employment_selection `analysis_vars'
save "$destination\ELSA_flowchart_sample_26Apr23.dta" , replace
************************************************************************************************************************************************
*** USoc code
clear all
* COVID waves supplied as a single file are processed in one loop. Wave ce comes
* in two parts and is handled separately.
* NB: there are currently two versions of wave cf. The single-file version is
* used; it drops a couple of variables at the end. Those variables are not
* needed in this analysis, and the two-file version appears to drop variable labels.
local single_file_waves a b c d f g h
foreach x of local single_file_waves {
    use "$source\ukhls_c`x'_indresp_w_v0003_20220531.dta" , clear
    * Prefix the variables that share a name across waves with the wave code
    foreach v in racel_dv psu strata {
        rename `v' c`x'_`v'
    }
    * Membership flag for this wave
    generate in_c`x' = 1
    save "$destination\c`x'_indresp_w_racel.dta", replace
}
* COVID wave ce: supplied in two parts that are joined on the person id
use "$source\ukhls_ce_indresp_w_v0003_1_20220531.dta" , clear
merge 1:1 llc_0010_stud_id using "$source\ukhls_ce_indresp_w_v0003_2_20220531.dta", nogenerate
* Same wave-prefix renaming and membership flag as the single-file waves
foreach v in racel_dv psu strata {
    rename `v' ce_`v'
}
generate in_ce = 1
save "$destination\ce_indresp_w_racel.dta", replace

* Wave cf, if the two-file version is ever needed:
*use "$source\ukhls_cf_indresp_w_v0003_1_20220531.dta" , clear
*merge 1:1 llc_0010_stud_id using "$source\ukhls_cf_indresp_w_v0003_2_20220531.dta", gen(cf_part2)
* drop ce_part2
*rename (racel_dv psu strata) (cf_racel_dv ce_psu cf_strata)
*save "$destination\cf_indresp_w_racel.dta", replace
* Main-survey waves g to k are each supplied in several numbered parts. Part 1 is
* loaded and the remaining parts are merged one-to-one on the person id;
* -nogenerate- means no merge indicator is left behind to drop.

* Wave g (4 parts)
use "$source\ukhls_g_indresp_v0003_1_20220531.dta" , clear
forvalues p = 2/4 {
    merge 1:1 llc_0010_stud_id using "$source\ukhls_g_indresp_v0003_`p'_20220531.dta", nogenerate
}
save "$destination\g_indresp.dta", replace

* Wave h (3 parts)
use "$source\ukhls_h_indresp_v0003_1_20220531.dta" , clear
forvalues p = 2/3 {
    merge 1:1 llc_0010_stud_id using "$source\ukhls_h_indresp_v0003_`p'_20220531.dta", nogenerate
}
save "$destination\h_indresp.dta", replace

* Wave i (4 parts)
use "$source\ukhls_i_indresp_v0003_1_20220531.dta" , clear
forvalues p = 2/4 {
    merge 1:1 llc_0010_stud_id using "$source\ukhls_i_indresp_v0003_`p'_20220531.dta", nogenerate
}
save "$destination\i_indresp.dta", replace

* Wave j (3 parts), restricted to interviews up to and including February 2020
* NOTE(review): unlike wave k, an interview year of -9 is not dropped first and
* would pass the "<= 2019" test - confirm none occur in wave j.
use "$source\ukhls_j_indresp_v0003_1_20220531.dta" , clear
forvalues p = 2/3 {
    merge 1:1 llc_0010_stud_id using "$source\ukhls_j_indresp_v0003_`p'_20220531.dta", nogenerate
}
keep if (j_intdaty_dv == 2020 & j_intdatm_dv <= 2) | j_intdaty_dv <= 2019
save "$destination\j_indresp.dta", replace

* Wave k (4 parts): records with interview year -9 are removed, then the same
* date restriction as wave j is applied
use "$source\ukhls_k_indresp_v0003_1_20220531.dta" , clear
forvalues p = 2/4 {
    merge 1:1 llc_0010_stud_id using "$source\ukhls_k_indresp_v0003_`p'_20220531.dta", nogenerate
}
keep if k_intdaty_dv != -9 & ((k_intdaty_dv == 2020 & k_intdatm_dv <= 2) | k_intdaty_dv <= 2019)
save "$destination\k_indresp.dta", replace
***** Importing data
* Files used from this point are the reassembled web-survey pandemic waves
* (c?_indresp_w, ca to ch) and the pre-pandemic main-survey waves g to k saved above.
*** Session settings
clear all
set more off
set maxvar 30000

*** Merging data files
** Pre-pandemic waves
* Wave 10 (file j) is the master; in_w10 flags membership of it
use "$destination\j_indresp.dta", clear
generate in_w10 = 1
* Each further wave is added one-to-one on the person id. The merge indicator is
* kept under a wave-specific name.
* Wave 11
merge 1:1 llc_0010_stud_id using "$destination\k_indresp.dta", gen(w11mrg)
* Wave 9
merge 1:1 llc_0010_stud_id using "$destination\i_indresp.dta", gen(w9mrg)
* Wave 8
merge 1:1 llc_0010_stud_id using "$destination\h_indresp.dta", gen(w8mrg)
* Wave 7
merge 1:1 llc_0010_stud_id using "$destination\g_indresp.dta", gen(w7mrg)

** COVID data
* All eight COVID waves are added, with the merge result tabulated for each
local covid_waves a b c d e f g h
foreach x of local covid_waves {
    merge 1:1 llc_0010_stud_id using "$destination\c`x'_indresp_w_racel.dta", gen(c`x'_mrg)
    tabulate c`x'_mrg
}
********************************************************************************
label define no_yes 0 "No" 1 "Yes"
***** Deriving outcomes
*** Employment status (latest C19 sweep, ch)
* Derived from ch_sempderived and ch_newfurlough.
* ch_sempderived is a derived variable. Universe is ALL, but around 1.5% are inapplicable (reason unclear).
* ch_newfurlough is asked of those who are employed, or both employed and self-employed, in ch_sempderived.
* Self-employed respondents reporting 0 hours are classified as not working. This is not
* applied to those who are both employed and self-employed, because they hold an employee job.
gen ch_employment_stat = .
replace ch_employment_stat = 2 if ch_newfurlough == 1 // furloughed
replace ch_employment_stat = 1 if ch_newfurlough == 2 // not furloughed, so in employment
recode ch_employment_stat . = 3 if ch_sempderived == 2 & ch_hours == 0 // self-employed with 0 hours
* Require strictly positive hours, as the cg derivation does. The previous test
* (hours != 0) also counted negative hours values as being in employment.
recode ch_employment_stat . = 1 if ch_sempderived == 2 & ch_hours > 0 & ch_hours != . // self-employed with positive hours
recode ch_employment_stat . = 3 if ch_sempderived == 4
label define ch_employment_stat 1 "In employment" 2 "Furloughed" 3 "Not Employed"
label values ch_employment_stat ch_employment_stat
* Working status at sweep cg, coded 0 = working and 2 = not working to stay
* consistent with the other cohorts. Furlough is not coded for this paper.
* Rules are applied in order and each one only fills cases that are still unset.
generate cg_employment_stat = .
replace cg_employment_stat = 0 if cg_employment_stat == . & (cg_sempderived == 1 | cg_sempderived == 3)
replace cg_employment_stat = 0 if cg_employment_stat == . & cg_sempderived == 2 & cg_hours > 0 & cg_hours != .
replace cg_employment_stat = 2 if cg_employment_stat == . & cg_sempderived == 2 & cg_hours == 0
replace cg_employment_stat = 2 if cg_employment_stat == . & cg_sempderived == 4
label variable cg_employment_stat "Working or not January"
label define cg_employment_stat 0 "Working " 2 "Not working"
label values cg_employment_stat cg_employment_stat
*** Economic activity status from sweep cg
* 0 = active: cg_sempderived is 1, 2 or 3, or it is 4 and cg_julk4wk == 1.
* 1 = inactive: cg_sempderived is 4 and cg_julk4wk == 2.
* The variable name is written out in full (cg_sempderived) so that the derivation
* does not depend on Stata's variable-abbreviation setting.
gen econ_act = .
recode econ_act . = 0 if cg_sempderived == 1 | cg_sempderived == 2 | cg_sempderived == 3
recode econ_act . = 0 if cg_sempderived == 4 & cg_julk4wk == 1
recode econ_act . = 1 if cg_sempderived == 4 & cg_julk4wk == 2
label define econ_act 0 "Active" 1 "Inactive"
label values econ_act econ_act
*******************************************************************************
**** Exposures
* Survey start values for the main (ch) and alternative (cg) sweeps.
generate usoc_main = ch_surveystart
generate usoc_alt = cg_surveystart
label define c19_confirmed 0 "No" 1 "Suspected" 2 "Confirmed"
*** Covid confirmed
* Per-sweep self-reported Covid status. Rules are applied in order and each one
* only fills cases that are still unset, so a test result takes precedence over
* the symptom report.
* Sweeps ca to cf: a single test-result variable.
foreach sweep in a b c d e f {
    generate c`sweep'_c19_confirmed = .
    replace c`sweep'_c19_confirmed = 2 if c`sweep'_c19_confirmed == . & c`sweep'_testresult == 1
    replace c`sweep'_c19_confirmed = 0 if c`sweep'_c19_confirmed == . & c`sweep'_testresult == 2
    replace c`sweep'_c19_confirmed = 1 if c`sweep'_c19_confirmed == . & c`sweep'_hadsymp == 1
    replace c`sweep'_c19_confirmed = 0 if c`sweep'_c19_confirmed == . & c`sweep'_hadsymp == 2
    label values c`sweep'_c19_confirmed c19_confirmed
}
* Sweep cg: up to three test-result variables; any value of 1 gives Confirmed.
generate cg_c19_confirmed = .
replace cg_c19_confirmed = 2 if cg_c19_confirmed == . & (cg_testresult_test1 == 1 | cg_testresult_test2 == 1 | cg_testresult_test3 == 1)
replace cg_c19_confirmed = 0 if cg_c19_confirmed == . & (cg_testresult_test1 == 2 | cg_testresult_test2 == 2 | cg_testresult_test3 == 2)
replace cg_c19_confirmed = 1 if cg_c19_confirmed == . & cg_hadsymp == 1
replace cg_c19_confirmed = 0 if cg_c19_confirmed == . & cg_hadsymp == 2
label values cg_c19_confirmed c19_confirmed
* Sweep ch: uses ch_testpos in place of a test-result variable.
generate ch_c19_confirmed = .
replace ch_c19_confirmed = 2 if ch_c19_confirmed == . & ch_testpos == 1
replace ch_c19_confirmed = 0 if ch_c19_confirmed == . & ch_testpos == 2
replace ch_c19_confirmed = 1 if ch_c19_confirmed == . & ch_hadsymp == 1
replace ch_c19_confirmed = 0 if ch_c19_confirmed == . & ch_hadsymp == 2
label variable ch_c19_confirmed "ch C19 SR confirmed"
label values ch_c19_confirmed c19_confirmed
* Self-reported Covid status across sweeps ca to cg.
* Status 2 in any sweep is assigned first, then status 1 in any sweep among
* cases still unset. The outer loop fixes that precedence.
generate covid_sr = .
foreach status in 2 1 {
    foreach sweep in a b c d e f g {
        replace covid_sr = `status' if covid_sr == . & c`sweep'_c19_confirmed == `status'
    }
}
* Remaining cases take 0 only when the cg sweep itself reports 0.
replace covid_sr = 0 if covid_sr == . & cg_c19_confirmed == 0
* Anyone without a cg status is set back to missing, whatever earlier sweeps said.
replace covid_sr = . if cg_c19_confirmed == .
*********************************************************************************
***** Control Variables
*** UK Country of residence
* Region is taken from the first Covid sweep (ca to cf) with a value other than -9.
gen pan_region = ca_gor_dv
recode pan_region -9 = .
foreach wave in b c d e f {
    replace pan_region = c`wave'_gor_dv if pan_region == .
    recode pan_region -9 = .
}
* Collapse region codes to country: 1-9 to 1, 10 to 3, 11 to 2, 12 to 4.
recode pan_region 1/9 = 1 10 = 3 11 = 2 12 = 4 , gen(country)
label define country 1 "England" 2 "Scotland" 3 "Wales" 4 "Northern Ireland"
label values country country
* Country at the ch and cg sweeps, using the same collapse.
* These previously pointed at a value label named pan_country, which this section
* never defines, so the values would display unlabelled. They now use the country
* label defined above, which matches the coding.
* NOTE(review): unlike pan_region, -9 is not set to missing in these two variables.
recode ch_gor_dv 1/9 = 1 10 = 3 11 = 2 12 = 4, gen(country_main)
label values country_main country
recode cg_gor_dv 1/9 = 1 10 = 3 11 = 2 12 = 4, gen(country_alt)
label values country_alt country
*** Age - Pandemic outcome
* Band cg_age into 25-39, 40-54 and 55-66.
recode cg_age 25/39 = 0 40/54 = 1 55/66 = 2, gen(cg_agecat)
* recode copies ages outside 25-66 unchanged, so blank them explicitly.
* Both bounds are tested on cg_age. The lower bound previously tested ch_age, which
* left raw ages in cg_agecat for anyone under 25 at cg but 25 or over (or missing) at ch.
* A missing cg_age compares as greater than 66 and is therefore also set to missing.
replace cg_agecat = . if cg_age < 25 | cg_age > 66
label variable cg_agecat "Age March 2021 in categories"
label define cg_agecat 0 "25 to 39" 1 "40 to 54" 2 "55 +"
label values cg_agecat cg_agecat
* Continuous age at the main (ch) and alternative (cg) sweeps.
gen age_main = ch_age
gen age_alt = cg_age
*** Age at first Covid wave, with corrections
* ca_age has some inconsistencies. Outside the TRE these were handled with
* ca_pidpcorrected, which is not available here. Instead, inconsistent ca_age
* values are set to missing and filled from the next available sweep, and
* finally from year of birth.
label define age_entry 1 "16-24" 2 "25-34" 3 "35-44" 4 "45-54" 5 "55-64" 6 "65-74" 7 "75+"
* Year of birth: take wave k, then fall back through j, i, h and g.
* Negative codes (-9 to -1) are blanked after every step so the next wave can fill them.
generate lob_birthy = k_birthy
recode lob_birthy (-9/-1 = .)
foreach mainwave in j i h g {
    replace lob_birthy = `mainwave'_birthy if lob_birthy == .
    recode lob_birthy (-9/-1 = .)
}
generate dob_age = 2020 - lob_birthy
* Blank ca_age when any later sweep reports a lower age than ca_age, or an age
* more than one year above it.
generate ca_age_temp = ca_age
foreach sweep in b c d e f g h {
    replace ca_age_temp = . if (c`sweep'_age < ca_age & ca_age != .) | (ca_age + 1 < c`sweep'_age & c`sweep'_age != .)
}
* Age at start: the checked ca_age, else the first later sweep with an age,
* else the age implied by year of birth.
generate age_at_start = ca_age_temp
foreach sweep in b c d e f g h {
    replace age_at_start = c`sweep'_age if age_at_start == .
}
replace age_at_start = dob_age if age_at_start == .
*** Sex - Pandemic
* Start from cg_sex. Where it equals 3, take the value from another sweep,
* trying ch first and then cf back to ca. Finally shift the code down by one
* so that the variable matches the 0/1 value label.
* NOTE(review): only the value 3 is back-filled; a missing cg_sex stays missing.
generate sex = cg_sex
foreach sweep in h f e d c b a {
    replace sex = c`sweep'_sex if sex == 3
}
replace sex = sex - 1
label define sex 0 "Male" 1 "Female"
label values sex sex
*** Household composition - pandemic
* Wave ca has no relationship data, so each person is classified from the first
* valid value in the remaining waves. Designed to be consistent with CLS.
label define household_comp 0 "Alone" 1 "Partner" 2 "Partner & children" 3 "Lone parent" 4 "Other person"
foreach sweep in cb cc cd ce cf cg ch {
    * Flag whether any relation variable holds code 3 or 6 (offspring flag) or
    * code 1 or 2 (partner flag).
    egen `sweep'_offspring = anymatch(`sweep'_relation*), values(3 6)
    egen `sweep'_partner = anymatch(`sweep'_relation*), values(1 2)
    * Rules run in order and only fill unset cases; all require a non-missing couple value.
    generate `sweep'_household_comp = .
    replace `sweep'_household_comp = 2 if `sweep'_household_comp == . & `sweep'_partner == 1 & `sweep'_offspring == 1 & `sweep'_couple != .
    replace `sweep'_household_comp = 1 if `sweep'_household_comp == . & `sweep'_partner == 1 & `sweep'_offspring == 0 & `sweep'_couple != .
    replace `sweep'_household_comp = 3 if `sweep'_household_comp == . & `sweep'_partner == 0 & `sweep'_offspring == 1 & `sweep'_couple != .
    replace `sweep'_household_comp = 4 if `sweep'_household_comp == . & `sweep'_partner == 0 & `sweep'_hhnum >= 2 & `sweep'_hhnum <= 20 & `sweep'_couple != .
    replace `sweep'_household_comp = 0 if `sweep'_household_comp == . & `sweep'_partner == 0 & `sweep'_couple != .
    label values `sweep'_household_comp household_comp
}
* Overall composition: the first sweep from cb to ch with a classification.
generate hh_comp = cb_household_comp
foreach sweep in cc cd ce cf cg ch {
    replace hh_comp = `sweep'_household_comp if hh_comp == .
}
label values hh_comp household_comp
* Checks on the cb classification and its couple variable.
tabulate cb_household_comp, missing
tabulate cb_couple, missing
*** Self-rated health
label define sr_health 1 "Excellent" 2 "Very Good" 3 "Good" 4 "Fair" 5 "Poor"
label define sr_health_bin 0 "Excellent-Good" 1 "Fair-Poor"
* For each wave, blank codes -9 to 0 in scsf1, then derive a binary version
* (1-3 against 4-5).
foreach src in g h i j k cf cg ch {
    recode `src'_scsf1 (-9/0 = .), generate(`src'_sr_health)
    label values `src'_sr_health sr_health
    recode `src'_sr_health (1/3 = 0) (4/5 = 1), generate(`src'_sr_health_bin)
    label values `src'_sr_health_bin sr_health_bin
}
* Pre-pandemic self-rated health: wave k, else j, i, h, g in that order.
generate sr_health = k_sr_health
foreach src in j i h g {
    replace sr_health = `src'_sr_health if sr_health == .
}
label values sr_health sr_health
*** Ethnicity - pandemic
* Five-group ethnicity from cg_racel_dv; codes -9 to 0 become missing.
recode cg_racel_dv              ///
    (-9/0  = .)                 ///
    (1/4   = 1)                 ///
    (5/8   = 4)                 ///
    (9/13  = 2)                 ///
    (14/16 = 3)                 ///
    (17 97 = 5)                 ///
    , generate(ethnicity)
label variable ethnicity "Ethnicity broad "
label define ethnicity 1 "White" 2 "Asian" 3 "Black" 4 "Mixed" 5 "Other"
label values ethnicity ethnicity
* Binary version: group 1 against groups 2-5.
recode ethnicity (1 = 0 "White") (2/5 = 1 "Non White"), generate(ethnicity_bin)
label variable ethnicity_bin "Ethnicity Binary"
* Eight-group ethnicity from ch_racel_dv; codes -9 to -1 become missing.
label define ethnicity1 1 "White" 2 "Mixed" 3 "Indian" 4 "Pakistani" 5 "Bangladeshi" 6 "Black Caribean" 7 "Black African" 8 "Other"
recode ch_racel_dv              ///
    (-9/-1 = .)                 ///
    (1/4   = 1)                 ///
    (5/8   = 2)                 ///
    (9     = 3)                 ///
    (10    = 4)                 ///
    (11    = 5)                 ///
    (12/13 = 8)                 ///
    (14    = 6)                 ///
    (15    = 7)                 ///
    (16/97 = 8)                 ///
    , generate(ethnicity1)
label values ethnicity1 ethnicity1
**** NS-SEC
* Per wave: take the jbnssec8_dv value; where that is -8, fall back to the
* jlnssec8_dv value when it is positive and non-missing.
* Overall: the first wave in the order k, j, i, h, g with a value; negative
* codes are blanked after each wave so the next can fill them.
generate nssec8 = .
foreach w in k j i h g {
    generate `w'_nssec8 = `w'_jbnssec8_dv
    replace `w'_nssec8 = `w'_jlnssec8_dv if `w'_jbnssec8_dv == -8 & `w'_jlnssec8_dv > 0 & `w'_jlnssec8_dv != .
    replace nssec8 = `w'_nssec8 if nssec8 == .
    recode nssec8 -9/-1 = .
}
* Anyone still without a class goes to category 9.
recode nssec8 . = 9
label define nssec8 1 "Large employers & higher mangement" 2 "Higher professional" 3 "Lower management & professional" 4 "Intermediate" 5 "Small employers & own acccount" 6 "Lower supervisory & technical" 7 "Semi-routine" 8 "routine" 9 "Unclassifiable"
label values nssec8 nssec8
* Some of the unclassifiable could later be coded as long-term unemployed to
* separate them from those in education or training. The 999 rule below is the
* slot for that; no case takes that value in this derivation.
recode nssec8                                               ///
    (1/2 = 1 "Higher management or professional")           ///
    (3 = 2 "Lower management and professional" )            ///
    (4 = 3 "Intermediate")                                  ///
    (5 = 4 "Small Employer")                                ///
    (6 = 5 "Lower supervisory")                             ///
    (7 = 6 "Semi-routine")                                  ///
    (8 = 7 "Routine")                                       ///
    (999 = 8 "longterm unemployed" )                        ///
    ( 9 = 9 "Other Unclassifiable")                         ///
    , generate(nssec7)
label variable nssec7 "NS-SEC 7 Categories"
recode nssec7                                               ///
    (1/2 = 1 "Management & professional ")                  ///
    (3 = 2 "Intermediate")                                  ///
    (4 = 3 "Small Employer")                                ///
    (5 = 4 "Lower supervisory & Technical")                 ///
    (6/7 = 5 "Semi-routine & Routine")                      ///
    (8 = 6 "longterm unemployed" )                          ///
    ( 9 = 9 "Other Unclassifiable")                         ///
    , generate(nssec5)
label variable nssec5 "NS-SEC 5 Categories"
*** SOC - pre-pandemic
* Builds the latest available three-digit SOC2000 and SOC2010 codes, then derives
* the two-digit and one-digit versions by integer division by ten.
* All references use the full names lob_soc00_d3 and lob_soc10_d3. The shortened
* names lob_soc00 and lob_soc10 used before only resolved through Stata's variable
* abbreviation and would stop resolving once the _d2 and _d1 variables exist.
foreach job in soc00 soc10 {
    gen lob_`job'_d3 = .
}
foreach x in k j i h g {
    * Blank negative codes (-9 to -1) in the jb and jl SOC variables for this wave.
    foreach job in jbsoc00 jlsoc00 jbsoc10 jlsoc10 {
        recode `x'_`job'_cc (-9/-1 = .), gen(`x'_`job')
    }
    * Fill from the jb code first, then the jl code, only where still missing.
    * Waves run k to g, so the most recent wave with a code wins.
    foreach code in soc00 soc10 {
        replace lob_`code'_d3 = `x'_jb`code' if lob_`code'_d3 == .
        replace lob_`code'_d3 = `x'_jl`code' if lob_`code'_d3 == .
    }
}
gen lob_soc00_d2 = floor(lob_soc00_d3/10)
gen lob_soc00_d1 = floor(lob_soc00_d2/10)
gen lob_soc10_d2 = floor(lob_soc10_d3/10)
gen lob_soc10_d1 = floor(lob_soc10_d2/10)
*** soc_designation
label define soc_designation 0 "SOC2000" 1 "SOC2010" 2 "Missing"
* Flag cases that have a SOC2010 code (operator written as != without a space).
gen soc_designation = 1 if lob_soc10_d1 != .