-
Notifications
You must be signed in to change notification settings - Fork 1
/
09_combine_crunch.R
156 lines (108 loc) · 4.48 KB
/
09_combine_crunch.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
library(tidyverse)
library(crunch)
library(dplyr)
library(haven)
library(googlesheets)
library(lubridate)
login()
if (writeToCrunch) {
login()
deleteDataset("CCES Cumulative Issues")
newDataset("https://www.dropbox.com/s/p8cx49h82coqfcs/cumulative_2006_2018_crunch.sav?dl=0",
"CCES Cumulative v4.0")
}
# append intent to vote party -----
ds_orig <- loadDataset("CCES Cumulative Common", project = "CCES")
forkDataset(ds_orig, "Fork of CCES Cumulative Common")
ds_fork <- loadDataset("Fork of CCES Cumulative Common")
ds_new <- loadDataset("CCES Cumulative v6.0")
ds_iss <- loadDataset("CCES Cumulative Issues")
# identifier
ds_fork$year_caseid <- paste0(as.character(as.vector(ds_fork$year)), "_", as.vector(ds_fork$case_id))
ds_new$year_caseid <- paste0(as.character(as.vector(ds_new$year)), "_", as.vector(ds_new$case_id))
ds_iss$year_caseid <- paste0(as.character(as.vector(ds_iss$year)), "_", as.vector(ds_iss$case_id))
# compare
vars_to_replace <- setdiff(names(ds_new), c("year_caseid", "year", "case_id"))
# validated vote update
# vars_to_replace <- c(str_subset(names(ds_new), "vv"),
# str_subset(names(ds_new), "voted_(rep|gov|sen)"))
# A - for the new variables dataset
# keep only variables to merge
# delete everything BUT the key and the vars to be replaced
deleteVariables(ds_new,
setdiff(names(ds_new), c("year_caseid", vars_to_replace)))
saveVersion(ds_new, "dropped all but the vote validation and post-elec vote")
# B- drop the to-be overwritten variables fromfork
deleteVariables(ds_fork,
vars_to_replace) # delete the vars to be replaced
saveVersion(ds_fork, "dropped all but year_caseid")
refresh(ds_fork)
refresh(ds_new)
# merge fork on new, dropping 2018 rows
extendDataset(ds_fork, ds_new, by = "year_caseid")
refresh(ds_fork)
saveVersion(ds_fork, description = "fork merged with new 2020")
# updte new dataset to be only 2018
ds_new <- dropRows(ds_new, ds_new$year != 2018)
refresh(ds_new)
# append
appendDataset(ds_fork, ds_new)
# un 07 format
# merge new vars into fork
mergeFork(ds_orig, fork = ds_fork)
# Fix 2016 vote match ---------
ds <- loadDataset("CCES 2016 Common Vote Validated", project = "CCES")
crtabs(~ inputstate + CL_E2016GVM, ds, useNA = "ifany", weight = NULL) # check Northeastern states
login()
ds <- loadDataset("Fork of CCES 2016 Common Vote Validated")
crtabs(~ inputstate + CL_E2016GVM, ds, useNA = "ifany", weight = NULL) # check Northeastern states
if(FALSE){
ds16 <- loadDataset("CCES Cumulative Common 2016") # old dataset, 2006 - 2016
ds17 <- loadDataset("CCES Cumulative Common 2017") # new dataset, only 2017
ds16_17 <- appendDataset(ds16, ds17)
compareDatasets(ds16, ds17)
# something wrong with variable number 6?
ds16[[6]]
ds17[[6]]
}
# fix 2016 cumulative --------
# upload 2016
cc16 <- read_dta("~/Dropbox/CCES_SDA/2016/data/Common/CCES16_Common_OUTPUT_Feb2018_VV.dta") %>%
select(V101, matches("weight"), matches("^CL")) # vars to replace + ID
# insert it once
if (FALSE) { # don't run again
insert <- loadDataset("CCES 2016 Jan 2018")
old16_fork <- loadDataset("Fork of CCES 2016 Common Vote Validated") # old version (will get overwritten)
# insert is the vars to replacement + ID
vars_to_replace <- setdiff(names(insert), "V101")
# delete the "wrong" variables
deleteVariables(old16_fork, vars_to_replace)
# immediately add back the "correct" variables in its place
joined <- extendDataset(old16_fork, insert, by = "V101")
}
# add to weights
login()
ds <- loadDataset("Fork of CCES 2016 Common Vote Validated")
# apply weights ---
weight_aliases <- str_subset(names(ds), "weight")
weightVariables(ds) <- weight_aliases
weight(ds) <- ds$commonweight_vv
# replace alias-based names with real names
ccvar <- gs_title("CCES_crunch_variables")
v16 <- gs_read_csv(ccvar, ws = "CCES_2016_variables")
lookup <- v16 %>% select(alias = variable, name) %>% distinct()
# rename
for (cv in aliases(variables(ds))) {
if (cv %in% lookup$alias) {
replacement <- lookup$name[which(lookup$alias == cv)]
if (!is.na(replacement)) {
name(ds[[which(aliases(variables(ds)) == cv)]]) <- replacement
print(cv)
} else next
}
}
# merge fork
ds_original <- loadDataset("CCES 2016 Common Vote Validated", project = "CCES")
ds_fork <- loadDataset("Fork of CCES 2016 Common Vote Validated")
mergeFork(dataset = ds_original, fork = ds_fork)
crtabs(~ inputstate + CL_E2016GVM, ds_original, useNA = "ifany", weight = NULL) # check Northeastern states