-
Notifications
You must be signed in to change notification settings - Fork 0
/
1.1 prepare_consensus.R
133 lines (89 loc) · 4.33 KB
/
1.1 prepare_consensus.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# DESCRIPTION: the script produces a dataframe with Consensus forecasts for 2020 for each month of the year and country.
# FOR EXTERNAL USERS: change main path with location regional subdirectories Consensus forecasts and export path
# Wrangling Consensus forecasts function ----
#' Wrangling Consensus forecasts
#'
#' Wrangling Consensus forecasts from excel with each individual sheet for every country to
#' more user-friendly dataframe.
#'
#' @param main_path Path to directory with Consensus forecasts.
#' @param subdirectory Subdirectory after main path with regional distinction survey.
#' @param row_value_to_extract Number of row with the value to extract: 5 for mean, 7 for max, 8 for min and 9 for sd.
#' @param variable Name of the variable to extract. To get a sample of the correct names, read a sheet skipping the first two lines.
#' Default is GDP.
#'
#' @return tibble with three identifiers (horizon, value, country)
#'
#' @details Problem with Asia-Pacific: one sheet is missing data for Vietnam. For the moment we remove it completelly.
clean_consensus <- function(main_path,subdirectory,row_value_to_extract, variable = "Gross Domestic"){
if(subdirectory == "Asia_Pacific"){
name_countries= list.files(paste0(main_path,"/",subdirectory)) %>%
map_chr(~ paste0(main_path,"/",subdirectory,"/",.x)) %>%
map(~ .x %>% getSheetNames()) %>%
map(~ .x %>% str_subset("A1|TrendCharts|Data|Additional|Forex|Oil|Copyright", negate = T)) %>%
.[[1]] %>%
str_subset("Vietnam",negate = T)
}
else{
name_countries= list.files(paste0(main_path,"/",subdirectory)) %>%
map_chr(~ paste0(main_path,"/",subdirectory,"/",.x)) %>%
map(~ .x %>% getSheetNames()) %>%
map(~ .x %>% str_subset("A1|TrendCharts|Data|Additional|Forex|Oil|Copyright", negate = T)) %>%
.[[1]]
}
path_files=list.files(paste0(main_path,"/",subdirectory)) %>%
map_chr(~ paste0(main_path,"/",subdirectory,"/",.x))
df_to_clean=path_files %>%
map(~ name_countries %>%
map(function(x) {
read_xlsx(.x, sheet = x, skip = 2)
})
)
dfs_final =df_to_clean %>%
modify_depth(2, ~ .x %>% slice(row_value_to_extract)) %>%
modify_depth(2, ~ .x %>% select(variable)) %>%
modify_depth(2, ~ .x %>% setNames(c("value"))) %>%
map(~ .x %>% bind_rows()) %>%
map(~ .x %>% mutate(country = name_countries))
names(dfs_final) = path_files %>% str_extract("[A-Z]{1}[a-z]{2}2020") %>% str_remove("2020")
dfs_final %>%
bind_rows(.id = "horizon") %>%
arrange(horizon) %>%
mutate(value = as.numeric(value))
}
# Set parameters before running the function: -----
main_path="../Forecasts_Time_Covid_material/raw_data/Consensus_2020"
subdirectory=c("Asia_Pacific","G7_Western_Europe","East_Europe","Latin_America") # Regions sheets
rows=c(5,7,8,9) # Distribution values of Consensus forecasts
# Running the function and export intermediate datasets: -----
# Note: apply function for each value of the distribution we want to extract and each region.
# We obtain a nested list whose upper level is the value of the distribution and the lower is the region.
# Run for GDP:
gdp_forecasts <- rows %>%
map(~ map(subdirectory, function(x){
clean_consensus(main_path,x,.x,"Gross Domestic")
})
) %>%
map(~ .x %>% bind_rows())
map(~ .x %>% mutate(country = case_when(country == "USA" ~ "United States",
country == "UK" ~ "United Kingdom",
country == "Czechia" ~ "Czech Republic",
T ~ country)))
# Run for inflation:
inflation_forecasts <- rows %>%
map(~ map(subdirectory, function(x){
clean_consensus(main_path,x,.x,contains("Consumer"))
})
) %>%
map(~ .x %>% bind_rows()) %>%
map(~ .x %>% mutate(country = case_when(country == "USA" ~ "United States",
country == "UK" ~ "United Kingdom",
country == "Czechia" ~ "Czech Republic",
T ~ country)))
# Export:
# Export GDP:
gdp_forecasts %>%
walk2(c("","_max","_min","_sd"), ~ saveRDS(.x, paste0("../Forecasts_Time_Covid_material/intermediate_data/consensus_2020",.y,".RDS")))
# Export inflation:
inflation_forecasts %>%
walk2(c("","_max","_min","_sd"), ~ saveRDS(.x, paste0("../Forecasts_Time_Covid_material/intermediate_data/consensus_2020_inflation",.y,".RDS")))