diff --git a/etl/utils.R b/etl/utils.R index a4156acc4..0a69cbc31 100644 --- a/etl/utils.R +++ b/etl/utils.R @@ -7,7 +7,7 @@ library(tools) save_s3_to_local <- function(s3_uri, path, overwrite = FALSE) { - if (!file.exists(path) | overwrite) { + if (!file.exists(path) || overwrite) { message("Saving file: ", s3_uri, " to: ", path) aws.s3::save_object(object = s3_uri, file = path) } @@ -15,14 +15,14 @@ save_s3_to_local <- function(s3_uri, path, overwrite = FALSE) { save_local_to_s3 <- function(s3_uri, path, overwrite = FALSE) { - if (!aws.s3::object_exists(s3_uri) | overwrite) { + if (!aws.s3::object_exists(s3_uri) || overwrite) { message("Saving file: ", path, "to: ", s3_uri) aws.s3::put_object( file = path, object = s3_uri, show_progress = TRUE, multipart = TRUE - ) + ) } } @@ -34,8 +34,7 @@ open_data_to_s3 <- function(s3_bucket_uri, file_year, file_ext, file_prefix = NULL, - overwrite = FALSE - ) { + overwrite = FALSE) { open_data_file <- paste0(base_url, data_url) remote_file <- file.path( s3_bucket_uri, dir_name, @@ -61,8 +60,7 @@ open_data_to_s3 <- function(s3_bucket_uri, write_partitions_to_s3 <- function(df, s3_output_path, is_spatial = TRUE, - overwrite = FALSE - ) { + overwrite = FALSE) { if (!dplyr::is.grouped_df(df)) { warning("Input data must contain grouping vars for partitioning") } @@ -80,7 +78,7 @@ write_partitions_to_s3 <- function(df, remote_path <- file.path( s3_output_path, partition_path, "part-0.parquet" ) - if (!object_exists(remote_path) | overwrite) { + if (!object_exists(remote_path) || overwrite) { message("Now uploading: ", partition_path) tmp_file <- tempfile(fileext = ".parquet") if (is_spatial) { @@ -95,32 +93,30 @@ write_partitions_to_s3 <- function(df, standardize_expand_geo <- function(spatial_df, make_valid = FALSE, polygon = TRUE) { - return( - spatial_df %>% st_transform(4326) %>% - { if (make_valid) st_make_valid(.) else .} %>% + { + if (make_valid) st_make_valid(.) else . + } %>% mutate(geometry_3435 = st_transform(geometry, 3435)) %>% - { if (polygon) { - - mutate(., centroid = st_centroid(st_transform(geometry, 3435))) %>% - cbind(., - st_coordinates(st_transform(.$centroid, 4326)), - st_coordinates(.$centroid) - ) %>% - select(!contains("centroid"), - lon = X, lat = Y, x_3435 = `X.1`, y_3435 = `Y.1`, geometry, geometry_3435) - - } else { - - select(., dplyr::everything(), geometry, geometry_3435) - - } + { + if (polygon) { + mutate(., centroid = st_centroid(st_transform(geometry, 3435))) %>% + cbind( + ., + st_coordinates(st_transform(.$centroid, 4326)), + st_coordinates(.$centroid) + ) %>% + select(!contains("centroid"), + lon = X, lat = Y, x_3435 = `X.1`, y_3435 = `Y.1`, + geometry, geometry_3435 + ) + } else { + select(., dplyr::everything(), geometry, geometry_3435) } - + } ) - } county_gdb_to_s3 <- function( @@ -128,9 +124,7 @@ county_gdb_to_s3 <- function( dir_name, file_path, layer, - overwrite = FALSE -) { - + overwrite = FALSE) { remote_file <- file.path( s3_bucket_uri, dir_name, @@ -138,31 +132,24 @@ county_gdb_to_s3 <- function( ) if (!aws.s3::object_exists(remote_file)) { - message(paste0("Reading ", basename(file_path))) if (layer %in% st_layers(file_path)$name) { - try({ - tmp_file <- tempfile(fileext = ".geojson") st_read(file_path, layer) %>% st_write(tmp_file) save_local_to_s3(remote_file, tmp_file, overwrite = overwrite) file.remove(tmp_file) cat(paste0("File successfully written to ", remote_file, "\n")) - }) - } else { - - cat(paste0("Layer '", layer, - "' not present in ", - basename(file_path), - "... skipping.\n") - ) - + cat(paste0( + "Layer '", layer, + "' not present in ", + basename(file_path), + "... skipping.\n" + )) } - } }