Merge pull request #136 from RSGInc/132-clean-up-styling-in-code

132 clean up styling in code
RSGInc · Feb 29, 2024 · f4666a3 · f4666a3
2 parents 9fc8f6f + de0c1a4
commit f4666a3
Show file tree

Hide file tree

Showing 69 changed files with 2,215 additions and 2,199 deletions.
diff --git a/R/data.R b/R/data.R
@@ -1,8 +1,8 @@
 #' Test household travel survey dataset
-#' 
+#'
 #' @format ## `test_data`
 #' A list containing the following datasets:
-#' 
+#'
 #' @source A list of export tables compiled from internal RSG database on 11/22/2023
 "test_data"
 #' Test household dataset
@@ -152,4 +152,4 @@
 #'   \item{value}{The numeric value of the variable}
 #'   \item{label}{What the numeric value of the variable represents}
 #' }
-"value_labels"
+"value_labels"
diff --git a/R/factorize_column.R b/R/factorize_column.R
@@ -34,81 +34,88 @@
 #'   var_str = "income_detailed",
 #'   vals_df = value_labels,
 #'   extra_labels = "Missing",
-#'   value_label_colname = 'label'
+#'   value_label_colname = "label"
 #' )
 #'
 #' @export factorize_column
 factorize_column = function(
-  x,
-  var_str,
-  vals_df,
-  variable_colname = 'variable',
-  value_colname = 'value',
-  value_label_colname = 'value_label',
-  value_order_colname = 'value',
-  extra_labels = NULL,
-  add_na = TRUE
-) {
+    x,
+    var_str,
+    vals_df,
+    variable_colname = "variable",
+    value_colname = "value",
+    value_label_colname = "value_label",
+    value_order_colname = "value",
+    extra_labels = NULL,
+    add_na = TRUE) {
   vals_df = data.table::data.table(vals_df)
 
   # sort the vals_df to ensure the ordered factor is ordered correctly
-  vals_df = vals_df[with(vals_df,
-                          order(get(variable_colname),
-                                get(value_order_colname),
-                                get(value_colname),
-                                get(value_label_colname))), ]
+  vals_df = vals_df[with(
+    vals_df,
+    order(
+      get(variable_colname),
+      get(value_order_colname),
+      get(value_colname),
+      get(value_label_colname)
+    )
+  ), ]
 
   # select levels and labels for this variable
-  levels  = vals_df[get(variable_colname) == var_str, get(value_colname)]
-  labels  = vals_df[get(variable_colname) == var_str, get(value_label_colname)]
+  levels = vals_df[get(variable_colname) == var_str, get(value_colname)]
+  labels = vals_df[get(variable_colname) == var_str, get(value_label_colname)]
 
   # add extra labels, if provided
-  if ( !missing(extra_labels) ) {
-    levels = c(levels, vals_df[get(variable_colname) %in% extra_labels,  get(value_colname)])
-    labels = c(labels, vals_df[get(variable_colname) %in% extra_labels,  get(value_label_colname)])
+  if (!missing(extra_labels)) {
+    levels = c(levels, vals_df[get(variable_colname) %in% extra_labels, get(value_colname)])
+    labels = c(labels, vals_df[get(variable_colname) %in% extra_labels, get(value_label_colname)])
   }
 
   # only apply labels if there are labels specific to this variable
   if (var_str %in% unique(vals_df[, get(variable_colname)])) {
-
-    #perform QC checks before factorizing
+    # perform QC checks before factorizing
 
     # check for duplicate labels (gets a warning)
-    if ( any(duplicated(labels)) ) {
-      warning('Duplicated labels in variable "', var_str, '". Labels: ',
-              paste(labels, collapse = '; '))
-              }
+    if (any(duplicated(labels))) {
+      warning(
+        'Duplicated labels in variable "', var_str, '". Labels: ',
+        paste(labels, collapse = "; ")
+      )
+    }
 
     # check for duplicate values (or "levels") (gets a warning)
-    if ( any(duplicated(levels)) ) {
-      warning('Duplicated values/levels in variable "', var_str, '". Values: ',
-              paste(levels, collapse = '; '))
-              }
+    if (any(duplicated(levels))) {
+      warning(
+        'Duplicated values/levels in variable "', var_str, '". Values: ',
+        paste(levels, collapse = "; ")
+      )
+    }
 
     # check for missing levels (except NA) (gets a warning)
-    if ( any(!(unique(x) %in% c(levels, NA))) ) {
+    if (any(!(unique(x) %in% c(levels, NA)))) {
       missing_levels = unique(x)[!unique(x) %in% c(levels, NA)]
 
       levels = c(levels, missing_levels)
       labels = c(labels, extra_labels)
 
-      warning('Missing labels in variable "', var_str, '". Values missing labels: ',
-              paste(missing_levels, collapse = '; '))
-      }
+      warning(
+        'Missing labels in variable "', var_str, '". Values missing labels: ',
+        paste(missing_levels, collapse = "; ")
+      )
+    }
 
     # TODO add checks after making variable a factor to ensure it's right?
 
     y = factor(x, levels = levels, labels = labels, ordered = TRUE)
 
     # if NAs are desired.
-    if (add_na) {y = addNA(y)}
-    #if (add_na) {forcats::fct_explicit_na(y, na_level = "(Missing - NA)")}
-
+    if (add_na) {
+      y = addNA(y)
+    }
+    # if (add_na) {forcats::fct_explicit_na(y, na_level = "(Missing - NA)")}
   } else {
-
     # if there are no labels in the codebook for this variable, it is returned unchanged.
     y = x
   }
   return(invisible(y))
 }
-
diff --git a/R/factorize_df.R b/R/factorize_df.R
@@ -23,44 +23,43 @@
 #' hh_labeled = factorize_df(
 #'   df = hh,
 #'   vals_df = value_labels,
-#'   value_label_colname = 'label',
+#'   value_label_colname = "label",
 #'   extra_labels = c("Missing")
 #' )
 #'
 #' @export factorize_df
-factorize_df <- function(df, vals_df, verbose = TRUE, ...) {
-
-  if ( 'data.table' %in% class(df) ){
+factorize_df = function(df, vals_df, verbose = TRUE, ...) {
+  if ("data.table" %in% class(df)) {
     df_is_dt = TRUE
-    df_labeled <- as.data.frame(df)   # If df is a data.table, R crashes in Rstudio
+    df_labeled = as.data.frame(df) # If df is a data.table, R crashes in Rstudio
     # on.exit(expr=data.table(df_labeled))
   } else {
     df_is_dt = FALSE
   }
 
   for (i in 1:ncol(df)) {
+    var_str = names(df)[i]
 
-    var_str <- names(df)[i]
-
-    df_labeled[[var_str]] <- factorize_column(
-      x=df[[var_str]],
-      var_str=var_str,
-      vals_df=vals_df,
-      ...)
-    }
+    df_labeled[[var_str]] = factorize_column(
+      x = df[[var_str]],
+      var_str = var_str,
+      vals_df = vals_df,
+      ...
+    )
+  }
 
-  if ( verbose ){
+  if (verbose) {
     # print which vars are labeled and unlabeled
-    labeled_vars_in_df   <- sort(colnames(df)[ (colnames(df) %in% vals_df$variable) ])
-    unlabeled_vars_in_df <- sort(colnames(df)[!(colnames(df) %in% vals_df$variable) ])
+    labeled_vars_in_df = sort(colnames(df)[(colnames(df) %in% vals_df$variable)])
+    unlabeled_vars_in_df = sort(colnames(df)[!(colnames(df) %in% vals_df$variable)])
 
     message("\n Labeled vars: ")
-    message(paste(sprintf("- %s", labeled_vars_in_df), collapse= '\n'), '\n')
+    message(paste(sprintf("- %s", labeled_vars_in_df), collapse = "\n"), "\n")
     message("Unlabeled vars: ")
-    message(paste(sprintf("- %s",unlabeled_vars_in_df), collapse='\n'))
+    message(paste(sprintf("- %s", unlabeled_vars_in_df), collapse = "\n"))
   }
-  if ( df_is_dt ){
-     df_labeled = data.table::data.table(df_labeled)
+  if (df_is_dt) {
+    df_labeled = data.table::data.table(df_labeled)
   }
   return(df_labeled)
 }
diff --git a/R/get_distance_meters.R b/R/get_distance_meters.R
@@ -8,45 +8,44 @@
 #' @param radius Radius of the sphere to use for haversine calculation (Defaults to meters)
 #' @return A vector of distances in meters
 #' @export get_distance_meters
-#' 
-#' @examples 
+#'
+#' @examples
 #' get_distance_meters(c(38.8734, -75.2394), c(40.3497, -76.2314))
-#' 
+#'
 get_distance_meters =
   function(
-    location_1,
-    location_2,
-    radius = 6378137) {
-
-  # convert to matrix if not already a matrix
-  location_1 = matrix(location_1, ncol = 2)
-  location_2 = matrix(location_2, ncol = 2)
-
-  # do some checks on inputs
-  # longitudes should be numeric and between -180 & 180
-  stopifnot(is.numeric(location_1[, 1]))
-  stopifnot(is.numeric(location_2[, 1]))
-  stopifnot(location_1[!is.na(location_1[, 1]), 1] %between% c(-180, 180))
-  stopifnot(location_2[!is.na(location_2[, 1]), 1] %between% c(-180, 180))
-
-  # latitudes should be numeric and between -90 & 90
-  stopifnot(is.numeric(location_1[, 2]))
-  stopifnot(is.numeric(location_2[, 2]))
-  stopifnot(location_1[!is.na(location_1[, 2]), 2] %between% c(-90, 90))
-  stopifnot(location_2[!is.na(location_2[, 2]), 2] %between% c(-90, 90))
-
-  lon_1 = location_1[, 1] * pi / 180 # converts to radian
-  lon_2 = location_2[, 1] * pi / 180
-
-  lat_1 = location_1[, 2] * pi / 180
-  lat_2 = location_2[, 2] * pi / 180
-
-  dLat = lat_2 - lat_1
-  dLon = lon_2 - lon_1
-
-  a = sin(dLat / 2) ^ 2 + cos(lat_1) * cos(lat_2) * sin(dLon / 2) ^ 2
-  a = pmin(a, 1)
-  dist = 2 * atan2(sqrt(a), sqrt(1 - a)) * radius
-
-  return(dist)
-}
+      location_1,
+      location_2,
+      radius = 6378137) {
+    # convert to matrix if not already a matrix
+    location_1 = matrix(location_1, ncol = 2)
+    location_2 = matrix(location_2, ncol = 2)
+
+    # do some checks on inputs
+    # longitudes should be numeric and between -180 & 180
+    stopifnot(is.numeric(location_1[, 1]))
+    stopifnot(is.numeric(location_2[, 1]))
+    stopifnot(location_1[!is.na(location_1[, 1]), 1] %between% c(-180, 180))
+    stopifnot(location_2[!is.na(location_2[, 1]), 1] %between% c(-180, 180))
+
+    # latitudes should be numeric and between -90 & 90
+    stopifnot(is.numeric(location_1[, 2]))
+    stopifnot(is.numeric(location_2[, 2]))
+    stopifnot(location_1[!is.na(location_1[, 2]), 2] %between% c(-90, 90))
+    stopifnot(location_2[!is.na(location_2[, 2]), 2] %between% c(-90, 90))
+
+    lon_1 = location_1[, 1] * pi / 180 # converts to radian
+    lon_2 = location_2[, 1] * pi / 180
+
+    lat_1 = location_1[, 2] * pi / 180
+    lat_2 = location_2[, 2] * pi / 180
+
+    dLat = lat_2 - lat_1
+    dLon = lon_2 - lon_1
+
+    a = sin(dLat / 2)^2 + cos(lat_1) * cos(lat_2) * sin(dLon / 2)^2
+    a = pmin(a, 1)
+    dist = 2 * atan2(sqrt(a), sqrt(1 - a)) * radius
+
+    return(dist)
+  }