RSGInc · jacobmoore5067 · Jan 26, 2024 · Jan 19, 2024 · Jan 22, 2024 · Jan 23, 2024
diff --git a/NAMESPACE b/NAMESPACE
@@ -8,7 +8,6 @@ export(hts_bin_var)
 export(hts_cbind_var)
 export(hts_filter_data)
 export(hts_find_var)
-export(hts_get_keycols)
 export(hts_get_ns)
 export(hts_melt_vars)
 export(hts_prep_byvar)

diff --git a/R/get_distance_meters.R b/R/get_distance_meters.R
@@ -9,7 +9,7 @@
 #' @return A vector of distances in meters
 #' @export get_distance_meters
 #' 
-#' @example 
+#' @examples 
 #' get_distance_meters(c(38.8734, -75.2394), c(40.3497, -76.2314))
 #' 
 get_distance_meters =

diff --git a/R/hts_cbind_var.R b/R/hts_cbind_var.R
@@ -2,32 +2,43 @@
 #'
 #' @param lhs_table Table to bind a column to in data.table format
 #' @param rhs_var Variable to bind to the lhs_table.
+#' @param hts_data List of household, person, vehicle, day, and trip tables in
+#'  data.table format.
 #' @param variable_list A variable list with descriptions and table locations
 #'  of variables.
+#' @param cbind_ids list of unique identifiers for each table in hts_data
+#' @param cbind_wts list of weight for each table in hts_data 
 #' @param return_weight_cols If true binds weight variable along with rhs_var
 #'  to lhs_table. Default is FALSE.
-#' @param ... Additional arguments passed to \code{link{hts_get_keycols}}
 #' 
 #' @return Inputted table with inputted variable binded.
 #' @export
 #'
 #' @examples
 #' 
 #' require(data.table)
-#' hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph', variable_list = variable_list)
-#' hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph', 
-#' variable_list = variable_list, return_weight_cols = TRUE)
+#' hts_cbind_var(lhs_table = trip,
+#'  rhs_var = 'speed_mph',
+#'  hts_data = test_data,
+#'   variable_list = variable_list)
+#' hts_cbind_var(lhs_table = trip,
+#'  rhs_var = 'speed_mph',
+#'   hts_data = test_data,
+#' variable_list = variable_list,
+#'  return_weight_cols = TRUE)
 #'
 hts_cbind_var = function(lhs_table,
                          rhs_var,
+                         hts_data,
                          variable_list = variable_list,
                          return_weight_cols = FALSE,
-                         ...) {
+                         cbind_ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
+                         cbind_wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight')) {
 
   var_location =
     hts_find_var(rhs_var, variables_dt = variable_list)
 
-  rhs_table =data.table::copy(get(as.character(var_location)))
+  rhs_table = hts_data[[var_location]]
 
   # If joining trip to vehicle or vice versa, need vehicle ID:
   if ("trip_id" %in% names(lhs_table) &
@@ -45,9 +56,11 @@ hts_cbind_var = function(lhs_table,
   }
 
   # Subset table to ID columns, weight columns (if desired), rhs_var:
-  selected_cols = c(hts_get_keycols(rhs_table,
-                                  ids = TRUE,
-                                  weights = return_weight_cols), rhs_var)
+  selected_cols = c(intersect(
+    names(rhs_table),
+    c(cbind_ids, cbind_wts)
+  ),
+  rhs_var)
 
   rhs_table = rhs_table[, selected_cols, with = FALSE]
 

diff --git a/R/hts_get_keycols.R b/R/hts_get_keycols.R
diff --git a/R/hts_get_ns.R b/R/hts_get_ns.R
@@ -2,84 +2,42 @@
 #'
 #' @param prepped_dt Dataset to pull counts from.
 #' @param weighted Boolean whether to pull weighted estimates.
+#' @param ids list of possible ids to return counts for
+#' @param wt_col weight column to return sum of
 #' 
 #' @return List of unweighted counts, weighted counts, and highest level unit.
 #' @export
 #'
 #' @examples
 #' 
 #' require(data.table)
-#' hts_get_ns(prepped_dt = day, weighted = TRUE)
+# hts_get_ns(prepped_dt = day,
+#            weighted = TRUE,
+#            wt_col = 'day_weight')
 #'
 hts_get_ns = function(prepped_dt,
-                      weighted
+                      weighted,
+                      ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
+                      wt_col
 ) {
 
-  #get unweighted counts
-  n_idcols = hts_get_keycols(prepped_dt, 
-                             ids = TRUE,
-                             weights = FALSE)
+  # Get ids that are in prepped_dt
+  present_ids = intersect(names(prepped_dt), ids)
 
-  ndt_ids = prepped_dt[, n_idcols, with=FALSE]
+  ndt_ids = prepped_dt[, present_ids, with=FALSE]
 
   ns_unwtd = lapply(ndt_ids, function(x) uniqueN(x))
-
-
-  n_names =
-    paste0(stringr::str_to_title(
-      stringr::str_replace(
-        pattern = "hh",
-        replacement = "household",
-        string = stringr::str_remove(string = n_idcols, pattern = "_id")
-      )
-    ),
-    "s")
+
+  n_names = paste('Count of unique', present_ids)
 
   names(ns_unwtd) = n_names
 
-  #get units
-  id_names = c('trip_id', 'day_id', 'person_id', 'vehicle_id', 'hh_id')
-
-  unit = NULL
-
-  for (name in id_names) {
-
-
-    if (name %in% names(prepped_dt)){
-
-      id = name
-
-      unit = paste0(stringr::str_remove(string = id, pattern = "_id"),
-                    "s")
-
-      break
-
-    }
-
-  }
-
   #get weighted counts
   if (weighted){
 
-    n_wtcols = hts_get_keycols(prepped_dt, 
-                               ids = FALSE,
-                               weights = TRUE)
-
-    ndt_wts = prepped_dt[, n_wtcols, with=FALSE]
-
-    ns_wtd = lapply(ndt_wts, function(x) sum(x))
-
-    n_names =
-      paste0(stringr::str_to_title(
-        stringr::str_replace(
-          pattern = "hh",
-          replacement = "household",
-          string = stringr::str_remove(string = n_wtcols, pattern = "_weight")
-        )
-      ),
-      "s")
-
-    names(ns_wtd) = n_names
+    ns_wtd = prepped_dt[, sum(get(wt_col))]
+
+    names(ns_wtd) = paste('Sum of', wt_col)
 
   } else {
 
@@ -89,11 +47,9 @@ hts_get_ns = function(prepped_dt,
 
   ns = list(
     'unwtd' = ns_unwtd,
-    'wtd' = ns_wtd,
-    'units' = unit
+    'wtd' = ns_wtd
   )
 
   return(ns)
 
-
 }
diff --git a/R/hts_melt_vars.R b/R/hts_melt_vars.R
@@ -8,8 +8,9 @@
 #'  Defaults to NULL.
 #' @param variables_dt List of variable locations and descriptions in data.table
 #'  format.
-#' @param hts_data List containing household, person, day, trip, and vehicle
+#' @param data List containing household, person, day, trip, and vehicle
 #'  datasets in data.table format.
+#' @param ids unique identifiers appearing in wide_dt
 #' @param remove_missing Boolean to remove rows with missing values. Defaults to
 #'  TRUE.
 #' @param missing_values Missing values to remove. Defaults to 'Missing Response'
@@ -32,7 +33,7 @@
 #' require(stringr)
 #' hts_melt_vars(shared_name = 'race',
 #'               wide_dt = person,
-#'               hts_data = list('hh' = hh,
+#'               data = list('hh' = hh,
 #'                               'person' = person,
 #'                               'day' = day,
 #'                               'trip' = trip,
@@ -42,7 +43,8 @@ hts_melt_vars = function(shared_name = NULL,
                          wide_dt = NULL,
                          shared_name_vars = NULL,
                          variables_dt = variable_list,
-                         hts_data = hts_data,
+                         data = hts_data,
+                         ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
                          remove_missing = TRUE,
                          missing_values = c("Missing Response", "995"),
                          checkbox_label_sep = ":",
@@ -54,7 +56,7 @@ hts_melt_vars = function(shared_name = NULL,
     var_location = hts_find_var(shared_name, variables_dt = variables_dt)
 
     # Select table where this variable lives:
-    wide_dt = hts_data[[var_location]]
+    wide_dt = data[[var_location]]
 
   }
 
@@ -67,7 +69,7 @@ hts_melt_vars = function(shared_name = NULL,
 
   melted_dt = data.table::melt(
     wide_dt,
-    id.vars = hts_get_keycols(wide_dt),
+    id.vars = intersect(names(wide_dt),ids),
     measure.vars = shared_name_vars,
     variable.name = "variable",
     value.name = "value"
@@ -96,7 +98,7 @@ hts_melt_vars = function(shared_name = NULL,
 
     # two or more checked:
     melted_dt[, num_checked := sum(value),
-             by =  c(hts_get_keycols(wide_dt))]
+             by =  intersect(names(melted_dt), ids)]
 
     # make factor levels
     melted_dt$description = factor(melted_dt$description, levels = unique(melted_dt$description))

diff --git a/R/hts_prep_byvar.R b/R/hts_prep_byvar.R
@@ -5,6 +5,8 @@
 #'  format.
 #' @param hts_data List containing household, person, day, trip, and vehicle
 #'  datasets in data.table format.
+#' @param byvar_ids unique identifiers for each table in hts_data
+#' @param byvar_wts weight column for each table in hts_data 
 #' @param ... Additional parameters to pass to \code{link{hts_melt_vars}}
 #'
 #' @return Data table containing the variable to be summarized and other key
@@ -31,6 +33,8 @@
 hts_prep_byvar = function(summarize_by = NULL,
                           variables_dt = variables_list,
                           hts_data,
+                          byvar_ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
+                          byvar_wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight'),
                           ...) {
 
   # For each variables in trip table:
@@ -55,6 +59,7 @@ hts_prep_byvar = function(summarize_by = NULL,
         hts_melt_vars(
           shared_name = summarize_by[[b]],
           wide_dt = byvar_dt_v,
+          ids = byvar_ids,
           shared_name_vars = NULL,
           variables_dt = variables_dt,
           to_single_row = TRUE,
@@ -69,18 +74,23 @@ hts_prep_byvar = function(summarize_by = NULL,
                                nbins = 7)
     }
 
+    if (!is.null(byvar_wts)){
+
+      table_idx = which(names(hts_data) == byvar_loc)
+      wtname = byvar_wts[table_idx]
+
+    } else {
+      wtname = NULL
+    }
+
+
     if (!byvar_is_shared) {
-      byvar_cols = c(hts_get_keycols(byvar_dt_v), byvar)
+      byvar_cols = c(intersect(c(byvar_ids, wtname), names(byvar_dt_v)), byvar)
 
       byvar_dt_v = byvar_dt_v[, byvar_cols, with=FALSE]
 
     }
 
-    # Set keys for merging
-    # keycols = hts_get_keycols(byvar_dt_v)
-    # 
-    # setkeyv(byvar_dt_v, keycols)
-
     byvar_dt_ls[[b]] = byvar_dt_v
 
   }