Skip to content

Commit

Permalink
Merge pull request #75 from RSGInc/33-allow-user-to-flexibly-specify-…
Browse files Browse the repository at this point in the history
…weights

33 allow user to flexibly specify weights
  • Loading branch information
erika-redding authored Jan 26, 2024
2 parents aaf600b + abe9de8 commit 903238b
Show file tree
Hide file tree
Showing 25 changed files with 315 additions and 328 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ export(hts_bin_var)
export(hts_cbind_var)
export(hts_filter_data)
export(hts_find_var)
export(hts_get_keycols)
export(hts_get_ns)
export(hts_melt_vars)
export(hts_prep_byvar)
Expand Down
2 changes: 1 addition & 1 deletion R/get_distance_meters.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @return A vector of distances in meters
#' @export get_distance_meters
#'
#' @example
#' @examples
#' get_distance_meters(c(38.8734, -75.2394), c(40.3497, -76.2314))
#'
get_distance_meters =
Expand Down
31 changes: 22 additions & 9 deletions R/hts_cbind_var.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,43 @@
#'
#' @param lhs_table Table to bind a column to in data.table format
#' @param rhs_var Variable to bind to the lhs_table.
#' @param hts_data List of household, person, vehicle, day, and trip tables in
#' data.table format.
#' @param variable_list A variable list with descriptions and table locations
#' of variables.
#' @param cbind_ids list of unique identifiers for each table in hts_data
#' @param cbind_wts list of weight for each table in hts_data
#' @param return_weight_cols If true binds weight variable along with rhs_var
#' to lhs_table. Default is FALSE.
#' @param ... Additional arguments passed to \code{link{hts_get_keycols}}
#'
#' @return Inputted table with inputted variable binded.
#' @export
#'
#' @examples
#'
#' require(data.table)
#' hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph', variable_list = variable_list)
#' hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph',
#' variable_list = variable_list, return_weight_cols = TRUE)
#' hts_cbind_var(lhs_table = trip,
#' rhs_var = 'speed_mph',
#' hts_data = test_data,
#' variable_list = variable_list)
#' hts_cbind_var(lhs_table = trip,
#' rhs_var = 'speed_mph',
#' hts_data = test_data,
#' variable_list = variable_list,
#' return_weight_cols = TRUE)
#'
hts_cbind_var = function(lhs_table,
rhs_var,
hts_data,
variable_list = variable_list,
return_weight_cols = FALSE,
...) {
cbind_ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
cbind_wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight')) {

var_location =
hts_find_var(rhs_var, variables_dt = variable_list)

rhs_table =data.table::copy(get(as.character(var_location)))
rhs_table = hts_data[[var_location]]

# If joining trip to vehicle or vice versa, need vehicle ID:
if ("trip_id" %in% names(lhs_table) &
Expand All @@ -45,9 +56,11 @@ hts_cbind_var = function(lhs_table,
}

# Subset table to ID columns, weight columns (if desired), rhs_var:
selected_cols = c(hts_get_keycols(rhs_table,
ids = TRUE,
weights = return_weight_cols), rhs_var)
selected_cols = c(intersect(
names(rhs_table),
c(cbind_ids, cbind_wts)
),
rhs_var)

rhs_table = rhs_table[, selected_cols, with = FALSE]

Expand Down
78 changes: 0 additions & 78 deletions R/hts_get_keycols.R

This file was deleted.

78 changes: 17 additions & 61 deletions R/hts_get_ns.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,84 +2,42 @@
#'
#' @param prepped_dt Dataset to pull counts from.
#' @param weighted Boolean whether to pull weighted estimates.
#' @param ids list of possible ids to return counts for
#' @param wt_col weight column to return sum of
#'
#' @return List of unweighted counts, weighted counts, and highest level unit.
#' @export
#'
#' @examples
#'
#' require(data.table)
#' hts_get_ns(prepped_dt = day, weighted = TRUE)
# hts_get_ns(prepped_dt = day,
# weighted = TRUE,
# wt_col = 'day_weight')
#'
hts_get_ns = function(prepped_dt,
weighted
weighted,
ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
wt_col
) {

#get unweighted counts
n_idcols = hts_get_keycols(prepped_dt,
ids = TRUE,
weights = FALSE)
# Get ids that are in prepped_dt
present_ids = intersect(names(prepped_dt), ids)

ndt_ids = prepped_dt[, n_idcols, with=FALSE]
ndt_ids = prepped_dt[, present_ids, with=FALSE]

ns_unwtd = lapply(ndt_ids, function(x) uniqueN(x))


n_names =
paste0(stringr::str_to_title(
stringr::str_replace(
pattern = "hh",
replacement = "household",
string = stringr::str_remove(string = n_idcols, pattern = "_id")
)
),
"s")

n_names = paste('Count of unique', present_ids)

names(ns_unwtd) = n_names

#get units
id_names = c('trip_id', 'day_id', 'person_id', 'vehicle_id', 'hh_id')

unit = NULL

for (name in id_names) {


if (name %in% names(prepped_dt)){

id = name

unit = paste0(stringr::str_remove(string = id, pattern = "_id"),
"s")

break

}

}

#get weighted counts
if (weighted){

n_wtcols = hts_get_keycols(prepped_dt,
ids = FALSE,
weights = TRUE)

ndt_wts = prepped_dt[, n_wtcols, with=FALSE]

ns_wtd = lapply(ndt_wts, function(x) sum(x))

n_names =
paste0(stringr::str_to_title(
stringr::str_replace(
pattern = "hh",
replacement = "household",
string = stringr::str_remove(string = n_wtcols, pattern = "_weight")
)
),
"s")

names(ns_wtd) = n_names
ns_wtd = prepped_dt[, sum(get(wt_col))]

names(ns_wtd) = paste('Sum of', wt_col)

} else {

Expand All @@ -89,11 +47,9 @@ hts_get_ns = function(prepped_dt,

ns = list(
'unwtd' = ns_unwtd,
'wtd' = ns_wtd,
'units' = unit
'wtd' = ns_wtd
)

return(ns)


}
14 changes: 8 additions & 6 deletions R/hts_melt_vars.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
#' Defaults to NULL.
#' @param variables_dt List of variable locations and descriptions in data.table
#' format.
#' @param hts_data List containing household, person, day, trip, and vehicle
#' @param data List containing household, person, day, trip, and vehicle
#' datasets in data.table format.
#' @param ids unique identifiers appearing in wide_dt
#' @param remove_missing Boolean to remove rows with missing values. Defaults to
#' TRUE.
#' @param missing_values Missing values to remove. Defaults to 'Missing Response'
Expand All @@ -32,7 +33,7 @@
#' require(stringr)
#' hts_melt_vars(shared_name = 'race',
#' wide_dt = person,
#' hts_data = list('hh' = hh,
#' data = list('hh' = hh,
#' 'person' = person,
#' 'day' = day,
#' 'trip' = trip,
Expand All @@ -42,7 +43,8 @@ hts_melt_vars = function(shared_name = NULL,
wide_dt = NULL,
shared_name_vars = NULL,
variables_dt = variable_list,
hts_data = hts_data,
data = hts_data,
ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
remove_missing = TRUE,
missing_values = c("Missing Response", "995"),
checkbox_label_sep = ":",
Expand All @@ -54,7 +56,7 @@ hts_melt_vars = function(shared_name = NULL,
var_location = hts_find_var(shared_name, variables_dt = variables_dt)

# Select table where this variable lives:
wide_dt = hts_data[[var_location]]
wide_dt = data[[var_location]]

}

Expand All @@ -67,7 +69,7 @@ hts_melt_vars = function(shared_name = NULL,

melted_dt = data.table::melt(
wide_dt,
id.vars = hts_get_keycols(wide_dt),
id.vars = intersect(names(wide_dt),ids),
measure.vars = shared_name_vars,
variable.name = "variable",
value.name = "value"
Expand Down Expand Up @@ -96,7 +98,7 @@ hts_melt_vars = function(shared_name = NULL,

# two or more checked:
melted_dt[, num_checked := sum(value),
by = c(hts_get_keycols(wide_dt))]
by = intersect(names(melted_dt), ids)]

# make factor levels
melted_dt$description = factor(melted_dt$description, levels = unique(melted_dt$description))
Expand Down
22 changes: 16 additions & 6 deletions R/hts_prep_byvar.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#' format.
#' @param hts_data List containing household, person, day, trip, and vehicle
#' datasets in data.table format.
#' @param byvar_ids unique identifiers for each table in hts_data
#' @param byvar_wts weight column for each table in hts_data
#' @param ... Additional parameters to pass to \code{link{hts_melt_vars}}
#'
#' @return Data table containing the variable to be summarized and other key
Expand All @@ -31,6 +33,8 @@
hts_prep_byvar = function(summarize_by = NULL,
variables_dt = variables_list,
hts_data,
byvar_ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
byvar_wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight'),
...) {

# For each variables in trip table:
Expand All @@ -55,6 +59,7 @@ hts_prep_byvar = function(summarize_by = NULL,
hts_melt_vars(
shared_name = summarize_by[[b]],
wide_dt = byvar_dt_v,
ids = byvar_ids,
shared_name_vars = NULL,
variables_dt = variables_dt,
to_single_row = TRUE,
Expand All @@ -69,18 +74,23 @@ hts_prep_byvar = function(summarize_by = NULL,
nbins = 7)
}

if (!is.null(byvar_wts)){

table_idx = which(names(hts_data) == byvar_loc)
wtname = byvar_wts[table_idx]

} else {
wtname = NULL
}


if (!byvar_is_shared) {
byvar_cols = c(hts_get_keycols(byvar_dt_v), byvar)
byvar_cols = c(intersect(c(byvar_ids, wtname), names(byvar_dt_v)), byvar)

byvar_dt_v = byvar_dt_v[, byvar_cols, with=FALSE]

}

# Set keys for merging
# keycols = hts_get_keycols(byvar_dt_v)
#
# setkeyv(byvar_dt_v, keycols)

byvar_dt_ls[[b]] = byvar_dt_v

}
Expand Down
Loading

0 comments on commit 903238b

Please sign in to comment.