Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

33 allow user to flexibly specify weights #75

Merged
merged 10 commits into from
Jan 26, 2024
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ export(hts_bin_var)
export(hts_cbind_var)
export(hts_filter_data)
export(hts_find_var)
export(hts_get_keycols)
export(hts_get_ns)
export(hts_melt_vars)
export(hts_prep_byvar)
Expand Down
2 changes: 1 addition & 1 deletion R/get_distance_meters.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @return A vector of distances in meters
#' @export get_distance_meters
#'
#' @example
#' @examples
#' get_distance_meters(c(38.8734, -75.2394), c(40.3497, -76.2314))
#'
get_distance_meters =
Expand Down
31 changes: 22 additions & 9 deletions R/hts_cbind_var.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,43 @@
#'
#' @param lhs_table Table to bind a column to in data.table format
#' @param rhs_var Variable to bind to the lhs_table.
#' @param hts_data List of household, person, vehicle, day, and trip tables in
#' data.table format.
#' @param variable_list A variable list with descriptions and table locations
#' of variables.
#' @param cbind_ids list of unique identifiers for each table in hts_data
#' @param cbind_wts list of weight for each table in hts_data
#' @param return_weight_cols If true binds weight variable along with rhs_var
#' to lhs_table. Default is FALSE.
#' @param ... Additional arguments passed to \code{link{hts_get_keycols}}
#'
#' @return Inputted table with inputted variable binded.
#' @export
#'
#' @examples
#'
#' require(data.table)
#' hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph', variable_list = variable_list)
#' hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph',
#' variable_list = variable_list, return_weight_cols = TRUE)
#' hts_cbind_var(lhs_table = trip,
#' rhs_var = 'speed_mph',
#' hts_data = test_data,
#' variable_list = variable_list)
#' hts_cbind_var(lhs_table = trip,
#' rhs_var = 'speed_mph',
#' hts_data = test_data,
#' variable_list = variable_list,
#' return_weight_cols = TRUE)
#'
hts_cbind_var = function(lhs_table,
rhs_var,
hts_data,
variable_list = variable_list,
return_weight_cols = FALSE,
...) {
cbind_ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
cbind_wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight')) {

var_location =
hts_find_var(rhs_var, variables_dt = variable_list)

rhs_table =data.table::copy(get(as.character(var_location)))
rhs_table = hts_data[[var_location]]

# If joining trip to vehicle or vice versa, need vehicle ID:
if ("trip_id" %in% names(lhs_table) &
Expand All @@ -45,9 +56,11 @@ hts_cbind_var = function(lhs_table,
}

# Subset table to ID columns, weight columns (if desired), rhs_var:
selected_cols = c(hts_get_keycols(rhs_table,
ids = TRUE,
weights = return_weight_cols), rhs_var)
selected_cols = c(intersect(
names(rhs_table),
c(cbind_ids, cbind_wts)
),
rhs_var)

rhs_table = rhs_table[, selected_cols, with = FALSE]

Expand Down
78 changes: 0 additions & 78 deletions R/hts_get_keycols.R

This file was deleted.

78 changes: 17 additions & 61 deletions R/hts_get_ns.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,84 +2,42 @@
#'
#' @param prepped_dt Dataset to pull counts from.
#' @param weighted Boolean whether to pull weighted estimates.
#' @param ids list of possible ids to return counts for
#' @param wt_col weight column to return sum of
#'
#' @return List of unweighted counts, weighted counts, and highest level unit.
#' @export
#'
#' @examples
#'
#' require(data.table)
#' hts_get_ns(prepped_dt = day, weighted = TRUE)
# hts_get_ns(prepped_dt = day,
# weighted = TRUE,
# wt_col = 'day_weight')
#'
hts_get_ns = function(prepped_dt,
weighted
weighted,
ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
wt_col
) {

#get unweighted counts
n_idcols = hts_get_keycols(prepped_dt,
ids = TRUE,
weights = FALSE)
# Get ids that are in prepped_dt
present_ids = intersect(names(prepped_dt), ids)

ndt_ids = prepped_dt[, n_idcols, with=FALSE]
ndt_ids = prepped_dt[, present_ids, with=FALSE]

ns_unwtd = lapply(ndt_ids, function(x) uniqueN(x))


n_names =
paste0(stringr::str_to_title(
stringr::str_replace(
pattern = "hh",
replacement = "household",
string = stringr::str_remove(string = n_idcols, pattern = "_id")
)
),
"s")

n_names = paste('Count of unique', present_ids)

names(ns_unwtd) = n_names

#get units
id_names = c('trip_id', 'day_id', 'person_id', 'vehicle_id', 'hh_id')

unit = NULL

for (name in id_names) {


if (name %in% names(prepped_dt)){

id = name

unit = paste0(stringr::str_remove(string = id, pattern = "_id"),
"s")

break

}

}

#get weighted counts
if (weighted){

n_wtcols = hts_get_keycols(prepped_dt,
ids = FALSE,
weights = TRUE)

ndt_wts = prepped_dt[, n_wtcols, with=FALSE]

ns_wtd = lapply(ndt_wts, function(x) sum(x))

n_names =
paste0(stringr::str_to_title(
stringr::str_replace(
pattern = "hh",
replacement = "household",
string = stringr::str_remove(string = n_wtcols, pattern = "_weight")
)
),
"s")

names(ns_wtd) = n_names
ns_wtd = prepped_dt[, sum(get(wt_col))]

names(ns_wtd) = paste('Sum of', wt_col)

} else {

Expand All @@ -89,11 +47,9 @@ hts_get_ns = function(prepped_dt,

ns = list(
'unwtd' = ns_unwtd,
'wtd' = ns_wtd,
'units' = unit
'wtd' = ns_wtd
)

return(ns)


}
14 changes: 8 additions & 6 deletions R/hts_melt_vars.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
#' Defaults to NULL.
#' @param variables_dt List of variable locations and descriptions in data.table
#' format.
#' @param hts_data List containing household, person, day, trip, and vehicle
#' @param data List containing household, person, day, trip, and vehicle
#' datasets in data.table format.
#' @param ids unique identifiers appearing in wide_dt
#' @param remove_missing Boolean to remove rows with missing values. Defaults to
#' TRUE.
#' @param missing_values Missing values to remove. Defaults to 'Missing Response'
Expand All @@ -32,7 +33,7 @@
#' require(stringr)
#' hts_melt_vars(shared_name = 'race',
#' wide_dt = person,
#' hts_data = list('hh' = hh,
#' data = list('hh' = hh,
#' 'person' = person,
#' 'day' = day,
#' 'trip' = trip,
Expand All @@ -42,7 +43,8 @@ hts_melt_vars = function(shared_name = NULL,
wide_dt = NULL,
shared_name_vars = NULL,
variables_dt = variable_list,
hts_data = hts_data,
data = hts_data,
ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
remove_missing = TRUE,
missing_values = c("Missing Response", "995"),
checkbox_label_sep = ":",
Expand All @@ -54,7 +56,7 @@ hts_melt_vars = function(shared_name = NULL,
var_location = hts_find_var(shared_name, variables_dt = variables_dt)

# Select table where this variable lives:
wide_dt = hts_data[[var_location]]
wide_dt = data[[var_location]]

}

Expand All @@ -67,7 +69,7 @@ hts_melt_vars = function(shared_name = NULL,

melted_dt = data.table::melt(
wide_dt,
id.vars = hts_get_keycols(wide_dt),
id.vars = intersect(names(wide_dt),ids),
measure.vars = shared_name_vars,
variable.name = "variable",
value.name = "value"
Expand Down Expand Up @@ -96,7 +98,7 @@ hts_melt_vars = function(shared_name = NULL,

# two or more checked:
melted_dt[, num_checked := sum(value),
by = c(hts_get_keycols(wide_dt))]
by = intersect(names(melted_dt), ids)]

# make factor levels
melted_dt$description = factor(melted_dt$description, levels = unique(melted_dt$description))
Expand Down
22 changes: 16 additions & 6 deletions R/hts_prep_byvar.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#' format.
#' @param hts_data List containing household, person, day, trip, and vehicle
#' datasets in data.table format.
#' @param byvar_ids unique identifiers for each table in hts_data
#' @param byvar_wts weight column for each table in hts_data
#' @param ... Additional parameters to pass to \code{link{hts_melt_vars}}
#'
#' @return Data table containing the variable to be summarized and other key
Expand All @@ -31,6 +33,8 @@
hts_prep_byvar = function(summarize_by = NULL,
variables_dt = variables_list,
hts_data,
byvar_ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'),
byvar_wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight'),
...) {

# For each variables in trip table:
Expand All @@ -55,6 +59,7 @@ hts_prep_byvar = function(summarize_by = NULL,
hts_melt_vars(
shared_name = summarize_by[[b]],
wide_dt = byvar_dt_v,
ids = byvar_ids,
shared_name_vars = NULL,
variables_dt = variables_dt,
to_single_row = TRUE,
Expand All @@ -69,18 +74,23 @@ hts_prep_byvar = function(summarize_by = NULL,
nbins = 7)
}

if (!is.null(byvar_wts)){

table_idx = which(names(hts_data) == byvar_loc)
wtname = byvar_wts[table_idx]

} else {
wtname = NULL
}


if (!byvar_is_shared) {
byvar_cols = c(hts_get_keycols(byvar_dt_v), byvar)
byvar_cols = c(intersect(c(byvar_ids, wtname), names(byvar_dt_v)), byvar)

byvar_dt_v = byvar_dt_v[, byvar_cols, with=FALSE]

}

# Set keys for merging
# keycols = hts_get_keycols(byvar_dt_v)
#
# setkeyv(byvar_dt_v, keycols)

byvar_dt_ls[[b]] = byvar_dt_v

}
Expand Down
Loading
Loading