diff --git a/NAMESPACE b/NAMESPACE index 2728918..8ee2f50 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,6 @@ export(hts_bin_var) export(hts_cbind_var) export(hts_filter_data) export(hts_find_var) -export(hts_get_keycols) export(hts_get_ns) export(hts_melt_vars) export(hts_prep_byvar) diff --git a/R/get_distance_meters.R b/R/get_distance_meters.R index 869fc70..6696e8e 100644 --- a/R/get_distance_meters.R +++ b/R/get_distance_meters.R @@ -9,7 +9,7 @@ #' @return A vector of distances in meters #' @export get_distance_meters #' -#' @example +#' @examples #' get_distance_meters(c(38.8734, -75.2394), c(40.3497, -76.2314)) #' get_distance_meters = diff --git a/R/hts_cbind_var.R b/R/hts_cbind_var.R index 279660a..b88772b 100644 --- a/R/hts_cbind_var.R +++ b/R/hts_cbind_var.R @@ -2,11 +2,14 @@ #' #' @param lhs_table Table to bind a column to in data.table format #' @param rhs_var Variable to bind to the lhs_table. +#' @param hts_data List of household, person, vehicle, day, and trip tables in +#' data.table format. #' @param variable_list A variable list with descriptions and table locations #' of variables. +#' @param cbind_ids list of unique identifiers for each table in hts_data +#' @param cbind_wts list of weight for each table in hts_data #' @param return_weight_cols If true binds weight variable along with rhs_var #' to lhs_table. Default is FALSE. -#' @param ... Additional arguments passed to \code{link{hts_get_keycols}} #' #' @return Inputted table with inputted variable binded. #' @export @@ -14,20 +17,28 @@ #' @examples #' #' require(data.table) -#' hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph', variable_list = variable_list) -#' hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph', -#' variable_list = variable_list, return_weight_cols = TRUE) +#' hts_cbind_var(lhs_table = trip, +#' rhs_var = 'speed_mph', +#' hts_data = test_data, +#' variable_list = variable_list) +#' hts_cbind_var(lhs_table = trip, +#' rhs_var = 'speed_mph', +#' hts_data = test_data, +#' variable_list = variable_list, +#' return_weight_cols = TRUE) #' hts_cbind_var = function(lhs_table, rhs_var, + hts_data, variable_list = variable_list, return_weight_cols = FALSE, - ...) { + cbind_ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'), + cbind_wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight')) { var_location = hts_find_var(rhs_var, variables_dt = variable_list) - rhs_table =data.table::copy(get(as.character(var_location))) + rhs_table = hts_data[[var_location]] # If joining trip to vehicle or vice versa, need vehicle ID: if ("trip_id" %in% names(lhs_table) & @@ -45,9 +56,11 @@ hts_cbind_var = function(lhs_table, } # Subset table to ID columns, weight columns (if desired), rhs_var: - selected_cols = c(hts_get_keycols(rhs_table, - ids = TRUE, - weights = return_weight_cols), rhs_var) + selected_cols = c(intersect( + names(rhs_table), + c(cbind_ids, cbind_wts) + ), + rhs_var) rhs_table = rhs_table[, selected_cols, with = FALSE] diff --git a/R/hts_get_keycols.R b/R/hts_get_keycols.R deleted file mode 100644 index 28dc368..0000000 --- a/R/hts_get_keycols.R +++ /dev/null @@ -1,78 +0,0 @@ -#' Find key columns in table -#' -#' @param dt Dataset to find key columns of in data.table format -#' @param ids Boolean whether to return id columns. Default is TRUE. -#' @param weights Boolean whether to return weight columns. Default is TRUE. -#' @param priority Boolean whether to only return highest level weight/id. -#' Default is FALSE. 
-#' -#' @return List of names of key columns in the dataset. -#' @export -#' -#' @examples -#' -#' require(data.table) -#' hts_get_keycols(dt = trip) -#' hts_get_keycols(dt = trip, priority = TRUE) -#' -hts_get_keycols = function(dt, - ids = TRUE, - weights = TRUE, - priority = FALSE){ - - idcols = c('trip_id', 'day_id', 'person_id','hh_id', 'vehicle_id') - wtcols = c('trip_weight', 'day_weight', 'person_weight', 'hh_weight') - - idnames = c() - wtnames = c() - - #get id(s) - if(ids){ - - # priority will select the highest level weight/id only - if (priority){ - - for (name in idcols){ - - if (name %in% names(dt)){ - - idnames = name - break - } - - } - - } else { - - idnames = c(names(dt)[names(dt) %in% idcols]) - - } - } - - #get weight(s) - if(weights){ - - if (priority){ - - for (name in wtcols){ - - if (name %in% names(dt)){ - - wtnames = name - break - } - - } - - } else { - - wtnames = c(names(dt)[names(dt) %in% wtcols]) - - } - } - - names = c(idnames, wtnames) - - return(names) - -} diff --git a/R/hts_get_ns.R b/R/hts_get_ns.R index 56cc53e..8557b87 100644 --- a/R/hts_get_ns.R +++ b/R/hts_get_ns.R @@ -2,6 +2,8 @@ #' #' @param prepped_dt Dataset to pull counts from. #' @param weighted Boolean whether to pull weighted estimates. +#' @param ids list of possible ids to return counts for +#' @param wt_col weight column to return sum of #' #' @return List of unweighted counts, weighted counts, and highest level unit. #' @export @@ -9,77 +11,33 @@ #' @examples #' #' require(data.table) -#' hts_get_ns(prepped_dt = day, weighted = TRUE) +# hts_get_ns(prepped_dt = day, +# weighted = TRUE, +# wt_col = 'day_weight') #' hts_get_ns = function(prepped_dt, - weighted + weighted, + ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'), + wt_col ) { - #get unweighted counts - n_idcols = hts_get_keycols(prepped_dt, - ids = TRUE, - weights = FALSE) + # Get ids that are in prepped_dt + present_ids = intersect(names(prepped_dt), ids) - ndt_ids = prepped_dt[, n_idcols, with=FALSE] + ndt_ids = prepped_dt[, present_ids, with=FALSE] ns_unwtd = lapply(ndt_ids, function(x) uniqueN(x)) - - - n_names = - paste0(stringr::str_to_title( - stringr::str_replace( - pattern = "hh", - replacement = "household", - string = stringr::str_remove(string = n_idcols, pattern = "_id") - ) - ), - "s") + + n_names = paste('Count of unique', present_ids) names(ns_unwtd) = n_names - #get units - id_names = c('trip_id', 'day_id', 'person_id', 'vehicle_id', 'hh_id') - - unit = NULL - - for (name in id_names) { - - - if (name %in% names(prepped_dt)){ - - id = name - - unit = paste0(stringr::str_remove(string = id, pattern = "_id"), - "s") - - break - - } - - } - #get weighted counts if (weighted){ - n_wtcols = hts_get_keycols(prepped_dt, - ids = FALSE, - weights = TRUE) - - ndt_wts = prepped_dt[, n_wtcols, with=FALSE] - - ns_wtd = lapply(ndt_wts, function(x) sum(x)) - - n_names = - paste0(stringr::str_to_title( - stringr::str_replace( - pattern = "hh", - replacement = "household", - string = stringr::str_remove(string = n_wtcols, pattern = "_weight") - ) - ), - "s") - - names(ns_wtd) = n_names + ns_wtd = prepped_dt[, sum(get(wt_col))] + + names(ns_wtd) = paste('Sum of', wt_col) } else { @@ -89,11 +47,9 @@ hts_get_ns = function(prepped_dt, ns = list( 'unwtd' = ns_unwtd, - 'wtd' = ns_wtd, - 'units' = unit + 'wtd' = ns_wtd ) return(ns) - } diff --git a/R/hts_melt_vars.R b/R/hts_melt_vars.R index b44f9a3..5b310c2 100644 --- a/R/hts_melt_vars.R +++ b/R/hts_melt_vars.R @@ -8,8 +8,9 @@ #' Defaults to NULL. 
#' @param variables_dt List of variable locations and descriptions in data.table #' format. -#' @param hts_data List containing household, person, day, trip, and vehicle +#' @param data List containing household, person, day, trip, and vehicle #' datasets in data.table format. +#' @param ids unique identifiers appearing in wide_dt #' @param remove_missing Boolean to remove rows with missing values. Defaults to #' TRUE. #' @param missing_values Missing values to remove. Defaults to 'Missing Response' @@ -32,7 +33,7 @@ #' require(stringr) #' hts_melt_vars(shared_name = 'race', #' wide_dt = person, -#' hts_data = list('hh' = hh, +#' data = list('hh' = hh, #' 'person' = person, #' 'day' = day, #' 'trip' = trip, @@ -42,7 +43,8 @@ hts_melt_vars = function(shared_name = NULL, wide_dt = NULL, shared_name_vars = NULL, variables_dt = variable_list, - hts_data = hts_data, + data = hts_data, + ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'), remove_missing = TRUE, missing_values = c("Missing Response", "995"), checkbox_label_sep = ":", @@ -54,7 +56,7 @@ hts_melt_vars = function(shared_name = NULL, var_location = hts_find_var(shared_name, variables_dt = variables_dt) # Select table where this variable lives: - wide_dt = hts_data[[var_location]] + wide_dt = data[[var_location]] } @@ -67,7 +69,7 @@ hts_melt_vars = function(shared_name = NULL, melted_dt = data.table::melt( wide_dt, - id.vars = hts_get_keycols(wide_dt), + id.vars = intersect(names(wide_dt),ids), measure.vars = shared_name_vars, variable.name = "variable", value.name = "value" @@ -96,7 +98,7 @@ hts_melt_vars = function(shared_name = NULL, # two or more checked: melted_dt[, num_checked := sum(value), - by = c(hts_get_keycols(wide_dt))] + by = intersect(names(melted_dt), ids)] # make factor levels melted_dt$description = factor(melted_dt$description, levels = unique(melted_dt$description)) diff --git a/R/hts_prep_byvar.R b/R/hts_prep_byvar.R index 0d3f55d..8697c7b 100644 --- a/R/hts_prep_byvar.R +++ b/R/hts_prep_byvar.R @@ -5,6 +5,8 @@ #' format. #' @param hts_data List containing household, person, day, trip, and vehicle #' datasets in data.table format. +#' @param byvar_ids unique identifiers for each table in hts_data +#' @param byvar_wts weight column for each table in hts_data #' @param ... Additional parameters to pass to \code{link{hts_melt_vars}} #' #' @return Data table containing the variable to be summarized and other key @@ -31,6 +33,8 @@ hts_prep_byvar = function(summarize_by = NULL, variables_dt = variables_list, hts_data, + byvar_ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'), + byvar_wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight'), ...) 
{ # For each variables in trip table: @@ -55,6 +59,7 @@ hts_prep_byvar = function(summarize_by = NULL, hts_melt_vars( shared_name = summarize_by[[b]], wide_dt = byvar_dt_v, + ids = byvar_ids, shared_name_vars = NULL, variables_dt = variables_dt, to_single_row = TRUE, @@ -69,18 +74,23 @@ hts_prep_byvar = function(summarize_by = NULL, nbins = 7) } + if (!is.null(byvar_wts)){ + + table_idx = which(names(hts_data) == byvar_loc) + wtname = byvar_wts[table_idx] + + } else { + wtname = NULL + } + + if (!byvar_is_shared) { - byvar_cols = c(hts_get_keycols(byvar_dt_v), byvar) + byvar_cols = c(intersect(c(byvar_ids, wtname), names(byvar_dt_v)), byvar) byvar_dt_v = byvar_dt_v[, byvar_cols, with=FALSE] } - # Set keys for merging - # keycols = hts_get_keycols(byvar_dt_v) - # - # setkeyv(byvar_dt_v, keycols) - byvar_dt_ls[[b]] = byvar_dt_v } diff --git a/R/hts_prep_data.R b/R/hts_prep_data.R index 67aa2a0..b60a887 100644 --- a/R/hts_prep_data.R +++ b/R/hts_prep_data.R @@ -7,7 +7,9 @@ #' format. #' @param data List of household, person, vehicle, day, and trip tables in #' data.table format. +#' @param id_cols name of unique identifier for each table in hts_data #' @param weighted Whether the data is weighted. Default is TRUE. +#' @param wt_cols weight name for each table in hts_data #' @param remove_outliers Whether to remove outliers for numeric variable. Default #' is TRUE. #' @param threshold Threshold to define outliers. Default is 0.975. @@ -57,7 +59,9 @@ hts_prep_data = function(summarize_var = NULL, summarize_by = NULL, variables_dt = variable_list, data = hts_data, + id_cols = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'), weighted = TRUE, + wt_cols = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight'), remove_outliers = TRUE, threshold = 0.975, remove_missing = TRUE, @@ -91,19 +95,32 @@ hts_prep_data = function(summarize_var = NULL, message(paste0(msg_pt1, " ", msg_pt2)) # TODO: Could we put id and weight cols in a snippet or some such? # Or in a settings/options for these functions? - + + if (remove_missing){ + + data = hts_remove_missing_data(hts_data = data, + variables_dt = variables_dt, + summarize_var = summarize_var, + summarize_by = summarize_by, + ids = id_cols, + missing_values = missing_values, + not_imputable = not_imputable) + } + # Find location of summary variable: var_location = hts_find_var(summarize_var, variables_dt = variables_dt) - + + tbl_idx = which(names(data) == var_location) + # Select table where this variable lives: var_dt = data[[var_location]] - + # Is this a shared variable? 
var_is_shared = variables_dt[shared_name == summarize_var, is_checkbox][1] == 1 - + # If yes, expand summarize_var: if (var_is_shared) { - + summarize_var = variables_dt[shared_name == summarize_var, variable] for(i in 1:length(summarize_var)){ @@ -118,86 +135,121 @@ hts_prep_data = function(summarize_var = NULL, } + } - # Subset table to these column(s): - subset_cols = c(hts_get_keycols(var_dt), summarize_var) + # only keep ids that are in var_dt + id_cols = intersect(id_cols, names(var_dt)) + # Subset table to these column(s): + wtname = wt_cols[tbl_idx] + + if (weighted){ + + subset_cols = c(id_cols, summarize_var, wtname) + + } else { + + subset_cols = c(id_cols, summarize_var) + + } + var_dt = var_dt[, subset_cols, with=FALSE] - + # If shared variable, melt var_dt: if (var_is_shared) { - + shared_name = variables_dt[variable == summarize_var[[1]], shared_name] + + if (weighted){ + + var_dt = hts_melt_vars( + shared_name = shared_name, + wide_dt = var_dt, + variables_dt = variables_dt, + shared_name_vars = summarize_var, + ids = c(id_cols, wtname), + remove_missing = TRUE, + checkbox_label_sep = ":", + missing_values = c("Missing Response", "995"), + to_single_row = FALSE + ) + + } else { + + var_dt = hts_melt_vars( + shared_name = shared_name, + wide_dt = var_dt, + variables_dt = variables_dt, + shared_name_vars = summarize_var, + ids = id_cols, + remove_missing = TRUE, + checkbox_label_sep = ":", + missing_values = c("Missing Response", "995"), + to_single_row = FALSE + ) + + } - var_dt = hts_melt_vars( - shared_name = shared_name, - wide_dt = var_dt, - variables_dt = variables_dt, - shared_name_vars = summarize_var, - remove_missing = TRUE, - checkbox_label_sep = ":", - missing_values = missing_values, - to_single_row = FALSE - ) summarize_var = shared_name - + setnames(var_dt, shared_name, 'shared_name') - + # make factor levels var_dt$shared_name = factor(var_dt$shared_name, levels = unique(var_dt$shared_name)) - + setnames(var_dt, 'shared_name', shared_name) - + } - + # Identify, then bin, if summarize_var is numeric: v_class = variables_dt[shared_name == summarize_var, data_type][[1]] - + if (!v_class %in% c("integer", "numeric")) { var_dt_num = NULL var_dt_cat = var_dt - + } - + if (v_class %in% c("integer", "numeric")) { - + # remove outliers if (remove_outliers){ - + out = hts_remove_outliers(var_dt, numvar = summarize_var, threshold = threshold) - + var_dt = out[['dt']] - + outlier_table = out[['outlier_description']] - + } - + # save a copy of the un-binned data: var_dt_num = data.table::copy(var_dt) - - + + # bin the data for categorical summaries: var_dt_cat = hts_bin_var(prepped_dt = var_dt, numvar = summarize_var, nbins = 7) - + } - + # Summarize-by variables: if (length(summarize_by) == 0) { - + num_res = var_dt_num cat_res = var_dt_cat - + } - + if (length(summarize_by) > 0) { + for (i in 1:length(summarize_by)){ var = summarize_by[i] @@ -224,44 +276,42 @@ hts_prep_data = function(summarize_var = NULL, } } + byvar_dt = hts_prep_byvar(summarize_by, variables_dt = variables_dt, - hts_data = data) - + hts_data = data, + byvar_ids = id_cols, + byvar_wts = wt_cols) + # Merge by var and summarize var: allow_cartesian_setting = FALSE - + if (var_is_shared == TRUE) { allow_cartesian_setting = TRUE } - + cat_res = merge(var_dt_cat, byvar_dt, all.x = FALSE, all.y = FALSE, allow.cartesian = allow_cartesian_setting) - + + setcolorder(cat_res, intersect(c(id_cols, wt_cols, summarize_var, summarize_by), names(cat_res))) + if (v_class %in% c("integer", "numeric")) { num_res = 
merge(var_dt_num, byvar_dt, all.x = FALSE, all.y = FALSE, allow.cartesian = allow_cartesian_setting) - + + setcolorder(num_res, intersect(c(id_cols, wt_cols, summarize_var, summarize_by), names(cat_res))) + } - + if (!v_class %in% c("integer", "numeric")) { num_res = NULL } - - - } - if (remove_missing){ - hts_data = hts_remove_missing_data(hts_data = data, - variables_dt = variables_dt, - summarize_var = summarize_var, - summarize_by = summarize_by, - missing_values = missing_values, - not_imputable = not_imputable) + } if (!is.null(strataname)) { @@ -270,7 +320,10 @@ hts_prep_data = function(summarize_var = NULL, cat_res = hts_cbind_var(lhs_table = cat_res, rhs_var = strataname, - variable_list = variables_dt) + hts_data = data, + variable_list = variables_dt, + cbind_ids = id_cols, + cbind_wts = wt_cols) } @@ -278,15 +331,18 @@ hts_prep_data = function(summarize_var = NULL, num_res = hts_cbind_var(lhs_table = num_res, rhs_var = strataname, - variable_list = variables_dt) + hts_data = data, + variable_list = variables_dt, + cbind_ids = id_cols, + cbind_wts = wt_cols) } } - + prepped_dt_ls = list("cat" = cat_res, "num" = num_res, "var_is_shared" = var_is_shared) - + # Append outliers: if (v_class %in% c("integer", 'numeric') & remove_outliers) { prepped_dt_ls = list( @@ -294,10 +350,9 @@ hts_prep_data = function(summarize_var = NULL, "num" = num_res, "outliers" = outlier_table) } - - + + return(prepped_dt_ls) - } diff --git a/R/hts_prep_triprate.R b/R/hts_prep_triprate.R index 338c78c..f26efd1 100644 --- a/R/hts_prep_triprate.R +++ b/R/hts_prep_triprate.R @@ -6,6 +6,8 @@ #' format. #' @param trip_name Name of the trip dataset in hts_data. #' @param day_name Name of the day dataset in hts_data. +#' @param ids name of unique identifier in each table in hts_data +#' @param wts name of weight column in each table in hts_data #' @param remove_outliers Boolean whether or not to remove outliers from dataset. #' Default is TRUE. #' @param threshold Threshold to define outliers. Default is 0.975. 
@@ -43,6 +45,8 @@ hts_prep_triprate = function(summarize_by = NULL, variables_dt = variable_list, trip_name = 'trip', day_name = 'day', + ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'), + wts = c('hh_weight', 'person_weight', 'day_weight', 'trip_weight', 'hh_weight'), remove_outliers = TRUE, threshold = 0.975, weighted = TRUE, @@ -51,12 +55,23 @@ hts_prep_triprate = function(summarize_by = NULL, tripdat = hts_data[[trip_name]] daydat = hts_data[[day_name]] - tripratekeys = c("hh_id", "person_id", "day_id") - trip_subset_cols = hts_get_keycols(tripdat) - day_subset_cols = hts_get_keycols(daydat) + trip_index = which(names(hts_data) == trip_name) + day_index = which(names(hts_data) == day_name) - if (weighted & (!"trip_weight" %in% trip_subset_cols | - !"day_weight" %in% day_subset_cols)) { + # Get ids + trip_id = ids[trip_index] + day_id = ids[day_index] + + # Get weights + trip_wt = wts[trip_index] + day_wt = wts[day_index] + + tripratekeys = intersect(names(tripdat), ids[-trip_index]) + trip_subset_cols = intersect(names(tripdat), c(ids, wts)) + day_subset_cols = intersect(names(daydat), c(ids, wts)) + + if (weighted & (!trip_wt %in% trip_subset_cols | + !day_wt %in% day_subset_cols)) { stop("Trip/Day weight not found - are these data weighted?") } @@ -69,7 +84,7 @@ hts_prep_triprate = function(summarize_by = NULL, if (length(summarize_by) == 0) { if (weighted) { - triprate_dt = tripdat[, .(num_trips = sum(trip_weight)), + triprate_dt = tripdat[, .(num_trips = sum(get(trip_wt))), by = tripratekeys] } @@ -95,7 +110,7 @@ hts_prep_triprate = function(summarize_by = NULL, # calculate trip rate triprate_dt[, trip_rate := - ifelse(num_trips == 0, 0, num_trips / day_weight)] + ifelse(num_trips == 0, 0, num_trips / get(day_wt))] triprate_dt[, num_trips := NULL] @@ -105,20 +120,23 @@ hts_prep_triprate = function(summarize_by = NULL, if (length(summarize_by) > 0) { - byvar_dt = hts_prep_byvar(summarize_by, variables_dt = variables_dt, hts_data = hts_data) + byvar_dt = hts_prep_byvar(summarize_by, + variables_dt = variables_dt, + hts_data = hts_data, + byvar_ids = ids) merge_cols = names(byvar_dt)[names(byvar_dt) %in% names(trip_control)] triprate_dt = merge(trip_control, byvar_dt, by = merge_cols) - triprate_cols = hts_get_keycols(triprate_dt) + triprate_cols = intersect(names(triprate_dt), c(ids, wts)) - triprate_cols = triprate_cols[!triprate_cols %in% c("trip_id", "trip_weight")] + triprate_cols = triprate_cols[!triprate_cols %in% c(trip_id, trip_wt)] triprate_cols_all = c(triprate_cols, summarize_by) if (weighted) { - triprate_dt = triprate_dt[, .(num_trips = sum(trip_weight)), + triprate_dt = triprate_dt[, .(num_trips = sum(get(trip_wt))), by = triprate_cols_all] } @@ -134,7 +152,7 @@ hts_prep_triprate = function(summarize_by = NULL, # If one of the by-variables is in trip table, need to expand to # include all levels of the variable for every trip, and fill with zeros: - if ("trip_id" %in% names(byvar_dt)) { + if (trip_id %in% names(byvar_dt)) { # fill in with zeros for zero trips for a given level of xt_var using dcast: dcast_formula = paste0(paste0(triprate_cols, collapse = " + "), @@ -178,7 +196,7 @@ hts_prep_triprate = function(summarize_by = NULL, # calculate trip rate triprate_dt[, trip_rate := - ifelse(num_trips == 0, 0, num_trips / day_weight)] + ifelse(num_trips == 0, 0, num_trips / get(day_wt))] triprate_dt[, num_trips := NULL] diff --git a/R/hts_remove_missing_data.R b/R/hts_remove_missing_data.R index 99ccc91..351ae9b 100644 --- a/R/hts_remove_missing_data.R +++ 
b/R/hts_remove_missing_data.R @@ -6,6 +6,7 @@ #' of variables. #' @param summarize_var Variable to be summarized that has it's missing data #' removed. +#' @param ids names of unique identifiers for each table in hts_data #' @param summarize_by Variable being summarized by that has it's missing data #' removed. Default is NULL. #' @param missing_values Missing values that will be removed. Defaults are 995 and @@ -31,6 +32,7 @@ hts_remove_missing_data = function(hts_data, variables_dt, summarize_var, + ids = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'), summarize_by = NULL, missing_values = c("Missing Response", "995"), not_imputable = -1){ @@ -44,10 +46,16 @@ hts_remove_missing_data = function(hts_data, !get(summarize_var_name) %in% c(missing_values, not_imputable) | is.na(get(summarize_var_name))] - summarize_var_id = hts_get_keycols(summarize_var_tbl, - ids = TRUE, - weights = FALSE, - priority = TRUE) + # get ids that are in this table + ids_in_table = intersect(ids, names(summarize_var_tbl)) + + # get id with the most unique counts to filter on + max_index = which.max( + sapply(summarize_var_tbl[, ids_in_table, with = FALSE], function(x) length(unique(x))) + ) + + summarize_var_id = ids_in_table[max_index] + hts_data = hts_filter_data( hts_data = hts_data, @@ -69,10 +77,18 @@ hts_remove_missing_data = function(hts_data, !get(summarize_by_name) %in% c(missing_values, not_imputable) | is.na(get(summarize_by_name))] - summarize_by_id = hts_get_keycols(summarize_by_tbl, - ids = TRUE, - weights = FALSE, - priority = TRUE) + # get id with the most unique counts to filter on + + + # get ids that are in this table + ids_in_table = intersect(ids, names(summarize_by_tbl)) + + max_index = which.max( + sapply(summarize_by_tbl[, ids_in_table, with = FALSE], function(x) length(unique(x))) + ) + + summarize_by_id = ids_in_table[max_index] + hts_data = hts_filter_data( hts_data = hts_data, @@ -85,3 +101,6 @@ hts_remove_missing_data = function(hts_data, return(hts_data) } + +## quiets concerns of R CMD check +utils::globalVariables(c("ids", "ids_in_table")) \ No newline at end of file diff --git a/R/hts_summary.R b/R/hts_summary.R index 656d47f..5f54b8e 100644 --- a/R/hts_summary.R +++ b/R/hts_summary.R @@ -10,6 +10,7 @@ #' variable being summarized is categorical), 'checkbox' (when the variable being #' summarized is derived from a multiple response, aka select-all-that-apply question) #' or 'numeric', when the variable being summarized is numeric. +#' @param id_cols names of possible ids in prepped_dt to return unique counts of #' @param weighted Whether the data is weighted. Default is TRUE. #' @param se Whether to calculate standard error. Default is FALSE. Will be set #' to FALSE if weighted is FALSE. 
@@ -74,6 +75,7 @@ hts_summary = function( summarize_var, summarize_by = NULL, summarize_vartype = 'categorical', + id_cols = c('hh_id', 'person_id', 'day_id', 'trip_id', 'vehicle_id'), weighted = TRUE, se = FALSE, wtname = NULL, @@ -113,7 +115,9 @@ hts_summary = function( cat_ns = hts_get_ns( prepped_dt = prepped_dt, - weighted = weighted + weighted = weighted, + ids = id_cols, + wt_col = wtname ) # something here to check if the number of unique values is more than 20 diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..079f8b3 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: https://rsginc.github.io/travelSurveyTools/ +template: + bootstrap: 5 + diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/.nojekyll @@ -0,0 +1 @@ + diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..e48b725 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,86 @@ + + +
This project uses a feature-branch naming convention and workflow. main is the main branch (not master); base your work off of main. Contribute to the project by making changes to your own feature branch and issuing pull requests when you're ready to integrate into the main branch.

1. Pull the main branch (git pull), and if necessary run git checkout main to switch to main.
2. Create a feature branch and check out your branch, e.g., git checkout -b crosstab-performance. You can use your initials to prefix a feature branch, e.g., aa-crosstab-performance. For example: git checkout main; git pull; git checkout -b aa-crosstab-performance.
3. Commit changes related to your feature and push them to GitHub. You can push changes to your feature branch at any time.
4. Create a pull request on GitHub when you're ready to have your work reviewed. You can submit a PR before you are done if you want guidance on your work in progress.
5. Make changes or respond to comments in your pull-request reviews. New commits pushed to your branch will update the pull request.
6. When your pull request is approved, the approver will merge your branch into main and may delete your branch from GitHub. To remove deleted feature branches from your local copy of the repository, run git remote prune origin.

Do not attempt to push additional commits to a merged pull request. Instead, start a new feature branch and issue a new pull request. Remember to update and branch off of main whenever you start a new feature, e.g., git checkout main; git pull origin main; git checkout -b a-new-feature.
Version 3, 29 June 2007
Copyright © 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+The GNU General Public License is a free, copyleft license for software and other kinds of works.
+The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program–to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too.
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.
+To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others.
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it.
+For the developers’ and authors’ protection, the GPL clearly explains that there is no warranty for this free software. For both users’ and authors’ sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions.
+Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users’ freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users.
+Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free.
+The precise terms and conditions for copying, distribution and modification follow.
+“This License” refers to version 3 of the GNU General Public License.
+“Copyright” also means copyright-like laws that apply to other kinds of works, such as semiconductor masks.
+“The Program” refers to any copyrightable work licensed under this License. Each licensee is addressed as “you”. “Licensees” and “recipients” may be individuals or organizations.
+To “modify” a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a “modified version” of the earlier work or a work “based on” the earlier work.
+A “covered work” means either the unmodified Program or a work based on the Program.
+To “propagate” a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well.
+To “convey” a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying.
+An interactive user interface displays “Appropriate Legal Notices” to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion.
+The “source code” for a work means the preferred form of the work for making modifications to it. “Object code” means any non-source form of a work.
+A “Standard Interface” means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language.
+The “System Libraries” of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A “Major Component”, in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it.
+The “Corresponding Source” for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work’s System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work.
+The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source.
+The Corresponding Source for a work in source code form is that same work.
+All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law.
+You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you.
+Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary.
+No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures.
+When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work’s users, your or third parties’ legal rights to forbid circumvention of technological measures.
+You may convey verbatim copies of the Program’s source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program.
+You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee.
+You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions:
+A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an “aggregate” if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation’s users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate.
+You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways:
+A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work.
+A “User Product” is either (1) a “consumer product”, which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, “normally used” refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product.
+“Installation Information” for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made.
+If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM).
+The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network.
+Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying.
+“Additional permissions” are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions.
+When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission.
+Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms:
+All other non-permissive additional terms are considered “further restrictions” within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying.
+If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms.
+Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way.
+You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11).
+However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.
+Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.
+Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10.
+You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so.
+Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License.
+An “entity transaction” is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party’s predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts.
+You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it.
+A “contributor” is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor’s “contributor version”.
+A contributor’s “essential patent claims” are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, “control” includes the right to grant patent sublicenses in a manner consistent with the requirements of this License.
+Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor’s essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version.
+In the following three paragraphs, a “patent license” is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To “grant” such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party.
+If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. “Knowingly relying” means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient’s use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid.
+If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it.
+A patent license is “discriminatory” if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007.
+Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law.
+If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program.
+Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such.
+The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation.
+If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy’s public statement of acceptance of a version permanently authorizes you to choose that version for the Program.
+Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version.
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.
+END OF TERMS AND CONDITIONS
+If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.
+To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the “copyright” line and a pointer to where the full notice is found.
+<one line to give the program's name and a brief idea of what it does.>
+Copyright (C) <year> <name of author>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
+If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode:
+<program> Copyright (C) <year> <name of author>
+This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type 'show c' for details.
The hypothetical commands show w and show c should show the appropriate parts of the General Public License. Of course, your program’s commands might be different; for a GUI interface, you would use an “about box”.
You should also get your employer (if you work as a programmer) or school, if any, to sign a “copyright disclaimer” for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>.
+The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>.
+getting_started.Rmd
The travelSurveyTools
package provides tools for R users
+to aid use of data from household travel surveys. Some possible uses
+include creating custom cross tabs, labeling data, and calculating trip
+rates.
travelSurveyTools
assumes the data have the
+structure shown below. If this does not reflect the structure of your
+data
hts_data is a list of five core tables:
+Household dataset
+Person dataset
+Day dataset
+In addition to data from the household travel survey. The codebook is +also required. The codebook is assumed to be in two parts:
+A dataset containing information about all variables existing in the +hh, person, day, trip, and vehicle tables. The variables are as +follows:
+A dataset containing the values for all variables found in +variable_list The variables are as follows:
+In order to create summaries of our data we first need to prepare our
+data. We can do this by using hts_prep_data. This will
+return a categorical (cat) and numeric (num) (if applicable) prepped
+data table that can be used to create summaries.
+library(travelSurveyTools)
+library(data.table)
+library(srvyr)
+
+# Load data
+data("test_data")
+data("variable_list")
+
+DT = hts_prep_data(summarize_var = 'speed_mph',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
Numeric variables will be automatically binned in hts_prep_data
+to create categorical summaries. Here we can
+make a categorical summary of a numeric variable using hts_summary.
+speed_cat_summary = hts_summary(prepped_dt = DT$cat,
+ summarize_var = 'speed_mph',
+ summarize_by = NULL,
+ summarize_vartype = 'categorical',
+ weighted = FALSE)
+
+speed_cat_summary$summary
## $unwtd
+## speed_mph count prop
+## 1: 1 or less 575 0.038244097
+## 2: 1-9 6537 0.434785500
+## 3: 9-17 3661 0.243498503
+## 4: 17-25 2249 0.149584303
+## 5: 25-33 1012 0.067309611
+## 6: 33-41 533 0.035450615
+## 7: 41-43 76 0.005054872
+## 8: 43 or more 392 0.026072498
+Additionally, for numeric variables we can create numeric summaries.
+
+speed_num_summary = hts_summary(prepped_dt = DT$num,
+ summarize_var = 'speed_mph',
+ summarize_by = NULL,
+ summarize_vartype = 'numeric',
+ weighted = FALSE)
+
+speed_num_summary$summary
## $unwtd
+## count min max mean median
+## 1: 15035 0 109.2184 12.89507 9.728796
+Additionally, we can use weighted data by setting weighted = TRUE
+and specifying the name of the weight to be used (wtname).
+speed_cat_summary = hts_summary(prepped_dt = DT$cat,
+ summarize_var = 'speed_mph',
+ summarize_by = NULL,
+ summarize_vartype = 'categorical',
+ weighted = TRUE,
+ wtname = 'trip_weight')
+
+speed_cat_summary$summary
## $unwtd
+## speed_mph count prop
+## 1: 1 or less 575 0.038244097
+## 2: 1-9 6537 0.434785500
+## 3: 9-17 3661 0.243498503
+## 4: 17-25 2249 0.149584303
+## 5: 25-33 1012 0.067309611
+## 6: 33-41 533 0.035450615
+## 7: 41-43 76 0.005054872
+## 8: 43 or more 392 0.026072498
+##
+## $wtd
+## speed_mph count prop est
+## 1: 1 or less 575 0.038596067 291500
+## 2: 1-9 6537 0.433679180 3275398
+## 3: 9-17 3661 0.243523176 1839229
+## 4: 17-25 2249 0.151377218 1143289
+## 5: 25-33 1012 0.067742784 511633
+## 6: 33-41 533 0.034100916 257550
+## 7: 41-43 76 0.005215169 39388
+## 8: 43 or more 392 0.025765490 194596
+##
+## $weight_name
+## [1] "trip_weight"
+Additionally, by specifying se = TRUE we can calculate
+standard errors.
+speed_cat_summary = hts_summary(prepped_dt = DT$cat,
+ summarize_var = 'speed_mph',
+ summarize_by = NULL,
+ summarize_vartype = 'categorical',
+ weighted = TRUE,
+ wtname = 'trip_weight',
+ se = TRUE)
+
+speed_cat_summary$summary
## $unwtd
+## speed_mph count prop
+## 1: 1 or less 575 0.038244097
+## 2: 1-9 6537 0.434785500
+## 3: 9-17 3661 0.243498503
+## 4: 17-25 2249 0.149584303
+## 5: 25-33 1012 0.067309611
+## 6: 33-41 533 0.035450615
+## 7: 41-43 76 0.005054872
+## 8: 43 or more 392 0.026072498
+##
+## $wtd
+## speed_mph count prop prop_se est est_se
+## 1: 1 or less 575 0.038596067 0.0018091628 291500 13731.043
+## 2: 1-9 6537 0.433679180 0.0046545746 3275398 38298.012
+## 3: 9-17 3661 0.243523176 0.0040290132 1839229 31595.081
+## 4: 17-25 2249 0.151377218 0.0033752879 1143289 26070.445
+## 5: 25-33 1012 0.067742784 0.0023796356 511633 18152.124
+## 6: 33-41 533 0.034100916 0.0016840894 257550 12764.269
+## 7: 41-43 76 0.005215169 0.0006874633 39388 5196.226
+## 8: 43 or more 392 0.025765490 0.0014748922 194596 11170.619
+##
+## $weight_name
+## [1] "trip_weight"
+If we want to summarize a variable by another variable (e.g., mode_type
+by a person’s race, mode_type by a person’s ethnicity) we can use the
+summarize_by argument.
+DT = hts_prep_data(summarize_var = 'mode_type',
+ summarize_by = 'race',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+
+mode_by_race_summary = hts_summary(prepped_dt = DT$cat,
+ summarize_var = 'mode_type',
+ summarize_by = 'race',
+ summarize_vartype = 'categorical',
+ weighted = TRUE,
+ wtname = 'trip_weight',
+ se = TRUE)
+
+
+mode_by_race_summary$summary
## $unwtd
+## race mode_type count prop
+## 1: African American or Black 1 146 0.2997946612
+## 2: African American or Black 3 6 0.0123203285
+## 3: African American or Black 4 2 0.0041067762
+## 4: African American or Black 6 9 0.0184804928
+## 5: African American or Black 7 7 0.0143737166
+## 6: African American or Black 8 198 0.4065708419
+## 7: African American or Black 10 1 0.0020533881
+## 8: African American or Black 13 56 0.1149897331
+## 9: African American or Black 995 62 0.1273100616
+## 10: American Indian or Alaska Native 1 15 0.1724137931
+## 11: American Indian or Alaska Native 2 6 0.0689655172
+## 12: American Indian or Alaska Native 3 1 0.0114942529
+## 13: American Indian or Alaska Native 4 3 0.0344827586
+## 14: American Indian or Alaska Native 5 1 0.0114942529
+## 15: American Indian or Alaska Native 6 1 0.0114942529
+## 16: American Indian or Alaska Native 8 58 0.6666666667
+## 17: American Indian or Alaska Native 13 2 0.0229885057
+## 18: Asian 1 738 0.2703296703
+## 19: Asian 2 52 0.0190476190
+## 20: Asian 3 4 0.0014652015
+## 21: Asian 5 2 0.0007326007
+## 22: Asian 6 17 0.0062271062
+## 23: Asian 7 8 0.0029304029
+## 24: Asian 8 1621 0.5937728938
+## 25: Asian 11 36 0.0131868132
+## 26: Asian 12 2 0.0007326007
+## 27: Asian 13 170 0.0622710623
+## 28: Asian 14 7 0.0025641026
+## 29: Asian 995 73 0.0267399267
+## 30: Native Hawaiian or other Pacific Islander 1 4 0.1481481481
+## 31: Native Hawaiian or other Pacific Islander 7 1 0.0370370370
+## 32: Native Hawaiian or other Pacific Islander 8 21 0.7777777778
+## 33: Native Hawaiian or other Pacific Islander 995 1 0.0370370370
+## 34: White 1 2355 0.2591899626
+## 35: White 2 238 0.0261941448
+## 36: White 3 6 0.0006603566
+## 37: White 4 12 0.0013207132
+## 38: White 5 1 0.0001100594
+## 39: White 6 31 0.0034118424
+## 40: White 7 49 0.0053929122
+## 41: White 8 5781 0.6362535769
+## 42: White 11 29 0.0031917235
+## 43: White 12 6 0.0006603566
+## 44: White 13 351 0.0386308607
+## 45: White 14 22 0.0024213075
+## 46: White 995 205 0.0225621836
+## 47: Two or more 1 298 0.3197424893
+## 48: Two or more 2 31 0.0332618026
+## 49: Two or more 4 6 0.0064377682
+## 50: Two or more 6 10 0.0107296137
+## 51: Two or more 7 3 0.0032188841
+## 52: Two or more 8 486 0.5214592275
+## 53: Two or more 11 2 0.0021459227
+## 54: Two or more 13 57 0.0611587983
+## 55: Two or more 14 1 0.0010729614
+## 56: Two or more 995 38 0.0407725322
+## 57: Other race 1 68 0.2281879195
+## 58: Other race 2 2 0.0067114094
+## 59: Other race 3 1 0.0033557047
+## 60: Other race 5 1 0.0033557047
+## 61: Other race 6 2 0.0067114094
+## 62: Other race 7 2 0.0067114094
+## 63: Other race 8 207 0.6946308725
+## 64: Other race 10 1 0.0033557047
+## 65: Other race 11 3 0.0100671141
+## 66: Other race 13 9 0.0302013423
+## 67: Other race 14 1 0.0033557047
+## 68: Other race 995 1 0.0033557047
+## 69: Prefer not to answer 1 335 0.3067765568
+## 70: Prefer not to answer 2 30 0.0274725275
+## 71: Prefer not to answer 4 1 0.0009157509
+## 72: Prefer not to answer 6 7 0.0064102564
+## 73: Prefer not to answer 7 6 0.0054945055
+## 74: Prefer not to answer 8 602 0.5512820513
+## 75: Prefer not to answer 11 10 0.0091575092
+## 76: Prefer not to answer 12 2 0.0018315018
+## 77: Prefer not to answer 13 51 0.0467032967
+## 78: Prefer not to answer 14 3 0.0027472527
+## 79: Prefer not to answer 995 45 0.0412087912
+## race mode_type count prop
+##
+## $wtd
+## race mode_type count prop
+## 1: African American or Black 1 146 0.2833781746
+## 2: African American or Black 3 6 0.0094980635
+## 3: African American or Black 4 2 0.0036587038
+## 4: African American or Black 6 9 0.0121585495
+## 5: African American or Black 7 7 0.0128354526
+## 6: African American or Black 8 198 0.4373479110
+## 7: African American or Black 10 1 0.0033031155
+## 8: African American or Black 13 56 0.1216968845
+## 9: African American or Black 995 62 0.1161231449
+## 10: American Indian or Alaska Native 1 15 0.1974696041
+## 11: American Indian or Alaska Native 2 6 0.0400602723
+## 12: American Indian or Alaska Native 3 1 0.0202899304
+## 13: American Indian or Alaska Native 4 3 0.0235633378
+## 14: American Indian or Alaska Native 5 1 0.0024680453
+## 15: American Indian or Alaska Native 6 1 0.0193806505
+## 16: American Indian or Alaska Native 8 58 0.6717759534
+## 17: American Indian or Alaska Native 13 2 0.0249922062
+## 18: Asian 1 738 0.2709798598
+## 19: Asian 2 52 0.0184360043
+## 20: Asian 3 4 0.0014247932
+## 21: Asian 5 2 0.0007731039
+## 22: Asian 6 17 0.0065739430
+## 23: Asian 7 8 0.0021547437
+## 24: Asian 8 1621 0.5913549871
+## 25: Asian 11 36 0.0116945676
+## 26: Asian 12 2 0.0003737522
+## 27: Asian 13 170 0.0653057015
+## 28: Asian 14 7 0.0023997671
+## 29: Asian 995 73 0.0285287767
+## 30: Native Hawaiian or other Pacific Islander 1 4 0.1291936433
+## 31: Native Hawaiian or other Pacific Islander 7 1 0.0436138905
+## 32: Native Hawaiian or other Pacific Islander 8 21 0.7842848735
+## 33: Native Hawaiian or other Pacific Islander 995 1 0.0429075927
+## 34: White 1 2355 0.2594362966
+## 35: White 2 238 0.0245521612
+## 36: White 3 6 0.0010643381
+## 37: White 4 12 0.0012790864
+## 38: White 5 1 0.0001231194
+## 39: White 6 31 0.0037034243
+## 40: White 7 49 0.0056438130
+## 41: White 8 5781 0.6347583917
+## 42: White 11 29 0.0033753973
+## 43: White 12 6 0.0007743623
+## 44: White 13 351 0.0400750526
+## 45: White 14 22 0.0023469235
+## 46: White 995 205 0.0228676335
+## 47: Two or more 1 298 0.3126455742
+## 48: Two or more 2 31 0.0319984514
+## 49: Two or more 4 6 0.0062069589
+## 50: Two or more 6 10 0.0094282654
+## 51: Two or more 7 3 0.0013571148
+## 52: Two or more 8 486 0.5261733782
+## 53: Two or more 11 2 0.0013760512
+## 54: Two or more 13 57 0.0681082241
+## 55: Two or more 14 1 0.0007258986
+## 56: Two or more 995 38 0.0419800830
+## 57: Other race 1 68 0.2323740485
+## 58: Other race 2 2 0.0101936395
+## 59: Other race 3 1 0.0052948827
+## 60: Other race 5 1 0.0034859078
+## 61: Other race 6 2 0.0090448745
+## 62: Other race 7 2 0.0080017430
+## 63: Other race 8 207 0.6911868592
+## 64: Other race 10 1 0.0020334462
+## 65: Other race 11 3 0.0049053589
+## 66: Other race 13 9 0.0274977388
+## 67: Other race 14 1 0.0047337044
+## 68: Other race 995 1 0.0012477965
+## 69: Prefer not to answer 1 335 0.3054086719
+## 70: Prefer not to answer 2 30 0.0320972220
+## 71: Prefer not to answer 4 1 0.0003938309
+## 72: Prefer not to answer 6 7 0.0050475397
+## 73: Prefer not to answer 7 6 0.0062019341
+## 74: Prefer not to answer 8 602 0.5485360509
+## 75: Prefer not to answer 11 10 0.0071576064
+## 76: Prefer not to answer 12 2 0.0026285506
+## 77: Prefer not to answer 13 51 0.0485152754
+## 78: Prefer not to answer 14 3 0.0006088121
+## 79: Prefer not to answer 995 45 0.0434045059
+## race mode_type count prop
+## prop_se est est_se
+## 1: 0.0234687720 66145 6376.9579
+## 2: 0.0045022896 2217 1054.2367
+## 3: 0.0035250133 854 824.5439
+## 4: 0.0047264678 2838 1106.2444
+## 5: 0.0060509229 2996 1421.6027
+## 6: 0.0263961006 102084 8316.6518
+## 7: 0.0032969390 771 771.0000
+## 8: 0.0176656865 28406 4407.8285
+## 9: 0.0165432306 27105 4085.3028
+## 10: 0.0509370755 7601 2185.3872
+## 11: 0.0172849748 1542 662.1665
+## 12: 0.0200446154 781 781.0000
+## 13: 0.0191960125 907 747.4497
+## 14: 0.0024823327 95 95.0000
+## 15: 0.0191641602 746 746.0000
+## 16: 0.0591888934 25858 4113.3865
+## 17: 0.0181764028 962 706.5006
+## 18: 0.0098165289 370488 15432.1286
+## 19: 0.0028967163 25206 3990.1727
+## 20: 0.0007779163 1948 1064.1078
+## 21: 0.0006818806 1057 932.6673
+## 22: 0.0018333236 8988 2514.1969
+## 23: 0.0009184436 2946 1256.5048
+## 24: 0.0108526487 808510 22166.8778
+## 25: 0.0023410399 15989 3216.3905
+## 26: 0.0003615032 511 494.2913
+## 27: 0.0055083035 89287 7761.1131
+## 28: 0.0010244254 3281 1401.8481
+## 29: 0.0037485044 39005 5192.9653
+## 30: 0.0720199776 2195 1316.3076
+## 31: 0.0426623863 741 741.0000
+## 32: 0.0871191335 13325 3142.9172
+## 33: 0.0420024696 729 729.0000
+## 34: 0.0052972447 1186349 26399.8331
+## 35: 0.0018435264 112272 8482.0491
+## 36: 0.0004424597 4867 2024.1818
+## 37: 0.0004102564 5849 1876.4708
+## 38: 0.0001231175 563 563.0000
+## 39: 0.0007402465 16935 3388.5176
+## 40: 0.0009107528 25808 4171.2191
+## 41: 0.0058194896 2902620 36947.1458
+## 42: 0.0006978522 15435 3193.9582
+## 43: 0.0003707955 3541 1696.0954
+## 44: 0.0023825741 183255 11019.7669
+## 45: 0.0005982994 10732 2737.8349
+## 46: 0.0018073181 104569 8316.2724
+## 47: 0.0174664936 148592 9930.0737
+## 48: 0.0065075536 15208 3138.9053
+## 49: 0.0028205365 2950 1344.0506
+## 50: 0.0033602107 4481 1602.8141
+## 51: 0.0008690064 645 412.8248
+## 52: 0.0188331919 250076 12819.7938
+## 53: 0.0009763951 654 464.0060
+## 54: 0.0098454437 32370 4853.0539
+## 55: 0.0007259125 345 345.0000
+## 56: 0.0077499675 19952 3763.7078
+## 57: 0.0282172813 35197 4870.6765
+## 58: 0.0072057646 1544 1097.7295
+## 59: 0.0052788052 802 802.0000
+## 60: 0.0034816429 528 528.0000
+## 61: 0.0063697618 1370 969.2030
+## 62: 0.0059919642 1212 911.2358
+## 63: 0.0308116056 104692 8373.0922
+## 64: 0.0020339135 308 308.0000
+## 65: 0.0040838658 743 619.5947
+## 66: 0.0107307187 4165 1646.9584
+## 67: 0.0047219948 717 717.0000
+## 68: 0.0012490631 189 189.0000
+## 69: 0.0158953559 169055 10456.3448
+## 70: 0.0063898004 17767 3597.8422
+## 71: 0.0003939242 218 218.0000
+## 72: 0.0023550813 2794 1306.4556
+## 73: 0.0027005341 3433 1499.1951
+## 74: 0.0172056215 303635 13888.1934
+## 75: 0.0027357248 3962 1518.6874
+## 76: 0.0018865202 1455 1045.7321
+## 77: 0.0075710397 26855 4295.1524
+## 78: 0.0003754133 337 207.5867
+## 79: 0.0071456884 24026 4042.1333
+## prop_se est est_se
+##
+## $weight_name
+## [1] "trip_weight"
+summarize_by can be used with an unlimited number of
+variables. To use more than one summarize_by variable, pass
+a list to the argument.
+DT = hts_prep_data(summarize_var = 'mode_type',
+ summarize_by = c('race', 'ethnicity'),
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+
+mode_by_race_ethnicity_summary = hts_summary(prepped_dt = DT$cat,
+ summarize_var = 'mode_type',
+ summarize_by = c('race', 'ethnicity'),
+ summarize_vartype = 'categorical',
+ weighted = TRUE,
+ wtname = 'trip_weight',
+ se = TRUE)
+
+
+head(mode_by_race_ethnicity_summary$summary$wtd, 10)
## race ethnicity
+## 1: African American or Black Not of Hispanic, Latino, or Spanish origin
+## 2: African American or Black Not of Hispanic, Latino, or Spanish origin
+## 3: African American or Black Not of Hispanic, Latino, or Spanish origin
+## 4: African American or Black Not of Hispanic, Latino, or Spanish origin
+## 5: African American or Black Not of Hispanic, Latino, or Spanish origin
+## 6: African American or Black Not of Hispanic, Latino, or Spanish origin
+## 7: African American or Black Not of Hispanic, Latino, or Spanish origin
+## 8: African American or Black Not of Hispanic, Latino, or Spanish origin
+## 9: African American or Black Mexican, Mexican American, Chicano
+## 10: African American or Black Mexican, Mexican American, Chicano
+## mode_type count prop prop_se est est_se
+## 1: 1 90 0.234234181 0.025626160 39323 4832.6785
+## 2: 3 2 0.004783207 0.003446115 803 579.0889
+## 3: 4 2 0.005086997 0.004896995 854 824.5439
+## 4: 6 4 0.007392229 0.004627275 1241 778.6525
+## 5: 7 5 0.012628143 0.006726073 2120 1135.6683
+## 6: 8 179 0.546459057 0.030972856 91739 7884.0218
+## 7: 13 24 0.073415972 0.016791110 12325 2933.9364
+## 8: 995 43 0.116000214 0.019718403 19474 3509.1793
+## 9: 3 4 0.746962493 0.203291194 1414 881.0363
+## 10: 8 2 0.253037507 0.203291194 479 419.9017
+hts_summary can also be used to calculate trip rates.
+DT = hts_prep_triprate(summarize_by = 'employment',
+ variables_dt = variable_list,
+ trip_name = 'trip',
+ day_name = 'day',
+ hts_data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+
+trip_rate_by_employment_summary = hts_summary(prepped_dt = DT$num,
+ summarize_var = 'num_trips_wtd',
+ summarize_by = 'employment',
+ summarize_vartype = 'numeric',
+ weighted = TRUE,
+ wtname = 'day_weight',
+ se = TRUE)
+
+head(trip_rate_by_employment_summary$summary$wtd, 10)
## employment count min max mean mean_se median
+## 1: 1 2012 0 59.17021 3.817292 0.0871695 2.478363
+## 2: 2 307 0 54.24561 3.795345 0.2380826 2.414773
+## 3: 3 189 0 54.81752 4.128966 0.2921698 3.361946
+## 4: 5 899 0 57.84615 3.629484 0.1354622 2.154110
+## 5: 6 181 0 48.29293 3.411580 0.2988906 1.842215
+## 6: 7 37 0 44.70815 3.686618 0.7769508 2.448465
+## 7: 8 24 0 24.23790 2.513073 0.6037672 1.613707
+## 8: 995 568 0 58.38776 2.000315 0.1028051 1.146023
+To label values we can use factorize_column.
+trip_rate_by_employment_summary$summary$wtd$employment = factorize_column(
+ trip_rate_by_employment_summary$summary$wtd$employment,
+ 'employment',
+ value_labels,
+ variable_colname = 'variable',
+ value_colname = 'value',
+ value_label_colname = 'label',
+ value_order_colname = 'val_order'
+)
+
+
+trip_rate_by_employment_summary$summary$wtd
## employment
+## 1: Employed full-time (35+ hours/week, paid)
+## 2: Employed part-time (fewer than 35 hours/week, paid)
+## 3: Self-employed
+## 4: Not employed and not looking for work (e.g., retired, stay-at-home parent, student)
+## 5: Unemployed and looking for work
+## 6: Unpaid volunteer or intern
+## 7: Employed, but not currently working (e.g., on leave, furloughed 100%)
+## 8: Missing Response
+## count min max mean mean_se median
+## 1: 2012 0 59.17021 3.817292 0.0871695 2.478363
+## 2: 307 0 54.24561 3.795345 0.2380826 2.414773
+## 3: 189 0 54.81752 4.128966 0.2921698 3.361946
+## 4: 899 0 57.84615 3.629484 0.1354622 2.154110
+## 5: 181 0 48.29293 3.411580 0.2988906 1.842215
+## 6: 37 0 44.70815 3.686618 0.7769508 2.448465
+## 7: 24 0 24.23790 2.513073 0.6037672 1.613707
+## 8: 568 0 58.38776 2.000315 0.1028051 1.146023
+hts_summary creates outputs that can easily be used to
+create visuals.
+library(ggplot2)
+
+p = ggplot(
+ trip_rate_by_employment_summary$summary$wtd,
+ aes(x = mean, y = employment)) +
+ geom_bar(stat = 'identity') +
+ geom_errorbar(
+ aes(xmin = (mean - mean_se),
+ xmax = (mean + mean_se),
+ width = .2)
+ ) +
+ labs(x = 'Mean Trip Rate',
+ y = 'Employment') +
+ scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50),
+ limits = rev)
+
+ print(p)
travelSurveyTools is an R package that empowers users of household travel survey (HTS) data to create meaningful summaries of their data. Currently, travelSurveyTools is compatible with RSG, Inc. HTS datasets, but we hope to expand to any travel survey. If you would like to collaborate, please contact Suzanne Childress at schildress@psrc.org. These datasets usually contain six tables: household, person, day, trip, vehicle, and location. In the future, we may expand travelSurveyTools to work with other types of travel survey data or other types of surveys.
travelSurveyTools is in active development and is open-source; anyone can contribute 🤝. See the CONTRIBUTING page to learn how.
Set config - usethis::use_git_config(user.name = {"username"}, user.email = {your_email@email.com})
Go to the GitHub page to generate a token - usethis::create_github_token()
Paste your PAT into the pop-up that follows - credentials::set_github_pat()
Lastly, remotes::install_github() will work - remotes::install_github('RSGInc/travelSurveyTools')
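Put together, the installation steps above can be run as a short R script (a sketch; replace the placeholder name and email with your own, and respond to the token prompts as they appear):

# One-time Git identity setup (replace the placeholders)
usethis::use_git_config(user.name = "Your Name", user.email = "your_email@email.com")

# Generate a GitHub personal access token (opens a browser page)
usethis::create_github_token()

# Paste the token into the prompt so install_github() can authenticate
credentials::set_github_pat()

# Install travelSurveyTools from GitHub
remotes::install_github('RSGInc/travelSurveyTools')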
Some of the things this package enables include:
* Cross tabs with an unlimited number of variables
* Summarizes numeric, categorical, date, and date-time variables
* Accepts both weighted and unweighted data
* Numeric summaries return means, medians, and summaries with binned data
* Ability to use customized datasets (e.g., filtered, binned, renamed data)
* Data labeling helper functions
* Returns sample sizes/unweighted counts
* Standard errors calculated with survey statistics
* Ability to specify custom weights
* Trip rate calculations
+├─dplyr 1.1.4
+│ ├─cli 3.6.2
+│ ├─generics 0.1.3
+│ ├─glue 1.6.2
+│ ├─lifecycle 1.0.4
+│ │ ├─cli
+│ │ ├─glue
+│ │ └─rlang 1.1.2
+│ ├─magrittr 2.0.3
+│ ├─pillar 1.9.0
+│ │ ├─cli
+│ │ ├─fansi 1.0.6
+│ │ ├─glue
+│ │ ├─lifecycle
+│ │ ├─rlang
+│ │ ├─utf8 1.2.4
+│ │ └─vctrs 0.6.5
+│ │ ├─cli
+│ │ ├─glue
+│ │ ├─lifecycle
+│ │ └─rlang
+│ ├─R6 2.5.1
+│ ├─rlang
+│ ├─tibble 3.2.1
+│ │ ├─fansi
+│ │ ├─lifecycle
+│ │ ├─magrittr
+│ │ ├─pillar
+│ │ ├─pkgconfig 2.0.3
+│ │ ├─rlang
+│ │ └─vctrs
+│ ├─tidyselect 1.2.0
+│ │ ├─cli
+│ │ ├─glue
+│ │ ├─lifecycle
+│ │ ├─rlang
+│ │ ├─vctrs
+│ │ └─withr 2.5.2
+│ └─vctrs
+├─srvyr 1.2.0
+│ ├─dplyr
+│ ├─magrittr
+│ ├─rlang
+│ ├─survey 4.2-1
+│ │ ├─Matrix 1.6-1.1 -> 1.6-4
+│ │ │ └─lattice 0.21-9 -> 0.22-5
+│ │ ├─survival 3.5-7
+│ │ │ └─Matrix
+│ │ ├─lattice
+│ │ ├─minqa 1.2.6
+│ │ │ └─Rcpp 1.0.11
+│ │ ├─numDeriv 2016.8-1.1
+│ │ └─mitools 2.4
+│ │ └─DBI 1.2.0
+│ ├─tibble
+│ ├─tidyr 1.3.0
+│ │ ├─cli
+│ │ ├─dplyr
+│ │ ├─glue
+│ │ ├─lifecycle
+│ │ ├─magrittr
+│ │ ├─purrr 1.0.2
+│ │ │ ├─cli
+│ │ │ ├─lifecycle
+│ │ │ ├─magrittr
+│ │ │ ├─rlang
+│ │ │ └─vctrs
+│ │ ├─rlang
+│ │ ├─stringr 1.5.1
+│ │ │ ├─cli
+│ │ │ ├─glue
+│ │ │ ├─lifecycle
+│ │ │ ├─magrittr
+│ │ │ ├─rlang
+│ │ │ ├─stringi 1.8.3
+│ │ │ └─vctrs
+│ │ ├─tibble
+│ │ ├─tidyselect
+│ │ └─vctrs
+│ ├─tidyselect
+│ └─vctrs
+└─stringr
+day.Rd
A dataset containing information about participant travel days from the 2023 Puget Sound Regional Council Household Travel Survey. The variables are as follows:
+day
A data frame with 5,602 rows and 13 columns:
8 digit household ID
10 digit person ID
12 digit day ID
Take-out/prepared food delivered to home, response codes
Someone came to do work at home (e.g., babysitter, housecleaning, lawn), response codes
Groceries delivered to home, response codes
Received packages at home (e.g., USPS, FedEx, UPS), response codes
Received personal packages at work, response codes
Received packages at another location (e.g., Amazon Locker, package pick-up point), response codes
Other item delivered to home (e.g., appliance), response codes
None of the above, response codes
Location at the beginning of the day, response codes
Location at the end of the day, response codes
factorize_column.Rd
Factorize a column. This function is typically not called directly, but is called as part of the factorize_df function.
+factorize_column(
+ x,
+ var_str,
+ vals_df,
+ variable_colname = "variable",
+ value_colname = "value",
+ value_label_colname = "value_label",
+ value_order_colname = "value",
+ extra_labels = NULL,
+ add_na = TRUE
+)
A vector (or variable) to be 'factorized' or labeled. x is often a single variable or column within a dataframe.
The name of the vector or variable to be factorized (e.g., "sample_segment").
A dataframe of variable labels (i.e., factor levels and labels) with the format as specified below.
The name of the variable column in vals_df. Default is 'variable'.
The name of the value column in vals_df. Default is 'value'.
The name of the value label column in vals_df. Default is 'value_label'.
The name of the value order column in vals_df (can be the value column itself). Default is 'value'.
Pass a vector of the names of other variables in the values dataframe to use when labeling. Common uses include missing values, universal values (e.g., "Total" or "Subtotal" row), or similar variables (e.g., "mode_1" to reuse the mode_1 labels for mode_2). Default is NULL.
TRUE or FALSE setting for whether to add NA as an explicit level in the factor. Default setting of TRUE.
A 'factorized' version of the vector it was passed (i.e., a factor). This is returned invisibly.
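No example is rendered on this page, so the call below is a minimal sketch using a hypothetical two-row codebook; the explicit column-name arguments mirror how factorize_column is called in the vignette above.

require(data.table)

# Hypothetical toy codebook in the vals_df format described under factorize_df
toy_values = data.table(
  variable  = 'mode_type',
  value     = c(1, 8),
  label     = c('Walk', 'Drive'),
  val_order = c(1, 2)
)

# Label a small vector of mode_type codes
factorize_column(
  x = c(1, 8, 8, 1),
  var_str = 'mode_type',
  vals_df = toy_values,
  variable_colname = 'variable',
  value_colname = 'value',
  value_label_colname = 'label',
  value_order_colname = 'val_order'
)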
+factorize_df.Rd
Factorize a dataframe. The function loops over a dataframe (calling factorize_column) and labels each variable for which you provide labels.
+A dataframe to label
A dataframe of variable labels (i.e., factor levels and labels) with the format as specified below. Passed to the factorize_column function.
Prints which vars are labeled and unlabeled
Additional arguments passed to factorize_column
The function expects a values dataframe (vals_df) in the following format: variable (the character/string names of each variable), value (the integer values for each variable), val_order (the sequential ordering of each value), label (the strings or names to use in the levels of the factor).
The "factorize" functions were borrowed and updated from the 'tmr.Rite.out.tester' package by Matt Landis.
+
+hh_labeled = factorize_df(
+ df = hh,
+ vals_df = value_labels,
+ value_label_colname = 'label',
+ extra_labels = c("Missing")
+)
+#>
+#> Labeled vars:
+#> income_detailed, income_followup, num_people, residence_type, sample_segment
+#>
+#> Unlabeled vars:
+#> hh_id, hh_weight, num_trips
+
+
get_distance_meters.Rd
Function to get the haversine distance in meters between two points. Based on the calculation from the geosphere package.
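The call below is illustrative only (not from the package documentation); it assumes the function takes two numeric (latitude, longitude) pairs and returns the distance between them in meters.

# Two hypothetical (lat, lon) points; result is the haversine distance in meters
get_distance_meters(c(47.6062, -122.3321), c(47.2529, -122.4443))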
+hh.Rd
A dataset containing household level attributes of 1,000 households from the 2023 Puget Sound Regional Council Household Travel Survey. The variables are as follows:
+hh
A data frame with 1,000 rows and 6 columns:
8 digit household ID
Sample segment
2022 household income (detailed categories), response codes
2022 household income (broad categories), response codes
Household size, response codes
Type of residence, response codes
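As a quick orientation, the table can be inspected directly (a minimal sketch; it assumes hh ships as a data object in the package, as its use in the vignette above suggests):

library(travelSurveyTools)

# Load and peek at the household table
data("hh")
head(hh)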
hts_bin_var.Rd
Bin numeric variables
+Dataset containing variable to bin in data.table format
Name of the numeric variable to bin
Number of bins for variable. Defaults to 7.
+require(data.table)
+require(stringr)
+hts_bin_var(prepped_dt = trip, numvar = 'speed_mph')
+#> hh_id person_id day_id travel_date trip_id mode_type
+#> 1: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 8
+#> 2: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 8
+#> 3: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 1
+#> 4: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 1
+#> 5: 23000173 2300017301 230001730103 2023-04-21 2.300017e+12 8
+#> ---
+#> 16062: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 8
+#> 16063: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 8
+#> 16064: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 8
+#> 16065: 23419703 2341970301 234197030101 2023-05-31 2.341970e+12 8
+#> 16066: 23419703 2341970301 234197030101 2023-05-31 2.341970e+12 8
+#> d_purpose_category trip_weight speed_mph
+#> 1: 9 356 1-23
+#> 2: 1 679 23-45
+#> 3: 9 758 1-23
+#> 4: 1 314 1-23
+#> 5: 3 883 23-45
+#> ---
+#> 16062: 1 950 1-23
+#> 16063: 6 123 23-45
+#> 16064: 1 720 23-45
+#> 16065: 13 523 1-23
+#> 16066: 1 298 1-23
+
+
hts_cbind_var.Rd
Bind a column from one table to another
+hts_cbind_var(
+ lhs_table,
+ rhs_var,
+ variable_list = variable_list,
+ return_weight_cols = FALSE,
+ ...
+)
Table to bind a column to in data.table format
Variable to bind to the lhs_table.
A variable list with descriptions and table locations of variables.
If TRUE, binds weight variable along with rhs_var to lhs_table. Default is FALSE.
Additional arguments passed to hts_get_keycols
+require(data.table)
+hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph', variable_list = variable_list)
+#> Joining speed_mph to table on hh_id, person_id, day_id, trip_id, speed_mph
+#> hh_id person_id day_id trip_id speed_mph travel_date
+#> 1: 23000173 2300017301 230001730102 2.300017e+12 21.480572 2023-04-20
+#> 2: 23000173 2300017301 230001730102 2.300017e+12 23.720178 2023-04-20
+#> 3: 23000173 2300017301 230001730102 2.300017e+12 6.927242 2023-04-20
+#> 4: 23000173 2300017301 230001730102 2.300017e+12 6.146634 2023-04-20
+#> 5: 23000173 2300017301 230001730103 2.300017e+12 34.657421 2023-04-21
+#> ---
+#> 16062: 23419141 2341914103 234191410301 2.341914e+12 8.904454 2023-05-31
+#> 16063: 23419141 2341914103 234191410301 2.341914e+12 43.284089 2023-05-31
+#> 16064: 23419141 2341914103 234191410301 2.341914e+12 25.518845 2023-05-31
+#> 16065: 23419703 2341970301 234197030101 2.341970e+12 13.131100 2023-05-31
+#> 16066: 23419703 2341970301 234197030101 2.341970e+12 16.413875 2023-05-31
+#> mode_type d_purpose_category trip_weight
+#> 1: 8 9 356
+#> 2: 8 1 679
+#> 3: 1 9 758
+#> 4: 1 1 314
+#> 5: 8 3 883
+#> ---
+#> 16062: 8 1 950
+#> 16063: 8 6 123
+#> 16064: 8 1 720
+#> 16065: 8 13 523
+#> 16066: 8 1 298
+hts_cbind_var(lhs_table = trip, rhs_var = 'speed_mph',
+variable_list = variable_list, return_weight_cols = TRUE)
+#> Joining speed_mph to table on hh_id, person_id, day_id, trip_id, speed_mph, trip_weight
+#> hh_id person_id day_id trip_id speed_mph trip_weight
+#> 1: 23000173 2300017301 230001730102 2.300017e+12 21.480572 356
+#> 2: 23000173 2300017301 230001730102 2.300017e+12 23.720178 679
+#> 3: 23000173 2300017301 230001730102 2.300017e+12 6.927242 758
+#> 4: 23000173 2300017301 230001730102 2.300017e+12 6.146634 314
+#> 5: 23000173 2300017301 230001730103 2.300017e+12 34.657421 883
+#> ---
+#> 16062: 23419141 2341914103 234191410301 2.341914e+12 8.904454 950
+#> 16063: 23419141 2341914103 234191410301 2.341914e+12 43.284089 123
+#> 16064: 23419141 2341914103 234191410301 2.341914e+12 25.518845 720
+#> 16065: 23419703 2341970301 234197030101 2.341970e+12 13.131100 523
+#> 16066: 23419703 2341970301 234197030101 2.341970e+12 16.413875 298
+#> travel_date mode_type d_purpose_category
+#> 1: 2023-04-20 8 9
+#> 2: 2023-04-20 8 1
+#> 3: 2023-04-20 1 9
+#> 4: 2023-04-20 1 1
+#> 5: 2023-04-21 8 3
+#> ---
+#> 16062: 2023-05-31 8 1
+#> 16063: 2023-05-31 8 6
+#> 16064: 2023-05-31 8 1
+#> 16065: 2023-05-31 8 13
+#> 16066: 2023-05-31 8 1
+
+
hts_filter_data.Rd
Filter datasets to only keep specified ids
+List containing household travel data tables.
List of ids to keep in all of the tables
Name of id being used for filtering (e.g., hh_id, person_id)
+require(data.table)
+
+data(test_data)
+
+hts_filter_data(hts_data = test_data,
+ ids = hh[num_people > 5, hh_id],
+ id_name = 'hh_id')
+#> $hh
+#> hh_id sample_segment num_trips num_people income_detailed income_followup
+#> 1: 23016029 8 20 7 5 995
+#> 2: 23121381 14 101 6 9 995
+#> 3: 23122159 21 5 6 2 995
+#> 4: 23150818 16 3 6 1 995
+#> 5: 23233061 21 10 6 8 995
+#> 6: 23233651 21 20 8 6 995
+#> 7: 23240801 21 2 6 6 995
+#> 8: 23411644 14 24 6 9 995
+#> 9: 23416563 14 16 6 7 995
+#> residence_type hh_weight
+#> 1: 2 974
+#> 2: 1 826
+#> 3: 1 938
+#> 4: 1 602
+#> 5: 1 39
+#> 6: 1 345
+#> 7: 1 783
+#> 8: 1 154
+#> 9: 2 928
+#>
+#> $person
+#> ethnicity_1 ethnicity_2 ethnicity_3 ethnicity_4 ethnicity_997 ethnicity_999
+#> 1: 1 0 0 0 0 0
+#> 2: 1 0 0 0 0 0
+#> 3: 995 995 995 995 995 995
+#> 4: 995 995 995 995 995 995
+#> 5: 995 995 995 995 995 995
+#> 6: 1 0 0 0 0 0
+#> 7: 995 995 995 995 995 995
+#> 8: 1 0 0 0 0 0
+#> 9: 1 0 0 0 0 0
+#> 10: 995 995 995 995 995 995
+#> 11: 995 995 995 995 995 995
+#> 12: 995 995 995 995 995 995
+#> 13: 995 995 995 995 995 995
+#> 14: 0 1 0 0 0 0
+#> 15: 995 995 995 995 995 995
+#> 16: 995 995 995 995 995 995
+#> 17: 995 995 995 995 995 995
+#> 18: 995 995 995 995 995 995
+#> 19: 995 995 995 995 995 995
+#> 20: 1 0 0 0 0 0
+#> 21: 995 995 995 995 995 995
+#> 22: 995 995 995 995 995 995
+#> 23: 995 995 995 995 995 995
+#> 24: 995 995 995 995 995 995
+#> 25: 995 995 995 995 995 995
+#> 26: 1 0 0 0 0 0
+#> 27: 1 0 0 0 0 0
+#> 28: 1 0 0 0 0 0
+#> 29: 995 995 995 995 995 995
+#> 30: 995 995 995 995 995 995
+#> 31: 995 995 995 995 995 995
+#> 32: 1 0 0 0 0 0
+#> 33: 1 0 0 0 0 0
+#> 34: 1 0 0 0 0 0
+#> 35: 1 0 0 0 0 0
+#> 36: 995 995 995 995 995 995
+#> 37: 995 995 995 995 995 995
+#> 38: 995 995 995 995 995 995
+#> 39: 995 995 995 995 995 995
+#> 40: 0 0 0 0 0 1
+#> 41: 995 995 995 995 995 995
+#> 42: 0 0 0 0 0 1
+#> 43: 995 995 995 995 995 995
+#> 44: 995 995 995 995 995 995
+#> 45: 995 995 995 995 995 995
+#> 46: 1 0 0 0 0 0
+#> 47: 1 0 0 0 0 0
+#> race_1 race_2 race_3 race_4 race_5 race_997 race_999 hh_id num_trips
+#> 1: 0 0 0 0 1 0 0 23016029 5
+#> 2: 0 0 0 0 1 0 0 23016029 0
+#> 3: 995 995 995 995 995 995 995 23016029 2
+#> 4: 995 995 995 995 995 995 995 23016029 2
+#> 5: 995 995 995 995 995 995 995 23016029 5
+#> 6: 0 0 0 0 1 0 0 23016029 2
+#> 7: 995 995 995 995 995 995 995 23016029 4
+#> 8: 0 0 0 0 1 0 0 23121381 45
+#> 9: 0 0 0 0 1 0 0 23121381 15
+#> 10: 995 995 995 995 995 995 995 23121381 0
+#> 11: 995 995 995 995 995 995 995 23121381 13
+#> 12: 995 995 995 995 995 995 995 23121381 12
+#> 13: 995 995 995 995 995 995 995 23121381 16
+#> 14: 0 0 0 0 1 0 0 23122159 5
+#> 15: 995 995 995 995 995 995 995 23122159 0
+#> 16: 995 995 995 995 995 995 995 23122159 0
+#> 17: 995 995 995 995 995 995 995 23122159 0
+#> 18: 995 995 995 995 995 995 995 23122159 0
+#> 19: 995 995 995 995 995 995 995 23122159 0
+#> 20: 1 0 0 0 0 0 0 23150818 3
+#> 21: 995 995 995 995 995 995 995 23150818 0
+#> 22: 995 995 995 995 995 995 995 23150818 0
+#> 23: 995 995 995 995 995 995 995 23150818 0
+#> 24: 995 995 995 995 995 995 995 23150818 0
+#> 25: 995 995 995 995 995 995 995 23150818 0
+#> 26: 0 0 0 0 1 0 0 23233061 0
+#> 27: 0 0 0 0 1 0 0 23233061 2
+#> 28: 0 0 1 0 0 0 0 23233061 4
+#> 29: 995 995 995 995 995 995 995 23233061 2
+#> 30: 995 995 995 995 995 995 995 23233061 2
+#> 31: 995 995 995 995 995 995 995 23233061 0
+#> 32: 0 0 0 0 1 0 0 23233651 0
+#> 33: 0 1 0 0 0 0 0 23233651 2
+#> 34: 0 0 0 0 1 0 0 23233651 3
+#> 35: 0 0 0 0 1 0 0 23233651 3
+#> 36: 995 995 995 995 995 995 995 23233651 3
+#> 37: 995 995 995 995 995 995 995 23233651 3
+#> 38: 995 995 995 995 995 995 995 23233651 3
+#> 39: 995 995 995 995 995 995 995 23233651 3
+#> 40: 0 0 0 0 1 0 0 23240801 0
+#> 41: 995 995 995 995 995 995 995 23240801 2
+#> 42: 0 0 0 0 1 0 0 23240801 0
+#> 43: 995 995 995 995 995 995 995 23240801 0
+#> 44: 995 995 995 995 995 995 995 23240801 0
+#> 45: 995 995 995 995 995 995 995 23240801 0
+#> 46: 0 0 0 0 1 0 0 23411644 4
+#> 47: 0 0 0 0 1 0 0 23411644 6
+#> person_id age gender employment education person_weight
+#> 1: 2301602901 5 1 5 2 411
+#> 2: 2301602902 6 2 1 2 578
+#> 3: 2301602903 2 1 995 995 17
+#> 4: 2301602904 2 1 995 995 490
+#> 5: 2301602905 1 1 995 995 342
+#> 6: 2301602906 7 1 1 2 754
+#> 7: 2301602907 2 1 995 995 249
+#> 8: 2312138101 6 1 5 6 186
+#> 9: 2312138102 6 2 1 7 296
+#> 10: 2312138103 3 1 2 995 369
+#> 11: 2312138104 2 2 995 995 499
+#> 12: 2312138105 2 1 995 995 226
+#> 13: 2312138106 2 2 995 995 823
+#> 14: 2312215901 5 1 1 3 859
+#> 15: 2312215902 2 995 995 995 280
+#> 16: 2312215903 2 995 995 995 685
+#> 17: 2312215904 2 995 995 995 592
+#> 18: 2312215905 5 995 1 995 240
+#> 19: 2312215906 5 995 1 995 710
+#> 20: 2315081801 4 2 8 7 231
+#> 21: 2315081802 4 995 5 995 331
+#> 22: 2315081803 4 995 5 995 214
+#> 23: 2315081804 4 995 2 995 105
+#> 24: 2315081805 4 995 2 995 317
+#> 25: 2315081806 4 995 5 995 825
+#> 26: 2323306101 10 1 5 6 114
+#> 27: 2323306102 7 2 1 4 297
+#> 28: 2323306103 6 1 1 2 861
+#> 29: 2323306104 2 1 995 995 161
+#> 30: 2323306105 2 2 995 995 817
+#> 31: 2323306106 10 995 5 995 193
+#> 32: 2323365101 7 1 1 3 207
+#> 33: 2323365102 8 2 1 2 178
+#> 34: 2323365103 5 2 6 2 211
+#> 35: 2323365104 5 1 6 2 590
+#> 36: 2323365105 2 2 995 995 626
+#> 37: 2323365106 2 1 995 995 433
+#> 38: 2323365107 2 1 995 995 447
+#> 39: 2323365108 1 1 995 995 404
+#> 40: 2324080101 8 2 1 6 801
+#> 41: 2324080102 2 999 995 995 894
+#> 42: 2324080103 7 1 5 3 330
+#> 43: 2324080104 2 999 995 995 983
+#> 44: 2324080105 2 999 995 995 327
+#> 45: 2324080106 2 999 995 995 66
+#> 46: 2341164401 6 1 1 5 950
+#> 47: 2341164402 6 2 1 6 785
+#> [ reached getOption("max.print") -- omitted 11 rows ]
+#>
+#> $day
+#> delivery_2 delivery_3 delivery_4 delivery_5 delivery_6 delivery_7
+#> 1: 0 0 0 0 0 0
+#> 2: 995 995 995 995 995 995
+#> 3: 995 995 995 995 995 995
+#> 4: 995 995 995 995 995 995
+#> 5: 995 995 995 995 995 995
+#> 6: 995 995 995 995 995 995
+#> 7: 995 995 995 995 995 995
+#> 8: 0 0 0 0 0 0
+#> 9: 0 0 0 0 0 0
+#> 10: 995 995 995 995 995 995
+#> 11: 995 995 995 995 995 995
+#> 12: 995 995 995 995 995 995
+#> 13: 995 995 995 995 995 995
+#> 14: 0 0 0 0 0 0
+#> 15: 0 0 0 0 0 0
+#> 16: 995 995 995 995 995 995
+#> 17: 995 995 995 995 995 995
+#> 18: 995 995 995 995 995 995
+#> 19: 995 995 995 995 995 995
+#> 20: 0 0 0 0 0 0
+#> 21: 0 0 0 0 0 0
+#> 22: 995 995 995 995 995 995
+#> 23: 995 995 995 995 995 995
+#> 24: 995 995 995 995 995 995
+#> 25: 995 995 995 995 995 995
+#> 26: 0 0 0 1 0 0
+#> 27: 0 0 0 0 0 0
+#> 28: 995 995 995 995 995 995
+#> 29: 995 995 995 995 995 995
+#> 30: 995 995 995 995 995 995
+#> 31: 995 995 995 995 995 995
+#> 32: 0 0 0 1 0 0
+#> 33: 0 0 0 0 0 0
+#> 34: 995 995 995 995 995 995
+#> 35: 995 995 995 995 995 995
+#> 36: 995 995 995 995 995 995
+#> 37: 995 995 995 995 995 995
+#> 38: 0 0 0 1 0 0
+#> 39: 0 0 0 1 0 0
+#> 40: 995 995 995 995 995 995
+#> 41: 995 995 995 995 995 995
+#> 42: 995 995 995 995 995 995
+#> 43: 995 995 995 995 995 995
+#> 44: 0 0 0 0 0 0
+#> 45: 0 0 0 0 0 0
+#> 46: 995 995 995 995 995 995
+#> 47: 995 995 995 995 995 995
+#> 48: 995 995 995 995 995 995
+#> 49: 995 995 995 995 995 995
+#> 50: 0 0 0 1 0 0
+#> 51: 0 0 0 1 0 0
+#> 52: 0 0 0 0 0 0
+#> 53: 995 995 995 995 995 995
+#> 54: 995 995 995 995 995 995
+#> 55: 995 995 995 995 995 995
+#> 56: 995 995 995 995 995 995
+#> 57: 0 0 0 0 0 0
+#> 58: 995 995 995 995 995 995
+#> 59: 995 995 995 995 995 995
+#> 60: 995 995 995 995 995 995
+#> 61: 995 995 995 995 995 995
+#> 62: 995 995 995 995 995 995
+#> delivery_8 delivery_996 hh_id num_trips person_id day_id
+#> 1: 0 1 23016029 5 2301602901 230160290101
+#> 2: 995 995 23016029 0 2301602902 230160290201
+#> 3: 995 995 23016029 2 2301602903 230160290301
+#> 4: 995 995 23016029 2 2301602904 230160290401
+#> 5: 995 995 23016029 5 2301602905 230160290501
+#> 6: 995 995 23016029 2 2301602906 230160290601
+#> 7: 995 995 23016029 4 2301602907 230160290701
+#> 8: 0 1 23121381 11 2312138101 231213810101
+#> 9: 0 1 23121381 0 2312138102 231213810201
+#> 10: 995 995 23121381 NA 2312138103 231213810301
+#> 11: 995 995 23121381 3 2312138104 231213810401
+#> 12: 995 995 23121381 4 2312138105 231213810501
+#> 13: 995 995 23121381 3 2312138106 231213810601
+#> 14: 0 1 23121381 5 2312138101 231213810102
+#> 15: 0 1 23121381 0 2312138102 231213810202
+#> 16: 995 995 23121381 NA 2312138103 231213810302
+#> 17: 995 995 23121381 NA 2312138104 231213810402
+#> 18: 995 995 23121381 NA 2312138105 231213810502
+#> 19: 995 995 23121381 NA 2312138106 231213810602
+#> 20: 0 1 23121381 2 2312138101 231213810103
+#> 21: 0 1 23121381 4 2312138102 231213810203
+#> 22: 995 995 23121381 NA 2312138103 231213810303
+#> 23: 995 995 23121381 2 2312138104 231213810403
+#> 24: 995 995 23121381 2 2312138105 231213810503
+#> 25: 995 995 23121381 2 2312138106 231213810603
+#> 26: 0 0 23121381 7 2312138101 231213810104
+#> 27: 0 1 23121381 3 2312138102 231213810204
+#> 28: 995 995 23121381 NA 2312138103 231213810304
+#> 29: 995 995 23121381 4 2312138104 231213810404
+#> 30: 995 995 23121381 NA 2312138105 231213810504
+#> 31: 995 995 23121381 1 2312138106 231213810604
+#> 32: 0 0 23121381 12 2312138101 231213810105
+#> 33: 0 1 23121381 3 2312138102 231213810205
+#> 34: 995 995 23121381 0 2312138103 231213810305
+#> 35: 995 995 23121381 1 2312138104 231213810405
+#> 36: 995 995 23121381 5 2312138105 231213810505
+#> 37: 995 995 23121381 8 2312138106 231213810605
+#> 38: 0 0 23121381 5 2312138101 231213810106
+#> 39: 0 0 23121381 3 2312138102 231213810206
+#> 40: 995 995 23121381 NA 2312138103 231213810306
+#> 41: 995 995 23121381 1 2312138104 231213810406
+#> 42: 995 995 23121381 1 2312138105 231213810506
+#> 43: 995 995 23121381 1 2312138106 231213810606
+#> 44: 0 1 23121381 3 2312138101 231213810107
+#> 45: 0 1 23121381 2 2312138102 231213810207
+#> 46: 995 995 23121381 NA 2312138103 231213810307
+#> 47: 995 995 23121381 2 2312138104 231213810407
+#> 48: 995 995 23121381 NA 2312138105 231213810507
+#> 49: 995 995 23121381 1 2312138106 231213810607
+#> 50: 0 0 23122159 5 2312215901 231221590101
+#> 51: 0 0 23150818 3 2315081801 231508180101
+#> 52: 0 1 23233061 0 2323306101 232330610101
+#> 53: 995 995 23233061 2 2323306102 232330610201
+#> 54: 995 995 23233061 4 2323306103 232330610301
+#> 55: 995 995 23233061 2 2323306104 232330610401
+#> 56: 995 995 23233061 2 2323306105 232330610501
+#> 57: 0 1 23233651 0 2323365101 232336510101
+#> 58: 995 995 23233651 2 2323365102 232336510201
+#> 59: 995 995 23233651 3 2323365103 232336510301
+#> 60: 995 995 23233651 3 2323365104 232336510401
+#> 61: 995 995 23233651 3 2323365105 232336510501
+#> 62: 995 995 23233651 3 2323365106 232336510601
+#> travel_date begin_day end_day day_weight
+#> 1: 2023-04-27 1 1 504
+#> 2: 2023-04-27 1 1 376
+#> 3: 2023-04-27 1 1 115
+#> 4: 2023-04-27 1 1 306
+#> 5: 2023-04-27 1 1 358
+#> 6: 2023-04-27 1 1 197
+#> 7: 2023-04-27 1 1 546
+#> 8: 2023-05-05 1 1 896
+#> 9: 2023-05-05 1 1 662
+#> 10: 2023-05-05 995 995 228
+#> 11: 2023-05-05 995 995 580
+#> 12: 2023-05-05 995 995 541
+#> 13: 2023-05-05 995 995 909
+#> 14: 2023-05-06 1 1 159
+#> 15: 2023-05-06 1 1 464
+#> 16: 2023-05-06 995 995 518
+#> 17: 2023-05-06 995 995 838
+#> 18: 2023-05-06 995 995 916
+#> 19: 2023-05-06 995 995 368
+#> 20: 2023-05-07 1 1 442
+#> 21: 2023-05-07 1 1 362
+#> 22: 2023-05-07 995 995 904
+#> 23: 2023-05-07 995 995 243
+#> 24: 2023-05-07 995 995 820
+#> 25: 2023-05-07 995 995 827
+#> 26: 2023-05-08 1 1 169
+#> 27: 2023-05-08 1 1 169
+#> 28: 2023-05-08 995 995 495
+#> 29: 2023-05-08 995 995 813
+#> 30: 2023-05-08 995 995 436
+#> 31: 2023-05-08 995 995 177
+#> 32: 2023-05-09 1 1 388
+#> 33: 2023-05-09 1 1 688
+#> 34: 2023-05-09 1 1 497
+#> 35: 2023-05-09 1 1 934
+#> 36: 2023-05-09 1 1 614
+#> 37: 2023-05-09 1 1 414
+#> 38: 2023-05-10 1 1 384
+#> 39: 2023-05-10 1 1 620
+#> 40: 2023-05-10 1 995 748
+#> 41: 2023-05-10 1 995 695
+#> 42: 2023-05-10 1 995 397
+#> 43: 2023-05-10 1 995 544
+#> 44: 2023-05-11 1 1 50
+#> 45: 2023-05-11 1 1 385
+#> 46: 2023-05-11 995 995 247
+#> 47: 2023-05-11 995 995 740
+#> 48: 2023-05-11 995 995 496
+#> 49: 2023-05-11 995 995 713
+#> 50: 2023-05-03 1 1 768
+#> 51: 2023-05-24 1 1 659
+#> 52: 2023-05-25 1 1 111
+#> 53: 2023-05-25 1 1 695
+#> 54: 2023-05-25 1 1 279
+#> 55: 2023-05-25 1 1 661
+#> 56: 2023-05-25 1 1 708
+#> 57: 2023-05-29 1 1 631
+#> 58: 2023-05-29 1 1 167
+#> 59: 2023-05-29 1 1 641
+#> 60: 2023-05-29 1 1 574
+#> 61: 2023-05-29 1 1 543
+#> 62: 2023-05-29 1 1 96
+#> [ reached getOption("max.print") -- omitted 21 rows ]
+#>
+#> $trip
+#> hh_id person_id day_id travel_date trip_id speed_mph
+#> 1: 23016029 2301602901 230160290101 2023-04-27 2.301603e+12 5.9952706
+#> 2: 23016029 2301602901 230160290101 2023-04-27 2.301603e+12 21.5112706
+#> 3: 23016029 2301602901 230160290101 2023-04-27 2.301603e+12 3.5931191
+#> 4: 23016029 2301602901 230160290101 2023-04-27 2.301603e+12 11.9905413
+#> 5: 23016029 2301602901 230160290101 2023-04-27 2.301603e+12 5.9952706
+#> ---
+#> 197: 23416563 2341656304 234165630401 2023-06-05 2.341656e+12 2.4496880
+#> 198: 23416563 2341656305 234165630501 2023-06-05 2.341656e+12 1.5417278
+#> 199: 23416563 2341656305 234165630501 2023-06-05 2.341656e+12 2.5695464
+#> 200: 23416563 2341656306 234165630601 2023-06-05 2.341656e+12 0.8465739
+#> 201: 23416563 2341656306 234165630601 2023-06-05 2.341656e+12 4.2869540
+#> mode_type d_purpose_category trip_weight
+#> 1: 8 6 804
+#> 2: 8 10 523
+#> 3: 8 1 622
+#> 4: 8 6 633
+#> 5: 8 1 413
+#> ---
+#> 197: 10 1 649
+#> 198: 1 4 503
+#> 199: 1 1 338
+#> 200: 8 6 302
+#> 201: 8 1 599
+#>
+#> $vehicle
+#> hh_id vehicle_id fuel_type hh_weight
+#> 1: 23016029 2301602901 1 974
+#> 2: 23016029 2301602902 1 974
+#> 3: 23016029 2301602903 1 974
+#> 4: 23121381 2312138101 1 826
+#> 5: 23121381 2312138102 1 826
+#> 6: 23121381 2312138103 1 826
+#> 7: 23122159 2312215901 1 938
+#> 8: 23122159 2312215902 1 938
+#> 9: 23122159 2312215903 1 938
+#> 10: 23150818 2315081801 1 602
+#> 11: 23150818 2315081802 1 602
+#> 12: 23150818 2315081803 4 602
+#> 13: 23233061 2323306101 1 39
+#> 14: 23233061 2323306102 1 39
+#> 15: 23233061 2323306103 5 39
+#> 16: 23233061 2323306104 1 39
+#> 17: 23233061 2323306105 1 39
+#> 18: 23233651 2323365101 1 345
+#> 19: 23233651 2323365102 1 345
+#> 20: 23233651 2323365103 1 345
+#> 21: 23240801 2324080101 1 783
+#> 22: 23240801 2324080102 1 783
+#> 23: 23240801 2324080103 1 783
+#> 24: 23411644 2341164401 1 154
+#> 25: 23411644 2341164402 1 154
+#> 26: 23411644 2341164403 5 154
+#> 27: 23416563 2341656301 1 928
+#> 28: 23416563 2341656302 1 928
+#> hh_id vehicle_id fuel_type hh_weight
+#>
+
+
hts_get_keycols.Rd
Find key columns in table
+Dataset to find key columns of in data.table format
Boolean whether to return id columns. Default is TRUE.
Boolean whether to return weight columns. Default is TRUE.
Boolean whether to only return highest level weight/id. Default is FALSE.
+require(data.table)
+hts_get_keycols(dt = trip)
+#> [1] "hh_id" "person_id" "day_id" "trip_id" "trip_weight"
+hts_get_keycols(dt = trip, priority = TRUE)
+#> [1] "trip_id" "trip_weight"
+
+
hts_get_ns.Rd
Get counts from dataset
+Dataset to pull counts from.
Boolean whether to pull weighted estimates.
+require(data.table)
+hts_get_ns(prepped_dt = day, weighted = TRUE)
+#> $unwtd
+#> $unwtd$Households
+#> [1] 1000
+#>
+#> $unwtd$Persons
+#> [1] 1962
+#>
+#> $unwtd$Days
+#> [1] 4326
+#>
+#>
+#> $wtd
+#> $wtd$Days
+#> [1] 2178966
+#>
+#>
+#> $units
+#> [1] "days"
+#>
+
+
hts_melt_vars.Rd
Melts checkbox variables into a single variable
+hts_melt_vars(
+ shared_name = NULL,
+ wide_dt = NULL,
+ shared_name_vars = NULL,
+ variables_dt = variable_list,
+ hts_data = hts_data,
+ remove_missing = TRUE,
+ missing_values = c("Missing Response", "995"),
+ checkbox_label_sep = ":",
+ to_single_row = FALSE
+)
Shared name of the checkbox variable to melt (e.g., 'race_1' and 'race_2' have shared name of 'race'). Defaults to NULL.
Table containing the checkbox variables to melt in data.table format.
List of the checkbox variables with the same shared name. Defaults to NULL.
List of variable locations and descriptions in data.table format.
List containing household, person, day, trip, and vehicle datasets in data.table format.
Boolean to remove rows with missing values. Defaults to TRUE.
Missing values to remove. Defaults to 'Missing Response' and 995.
Character to use to split description of checkbox variable in the variable list. Defaults to ':'.
Boolean if treating multiple checkbox selections as 'Two or more'. Defaults to FALSE.
Inputted data table with checkbox variables melted into a single variable, with a 'variable' column to indicate original variable names and a 'value' column with the original value of the checkbox variable.
+
+require(data.table)
+require(stringr)
+hts_melt_vars(shared_name = 'race',
+ wide_dt = person,
+ hts_data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+#> hh_id person_id person_weight variable value
+#> 1: 23000173 2300017301 139 race_1 0
+#> 2: 23000173 2300017304 747 race_1 0
+#> 3: 23000738 2300073801 298 race_1 0
+#> 4: 23000858 2300085801 865 race_1 0
+#> 5: 23000858 2300085802 87 race_1 0
+#> ---
+#> 11679: 23416563 2341656302 889 race_999 0
+#> 11680: 23418712 2341871201 476 race_999 0
+#> 11681: 23419141 2341914101 136 race_999 0
+#> 11682: 23419141 2341914102 170 race_999 0
+#> 11683: 23419703 2341970301 645 race_999 0
+#> race
+#> 1: African American or Black
+#> 2: African American or Black
+#> 3: African American or Black
+#> 4: African American or Black
+#> 5: African American or Black
+#> ---
+#> 11679: Prefer not to answer
+#> 11680: Prefer not to answer
+#> 11681: Prefer not to answer
+#> 11682: Prefer not to answer
+#> 11683: Prefer not to answer
+
+
hts_prep_byvar.Rd
Prepare variable to be summarized
+hts_prep_byvar(
+ summarize_by = NULL,
+ variables_dt = variables_list,
+ hts_data,
+ ...
+)
Name of the variable to be prepped. Default is NULL.
List of variable locations and descriptions in data.table format.
List containing household, person, day, trip, and vehicle datasets in data.table format.
Additional parameters to pass to hts_melt_vars
+hts_prep_byvar(summarize_by = 'age',
+ variables_dt = variable_list,
+ hts_data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+#> hh_id person_id person_weight age
+#> 1: 23000173 2300017301 139 7
+#> 2: 23000173 2300017302 17 3
+#> 3: 23000173 2300017303 400 2
+#> 4: 23000173 2300017304 747 6
+#> 5: 23000738 2300073801 298 8
+#> ---
+#> 2038: 23418712 2341871203 809 2
+#> 2039: 23419141 2341914101 136 6
+#> 2040: 23419141 2341914102 170 5
+#> 2041: 23419141 2341914103 490 1
+#> 2042: 23419703 2341970301 645 9
+hts_prep_byvar(summarize_by = 'race',
+ variables_dt = variable_list,
+ hts_data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+#> hh_id person_id person_weight race
+#> 1: 23025240 2302524001 243 African American or Black
+#> 2: 23029096 2302909601 772 African American or Black
+#> 3: 23033218 2303321802 170 African American or Black
+#> 4: 23034871 2303487101 977 African American or Black
+#> 5: 23035194 2303519401 630 African American or Black
+#> ---
+#> 1665: 23411086 2341108601 311 Prefer not to answer
+#> 1666: 23411086 2341108602 827 Prefer not to answer
+#> 1667: 23413086 2341308603 514 Prefer not to answer
+#> 1668: 23413585 2341358501 288 Prefer not to answer
+#> 1669: 23413585 2341358502 603 Prefer not to answer
+
+
hts_prep_data.Rd
Prepare datasets to make summaries
+hts_prep_data(
+ summarize_var = NULL,
+ summarize_by = NULL,
+ variables_dt = variable_list,
+ data = hts_data,
+ weighted = TRUE,
+ remove_outliers = TRUE,
+ threshold = 0.975,
+ remove_missing = TRUE,
+ missing_values = c("Missing Response", "995"),
+ not_imputable = -1,
+ strataname = NULL
+)
Name of the variable to summarize. Default is NULL.
Name of the variable to summarize the summarize_var by. Default is NULL.
List of variable locations and descriptions in data.table format.
List of household, person, vehicle, day, and trip tables in data.table format.
Whether the data is weighted. Default is TRUE.
Whether to remove outliers for numeric variables. Default is TRUE.
Threshold to define outliers. Default is 0.975.
Whether to remove missing values from the summary. Default is TRUE.
Missing values to remove. Defaults are 995 and 'Missing Response'.
Value representing 'Not imputable' to remove. Default is -1.
Name of the strata variable to bring in. Default is NULL.
List containing the categorical and numeric datasets of the summary variables and key columns, and either whether the summarize variable is shared or a breakdown of outliers, depending on whether the summarize variable is categorical or numeric.
+
+require(data.table)
+require(stringr)
+hts_prep_data(summarize_var = 'age',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+#> Creating a summary of person age
+#> $cat
+#> hh_id person_id person_weight age
+#> 1: 23000173 2300017301 139 7
+#> 2: 23000173 2300017302 17 3
+#> 3: 23000173 2300017303 400 2
+#> 4: 23000173 2300017304 747 6
+#> 5: 23000738 2300073801 298 8
+#> ---
+#> 2038: 23418712 2341871203 809 2
+#> 2039: 23419141 2341914101 136 6
+#> 2040: 23419141 2341914102 170 5
+#> 2041: 23419141 2341914103 490 1
+#> 2042: 23419703 2341970301 645 9
+#>
+#> $num
+#> NULL
+#>
+#> $var_is_shared
+#> [1] FALSE
+#>
+hts_prep_data(summarize_var = 'speed_mph',
+ summarize_by = 'age',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+#> Creating a summary of trip speed_mph broken down by person age
+#> $cat
+#> hh_id person_id day_id trip_id trip_weight speed_mph
+#> 1: 23000173 2300017301 230001730102 2.300017e+12 356 17-25
+#> 2: 23000173 2300017301 230001730102 2.300017e+12 679 17-25
+#> 3: 23000173 2300017301 230001730102 2.300017e+12 758 1-9
+#> 4: 23000173 2300017301 230001730102 2.300017e+12 314 1-9
+#> 5: 23000173 2300017301 230001730103 2.300017e+12 883 33-41
+#> ---
+#> 15031: 23419141 2341914103 234191410301 2.341914e+12 950 1-9
+#> 15032: 23419141 2341914103 234191410301 2.341914e+12 123 43 or more
+#> 15033: 23419141 2341914103 234191410301 2.341914e+12 720 25-33
+#> 15034: 23419703 2341970301 234197030101 2.341970e+12 523 9-17
+#> 15035: 23419703 2341970301 234197030101 2.341970e+12 298 9-17
+#> person_weight age
+#> 1: 139 7
+#> 2: 139 7
+#> 3: 139 7
+#> 4: 139 7
+#> 5: 139 7
+#> ---
+#> 15031: 490 1
+#> 15032: 490 1
+#> 15033: 490 1
+#> 15034: 645 9
+#> 15035: 645 9
+#>
+#> $num
+#> hh_id person_id day_id trip_id trip_weight speed_mph
+#> 1: 23000173 2300017301 230001730102 2.300017e+12 356 21.480572
+#> 2: 23000173 2300017301 230001730102 2.300017e+12 679 23.720178
+#> 3: 23000173 2300017301 230001730102 2.300017e+12 758 6.927242
+#> 4: 23000173 2300017301 230001730102 2.300017e+12 314 6.146634
+#> 5: 23000173 2300017301 230001730103 2.300017e+12 883 34.657421
+#> ---
+#> 15031: 23419141 2341914103 234191410301 2.341914e+12 950 8.904454
+#> 15032: 23419141 2341914103 234191410301 2.341914e+12 123 43.284089
+#> 15033: 23419141 2341914103 234191410301 2.341914e+12 720 25.518845
+#> 15034: 23419703 2341970301 234197030101 2.341970e+12 523 13.131100
+#> 15035: 23419703 2341970301 234197030101 2.341970e+12 298 16.413875
+#> person_weight age
+#> 1: 139 7
+#> 2: 139 7
+#> 3: 139 7
+#> 4: 139 7
+#> 5: 139 7
+#> ---
+#> 15031: 490 1
+#> 15032: 490 1
+#> 15033: 490 1
+#> 15034: 645 9
+#> 15035: 645 9
+#>
+#> $outliers
+#> threshold num_removed min_outlier max_outlier
+#> 1: 0.975 386 110.2297 228233.1
+#>
+
+
+hts_prep_data(summarize_var = 'employment',
+ summarize_by = c('age', 'race'),
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+#> Creating a summary of person employment broken down by person age and person race
+#> $cat
+#> hh_id person_id person_weight employment age
+#> 1: 23000173 2300017301 139 1 7
+#> 2: 23000173 2300017304 747 3 6
+#> 3: 23000738 2300073801 298 1 8
+#> 4: 23000858 2300085801 865 1 6
+#> 5: 23000858 2300085802 87 1 7
+#> ---
+#> 1665: 23416563 2341656302 889 1 6
+#> 1666: 23418712 2341871201 476 1 6
+#> 1667: 23419141 2341914101 136 1 6
+#> 1668: 23419141 2341914102 170 5 5
+#> 1669: 23419703 2341970301 645 5 9
+#> race
+#> 1: White
+#> 2: White
+#> 3: White
+#> 4: Prefer not to answer
+#> 5: White
+#> ---
+#> 1665: African American or Black
+#> 1666: Asian
+#> 1667: White
+#> 1668: White
+#> 1669: White
+#>
+#> $num
+#> NULL
+#>
+#> $var_is_shared
+#> [1] FALSE
+#>
+
hts_prep_triprate.Rd
Prepare datasets for trip rate calculations
+hts_prep_triprate(
+ summarize_by = NULL,
+ variables_dt = variable_list,
+ trip_name = "trip",
+ day_name = "day",
+ remove_outliers = TRUE,
+ threshold = 0.975,
+ weighted = TRUE,
+ hts_data
+)
Name of the variable to summarize trip rates by. Default is NULL.
List of variable locations and descriptions in data.table format.
Name of the trip dataset in hts_data.
Name of the day dataset in hts_data.
Boolean whether or not to remove outliers from the dataset. Default is TRUE.
Threshold to define outliers. Default is 0.975.
Whether the data is weighted. Default is TRUE.
List containing household, person, day, trip, and vehicle datasets in data.table format.
List of binned number of trips with key columns and summarize by variable, unbinned number of trips with key columns and summarize by variable, and a breakdown of outliers if removed.
+
+require(data.table)
+require(stringr)
+hts_prep_triprate(variables_dt = variable_list,
+ trip_name = 'trip',
+ day_name = 'day',
+ hts_data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+#> $cat
+#> hh_id person_id day_id day_weight num_trips_wtd
+#> 1: 23000173 2300017301 230001730101 775 Exactly 0
+#> 2: 23000173 2300017301 230001730102 244 7-14
+#> 3: 23000173 2300017301 230001730103 77 36 or more
+#> 4: 23000173 2300017301 230001730104 91 0-7
+#> 5: 23000173 2300017301 230001730106 55 36 or more
+#> ---
+#> 4213: 23418712 2341871203 234187120301 119 7-14
+#> 4214: 23419141 2341914101 234191410101 626 7-14
+#> 4215: 23419141 2341914102 234191410201 595 0-7
+#> 4216: 23419141 2341914103 234191410301 49 36 or more
+#> 4217: 23419703 2341970301 234197030101 69 7-14
+#>
+#> $num
+#> hh_id person_id day_id day_weight num_trips_wtd
+#> 1: 23000173 2300017301 230001730101 775 0.000000
+#> 2: 23000173 2300017301 230001730102 244 8.635246
+#> 3: 23000173 2300017301 230001730103 77 39.935065
+#> 4: 23000173 2300017301 230001730104 91 4.681319
+#> 5: 23000173 2300017301 230001730106 55 51.763636
+#> ---
+#> 4213: 23418712 2341871203 234187120301 119 11.235294
+#> 4214: 23419141 2341914101 234191410101 626 8.731629
+#> 4215: 23419141 2341914102 234191410201 595 5.848739
+#> 4216: 23419141 2341914103 234191410301 49 58.387755
+#> 4217: 23419703 2341970301 234197030101 69 11.898551
+#>
+#> $outliers
+#> threshold num_removed min_outlier max_outlier
+#> 1: 0.975 109 59.2663 469.7
+#>
+hts_prep_triprate(summarize_by = 'age',
+ variables_dt = variable_list,
+ trip_name = 'trip',
+ day_name = 'day',
+ hts_data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))
+#> $cat
+#> hh_id person_id day_id day_weight person_weight age
+#> 1: 23000173 2300017301 230001730101 775 139 7
+#> 2: 23000173 2300017301 230001730102 244 139 7
+#> 3: 23000173 2300017301 230001730103 77 139 7
+#> 4: 23000173 2300017301 230001730104 91 139 7
+#> 5: 23000173 2300017301 230001730106 55 139 7
+#> ---
+#> 4213: 23418712 2341871203 234187120301 119 809 2
+#> 4214: 23419141 2341914101 234191410101 626 136 6
+#> 4215: 23419141 2341914102 234191410201 595 170 5
+#> 4216: 23419141 2341914103 234191410301 49 490 1
+#> 4217: 23419703 2341970301 234197030101 69 645 9
+#> num_trips_wtd
+#> 1: Exactly 0
+#> 2: 7-14
+#> 3: 36 or more
+#> 4: 0-7
+#> 5: 36 or more
+#> ---
+#> 4213: 7-14
+#> 4214: 7-14
+#> 4215: 0-7
+#> 4216: 36 or more
+#> 4217: 7-14
+#>
+#> $num
+#> hh_id person_id day_id day_weight person_weight age
+#> 1: 23000173 2300017301 230001730101 775 139 7
+#> 2: 23000173 2300017301 230001730102 244 139 7
+#> 3: 23000173 2300017301 230001730103 77 139 7
+#> 4: 23000173 2300017301 230001730104 91 139 7
+#> 5: 23000173 2300017301 230001730106 55 139 7
+#> ---
+#> 4213: 23418712 2341871203 234187120301 119 809 2
+#> 4214: 23419141 2341914101 234191410101 626 136 6
+#> 4215: 23419141 2341914102 234191410201 595 170 5
+#> 4216: 23419141 2341914103 234191410301 49 490 1
+#> 4217: 23419703 2341970301 234197030101 69 645 9
+#> num_trips_wtd
+#> 1: 0.000000
+#> 2: 8.635246
+#> 3: 39.935065
+#> 4: 4.681319
+#> 5: 51.763636
+#> ---
+#> 4213: 11.235294
+#> 4214: 8.731629
+#> 4215: 5.848739
+#> 4216: 58.387755
+#> 4217: 11.898551
+#>
+#> $outliers
+#> threshold num_removed min_outlier max_outlier
+#> 1: 0.975 109 59.2663 469.7
+#>
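The prepped trip-rate tables can be handed to the summary functions documented below. A minimal sketch, assuming the unbinned $num element (with the num_trips_wtd and day_weight columns shown in the output above) can be passed straight to hts_summary_num():
+trip_rate_prep = hts_prep_triprate(summarize_by = 'age',
+                                   variables_dt = variable_list,
+                                   trip_name = 'trip',
+                                   day_name = 'day',
+                                   hts_data = list('hh' = hh,
+                                                   'person' = person,
+                                                   'day' = day,
+                                                   'trip' = trip,
+                                                   'vehicle' = vehicle))
+# Summarize the unbinned trip rates; num_trips_wtd and day_weight are the
+# columns shown in the $num table above.
+hts_summary_num(prepped_dt = trip_rate_prep$num,
+                summarize_var = 'num_trips_wtd',
+                summarize_by = 'age',
+                wtname = 'day_weight')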
+
hts_remove_missing_data.Rd
Remove missing data for summary variables
+hts_remove_missing_data(
+ hts_data,
+ variables_dt,
+ summarize_var,
+ summarize_by = NULL,
+ missing_values = c("Missing Response", "995"),
+ not_imputable = -1
+)
List containing household, person, day, trip, and vehicle datasets in data.table format.
A variable list with descriptions and table locations of variables.
Variable to be summarized that has its missing data removed.
Variable being summarized by that has its missing data removed. Default is NULL.
Missing values that will be removed. Defaults are 995 and 'Missing Response'.
Value meaning not_imputable that will be removed. Default is -1.
+require(data.table)
+hts_remove_missing_data(hts_data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle),
+ variables_dt = variable_list,
+ summarize_var = 'speed_mph',
+ summarize_by = 'mode_type')
+#> $hh
+#> hh_id sample_segment num_trips num_people income_detailed
+#> 1: 23000173 8 66 4 8
+#> 2: 23000738 4 16 1 999
+#> 3: 23000858 16 10 2 10
+#> 4: 23001054 8 75 2 8
+#> 5: 23001072 16 27 1 8
+#> ---
+#> 996: 23415053 5 9 2 8
+#> 997: 23416563 14 16 6 7
+#> 998: 23418712 5 7 3 999
+#> 999: 23419141 14 19 3 10
+#> 1000: 23419703 14 2 1 2
+#> income_followup residence_type hh_weight
+#> 1: 995 1 190
+#> 2: 999 1 54
+#> 3: 995 2 912
+#> 4: 995 1 182
+#> 5: 995 2 958
+#> ---
+#> 996: 995 4 232
+#> 997: 995 2 928
+#> 998: 6 1 314
+#> 999: 995 1 811
+#> 1000: 995 1 407
+#>
+#> $person
+#> ethnicity_1 ethnicity_2 ethnicity_3 ethnicity_4 ethnicity_997
+#> 1: 1 0 0 0 0
+#> 2: 995 995 995 995 995
+#> 3: 995 995 995 995 995
+#> 4: 1 0 0 0 0
+#> 5: 1 0 0 0 0
+#> ---
+#> 2038: 995 995 995 995 995
+#> 2039: 1 0 0 0 0
+#> 2040: 1 0 0 0 0
+#> 2041: 995 995 995 995 995
+#> 2042: 1 0 0 0 0
+#> ethnicity_999 race_1 race_2 race_3 race_4 race_5 race_997 race_999
+#> 1: 0 0 0 0 0 1 0 0
+#> 2: 995 995 995 995 995 995 995 995
+#> 3: 995 995 995 995 995 995 995 995
+#> 4: 0 0 0 0 0 1 0 0
+#> 5: 0 0 0 0 0 1 0 0
+#> ---
+#> 2038: 995 995 995 995 995 995 995 995
+#> 2039: 0 0 0 0 0 1 0 0
+#> 2040: 0 0 0 0 0 1 0 0
+#> 2041: 995 995 995 995 995 995 995 995
+#> 2042: 0 0 0 0 0 1 0 0
+#> hh_id num_trips person_id age gender employment education
+#> 1: 23000173 21 2300017301 7 2 1 6
+#> 2: 23000173 5 2300017302 3 2 5 995
+#> 3: 23000173 2 2300017303 2 2 995 995
+#> 4: 23000173 38 2300017304 6 1 3 6
+#> 5: 23000738 16 2300073801 8 1 1 7
+#> ---
+#> 2038: 23418712 2 2341871203 2 1 995 995
+#> 2039: 23419141 9 2341914101 6 2 1 6
+#> 2040: 23419141 5 2341914102 5 1 5 2
+#> 2041: 23419141 5 2341914103 1 1 995 995
+#> 2042: 23419703 2 2341970301 9 2 5 6
+#> person_weight
+#> 1: 139
+#> 2: 17
+#> 3: 400
+#> 4: 747
+#> 5: 298
+#> ---
+#> 2038: 809
+#> 2039: 136
+#> 2040: 170
+#> 2041: 490
+#> 2042: 645
+#>
+#> $day
+#> delivery_2 delivery_3 delivery_4 delivery_5 delivery_6 delivery_7
+#> 1: 0 0 0 0 0 0
+#> 2: 995 995 995 995 995 995
+#> 3: 995 995 995 995 995 995
+#> 4: 0 0 0 0 0 0
+#> 5: 0 0 0 0 0 0
+#> ---
+#> 4322: 995 995 995 995 995 995
+#> 4323: 1 1 0 0 0 0
+#> 4324: 995 995 995 995 995 995
+#> 4325: 995 995 995 995 995 995
+#> 4326: 0 0 0 1 0 0
+#> delivery_8 delivery_996 hh_id num_trips person_id day_id
+#> 1: 0 1 23000173 0 2300017301 230001730101
+#> 2: 995 995 23000173 NA 2300017302 230001730201
+#> 3: 995 995 23000173 NA 2300017303 230001730301
+#> 4: 0 1 23000173 3 2300017304 230001730401
+#> 5: 0 1 23000173 4 2300017301 230001730102
+#> ---
+#> 4322: 995 995 23418712 2 2341871203 234187120301
+#> 4323: 0 0 23419141 9 2341914101 234191410101
+#> 4324: 995 995 23419141 5 2341914102 234191410201
+#> 4325: 995 995 23419141 5 2341914103 234191410301
+#> 4326: 0 0 23419703 2 2341970301 234197030101
+#> travel_date begin_day end_day day_weight
+#> 1: 2023-04-19 1 1 775
+#> 2: 2023-04-19 995 995 61
+#> 3: 2023-04-19 995 995 818
+#> 4: 2023-04-19 1 1 673
+#> 5: 2023-04-20 1 1 244
+#> ---
+#> 4322: 2023-05-31 1 1 119
+#> 4323: 2023-05-31 1 1 626
+#> 4324: 2023-05-31 1 1 595
+#> 4325: 2023-05-31 1 1 49
+#> 4326: 2023-05-31 1 1 69
+#>
+#> $trip
+#> hh_id person_id day_id travel_date trip_id speed_mph
+#> 1: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 21.480572
+#> 2: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 23.720178
+#> 3: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 6.927242
+#> 4: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 6.146634
+#> 5: 23000173 2300017301 230001730103 2023-04-21 2.300017e+12 34.657421
+#> ---
+#> 15620: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 8.904454
+#> 15621: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 43.284089
+#> 15622: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 25.518845
+#> 15623: 23419703 2341970301 234197030101 2023-05-31 2.341970e+12 13.131100
+#> 15624: 23419703 2341970301 234197030101 2023-05-31 2.341970e+12 16.413875
+#> mode_type d_purpose_category trip_weight
+#> 1: 8 9 356
+#> 2: 8 1 679
+#> 3: 1 9 758
+#> 4: 1 1 314
+#> 5: 8 3 883
+#> ---
+#> 15620: 8 1 950
+#> 15621: 8 6 123
+#> 15622: 8 1 720
+#> 15623: 8 13 523
+#> 15624: 8 1 298
+#>
+#> $vehicle
+#> hh_id vehicle_id fuel_type hh_weight
+#> 1: 23000173 2300017301 1 190
+#> 2: 23000173 2300017302 1 190
+#> 3: 23000173 2300017303 1 190
+#> 4: 23000738 2300073801 5 54
+#> 5: 23000858 2300085801 1 912
+#> ---
+#> 1453: 23418712 2341871202 1 314
+#> 1454: 23419141 2341914101 2 811
+#> 1455: 23419141 2341914102 1 811
+#> 1456: 23419703 2341970301 1 407
+#> 1457: 23419703 2341970302 1 407
+#>
+
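The missing-value codes are configurable. A minimal variant (sketch), assuming income_detailed is listed in variable_list as in the hts_prep_data examples above, that also treats the 999 refusal code as missing:
+hts_remove_missing_data(hts_data = list('hh' = hh,
+                                        'person' = person,
+                                        'day' = day,
+                                        'trip' = trip,
+                                        'vehicle' = vehicle),
+                        variables_dt = variable_list,
+                        summarize_var = 'income_detailed',
+                        # '999' added here as an assumed extra missing code for illustration
+                        missing_values = c('Missing Response', '995', '999'),
+                        not_imputable = -1)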
+
hts_remove_outliers.Rd
Remove outliers from a numeric variable
+Dataset with a numeric variable to remove outliers from in data.table format.
Numeric variable to remove outliers from. Default is NULL.
Threshold to define what an outlier is. Default is .975.
+require(data.table)
+hts_remove_outliers(var_dt = trip, numvar = 'speed_mph')
+#> $outlier_description
+#> threshold num_removed min_outlier max_outlier
+#> 1: 0.975 386 110.2297 228233.1
+#>
+#> $dt
+#> hh_id person_id day_id travel_date trip_id speed_mph
+#> 1: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 21.480572
+#> 2: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 23.720178
+#> 3: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 6.927242
+#> 4: 23000173 2300017301 230001730102 2023-04-20 2.300017e+12 6.146634
+#> 5: 23000173 2300017301 230001730103 2023-04-21 2.300017e+12 34.657421
+#> ---
+#> 15031: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 8.904454
+#> 15032: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 43.284089
+#> 15033: 23419141 2341914103 234191410301 2023-05-31 2.341914e+12 25.518845
+#> 15034: 23419703 2341970301 234197030101 2023-05-31 2.341970e+12 13.131100
+#> 15035: 23419703 2341970301 234197030101 2023-05-31 2.341970e+12 16.413875
+#> mode_type d_purpose_category trip_weight
+#> 1: 8 9 356
+#> 2: 8 1 679
+#> 3: 1 9 758
+#> 4: 1 1 314
+#> 5: 8 3 883
+#> ---
+#> 15031: 8 1 950
+#> 15032: 8 6 123
+#> 15033: 8 1 720
+#> 15034: 8 13 523
+#> 15035: 8 1 298
+#>
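The cutoff can be tightened or relaxed. A sketch, assuming the argument is named threshold as in the parameter description above:
+# Keep more of the distribution by raising the outlier threshold (assumed parameter name)
+hts_remove_outliers(var_dt = trip, numvar = 'speed_mph', threshold = 0.99)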
+
+
hts_summary.Rd
Make household travel survey summaries
+hts_summary(
+ prepped_dt,
+ summarize_var,
+ summarize_by = NULL,
+ summarize_vartype = "categorical",
+ weighted = TRUE,
+ se = FALSE,
+ wtname = NULL,
+ strataname = NULL,
+ checkbox_valname = NULL,
+ checkbox_yesval = NULL
+)
A prepared dataset in data.table format with the variable to summarize, the variable to summarize by, and the weights, if used.
Name of the variable to summarize. Default is NULL.
Name of the variable to summarize the summarize_var by. Default is NULL.
String; one of 'categorical' (when the variable being summarized is categorical), 'checkbox' (when the variable being summarized is derived from a multiple response, aka select-all-that-apply question), or 'numeric' (when the variable being summarized is numeric).
Whether the data is weighted. Default is TRUE.
Whether to calculate standard error. Default is FALSE. Will be set to FALSE if weighted is FALSE.
Name of the weight column to use. Default is NULL. Must be specified when weighted = TRUE.
Name of strata name to bring in. Default is NULL.
Name of the column with the checkbox value. Default is NULL. Must be provided if summarize_var is a checkbox variable.
Value of checkbox_valname that indicates it was selected. Default is NULL. Must be provided if summarize_var is a checkbox variable.
A list containing (if applicable) categorical and numeric summaries of the specified variable(s), as well as sample sizes and whether or not the summarized variable is a shared checkbox variable. To access the categorical/numeric df use output$summary. To access the weighted df use output$summary$wtd, and output$summary$unwtd for the unweighted df. To access the weight name use output$summary$weight_name. To access sample sizes use output$n_ls. To access weighted and unweighted sample sizes respectively, use output$n_ls$wtd and output$n_ls$unwtd.
+
+require(data.table)
+require(stringr)
+require(dplyr)
+require(srvyr)
+DT = hts_prep_data(summarize_var = 'age',
+ summarize_by = 'employment',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))$cat
+#> Creating a summary of person age broken down by person employment
+output = hts_summary(prepped_dt = DT,
+ summarize_var = 'age',
+ summarize_by = 'employment',
+ summarize_vartype = 'categorical',
+ wtname = 'person_weight')
+
+DT = hts_prep_data(summarize_var = 'speed_mph',
+ summarize_by = 'age',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))$num
+#> Creating a summary of trip speed_mph broken down by person age
+output = hts_summary(prepped_dt = DT,
+ summarize_var = 'speed_mph',
+ summarize_by = 'age',
+ summarize_vartype = 'numeric',
+ wtname = 'trip_weight')
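The components described in the return value above can then be pulled from the result, for example:
+# Weighted and unweighted summary tables
+output$summary$wtd
+output$summary$unwtd
+# Weight column used and sample sizes
+output$summary$weight_name
+output$n_ls$wtd
+output$n_ls$unwtd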
+
+
hts_summary_cat.Rd
Summarize a categorical variable
+hts_summary_cat(
+ prepped_dt,
+ summarize_var = NULL,
+ summarize_by = NULL,
+ weighted = TRUE,
+ se = FALSE,
+ wtname = NULL,
+ strataname = NULL,
+ checkbox_valname = NULL,
+ checkbox_yesval = NULL
+)
Dataset containing the summary variables and key columns in data.table format.
Name of the categorical variable to summarize. Default is NULL.
Name of the variable to summarize the summarize_var by. Default is NULL.
Whether the data is weighted. Default is TRUE.
Whether to calculate standard error. Default is FALSE.
Name of the weight column to use. Default is NULL.
Name of strata name to bring in. Default is NULL.
Name of the column with the checkbox value. Default is NULL. Must be provided if summarize_var is a checkbox variable.
Value of checkbox_valname that indicates it was selected. Default is NULL. Must be provided if summarize_var is a checkbox variable.
List of unweighted and weighted categorical summaries including counts and proportions.
+
+require(data.table)
+require(stringr)
+require(dplyr)
+require(srvyr)
+DT = hts_prep_data(summarize_var = 'age',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))$cat
+#> Creating a summary of person age
+hts_summary_cat(prepped_dt = DT,
+ summarize_var = 'age',
+ wtname = 'person_weight')
+#> $unwtd
+#> age count prop
+#> 1: 1 80 0.039177277
+#> 2: 2 187 0.091576885
+#> 3: 3 30 0.014691479
+#> 4: 4 118 0.057786484
+#> 5: 5 454 0.222331048
+#> 6: 6 364 0.178256611
+#> 7: 7 232 0.113614104
+#> 8: 8 252 0.123408423
+#> 9: 9 217 0.106268364
+#> 10: 10 96 0.047012733
+#> 11: 11 12 0.005876592
+#>
+#> $wtd
+#> age count prop est
+#> 1: 1 80 0.035449836 36529
+#> 2: 2 187 0.094923344 97813
+#> 3: 3 30 0.015961112 16447
+#> 4: 4 118 0.056409774 58127
+#> 5: 5 454 0.217321305 223937
+#> 6: 6 364 0.183085511 188659
+#> 7: 7 232 0.110731123 114102
+#> 8: 8 252 0.124048709 127825
+#> 9: 9 217 0.108907634 112223
+#> 10: 10 96 0.046645032 48065
+#> 11: 11 12 0.006516621 6715
+#>
+#> $weight_name
+#> [1] "person_weight"
+#>
+DT = hts_prep_data(summarize_var = 'age',
+ summarize_by = 'employment',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))$cat
+#> Creating a summary of person age broken down by person employment
+hts_summary_cat(prepped_dt = DT,
+ summarize_var = 'age',
+ summarize_by = 'employment',
+ wtname = 'person_weight')
+#> $unwtd
+#> employment age count prop
+#> 1: 1 4 37 0.039827772
+#> 2: 1 5 345 0.371367061
+#> 3: 1 6 264 0.284176534
+#> 4: 1 7 146 0.157158235
+#> 5: 1 8 116 0.124865447
+#> 6: 1 9 19 0.020452099
+#> 7: 1 10 2 0.002152853
+#> 8: 2 3 7 0.040935673
+#> 9: 2 4 34 0.198830409
+#> 10: 2 5 31 0.181286550
+#> 11: 2 6 31 0.181286550
+#> 12: 2 7 19 0.111111111
+#> 13: 2 8 22 0.128654971
+#> 14: 2 9 22 0.128654971
+#> 15: 2 10 5 0.029239766
+#> 16: 3 4 1 0.011111111
+#> 17: 3 5 15 0.166666667
+#> 18: 3 6 22 0.244444444
+#> 19: 3 7 20 0.222222222
+#> 20: 3 8 23 0.255555556
+#> 21: 3 9 9 0.100000000
+#> 22: 5 3 21 0.043933054
+#> 23: 5 4 33 0.069037657
+#> 24: 5 5 27 0.056485356
+#> 25: 5 6 24 0.050209205
+#> 26: 5 7 30 0.062761506
+#> 27: 5 8 80 0.167364017
+#> 28: 5 9 163 0.341004184
+#> 29: 5 10 88 0.184100418
+#> 30: 5 11 12 0.025104603
+#> 31: 6 3 2 0.024691358
+#> 32: 6 4 10 0.123456790
+#> 33: 6 5 29 0.358024691
+#> 34: 6 6 15 0.185185185
+#> 35: 6 7 15 0.185185185
+#> 36: 6 8 8 0.098765432
+#> 37: 6 9 1 0.012345679
+#> 38: 6 10 1 0.012345679
+#> 39: 7 4 2 0.153846154
+#> 40: 7 5 3 0.230769231
+#> 41: 7 6 3 0.230769231
+#> 42: 7 7 2 0.153846154
+#> 43: 7 8 1 0.076923077
+#> 44: 7 9 2 0.153846154
+#> 45: 8 4 1 0.076923077
+#> 46: 8 5 4 0.307692308
+#> 47: 8 6 5 0.384615385
+#> 48: 8 8 2 0.153846154
+#> 49: 8 9 1 0.076923077
+#> 50: 995 1 80 0.299625468
+#> 51: 995 2 187 0.700374532
+#> employment age count prop
+#>
+#> $wtd
+#> employment age count prop est
+#> 1: 1 4 37 0.043813401 20360
+#> 2: 1 5 345 0.365732153 169955
+#> 3: 1 6 264 0.289476176 134519
+#> 4: 1 7 146 0.151894349 70585
+#> 5: 1 8 116 0.126103405 58600
+#> 6: 1 9 19 0.020949090 9735
+#> 7: 1 10 2 0.002031427 944
+#> 8: 2 3 7 0.044806200 3735
+#> 9: 2 4 34 0.185822767 15490
+#> 10: 2 5 31 0.143319857 11947
+#> 11: 2 6 31 0.214313991 17865
+#> 12: 2 7 19 0.109598244 9136
+#> 13: 2 8 22 0.159730803 13315
+#> 14: 2 9 22 0.121546564 10132
+#> 15: 2 10 5 0.020861575 1739
+#> 16: 3 4 1 0.006874947 327
+#> 17: 3 5 15 0.166007905 7896
+#> 18: 3 6 22 0.202590194 9636
+#> 19: 3 7 20 0.226641998 10780
+#> 20: 3 8 23 0.267303002 12714
+#> 21: 3 9 9 0.130581953 6211
+#> 22: 5 3 21 0.046289050 11298
+#> 23: 5 4 33 0.061272150 14955
+#> 24: 5 5 27 0.064517054 15747
+#> 25: 5 6 24 0.054749565 13363
+#> 26: 5 7 30 0.066000205 16109
+#> 27: 5 8 80 0.152178634 37143
+#> 28: 5 9 163 0.345020998 84211
+#> 29: 5 10 88 0.182460309 44534
+#> 30: 5 11 12 0.027512035 6715
+#> 31: 6 3 2 0.034445798 1414
+#> 32: 6 4 10 0.140414129 5764
+#> 33: 6 5 29 0.342679659 14067
+#> 34: 6 6 15 0.177880633 7302
+#> 35: 6 7 15 0.169987820 6978
+#> 36: 6 8 8 0.095907430 3937
+#> 37: 6 9 1 0.018026797 740
+#> 38: 6 10 1 0.020657734 848
+#> 39: 7 4 2 0.154918668 1000
+#> 40: 7 5 3 0.191789311 1238
+#> 41: 7 6 3 0.368706429 2380
+#> 42: 7 7 2 0.079628195 514
+#> 43: 7 8 1 0.119287374 770
+#> 44: 7 9 2 0.085670023 553
+#> 45: 8 4 1 0.025957973 231
+#> 46: 8 5 4 0.346892909 3087
+#> 47: 8 6 5 0.403865603 3594
+#> 48: 8 8 2 0.151252950 1346
+#> 49: 8 9 1 0.072030565 641
+#> 50: 995 1 80 0.271910497 36529
+#> 51: 995 2 187 0.728089503 97813
+#> employment age count prop est
+#>
+#> $weight_name
+#> [1] "person_weight"
+#>
+
+DT = hts_prep_data(summarize_var = 'employment',
+ summarize_by = c('race', 'income_detailed', 'gender'),
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))$cat
+#> Creating a summary of person employment broken down by person race and hh income_detailed and person gender
+
+hts_summary_cat(prepped_dt = DT,
+ summarize_var = 'employment',
+ summarize_by = c('race', 'income_detailed', 'gender'),
+ wtname = 'person_weight')$unwtd
+#> race income_detailed gender employment count
+#> 1: African American or Black 1 1 5 1
+#> 2: African American or Black 1 2 6 1
+#> 3: African American or Black 1 2 8 1
+#> 4: African American or Black 1 999 6 1
+#> 5: African American or Black 2 1 1 1
+#> ---
+#> 393: Prefer not to answer 999 2 8 1
+#> 394: Prefer not to answer 999 999 1 22
+#> 395: Prefer not to answer 999 999 3 4
+#> 396: Prefer not to answer 999 999 5 6
+#> 397: Prefer not to answer 999 999 8 1
+#> prop
+#> 1: 1.00000000
+#> 2: 0.50000000
+#> 3: 0.50000000
+#> 4: 1.00000000
+#> 5: 0.20000000
+#> ---
+#> 393: 0.05882353
+#> 394: 0.66666667
+#> 395: 0.12121212
+#> 396: 0.18181818
+#> 397: 0.03030303
+
hts_summary_num.Rd
Summarize a numeric variable
+hts_summary_num(
+ prepped_dt,
+ summarize_var = NULL,
+ summarize_by = NULL,
+ weighted = TRUE,
+ se = FALSE,
+ wtname = NULL,
+ strataname = NULL
+)
A prepared dataset in data.table format with the variable to summarize, the variable to summarize by, and the weights, if used.
Name of the variable to summarize. Default is NULL.
Name of the variable to summarize the summarize_var by. Default is NULL.
Whether the data is weighted. Default is TRUE.
Whether to calculate standard error. Default is FALSE. Will be set to FALSE if weighted is FALSE.
Name of the weight column to use. Default is NULL. Must be specified when weighted = TRUE.
Name of strata name to bring in. Default is NULL.
List of unweighted and weighted numeric summaries including count, min, max, mean, se, and median.
+
+require(data.table)
+require(stringr)
+require(dplyr)
+require(srvyr)
+DT = hts_prep_data(summarize_var = 'speed_mph',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))$num
+#> Creating a summary of trip speed_mph
+hts_summary_num(prepped_dt = DT,
+ summarize_var = 'speed_mph',
+ wtname = 'trip_weight')
+#> $unwtd
+#> count min max mean median
+#> 1: 15035 0 109.2184 12.89507 9.728796
+#>
+#> $wtd
+#> count min max mean median
+#> 1: 15035 0 109.2184 12.84671 9.760141
+#>
+#> $weight_name
+#> [1] "trip_weight"
+#>
+DT = hts_prep_data(summarize_var = 'speed_mph',
+ summarize_by = 'age',
+ variables_dt = variable_list,
+ data = list('hh' = hh,
+ 'person' = person,
+ 'day' = day,
+ 'trip' = trip,
+ 'vehicle' = vehicle))$num
+#> Creating a summary of trip speed_mph broken down by person age
+hts_summary_num(prepped_dt = DT,
+ summarize_var = 'speed_mph',
+ summarize_by = 'age',
+ wtname = 'trip_weight')
+#> $unwtd
+#> age count min max mean median
+#> 1: 1 311 0.35959216 101.19362 14.55031 10.280068
+#> 2: 2 844 0.00000000 82.89088 12.26536 10.190479
+#> 3: 3 89 0.00000000 98.43097 12.02007 9.323293
+#> 4: 4 984 0.00000000 109.21842 11.97881 7.763898
+#> 5: 5 3704 0.00000000 108.02176 12.21873 8.234977
+#> 6: 6 3685 0.00000000 101.19362 12.50943 9.558940
+#> 7: 7 1940 0.01815516 106.72662 13.84846 10.808561
+#> 8: 8 1644 0.02554711 104.77523 14.14443 11.208044
+#> 9: 9 1379 0.00000000 105.10755 13.40606 11.036031
+#> 10: 10 433 0.07317172 76.07189 13.43742 11.760465
+#> 11: 11 22 0.15677999 88.79454 16.50646 11.099190
+#>
+#> $wtd
+#> age count min max mean median
+#> 1: 1 311 0.35959216 101.19362 14.06340 9.556707
+#> 2: 2 844 0.00000000 82.89088 12.69728 10.496980
+#> 3: 3 89 0.00000000 98.43097 13.16383 12.224664
+#> 4: 4 984 0.00000000 109.21842 11.79167 7.763898
+#> 5: 5 3704 0.00000000 108.02176 12.04057 8.095823
+#> 6: 6 3685 0.00000000 101.19362 12.47361 9.474190
+#> 7: 7 1940 0.01815516 106.72662 13.84666 10.899718
+#> 8: 8 1644 0.02554711 104.77523 14.20364 11.197504
+#> 9: 9 1379 0.00000000 105.10755 13.34549 11.127732
+#> 10: 10 433 0.07317172 76.07189 13.44185 12.035237
+#> 11: 11 22 0.15677999 88.79454 14.11659 11.099190
+#>
+#> $weight_name
+#> [1] "trip_weight"
+#>
+
hts_to_so.Rd
Transform travel survey data.table to a survey object
+Dataframe in data.table format to transform to survey object.
If true creates a weighted survey object. Default is TRUE.
Name of the weight column in the dataframe. Defaults to NULL, but must exist if weighted is true.
Name of strata name to bring in. Default is NULL.
+require(data.table)
+hts_to_so(prepped_dt = trip, wtname = 'trip_weight')
+#> Independent Sampling design (with replacement)
+#> Called via srvyr
+#> Sampling variables:
+#> - ids: `1`
+#> - weights: weight
+#> Data variables: hh_id (int), person_id (dbl), day_id (dbl), travel_date (date),
+#> trip_id (dbl), speed_mph (dbl), mode_type (int), d_purpose_category (int),
+#> weight (int)
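An unweighted survey object can also be built by turning weighting off. A sketch, assuming the argument is named weighted as in the parameter description above:
+hts_to_so(prepped_dt = trip, weighted = FALSE)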
+
+
hts_trip_vehid.Rd
Add vehicle_id to trip table
+hts_trip_vehid(
+ trip_table,
+ vehicle_table,
+ vehicle_mode_type = "Vehicle",
+ values_dt = value_labels,
+ ...
+)
Dataset of trips in data.table format.
Dataset of vehicles in data.table format.
Mode type label for vehicle. Default is 'Vehicle'.
Dataset of value labels in data.table format.
Additional arguments passed to factorize_column().
+require(data.table)
+trip_ex = data.table(
+ hh_id = sample(1:10, size = 30, replace = TRUE),
+ trip_id = 1:30,
+ mode_type = sample(1:2, size = 30, replace = TRUE),
+ mode_1 = sample(1, size = 30, replace = TRUE))
+vehicle_ex = data.table(
+ hh_id = sample(1:10, size = 30, replace = TRUE),
+ vehicle_id = 1:30)
+values_ex = data.table(
+ variable = c(rep('mode_type', 2), ('mode_1')),
+ value = c(1,2,1),
+ value_label = c('Vehicle', 'Walk', 'Car')
+ )
+hts_trip_vehid(trip_table = trip_ex,
+ vehicle_table = vehicle_ex,
+ vehicle_mode_type = 'Vehicle',
+ values_dt = values_ex)
+#> hh_id trip_id mode_type mode_1 vehicle_id
+#> 1: 7 1 2 1 <NA>
+#> 2: 5 2 2 1 <NA>
+#> 3: 6 3 1 1 <NA>
+#> 4: 4 4 1 1 <NA>
+#> 5: 6 5 2 1 <NA>
+#> 6: 9 6 2 1 <NA>
+#> 7: 5 7 1 1 <NA>
+#> 8: 5 8 2 1 <NA>
+#> 9: 8 9 1 1 <NA>
+#> 10: 2 10 1 1 <NA>
+#> 11: 7 11 1 1 <NA>
+#> 12: 5 12 2 1 <NA>
+#> 13: 5 13 1 1 <NA>
+#> 14: 2 14 2 1 <NA>
+#> 15: 6 15 1 1 <NA>
+#> 16: 4 16 2 1 <NA>
+#> 17: 2 17 2 1 <NA>
+#> 18: 3 18 1 1 <NA>
+#> 19: 9 19 2 1 <NA>
+#> 20: 6 20 2 1 <NA>
+#> 21: 7 21 2 1 <NA>
+#> 22: 5 22 1 1 <NA>
+#> 23: 6 23 2 1 <NA>
+#> 24: 9 24 1 1 <NA>
+#> 25: 6 25 2 1 <NA>
+#> 26: 10 26 1 1 <NA>
+#> 27: 5 27 2 1 <NA>
+#> 28: 2 28 1 1 <NA>
+#> 29: 1 29 2 1 <NA>
+#> 30: 4 30 2 1 <NA>
+#> hh_id trip_id mode_type mode_1 vehicle_id
+
+
day
+ factorize_column()
+ factorize_df()
+ get_distance_meters()
+ hh
+ hts_bin_var()
+ hts_cbind_var()
+ hts_filter_data()
+ hts_find_var()
+ hts_get_ns()
+ hts_melt_vars()
+ hts_prep_byvar()
+ hts_prep_data()
+ hts_prep_triprate()
+ hts_remove_missing_data()
+ hts_remove_outliers()
+ hts_summary()
+ hts_summary_cat()
+ hts_summary_num()
+ hts_to_so()
+ hts_trip_vehid()
+ join_spatial()
+ person
+ test_data
+ trip
+ value_labels
+ variable_list
+ vehicle
+ join_spatial.Rd
Performs a spatial join to obtain geographic fields for specified lon/lat columns.
+join_spatial(
+ x,
+ y,
+ id_col,
+ lon_col,
+ lat_col,
+ crs_lonlat = 4326,
+ crs_equal_area = 5070,
+ largest = FALSE
+)
A data.table of survey data
An sf object with the geometry of interest.
character. The column in x that is a unique identifier.
character. The column in x that has longitude.
character. The column in x that has latitude.
numeric. The EPSG code for lonlat data (defaults to WGS84).
numeric. The EPSG code for an equal area projection. Many spatial operations assume projected data (not lon/lat). Defaults to US Albers Equal Area.
logical. If TRUE, return x features with fields of y that have the largest overlap with each of the features of x; see st_join. This is useful if the join is returning more than one value of y for each x.
if (FALSE) {
+x = data.table(id = 1:3, lon=c(-82.33, -79.17, -76.17), lat = c(35.50, 36.27, 36.49))
+nc = st_read(system.file('shape/nc.shp', package='sf'))[, c('FIPS', 'NAME', 'geometry')]
+z = join_spatial(x, nc, id_col='id', lon_col = 'lon', lat_col = 'lat')
+z}
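When the join returns more than one y feature per x, largest = TRUE keeps only the feature with the greatest overlap. A sketch reusing x and nc from the example above (not run, same as the example):
+if (FALSE) {
+# largest = TRUE resolves points that match multiple polygons to the single best match
+z_largest = join_spatial(x, nc, id_col = 'id', lon_col = 'lon', lat_col = 'lat', largest = TRUE)
+z_largest}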
+
+
person.Rd
A dataset containing person level attributes of almost 2,000 persons from the 2023 Puget Sound Regional Council Household Travel Survey. The variables are as follows:
+person
A data frame with 1,999 rows and 19 columns:
8 digit household ID
10 digit person ID
Not of Hispanic, Latino, or Spanish origin, response codes
Mexican, Mexican American, Chicano, response codes
Puerto Rican, response codes
Cuban, response codes
Another Hispanic, Latino, or Spanish origin, response codes
Prefer not to answer, response codes
African American or Black, response codes
American Indian or Alaska Native, response codes
Asian, response codes
Native Hawaiian or other Pacific Islander, response codes
White, response codes
Other race, response codes
Prefer not to answer, response codes
Age, response codes
Gender, response codes
Employment status, response codes
Highest level of education completed, response codes
test_data.Rd
Test household travel survey dataset
+trip.Rd
A dataset containing attributes of over 20,000 trips from the 2023 Puget Sound Regional Council Household Travel Survey. The variables are as follows:
+trip
A data frame with 21,378 rows and 7 columns:
8 digit household ID
10 digit person ID
12 digit day ID
13 digit trip ID
Date of trip
Type of transportation used for trip, response codes
Purpose for taking trip to destination, response codes
value_labels.Rd
A dataset containing the values for all variables found in variable_list. The variables are as follows:
+variable_list.Rd
A dataset containing information about all variables existing in the hh, person, day, trip, and vehicle tables. The variables are as follows:
+variable_list
A data frame with 41 rows and 13 columns:
The order the variables are presented in
Where the variable was created
Name of the variable
The variable is a 'Select all that Apply' question
The variable exists in the hh table
The variable exists in the person table
The variable exists in the day table
The variable exists in the trip table
The variable exists in the vehicle table
Data type of the variable
A description of the variable
Conditions where the variable should have a value