diff --git a/DESCRIPTION b/DESCRIPTION index a2382ee..2c747ab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,13 @@ Package: travelSurveyTools Title: travelSurveyTools -Version: 2.3.2 +Version: 2.3.3 Authors@R: c( person("RSG", "Inc.", , "rsg@rsginc.com", role = c("aut", "cre")), person("Ashley", "Asmus", , "ashley.asmus@rsginc.com", role = "aut"), person("Erika", "Redding", , "erika.redding@rsginc.com", role = "aut"), person("Jacob", "Moore", , "jacob.moore@rsginc.com", role = "aut"), - person("James", "Clark", , "james.clark@rsginc.com", role = "aut") + person("James", "Clark", , "james.clark@rsginc.com", role = "aut"), + person("Matt", "Landis", , "matt.landis@rsginc.com", role = "aut") ) Description: An R Package To Make Working With And Summarizing Household Travel Survey Data Easier. License: GPL (>= 3) @@ -30,7 +31,7 @@ Suggests: tigris Config/testthat/edition: 3 Depends: - R (>= 2.10) + R (>= 4.1) LazyData: true VignetteBuilder: knitr URL: https://github.com/RSGInc/travelSurveyTools diff --git a/NEWS.md b/NEWS.md index a8e8d99..afb4758 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# travelSurveyTools 2.3.3 + +- Small updates to DESCRIPTION + # travelSurveyTools 2.3.2 - Additional steps to ensure anonymity of test data diff --git a/R/data.R b/R/data.R index b2ff0d6..7c75536 100644 --- a/R/data.R +++ b/R/data.R @@ -128,7 +128,7 @@ #' A data frame with 55 rows and 10 columns: #' \describe{ #' \item{variable}{Name of the variable} -#' \item{is_checkbox}{The variable is a 'Select all that Apply' question} +#' \item{is_checkbox}{The variable is a multiple response categorical variable question} #' \item{hh}{The variable exists in the hh table} #' \item{person}{The variable exists in the person table} #' \item{day}{The variable exists in the day table} diff --git a/R/hts_summary_cat.R b/R/hts_summary_cat.R index 3edc5f0..39fc6e5 100644 --- a/R/hts_summary_cat.R +++ b/R/hts_summary_cat.R @@ -228,14 +228,14 @@ hts_summary_cat = function(prepped_dt, 
strataname = strataname) wtd_summary = - so %>% - group_by_at(unlist(groupbyvars)) %>% + so |> + group_by_at(unlist(groupbyvars)) |> summarize( count = length(get(summarize_var)), prop = srvyr::survey_prop(proportion = FALSE, vartype = "se"), est = survey_total(vartype = "se") - ) %>% + ) |> setDT() } else if (!se) { diff --git a/R/hts_summary_num.R b/R/hts_summary_num.R index 43f1a4f..3e70ff7 100644 --- a/R/hts_summary_num.R +++ b/R/hts_summary_num.R @@ -90,15 +90,15 @@ hts_summary_num = function(prepped_dt, # Calculate survey proportions: num_summary_wttype = - num_so_ls[[wt_type]] %>% - group_by_at(unlist(summarize_by)) %>% + num_so_ls[[wt_type]] |> + group_by_at(unlist(summarize_by)) |> summarize( count = length(get(summarize_var)), min = min(get(summarize_var), na.rm = TRUE), max = max(get(summarize_var), na.rm = TRUE), mean = survey_mean(get(summarize_var), vartype = variance_type, na.rm = TRUE), median = survey_median(get(summarize_var), vartype = NULL, na.rm = TRUE) - ) %>% + ) |> setDT() num_summary_ls[[wt_type]] = num_summary_wttype @@ -107,15 +107,15 @@ hts_summary_num = function(prepped_dt, # Calculate survey proportions: num_summary_wttype = - num_so_ls[[wt_type]] %>% - group_by_at(unlist(summarize_by)) %>% + num_so_ls[[wt_type]] |> + group_by_at(unlist(summarize_by)) |> summarize( count = length(get(summarize_var)), min = min(get(summarize_var), na.rm = TRUE), max = max(get(summarize_var), na.rm = TRUE), mean = survey_mean(get(summarize_var), vartype = NULL, na.rm = TRUE), median = survey_median(get(summarize_var), vartype = NULL, na.rm = TRUE) - ) %>% + ) |> setDT() num_summary_ls[[wt_type]] = num_summary_wttype diff --git a/R/utils-pipe.R b/R/utils-pipe.R index fd0b1d1..45333c8 100644 --- a/R/utils-pipe.R +++ b/R/utils-pipe.R @@ -11,4 +11,4 @@ #' @param lhs A value or the magrittr placeholder. #' @param rhs A function call using the magrittr semantics. #' @return The result of calling `rhs(lhs)`. 
-NULL +NULL \ No newline at end of file diff --git a/man/variable_list.Rd b/man/variable_list.Rd index ca6a185..c5d309f 100644 --- a/man/variable_list.Rd +++ b/man/variable_list.Rd @@ -10,7 +10,7 @@ A data frame with 55 rows and 10 columns: \describe{ \item{variable}{Name of the variable} -\item{is_checkbox}{The variable is a 'Select all that Apply' question} +\item{is_checkbox}{The variable is a multiple response categorical variable question} \item{hh}{The variable exists in the hh table} \item{person}{The variable exists in the person table} \item{day}{The variable exists in the day table} diff --git a/vignettes/a01_getting_started.Rmd b/vignettes/a01_getting_started.Rmd index 830ad45..addd524 100644 --- a/vignettes/a01_getting_started.Rmd +++ b/vignettes/a01_getting_started.Rmd @@ -8,6 +8,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- + ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE) ``` @@ -18,7 +19,7 @@ The `travelSurveyTools` package provides tools for R users to aid use of data fr ## Data Assumptions -`travelSurveyTools` assumes the the data have the structure shown below. If this does not reflect the structure of your data +`travelSurveyTools` assumes the data have the structure shown below. 
### hts_data @@ -28,7 +29,7 @@ hts_data is a list of five core tables: Household dataset -- hh_id: 8 digit household ID +- hh_id: household ID - survey variables asked on a household level - hh_weight: household weight @@ -36,8 +37,8 @@ Household dataset Person dataset -- hh_id: 8 digit household ID -- person_id: 10 digit person ID +- hh_id: household ID +- person_id: person ID - survey variables asked on a person level - person_weight: person weight @@ -45,9 +46,9 @@ Person dataset Day dataset -- hh_id: 8 digit household ID -- person_id: 10 digit person ID -- day_id: 12 digit day ID +- hh_id: household ID +- person_id: person ID +- day_id: day ID - survey variable asked on a day level - day_weight: day weight @@ -55,10 +56,10 @@ Day dataset Trip dataset -- hh_id: 8 digit household ID -- person_id: 10 digit person ID -- day_id: 12 digit day ID -- trip_id: 13 digit trip ID +- hh_id: household ID +- person_id: person ID +- day_id: day ID +- trip_id: trip ID - survey variables asked on a trip level - trip_weight: trip weight @@ -66,8 +67,8 @@ Trip dataset Vehicle dataset -- hh_id: 8 digit household ID -- vehicle_id: 10 digit vehicle ID +- hh_id: household ID +- vehicle_id: vehicle ID - survey responses asked on a vehicle level - hh_weight: household weight @@ -80,7 +81,7 @@ In addition to data from the household travel survey. The codebook is also requi A dataset containing information about all variables existing in the hh, person, day, trip, and vehicle tables. 
The variables are as follows: - variable: Name of the variable -- is_checkbox: The variable is a 'Select all that Apply' question +- is_checkbox: The variable is a multiple response categorical variable question - hh: The variable exists in the hh table - person: The variable exists in the person table - day: The variable exists in the day table @@ -90,7 +91,7 @@ A dataset containing information about all variables existing in the hh, person, - data_type: Data type of the variable - description: A description of the variable - logic: Conditions where the variable should have a value -- shared_name: the shared name of checkbox variable or the variable name for non-checkbox variables +- shared_name: the shared name of a multiple response categorical variable or the variable name for non-multiple response categorical variables #### value_labels @@ -206,6 +207,7 @@ If we want to summarize a variable by another variable (e.g., mode type by a per ```{r, mode_type_race_example, echo=TRUE, eval=TRUE} + mode_type_list = hts_prep_data( summarize_var = 'mode_type', summarize_by = 'race', @@ -226,6 +228,26 @@ mode_by_race_summary = hts_summary( mode_by_race_summary$summary +if(FALSE){ + + age_study_year_list = hts_prep_data( + summarize_var = 'age', + summarize_by = 'study_year', + variables_dt = variable_list, + data = test_data + ) + + mode_by_race_summary = hts_summary( + prepped_dt = age_study_year_list$cat, + summarize_var = 'age', + summarize_by = 'study_year', + summarize_vartype = 'categorical', + weighted = TRUE, + wtname = 'trip_weight', + se = TRUE + ) + +} ``` @@ -259,27 +281,22 @@ mode_by_race_summary$summary ```{r, mode_type_ethnicity_example, echo=TRUE, eval=TRUE} -mode_type_list2 = hts_prep_data( +mode_type_race_ethnicity_list = hts_prep_data( summarize_var = 'mode_type', summarize_by = c('race', 'ethnicity'), variables_dt = variable_list, - data = list( - 'hh' = hh, - 'person' = person, - 'day' = day, - 'trip' = trip, - 'vehicle' = vehicle - 
) + data = list('hh' = hh, + 'person' = person, + 'day' = day, + 'trip' = trip, + 'vehicle' = vehicle) ) mode_by_race_ethnicity_summary = hts_summary( - prepped_dt = mode_type_list2$cat, + prepped_dt = mode_type_race_ethnicity_list$cat, summarize_var = 'mode_type', summarize_by = c('race', 'ethnicity'), - summarize_vartype = 'categorical', - weighted = TRUE, - wtname = 'trip_weight', - se = TRUE + wtname = 'trip_weight' ) @@ -294,23 +311,26 @@ head(mode_by_race_ethnicity_summary$summary$wtd, 10) ```{r, trip_rates_example} -DT = hts_prep_triprate(summarize_by = 'employment', - variables_dt = variable_list, - trip_name = 'trip', - day_name = 'day', - hts_data = list('hh' = hh, - 'person' = person, - 'day' = day, - 'trip' = trip, - 'vehicle' = vehicle)) - -trip_rate_by_employment_summary = hts_summary(prepped_dt = DT$num, - summarize_var = 'num_trips_wtd', - summarize_by = 'employment', - summarize_vartype = 'numeric', - weighted = TRUE, - wtname = 'day_weight', - se = TRUE) +employment_triprate_list = hts_prep_triprate( + summarize_by = 'employment', + variables_dt = variable_list, + trip_name = 'trip', + day_name = 'day', + hts_data = list('hh' = hh, + 'person' = person, + 'day' = day, + 'trip' = trip, + 'vehicle' = vehicle) +) + +trip_rate_by_employment_summary = hts_summary( + prepped_dt = employment_triprate_list$num, + summarize_var = 'num_trips_wtd', + summarize_by = 'employment', + summarize_vartype = 'numeric', + weighted = TRUE, + wtname = 'day_weight', + se = TRUE) head(trip_rate_by_employment_summary$summary$wtd, 10) @@ -342,13 +362,13 @@ trip_rate_by_employment_summary$summary$wtd `hts_summary` creates outputs that can easily be used to create visuals. 
-```{r, ggplot_example, echo=TRUE, eval=TRUE} +```{r, ggplot_example, echo=TRUE, eval=TRUE, fig.width=8, fig.height=6} library(ggplot2) p = ggplot( trip_rate_by_employment_summary$summary$wtd, - aes(x = mean, y = employment)) + + aes(x = mean, y = employment, label = count)) + geom_bar(stat = 'identity') + geom_errorbar( aes(xmin = (mean - mean_se), @@ -372,35 +392,42 @@ To summarize a new variable with `hts_summary` it must first be added to the `va test_data$hh[, hh_size := ifelse(num_people < 4, 0, 1)] -variable_list = rbind(variable_list, - data.table(variable = 'hh_size', - is_checkbox = 0, - hh = 1, - person = 0, - day = 0, - trip = 0, - vehicle = 0, - description = 'Household size', - data_type = 'integer/categorical', - shared_name = 'hh_size') - ) - -value_labels = rbind(value_labels, - data.table(variable = rep('hh_size', 2), - value = c(0,1), - label = c('Small household', 'Large household'), - val_order = c(214:215)) - ) - -DT = hts_prep_data(summarize_var = 'hh_size', - variables_dt = variable_list, - data = test_data) - -hh_size_summary = hts_summary(prepped_dt = DT$cat, - summarize_var = 'hh_size', - summarize_vartype = 'categorical', - weighted = TRUE, - wtname = 'hh_weight') + +variable_list = rbind( + variable_list, + data.table(variable = 'hh_size', + is_checkbox = 0, + hh = 1, + person = 0, + day = 0, + trip = 0, + vehicle = 0, + description = 'Household size', + data_type = 'integer/categorical', + shared_name = 'hh_size') +) + +value_labels = rbind( + value_labels, + data.table(variable = rep('hh_size', 2), + value = c(0,1), + label = c('Small household', 'Large household'), + val_order = c(214:215)) +) + +hh_size_list = hts_prep_data( + summarize_var = 'hh_size', + variables_dt = variable_list, + data = test_data +) + +hh_size_summary = hts_summary( + prepped_dt = hh_size_list$cat, + summarize_var = 'hh_size', + summarize_vartype = 'categorical', + weighted = TRUE, + wtname = 'hh_weight' +) factorize_df(df = hh_size_summary$summary$wtd, 
value_labels, value_label_colname = 'label') diff --git a/vignettes/a02_geographic_summaries.Rmd b/vignettes/a02_geographic_summaries.Rmd index b1803bb..4bd0c96 100644 --- a/vignettes/a02_geographic_summaries.Rmd +++ b/vignettes/a02_geographic_summaries.Rmd @@ -31,19 +31,31 @@ states = states() `hts_summary` can summarize geographic variables. ```{r, county_income, echo=TRUE, eval=TRUE} -DT = hts_prep_data(summarize_var = 'income_detailed', summarize_by = 'home_county', data = test_data) - -output = hts_summary(prepped_dt = DT$cat, summarize_var = 'income_detailed', summarize_by = 'home_county') - -factorize_df(output$summary$unwtd, vals_df = value_labels, value_label_colname = 'label') +income_county_list = hts_prep_data( + summarize_var = 'income_detailed', + summarize_by = 'home_county', + data = test_data + ) + +output = hts_summary( + prepped_dt = income_county_list$cat, + summarize_var = 'income_detailed', + summarize_by = 'home_county' + ) + +factorize_df( + output$summary$unwtd, + vals_df = value_labels, + value_label_colname = 'label' + ) ``` -### Using spatial_join to join together separate shapefiles +### Using join_spatial to join together separate shapefiles -`spatial_join` can link together multiple geographies. +`join_spatial` can link together multiple geographies. 
-```{r, spatial_join, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE} +```{r, join_spatial, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE} hh = join_spatial( hh, diff --git a/vignettes/a03_trip_rates.Rmd b/vignettes/a03_trip_rates.Rmd index 572de30..9b449f0 100644 --- a/vignettes/a03_trip_rates.Rmd +++ b/vignettes/a03_trip_rates.Rmd @@ -3,11 +3,11 @@ title: "Creating summaries of trip rates" description: > output: html_vignette vignette: > - %\VignetteIndexEntry{Creating summaries of trip rates} + %\VignetteIndexEntry{Creating summaries of trip rates} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} ---- - +--- + ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE) @@ -24,10 +24,12 @@ data("test_data") data("variable_list") data("value_labels") -DT = hts_prep_triprate(variables_dt = variable_list, - trip_name = 'trip', - day_name = 'day', - hts_data = test_data) +prepped_triprates_list = hts_prep_triprate( + variables_dt = variable_list, + trip_name = 'trip', + day_name = 'day', + hts_data = test_data +) ``` @@ -37,7 +39,7 @@ After preparing the data we can create a summary using `hts_summary`. ```{r, hts_summary, echo=TRUE} hts_summary( - prepped_dt = DT$num, + prepped_dt = prepped_triprates_list$num, summarize_var = 'num_trips_wtd', summarize_vartype = 'numeric' ) @@ -49,14 +51,14 @@ We can also summarize trip rates by one or more variables. 
```{r, trip_rate_job_type, echo=TRUE, fig.width=8, fig.height=6, warning=FALSE} -DT = hts_prep_triprate(variables_dt = variable_list, +job_type_triprate_list = hts_prep_triprate(variables_dt = variable_list, summarize_by = 'job_type', - trip_name = 'trip', - day_name = 'day', - hts_data = test_data) + trip_name = 'trip', + day_name = 'day', + hts_data = test_data) num_trips_job_type = hts_summary( - prepped_dt = DT$num, + prepped_dt = job_type_triprate_list$num, summarize_by = 'job_type', summarize_var = 'num_trips_wtd', summarize_vartype = 'numeric', @@ -67,9 +69,11 @@ num_trips_job_type = hts_summary( # Label job_type -num_trips_job_type_labeled = factorize_df(num_trips_job_type, - value_labels, - value_label_colname = 'label') +num_trips_job_type_labeled = factorize_df( + num_trips_job_type, + value_labels, + value_label_colname = 'label' +) # Create a plot @@ -86,14 +90,16 @@ ggplot(num_trips_job_type_labeled, ```{r, trip_rate_race_ethnicity, fig.width=8, fig.height=6, warning=FALSE} -DT = hts_prep_triprate(variables_dt = variable_list, - summarize_by = c('race', 'ethnicity'), - trip_name = 'trip', - day_name = 'day', - hts_data = test_data) +race_ethnicity_triprate_list = hts_prep_triprate( + variables_dt = variable_list, + summarize_by = c('race', 'ethnicity'), + trip_name = 'trip', + day_name = 'day', + hts_data = test_data +) num_trips_race_ethnicity = hts_summary( - prepped_dt = DT$num, + prepped_dt = race_ethnicity_triprate_list$num, summarize_by = c('race', 'ethnicity'), summarize_var = 'num_trips_wtd', summarize_vartype = 'numeric', @@ -103,9 +109,11 @@ num_trips_race_ethnicity = hts_summary( )$summary$wtd # label data -num_trips_race_ethnicity_labeled = factorize_df(num_trips_race_ethnicity, - value_labels, - value_label_colname = 'label') +num_trips_race_ethnicity_labeled = factorize_df( + num_trips_race_ethnicity, + value_labels, + value_label_colname = 'label' +) # Create a plot ggplot(num_trips_race_ethnicity_labeled,