Skip to content

Commit

Permalink
Merge pull request #119 from RSGInc/main
Browse files Browse the repository at this point in the history
Pull from main
  • Loading branch information
jacobmoore5067 authored Feb 27, 2024
2 parents 8b8e11a + 2b9e557 commit 2698dcf
Show file tree
Hide file tree
Showing 11 changed files with 189 additions and 349 deletions.
20 changes: 11 additions & 9 deletions R/factorize_column.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,17 @@
#' )
#'
#' @export factorize_column
factorize_column = function(x,
var_str,
vals_df,
variable_colname = 'variable',
value_colname = 'value',
value_label_colname = 'value_label',
value_order_colname = 'value',
extra_labels = NULL,
add_na = TRUE) {
factorize_column = function(
x,
var_str,
vals_df,
variable_colname = 'variable',
value_colname = 'value',
value_label_colname = 'value_label',
value_order_colname = 'value',
extra_labels = NULL,
add_na = TRUE
) {
vals_df = data.table::data.table(vals_df)

# sort the vals_df to ensure the ordered factor is ordered correctly
Expand Down
50 changes: 33 additions & 17 deletions R/hts_bin_var.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,55 @@
#' require(stringr)
#' hts_bin_var(prepped_dt = trip, numvar = 'speed_mph')
#'
hts_bin_var = function(prepped_dt,
numvar,
nbins = 7) {
hts_bin_var = function(
prepped_dt,
numvar,
nbins = 7
) {

# TODO: Allow user to specify bins directly to cut
prepped_dt_binned = data.table::copy(prepped_dt)

data.table::setnames(prepped_dt_binned, old = numvar, new = "numvar")

# Reclassify outliers:
q05 = round(stats::quantile(prepped_dt_binned[, numvar], na.rm = TRUE, 0.025))
q95 = round(stats::quantile(prepped_dt_binned[, numvar], na.rm = TRUE, 0.975))
# TODO: Should this be a parameter.
# TODO: Defaults should be described in description.
min_prob = 0.05
q_min = round(
stats::quantile(
x = prepped_dt_binned[, numvar],
probs = min_prob / 2,
na.rm = TRUE
)
)
q_max = round(
stats::quantile(
x = prepped_dt_binned[, numvar],
probs = 1 - min_prob / 2,
na.rm = TRUE,
)
)

prepped_dt_binned[, binned := ifelse(numvar >= q95, q95, numvar)]
prepped_dt_binned[, binned := ifelse(numvar <= q05, q05, numvar)]
prepped_dt_binned[, binned := ifelse(numvar >= q_max, q_max, numvar)]
prepped_dt_binned[, binned := ifelse(numvar <= q_min, q_min, numvar)]


# Create breaks:
round_digits = ifelse(
as.numeric(q95-q05) < 5,
1,
0)
round_digits = 1 * (as.numeric(q_max-q_min) < 5)


mid_breaks = seq(from = q05,
to = q95,
by = round((q95 - q05) / (nbins - 2), round_digits))
mid_breaks = seq(
from = q_min,
to = q_max,
by = round((q_max - q_min) / (nbins - 2), round_digits)
)

min_break = ifelse(q05 == 0, -Inf, 0)
min_break = ifelse(q_min == 0, -Inf, 0)

all_breaks = c(min_break,
mid_breaks,
q95,
q_max,
Inf)

all_breaks = unique(all_breaks)
Expand All @@ -68,7 +84,7 @@ hts_bin_var = function(prepped_dt,
pattern = ",Inf|, Inf",
replacement = " or more")
binlabels[[1]] =
ifelse(q05 == 0,
ifelse(q_min == 0,
"Exactly 0",
paste0(stringr::str_split_i(
binlabels[[1]], i = 2, pattern = ","
Expand Down
4 changes: 3 additions & 1 deletion R/hts_cbind_var.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ hts_cbind_var = function(lhs_table,
message("Joining ",
rhs_var,
" to table on ",
paste0(common_cols, collapse = ", "))
paste0(common_cols, collapse = ", ")
)

merge_t = merge(
lhs_table,
rhs_table,
Expand Down
16 changes: 9 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,29 @@
<!-- badges: end -->

## Travel Survey Tools
`travelSurveyTools` is an R package that empowers users of household travel survey (HTS) data to create meaningful summaries of their data. Currently, `travelSurveyTools` is compatible with RSG, Inc. HTS datasets, but we hope to expand to any travel survey. If you would like to collaborate, please contact Suzanne Childress at schildress@psrc.org. These datasets usually contain six tables: household, person, day, trip, vehicle, and location. In the future, we may expand `travelSurveyTools` to work with other types of travel survey data or other types of surveys.
`travelSurveyTools` is an R package that empowers users of household travel survey (HTS) data to create meaningful summaries of their data. Currently, `travelSurveyTools` is compatible with HTS datasets from [RSG, Inc.](https://rsginc.com/), but we hope to expand to any travel survey. If you would like to collaborate, please contact Erika Redding at [erika.redding@rsginc.com](mailto:erika.redding@rsginc.com?subject=TravelSurveyTools). These datasets usually contain six tables: household, person, day, trip, vehicle, and location. In the future, we may expand `travelSurveyTools` to work with other types of travel survey data or other types of surveys.

`travelSurveyTools` is in active development and is open-source; anyone can contribute 🤝. See the CONTRIBUTING page to learn how.
`travelSurveyTools` is in active development and is open-source; anyone can contribute 🤝. See the [CONTRIBUTING](CONTRIBUTING.md) page to learn how.

### Cloning instructions

1. Set config -
usethis::use_git_config(user.name = {"username"}, user.email = {your_email@email.com})
`usethis::use_git_config(user.name = {"username"}, user.email = {your_email@email.com})`

2. Go to github page to generate token -
usethis::create_github_token()
`usethis::create_github_token()`

3. Paste your PAT into pop-up that follows -
credentials::set_github_pat()
`credentials::set_github_pat()`

4. Lastly, remotes::install_github() will work -
remotes::install_github('RSGInc/travelSurveyTools')
4. Lastly, `remotes::install_github()` will work -
`remotes::install_github('RSGInc/travelSurveyTools')`


### What can this package do?

Some of the things this package enables include:

* Cross tabs with an unlimited number of variables
* Summarizes numeric, categorical, date, and date-time variables
* Accepts both weighted and unweighted data
Expand Down
188 changes: 0 additions & 188 deletions scratch.R

This file was deleted.

17 changes: 13 additions & 4 deletions tests/testthat/test_factorize_column.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,28 @@ x = sample(5, replace=TRUE, size=10)
x = c(x,NA)
var_str = 'test_column'

vals_df = data.table(
vals_df = data.table(
variable = "test_column",
value = 1:7,
value_label = letters[1:7],
stringsAsFactors = FALSE)


test_that("Returns a factor",{
expect_equal(class(factorize_column(x, var_str, vals_df)), c("ordered","factor"))
expect_equal(class(factorize_column(x, var_str, vals_df, add_na = FALSE)), c("ordered","factor"))
expect_equal(
class(factorize_column(x, var_str, vals_df)),
c("ordered","factor")
)
expect_equal(
class(factorize_column(x, var_str, vals_df, add_na = FALSE)),
c("ordered","factor")
)
})

test_that("Includes expected value levels",{
expect_equal(levels(factorize_column(x, var_str, vals_df)), c(vals_df$value_label, NA))
expect_equal(
levels(factorize_column(x, var_str, vals_df)),
c(vals_df$value_label, NA)
)
})

2 changes: 1 addition & 1 deletion tests/testthat/test_factorize_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ library(data.table)
dt = data.table(x = c(sample(5, replace=TRUE, size=10),NA),
y = sample(3, replace = TRUE, size = 11))

vals_df = data.table(
vals_df = data.table(
variable = "x",
value = 1:7,
value_label = letters[1:7],
Expand Down
Loading

0 comments on commit 2698dcf

Please sign in to comment.