Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fixes 06 2024 #161

Merged
merged 10 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Session Data files
.RData
.RDataTmp
.Rproj
*.Rproj

# User-specific files
.Ruserdata
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: travelSurveyTools
Title: travelSurveyTools
Version: 2.4.2
Version: 2.4.3
Authors@R: c(
person("RSG", "Inc.", , "rsg@rsginc.com", role = c("aut", "cre")),
person("Ashley", "Asmus", , "ashley.asmus@rsginc.com", role = "aut"),
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# travelSurveyTools 2.4.3

- Resolve bugs and updated function syntax in `hts_prep_triprate` and `hts_prep_variable`
- Updated `remove_outliers` parameter to default to FALSE in `hts_prep_triprate`
- Added a warning message in `hts_remove_outliers`

# travelSurveyTools 2.4.2

- Add summarize_var and summarize_by to output. Select weight automatically in `hts_summary_wrapper`
Expand Down
256 changes: 131 additions & 125 deletions R/hts_prep_triprate.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,16 @@ hts_prep_triprate = function(summarize_by = NULL,
day_name = "day",
ids = c("hh_id", "person_id", "day_id", "trip_id", "vehicle_id"),
wts = c("hh_weight", "person_weight", "day_weight", "trip_weight", "hh_weight"),
remove_outliers = TRUE,
remove_outliers = FALSE,
threshold = 0.975,
weighted = TRUE,
hts_data) {
hts_data = list(
"hh" = hh,
"person" = person,
"day" = day,
"trip" = trip,
"vehicle" = vehicle)) {

# Check variable_list first
variables_dt = hts_validate_variable_list(variables_dt, hts_data)

Expand Down Expand Up @@ -135,151 +141,151 @@ hts_prep_triprate = function(summarize_by = NULL,
shared_name %in% summarize_by &
get(day_name) == 1 &
get(trip_name) == 1, shared_name]

if (length(day_trip_vars) > 0) {

setnames(variables_dt, trip_name, 'trip_table')

variables_dt[shared_name %in% day_trip_vars, trip_table := 0]

setnames(variables_dt, 'trip_table', trip_name)

}

if (length(summarize_by) > 0) {
byvar_dt = hts_prep_byvar(summarize_by,
variables_dt = variables_dt,
hts_data = hts_data,
byvar_ids = ids,
byvar_wts = wts
)

merge_cols = names(byvar_dt)[names(byvar_dt) %in% names(trip_control)]

triprate_dt = merge(trip_control, byvar_dt, by = merge_cols, all.x = TRUE)

triprate_cols = intersect(names(triprate_dt), c(ids, wts))

triprate_cols = triprate_cols[!triprate_cols %in% c(trip_id, trip_wt)]

triprate_cols_all = c(triprate_cols, summarize_by)

if (weighted) {
triprate_dt = triprate_dt[, .(num_trips = sum(get(trip_wt))),
by = triprate_cols_all
]
}

if (!weighted) {
triprate_dt = triprate_dt[, .(num_trips = sum(!is.na(get(trip_id)))),
by = triprate_cols_all
]
if (length(day_trip_vars) > 0) {

setnames(variables_dt, trip_name, 'trip_table')

variables_dt[shared_name %in% day_trip_vars, trip_table := 0]

setnames(variables_dt, 'trip_table', trip_name)

}

# fill in with zeros for zero trips on a given day:
triprate_dt[, `:=`(
num_trips = nafill(num_trips, fill = 0)
)]

# If one of the by-variables is in trip table, need to expand to
# include all levels of the variable for every trip, and fill with zeros:
if (trip_id %in% names(byvar_dt)) {
# fill in with zeros for zero trips for a given level of xt_var using dcast:
dcast_formula =
paste0(
paste0(triprate_cols, collapse = " + "),
" ~ ",
paste0(summarize_by, collapse = " + ")
)

triprate_cast = dcast(triprate_dt,
dcast_formula,
value.var = "num_trips",
fill = 0
if (length(summarize_by) > 0) {
byvar_dt = hts_prep_byvar(summarize_by,
variables_dt = variables_dt,
hts_data = hts_data,
byvar_ids = ids,
byvar_wts = wts
)

# Remove columns where NA levels of factors were generated during dcast:
na_filled_cols = names(triprate_cast)[names(triprate_cast) %like% "_NA"]
merge_cols = names(byvar_dt)[names(byvar_dt) %in% names(trip_control)]

if (length(na_filled_cols) > 0) {
triprate_cast[, c(na_filled_cols) := NULL]
}
triprate_dt = merge(trip_control, byvar_dt, by = merge_cols, all.x = TRUE)

# transform back to long format, with separate cols for weighted & unwt. trip rates:
triprate_dt = data.table::melt(
triprate_cast,
id.vars = triprate_cols,
value.name = "num_trips"
)
triprate_cols = intersect(names(triprate_dt), c(ids, wts))

triprate_cols = triprate_cols[!triprate_cols %in% c(trip_id, trip_wt)]

triprate_cols_all = c(triprate_cols, summarize_by)

# Relabel xtab trip vars after melting:
if (length(summarize_by) > 1) {
triprate_dt[, c(summarize_by) := tstrsplit(variable, "_")]
triprate_dt[, variable := NULL]
if (weighted) {
triprate_dt = triprate_dt[, .(num_trips = sum(get(trip_wt))),
by = triprate_cols_all
]
}

if (length(summarize_by) == 1) {
setnames(triprate_dt, old = "variable", new = summarize_by)
if (!weighted) {
triprate_dt = triprate_dt[, .(num_trips = sum(!is.na(get(..trip_id)))),
by = triprate_cols_all
]
}

triprate_dt = triprate_dt[]
# fill in with zeros for zero trips on a given day:
triprate_dt[, `:=`(
num_trips = nafill(num_trips, fill = 0)
)]

# If one of the by-variables is in trip table, need to expand to
# include all levels of the variable for every trip, and fill with zeros:
if (trip_id %in% names(byvar_dt)) {
# fill in with zeros for zero trips for a given level of xt_var using dcast:
dcast_formula =
paste0(
paste0(triprate_cols, collapse = " + "),
" ~ ",
paste0(summarize_by, collapse = " + ")
)

triprate_cast = dcast(triprate_dt,
dcast_formula,
value.var = "num_trips",
fill = 0
)

# Remove columns where NA levels of factors were generated during dcast:
na_filled_cols = names(triprate_cast)[names(triprate_cast) %like% "_NA" | names(triprate_cast) == "NA"]

if (length(na_filled_cols) > 0) {
triprate_cast[, c(na_filled_cols) := NULL]
}

# transform back to long format, with separate cols for weighted & unwt. trip rates:
triprate_dt = data.table::melt(
triprate_cast,
id.vars = triprate_cols,
value.name = "num_trips"
)

# Relabel xtab trip vars after melting:
if (length(summarize_by) > 1) {
triprate_dt[, c(summarize_by) := tstrsplit(variable, "_")]
triprate_dt[, variable := NULL]
}

if (length(summarize_by) == 1) {
setnames(triprate_dt, old = "variable", new = summarize_by)
}

triprate_dt = triprate_dt[]
}

if (weighted) {
# calculate trip rate
triprate_dt[, trip_rate :=
ifelse(num_trips == 0, 0, num_trips / get(day_wt))]

# Save counts of trips under a different name
setnames(triprate_dt, "num_trips", "trip_count_wtd")

setnames(triprate_dt, "trip_rate", "num_trips")
}
}

if (weighted) {
# calculate trip rate
triprate_dt[, trip_rate :=
ifelse(num_trips == 0, 0, num_trips / get(day_wt))]
# remove outliers
if (remove_outliers) {
out = hts_remove_outliers(triprate_dt,
numvar = "num_trips",
threshold = threshold
)

# Save counts of trips under a different name
setnames(triprate_dt, "num_trips", "trip_count_wtd")
triprate_dt = out[["dt"]]

setnames(triprate_dt, "trip_rate", "num_trips")
outlier_table = out[["outlier_description"]]
}
}

# remove outliers
if (remove_outliers) {
out = hts_remove_outliers(triprate_dt,
numvar = "num_trips",
threshold = threshold
)

triprate_dt = out[["dt"]]
# Bin trips:
triprate_binned = hts_bin_var(
prepped_dt = triprate_dt,
numvar = "num_trips",
nbins = 7
)

outlier_table = out[["outlier_description"]]
}

# Bin trips:
triprate_binned = hts_bin_var(
prepped_dt = triprate_dt,
numvar = "num_trips",
nbins = 7
)

if (weighted) {
setnames(triprate_dt, "num_trips", "num_trips_wtd", skip_absent = TRUE)
setnames(triprate_binned, "num_trips", "num_trips_wtd", skip_absent = TRUE)
} else {
setnames(triprate_dt, "num_trips", "num_trips_unwtd", skip_absent = TRUE)
setnames(triprate_binned, "num_trips", "num_trips_unwtd", skip_absent = TRUE)
}
prepped_dt_ls = list(
"num" = triprate_dt,
"cat" = triprate_binned
)

# Append outliers:
if (remove_outliers) {
if (weighted) {
setnames(triprate_dt, "num_trips", "num_trips_wtd", skip_absent = TRUE)
setnames(triprate_binned, "num_trips", "num_trips_wtd", skip_absent = TRUE)
} else {
setnames(triprate_dt, "num_trips", "num_trips_unwtd", skip_absent = TRUE)
setnames(triprate_binned, "num_trips", "num_trips_unwtd", skip_absent = TRUE)
}
prepped_dt_ls = list(
"cat" = triprate_binned,
"num" = triprate_dt,
"outliers" = outlier_table
"cat" = triprate_binned
)
}


return(prepped_dt_ls)

# Append outliers:
if (remove_outliers) {
prepped_dt_ls = list(
"cat" = triprate_binned,
"num" = triprate_dt,
"outliers" = outlier_table
)
}


return(prepped_dt_ls)
}

## quiets concerns of R CMD check
utils::globalVariables(c("trip_weight", "num_trips", "trip_rate", "day_weight", "trip_table"))
utils::globalVariables(c("..trip_id","trip_weight", "num_trips", "trip_rate", "day_weight", "trip_table"))
Loading
Loading