Skip to content

Commit

Permalink
Merge pull request #71 from RSGInc/54-remove-factorize_df-from-prep-d…
Browse files Browse the repository at this point in the history
…ata-and-summary-functions

54 remove factorize df from prep data and summary functions
  • Loading branch information
erika-redding authored Jan 22, 2024
2 parents 336d629 + 6ec66df commit 2f13b8e
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 19 deletions.
8 changes: 3 additions & 5 deletions R/hts_prep_byvar.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,9 @@ hts_prep_byvar = function(summarize_by = NULL,
shared_name = summarize_by[[b]],
wide_dt = byvar_dt_v,
shared_name_vars = NULL,
variables_dt = variable_list,
remove_missing = TRUE,
missing_values = c("Missing Response", "995"),
checkbox_label_sep = ":",
to_single_row = TRUE
variables_dt = variables_dt,
to_single_row = TRUE,
...
)

}
Expand Down
51 changes: 45 additions & 6 deletions R/hts_prep_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @param threshold Threshold to define outliers. Default is 0.975.
#' @param remove_missing Whether to remove missing values from the summary.
#' Default is TRUE.
#' @param missing_value Missing value to remove. Default is 995.
#' @param missing_values Missing values to remove. Defaults are
#' 'Missing Response' and '995'.
#' @param not_imputable Value representing 'Not imputable' to remove. Default
#' is -1.
#' @param strataname Name of strata name to bring in. Default is NULL.
Expand Down Expand Up @@ -61,10 +61,11 @@ hts_prep_data = function(summarize_var = NULL,
remove_outliers = TRUE,
threshold = 0.975,
remove_missing = TRUE,
missing_value = 995,
missing_values = c("Missing Response", "995"),
not_imputable = -1,
strataname = NULL) {
# tictoc::tic("Total Time")

# Message:
msg_pt1 = paste0("Creating a summary of ",
hts_find_var(summarize_var, variables_dt = variables_dt), " ", summarize_var)
Expand Down Expand Up @@ -99,14 +100,26 @@ hts_prep_data = function(summarize_var = NULL,

# Is this a shared variable?
var_is_shared = variables_dt[shared_name == summarize_var, is_checkbox][1] == 1

# If yes, expand summarize_var:
if (var_is_shared) {

summarize_var = variables_dt[shared_name == summarize_var, variable]

for(i in 1:length(summarize_var)){

if(var_dt[,class(get(summarize_var[i]))] != 'integer'){

message("Checkbox variables must have integer values")

stop()

}

}

}

# Subset table to these column(s):
subset_cols = c(hts_get_keycols(var_dt), summarize_var)

Expand All @@ -125,7 +138,7 @@ hts_prep_data = function(summarize_var = NULL,
shared_name_vars = summarize_var,
remove_missing = TRUE,
checkbox_label_sep = ":",
missing_values = c("Missing Response", "995"),
missing_values = missing_values,
to_single_row = FALSE
)

Expand Down Expand Up @@ -185,6 +198,32 @@ hts_prep_data = function(summarize_var = NULL,

if (length(summarize_by) > 0) {

for (i in 1:length(summarize_by)){

var = summarize_by[i]

byvar_location = hts_find_var(var, variables_dt = variables_dt)

# Select table where this variable lives:
byvar_table = data[[byvar_location]]

byvar_is_shared = variables_dt[shared_name == var, is_checkbox][1] == 1

if (byvar_is_shared) {

var = variables_dt[shared_name == var, variable][1]

}

if(byvar_is_shared & byvar_table[,class(get(var))] != 'integer'){

message("Checkbox variables must have integer values")

stop()

}

}
byvar_dt = hts_prep_byvar(summarize_by,
variables_dt = variables_dt,
hts_data = data)
Expand Down Expand Up @@ -221,7 +260,7 @@ hts_prep_data = function(summarize_var = NULL,
variables_dt = variables_dt,
summarize_var = summarize_var,
summarize_by = summarize_by,
missing_value = missing_value,
missing_values = missing_values,
not_imputable = not_imputable)
}

Expand Down
9 changes: 5 additions & 4 deletions R/hts_remove_missing_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
#' removed.
#' @param summarize_by Variable being summarized by that has its missing data
#' removed. Default is NULL.
#' @param missing_value Missing value that will be removed. Default is 995.
#' @param missing_values Missing values that will be removed. Defaults are 995 and
#' 'Missing Response'.
#' @param not_imputable Value meaning not_imputable that will be removed. Default
#' is -1.
#'
Expand All @@ -31,7 +32,7 @@ hts_remove_missing_data = function(hts_data,
variables_dt,
summarize_var,
summarize_by = NULL,
missing_value = 995,
missing_values = c("Missing Response", "995"),
not_imputable = -1){

summarize_var_loc = hts_find_var(summarize_var)
Expand All @@ -40,7 +41,7 @@ hts_remove_missing_data = function(hts_data,
summarize_var_name = variables_dt[shared_name == summarize_var, variable][1]

summarize_var_tbl = hts_data[[summarize_var_loc]][
!get(summarize_var_name) %in% c(missing_value, not_imputable) |
!get(summarize_var_name) %in% c(missing_values, not_imputable) |
is.na(get(summarize_var_name))]

summarize_var_id = hts_get_keycols(summarize_var_tbl,
Expand All @@ -65,7 +66,7 @@ hts_remove_missing_data = function(hts_data,
summarize_by_name = variables_dt[shared_name == summarize_by[i], variable][1]

summarize_by_tbl = hts_data[[summarize_by_loc]][
!get(summarize_by_name) %in% c(missing_value, not_imputable) |
!get(summarize_by_name) %in% c(missing_values, not_imputable) |
is.na(get(summarize_by_name))]

summarize_by_id = hts_get_keycols(summarize_by_tbl,
Expand Down
4 changes: 2 additions & 2 deletions man/hts_prep_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/hts_remove_missing_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2f13b8e

Please sign in to comment.