Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add input validation for setter and read functions #231

Merged
merged 10 commits into from
Jan 18, 2023
67 changes: 67 additions & 0 deletions tests/test-misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,73 @@ test_that("Match argument or take default.", {
expect_equal(actual.result, expected.result, info = "Multiple choices with ignored default, two choices")
})

##
## Check presence and datatype of column.
bockthom marked this conversation as resolved.
Show resolved Hide resolved
##

test_that("Check presence and datatype of column.", {

    ## Note: 'expect_error(..., NA)' is used to assert the absence of errors, as the function
    ## 'expect_no_error' of 'testthat' is still experimental. The description of each check is
    ## passed via 'info' as 'expect_error' does not provide a 'message' parameter.

    user.names = c("John", "Peter", "Maria", "Susanne")

    ## contains NaN to verify functionality does not break
    age = c(42, 50, NaN, 66)

    ## contains NA to verify functionality does not break
    is.male = c(TRUE, TRUE, FALSE, NA)

    ## construct simple testing dataframe (keep strings as characters independent of the R version)
    test.data = data.frame(user.names, age, is.male, stringsAsFactors = FALSE)

    ## 1) Check base functionality (benign use-case)
    expect_error(verify.data.frame.columns(
        test.data, c("user.names", "age", "is.male"), c("character", "numeric", "logical")),
        NA,
        info = "All columns present and well-typed.") # Expect no error

    ## 2) Base test with reordered columns
    expect_error(verify.data.frame.columns(
        test.data, c("is.male", "age", "user.names"), c("logical", "numeric", "character")),
        NA,
        info = "Order of columns does not matter.") # Expect no error

    ## 3) Specify less columns than present (Allow optional columns)
    expect_error(verify.data.frame.columns(
        test.data, c("user.names", "age"), c("character", "numeric")),
        NA,
        info = "Optional columns are allowed.") # Expect no error

    ## 4) Unequal amount of column names and datatypes
    expect_error(verify.data.frame.columns(
        test.data, c("user.names", "age", "is.male"), c("character", "numeric")),
        info = "More column names specified than datatypes.")
    expect_error(verify.data.frame.columns(
        test.data, c("user.names", "age"), c("character", "numeric", "logical")),
        info = "More datatypes specified than column names.")

    ## 5) Datatypes do not match column names
    expect_error(verify.data.frame.columns(
        test.data, c("user.names", "age", "is.male"), c("logical", "character", "numeric")),
        info = "Column names do not match datatypes.")

    ## 6) Invalid column / Column not present in dataframe (Typo)
    expect_error(verify.data.frame.columns(
        test.data, c("user.name"), c("character")),
        info = "Column is not present in the dataframe.")

    ## 7) No datatypes specified and column names are present
    expect_error(verify.data.frame.columns(
        test.data, c("user.names", "age", "is.male")),
        NA,
        info = "No datatypes specified and all columns are present.") # Expect no error

    ## 8) No datatypes specified but column names are not present (Typo)
    expect_error(verify.data.frame.columns(
        test.data, c("user.name")),
        info = "No datatypes specified and column is not present.")

    ## 9) Too many column names and no datatypes specified
    expect_error(verify.data.frame.columns(
        test.data, c("user.names", "age", "is.male", "job.orientation")),
        info = "More columns specified than present in the dataframe.")

})

## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Date handling -----------------------------------------------------------

Expand Down
70 changes: 70 additions & 0 deletions util-misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,76 @@ match.arg.or.default = function(arg, choices, default = NULL, several.ok = FALSE
}
}

#' Check if a dataframe matches a given structure. This includes the dataframe to contain columns
#' which must match the column names in \code{columns} and the datatypes in \code{data.types}.
#'
#' The verification stops at the first violation found: the corresponding error message is logged
#' via \code{logging::logerror} and an error is raised via \code{stop}. Columns of the dataframe
#' that are not listed in \code{columns} are ignored.
#'
#' @param data the dataframe under investigation for structural conformity
#' @param columns a character vector containing the column names the data frame should include
#' @param data.types an ordered vector containing the data types corresponding to the columns.
#'                   This vector must be of same length of the vector of \code{columns}.
#'                   A data type matches if it is contained in the classes of the column;
#'                   the special value \code{"list()"} demands the column to be a list.
#'                   If \code{NULL}, only the presence of the columns is checked.
#'                   [default: NULL]
verify.data.frame.columns = function(data, columns, data.types = NULL) {

    ## every column of the data frame must be one to one mapped to a datatype expected in the column
    ## therefore if there aren't as many datatypes provided in \code{data.types} as column names have
    ## been provided in \code{columns} we can stop here already.
    if (!is.null(data.types) && length(columns) != length(data.types)) {
        error.message = sprintf("If specified, the length of the two given vectors columns and data.types must match.")
        logging::logerror(error.message)
        stop(error.message)
    }

    ## obtain vector of all column names included in the dataframe to ease further checks.
    data.frame.columns = colnames(data)

    ## iterate over all columns in \code{columns}
    for (i in seq_along(columns)) {

        ## obtain the column.
        column = columns[i]

        ## stop verification process early if column is not present in the dataframe.
        if (!(column %in% data.frame.columns)) {
            error.message = sprintf("Column '%s' is missing from the dataframe", column)
            logging::logerror(error.message)
            stop(error.message)
        }

        if (!is.null(data.types)) {

            ## obtain the datatype that should be present in the dataframe column
            ## which is currently under investigation.
            expected.type = data.types[i]

            ## obtain the classes that the current column holds in the dataframe. This may be
            ## more than one class (e.g., for 'POSIXct' columns), hence, collapse them to a
            ## single string when constructing error messages.
            received.type = class(data[[column]])

            ## necessary case distinction for special case list where calling \code{base::class}
            ## removes information about the listing property
            if (expected.type == "list()") {

                ## column is not a list
                if (!is.list(data[[column]])) {
                    error.message = sprintf("Column '%s' is expected to be a list but is '%s'",
                                            column, paste(received.type, collapse = ", "))
                    logging::logerror(error.message)
                    stop(error.message)
                }

            } else {

                ## stop verification process early if column type in the dataframe is not matching
                ## the expected datatype.
                if (!(expected.type %in% received.type)) {
                    error.message = sprintf("Column '%s' has type '%s' in dataframe, expected '%s'",
                                            column, paste(received.type, collapse = ", "), expected.type)
                    logging::logerror(error.message)
                    stop(error.message)
                }
            }
        }
    }
}


## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Empty dataframe creation-------------------------------------------------
Expand Down