Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added parameter for checkSeverity, updated docs & vignette #577

Merged
merged 4 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion R/executeDqChecks.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#' @param writeToCsv Boolean to indicate if the check results will be written to a csv file. Default is FALSE
#' @param csvFile (OPTIONAL) CSV file to write results
#' @param checkLevels Choose which DQ check levels to execute. Default is all 3 (TABLE, FIELD, CONCEPT)
#' @param checkSeverity Choose which DQ check severity levels to execute. Default is all 3 (fatal, convention, characterization)
#' @param checkNames (OPTIONAL) Choose which check names to execute. Names can be found in inst/csv/OMOP_CDM_v[cdmVersion]_Check_Descriptions.csv. Note that "cdmTable", "cdmField" and "measureValueCompleteness" are always executed.
#' @param cohortDefinitionId The cohort definition id for the cohort you wish to run the DQD on. The package assumes a standard OHDSI cohort table
#' with the fields cohort_definition_id and subject_id.
Expand Down Expand Up @@ -77,6 +78,7 @@ executeDqChecks <- function(connectionDetails,
csvFile = "",
checkLevels = c("TABLE", "FIELD", "CONCEPT"),
checkNames = c(),
checkSeverity = c("fatal", "convention", "characterization"),
cohortDefinitionId = c(),
cohortDatabaseSchema = resultsDatabaseSchema,
cohortTableName = "cohort",
Expand Down Expand Up @@ -110,7 +112,14 @@ executeDqChecks <- function(connectionDetails,
You passed in ', paste(checkLevels, collapse = ", "))
}

stopifnot(is.null(checkNames) | is.character(checkNames), is.null(tablesToExclude) | is.character(tablesToExclude))
if (!all(checkSeverity %in% c("fatal", "convention", "characterization"))) {
stop('checkSeverity argument must be a subset of c("fatal", "convention", "characterization").
You passed in ', paste(checkSeverity, collapse = ", "))
}

stopifnot(is.null(checkNames) | is.character(checkNames),
is.character(checkSeverity),
is.null(tablesToExclude) | is.character(tablesToExclude))
stopifnot(is.character(cdmVersion))

# Warning if check names for determining NA is missing
Expand Down Expand Up @@ -241,6 +250,7 @@ executeDqChecks <- function(connectionDetails,
})]

checkDescriptionsDf <- checkDescriptionsDf[checkDescriptionsDf$checkLevel %in% checkLevels &
checkDescriptionsDf$severity %in% checkSeverity &
checkDescriptionsDf$evaluationFilter != "" &
checkDescriptionsDf$sqlFile != "" &
checkDescriptionsDf$checkName %in% checksToInclude, ]
Expand Down
3 changes: 3 additions & 0 deletions man/executeDqChecks.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions tests/testthat/test-executeDqChecks.R
Original file line number Diff line number Diff line change
Expand Up @@ -375,3 +375,24 @@ test_that("Execute checks on Synthea/Eunomia to test new variable executionTimeS
)
expect_true(is.numeric(results$executionTimeSeconds))
})


test_that("checkNames are filtered by checkSeverity", {
  # Scratch folder for the run's output; removed when the test exits.
  # add = TRUE keeps any exit handlers registered earlier in this scope.
  outputFolder <- tempfile("dqd_")
  on.exit(unlink(outputFolder, recursive = TRUE), add = TRUE)

  # Restrict the run to checks whose severity is "fatal".
  results <- executeDqChecks(
    connectionDetails = connectionDetailsEunomia,
    cdmDatabaseSchema = cdmDatabaseSchemaEunomia,
    resultsDatabaseSchema = resultsDatabaseSchemaEunomia,
    cdmSourceName = "Eunomia",
    checkSeverity = "fatal",
    outputFolder = outputFolder,
    writeToTable = FALSE
  )

  # Check names a fatal-only run is allowed to produce.
  expectedCheckNames <- c(
    "cdmTable", "cdmField", "isRequired", "cdmDatatype",
    "isPrimaryKey", "isForeignKey"
  )
  observedCheckNames <- unique(results$CheckResults$checkName)

  # all() over an empty vector is TRUE, so first make sure the run actually
  # returned check results; otherwise the subset assertion passes vacuously.
  expect_gt(length(observedCheckNames), 0)
  expect_true(all(observedCheckNames %in% expectedCheckNames))
})

4 changes: 4 additions & 0 deletions vignettes/DataQualityDashboard.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ checkLevels <- c("TABLE", "FIELD", "CONCEPT")
# which DQ checks to run? ------------------------------------
checkNames <- c() # Names can be found in inst/csv/OMOP_CDM_v5.3_Check_Descriptions.csv

# which DQ severity levels to run? ----------------------------
checkSeverity <- c("fatal", "convention", "characterization")

# want to EXCLUDE a pre-specified list of checks? run the following code:
#
# checksToExclude <- c() # Names of check types to exclude from your DQD run
Expand Down Expand Up @@ -129,6 +132,7 @@ DataQualityDashboard::executeDqChecks(connectionDetails = connectionDetails,
writeToCsv = writeToCsv,
csvFile = csvFile,
checkLevels = checkLevels,
checkSeverity = checkSeverity,
tablesToExclude = tablesToExclude,
checkNames = checkNames)

Expand Down
Loading