From dd0824dafd2542765a8d030a818f9ffed76689ab Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Thu, 1 Dec 2022 08:33:45 +0100 Subject: [PATCH 01/14] sync with poc branch --- R/stagingData.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/stagingData.R b/R/stagingData.R index 38bbff1d..2f11c55e 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -148,8 +148,8 @@ cleanStagingData <- function(eolAge, dryRun = TRUE) { paste( "Function invoked in dry run mode and none of the returned files\n", "will be deleted.\n", - "To delete the files please re-run this function with the dryRun\n", - "argument set to 'TRUE'. Godspeed!" + "To delete the files please contemplate and re-run this function\n", + "with the dryRun argument set to 'FALSE'. Godspeed!" ) ) fDelete From 6e9aa6b8757a1b140f1bcdd212873ba42eb3ca46 Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Fri, 2 Dec 2022 15:00:46 +0100 Subject: [PATCH 02/14] just a starter --- R/stagingData.R | 113 +++++++++++++++++++++++++++++- inst/createStagingDb.sql | 6 ++ inst/createStagingTab.sql | 8 +++ inst/rapbaseConfig.yml | 5 ++ man/stagingData.Rd | 6 ++ tests/testthat/test-stagingData.R | 91 +++++++++++++++++++++++- 6 files changed, 225 insertions(+), 4 deletions(-) create mode 100644 inst/createStagingDb.sql create mode 100644 inst/createStagingTab.sql diff --git a/R/stagingData.R b/R/stagingData.R index 2f11c55e..ad0ab076 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -43,7 +43,8 @@ #' #' @name stagingData #' @aliases listStagingData mtimeStagingData saveStagingData loadStagingData -#' deleteStagingData cleanStagingData pathStagingData +#' deleteStagingData cleanStagingData pathStagingData dbStagingData +#' dbStagingConnection #' #' @examples #' ## Prep test data @@ -92,9 +93,42 @@ mtimeStagingData <- function(registryName, #' @export saveStagingData <- function(registryName, dataName, data, dir = Sys.getenv("R_RAP_CONFIG_PATH")) { - path <- pathStagingData(registryName, dir) + conf <- 
getConfig("rapbaseConfig.yml")$r$staging + + if (conf$target == "file") { + path <- pathStagingData(registryName, dir) + readr::write_rds(data, file.path(path, dataName)) + } + + if (conf$target == "db") { + dbStagingData(conf$key) + blob <- memCompress( + serialize(data, connection = NULL), + type = "bzip2" + ) + + df <- data.frame( + registry = registryName, + name = dataName, + data = blob::as.blob(blob) + ) + + cleanQuery <- paste0( + "DELETE FROM data WHERE registry = '", + registryName, + "' AND name = '", + dataName, + "'" + ) - readr::write_rds(data, file.path(path, dataName)) + con <- dbStagingConnection(key = conf$key) + RMariaDB::dbExecute(con, cleanQuery) + RMariaDB::dbAppendTable(con, "data", df) + con <- dbStagingConnection(con = con) + + return(invisible(data)) + + } } #' @rdname stagingData @@ -173,3 +207,76 @@ pathStagingData <- function(registryName, dir) { path } + +#' @rdname stagingData +dbStagingData <- function(key, drop = FALSE) { + + conf <- getConfig()[[key]] + if (is.null(conf)) { + stop(paste("There is no configuration corresponding to key", key)) + } + if (drop) { + query <- paste("DROP DATABASE", conf$name) + msg <- paste0("Database '", conf$name, "' deleted.") + } else { + query <- c( + sprintf( + readLines(system.file("createStagingDb.sql", package = "rapbase")), + conf$name + ), + paste0( + readLines(system.file("createStagingTab.sql", package = "rapbase")), + collapse = "\n" + ) + ) + msg <- paste0("Database '", conf$name, "exists.") + } + + con <- dbStagingConnection(key = key, init = TRUE) + for (q in query) { + tmp <- RMariaDB::dbExecute(con, q) + } + + con <- dbStagingConnection(con = con) + + invisible(msg) +} + +#' @rdname stagingData +dbStagingConnection <- function(key = NULL, con = NULL, init = FALSE) { + + if (inherits(con, "DBIConnection")) { + con <- DBI::dbDisconnect(con) + con <- NULL + return(invisible(con)) + } + + if (!is.null(key)) { + conf <- getConfig()[[key]] + if (is.null(conf)) { + stop( + paste0( + "Could 
not connect to database because there is no configuration ", + "corresponding to key '", key,"'. Please check key and/or ", + "configuration." + ) + ) + } + if (init) { + dbname <- NULL + } else { + dbname <- conf$name + } + drv <- RMariaDB::MariaDB() + con <- RMariaDB::dbConnect( + drv, + dbname, + host = conf$host, + user = conf$user, + password = conf$pass + ) + return(con) + } else { + stop("Either a key or a valid database connection object must be provided.") + } +} diff --git a/inst/createStagingDb.sql b/inst/createStagingDb.sql new file mode 100644 index 00000000..47330a5b --- /dev/null +++ b/inst/createStagingDb.sql @@ -0,0 +1,6 @@ +SET NAMES utf8; +SET time_zone = '+00:00'; +SET foreign_key_checks = 0; +SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO'; +CREATE DATABASE IF NOT EXISTS `%s` /*!40100 DEFAULT CHARACTER SET utf8 COLLATE utf8_danish_ci */; +USE %s; diff --git a/inst/createStagingTab.sql b/inst/createStagingTab.sql new file mode 100644 index 00000000..daf4512d --- /dev/null +++ b/inst/createStagingTab.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `data` ( + `id` bigint unsigned NOT NULL AUTO_INCREMENT, + `mtime` timestamp DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `registry` varchar(255) NOT NULL COLLATE utf8_danish_ci, + `name` varchar(255) NOT NULL COLLATE utf8_danish_ci, + `data` longblob, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_danish_ci; diff --git a/inst/rapbaseConfig.yml b/inst/rapbaseConfig.yml index 03bf58dc..65f1d2ba 100644 --- a/inst/rapbaseConfig.yml +++ b/inst/rapbaseConfig.yml @@ -20,6 +20,11 @@ r : target: file key: autoreport + # Staging data + staging: + target: file + key: staging + # User data for testing purposes testUser : user : ttester diff --git a/man/stagingData.Rd b/man/stagingData.Rd index e457794f..c2341ddb 100644 --- a/man/stagingData.Rd +++ b/man/stagingData.Rd @@ -9,6 +9,8 @@ \alias{deleteStagingData} \alias{cleanStagingData} \alias{pathStagingData} +\alias{dbStagingData} 
+\alias{dbStagingConnection} \title{Staging data functions} \usage{ listStagingData(registryName, dir = Sys.getenv("R_RAP_CONFIG_PATH")) @@ -33,6 +35,10 @@ deleteStagingData( cleanStagingData(eolAge, dryRun = TRUE) pathStagingData(registryName, dir) + +dbStagingData(key, drop = FALSE) + +dbStagingConnection(key = NULL, con = NULL) } \arguments{ \item{registryName}{Character string providing the registry name.} diff --git a/tests/testthat/test-stagingData.R b/tests/testthat/test-stagingData.R index b0ba984d..3268ed62 100644 --- a/tests/testthat/test-stagingData.R +++ b/tests/testthat/test-stagingData.R @@ -13,7 +13,18 @@ testPath <- file.path( ) testFile <- file.path(testPath, dataName) -test_that("staging cannot commence if paret directory does not exist", { +# test config for file backend +test_config <- paste0( + "r:", + "\n staging: ", + "\n target: file", + "\n key: staging\n" +) +cf <- file(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "rapbaseConfig.yml")) +writeLines(test_config, cf) +close(cf) + +test_that("staging cannot commence if parent directory does not exist", { expect_error(pathStagingData(registryName, dir = "imaginaryDir")) expect_error( saveStagingData(registryName, "testData", d, dir = "imaginaryDir") @@ -69,10 +80,88 @@ test_that("a global clean of staging data can be performed (also dry run)", { expect_false(file.exists(testFile)) }) +# clean up config for file backend +unlink(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "rapbaseConfig.yml")) + test_that("a global clean of staging data will stop if no parent directory", { Sys.unsetenv("R_RAP_CONFIG_PATH") expect_error(cleanStagingData(0)) }) +# Test with db as backend +Sys.setenv(R_RAP_CONFIG_PATH = tempdir()) + +# Database infrastructure is only available at GA and our own dev env. 
+# Tests running on other environments should be skipped +checkDb <- function(is_test_that = TRUE) { + if (Sys.getenv("R_RAP_INSTANCE") == "DEV") { + NULL + } else if (Sys.getenv("GITHUB_ACTIONS_RUN_DB_UNIT_TESTS") == "true") { + NULL + } else { + if (is_test_that) { + testthat::skip("Possible lack of database infrastructure") + } else { + 1 + } + } +} + +test_that("env vars needed for db testing is present", { + checkDb() + expect_true("DB_HOST" %in% names(Sys.getenv())) + expect_true("DB_USER" %in% names(Sys.getenv())) + expect_true("DB_PASS" %in% names(Sys.getenv())) +}) + +# make temporary config +test_config <- paste0( + "staging:", + "\n host : ", Sys.getenv("DB_HOST"), + "\n name : staging", + "\n user : ", Sys.getenv("DB_USER"), + "\n pass : ", Sys.getenv("DB_PASS"), + "\n disp : ephemaralUnitTesting\n" +) +cf <- file(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "dbConfig.yml")) +writeLines(test_config, cf) +close(cf) + +test_config <- paste0( + "r:", + "\n staging: ", + "\n target: db", + "\n key: staging\n" +) +cf <- file(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "rapbaseConfig.yml")) +writeLines(test_config, cf) +close(cf) + +if (is.null(checkDb(is_test_that = FALSE))) { + dbStagingData("staging") +} + +test_that("Error is returned when key cannot be found in config", { + expect_error(dbStagingData("wrongEntry")) +}) + +test_that("A db connection object can be opened and closed", { + con <- dbStagingConnection(key = "staging") + expect_true(inherits(con, "DBIConnection")) + con <- dbStagingConnection(con = con) + expect_true(is.null(con)) +}) + +test_that("Data can be staged", { + d0 <- saveStagingData(registryName, "testData", d) + expect_true(identical(d, d0)) +}) + +if (is.null(checkDb(is_test_that = FALSE))) { + dbStagingData("staging", drop = TRUE) +} + # Restore environment +unlink(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "rapbaseConfig.yml")) +unlink(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "dbConfig.yml")) Sys.setenv(R_RAP_CONFIG_PATH = 
currentConfigPath) From 140f3d94055314d9d00832a61ed06e1689fa1a0b Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Mon, 5 Dec 2022 14:50:47 +0100 Subject: [PATCH 03/14] new pkg --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 0e55678e..eca2f2ec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,6 +25,7 @@ Depends: R (>= 3.5.0) Imports: base64enc, + blob, bookdown, DBI, digest, From 6999add7b13a826097c5890b0019da2de7d2a062 Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Mon, 5 Dec 2022 14:56:02 +0100 Subject: [PATCH 04/14] early return --- R/stagingData.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/stagingData.R b/R/stagingData.R index ad0ab076..7c373459 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -97,7 +97,11 @@ saveStagingData <- function(registryName, dataName, data, if (conf$target == "file") { path <- pathStagingData(registryName, dir) - readr::write_rds(data, file.path(path, dataName)) + return( + invisible( + readr::write_rds(data, file.path(path, dataName)) + ) + ) } if (conf$target == "db") { From 3026bd81f5fee53e9799984c28c6deca2621994d Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Mon, 5 Dec 2022 15:31:59 +0100 Subject: [PATCH 05/14] listing also from db backend --- R/stagingData.R | 24 ++++++++++++++++++++++-- tests/testthat/test-stagingData.R | 8 +++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/R/stagingData.R b/R/stagingData.R index 7c373459..1c390b2e 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -70,9 +70,29 @@ NULL #' @export listStagingData <- function(registryName, dir = Sys.getenv("R_RAP_CONFIG_PATH")) { - path <- pathStagingData(registryName, dir) - list.files(path) + conf <- getConfig("rapbaseConfig.yml")$r$staging + + if (conf$target == "file") { + path <- pathStagingData(registryName, dir) + + return(list.files(path)) + } + + if (conf$target == "db") { + query <- paste0( + "SELECT name FROM data WHERE registry = ?;" + 
) + params <- list(registryName) + con <- dbStagingConnection(key = conf$key) + rs <- RMariaDB::dbSendQuery(con, query) + RMariaDB::dbBind(rs, params) + df <- RMariaDB::dbFetch(rs) + RMariaDB::dbClearResult(rs) + con <- dbStagingConnection(con = con) + + return(df$name) + } } #' @rdname stagingData diff --git a/tests/testthat/test-stagingData.R b/tests/testthat/test-stagingData.R index 3268ed62..50a74464 100644 --- a/tests/testthat/test-stagingData.R +++ b/tests/testthat/test-stagingData.R @@ -152,11 +152,17 @@ test_that("A db connection object can be opened and closed", { expect_true(is.null(con)) }) -test_that("Data can be staged", { +test_that("Data can be staged with db backend", { d0 <- saveStagingData(registryName, "testData", d) expect_true(identical(d, d0)) }) +test_that("staging files can be listed from db backend", { + v <- listStagingData(registryName) + expect_equal(class(v), "character") + expect_identical(v, "testData") +}) + if (is.null(checkDb(is_test_that = FALSE))) { dbStagingData("staging", drop = TRUE) } From 7fdef2f3b873fa97e0ac355c1964ee4f1ae5e443 Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Tue, 6 Dec 2022 09:44:16 +0100 Subject: [PATCH 06/14] general db process --- R/stagingData.R | 58 +++++++++++++++++++++++----------------------- man/stagingData.Rd | 5 +++- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/R/stagingData.R b/R/stagingData.R index 1c390b2e..3b7d1c12 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -44,7 +44,7 @@ #' @name stagingData #' @aliases listStagingData mtimeStagingData saveStagingData loadStagingData #' deleteStagingData cleanStagingData pathStagingData dbStagingData -#' dbStagingConnection +#' dbStagingConnection dbStagingProcess #' #' @examples #' ## Prep test data @@ -80,16 +80,9 @@ listStagingData <- function(registryName, } if (conf$target == "db") { - query <- paste0( - "SELECT name FROM data WHERE registry = ?;" - ) + query <- "SELECT name FROM data WHERE registry = ?;" params <- 
list(registryName) - con <- dbStagingConnection(key = conf$key) - rs <- RMariaDB::dbSendQuery(con, query) - RMariaDB::dbBind(rs, params) - df <- RMariaDB::dbFetch(rs) - RMariaDB::dbClearResult(rs) - con <- dbStagingConnection(con = con) + df <- dbStagingProcess(conf$key, query, params) return(df$name) } @@ -126,32 +119,22 @@ saveStagingData <- function(registryName, dataName, data, if (conf$target == "db") { dbStagingData(conf$key) - blob <- memCompress( + b <- memCompress( serialize(data, connection = NULL), type = "bzip2" ) - df <- data.frame( - registry = registryName, - name = dataName, - data = blob::as.blob(blob) - ) - - cleanQuery <- paste0( - "DELETE FROM data WHERE registry = '", - registryName, - "' AND name = '", - dataName, - "'" - ) + # remove any existing registry data with same data name + query <- "DELETE FROM data WHERE registry = ? AND name = ?;" + params <- list(registryName, dataName) + df <- dbStagingProcess(conf$key, query, params, statement = TRUE) - con <- dbStagingConnection(key = conf$key) - RMariaDB::dbExecute(con, cleanQuery) - RMariaDB::dbAppendTable(con, "data", df) - con <- dbStagingConnection(con = con) + # insert new data + query <- "INSERT INTO data (registry, name, data) VALUES (?, ?, ?);" + params <- list(registryName, dataName, blob::as_blob(b)) + df <- dbStagingProcess(conf$key, query, params, statement = TRUE) return(invisible(data)) - } } @@ -304,3 +287,20 @@ dbStagingConnection <- function(key = NULL, con = NULL, init = FALSE) { stop("Either a key or a valid database connection object must be provided.") } } + +#' @rdname stagingData +dbStagingProcess <- function(key, query, params, statement = FALSE) { + + con <- dbStagingConnection(key) + if (statement) { + df <- RMariaDB::dbExecute(con, query, params) + } else { + rs <- RMariaDB::dbSendQuery(con, query) + RMariaDB::dbBind(rs, params) + df <- RMariaDB::dbFetch(rs) + RMariaDB::dbClearResult(rs) + } + con <- dbStagingConnection(con = con) + + df +} \ No newline at end of 
file diff --git a/man/stagingData.Rd b/man/stagingData.Rd index c2341ddb..a074850c 100644 --- a/man/stagingData.Rd +++ b/man/stagingData.Rd @@ -11,6 +11,7 @@ \alias{pathStagingData} \alias{dbStagingData} \alias{dbStagingConnection} +\alias{dbStagingProcess} \title{Staging data functions} \usage{ listStagingData(registryName, dir = Sys.getenv("R_RAP_CONFIG_PATH")) @@ -38,7 +39,9 @@ pathStagingData(registryName, dir) dbStagingData(key, drop = FALSE) -dbStagingConnection(key = NULL, con = NULL) +dbStagingConnection(key = NULL, con = NULL, init = FALSE) + +dbStagingProcess(key, query, params, statement = FALSE) } \arguments{ \item{registryName}{Character string providing the registry name.} From fdb18362a365c7b2edc0942ef561e839048e3290 Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Tue, 6 Dec 2022 11:26:10 +0100 Subject: [PATCH 07/14] time stamp from db --- R/stagingData.R | 22 +++++++++++++++++----- tests/testthat/test-stagingData.R | 4 ++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/R/stagingData.R b/R/stagingData.R index 3b7d1c12..c7415e47 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -92,13 +92,25 @@ listStagingData <- function(registryName, #' @export mtimeStagingData <- function(registryName, dir = Sys.getenv("R_RAP_CONFIG_PATH")) { - parentPath <- "stagingData" - path <- file.path(dir, parentPath, registryName) - f <- normalizePath(list.files(path, recursive = TRUE, full.names = TRUE)) - mtime <- file.mtime(f) - names(mtime) <- basename(f) + conf <- getConfig("rapbaseConfig.yml")$r$staging + + if (conf$target == "file") { + parentPath <- "stagingData" + path <- file.path(dir, parentPath, registryName) + f <- normalizePath(list.files(path, recursive = TRUE, full.names = TRUE)) + mtime <- file.mtime(f) + + names(mtime) <- basename(f) + } + if (conf$target == "db") { + query <- "SELECT mtime, name FROM data WHERE registry = ?;" + params <- list(registryName) + df <- dbStagingProcess(conf$key, query, params) + mtime <- 
as.POSIXct(df$mtime) + names(mtime) <- df$name + } mtime } diff --git a/tests/testthat/test-stagingData.R b/tests/testthat/test-stagingData.R index 50a74464..98442f6f 100644 --- a/tests/testthat/test-stagingData.R +++ b/tests/testthat/test-stagingData.R @@ -163,6 +163,10 @@ test_that("staging files can be listed from db backend", { expect_identical(v, "testData") }) +test_that("modification time of stagin data in db can be obtained", { + expect_true("POSIXct" %in% class(mtimeStagingData(registryName))) +}) + if (is.null(checkDb(is_test_that = FALSE))) { dbStagingData("staging", drop = TRUE) } From 3b7434cb40ccf9c71ef36152b20ec680909a3e6d Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Tue, 6 Dec 2022 14:41:02 +0100 Subject: [PATCH 08/14] staging db load and delete --- R/stagingData.R | 85 +++++++++++++++++++++++-------- man/stagingData.Rd | 5 +- tests/testthat/test-stagingData.R | 30 ++++++++++- 3 files changed, 96 insertions(+), 24 deletions(-) diff --git a/R/stagingData.R b/R/stagingData.R index c7415e47..9fb38056 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -27,8 +27,9 @@ #' files for the given registry (\code{registryName}). #' \item \code{mtimeStagingData()} returns a staging file-named POSIXct vector #' of modification times for the given registry (\code{registryName}). -#' \item \code{saveStagingData()} returns the data object (\code{data}), -#' invisibly. +#' \item \code{saveStagingData()} when successful returns the data object +#' (\code{data}), invisibly. If saving fails a warning is issued and the +#' function returns FALSE. #' \item \code{loadStagingData()} returns the data object corresponding to #' the name given upon saving (\code{dataName}). If the requested data set #' for loading does not exist the function returns FALSE. 
@@ -136,17 +137,21 @@ saveStagingData <- function(registryName, dataName, data, type = "bzip2" ) - # remove any existing registry data with same data name + # remove any existing registry data with same data name (should never fail) query <- "DELETE FROM data WHERE registry = ? AND name = ?;" params <- list(registryName, dataName) - df <- dbStagingProcess(conf$key, query, params, statement = TRUE) + d <- dbStagingProcess(conf$key, query, params, statement = TRUE) - # insert new data + # insert new data (can fail, but hard to test...) query <- "INSERT INTO data (registry, name, data) VALUES (?, ?, ?);" params <- list(registryName, dataName, blob::as_blob(b)) - df <- dbStagingProcess(conf$key, query, params, statement = TRUE) - - return(invisible(data)) + d <- dbStagingProcess(conf$key, query, params, statement = TRUE) + if (d > 0) { + return(invisible(data)) + } else { + warning(paste0("The data set '", dataName, "' could not be saved!")) + return(FALSE) + } } } @@ -154,29 +159,67 @@ saveStagingData <- function(registryName, dataName, data, #' @export loadStagingData <- function(registryName, dataName, dir = Sys.getenv("R_RAP_CONFIG_PATH")) { - path <- pathStagingData(registryName, dir) - filePath <- file.path(path, dataName) - if (file.exists(filePath)) { - readr::read_rds(filePath) - } else { - FALSE + conf <- getConfig("rapbaseConfig.yml")$r$staging + + if (conf$target == "file") { + path <- pathStagingData(registryName, dir) + filePath <- file.path(path, dataName) + + if (file.exists(filePath)) { + data <- readr::read_rds(filePath) + } else { + data <- FALSE + } + } + + if (conf$target == "db") { + query <- "SELECT data FROM data WHERE registry = ? 
AND name = ?;" + params <- list(registryName, dataName) + df <- dbStagingProcess(conf$key, query, params) + if (length(df$data) == 0) { + data <- FALSE + } else { + data <- df$data[[1]] %>% + memDecompress(type = "bzip2") %>% + unserialize() + } } + + data } #' @rdname stagingData #' @export deleteStagingData <- function(registryName, dataName, dir = Sys.getenv("R_RAP_CONFIG_PATH")) { - path <- pathStagingData(registryName, dir) - filePath <- file.path(path, dataName) - if (file.exists(filePath)) { - file.remove(filePath) - TRUE - } else { - FALSE + conf <- getConfig("rapbaseConfig.yml")$r$staging + + if (conf$target == "file") { + path <- pathStagingData(registryName, dir) + filePath <- file.path(path, dataName) + + if (file.exists(filePath)) { + file.remove(filePath) + isDelete <- TRUE + } else { + isDelete <- FALSE + } } + + if (conf$target == "db") { + query <- "DELETE FROM data WHERE registry = ? AND name = ?;" + params <- list(registryName, dataName) + d <- dbStagingProcess(conf$key, query, params, statement = TRUE) + if (d > 0) { + isDelete <- TRUE + } else { + isDelete <- FALSE + } + } + + isDelete } #' @rdname stagingData diff --git a/man/stagingData.Rd b/man/stagingData.Rd index a074850c..d34af1fb 100644 --- a/man/stagingData.Rd +++ b/man/stagingData.Rd @@ -67,8 +67,9 @@ destructive) mode.} files for the given registry (\code{registryName}). \item \code{mtimeStagingData()} returns a staging file-named POSIXct vector of modification times for the given registry (\code{registryName}). - \item \code{saveStagingData()} returns the data object (\code{data}), - invisibly. + \item \code{saveStagingData()} when successful returns the data object + (\code{data}), invisibly. If saving fails a warning is issued and the + function returns FALSE. \item \code{loadStagingData()} returns the data object corresponding to the name given upon saving (\code{dataName}). If the requested data set for loading does not exist the function returns FALSE. 
diff --git a/tests/testthat/test-stagingData.R b/tests/testthat/test-stagingData.R index 98442f6f..be87e6b4 100644 --- a/tests/testthat/test-stagingData.R +++ b/tests/testthat/test-stagingData.R @@ -61,7 +61,7 @@ test_that("loading none-existing data returns false", { expect_false(loadStagingData(registryName, "imaginaryDataSet")) }) -test_that("deleting a none-existing file returns FALE", { +test_that("deleting a none-existing file returns FALSE", { expect_false(deleteStagingData(registryName, "imaginaryDataSet")) }) @@ -167,6 +167,34 @@ test_that("modification time of stagin data in db can be obtained", { expect_true("POSIXct" %in% class(mtimeStagingData(registryName))) }) +test_that("retrieval of none existing data returns FALSE", { + expect_false(loadStagingData(registryName, "noSuchDataSet")) +}) + +test_that("data can be retrieved from staging db", { + #print(loadStagingData(registryName, dataName)) + expect_equal(loadStagingData(registryName, dataName), d) +}) + +test_that("deleting a none-existing dataset from db returns FALSE", { + expect_false(deleteStagingData(registryName, "imaginaryDataSet")) +}) + +test_that("a dataset can be deleted from db", { + expect_true(deleteStagingData(registryName, dataName)) + expect_false(loadStagingData(registryName, dataName)) +}) + +test_that("a global clean of db staging data can be performed (also dry run)", { + expect_equal(saveStagingData(registryName, dataName, d), d) + expect_true(file.exists(testFile)) + expect_message(cleanStagingData(0)) + expect_equal(class(cleanStagingData(0)), "character") + expect_true(file.exists(testFile)) + expect_invisible(cleanStagingData(0, dryRun = FALSE)) + expect_false(file.exists(testFile)) +}) + if (is.null(checkDb(is_test_that = FALSE))) { dbStagingData("staging", drop = TRUE) } From c1464c0734c7066b7df9e1e37770b2c0b502864d Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Tue, 6 Dec 2022 15:45:11 +0100 Subject: [PATCH 09/14] cleaning staging data in db --- R/stagingData.R | 48 
++++++++++++++++++++++--------- tests/testthat/test-stagingData.R | 6 ++-- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/R/stagingData.R b/R/stagingData.R index 9fb38056..243549f1 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -227,31 +227,51 @@ deleteStagingData <- function(registryName, dataName, cleanStagingData <- function(eolAge, dryRun = TRUE) { if (Sys.getenv("R_RAP_CONFIG_PATH") == "") { stop(paste( - "Got no path to staging data. No data will be deleted.", + "No data store provided. Hence, no data will be deleted.", "Exiting." )) } - dir <- Sys.getenv("R_RAP_CONFIG_PATH") - parentPath <- "stagingData" - path <- file.path(dir, parentPath) - f <- normalizePath(list.files(path, recursive = TRUE, full.names = TRUE)) - fAge <- as.numeric(Sys.time()) - as.numeric(file.mtime(f)) - fDelete <- f[fAge > eolAge] + conf <- getConfig("rapbaseConfig.yml")$r$staging + + if (conf$target == "file") { + dir <- Sys.getenv("R_RAP_CONFIG_PATH") + parentPath <- "stagingData" + path <- file.path(dir, parentPath) + f <- normalizePath(list.files(path, recursive = TRUE, full.names = TRUE)) + fAge <- as.numeric(Sys.time()) - as.numeric(file.mtime(f)) + deleteDataset <- f[fAge > eolAge] + } + + if (conf$target == "db") { + eolTime <- Sys.time() - eolAge + query <- paste0( + "SELECT registry, name FROM data WHERE mtime < ? ORDER BY registry, name;" + ) + params <- list(eolTime) + df <- dbStagingProcess(conf$key, query, params) + deleteDataset <- paste0(df$registry, ": ", df$name) + } if (dryRun) { message( paste( - "Function invoked in dry run mode and none of the returned files\n", - "will be deleted.\n", - "To delete the files please contemplate and re-run this function\n", + "Function invoked in dry run mode and none of the returned staging\n", + "data sets will be deleted.\n", + "To delete for real, please contemplate and re-run this function\n", "with the dryRun argument set to 'FALSE'. Godspeed!" 
) ) - fDelete + deleteDataset } else { - file.remove(fDelete) - invisible(fDelete) + if (conf$target == "file") { + file.remove(deleteDataset) + } + if (conf$target == "db") { + query <- "DELETE FROM data WHERE mtime < ?;" + d <- dbStagingProcess(conf$key, query, params, statement = TRUE) + } + invisible(deleteDataset) } } @@ -344,7 +364,7 @@ dbStagingConnection <- function(key = NULL, con = NULL, init = FALSE) { } #' @rdname stagingData -dbStagingProcess <- function(key, query, params, statement = FALSE) { +dbStagingProcess <- function(key, query, params = list(), statement = FALSE) { con <- dbStagingConnection(key) if (statement) { diff --git a/tests/testthat/test-stagingData.R b/tests/testthat/test-stagingData.R index be87e6b4..8fc08742 100644 --- a/tests/testthat/test-stagingData.R +++ b/tests/testthat/test-stagingData.R @@ -187,12 +187,12 @@ test_that("a dataset can be deleted from db", { test_that("a global clean of db staging data can be performed (also dry run)", { expect_equal(saveStagingData(registryName, dataName, d), d) - expect_true(file.exists(testFile)) + expect_identical(listStagingData(registryName), dataName) expect_message(cleanStagingData(0)) expect_equal(class(cleanStagingData(0)), "character") - expect_true(file.exists(testFile)) + expect_identical(listStagingData(registryName), dataName) expect_invisible(cleanStagingData(0, dryRun = FALSE)) - expect_false(file.exists(testFile)) + expect_false(loadStagingData(registryName, dataName)) }) if (is.null(checkDb(is_test_that = FALSE))) { From 3923a3c1b4a6d079a0fe115d6d909d95899d0494 Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Wed, 7 Dec 2022 13:27:42 +0100 Subject: [PATCH 10/14] extending, error fixing and clean-up --- R/stagingData.R | 27 ++++++++++++++++-- man/stagingData.Rd | 5 +++- tests/testthat/test-stagingData.R | 46 +++++++++++++++++++++---------- 3 files changed, 61 insertions(+), 17 deletions(-) diff --git a/R/stagingData.R b/R/stagingData.R index 243549f1..301f1b8c 100644 --- 
a/R/stagingData.R +++ b/R/stagingData.R @@ -45,7 +45,7 @@ #' @name stagingData #' @aliases listStagingData mtimeStagingData saveStagingData loadStagingData #' deleteStagingData cleanStagingData pathStagingData dbStagingData -#' dbStagingConnection dbStagingProcess +#' dbStagingPrereq dbStagingConnection dbStagingProcess #' #' @examples #' ## Prep test data @@ -81,6 +81,7 @@ listStagingData <- function(registryName, } if (conf$target == "db") { + dbStagingPrereq(conf$key) query <- "SELECT name FROM data WHERE registry = ?;" params <- list(registryName) df <- dbStagingProcess(conf$key, query, params) @@ -106,6 +107,7 @@ mtimeStagingData <- function(registryName, } if (conf$target == "db") { + dbStagingPrereq(conf$key) query <- "SELECT mtime, name FROM data WHERE registry = ?;" params <- list(registryName) df <- dbStagingProcess(conf$key, query, params) @@ -131,7 +133,7 @@ saveStagingData <- function(registryName, dataName, data, } if (conf$target == "db") { - dbStagingData(conf$key) + dbStagingPrereq(conf$key) b <- memCompress( serialize(data, connection = NULL), type = "bzip2" @@ -174,6 +176,7 @@ loadStagingData <- function(registryName, dataName, } if (conf$target == "db") { + dbStagingPrereq(conf$key) query <- "SELECT data FROM data WHERE registry = ? AND name = ?;" params <- list(registryName, dataName) df <- dbStagingProcess(conf$key, query, params) @@ -209,6 +212,7 @@ deleteStagingData <- function(registryName, dataName, } if (conf$target == "db") { + dbStagingPrereq(conf$key) query <- "DELETE FROM data WHERE registry = ? AND name = ?;" params <- list(registryName, dataName) d <- dbStagingProcess(conf$key, query, params, statement = TRUE) @@ -244,6 +248,7 @@ cleanStagingData <- function(eolAge, dryRun = TRUE) { } if (conf$target == "db") { + dbStagingPrereq(conf$key) eolTime <- Sys.time() - eolAge query <- paste0( "SELECT registry, name FROM data WHERE mtime < ? 
ORDER BY registry, name;" @@ -324,6 +329,24 @@ dbStagingData <- function(key, drop = FALSE) { invisible(msg) } +#' @rdname stagingData +dbStagingPrereq <- function(key) { + + con <- dbStagingConnection(key, init = TRUE) + query <- "SHOW DATABASES LIKE 'staging';" + df <- RMariaDB::dbGetQuery(con, query) + # close and remove db connection + con <- dbStagingConnection(con = con) + if (length(df$Database) > 0) { + msg <- "You're good! Database for staging data already exists." + } else { + dbStagingData(key) + msg <- "Database for staging data was created." + } + + invisible(msg) +} + #' @rdname stagingData dbStagingConnection <- function(key = NULL, con = NULL, init = FALSE) { diff --git a/man/stagingData.Rd b/man/stagingData.Rd index d34af1fb..98a309d2 100644 --- a/man/stagingData.Rd +++ b/man/stagingData.Rd @@ -10,6 +10,7 @@ \alias{cleanStagingData} \alias{pathStagingData} \alias{dbStagingData} +\alias{dbStagingPrereq} \alias{dbStagingConnection} \alias{dbStagingProcess} \title{Staging data functions} @@ -39,9 +40,11 @@ pathStagingData(registryName, dir) dbStagingData(key, drop = FALSE) +dbStagingPrereq(key) + dbStagingConnection(key = NULL, con = NULL, init = FALSE) -dbStagingProcess(key, query, params, statement = FALSE) +dbStagingProcess(key, query, params = list(), statement = FALSE) } \arguments{ \item{registryName}{Character string providing the registry name.} diff --git a/tests/testthat/test-stagingData.R b/tests/testthat/test-stagingData.R index 8fc08742..ed44b72a 100644 --- a/tests/testthat/test-stagingData.R +++ b/tests/testthat/test-stagingData.R @@ -114,7 +114,17 @@ test_that("env vars needed for db testing is present", { expect_true("DB_PASS" %in% names(Sys.getenv())) }) -# make temporary config +test_config <- paste0( + "r:", + "\n staging: ", + "\n target: db", + "\n key: staging\n" +) +cf <- file(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "rapbaseConfig.yml")) +writeLines(test_config, cf) +close(cf) + +# make proper dbConfig test_config <- paste0( 
"staging:", "\n host : ", Sys.getenv("DB_HOST"), @@ -127,25 +137,27 @@ cf <- file(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "dbConfig.yml")) writeLines(test_config, cf) close(cf) -test_config <- paste0( - "r:", - "\n staging: ", - "\n target: db", - "\n key: staging\n" -) -cf <- file(file.path(Sys.getenv("R_RAP_CONFIG_PATH"), "rapbaseConfig.yml")) -writeLines(test_config, cf) -close(cf) +test_that("No connection provided when no key (or connection object) given", { + expect_error(dbStagingConnection(key = NULL, con = NULL)) +}) -if (is.null(checkDb(is_test_that = FALSE))) { - dbStagingData("staging") -} +test_that("No connection provided when insufficient config", { + checkDb() + expect_error(dbStagingConnection("unknown"), regexp = "Could not connect") +}) + +# make new staging database using prereq function +test_that("prereq creates database initially", { + checkDb() + expect_silent(dbStagingPrereq("staging")) +}) test_that("Error is returned when key cannot be found in config", { expect_error(dbStagingData("wrongEntry")) }) test_that("A db connection object can be opened and closed", { + checkDb() con <- dbStagingConnection(key = "staging") expect_true(inherits(con, "DBIConnection")) con <- dbStagingConnection(con = con) @@ -153,26 +165,30 @@ test_that("A db connection object can be opened and closed", { }) test_that("Data can be staged with db backend", { + checkDb() d0 <- saveStagingData(registryName, "testData", d) expect_true(identical(d, d0)) }) test_that("staging files can be listed from db backend", { + checkDb() v <- listStagingData(registryName) expect_equal(class(v), "character") expect_identical(v, "testData") }) test_that("modification time of stagin data in db can be obtained", { + checkDb() expect_true("POSIXct" %in% class(mtimeStagingData(registryName))) }) test_that("retrieval of none existing data returns FALSE", { + checkDb() expect_false(loadStagingData(registryName, "noSuchDataSet")) }) test_that("data can be retrieved from staging db", { - 
#print(loadStagingData(registryName, dataName)) + checkDb() expect_equal(loadStagingData(registryName, dataName), d) }) @@ -181,11 +197,13 @@ test_that("deleting a none-existing dataset from db returns FALSE", { }) test_that("a dataset can be deleted from db", { + checkDb() expect_true(deleteStagingData(registryName, dataName)) expect_false(loadStagingData(registryName, dataName)) }) test_that("a global clean of db staging data can be performed (also dry run)", { + checkDb() expect_equal(saveStagingData(registryName, dataName, d), d) expect_identical(listStagingData(registryName), dataName) expect_message(cleanStagingData(0)) From 1434ca103e57408ec52cf5e76c6503b8c2ea1dc8 Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Wed, 7 Dec 2022 15:32:25 +0100 Subject: [PATCH 11/14] make sure test do not interfere with dev env --- tests/testthat/test-stagingData.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-stagingData.R b/tests/testthat/test-stagingData.R index ed44b72a..9e553276 100644 --- a/tests/testthat/test-stagingData.R +++ b/tests/testthat/test-stagingData.R @@ -128,7 +128,7 @@ close(cf) test_config <- paste0( "staging:", "\n host : ", Sys.getenv("DB_HOST"), - "\n name : staging", + "\n name : test_staging", "\n user : ", Sys.getenv("DB_USER"), "\n pass : ", Sys.getenv("DB_PASS"), "\n disp : ephemaralUnitTesting\n" From 2a6149d65b8729c6627966c012e881aefc05cf8b Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Wed, 7 Dec 2022 15:33:00 +0100 Subject: [PATCH 12/14] update docs, restructure later --- R/stagingData.R | 38 +++++++++++++++++++++++--------------- man/stagingData.Rd | 27 +++++++++++++++------------ 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/R/stagingData.R b/R/stagingData.R index 301f1b8c..0700a90c 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -6,36 +6,39 @@ #' must therefore be established within each registry that take staging data #' into use. 
#' -#' \code{cleanStagingData()} globally removes all staging data files older than -#' the end-of-life age provided. This is potentially a vastly destructive -#' function that should be used with great care. +#' \code{cleanStagingData()} globally removes all staging data with store date +#' prior to the end-of-life age provided. This is a vastly destructive function +#' that should be used with great care. #' #' @param registryName Character string providing the registry name. #' @param dataName Character string providing the data set name. #' @param data A data object such as a data.frame to be stored as #' \code{dataName}. #' @param dir Character string providing the path to where the staging data -#' directory resides. Default value is \code{Sys.getenv("R_RAP_CONFIG_PATH")}. -#' @param eolAge Numeric providing the staging file end-of-life age in seconds. -#' Based on the current time and the file modification time stamp staging files -#' older than \code{eolAge} will be identified as subject for removal. +#' directory resides in case of storage as files. Default value is +#' \code{Sys.getenv("R_RAP_CONFIG_PATH")}. +#' @param eolAge Numeric providing the staging data end-of-life age in seconds. +#' Based on the current time and the time of storage staging files +#' older than \code{eolAge} will be identified as subject for removal. #' @param dryRun Logical defining if function is to be run in dry (none -#' destructive) mode. +#' destructive) mode. #' #' @return \itemize{ #' \item \code{listStagingData()} returns a character vector of staging data -#' files for the given registry (\code{registryName}). -#' \item \code{mtimeStagingData()} returns a staging file-named POSIXct vector -#' of modification times for the given registry (\code{registryName}). +#' sets for the given registry (\code{registryName}). +#' \item \code{mtimeStagingData()} returns a staging data set named POSIXct +#' vector of modification times for the given registry +#' (\code{registryName}). 
#' \item \code{saveStagingData()} when successful returns the data object #' (\code{data}), invisibly. If saving fails a warning is issued and the #' function returns FALSE. #' \item \code{loadStagingData()} returns the data object corresponding to #' the name given upon saving (\code{dataName}). If the requested data set #' for loading does not exist the function returns FALSE. -#' \item \code{deleteStagingData()} returns TRUE if the file was deleted and -#' FALSE if not. -#' \item \code{cleanStagingData()} returns a list of files (to be) removed. +#' \item \code{deleteStagingData()} returns TRUE if the data set was deleted +#' and FALSE if not. +#' \item \code{cleanStagingData()} returns a list of data sets (to be) +#' removed. #' \item \code{rapbase:::pathStagingData()} is an internal helper function and #' returns a character string with the path to the staging directory of #' \code{registryName}. If its parent directory (\code{dir}) does not exists @@ -332,8 +335,13 @@ dbStagingData <- function(key, drop = FALSE) { #' @rdname stagingData dbStagingPrereq <- function(key) { + conf <- getConfig()[[key]] + if (is.null(conf)) { + stop(paste("There is no configuration corresponding to key", key)) + } + con <- dbStagingConnection(key, init = TRUE) - query <- "SHOW DATABASES LIKE 'staging';" + query <- paste0("SHOW DATABASES LIKE '", conf$name, "';") df <- RMariaDB::dbGetQuery(con, query) # close and remove db connection con <- dbStagingConnection(con = con) diff --git a/man/stagingData.Rd b/man/stagingData.Rd index 98a309d2..7e6d0719 100644 --- a/man/stagingData.Rd +++ b/man/stagingData.Rd @@ -50,15 +50,16 @@ dbStagingProcess(key, query, params = list(), statement = FALSE) \item{registryName}{Character string providing the registry name.} \item{dir}{Character string providing the path to where the staging data -directory resides. Default value is \code{Sys.getenv("R_RAP_CONFIG_PATH")}.} +directory resides in case of storage as files. 
Default value is +\code{Sys.getenv("R_RAP_CONFIG_PATH")}.} \item{dataName}{Character string providing the data set name.} \item{data}{A data object such as a data.frame to be stored as \code{dataName}.} -\item{eolAge}{Numeric providing the staging file end-of-life age in seconds. -Based on the current time and the file modification time stamp staging files +\item{eolAge}{Numeric providing the staging data end-of-life age in seconds. +Based on the current time and the time of storage staging files older than \code{eolAge} will be identified as subject for removal.} \item{dryRun}{Logical defining if function is to be run in dry (none @@ -67,18 +68,20 @@ destructive) mode.} \value{ \itemize{ \item \code{listStagingData()} returns a character vector of staging data - files for the given registry (\code{registryName}). - \item \code{mtimeStagingData()} returns a staging file-named POSIXct vector - of modification times for the given registry (\code{registryName}). + sets for the given registry (\code{registryName}). + \item \code{mtimeStagingData()} returns a staging data set named POSIXct + vector of modification times for the given registry + (\code{registryName}). \item \code{saveStagingData()} when successful returns the data object (\code{data}), invisibly. If saving fails a warning is issued and the function returns FALSE. \item \code{loadStagingData()} returns the data object corresponding to the name given upon saving (\code{dataName}). If the requested data set for loading does not exist the function returns FALSE. - \item \code{deleteStagingData()} returns TRUE if the file was deleted and - FALSE if not. - \item \code{cleanStagingData()} returns a list of files (to be) removed. + \item \code{deleteStagingData()} returns TRUE if the data set was deleted + and FALSE if not. + \item \code{cleanStagingData()} returns a list of data sets (to be) + removed. 
\item \code{rapbase:::pathStagingData()} is an internal helper function and returns a character string with the path to the staging directory of \code{registryName}. If its parent directory (\code{dir}) does not exists @@ -93,9 +96,9 @@ must therefore be established within each registry that take staging data into use. } \details{ -\code{cleanStagingData()} globally removes all staging data files older than -the end-of-life age provided. This is potentially a vastly destructive -function that should be used with great care. +\code{cleanStagingData()} globally removes all staging data with store date +prior to the end-of-life age provided. This is a vastly destructive function +that should be used with great care. } \examples{ ## Prep test data From a3f15ffefce7c2478b3cc8350d1e5f81765cb519 Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Thu, 8 Dec 2022 09:18:50 +0100 Subject: [PATCH 13/14] now helper functions --- R/stagingData.R | 56 +++++++++++++++++++++++++++++----- man/stagingData.Rd | 15 --------- man/stagingDataHelper.Rd | 66 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 22 deletions(-) create mode 100644 man/stagingDataHelper.Rd diff --git a/R/stagingData.R b/R/stagingData.R index 0700a90c..813e7102 100644 --- a/R/stagingData.R +++ b/R/stagingData.R @@ -47,8 +47,7 @@ #' #' @name stagingData #' @aliases listStagingData mtimeStagingData saveStagingData loadStagingData -#' deleteStagingData cleanStagingData pathStagingData dbStagingData -#' dbStagingPrereq dbStagingConnection dbStagingProcess +#' deleteStagingData cleanStagingData #' #' @examples #' ## Prep test data @@ -283,7 +282,50 @@ cleanStagingData <- function(eolAge, dryRun = TRUE) { } } -#' @rdname stagingData + +#' Data staging helper (internal) functions +#' +#' A set of helper functions to aid staging of registry data at Rapporteket. +#' +#' +#' @param registryName Character string providing the registry name. 
+#' @param dir Character string providing the path to where the staging data +#' directory resides in case of storage as files. Default value is +#' \code{Sys.getenv("R_RAP_CONFIG_PATH")}. +#' @param key Character string with key to be used for staging data store +#' credentials. +#' @param drop Logical defining if a database is to be deleted. FALSE by +#' default. +#' @param con A database connection object. +#' @param init Logical defining if the function call will perform an initial +#' set-up of a database. Default value is FALSE. +#' @param query Character string providing a database query. +#' @param params List of values to be provided in a parameterized query. +#' @param statement Logical defining if a query is a statement or not. Default +#' value is FALSE. +#' +#' @return \itemize{ +#' \item \code{pathStagingData()} returns a character string with the path to +#' the staging directory of \code{registryName}. If its parent directory +#' (\code{dir}) does not exist, an error is returned. +#' \item \code{dbStagingData()} creates or drops a staging data database and +#' returns a message invisibly. +#' \item \code{dbStagingPrereq()} ensures that a database for staging data is +#' properly set up and returns a message, invisibly. +#' \item \code{dbStagingConnection()} returns an open database connection +#' object or, when an open connection object is provided as an argument, +#' closes it and returns \code{NULL} invisibly. +#' \item \code{dbStagingProcess()} returns the raw result of a database query +#' based on the arguments provided.
+#' } +#' +#' @name stagingDataHelper +#' @keywords internal +#' @aliases pathStagingData dbStagingData dbStagingPrereq dbStagingConnection +#' dbStagingProcess +NULL + +#' @rdname stagingDataHelper pathStagingData <- function(registryName, dir) { stopifnot(dir.exists(dir)) @@ -298,7 +340,7 @@ pathStagingData <- function(registryName, dir) { path } -#' @rdname stagingData +#' @rdname stagingDataHelper dbStagingData <- function(key, drop = FALSE) { conf <- getConfig()[[key]] @@ -332,7 +374,7 @@ dbStagingData <- function(key, drop = FALSE) { invisible(msg) } -#' @rdname stagingData +#' @rdname stagingDataHelper dbStagingPrereq <- function(key) { conf <- getConfig()[[key]] @@ -355,7 +397,7 @@ dbStagingPrereq <- function(key) { invisible(msg) } -#' @rdname stagingData +#' @rdname stagingDataHelper dbStagingConnection <- function(key = NULL, con = NULL, init = FALSE) { if (inherits(con, "DBIConnection")) { @@ -394,7 +436,7 @@ dbStagingConnection <- function(key = NULL, con = NULL, init = FALSE) { } } -#' @rdname stagingData +#' @rdname stagingDataHelper dbStagingProcess <- function(key, query, params = list(), statement = FALSE) { con <- dbStagingConnection(key) diff --git a/man/stagingData.Rd b/man/stagingData.Rd index 7e6d0719..aa765fe0 100644 --- a/man/stagingData.Rd +++ b/man/stagingData.Rd @@ -8,11 +8,6 @@ \alias{loadStagingData} \alias{deleteStagingData} \alias{cleanStagingData} -\alias{pathStagingData} -\alias{dbStagingData} -\alias{dbStagingPrereq} -\alias{dbStagingConnection} -\alias{dbStagingProcess} \title{Staging data functions} \usage{ listStagingData(registryName, dir = Sys.getenv("R_RAP_CONFIG_PATH")) @@ -35,16 +30,6 @@ deleteStagingData( ) cleanStagingData(eolAge, dryRun = TRUE) - -pathStagingData(registryName, dir) - -dbStagingData(key, drop = FALSE) - -dbStagingPrereq(key) - -dbStagingConnection(key = NULL, con = NULL, init = FALSE) - -dbStagingProcess(key, query, params = list(), statement = FALSE) } \arguments{ \item{registryName}{Character string 
providing the registry name.} diff --git a/man/stagingDataHelper.Rd b/man/stagingDataHelper.Rd new file mode 100644 index 00000000..d33239fb --- /dev/null +++ b/man/stagingDataHelper.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stagingData.R +\name{stagingDataHelper} +\alias{stagingDataHelper} +\alias{pathStagingData} +\alias{dbStagingData} +\alias{dbStagingPrereq} +\alias{dbStagingConnection} +\alias{dbStagingProcess} +\title{Data staging helper (internal) functions} +\usage{ +pathStagingData(registryName, dir) + +dbStagingData(key, drop = FALSE) + +dbStagingPrereq(key) + +dbStagingConnection(key = NULL, con = NULL, init = FALSE) + +dbStagingProcess(key, query, params = list(), statement = FALSE) +} +\arguments{ +\item{registryName}{Character string providing the registry name.} + +\item{dir}{Character string providing the path to where the staging data +directory resides in case of storage as files. Default value is +\code{Sys.getenv("R_RAP_CONFIG_PATH")}.} + +\item{key}{Character string with key to be used for staging data store +credentials.} + +\item{drop}{Logical defining if a database is to be deleted. FALSE by +default.} + +\item{con}{A database connection object.} + +\item{init}{Logical defining if the function call will perform an initial +set-up of a database. Default value is FALSE.} + +\item{query}{Character string providing a database query.} + +\item{params}{List of values to be provided in a parameterized query.} + +\item{statement}{Logical defining if a query is a statement or not. Default +value is FALSE.} +} +\value{ +\itemize{ + \item \code{pathStagingData()} returns a character string with the path to + the staging directory of \code{registryName}. If its parent directory + (\code{dir}) does not exist, an error is returned. + \item \code{dbStagingData()} creates or drops a staging data database and + returns a message invisibly.
+ \item \code{dbStagingPrereq()} ensures that a database for staging data is + properly set up and returns a message, invisibly. + \item \code{dbStagingConnection()} returns an open database connection + object or, when an open connection object is provided as an argument, + closes it and returns \code{NULL} invisibly. + \item \code{dbStagingProcess()} returns the raw result of a database query + based on the arguments provided. +} +} +\description{ +A set of helper functions to aid staging of registry data at Rapporteket. +} +\keyword{internal} From 5ddb3da1f45a1af180d7313e58b8ad96652aec38 Mon Sep 17 00:00:00 2001 From: Are Edvardsen Date: Thu, 8 Dec 2022 09:58:36 +0100 Subject: [PATCH 14/14] dummy config for staging database --- inst/dbConfig.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/inst/dbConfig.yml b/inst/dbConfig.yml index 37ece721..627361ca 100644 --- a/inst/dbConfig.yml +++ b/inst/dbConfig.yml @@ -30,6 +30,14 @@ autoreport: pass : root disp : ForAutoReportDevOnly +# for staging data tests with database as target +staging: + host : db + name : staging + user : root + pass : root + disp : ForStagingDataDevOnly + # for testing, in dev container dev: host : db