From 83fd4ff4b075b178df741d21c055ec58f8ad04c4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 10 Oct 2024 19:46:58 -0400 Subject: [PATCH] tests/testthat/test-query-condition.R --- apis/r/R/QueryCondition.R | 18 +- apis/r/src/query_condition.cpp | 26 +-- apis/r/tests/testthat/test-query-condition.R | 206 +++++++++++++++++++ libtiledbsoma/test/common.cc | 15 +- 4 files changed, 234 insertions(+), 31 deletions(-) create mode 100644 apis/r/tests/testthat/test-query-condition.R diff --git a/apis/r/R/QueryCondition.R b/apis/r/R/QueryCondition.R index fbaff115bc..f1f6419d6b 100644 --- a/apis/r/R/QueryCondition.R +++ b/apis/r/R/QueryCondition.R @@ -55,9 +55,12 @@ parse_query_condition_new <- function( somactx ) { - stopifnot("The schema argument must be an Arrow Schema" = - is(schema, "ArrowObject") && - is(schema, "Schema")) + stopifnot( + "The schema argument must be an Arrow Schema" = + is(schema, "ArrowObject") && + is(schema, "Schema"), + "The argument must be a somactx object" = + is(somactx, "externalptr")) # ---------------------------------------------------------------- # Helpers for walking the parse tree @@ -124,7 +127,6 @@ parse_query_condition_new <- function( r_op_name <- tolower(as.character(node[1])) tdb_op_name <- if (r_op_name == "%in%") "IN" else "NOT_IN" - # XXX EXTRACT HELPER arrow_field <- schema[[attr_name]] if (is.null(arrow_field)) { .error_function("No attribute '", attr_name, "' is present.", call. = FALSE) @@ -168,8 +170,8 @@ parse_query_condition_new <- function( ascii = rhs_text, utf8 = rhs_text, bool = as.logical(rhs_text), - ## XXX DATETIME_MS = as.POSIXct(rhs_text), - ## XXX DATETIME_DAY = as.Date(rhs_text), + date32 = as.POSIXct(rhs_text), + timestamp = as.Date(rhs_text), as.numeric(rhs_text)), arrow_type_name = arrow_type_name, op_name = .map_op_to_character(op_name), @@ -201,12 +203,12 @@ setClass( # ================================================================ #' Creates a 'tiledbsoma_query_condition' object #' -#' @param ctx (optional) A TileDB Ctx object; if not supplied the default +#' @param somactx (optional) A TileDB Ctx object; if not supplied the default #' context object is retrieved #' @return A 'tiledbsoma_query_condition' object #' @export tiledbsoma_empty_query_condition <- function(somactx) { - stopifnot("The argument must be a ctx object" = is(ctx, "externalptr")) + stopifnot("The argument must be a somactx object" = is(somactx, "externalptr")) ptr <- libtiledbsoma_empty_query_condition(somactx) query_condition <- new("tiledbsoma_query_condition", ptr = ptr, init = FALSE) invisible(query_condition) diff --git a/apis/r/src/query_condition.cpp b/apis/r/src/query_condition.cpp index 702140ee50..45aa7a6edd 100644 --- a/apis/r/src/query_condition.cpp +++ b/apis/r/src/query_condition.cpp @@ -130,7 +130,7 @@ void libtiledbsoma_query_condition_from_triple( uint64_t cond_val_size = sizeof(float); query_cond->init(attr_name, (void*)&v, cond_val_size, op); - } else if (arrow_type_name == "ascii" || arrow_type_name == "utf8") { + } else if (arrow_type_name == "utf8" || arrow_type_name == "large_utf8") { std::string v = Rcpp::as(condition_value); query_cond->init(attr_name, v, op); @@ -139,18 +139,18 @@ void libtiledbsoma_query_condition_from_triple( uint64_t cond_val_size = sizeof(bool); query_cond->init(attr_name, (void*)&v, cond_val_size, op); - // XXX FIXME - // } else if (arrow_type_name == "DATETIME_MS") { - // int64_t v = static_cast( - // Rcpp::as(condition_value) * 1000); - // uint64_t cond_val_size = sizeof(int64_t); - // query_cond->init(attr_name, (void*)&v, cond_val_size, op); - - // } else if (arrow_type_name == "DATETIME_DAY") { - // int64_t v = - // static_cast(Rcpp::as(condition_value)); - // uint64_t cond_val_size = sizeof(int64_t); - // query_cond->init(attr_name, (void*)&v, cond_val_size, op); + } else if (arrow_type_name == "timestamp") { + // Arrow timestamp TileDB DATETIME_MS + int64_t v = static_cast( + Rcpp::as(condition_value) * 1000); + uint64_t cond_val_size = sizeof(int64_t); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + } else if (arrow_type_name == "date32") { + // Arrow date32 TileDB DATETIME_DAY + int64_t v = static_cast(Rcpp::as(condition_value)); + uint64_t cond_val_size = sizeof(int64_t); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); } else { Rcpp::stop( diff --git a/apis/r/tests/testthat/test-query-condition.R b/apis/r/tests/testthat/test-query-condition.R new file mode 100644 index 0000000000..1f89191427 --- /dev/null +++ b/apis/r/tests/testthat/test-query-condition.R @@ -0,0 +1,206 @@ +test_that("DataFrame Factory", { + uri <- tempfile() + if (dir.exists(uri)) unlink(uri, recursive=TRUE) + + ctx <- soma_context() + + sch <- arrow::schema( + arrow::field("soma_joinid", arrow::int64()), + arrow::field("int8", arrow::int8()), + arrow::field("int16", arrow::int16()), + arrow::field("int32", arrow::int32()), + arrow::field("int64", arrow::int64()), + arrow::field("uint8", arrow::uint8()), + arrow::field("uint16", arrow::uint16()), + arrow::field("uint32", arrow::uint32()), + arrow::field("uint64", arrow::uint64()), + arrow::field("string", arrow::string()), + arrow::field("large_utf8", arrow::large_utf8()), + arrow::field("enum", + arrow::dictionary( + index_type = arrow::int8(), + value_type = arrow::utf8(), + ordered = TRUE)), + arrow::field("float32", arrow::float32()), + arrow::field("float64", arrow::float64()) + # TODO: for a follow-up PR + # arrow::field("timestamp_s", arrow::timestamp(unit="s")), + # arrow::field("timestamp_ms", arrow::timestamp(unit="ms")), + # arrow::field("timestamp_us", arrow::timestamp(unit="us")), + # arrow::field("timestamp_ns", arrow::timestamp(unit="ns")) + # Not supported in libtiledbsoma + # arrow::field("datetime_day", arrow::date32()) + ) + + sdf <- SOMADataFrameCreate(uri, sch, index_column_names = "soma_joinid") + expect_true(sdf$exists()) + expect_true(dir.exists(uri)) + + tbl <- arrow::arrow_table( + soma_joinid = 1L:10L, + int8 = -11L:-20L, + int16 = -201L:-210L, + int32 = -301L:-310L, + int64 = -401L:-410L, + uint8 = 11L:20L, + uint16 = 201L:210L, + uint32 = 301L:310L, + uint64 = 401L:410L, + string = c("apple", "ball", "cat", "dog", "egg", "fig", "goose", "hay", "ice", "jam"), + large_utf8 = c("APPLE", "BALL", "CAT", "DOG", "EGG", "FIG", "GOOSE", "HAY", "ICE", "JAM"), + enum = factor( + c("red", "yellow", "green", "red", "red", "red", "yellow", "green", "red", "green"), + levels = c("red", "yellow", "green")), + float32 = 1.5:10.5, + float64 = 11.5:20.5, + # TODO: for a follow-up PR + # timestamp_s = as.POSIXct(as.numeric(3600 + 1:10), tz="GMT"), + # timestamp_ms = as.POSIXct(as.numeric(3600*1000 + 1:10), tz="GMT"), + # timestamp_us = as.POSIXct(as.numeric(3600*1000*1000 + 1:10), tz="GMT"), + # timestamp_ns = as.POSIXct(as.numeric(3600*1000*1000*1000 + 1:10), tz="GMT"), + schema = sch) + sdf$write(tbl) + sdf$close() + + sdf$reopen("READ") + + good_cases <- list( + 'soma_joinid > 5' = function(df) { + expect_equal(df$soma_joinid, 6:10) + expect_equal(df$int32, -306:-310) + }, + 'soma_joinid == 10' = function(df) { + expect_equal(df$soma_joinid, 10) + expect_equal(df$int32, -310) + expect_equal(as.character(df$enum), c("green")) + }, + 'soma_joinid > 4 && soma_joinid < 8' = function(df) { + expect_equal(df$soma_joinid, 5:7) + expect_equal(df$string, c("egg", "fig", "goose")) + expect_equal(df$large_utf8, c("EGG", "FIG", "GOOSE")) + }, + 'soma_joinid < 4 || soma_joinid > 8' = function(df) { + expect_equal(df$soma_joinid, c(1:3, 9:10)) + }, + + 'int8 == 8' = function(df) { + expect_equal(length(df$soma_joinid), 0) + }, + 'int8 == -12' = function(df) { + expect_equal(df$soma_joinid, c(2)) + }, + 'int16 > -203' = function(df) { + expect_equal(df$soma_joinid, c(1, 2)) + }, + 'uint16 < 204' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 3)) + }, + 'int32 > -303' = function(df) { + expect_equal(df$soma_joinid, c(1, 2)) + }, + 'uint32 < 304' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 3)) + }, + 'int64 > -403' = function(df) { + expect_equal(df$soma_joinid, c(1, 2)) + }, + 'uint64 < 404' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 3)) + }, + + 'float32 < 4.5' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 3)) + }, + 'float64 < 14.5' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 3)) + }, + + 'string == "dog"' = function(df) { + expect_equal(df$soma_joinid, c(4)) + }, + 'string %in% c("fig", "dog")' = function(df) { + expect_equal(df$soma_joinid, c(4, 6)) + }, + 'string %nin% c("fig", "dog")' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 3, 5, 7, 8, 9, 10)) + }, + + 'enum == "red"' = function(df) { + expect_equal(df$soma_joinid, c(1, 4, 5, 6, 9)) + }, + 'enum != "red"' = function(df) { + expect_equal(df$soma_joinid, c(2, 3, 7, 8, 10)) + }, + 'enum == "orange"' = function(df) { + expect_equal(length(df$soma_joinid), 0) + }, + 'enum != "orange"' = function(df) { + expect_equal(df$soma_joinid, 1:10) + }, + 'enum %in% c("red", "green")' = function(df) { + expect_equal(df$soma_joinid, c(1, 3, 4, 5, 6, 8, 9, 10)) + }, + 'enum %nin% c("red", "green")' = function(df) { + expect_equal(df$soma_joinid, c(2, 7)) + }, + 'enum %in% c("orange", "green")' = function(df) { + expect_equal(df$soma_joinid, c(3, 8, 10)) + }, + 'enum %nin% c("orange", "green")' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 4, 5, 6, 7, 9)) + }, + 'enum %in% c("orange", "purple")' = function(df) { + expect_equal(length(df$soma_joinid), 0) + }, + 'enum %nin% c("orange", "purple")' = function(df) { + expect_equal(df$soma_joinid, 1:10) + } + + # TODO: for a follow-up PR + # 'timestamp_s < "1969-12-31 20:01:04 EST"' = function(df) { + # expect_equal(df$soma_joinid, 1:3) + # }, + # 'timestamp_ms != "1970-02-11 11:00:05 EST"' = function(df) { + # expect_equal(df$soma_joinid, 1:10) + # }, + # 'timestamp_us > "1970-01-01 00:00:01 GMT"' = function(df) { + # expect_equal(df$soma_joinid, 1:10) + # }, + # 'timestamp_ns > "1970-01-01 00:00:01 GMT"' = function(df) { + # expect_equal(df$soma_joinid, 1:10) + # } + ) + + for (query_string in names(good_cases)) { + parsed <- do.call( + what = tiledbsoma:::parse_query_condition_new, + args = list(expr=str2lang(query_string), schema=sch, somactx=ctx)) + clib_value_filter <- parsed@ptr + + sr <- sr_setup(uri = sdf$uri, ctx, qc=clib_value_filter) + iter <- TableReadIter$new(sr) + tbl <- iter$read_next() + expect_true(iter$read_complete()) + df <- as.data.frame(tbl) + # Call the validator + good_cases[[query_string]](df) + } + + bad_cases <- list( + '', + ' ', + 'nonesuch < 10', + 'soma_joinid << 10', + 'soma_joinid', + 'soma_joinid < 4 or soma_joinid > 8' + ) + + for (query_string in names(bad_cases)) { + expect_error( + do.call( + what = tiledbsoma:::parse_query_condition_new, + args = list(expr=str2lang(query_string), schema=sch, somactx=ctx))) + } + + sdf$close() +}) diff --git a/libtiledbsoma/test/common.cc b/libtiledbsoma/test/common.cc index 4ef735f81c..9f334e7b31 100644 --- a/libtiledbsoma/test/common.cc +++ b/libtiledbsoma/test/common.cc @@ -119,17 +119,12 @@ create_arrow_schema_and_index_columns( // Create index-column info only, no schema involving the attrs ArrowTable create_column_index_info(const std::vector& dim_infos) { for (auto info : dim_infos) { - LOG_DEBUG(fmt::format("create_column_index_info name={}", info.name)); - - LOG_DEBUG(fmt::format( - "create_column_index_info type={}", - tiledb::impl::to_str(info.tiledb_datatype))); - - LOG_DEBUG( - fmt::format("create_column_index_info dim_max={}", info.dim_max)); - LOG_DEBUG(fmt::format( - "create_column_index_info ucd={}", info.use_current_domain)); + "create_column_index_info name={} type={} dim_max={} ucd={}", + info.name, + tiledb::impl::to_str(info.tiledb_datatype), + info.dim_max, + info.use_current_domain)); } auto index_cols_info_schema = _create_index_cols_info_schema(dim_infos);