From d36731f51a93134a753d54b977d938e310e39c3b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 15 Oct 2024 18:00:21 -0400 Subject: [PATCH] Move `parse_query_condition_new` -> `parse_query_condition` --- apis/r/R/QueryCondition.R | 15 ++++- apis/r/R/SOMADataFrame.R | 7 +- apis/r/tests/testthat/test-SCEOutgest.R | 2 + .../testthat/test-SOMAArrayReader-Arrow.R | 23 ++----- apis/r/tests/testthat/test-query-condition.R | 65 +++++++++++++++---- 5 files changed, 76 insertions(+), 36 deletions(-) diff --git a/apis/r/R/QueryCondition.R b/apis/r/R/QueryCondition.R index b3596dff00..fbde378768 100644 --- a/apis/r/R/QueryCondition.R +++ b/apis/r/R/QueryCondition.R @@ -34,7 +34,7 @@ #' Expressions, in the R language syntax, are parsed locally by this function. #' #' @param expr An expression that is understood by the TileDB grammar for -#' query conditions. +#' query conditions, as a character string. #' #' @param schema The Arrow schema for the array for which a query #' condition is being prepared. This is necessary to obtain type information @@ -48,14 +48,18 @@ #' #' @return A `tiledbsoma_query_condition` object. #' @export -parse_query_condition_new <- function( +parse_query_condition <- function( expr, schema, strict=TRUE, somactx ) { + spdl::debug("[parseqc] ENTER [{}]", expr) + stopifnot( + "The expr argument must be a single character string" = + is(expr, "character") && length(expr) == 1, "The schema argument must be an Arrow Schema" = is(schema, "ArrowObject") && is(schema, "Schema"), @@ -182,7 +186,9 @@ parse_query_condition_new <- function( value = switch( arrow_type_name, ascii = rhs_text, + string = rhs_text, utf8 = rhs_text, + large_utf8 = rhs_text, bool = as.logical(rhs_text), # Problem: @@ -229,8 +235,11 @@ parse_query_condition_new <- function( } } + # Convert expr from string to language + aslang <- str2lang(expr) + # Use base-r `substitute` to map the user-provided expression to a parse tree - parse_tree <- substitute(expr) + parse_tree <- substitute(aslang) # Map the parse tree to TileDB core QueryCondition return(.parse_tree_to_qc(parse_tree, debug)) diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index 5ff770de4b..26553b77df 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -178,8 +178,6 @@ SOMADataFrame <- R6::R6Class( private$check_open_for_read() result_order <- match_query_layout(result_order) - uri <- self$uri - arr <- self$object # need array (schema) to properly parse query condition ## if unnamed set names if (!is.null(coords)) { @@ -199,8 +197,9 @@ SOMADataFrame <- R6::R6Class( if (!is.null(value_filter)) { value_filter <- validate_read_value_filter(value_filter) - parsed <- do.call(what = tiledb::parse_query_condition, - args = list(expr = str2lang(value_filter), ta = arr)) + parsed <- do.call( + what = tiledbsoma:::parse_query_condition, + args = list(expr = value_filter, schema = self$schema(), somactx = private$.soma_context)) value_filter <- parsed@ptr } spdl::debug("[SOMADataFrame$read] calling sr_setup for {} at ({},{})", self$uri, diff --git a/apis/r/tests/testthat/test-SCEOutgest.R b/apis/r/tests/testthat/test-SCEOutgest.R index 2fcc2f4d86..6402497c32 100644 --- a/apis/r/tests/testthat/test-SCEOutgest.R +++ b/apis/r/tests/testthat/test-SCEOutgest.R @@ -335,6 +335,7 @@ test_that("Load SCE object from indexed ExperimentQuery", { skip_if(!extended_tests() || covr_tests()) skip_if_not_installed('SingleCellExperiment', .MINIMUM_SCE_VERSION('c')) uri <- tempfile(pattern="sce-experiment-query-value-filters") + n_obs <- 1001L n_var <- 99L n_pcs <- 50L @@ -371,6 +372,7 @@ test_that("Load SCE object from indexed ExperimentQuery", { n_var_select <- length(var_label_values) n_obs_select <- length(obs_label_values) expect_no_condition(obj <- query$to_single_cell_experiment()) + expect_s4_class(obj, 'SingleCellExperiment') expect_identical(dim(obj), c(n_var_select, n_obs_select)) expect_identical( diff --git a/apis/r/tests/testthat/test-SOMAArrayReader-Arrow.R b/apis/r/tests/testthat/test-SOMAArrayReader-Arrow.R index 2135f557a6..8753730a51 100644 --- a/apis/r/tests/testthat/test-SOMAArrayReader-Arrow.R +++ b/apis/r/tests/testthat/test-SOMAArrayReader-Arrow.R @@ -15,33 +15,24 @@ test_that("Arrow Interface from SOMAArrayReader", { tb1 <- soma_array_to_arrow_table(soma_array_reader(uri, columns)) expect_equal(tb1$num_rows, 2638) - arr <- tiledb_array(uri) # need array for schema access to qc parser - qc <- parse_query_condition(n_counts < 1000 && n_genes >= 400, ta = arr) - tb2 <- soma_array_to_arrow_table(soma_array_reader(uri, columns, qc@ptr)) - - expect_equal(tb2$num_rows, 47) - expect_true(all(tb2$n_counts < 1000)) - expect_true(all(tb2$n_genes >= 400)) - - # read everything - tb3 <- soma_array_to_arrow_table(soma_array_reader(uri)) + tb2 <- soma_array_to_arrow_table(soma_array_reader(uri)) - expect_equal(tb3$num_rows, 2638) - expect_equal(tb3$num_columns, 6) + expect_equal(tb2$num_rows, 2638) + expect_equal(tb2$num_columns, 6) # read a subset of rows and columns - tb4 <- soma_array_to_arrow_table(soma_array_reader(uri = uri, + tb3 <- soma_array_to_arrow_table(soma_array_reader(uri = uri, colnames = c("obs_id", "percent_mito", "n_counts", "louvain"), dim_ranges = list(soma_joinid = rbind(bit64::as.integer64(c(1000, 1004)), bit64::as.integer64(c(2000, 2004)))), dim_points=list(soma_joinid = bit64::as.integer64(seq(0, 100, by = 20))))) - expect_equal(tb4$num_rows, 16) - expect_equal(tb4$num_columns, 4) + expect_equal(tb3$num_rows, 16) + expect_equal(tb3$num_columns, 4) - rm(z, tb, rb, tb1, arr, tb2, tb3, tb4) + rm(z, tb, rb, tb1, tb2, tb3) gc() }) diff --git a/apis/r/tests/testthat/test-query-condition.R b/apis/r/tests/testthat/test-query-condition.R index a929f52367..6c65c1800b 100644 --- a/apis/r/tests/testthat/test-query-condition.R +++ b/apis/r/tests/testthat/test-query-condition.R @@ -15,6 +15,8 @@ test_that("DataFrame Factory", { arrow::field("uint32", arrow::uint32()), arrow::field("uint64", arrow::uint64()), arrow::field("string", arrow::string()), + # Unlike in pyarrow there is no arrow::large_string + arrow::field("utf8", arrow::utf8()), arrow::field("large_utf8", arrow::large_utf8()), arrow::field("enum", arrow::dictionary( @@ -47,6 +49,7 @@ test_that("DataFrame Factory", { uint32 = 301L:310L, uint64 = 401L:410L, string = c("apple", "ball", "cat", "dog", "egg", "fig", "goose", "hay", "ice", "jam"), + utf8 = c("apple", "ball", "cat", "dog", "egg", "fig", "goose", "hay", "ice", "jam"), large_utf8 = c("APPLE", "BALL", "CAT", "DOG", "EGG", "FIG", "GOOSE", "HAY", "ICE", "JAM"), enum = factor( c("red", "yellow", "green", "red", "red", "red", "yellow", "green", "red", "green"), @@ -118,6 +121,15 @@ test_that("DataFrame Factory", { 'string == "dog"' = function(df) { expect_equal(df$soma_joinid, c(4)) }, + 'string == "cat" || string == "dog"' = function(df) { + expect_equal(df$soma_joinid, c(3, 4)) + }, + "string == 'cat' || string == 'dog'" = function(df) { + expect_equal(df$soma_joinid, c(3, 4)) + }, + "string == 'cat' || string == 'yak'" = function(df) { + expect_equal(df$soma_joinid, c(3)) + }, 'string %in% c("fig", "dog")' = function(df) { expect_equal(df$soma_joinid, c(4, 6)) }, @@ -125,6 +137,44 @@ test_that("DataFrame Factory", { expect_equal(df$soma_joinid, c(1, 2, 3, 5, 7, 8, 9, 10)) }, + 'utf8 == "dog"' = function(df) { + expect_equal(df$soma_joinid, c(4)) + }, + 'utf8 == "cat" || utf8 == "dog"' = function(df) { + expect_equal(df$soma_joinid, c(3, 4)) + }, + "utf8 == 'cat' || utf8 == 'dog'" = function(df) { + expect_equal(df$soma_joinid, c(3, 4)) + }, + "utf8 == 'cat' || utf8 == 'yak'" = function(df) { + expect_equal(df$soma_joinid, c(3)) + }, + 'utf8 %in% c("fig", "dog")' = function(df) { + expect_equal(df$soma_joinid, c(4, 6)) + }, + 'utf8 %nin% c("fig", "dog")' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 3, 5, 7, 8, 9, 10)) + }, + + 'large_utf8 == "DOG"' = function(df) { + expect_equal(df$soma_joinid, c(4)) + }, + 'large_utf8 == "CAT" || large_utf8 == "DOG"' = function(df) { + expect_equal(df$soma_joinid, c(3, 4)) + }, + "large_utf8 == 'CAT' || large_utf8 == 'DOG'" = function(df) { + expect_equal(df$soma_joinid, c(3, 4)) + }, + "large_utf8 == 'CAT' || large_utf8 == 'YAK'" = function(df) { + expect_equal(df$soma_joinid, c(3)) + }, + 'large_utf8 %in% c("FIG", "DOG")' = function(df) { + expect_equal(df$soma_joinid, c(4, 6)) + }, + 'large_utf8 %nin% c("FIG", "DOG")' = function(df) { + expect_equal(df$soma_joinid, c(1, 2, 3, 5, 7, 8, 9, 10)) + }, + 'enum == "red"' = function(df) { expect_equal(df$soma_joinid, c(1, 4, 5, 6, 9)) }, @@ -188,15 +238,7 @@ test_that("DataFrame Factory", { ) for (query_string in names(good_cases)) { - parsed <- do.call( - what = tiledbsoma:::parse_query_condition_new, - args = list(expr=str2lang(query_string), schema=sch, somactx=ctx)) - clib_value_filter <- parsed@ptr - - sr <- sr_setup(uri = sdf$uri, ctx, qc=clib_value_filter) - iter <- TableReadIter$new(sr) - tbl <- iter$read_next() - expect_true(iter$read_complete()) + tbl <- sdf$read(value_filter = query_string)$concat() df <- as.data.frame(tbl) # Call the validator good_cases[[query_string]](df) @@ -212,10 +254,7 @@ test_that("DataFrame Factory", { ) for (query_string in names(bad_cases)) { - expect_error( - do.call( - what = tiledbsoma:::parse_query_condition_new, - args = list(expr=str2lang(query_string), schema=sch, somactx=ctx))) + expect_error(sdf$read(value_filter = query_string)$concat()) } sdf$close()