Skip to content

Commit

Permalink
Move parse_query_condition_new -> parse_query_condition
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 15, 2024
1 parent faa647a commit d36731f
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 36 deletions.
15 changes: 12 additions & 3 deletions apis/r/R/QueryCondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#' Expressions, in the R language syntax, are parsed locally by this function.
#'
#' @param expr An expression that is understood by the TileDB grammar for
#' query conditions.
#' query conditions, as a character string.
#'
#' @param schema The Arrow schema for the array for which a query
#' condition is being prepared. This is necessary to obtain type information
Expand All @@ -48,14 +48,18 @@
#'
#' @return A `tiledbsoma_query_condition` object.
#' @export
parse_query_condition_new <- function(
parse_query_condition <- function(
expr,
schema,
strict=TRUE,
somactx
) {

spdl::debug("[parseqc] ENTER [{}]", expr)

stopifnot(
"The expr argument must be a single character string" =
is(expr, "character") && length(expr) == 1,
"The schema argument must be an Arrow Schema" =
is(schema, "ArrowObject") &&
is(schema, "Schema"),
Expand Down Expand Up @@ -182,7 +186,9 @@ parse_query_condition_new <- function(
value = switch(
arrow_type_name,
ascii = rhs_text,
string = rhs_text,
utf8 = rhs_text,
large_utf8 = rhs_text,
bool = as.logical(rhs_text),
# Problem:

Expand Down Expand Up @@ -229,8 +235,11 @@ parse_query_condition_new <- function(
}
}

# Convert expr from string to language
aslang <- str2lang(expr)

# Use base-r `substitute` to map the user-provided expression to a parse tree
parse_tree <- substitute(expr)
parse_tree <- substitute(aslang)

# Map the parse tree to TileDB core QueryCondition
return(.parse_tree_to_qc(parse_tree, debug))
Expand Down
7 changes: 3 additions & 4 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,6 @@ SOMADataFrame <- R6::R6Class(
private$check_open_for_read()

result_order <- match_query_layout(result_order)
uri <- self$uri
arr <- self$object # need array (schema) to properly parse query condition

## if unnamed set names
if (!is.null(coords)) {
Expand All @@ -199,8 +197,9 @@ SOMADataFrame <- R6::R6Class(

if (!is.null(value_filter)) {
value_filter <- validate_read_value_filter(value_filter)
parsed <- do.call(what = tiledb::parse_query_condition,
args = list(expr = str2lang(value_filter), ta = arr))
parsed <- do.call(
what = tiledbsoma:::parse_query_condition,
args = list(expr = value_filter, schema = self$schema(), somactx = private$.soma_context))
value_filter <- parsed@ptr
}
spdl::debug("[SOMADataFrame$read] calling sr_setup for {} at ({},{})", self$uri,
Expand Down
2 changes: 2 additions & 0 deletions apis/r/tests/testthat/test-SCEOutgest.R
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ test_that("Load SCE object from indexed ExperimentQuery", {
skip_if(!extended_tests() || covr_tests())
skip_if_not_installed('SingleCellExperiment', .MINIMUM_SCE_VERSION('c'))
uri <- tempfile(pattern="sce-experiment-query-value-filters")

n_obs <- 1001L
n_var <- 99L
n_pcs <- 50L
Expand Down Expand Up @@ -371,6 +372,7 @@ test_that("Load SCE object from indexed ExperimentQuery", {
n_var_select <- length(var_label_values)
n_obs_select <- length(obs_label_values)
expect_no_condition(obj <- query$to_single_cell_experiment())

expect_s4_class(obj, 'SingleCellExperiment')
expect_identical(dim(obj), c(n_var_select, n_obs_select))
expect_identical(
Expand Down
23 changes: 7 additions & 16 deletions apis/r/tests/testthat/test-SOMAArrayReader-Arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,24 @@ test_that("Arrow Interface from SOMAArrayReader", {
tb1 <- soma_array_to_arrow_table(soma_array_reader(uri, columns))
expect_equal(tb1$num_rows, 2638)

arr <- tiledb_array(uri) # need array for schema access to qc parser
qc <- parse_query_condition(n_counts < 1000 && n_genes >= 400, ta = arr)
tb2 <- soma_array_to_arrow_table(soma_array_reader(uri, columns, qc@ptr))

expect_equal(tb2$num_rows, 47)
expect_true(all(tb2$n_counts < 1000))
expect_true(all(tb2$n_genes >= 400))


# read everything
tb3 <- soma_array_to_arrow_table(soma_array_reader(uri))
tb2 <- soma_array_to_arrow_table(soma_array_reader(uri))

expect_equal(tb3$num_rows, 2638)
expect_equal(tb3$num_columns, 6)
expect_equal(tb2$num_rows, 2638)
expect_equal(tb2$num_columns, 6)

# read a subset of rows and columns
tb4 <- soma_array_to_arrow_table(soma_array_reader(uri = uri,
tb3 <- soma_array_to_arrow_table(soma_array_reader(uri = uri,
colnames = c("obs_id", "percent_mito", "n_counts", "louvain"),
dim_ranges = list(soma_joinid = rbind(bit64::as.integer64(c(1000, 1004)),
bit64::as.integer64(c(2000, 2004)))),
dim_points=list(soma_joinid = bit64::as.integer64(seq(0, 100, by = 20)))))


expect_equal(tb4$num_rows, 16)
expect_equal(tb4$num_columns, 4)
expect_equal(tb3$num_rows, 16)
expect_equal(tb3$num_columns, 4)

rm(z, tb, rb, tb1, arr, tb2, tb3, tb4)
rm(z, tb, rb, tb1, tb2, tb3)
gc()
})

Expand Down
65 changes: 52 additions & 13 deletions apis/r/tests/testthat/test-query-condition.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ test_that("DataFrame Factory", {
arrow::field("uint32", arrow::uint32()),
arrow::field("uint64", arrow::uint64()),
arrow::field("string", arrow::string()),
# Unlike in pyarrow there is no arrow::large_string
arrow::field("utf8", arrow::utf8()),
arrow::field("large_utf8", arrow::large_utf8()),
arrow::field("enum",
arrow::dictionary(
Expand Down Expand Up @@ -47,6 +49,7 @@ test_that("DataFrame Factory", {
uint32 = 301L:310L,
uint64 = 401L:410L,
string = c("apple", "ball", "cat", "dog", "egg", "fig", "goose", "hay", "ice", "jam"),
utf8 = c("apple", "ball", "cat", "dog", "egg", "fig", "goose", "hay", "ice", "jam"),
large_utf8 = c("APPLE", "BALL", "CAT", "DOG", "EGG", "FIG", "GOOSE", "HAY", "ICE", "JAM"),
enum = factor(
c("red", "yellow", "green", "red", "red", "red", "yellow", "green", "red", "green"),
Expand Down Expand Up @@ -118,13 +121,60 @@ test_that("DataFrame Factory", {
'string == "dog"' = function(df) {
expect_equal(df$soma_joinid, c(4))
},
'string == "cat" || string == "dog"' = function(df) {
expect_equal(df$soma_joinid, c(3, 4))
},
"string == 'cat' || string == 'dog'" = function(df) {
expect_equal(df$soma_joinid, c(3, 4))
},
"string == 'cat' || string == 'yak'" = function(df) {
expect_equal(df$soma_joinid, c(3))
},
'string %in% c("fig", "dog")' = function(df) {
expect_equal(df$soma_joinid, c(4, 6))
},
'string %nin% c("fig", "dog")' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3, 5, 7, 8, 9, 10))
},

'utf8 == "dog"' = function(df) {
expect_equal(df$soma_joinid, c(4))
},
'utf8 == "cat" || utf8 == "dog"' = function(df) {
expect_equal(df$soma_joinid, c(3, 4))
},
"utf8 == 'cat' || utf8 == 'dog'" = function(df) {
expect_equal(df$soma_joinid, c(3, 4))
},
"utf8 == 'cat' || utf8 == 'yak'" = function(df) {
expect_equal(df$soma_joinid, c(3))
},
'utf8 %in% c("fig", "dog")' = function(df) {
expect_equal(df$soma_joinid, c(4, 6))
},
'utf8 %nin% c("fig", "dog")' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3, 5, 7, 8, 9, 10))
},

'large_utf8 == "DOG"' = function(df) {
expect_equal(df$soma_joinid, c(4))
},
'large_utf8 == "CAT" || large_utf8 == "DOG"' = function(df) {
expect_equal(df$soma_joinid, c(3, 4))
},
"large_utf8 == 'CAT' || large_utf8 == 'DOG'" = function(df) {
expect_equal(df$soma_joinid, c(3, 4))
},
"large_utf8 == 'CAT' || large_utf8 == 'YAK'" = function(df) {
expect_equal(df$soma_joinid, c(3))
},
'large_utf8 %in% c("FIG", "DOG")' = function(df) {
expect_equal(df$soma_joinid, c(4, 6))
},
'large_utf8 %nin% c("FIG", "DOG")' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3, 5, 7, 8, 9, 10))
},

'enum == "red"' = function(df) {
expect_equal(df$soma_joinid, c(1, 4, 5, 6, 9))
},
Expand Down Expand Up @@ -188,15 +238,7 @@ test_that("DataFrame Factory", {
)

for (query_string in names(good_cases)) {
parsed <- do.call(
what = tiledbsoma:::parse_query_condition_new,
args = list(expr=str2lang(query_string), schema=sch, somactx=ctx))
clib_value_filter <- parsed@ptr

sr <- sr_setup(uri = sdf$uri, ctx, qc=clib_value_filter)
iter <- TableReadIter$new(sr)
tbl <- iter$read_next()
expect_true(iter$read_complete())
tbl <- sdf$read(value_filter = query_string)$concat()
df <- as.data.frame(tbl)
# Call the validator
good_cases[[query_string]](df)
Expand All @@ -212,10 +254,7 @@ test_that("DataFrame Factory", {
)

for (query_string in names(bad_cases)) {
expect_error(
do.call(
what = tiledbsoma:::parse_query_condition_new,
args = list(expr=str2lang(query_string), schema=sch, somactx=ctx)))
expect_error(sdf$read(value_filter = query_string)$concat())
}

sdf$close()
Expand Down

0 comments on commit d36731f

Please sign in to comment.