From e820e0616e17b9e3d4a85d94cb82bdc7d000a5b3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 10 Oct 2024 14:50:44 -0400 Subject: [PATCH] iterating on query-condition port --- apis/r/R/QueryCondition.R | 314 +++++++++++++++++++++++++++++++++ apis/r/R/RcppExports.R | 16 ++ apis/r/src/RcppExports.cpp | 56 ++++++ apis/r/src/query_condition.cpp | 220 +++++++++++++++++++++++ libtiledbsoma/test/common.cc | 15 +- 5 files changed, 616 insertions(+), 5 deletions(-) create mode 100644 apis/r/R/QueryCondition.R create mode 100644 apis/r/src/query_condition.cpp diff --git a/apis/r/R/QueryCondition.R b/apis/r/R/QueryCondition.R new file mode 100644 index 0000000000..fbaff115bc --- /dev/null +++ b/apis/r/R/QueryCondition.R @@ -0,0 +1,314 @@ +# MIT License +# +# Copyright (c) 2021-2024 TileDB Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# ================================================================ +#' Create a 'tiledbsoma_query_condition' object from an expression +#' +#' The grammar for query conditions is constrained to the operators +#' (\code{">"}, \code{">="}, \code{"<"}, \code{"<="}, \code{"=="}, +#' \code{"!="}, \code{"%in%"}, \code{"%nin%"}), and three boolean operators +#' (\code{"&&"}, also as \code{"&"}; \code{"||"}, also as \code{"|"}; and +#' \code{"!"} for negation). Note that we locally define \code{"%nin%"} as a +#' \code{Negate()} call around \code{"%in%"}, which extends R a little for this +#' use case. +#' +#' Expressions, in the R language syntax, are parsed locally by this function. +#' +#' @param expr An expression that is understood by the TileDB grammar for +#' query conditions. +#' +#' @param schema The Arrow schema for the array for which a query +#' condition is being prepared. This is necessary to obtain type information +#' about left-hand sides of query expressions. +#' +#' @param strict A boolean toggle which, if set, raises an error when a non-existing +#' attribute is selected or filtered on, defaults to 'TRUE'; if 'FALSE' a +#' warning is shown but execution proceeds. +#' +#' @param somactx SOMAContext pointer. +#' +#' @return A `tiledbsoma_query_condition` object. 
+#' @export +parse_query_condition_new <- function( + expr, + schema, + strict=TRUE, + somactx + ) { + + stopifnot("The schema argument must be an Arrow Schema" = + is(schema, "ArrowObject") && + is(schema, "Schema")) + + # ---------------------------------------------------------------- + # Helpers for walking the parse tree + + # Operators + `%!in%` <- Negate(`%in%`) + .is_in_operator <- function(node) { + return(tolower(as.character(node)) %in% c("%in%", "%nin%")) + } + .is_comparison_operator <- function(node) { + return(tolower(as.character(node)) %in% c(">", ">=", "<", "<=", "==", "!=", "%in%", "%nin%")) + } + .is_boolean_operator <- function(node) { + return(as.character(node) %in% c("&&", "||", "!", "&", "|")) + } + + # Leaf nodes + .is_ascii <- function(node) { + return(grepl("^[[:alnum:]_]+$", node)) + } + .is_integer <- function(node) { + return(grepl("^[[:digit:]]+$", as.character(node))) + } + .is_double <- function(node) { + return(grepl("^[[:digit:]\\.]+$", as.character(node)) && length(grepRaw(".", as.character(node), fixed = TRUE, all = TRUE)) == 1) + } + + .error_function <- if (strict) stop else warning + + .map_op_to_character <- function(x) { + return(switch(x, `>` = "GT", `>=` = "GE", `<` = "LT", `<=` = "LE", `==` = "EQ", `!=` = "NE")) + } + + .map_bool_to_character <- function(x) { + return(switch(x, `&&` = "AND", `&` = "AND", `||` = "OR", `|` = "OR", `!` = "NOT")) + } + + # ---------------------------------------------------------------- + # Map the R parse tree (from base-r `substitute`) to a TileDB core QueryCondition + + .parse_tree_to_qc <- function(node, debug=FALSE) { + if (is.symbol(node)) { + stop("Unexpected symbol in expression: ", format(node)) + + } else if (.is_boolean_operator(node[1])) { + spdl::debug("[parseqc] boolop [{}] [{}] [{}]", + as.character(node[2]), + as.character(node[1]), + as.character(node[3])) + + return(tiledbsoma_query_condition_combine( + .parse_tree_to_qc(node[[2]]), + .parse_tree_to_qc(node[[3]]), + 
.map_bool_to_character(as.character(node[1])), + somactx)) + + } else if (.is_in_operator(node[1])) { + spdl::debug("[parseqc] inop [{}] [{}] [{}]", + as.character(node[2]), + as.character(node[1]), + as.character(node[3])) + + attr_name <- as.character(node[2]) + r_op_name <- tolower(as.character(node[1])) + tdb_op_name <- if (r_op_name == "%in%") "IN" else "NOT_IN" + + # XXX EXTRACT HELPER + arrow_field <- schema[[attr_name]] + if (is.null(arrow_field)) { + .error_function("No attribute '", attr_name, "' is present.", call. = FALSE) + } + arrow_type_name <- arrow_field$type$name + is_enum <- is(arrow_field$type, "DictionaryType") + + values <- eval(parse(text=as.character(node[3]))) + if (arrow_type_name == "int32" && !is_enum) { + values <- as.integer(values) + } + + return(tiledbsoma_query_condition_in_nin(attr_name, tdb_op_name, values, somactx)) + + } else if (.is_comparison_operator(node[1])) { + spdl::debug("[parseqc] cmpop [{}] [{}] [{}]", + as.character(node[2]), + as.character(node[1]), + as.character(node[3])) + + op_name <- as.character(node[1]) + attr_name <- as.character(node[2]) + rhs_text <- as.character(node[3]) + + arrow_field <- schema[[attr_name]] + if (is.null(arrow_field)) { + .error_function("No attribute '", attr_name, "' is present.", call. 
= FALSE) + } + arrow_type_name <- arrow_field$type$name + + # Take care of factor (aka "enum" case) and set the data type to UTF-8 string + if (arrow_type_name == "dictionary") { + arrow_type_name <- "utf8" + } + + # General case of extracting appropriate value given type info + return(tiledbsoma_query_condition_from_triple( + attr_name = attr_name, + value = switch( + arrow_type_name, + ascii = rhs_text, + utf8 = rhs_text, + bool = as.logical(rhs_text), + ## XXX DATETIME_MS = as.POSIXct(rhs_text), + ## XXX DATETIME_DAY = as.Date(rhs_text), + as.numeric(rhs_text)), + arrow_type_name = arrow_type_name, + op_name = .map_op_to_character(op_name), + qc = tiledbsoma_empty_query_condition(somactx))) + + } else { + stop("Unexpected token in expression: ", format(node)) + } + } + + # Use base-r `substitute` to map the user-provided expression to a parse tree + parse_tree <- substitute(expr) + + # Map the parse tree to TileDB core QueryCondition + return(.parse_tree_to_qc(parse_tree)) +} + +# ================================================================ +#' An S4 class for a TileDB QueryCondition object +#' +#' @slot ptr An external pointer to the underlying implementation +#' @slot init A logical variable tracking if the query condition object has been +#' initialized +#' @exportClass tiledbsoma_query_condition +setClass( + "tiledbsoma_query_condition", + slots = list(ptr = "externalptr", init = "logical")) + +# ================================================================ +#' Creates a 'tiledbsoma_query_condition' object +#' +#' @param somactx SOMAContext pointer (an external pointer), passed through +#' to the underlying libtiledbsoma constructor +#' @return A 'tiledbsoma_query_condition' object +#' @export +tiledbsoma_empty_query_condition <- function(somactx) { + stopifnot("The argument must be a ctx object" = is(somactx, "externalptr")) + ptr <- libtiledbsoma_empty_query_condition(somactx) + query_condition <- new("tiledbsoma_query_condition", ptr = ptr, init = FALSE) + 
invisible(query_condition) +} + +# ================================================================ +#' Initialize a 'tiledbsoma_query_condition' object +#' +#' Initializes (and possibly allocates) a query condition object using a triplet of +#' attribute name, comparison value, and operator. Six types of conditions are supported, +#' they all take a single scalar comparison argument and attribute to compare against. +#' Integer, floating-point, string, and boolean attribute comparisons are implemented. +#' @param attr_name A character value with the schema attribute name +#' @param value A scalar value that the attribute is compared against +#' @param arrow_type_name A character value with the Arrow type name of the attribute column, for +#' example 'float' or 'int32' +#' @param op_name A character value with the comparison operation. This must be one of +#' 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE'. +#' @param qc A 'tiledbsoma_query_condition' object to be initialized by this call. +#' @return The initialized 'tiledbsoma_query_condition' object +#' @export +tiledbsoma_query_condition_from_triple <- function( + attr_name, + value, + arrow_type_name, + op_name, + qc) { + + stopifnot( + "Argument 'qc' with query condition object required" = inherits(qc, "tiledbsoma_query_condition"), + "Argument 'attr_name' must be character" = is.character(attr_name), + "Argument 'value' must be of length one" = ( + is.vector(value) || + bit64::is.integer64(value) || + inherits(value, "POSIXt") || + inherits(value, "Date")) && length(value) == 1L, + "Argument 'arrow_type_name' must be character" = is.character(arrow_type_name), + "Argument 'op_name' must be character" = is.character(op_name)) + + op_name <- match.arg(op_name, c("LT", "LE", "GT", "GE", "EQ", "NE")) + # If arrow_type_name is int64 or uint64 but the class of value does not yet inherit from + # integer64, cast. 
+ if (grepl("int64", arrow_type_name) && !inherits(value, "integer64")) { + value <- bit64::as.integer64(value) + } + libtiledbsoma_query_condition_from_triple(qc@ptr, attr_name, value, arrow_type_name, op_name) + qc@init <- TRUE + invisible(qc) +} + +# ================================================================ +#' Combine two 'tiledbsoma_query_condition' objects +#' +#' Combines two query condition objects using a relational operator. +#' +#' @param lhs A 'tiledbsoma_query_condition' object on the left-hand side of the relation +#' @param rhs A 'tiledbsoma_query_condition' object on the right-hand side of the relation +#' @param op_name A character value with the relation, which must be one of 'AND', 'OR' or 'NOT'. +#' @param somactx SOMAContext pointer. +#' @return The combined 'tiledbsoma_query_condition' object +#' @export +tiledbsoma_query_condition_combine <- function(lhs, rhs, op_name, somactx) { + stopifnot( + "Argument 'lhs' must be a query condition object" = is(lhs, "tiledbsoma_query_condition"), + "Argument 'rhs' must be a query condition object" = is(rhs, "tiledbsoma_query_condition"), + "Argument 'op_name' must be a character" = is.character(op_name)) + op_name <- match.arg(op_name, c("AND", "OR", "NOT")) + qc <- tiledbsoma_empty_query_condition(somactx) + qc@ptr <- libtiledbsoma_query_condition_combine(lhs@ptr, rhs@ptr, op_name) + qc@init <- TRUE + invisible(qc) +} + +# ================================================================ +#' Create a query condition for vector 'IN' and 'NOT_IN' operations +#' +#' Uses \sQuote{IN} and \sQuote{NOT_IN} operators on given attribute +#' +#' @param attr_name A character value with the schema attribute name. +#' +#' @param op_name A character value with the chosen set operation. This must be one of +#' \sQuote{IN} or \sQuote{NOT_IN}. +#' +#' @param values A vector with the given values. Supported types are integer, double, +#' integer64, and character. +#' +#' @param somactx SOMAContext pointer. 
+#' +#' @return A query-condition object is returned +#' @export +tiledbsoma_query_condition_in_nin <- function( + attr_name, + op_name = "IN", + values, + somactx) { + stopifnot("Argument 'attr_name' must be character" = is.character(attr_name), + "Argument 'values' must be int, double, int64 or char" = + (is.numeric(values) || bit64::is.integer64(values) || is.character(values)), + "Argument 'op_name' must be one of 'IN' or 'NOT_IN'" = op_name %in% c("IN", "NOT_IN")) + + qc <- tiledbsoma_empty_query_condition(somactx) + qc@ptr <- libtiledbsoma_query_condition_in_nin(somactx, attr_name, op_name, values) + qc@init <- TRUE + invisible(qc) +} diff --git a/apis/r/R/RcppExports.R b/apis/r/R/RcppExports.R index 961f69b01e..e62722e056 100644 --- a/apis/r/R/RcppExports.R +++ b/apis/r/R/RcppExports.R @@ -126,6 +126,22 @@ set_metadata <- function(uri, key, valuesxp, type, is_array, ctxxp, tsvec = NULL invisible(.Call(`_tiledbsoma_set_metadata`, uri, key, valuesxp, type, is_array, ctxxp, tsvec)) } +libtiledbsoma_empty_query_condition <- function(ctxxp) { + .Call(`_tiledbsoma_libtiledbsoma_empty_query_condition`, ctxxp) +} + +libtiledbsoma_query_condition_from_triple <- function(query_cond, attr_name, condition_value, arrow_type_name, cond_op_string) { + invisible(.Call(`_tiledbsoma_libtiledbsoma_query_condition_from_triple`, query_cond, attr_name, condition_value, arrow_type_name, cond_op_string)) +} + +libtiledbsoma_query_condition_combine <- function(lhs, rhs, str) { + .Call(`_tiledbsoma_libtiledbsoma_query_condition_combine`, lhs, rhs, str) +} + +libtiledbsoma_query_condition_in_nin <- function(ctxxp, attr_name, op_name, values) { + .Call(`_tiledbsoma_libtiledbsoma_query_condition_in_nin`, ctxxp, attr_name, op_name, values) +} + reindex_create <- function() { .Call(`_tiledbsoma_reindex_create`) } diff --git a/apis/r/src/RcppExports.cpp b/apis/r/src/RcppExports.cpp index b530b175c6..0b68b3942e 100644 --- a/apis/r/src/RcppExports.cpp +++ b/apis/r/src/RcppExports.cpp @@ 
-246,6 +246,58 @@ BEGIN_RCPP return R_NilValue; END_RCPP } +// libtiledbsoma_empty_query_condition +Rcpp::XPtr libtiledbsoma_empty_query_condition(Rcpp::XPtr ctxxp); +RcppExport SEXP _tiledbsoma_libtiledbsoma_empty_query_condition(SEXP ctxxpSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::XPtr >::type ctxxp(ctxxpSEXP); + rcpp_result_gen = Rcpp::wrap(libtiledbsoma_empty_query_condition(ctxxp)); + return rcpp_result_gen; +END_RCPP +} +// libtiledbsoma_query_condition_from_triple +void libtiledbsoma_query_condition_from_triple(Rcpp::XPtr query_cond, const std::string& attr_name, SEXP condition_value, const std::string& arrow_type_name, const std::string& cond_op_string); +RcppExport SEXP _tiledbsoma_libtiledbsoma_query_condition_from_triple(SEXP query_condSEXP, SEXP attr_nameSEXP, SEXP condition_valueSEXP, SEXP arrow_type_nameSEXP, SEXP cond_op_stringSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::XPtr >::type query_cond(query_condSEXP); + Rcpp::traits::input_parameter< const std::string& >::type attr_name(attr_nameSEXP); + Rcpp::traits::input_parameter< SEXP >::type condition_value(condition_valueSEXP); + Rcpp::traits::input_parameter< const std::string& >::type arrow_type_name(arrow_type_nameSEXP); + Rcpp::traits::input_parameter< const std::string& >::type cond_op_string(cond_op_stringSEXP); + libtiledbsoma_query_condition_from_triple(query_cond, attr_name, condition_value, arrow_type_name, cond_op_string); + return R_NilValue; +END_RCPP +} +// libtiledbsoma_query_condition_combine +Rcpp::XPtr libtiledbsoma_query_condition_combine(Rcpp::XPtr lhs, Rcpp::XPtr rhs, const std::string& str); +RcppExport SEXP _tiledbsoma_libtiledbsoma_query_condition_combine(SEXP lhsSEXP, SEXP rhsSEXP, SEXP strSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::XPtr >::type lhs(lhsSEXP); + 
Rcpp::traits::input_parameter< Rcpp::XPtr >::type rhs(rhsSEXP); + Rcpp::traits::input_parameter< const std::string& >::type str(strSEXP); + rcpp_result_gen = Rcpp::wrap(libtiledbsoma_query_condition_combine(lhs, rhs, str)); + return rcpp_result_gen; +END_RCPP +} +// libtiledbsoma_query_condition_in_nin +Rcpp::XPtr libtiledbsoma_query_condition_in_nin(Rcpp::XPtr ctxxp, const std::string& attr_name, const std::string& op_name, SEXP values); +RcppExport SEXP _tiledbsoma_libtiledbsoma_query_condition_in_nin(SEXP ctxxpSEXP, SEXP attr_nameSEXP, SEXP op_nameSEXP, SEXP valuesSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::XPtr >::type ctxxp(ctxxpSEXP); + Rcpp::traits::input_parameter< const std::string& >::type attr_name(attr_nameSEXP); + Rcpp::traits::input_parameter< const std::string& >::type op_name(op_nameSEXP); + Rcpp::traits::input_parameter< SEXP >::type values(valuesSEXP); + rcpp_result_gen = Rcpp::wrap(libtiledbsoma_query_condition_in_nin(ctxxp, attr_name, op_name, values)); + return rcpp_result_gen; +END_RCPP +} // reindex_create Rcpp::XPtr reindex_create(); RcppExport SEXP _tiledbsoma_reindex_create() { @@ -739,6 +791,10 @@ static const R_CallMethodDef CallEntries[] = { {"_tiledbsoma_has_metadata", (DL_FUNC) &_tiledbsoma_has_metadata, 4}, {"_tiledbsoma_delete_metadata", (DL_FUNC) &_tiledbsoma_delete_metadata, 4}, {"_tiledbsoma_set_metadata", (DL_FUNC) &_tiledbsoma_set_metadata, 7}, + {"_tiledbsoma_libtiledbsoma_empty_query_condition", (DL_FUNC) &_tiledbsoma_libtiledbsoma_empty_query_condition, 1}, + {"_tiledbsoma_libtiledbsoma_query_condition_from_triple", (DL_FUNC) &_tiledbsoma_libtiledbsoma_query_condition_from_triple, 5}, + {"_tiledbsoma_libtiledbsoma_query_condition_combine", (DL_FUNC) &_tiledbsoma_libtiledbsoma_query_condition_combine, 3}, + {"_tiledbsoma_libtiledbsoma_query_condition_in_nin", (DL_FUNC) &_tiledbsoma_libtiledbsoma_query_condition_in_nin, 4}, 
{"_tiledbsoma_reindex_create", (DL_FUNC) &_tiledbsoma_reindex_create, 0}, {"_tiledbsoma_reindex_map", (DL_FUNC) &_tiledbsoma_reindex_map, 2}, {"_tiledbsoma_reindex_lookup", (DL_FUNC) &_tiledbsoma_reindex_lookup, 2}, diff --git a/apis/r/src/query_condition.cpp b/apis/r/src/query_condition.cpp new file mode 100644 index 0000000000..702140ee50 --- /dev/null +++ b/apis/r/src/query_condition.cpp @@ -0,0 +1,220 @@ +#include // for R interface to C++ +#include // for C interface to Arrow (via R package) +#include // for fromInteger64 +#include // for C/C++ interface to Arrow + +// we currently get deprecation warnings by default which are noisy +#ifndef TILEDB_NO_API_DEPRECATION_WARNINGS +#define TILEDB_NO_API_DEPRECATION_WARNINGS +#endif + +// We get these via nanoarrow and must not include carrow.h again +#define ARROW_SCHEMA_AND_ARRAY_DEFINED 1 +#include +#include + +#include "rutilities.h" // local declarations +#include "xptr-utils.h" // xptr tagging utilities + +// Helper: map "AND"/"OR"/"NOT" to the TileDB combination-op enum; stops on any other string +tiledb_query_condition_combination_op_t +_tiledb_query_string_to_condition_combination_op(const std::string& opstr) { + if (opstr == "AND") { + return TILEDB_AND; + } else if (opstr == "OR") { + return TILEDB_OR; + } else if (opstr == "NOT") { + return TILEDB_NOT; + } else { + Rcpp::stop("Unknown TileDB combination op string '%s'", opstr.c_str()); + } +} + +// Helper: map a comparison/set op name ("LT" ... "NOT_IN") to the TileDB op enum; stops on any other string +tiledb_query_condition_op_t _op_name_to_tdb_op(const std::string& opstr) { + if (opstr == "LT") { + return TILEDB_LT; + } else if (opstr == "LE") { + return TILEDB_LE; + } else if (opstr == "GT") { + return TILEDB_GT; + } else if (opstr == "GE") { + return TILEDB_GE; + } else if (opstr == "EQ") { + return TILEDB_EQ; + } else if (opstr == "NE") { + return TILEDB_NE; + } else if (opstr == "IN") { + return TILEDB_IN; + } else if (opstr == "NOT_IN") { + return TILEDB_NOT_IN; + } else { + Rcpp::stop("Unknown TileDB op string '%s'", opstr.c_str()); + } +} + +// [[Rcpp::export]] +Rcpp::XPtr libtiledbsoma_empty_query_condition( + 
Rcpp::XPtr ctxxp) { + // Shared pointer to SOMAContext from external pointer wrapper: + std::shared_ptr sctx = ctxxp->ctxptr; + // Shared pointer to TileDB Context from SOMAContext: + std::shared_ptr ctx = sctx->tiledb_ctx(); + // Core constructor + return make_xptr( + new tdbs::QueryCondition(*ctx.get())); +} + +// [[Rcpp::export]] +void libtiledbsoma_query_condition_from_triple( + Rcpp::XPtr query_cond, + const std::string& attr_name, + SEXP condition_value, + const std::string& arrow_type_name, + const std::string& cond_op_string) { + // No such: + // print(arrow::large_string()$name) + // print(arrow::double()$name) + + // print(arrow::int64()$name) [1] "int64" + // print(arrow::uint64()$name) [1] "uint64" + // print(arrow::int32()$name) [1] "int32" + // print(arrow::uint32()$name) [1] "uint32" + // print(arrow::int16()$name) [1] "int16" + // print(arrow::uint16()$name) [1] "uint16" + // print(arrow::int8()$name) [1] "int8" + // print(arrow::uint8()$name) [1] "uint8" + // print(arrow::float64()$name) [1] "double" + // print(arrow::float()$name) [1] "float" + // print(arrow::float32()$name) [1] "float" + // print(arrow::string()$name) [1] "utf8" + // print(arrow::binary()$name) [1] "binary" + // print(arrow::large_binary()$name) [1] "large_binary" + // print(arrow::bool()$name) [1] "bool" + // print(arrow::boolean()$name) [1] "bool" + // print(arrow::date64()$name) [1] "date64" + // print(arrow::date32()$name) [1] "date32" + // print(arrow::time32()$name) [1] "time32" + // print(arrow::time64()$name) [1] "time64" + + check_xptr_tag(query_cond); + tiledb_query_condition_op_t op = _op_name_to_tdb_op(cond_op_string); + + if (arrow_type_name == "int64" || arrow_type_name == "uint64") { + int64_t v = Rcpp::fromInteger64(Rcpp::as(condition_value)); + uint64_t cond_val_size = sizeof(int64_t); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + } else if (arrow_type_name == "int32" || arrow_type_name == "uint32") { + int v = Rcpp::as(condition_value); + 
uint64_t cond_val_size = sizeof(int); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + } else if (arrow_type_name == "int16" || arrow_type_name == "uint16") { + int v = Rcpp::as(condition_value); + uint64_t cond_val_size = sizeof(int16_t); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + } else if (arrow_type_name == "int8" || arrow_type_name == "uint8") { + int v = Rcpp::as(condition_value); + uint64_t cond_val_size = sizeof(int8_t); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + } else if (arrow_type_name == "double") { + double v = Rcpp::as(condition_value); + uint64_t cond_val_size = sizeof(double); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + } else if (arrow_type_name == "float") { + float v = static_cast(Rcpp::as(condition_value)); + uint64_t cond_val_size = sizeof(float); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + } else if (arrow_type_name == "ascii" || arrow_type_name == "utf8") { + std::string v = Rcpp::as(condition_value); + query_cond->init(attr_name, v, op); + + } else if (arrow_type_name == "bool") { + bool v = Rcpp::as(condition_value); + uint64_t cond_val_size = sizeof(bool); + query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + // XXX FIXME + // } else if (arrow_type_name == "DATETIME_MS") { + // int64_t v = static_cast( + // Rcpp::as(condition_value) * 1000); + // uint64_t cond_val_size = sizeof(int64_t); + // query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + // } else if (arrow_type_name == "DATETIME_DAY") { + // int64_t v = + // static_cast(Rcpp::as(condition_value)); + // uint64_t cond_val_size = sizeof(int64_t); + // query_cond->init(attr_name, (void*)&v, cond_val_size, op); + + } else { + Rcpp::stop( + "tiledbsoma query condition: currently unsupported type \"%s\"", + arrow_type_name); + } +} + +// [[Rcpp::export]] +Rcpp::XPtr libtiledbsoma_query_condition_combine( + Rcpp::XPtr lhs, + Rcpp::XPtr rhs, + const std::string& str) { 
+ check_xptr_tag(lhs); + check_xptr_tag(rhs);  // validate the right-hand operand too before dereferencing it + tiledb_query_condition_combination_op_t + op = _tiledb_query_string_to_condition_combination_op(str); + tdbs::QueryCondition res = lhs->combine(*rhs.get(), op); + return make_xptr(new tdbs::QueryCondition(res)); +} + +// [[Rcpp::export]] +Rcpp::XPtr libtiledbsoma_query_condition_in_nin( + Rcpp::XPtr ctxxp, + const std::string& attr_name, + const std::string& op_name, + SEXP values) { + // Shared pointer to SOMAContext from external pointer wrapper: + std::shared_ptr sctx = ctxxp->ctxptr; + // Shared pointer to TileDB Context from SOMAContext: + std::shared_ptr ctx = sctx->tiledb_ctx(); + + tiledb_query_condition_op_t op = _op_name_to_tdb_op(op_name); + + if (TYPEOF(values) == INTSXP) { + std::vector iv = Rcpp::as>(values); + auto qc = tdbs::QueryConditionExperimental::create( + *ctx.get(), attr_name, iv, op); + return make_xptr(new tdbs::QueryCondition(qc)); + + } else if (TYPEOF(values) == REALSXP) { + if (Rcpp::isInteger64(values)) { + std::vector dv = Rcpp::fromInteger64( + Rcpp::NumericVector(values)); + auto qc = tdbs::QueryConditionExperimental::create( + *ctx.get(), attr_name, dv, op); + return make_xptr( + new tdbs::QueryCondition(qc)); + } else { + std::vector dv = Rcpp::as>(values); + auto qc = tdbs::QueryConditionExperimental::create( + *ctx.get(), attr_name, dv, op); + return make_xptr( + new tdbs::QueryCondition(qc)); + } + + } else if (TYPEOF(values) == STRSXP) { + std::vector sv = Rcpp::as>( + values); + auto qc = tdbs::QueryConditionExperimental::create( + *ctx.get(), attr_name, sv, op); + return make_xptr(new tdbs::QueryCondition(qc)); + + } else { + Rcpp::stop("No support (yet) for type '%s'.", Rcpp::type2name(values)); + } +} diff --git a/libtiledbsoma/test/common.cc b/libtiledbsoma/test/common.cc index 9f334e7b31..4ef735f81c 100644 --- a/libtiledbsoma/test/common.cc +++ b/libtiledbsoma/test/common.cc @@ -119,12 +119,17 @@ create_arrow_schema_and_index_columns( // Create index-column info only, no 
schema involving the attrs ArrowTable create_column_index_info(const std::vector& dim_infos) { for (auto info : dim_infos) { + LOG_DEBUG(fmt::format("create_column_index_info name={}", info.name)); + + LOG_DEBUG(fmt::format( + "create_column_index_info type={}", + tiledb::impl::to_str(info.tiledb_datatype))); + + LOG_DEBUG( + fmt::format("create_column_index_info dim_max={}", info.dim_max)); + LOG_DEBUG(fmt::format( - "create_column_index_info name={} type={} dim_max={} ucd={}", - info.name, - tiledb::impl::to_str(info.tiledb_datatype), - info.dim_max, - info.use_current_domain)); + "create_column_index_info ucd={}", info.use_current_domain)); } auto index_cols_info_schema = _create_index_cols_info_schema(dim_infos);