diff --git a/libtiledbsoma/test/unit_soma_collection.cc b/libtiledbsoma/test/unit_soma_collection.cc index 5702991185..abcf1678f9 100644 --- a/libtiledbsoma/test/unit_soma_collection.cc +++ b/libtiledbsoma/test/unit_soma_collection.cc @@ -51,38 +51,84 @@ TEST_CASE("SOMACollection: basic") { } TEST_CASE("SOMACollection: add SOMASparseNDArray") { - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - TimestampRange ts(0, 2); - auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-add-sparse-ndarray"; - std::string sub_uri = "mem://unit-test-add-sparse-ndarray/sub"; - std::string dim_name = "soma_dim_0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - SOMACollection::create(base_uri, ctx, ts); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}}); - - auto index_columns = helper::create_column_index_info(dim_infos); - - std::map expected_map{ - {"sparse_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}}; - - auto soma_collection = SOMACollection::open( - base_uri, OpenMode::write, ctx, ts); - REQUIRE(soma_collection->timestamp() == ts); - - auto soma_sparse = soma_collection->add_new_sparse_ndarray( - "sparse_ndarray", + TimestampRange ts(0, 2); + auto ctx = std::make_shared(); + std::string base_uri = "mem://unit-test-add-sparse-ndarray"; + std::string sub_uri = "mem://unit-test-add-sparse-ndarray/sub"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + SOMACollection::create(base_uri, ctx, ts); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}}); + + auto index_columns = 
helper::create_column_index_info(dim_infos); + + std::map expected_map{ + {"sparse_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}}; + + auto soma_collection = SOMACollection::open( + base_uri, OpenMode::write, ctx, ts); + REQUIRE(soma_collection->timestamp() == ts); + + auto soma_sparse = soma_collection->add_new_sparse_ndarray( + "sparse_ndarray", + sub_uri, + URIType::absolute, + ctx, + arrow_format, + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second))); + REQUIRE(soma_collection->members_map() == expected_map); + REQUIRE(soma_sparse->uri() == sub_uri); + REQUIRE(soma_sparse->ctx() == ctx); + REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); + REQUIRE(soma_sparse->is_sparse() == true); + REQUIRE(soma_sparse->ndim() == 1); + REQUIRE(soma_sparse->nnz() == 0); + REQUIRE(soma_sparse->timestamp() == ts); + soma_sparse->close(); + soma_collection->close(); + + soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + soma_collection->close(); +} + +TEST_CASE("SOMACollection: add SOMADenseNDArray") { + TimestampRange ts(0, 2); + auto ctx = std::make_shared(); + std::string base_uri = "mem://unit-test-add-dense-ndarray"; + std::string sub_uri = "mem://unit-test-add-dense-ndarray/sub"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + SOMACollection::create(base_uri, ctx, ts); + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}}); + auto index_columns = helper::create_column_index_info(dim_infos); + + std::map expected_map{ + {"dense_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}}; + + auto soma_collection = SOMACollection::open( + base_uri, OpenMode::write, ctx, ts); + REQUIRE(soma_collection->timestamp() == ts); + + if 
(helper::have_dense_current_domain_support()) { + auto soma_dense = soma_collection->add_new_dense_ndarray( + "dense_ndarray", sub_uri, URIType::absolute, ctx, @@ -91,14 +137,13 @@ TEST_CASE("SOMACollection: add SOMASparseNDArray") { std::move(index_columns.first), std::move(index_columns.second))); REQUIRE(soma_collection->members_map() == expected_map); - REQUIRE(soma_sparse->uri() == sub_uri); - REQUIRE(soma_sparse->ctx() == ctx); - REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); - REQUIRE(soma_sparse->is_sparse() == true); - REQUIRE(soma_sparse->ndim() == 1); - REQUIRE(soma_sparse->nnz() == 0); - REQUIRE(soma_sparse->timestamp() == ts); - soma_sparse->close(); + REQUIRE(soma_dense->uri() == sub_uri); + REQUIRE(soma_dense->ctx() == ctx); + REQUIRE(soma_dense->type() == "SOMADenseNDArray"); + REQUIRE(soma_dense->is_sparse() == false); + REQUIRE(soma_dense->ndim() == 1); + REQUIRE(soma_dense->shape() == std::vector{DIM_MAX + 1}); + REQUIRE(soma_dense->timestamp() == ts); soma_collection->close(); soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); @@ -107,511 +152,394 @@ TEST_CASE("SOMACollection: add SOMASparseNDArray") { } } -TEST_CASE("SOMACollection: add SOMADenseNDArray") { - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - TimestampRange ts(0, 2); - auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-add-dense-ndarray"; - std::string sub_uri = "mem://unit-test-add-dense-ndarray/sub"; - std::string dim_name = "soma_dim_0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - SOMACollection::create(base_uri, ctx, ts); - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}}); - auto index_columns = helper::create_column_index_info(dim_infos); - - std::map expected_map{ - 
{"dense_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}}; - - auto soma_collection = SOMACollection::open( - base_uri, OpenMode::write, ctx, ts); - REQUIRE(soma_collection->timestamp() == ts); - - if (helper::have_dense_current_domain_support()) { - auto soma_dense = soma_collection->add_new_dense_ndarray( - "dense_ndarray", - sub_uri, - URIType::absolute, - ctx, - arrow_format, - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second))); - REQUIRE(soma_collection->members_map() == expected_map); - REQUIRE(soma_dense->uri() == sub_uri); - REQUIRE(soma_dense->ctx() == ctx); - REQUIRE(soma_dense->type() == "SOMADenseNDArray"); - REQUIRE(soma_dense->is_sparse() == false); - REQUIRE(soma_dense->ndim() == 1); - REQUIRE(soma_dense->shape() == std::vector{DIM_MAX + 1}); - REQUIRE(soma_dense->timestamp() == ts); - soma_collection->close(); - - soma_collection = SOMACollection::open( - base_uri, OpenMode::read, ctx); - REQUIRE(soma_collection->members_map() == expected_map); - soma_collection->close(); - } - } -} - TEST_CASE("SOMACollection: add SOMADataFrame") { - auto use_current_domain = GENERATE(true); std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - TimestampRange ts(0, 2); - auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-add-dataframe"; - std::string sub_uri = "mem://unit-test-add-dataframe/sub"; - std::string dim_name = "d0"; - std::string attr_name = "a0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - SOMACollection::create(base_uri, ctx, ts); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}}); - std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); - auto [schema, index_columns] = - 
helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - - std::map expected_map{ - {"dataframe", SOMAGroupEntry(sub_uri, "SOMAArray")}}; - - auto soma_collection = SOMACollection::open( - base_uri, OpenMode::write, ctx, ts); - REQUIRE(soma_collection->timestamp() == ts); - - auto soma_dataframe = soma_collection->add_new_dataframe( - "dataframe", - sub_uri, - URIType::absolute, - ctx, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second))); - REQUIRE(soma_collection->members_map() == expected_map); - REQUIRE(soma_dataframe->uri() == sub_uri); - REQUIRE(soma_dataframe->ctx() == ctx); - REQUIRE(soma_dataframe->type() == "SOMADataFrame"); - std::vector expected_index_column_names = {dim_name}; - REQUIRE( - soma_dataframe->index_column_names() == - expected_index_column_names); - REQUIRE(soma_dataframe->timestamp() == ts); - soma_collection->close(); + TimestampRange ts(0, 2); + auto ctx = std::make_shared(); + std::string base_uri = "mem://unit-test-add-dataframe"; + std::string sub_uri = "mem://unit-test-add-dataframe/sub"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + SOMACollection::create(base_uri, ctx, ts); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + + std::map expected_map{ + {"dataframe", SOMAGroupEntry(sub_uri, "SOMAArray")}}; + + auto soma_collection = SOMACollection::open( + base_uri, OpenMode::write, ctx, ts); + REQUIRE(soma_collection->timestamp() == ts); - soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); - 
REQUIRE(soma_collection->members_map() == expected_map); - REQUIRE(soma_dataframe->count() == 0); - soma_collection->close(); - } + auto soma_dataframe = soma_collection->add_new_dataframe( + "dataframe", + sub_uri, + URIType::absolute, + ctx, + std::move(schema), + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second))); + REQUIRE(soma_collection->members_map() == expected_map); + REQUIRE(soma_dataframe->uri() == sub_uri); + REQUIRE(soma_dataframe->ctx() == ctx); + REQUIRE(soma_dataframe->type() == "SOMADataFrame"); + std::vector expected_index_column_names = {dim_name}; + REQUIRE( + soma_dataframe->index_column_names() == expected_index_column_names); + REQUIRE(soma_dataframe->timestamp() == ts); + soma_collection->close(); + + soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + REQUIRE(soma_dataframe->count() == 0); + soma_collection->close(); } TEST_CASE("SOMACollection: add SOMACollection") { - auto use_current_domain = GENERATE(true); - std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-add-collection"; - std::string sub_uri = "mem://unit-test-add-collection/sub"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - SOMACollection::create(base_uri, ctx); - - std::map expected_map{ - {"subcollection", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; - - auto soma_collection = SOMACollection::open( - base_uri, OpenMode::write, ctx); - auto soma_subcollection = soma_collection->add_new_collection( - "subcollection", sub_uri, URIType::absolute, ctx); - REQUIRE(soma_collection->members_map() == expected_map); - REQUIRE(soma_subcollection->uri() == sub_uri); - REQUIRE(soma_subcollection->ctx() == ctx); - REQUIRE(soma_subcollection->type() == 
"SOMACollection"); - soma_collection->close(); + auto ctx = std::make_shared(); + std::string base_uri = "mem://unit-test-add-collection"; + std::string sub_uri = "mem://unit-test-add-collection/sub"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + SOMACollection::create(base_uri, ctx); + + std::map expected_map{ + {"subcollection", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; + + auto soma_collection = SOMACollection::open(base_uri, OpenMode::write, ctx); + auto soma_subcollection = soma_collection->add_new_collection( + "subcollection", sub_uri, URIType::absolute, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + REQUIRE(soma_subcollection->uri() == sub_uri); + REQUIRE(soma_subcollection->ctx() == ctx); + REQUIRE(soma_subcollection->type() == "SOMACollection"); + soma_collection->close(); - soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); - REQUIRE(soma_collection->members_map() == expected_map); - soma_collection->close(); - } + soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + soma_collection->close(); } TEST_CASE("SOMACollection: add SOMAExperiment") { - auto use_current_domain = GENERATE(true); - std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-add-experiment"; - std::string sub_uri = "mem://unit-test-add-experiment/sub"; - std::string dim_name = "d0"; - std::string attr_name = "a0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - SOMACollection::create(base_uri, ctx); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}}); - 
std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - - std::map expected_map{ - {"experiment", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; - - auto soma_collection = SOMACollection::open( - base_uri, OpenMode::write, ctx); - auto soma_experiment = soma_collection->add_new_experiment( - "experiment", - sub_uri, - URIType::absolute, - ctx, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second))); - REQUIRE(soma_collection->members_map() == expected_map); - REQUIRE(soma_experiment->uri() == sub_uri); - REQUIRE(soma_experiment->ctx() == ctx); - REQUIRE(soma_experiment->type() == "SOMAExperiment"); - soma_experiment->close(); - soma_collection->close(); + auto ctx = std::make_shared(); + std::string base_uri = "mem://unit-test-add-experiment"; + std::string sub_uri = "mem://unit-test-add-experiment/sub"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + SOMACollection::create(base_uri, ctx); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + + std::map expected_map{ + {"experiment", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; + + auto soma_collection = SOMACollection::open(base_uri, OpenMode::write, ctx); + auto soma_experiment = soma_collection->add_new_experiment( + "experiment", + sub_uri, + URIType::absolute, + ctx, + std::move(schema), + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second))); + 
REQUIRE(soma_collection->members_map() == expected_map); + REQUIRE(soma_experiment->uri() == sub_uri); + REQUIRE(soma_experiment->ctx() == ctx); + REQUIRE(soma_experiment->type() == "SOMAExperiment"); + soma_experiment->close(); + soma_collection->close(); - soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); - REQUIRE(soma_collection->members_map() == expected_map); - soma_collection->close(); - } + soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + soma_collection->close(); } TEST_CASE("SOMACollection: add SOMAMeasurement") { - auto use_current_domain = GENERATE(true); - std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-add-measurement"; - std::string sub_uri = "mem://unit-test-add-measurement/sub"; - std::string dim_name = "d0"; - std::string attr_name = "a0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - SOMACollection::create(base_uri, ctx); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}}); - std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - - std::map expected_map{ - {"measurement", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; - - auto soma_collection = SOMACollection::open( - base_uri, OpenMode::write, ctx); - auto soma_measurement = soma_collection->add_new_measurement( - "measurement", - sub_uri, - URIType::absolute, - ctx, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second))); - REQUIRE(soma_collection->members_map() == expected_map); - 
REQUIRE(soma_measurement->uri() == sub_uri); - REQUIRE(soma_measurement->ctx() == ctx); - REQUIRE(soma_measurement->type() == "SOMAMeasurement"); - soma_measurement->close(); - soma_collection->close(); + auto ctx = std::make_shared(); + std::string base_uri = "mem://unit-test-add-measurement"; + std::string sub_uri = "mem://unit-test-add-measurement/sub"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + SOMACollection::create(base_uri, ctx); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + + std::map expected_map{ + {"measurement", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; + + auto soma_collection = SOMACollection::open(base_uri, OpenMode::write, ctx); + auto soma_measurement = soma_collection->add_new_measurement( + "measurement", + sub_uri, + URIType::absolute, + ctx, + std::move(schema), + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second))); + REQUIRE(soma_collection->members_map() == expected_map); + REQUIRE(soma_measurement->uri() == sub_uri); + REQUIRE(soma_measurement->ctx() == ctx); + REQUIRE(soma_measurement->type() == "SOMAMeasurement"); + soma_measurement->close(); + soma_collection->close(); - soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); - REQUIRE(soma_collection->members_map() == expected_map); - soma_collection->close(); - } + soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + soma_collection->close(); } TEST_CASE("SOMACollection: metadata") { - auto use_current_domain = 
GENERATE(true); - std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - auto ctx = std::make_shared(); + auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-collection"; - SOMACollection::create(uri, ctx, TimestampRange(0, 2)); - auto soma_collection = SOMACollection::open( - uri, OpenMode::write, ctx, std::pair(1, 1)); + std::string uri = "mem://unit-test-collection"; + SOMACollection::create(uri, ctx, TimestampRange(0, 2)); + auto soma_collection = SOMACollection::open( + uri, OpenMode::write, ctx, std::pair(1, 1)); - int32_t val = 100; - soma_collection->set_metadata("md", TILEDB_INT32, 1, &val); - soma_collection->close(); + int32_t val = 100; + soma_collection->set_metadata("md", TILEDB_INT32, 1, &val); + soma_collection->close(); - // Read metadata - soma_collection->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(soma_collection->metadata_num() == 3); - REQUIRE(soma_collection->has_metadata("soma_object_type")); - REQUIRE(soma_collection->has_metadata("soma_encoding_version")); - REQUIRE(soma_collection->has_metadata("md")); - auto mdval = soma_collection->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - soma_collection->close(); + // Read metadata + soma_collection->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_collection->metadata_num() == 3); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(soma_collection->has_metadata("md")); + auto mdval = soma_collection->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + soma_collection->close(); - // md should not be available at (2, 2) - soma_collection->open(OpenMode::read, TimestampRange(2, 2)); - 
REQUIRE(soma_collection->metadata_num() == 2); - REQUIRE(soma_collection->has_metadata("soma_object_type")); - REQUIRE(soma_collection->has_metadata("soma_encoding_version")); - REQUIRE(!soma_collection->has_metadata("md")); - soma_collection->close(); + // md should not be available at (2, 2) + soma_collection->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_collection->metadata_num() == 2); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(!soma_collection->has_metadata("md")); + soma_collection->close(); - // Metadata should also be retrievable in write mode - soma_collection->open(OpenMode::write, TimestampRange(0, 2)); - REQUIRE(soma_collection->metadata_num() == 3); - REQUIRE(soma_collection->has_metadata("soma_object_type")); - REQUIRE(soma_collection->has_metadata("soma_encoding_version")); - REQUIRE(soma_collection->has_metadata("md")); - mdval = soma_collection->get_metadata("md"); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write - // mode - soma_collection->delete_metadata("md"); - mdval = soma_collection->get_metadata("md"); - REQUIRE(!mdval.has_value()); - soma_collection->close(); + // Metadata should also be retrievable in write mode + soma_collection->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_collection->metadata_num() == 3); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(soma_collection->has_metadata("md")); + mdval = soma_collection->get_metadata("md"); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write + // mode + soma_collection->delete_metadata("md"); + mdval = soma_collection->get_metadata("md"); + REQUIRE(!mdval.has_value()); + soma_collection->close(); - // Confirm delete in read 
mode - soma_collection->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(!soma_collection->has_metadata("md")); - REQUIRE(soma_collection->metadata_num() == 2); - } + // Confirm delete in read mode + soma_collection->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_collection->has_metadata("md")); + REQUIRE(soma_collection->metadata_num() == 2); } TEST_CASE("SOMAExperiment: metadata") { - auto use_current_domain = GENERATE(true); - std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - auto ctx = std::make_shared(); - - std::string uri = "mem://unit-test-experiment"; - std::string dim_name = "soma_dim_0"; - std::string attr_name = "soma_data"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}}); - std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - - SOMAExperiment::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - auto soma_experiment = SOMAExperiment::open( - uri, OpenMode::write, ctx, std::pair(1, 1)); - - int32_t val = 100; - soma_experiment->set_metadata("md", TILEDB_INT32, 1, &val); - soma_experiment->close(); - - // Read metadata - soma_experiment = SOMAExperiment::open( - uri, OpenMode::read, ctx, TimestampRange(0, 2)); - REQUIRE(soma_experiment->metadata_num() == 4); - REQUIRE(soma_experiment->has_metadata("dataset_type")); - REQUIRE(soma_experiment->has_metadata("soma_object_type")); - REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); - 
REQUIRE(soma_experiment->has_metadata("md")); - auto mdval = soma_experiment->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - soma_experiment->close(); - - // md should not be available at (2, 2) - soma_experiment = SOMAExperiment::open( - uri, OpenMode::read, ctx, TimestampRange(2, 2)); - REQUIRE(soma_experiment->metadata_num() == 3); - REQUIRE(soma_experiment->has_metadata("dataset_type")); - REQUIRE(soma_experiment->has_metadata("soma_object_type")); - REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); - REQUIRE(!soma_experiment->has_metadata("md")); - soma_experiment->close(); - - // Metadata should also be retrievable in write mode - soma_experiment = SOMAExperiment::open( - uri, OpenMode::write, ctx, TimestampRange(0, 2)); - REQUIRE(soma_experiment->metadata_num() == 4); - REQUIRE(soma_experiment->has_metadata("dataset_type")); - REQUIRE(soma_experiment->has_metadata("soma_object_type")); - REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); - REQUIRE(soma_experiment->has_metadata("md")); - mdval = soma_experiment->get_metadata("md"); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write - // mode - soma_experiment->delete_metadata("md"); - mdval = soma_experiment->get_metadata("md"); - REQUIRE(!mdval.has_value()); - soma_experiment->close(); - - // Confirm delete in read mode - soma_experiment = SOMAExperiment::open( - uri, OpenMode::read, ctx, TimestampRange(0, 2)); - REQUIRE(!soma_experiment->has_metadata("md")); - REQUIRE(soma_experiment->metadata_num() == 3); - } + auto ctx = std::make_shared(); + + std::string uri = "mem://unit-test-experiment"; + std::string dim_name = "soma_dim_0"; + std::string attr_name = "soma_data"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = 
ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + + SOMAExperiment::create( + uri, + std::move(schema), + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + auto soma_experiment = SOMAExperiment::open( + uri, OpenMode::write, ctx, std::pair(1, 1)); + + int32_t val = 100; + soma_experiment->set_metadata("md", TILEDB_INT32, 1, &val); + soma_experiment->close(); + + // Read metadata + soma_experiment = SOMAExperiment::open( + uri, OpenMode::read, ctx, TimestampRange(0, 2)); + REQUIRE(soma_experiment->metadata_num() == 4); + REQUIRE(soma_experiment->has_metadata("dataset_type")); + REQUIRE(soma_experiment->has_metadata("soma_object_type")); + REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); + REQUIRE(soma_experiment->has_metadata("md")); + auto mdval = soma_experiment->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + soma_experiment->close(); + + // md should not be available at (2, 2) + soma_experiment = SOMAExperiment::open( + uri, OpenMode::read, ctx, TimestampRange(2, 2)); + REQUIRE(soma_experiment->metadata_num() == 3); + REQUIRE(soma_experiment->has_metadata("dataset_type")); + REQUIRE(soma_experiment->has_metadata("soma_object_type")); + REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); + REQUIRE(!soma_experiment->has_metadata("md")); + soma_experiment->close(); + + // Metadata should also be retrievable in write mode + soma_experiment = SOMAExperiment::open( + uri, OpenMode::write, ctx, 
TimestampRange(0, 2)); + REQUIRE(soma_experiment->metadata_num() == 4); + REQUIRE(soma_experiment->has_metadata("dataset_type")); + REQUIRE(soma_experiment->has_metadata("soma_object_type")); + REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); + REQUIRE(soma_experiment->has_metadata("md")); + mdval = soma_experiment->get_metadata("md"); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write + // mode + soma_experiment->delete_metadata("md"); + mdval = soma_experiment->get_metadata("md"); + REQUIRE(!mdval.has_value()); + soma_experiment->close(); + + // Confirm delete in read mode + soma_experiment = SOMAExperiment::open( + uri, OpenMode::read, ctx, TimestampRange(0, 2)); + REQUIRE(!soma_experiment->has_metadata("md")); + REQUIRE(soma_experiment->metadata_num() == 3); } TEST_CASE("SOMAMeasurement: metadata") { - auto use_current_domain = GENERATE(true); - std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-measurement"; - std::string dim_name = "soma_dim_0"; - std::string attr_name = "soma_data"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}}); - std::vector attr_infos( - {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - - SOMAMeasurement::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - auto soma_measurement = SOMAMeasurement::open( - uri, OpenMode::write, ctx, 
std::pair(1, 1)); - - int32_t val = 100; - soma_measurement->set_metadata("md", TILEDB_INT32, 1, &val); - soma_measurement->close(); - - // Read metadata - soma_measurement = SOMAMeasurement::open( - uri, OpenMode::read, ctx, TimestampRange(0, 2)); - REQUIRE(soma_measurement->metadata_num() == 3); - REQUIRE(soma_measurement->has_metadata("soma_object_type")); - REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); - REQUIRE(soma_measurement->has_metadata("md")); - auto mdval = soma_measurement->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - soma_measurement->close(); - - // md should not be available at (2, 2) - soma_measurement = SOMAMeasurement::open( - uri, OpenMode::read, ctx, TimestampRange(2, 2)); - REQUIRE(soma_measurement->metadata_num() == 2); - REQUIRE(soma_measurement->has_metadata("soma_object_type")); - REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); - REQUIRE(!soma_measurement->has_metadata("md")); - soma_measurement->close(); - - // Metadata should also be retrievable in write mode - soma_measurement = SOMAMeasurement::open( - uri, OpenMode::write, ctx, TimestampRange(0, 2)); - REQUIRE(soma_measurement->metadata_num() == 3); - REQUIRE(soma_measurement->has_metadata("soma_object_type")); - REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); - REQUIRE(soma_measurement->has_metadata("md")); - mdval = soma_measurement->get_metadata("md"); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write - // mode - soma_measurement->delete_metadata("md"); - mdval = soma_measurement->get_metadata("md"); - REQUIRE(!mdval.has_value()); - soma_measurement->close(); - - // Confirm delete in read mode - soma_measurement = SOMAMeasurement::open( - uri, OpenMode::read, ctx, TimestampRange(0, 2)); - REQUIRE(!soma_measurement->has_metadata("md")); 
- REQUIRE(soma_measurement->metadata_num() == 2); - } + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-measurement"; + std::string dim_name = "soma_dim_0"; + std::string attr_name = "soma_data"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + + SOMAMeasurement::create( + uri, + std::move(schema), + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + + auto soma_measurement = SOMAMeasurement::open( + uri, OpenMode::write, ctx, std::pair(1, 1)); + + int32_t val = 100; + soma_measurement->set_metadata("md", TILEDB_INT32, 1, &val); + soma_measurement->close(); + + // Read metadata + soma_measurement = SOMAMeasurement::open( + uri, OpenMode::read, ctx, TimestampRange(0, 2)); + REQUIRE(soma_measurement->metadata_num() == 3); + REQUIRE(soma_measurement->has_metadata("soma_object_type")); + REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); + REQUIRE(soma_measurement->has_metadata("md")); + auto mdval = soma_measurement->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + soma_measurement->close(); + + // md should not be available at (2, 2) + soma_measurement = SOMAMeasurement::open( + uri, OpenMode::read, ctx, TimestampRange(2, 2)); + REQUIRE(soma_measurement->metadata_num() == 2); + REQUIRE(soma_measurement->has_metadata("soma_object_type")); + REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); + 
REQUIRE(!soma_measurement->has_metadata("md")); + soma_measurement->close(); + + // Metadata should also be retrievable in write mode + soma_measurement = SOMAMeasurement::open( + uri, OpenMode::write, ctx, TimestampRange(0, 2)); + REQUIRE(soma_measurement->metadata_num() == 3); + REQUIRE(soma_measurement->has_metadata("soma_object_type")); + REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); + REQUIRE(soma_measurement->has_metadata("md")); + mdval = soma_measurement->get_metadata("md"); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write + // mode + soma_measurement->delete_metadata("md"); + mdval = soma_measurement->get_metadata("md"); + REQUIRE(!mdval.has_value()); + soma_measurement->close(); + + // Confirm delete in read mode + soma_measurement = SOMAMeasurement::open( + uri, OpenMode::read, ctx, TimestampRange(0, 2)); + REQUIRE(!soma_measurement->has_metadata("md")); + REQUIRE(soma_measurement->metadata_num() == 2); } diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index 0dc9d623cb..a1e28aaab2 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -44,7 +44,6 @@ const int64_t SOMA_JOINID_RESIZE_DIM_MAX = 199; struct VariouslyIndexedDataFrameFixture { std::shared_ptr ctx_; std::string uri_; - bool use_current_domain_; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Using Catch2's TEST_CASE_METHOD we can't pass constructor args. 
@@ -201,59 +200,53 @@ TEST_CASE_METHOD( VariouslyIndexedDataFrameFixture, "SOMADataFrame: basic", "[SOMADataFrame]") { - auto use_current_domain = GENERATE(true); + set_up(std::make_shared(), "mem://unit-test-dataframe-basic"); - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - set_up( - std::make_shared(), "mem://unit-test-dataframe-basic"); - - std::vector dim_infos({i64_dim_info()}); - std::vector attr_infos({u32_attr_info()}); - - REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); - - create(dim_infos, attr_infos); + std::vector dim_infos({i64_dim_info()}); + std::vector attr_infos({u32_attr_info()}); - REQUIRE(SOMADataFrame::exists(uri_, ctx_)); - REQUIRE(!SOMASparseNDArray::exists(uri_, ctx_)); - REQUIRE(!SOMADenseNDArray::exists(uri_, ctx_)); + REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); - auto sdf = open(OpenMode::read); - REQUIRE(sdf->uri() == uri_); - REQUIRE(sdf->ctx() == ctx_); - REQUIRE(sdf->type() == "SOMADataFrame"); - std::vector expected_index_column_names = { - dim_infos[0].name}; - REQUIRE(sdf->index_column_names() == expected_index_column_names); - REQUIRE(sdf->nnz() == 0); - sdf->close(); + create(dim_infos, attr_infos); - std::vector d0(10); - for (int j = 0; j < 10; j++) - d0[j] = j; - std::vector a0(10, 1); + REQUIRE(SOMADataFrame::exists(uri_, ctx_)); + REQUIRE(!SOMASparseNDArray::exists(uri_, ctx_)); + REQUIRE(!SOMADenseNDArray::exists(uri_, ctx_)); + + auto sdf = open(OpenMode::read); + REQUIRE(sdf->uri() == uri_); + REQUIRE(sdf->ctx() == ctx_); + REQUIRE(sdf->type() == "SOMADataFrame"); + std::vector expected_index_column_names = {dim_infos[0].name}; + REQUIRE(sdf->index_column_names() == expected_index_column_names); + REQUIRE(sdf->nnz() == 0); + sdf->close(); - sdf = open(OpenMode::write); - sdf->set_column_data(dim_infos[0].name, d0.size(), d0.data()); - sdf->set_column_data(attr_infos[0].name, a0.size(), a0.data()); - sdf->write(); - sdf->close(); + std::vector d0(10); + for (int j = 0; j < 10; j++) + d0[j] = j; + 
std::vector a0(10, 1); - sdf = open(OpenMode::read); - while (auto batch = sdf->read_next()) { - auto arrbuf = batch.value(); - auto d0span = arrbuf->at(dim_infos[0].name)->data(); - auto a0span = arrbuf->at(attr_infos[0].name)->data(); - REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); - REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); - } - sdf->close(); + sdf = open(OpenMode::write); + sdf->set_column_data(dim_infos[0].name, d0.size(), d0.data()); + sdf->set_column_data(attr_infos[0].name, a0.size(), a0.data()); + sdf->write(); + sdf->close(); - auto soma_object = SOMAObject::open(uri_, OpenMode::read, ctx_); - REQUIRE(soma_object->uri() == uri_); - REQUIRE(soma_object->type() == "SOMADataFrame"); - soma_object->close(); + sdf = open(OpenMode::read); + while (auto batch = sdf->read_next()) { + auto arrbuf = batch.value(); + auto d0span = arrbuf->at(dim_infos[0].name)->data(); + auto a0span = arrbuf->at(attr_infos[0].name)->data(); + REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); + REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); } + sdf->close(); + + auto soma_object = SOMAObject::open(uri_, OpenMode::read, ctx_); + REQUIRE(soma_object->uri() == uri_); + REQUIRE(soma_object->type() == "SOMADataFrame"); + soma_object->close(); } TEST_CASE_METHOD( @@ -302,80 +295,74 @@ TEST_CASE_METHOD( std::make_pair(R"("NOOP")", TILEDB_FILTER_NONE)); SECTION(std::format("- filter={}", filter.first)) { - auto use_current_domain = GENERATE(true); - std::ostringstream section2; - section2 << "- use_current_domain=" << use_current_domain; - SECTION(section2.str()) { - set_up( - std::make_shared(), - "mem://unit-test-dataframe-platform-config"); - - PlatformConfig platform_config; - platform_config.cell_order = "hilbert"; - platform_config.dataframe_dim_zstd_level = 6; - platform_config.offsets_filters = R"([)" + filter.first + R"(])"; - platform_config.validity_filters = R"([)" + filter.first + R"(])"; - if (filter.second != TILEDB_FILTER_WEBP) 
{ - platform_config.attrs = R"({"a0": {"filters":[)" + - filter.first + R"(]}})"; - } + set_up( + std::make_shared(), + "mem://unit-test-dataframe-platform-config"); + + PlatformConfig platform_config; + platform_config.cell_order = "hilbert"; + platform_config.dataframe_dim_zstd_level = 6; + platform_config.offsets_filters = R"([)" + filter.first + R"(])"; + platform_config.validity_filters = R"([)" + filter.first + R"(])"; + if (filter.second != TILEDB_FILTER_WEBP) { + platform_config.attrs = R"({"a0": {"filters":[)" + filter.first + + R"(]}})"; + } + + std::vector dim_infos({i64_dim_info()}); + std::vector attr_infos({i64_attr_info("a0")}); - std::vector dim_infos({i64_dim_info()}); - std::vector attr_infos({i64_attr_info("a0")}); + REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); - REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); + create(dim_infos, attr_infos, platform_config); - create(dim_infos, attr_infos, platform_config); + auto sdf = open(OpenMode::read); + auto sch = sdf->tiledb_schema(); + REQUIRE( + sch->offsets_filter_list().filter(0).filter_type() == + filter.second); - auto sdf = open(OpenMode::read); - auto sch = sdf->tiledb_schema(); - REQUIRE( - sch->offsets_filter_list().filter(0).filter_type() == - filter.second); + REQUIRE( + sch->validity_filter_list().filter(0).filter_type() == + filter.second); + auto dim_filter = sch->domain() + .dimension(dim_infos[0].name) + .filter_list() + .filter(0); + REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); + REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); + + if (filter.second != TILEDB_FILTER_WEBP) { REQUIRE( - sch->validity_filter_list().filter(0).filter_type() == - filter.second); - - auto dim_filter = sch->domain() - .dimension(dim_infos[0].name) - .filter_list() - .filter(0); - REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); - REQUIRE( - dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); - - if (filter.second != TILEDB_FILTER_WEBP) { - REQUIRE( - 
sch->attribute(attr_infos[0].name) - .filter_list() - .filter(0) - .filter_type() == filter.second); - } + sch->attribute(attr_infos[0].name) + .filter_list() + .filter(0) + .filter_type() == filter.second); + } - auto config_options = sdf->config_options_from_schema(); - REQUIRE(config_options.capacity == 100000); - REQUIRE(config_options.allows_duplicates == false); - REQUIRE(config_options.tile_order == "row-major"); - REQUIRE(config_options.cell_order == "hilbert"); + auto config_options = sdf->config_options_from_schema(); + REQUIRE(config_options.capacity == 100000); + REQUIRE(config_options.allows_duplicates == false); + REQUIRE(config_options.tile_order == "row-major"); + REQUIRE(config_options.cell_order == "hilbert"); + REQUIRE( + json::parse(config_options.offsets_filters)[0].at("name") == + Filter::to_str(filter.second)); + REQUIRE( + json::parse(config_options.validity_filters)[0].at("name") == + Filter::to_str(filter.second)); + if (filter.second != TILEDB_FILTER_WEBP) { REQUIRE( - json::parse(config_options.offsets_filters)[0].at("name") == - Filter::to_str(filter.second)); - REQUIRE( - json::parse(config_options.validity_filters)[0].at("name") == - Filter::to_str(filter.second)); - if (filter.second != TILEDB_FILTER_WEBP) { - REQUIRE( - json::parse(config_options.attrs)["a0"]["filters"][0].at( - "name") == Filter::to_str(filter.second)); - } - REQUIRE( - json::parse(config_options.dims)["soma_joinid"]["filters"][0] - .at("name") == Filter::to_str(TILEDB_FILTER_ZSTD)); - - sdf->close(); + json::parse(config_options.attrs)["a0"]["filters"][0].at( + "name") == Filter::to_str(filter.second)); } + REQUIRE( + json::parse(config_options.dims)["soma_joinid"]["filters"][0].at( + "name") == Filter::to_str(TILEDB_FILTER_ZSTD)); + + sdf->close(); } } @@ -383,68 +370,62 @@ TEST_CASE_METHOD( VariouslyIndexedDataFrameFixture, "SOMADataFrame: metadata", "[SOMADataFrame]") { - auto use_current_domain = GENERATE(true); + set_up(std::make_shared(), 
"mem://unit-test-collection"); - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - set_up(std::make_shared(), "mem://unit-test-collection"); - - std::vector dim_infos({i64_dim_info()}); - std::vector attr_infos({u32_attr_info()}); - - REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); + std::vector dim_infos({i64_dim_info()}); + std::vector attr_infos({u32_attr_info()}); - create(dim_infos, attr_infos, PlatformConfig(), TimestampRange(0, 2)); + REQUIRE(!SOMADataFrame::exists(uri_, ctx_)); - auto sdf = open( - OpenMode::write, ResultOrder::automatic, TimestampRange(1, 1)); + create(dim_infos, attr_infos, PlatformConfig(), TimestampRange(0, 2)); - int32_t val = 100; - sdf->set_metadata("md", TILEDB_INT32, 1, &val); - sdf->close(); + auto sdf = open( + OpenMode::write, ResultOrder::automatic, TimestampRange(1, 1)); - // Read metadata - sdf->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(sdf->metadata_num() == 3); - REQUIRE(sdf->has_metadata("soma_object_type")); - REQUIRE(sdf->has_metadata("soma_encoding_version")); - REQUIRE(sdf->has_metadata("md")); - auto mdval = sdf->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - sdf->close(); + int32_t val = 100; + sdf->set_metadata("md", TILEDB_INT32, 1, &val); + sdf->close(); - // md should not be available at (2, 2) - sdf->open(OpenMode::read, TimestampRange(2, 2)); - REQUIRE(sdf->metadata_num() == 2); - REQUIRE(sdf->has_metadata("soma_object_type")); - REQUIRE(sdf->has_metadata("soma_encoding_version")); - REQUIRE(!sdf->has_metadata("md")); - sdf->close(); + // Read metadata + sdf->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(sdf->metadata_num() == 3); + REQUIRE(sdf->has_metadata("soma_object_type")); + REQUIRE(sdf->has_metadata("soma_encoding_version")); + REQUIRE(sdf->has_metadata("md")); + auto mdval = sdf->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + 
REQUIRE(std::get(*mdval) == 1); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + sdf->close(); - // Metadata should also be retrievable in write mode - sdf->open(OpenMode::write, TimestampRange(0, 2)); - REQUIRE(sdf->metadata_num() == 3); - REQUIRE(sdf->has_metadata("soma_object_type")); - REQUIRE(sdf->has_metadata("soma_encoding_version")); - REQUIRE(sdf->has_metadata("md")); - mdval = sdf->get_metadata("md"); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); + // md should not be available at (2, 2) + sdf->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(sdf->metadata_num() == 2); + REQUIRE(sdf->has_metadata("soma_object_type")); + REQUIRE(sdf->has_metadata("soma_encoding_version")); + REQUIRE(!sdf->has_metadata("md")); + sdf->close(); - // Delete and have it reflected when reading metadata while in - // write mode - sdf->delete_metadata("md"); - mdval = sdf->get_metadata("md"); - REQUIRE(!mdval.has_value()); - sdf->close(); + // Metadata should also be retrievable in write mode + sdf->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(sdf->metadata_num() == 3); + REQUIRE(sdf->has_metadata("soma_object_type")); + REQUIRE(sdf->has_metadata("soma_encoding_version")); + REQUIRE(sdf->has_metadata("md")); + mdval = sdf->get_metadata("md"); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in + // write mode + sdf->delete_metadata("md"); + mdval = sdf->get_metadata("md"); + REQUIRE(!mdval.has_value()); + sdf->close(); - // Confirm delete in read mode - sdf->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(!sdf->has_metadata("md")); - REQUIRE(sdf->metadata_num() == 2); - } + // Confirm delete in read mode + sdf->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!sdf->has_metadata("md")); + REQUIRE(sdf->metadata_num() == 2); } TEST_CASE_METHOD( @@ -490,283 +471,513 @@ TEST_CASE_METHOD( VariouslyIndexedDataFrameFixture, "SOMADataFrame: standard-indexed dataframe dim-sjid 
attr-str-u32", "[SOMADataFrame]") { - auto use_current_domain = GENERATE(true); + // We have these: + // * upgrade_domain requires the user to specify values for all index + // columns. This is in the spec. + // * resize_soma_joinid_shape allows the user to specify only the + // desired soma_joinid shape. This is crucial for experiment-level + // resize as an internal method at the Python level. + // Both need testing. Each one adds a shape where there wasn't one + // before. So we need to test one or the other on a given run. + auto test_upgrade_domain = GENERATE(false, true); std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; + section << "- test_upgrade_domain=" << test_upgrade_domain; SECTION(section.str()) { - std::string suffix1 = use_current_domain ? "true" : "false"; - // We have these: - // * upgrade_domain requires the user to specify values for all index - // columns. This is in the spec. - // * resize_soma_joinid_shape allows the user to specify only the - // desired soma_joinid shape. This is crucial for experiment-level - // resize as an internal method at the Python level. - // Both need testing. Each one adds a shape where there wasn't one - // before. So we need to test one or the other on a given run. - auto test_upgrade_domain = GENERATE(false, true); - std::ostringstream section2; - section2 << "- test_upgrade_domain=" << test_upgrade_domain; - SECTION(section2.str()) { - std::string suffix2 = test_upgrade_domain ? "true" : "false"; + std::string suffix = test_upgrade_domain ? 
"true" : "false"; - set_up( - std::make_shared(), - "mem://unit-test-variant-indexed-dataframe-1-" + suffix1 + "-" + - suffix2); + set_up( + std::make_shared(), + "mem://unit-test-variant-indexed-dataframe-1-" + suffix); - std::vector dim_infos({i64_dim_info()}); - std::vector attr_infos( - {str_attr_info(), u32_attr_info()}); + std::vector dim_infos({i64_dim_info()}); + std::vector attr_infos( + {str_attr_info(), u32_attr_info()}); - // Create - create(dim_infos, attr_infos); + // Create + create(dim_infos, attr_infos); - // Check current domain - auto sdf = open(OpenMode::read); + // Check current domain + auto sdf = open(OpenMode::read); - CurrentDomain current_domain = sdf->get_current_domain_for_test(); - REQUIRE(!current_domain.is_empty()); - REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); - NDRectangle ndrect = current_domain.ndrectangle(); + CurrentDomain current_domain = sdf->get_current_domain_for_test(); + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); - std::array i64_range = ndrect.range( - dim_infos[0].name); - REQUIRE(i64_range[0] == (int64_t)0); - REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max); + std::array i64_range = ndrect.range( + dim_infos[0].name); + REQUIRE(i64_range[0] == (int64_t)0); + REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max); - // Check shape before resize - int64_t expect = dim_infos[0].dim_max + 1; - std::optional actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(actual.has_value()); - REQUIRE(actual.value() == expect); + // Check shape before resize + int64_t expect = dim_infos[0].dim_max + 1; + std::optional actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); - REQUIRE(sdf->nnz() == 0); + REQUIRE(sdf->nnz() == 0); - sdf->close(); + sdf->close(); - // Write data - write_sjid_u32_str_data_from(0); + // Write data + write_sjid_u32_str_data_from(0); - // 
Check shape after write - sdf->open(OpenMode::read); + // Check shape after write + sdf->open(OpenMode::read); - REQUIRE(sdf->nnz() == 2); + REQUIRE(sdf->nnz() == 2); - expect = dim_infos[0].dim_max + 1; - actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(actual.has_value()); - REQUIRE(actual.value() == expect); + expect = dim_infos[0].dim_max + 1; + actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); - // Check domainish accessors before resize - ArrowTable non_empty_domain = sdf->get_non_empty_domain(); - std::vector ned_sjid = - ArrowAdapter::get_table_non_string_column_by_name( - non_empty_domain, "soma_joinid"); + // Check domainish accessors before resize + ArrowTable non_empty_domain = sdf->get_non_empty_domain(); + std::vector ned_sjid = + ArrowAdapter::get_table_non_string_column_by_name( + non_empty_domain, "soma_joinid"); - ArrowTable soma_domain = sdf->get_soma_domain(); - std::vector dom_sjid = - ArrowAdapter::get_table_non_string_column_by_name( - soma_domain, "soma_joinid"); + ArrowTable soma_domain = sdf->get_soma_domain(); + std::vector dom_sjid = + ArrowAdapter::get_table_non_string_column_by_name( + soma_domain, "soma_joinid"); - ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); - std::vector maxdom_sjid = - ArrowAdapter::get_table_non_string_column_by_name( - soma_maxdomain, "soma_joinid"); + ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); + std::vector maxdom_sjid = + ArrowAdapter::get_table_non_string_column_by_name( + soma_maxdomain, "soma_joinid"); - REQUIRE(ned_sjid == std::vector({1, 2})); + REQUIRE(ned_sjid == std::vector({1, 2})); - REQUIRE(dom_sjid == std::vector({0, SOMA_JOINID_DIM_MAX})); + REQUIRE(dom_sjid == std::vector({0, SOMA_JOINID_DIM_MAX})); - REQUIRE(maxdom_sjid.size() == 2); - REQUIRE(maxdom_sjid[0] == 0); - REQUIRE(maxdom_sjid[1] > 2000000000); - sdf->close(); + REQUIRE(maxdom_sjid.size() == 2); + REQUIRE(maxdom_sjid[0] == 0); + REQUIRE(maxdom_sjid[1] > 
2000000000); + sdf->close(); - REQUIRE(sdf->nnz() == 2); - write_sjid_u32_str_data_from(8); - REQUIRE(sdf->nnz() == 4); + REQUIRE(sdf->nnz() == 2); + write_sjid_u32_str_data_from(8); + REQUIRE(sdf->nnz() == 4); - sdf->open(OpenMode::read); + sdf->open(OpenMode::read); - // Check can_upgrade_domain + // Check can_upgrade_domain + std::unique_ptr + domain_schema = create_index_cols_info_schema(dim_infos); + auto domain_array = ArrowAdapter::make_arrow_array_parent( + dim_infos.size()); + // OK since there currently is no shape set: + domain_array->children[0] = ArrowAdapter::make_arrow_array_child( + std::vector({0, 0})); + auto domain_table = ArrowTable( + std::move(domain_array), std::move(domain_schema)); + StatusAndReason check = sdf->can_upgrade_soma_joinid_shape( + 1, "testing"); + // Must fail since this is too small. + REQUIRE(check.first == false); + REQUIRE( + check.second == "testing: dataframe already has its domain set."); + + // Check can_upgrade_soma_joinid_shape + check = sdf->can_upgrade_soma_joinid_shape(1, "testing"); + // Must fail since this is too small. 
+ REQUIRE(check.first == false); + REQUIRE( + check.second == "testing: dataframe already has its domain set."); + + sdf->close(); + + // Resize + auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; + + // Expect throw on write beyond current domain before resize + REQUIRE_THROWS(write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX)); + + // Check shape after write + sdf = open(OpenMode::read); + expect = dim_infos[0].dim_max + 1; + + actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); + sdf->close(); + + // Apply the domain change + if (test_upgrade_domain) { std::unique_ptr domain_schema = create_index_cols_info_schema(dim_infos); auto domain_array = ArrowAdapter::make_arrow_array_parent( dim_infos.size()); - // OK since there currently is no shape set: domain_array->children[0] = ArrowAdapter::make_arrow_array_child( - std::vector({0, 0})); + std::vector({0, new_shape - 1})); auto domain_table = ArrowTable( std::move(domain_array), std::move(domain_schema)); - StatusAndReason check = sdf->can_upgrade_soma_joinid_shape( - 1, "testing"); - // Must fail since this is too small. - REQUIRE(check.first == false); - REQUIRE( - check.second == - "testing: dataframe already has its domain set."); - - // Check can_upgrade_soma_joinid_shape - check = sdf->can_upgrade_soma_joinid_shape(1, "testing"); - // Must fail since this is too small. 
- REQUIRE(check.first == false); - REQUIRE( - check.second == - "testing: dataframe already has its domain set."); + // Not open for write + sdf = open(OpenMode::read); + REQUIRE_THROWS(sdf->change_domain(domain_table, "testing")); sdf->close(); - // Resize - auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; - - // Expect throw on write beyond current domain before resize - REQUIRE_THROWS(write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX)); + // Open for write + sdf = open(OpenMode::write); + sdf->change_domain(domain_table, "testing"); + sdf->close(); - // Check shape after write + } else { + // Not open for write sdf = open(OpenMode::read); - expect = dim_infos[0].dim_max + 1; + REQUIRE_THROWS(sdf->resize_soma_joinid_shape(new_shape, "testing")); + sdf->close(); - actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(actual.has_value()); - REQUIRE(actual.value() == expect); + // Open for write + sdf = open(OpenMode::write); + sdf->resize_soma_joinid_shape(new_shape, "testing"); sdf->close(); + } - // Apply the domain change - if (test_upgrade_domain) { - std::unique_ptr - domain_schema = create_index_cols_info_schema(dim_infos); - auto domain_array = ArrowAdapter::make_arrow_array_parent( - dim_infos.size()); - domain_array - ->children[0] = ArrowAdapter::make_arrow_array_child( - std::vector({0, new_shape - 1})); - auto domain_table = ArrowTable( - std::move(domain_array), std::move(domain_schema)); + // Check shape after resize + sdf = open(OpenMode::read); + expect = SOMA_JOINID_RESIZE_DIM_MAX + 1; + actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); + sdf->close(); - // Not open for write - sdf = open(OpenMode::read); - REQUIRE_THROWS(sdf->change_domain(domain_table, "testing")); - sdf->close(); + // Implicitly we expect no throw + write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); - // Open for write - sdf = open(OpenMode::write); - sdf->change_domain(domain_table, "testing"); - sdf->close(); + // 
Check domainish accessors after resize + sdf->open(OpenMode::read); - } else { - // Not open for write - sdf = open(OpenMode::read); - REQUIRE_THROWS( - sdf->resize_soma_joinid_shape(new_shape, "testing")); - sdf->close(); + non_empty_domain = sdf->get_non_empty_domain(); + ned_sjid = ArrowAdapter::get_table_non_string_column_by_name( + non_empty_domain, "soma_joinid"); - // Open for write - sdf = open(OpenMode::write); - sdf->resize_soma_joinid_shape(new_shape, "testing"); - sdf->close(); - } + soma_domain = sdf->get_soma_domain(); + dom_sjid = ArrowAdapter::get_table_non_string_column_by_name( + soma_domain, "soma_joinid"); - // Check shape after resize - sdf = open(OpenMode::read); - expect = SOMA_JOINID_RESIZE_DIM_MAX + 1; - actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(actual.has_value()); - REQUIRE(actual.value() == expect); - sdf->close(); + soma_maxdomain = sdf->get_soma_maxdomain(); + maxdom_sjid = ArrowAdapter::get_table_non_string_column_by_name< + int64_t>(soma_maxdomain, "soma_joinid"); - // Implicitly we expect no throw - write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); + REQUIRE(ned_sjid == std::vector({1, 101})); + REQUIRE( + dom_sjid == std::vector({0, SOMA_JOINID_RESIZE_DIM_MAX})); + REQUIRE(maxdom_sjid.size() == 2); + REQUIRE(maxdom_sjid[0] == 0); + REQUIRE(maxdom_sjid[1] > 2000000000); + + // Check can_resize_soma_joinid_shape + check = sdf->can_resize_soma_joinid_shape(1, "testing"); + // Must fail since this is too small. 
+ REQUIRE(check.first == false); + REQUIRE( + check.second == + "testing: new soma_joinid shape 1 < existing shape " + "200"); + check = sdf->can_resize_soma_joinid_shape( + SOMA_JOINID_RESIZE_DIM_MAX + 1, "testing"); + REQUIRE(check.first == true); + REQUIRE(check.second == ""); - // Check domainish accessors after resize - sdf->open(OpenMode::read); + sdf->close(); - non_empty_domain = sdf->get_non_empty_domain(); - ned_sjid = ArrowAdapter::get_table_non_string_column_by_name< - int64_t>(non_empty_domain, "soma_joinid"); + // Check can_upgrade_domain + sdf->open(OpenMode::read); + domain_schema = create_index_cols_info_schema(dim_infos); + domain_array = ArrowAdapter::make_arrow_array_parent(dim_infos.size()); + domain_array->children[0] = ArrowAdapter::make_arrow_array_child( + std::vector({0, 0})); + domain_table = ArrowTable( + std::move(domain_array), std::move(domain_schema)); + // The dataframe now has a shape + check = sdf->can_upgrade_soma_joinid_shape(1, "testing"); + // Must fail since this is too small. + REQUIRE(check.first == false); + REQUIRE( + check.second == "testing: dataframe already has its domain set."); + sdf->close(); + } +} - soma_domain = sdf->get_soma_domain(); - dom_sjid = ArrowAdapter::get_table_non_string_column_by_name< - int64_t>(soma_domain, "soma_joinid"); +TEST_CASE_METHOD( + VariouslyIndexedDataFrameFixture, + "SOMADataFrame: variant-indexed dataframe dim-u32-sjid attr-str", + "[SOMADataFrame]") { + // We have these: + // * upgrade_domain requires the user to specify values for all + // index + // columns. This is in the spec. + // * resize_soma_joinid_shape allows the user to specify only the + // desired soma_joinid shape. This is crucial for experiment-level + // resize as an internal method at the Python level. + // Both need testing. Each one adds a shape where there wasn't one + // before. So we need to test one or the other on a given run. 
+ auto test_upgrade_domain = GENERATE(false, true); + std::ostringstream section; + section << "- test_upgrade_domain=" << test_upgrade_domain; + SECTION(section.str()) { + std::string suffix = test_upgrade_domain ? "true" : "false"; + set_up( + std::make_shared(), + "mem://unit-test-variant-indexed-dataframe-2-" + suffix); - soma_maxdomain = sdf->get_soma_maxdomain(); - maxdom_sjid = ArrowAdapter::get_table_non_string_column_by_name< - int64_t>(soma_maxdomain, "soma_joinid"); + std::vector dim_infos( + {u32_dim_info(), i64_dim_info()}); + std::vector attr_infos({str_attr_info()}); + + // Create + create(dim_infos, attr_infos); + + // Check current domain + auto sdf = open(OpenMode::read); + + CurrentDomain current_domain = sdf->get_current_domain_for_test(); + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); + + std::array u32_range = ndrect.range( + dim_infos[0].name); + REQUIRE(u32_range[0] == (uint32_t)0); + REQUIRE(u32_range[1] == (uint32_t)dim_infos[0].dim_max); + + std::array i64_range = ndrect.range( + dim_infos[1].name); + REQUIRE(i64_range[0] == (int64_t)0); + REQUIRE(i64_range[1] == (int64_t)dim_infos[1].dim_max); + + // Check shape before write + int64_t expect = dim_infos[1].dim_max + 1; + std::optional actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); + + sdf->close(); + + REQUIRE(sdf->nnz() == 0); + + // Write + write_sjid_u32_str_data_from(0); + + REQUIRE(sdf->nnz() == 2); + write_sjid_u32_str_data_from(8); + REQUIRE(sdf->nnz() == 4); + + // Check domainish accessors before resize + sdf->open(OpenMode::read); + + ArrowTable non_empty_domain = sdf->get_non_empty_domain(); + std::vector ned_sjid = + ArrowAdapter::get_table_non_string_column_by_name( + non_empty_domain, "soma_joinid"); + std::vector ned_u32 = + ArrowAdapter::get_table_non_string_column_by_name( + non_empty_domain, "myuint32"); + + 
ArrowTable soma_domain = sdf->get_soma_domain(); + std::vector dom_sjid = + ArrowAdapter::get_table_non_string_column_by_name( + soma_domain, "soma_joinid"); + std::vector dom_u32 = + ArrowAdapter::get_table_non_string_column_by_name( + soma_domain, "myuint32"); + + ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); + std::vector maxdom_sjid = + ArrowAdapter::get_table_non_string_column_by_name( + soma_maxdomain, "soma_joinid"); + std::vector maxdom_u32 = + ArrowAdapter::get_table_non_string_column_by_name( + soma_maxdomain, "myuint32"); + + REQUIRE(ned_sjid == std::vector({1, 10})); + REQUIRE(ned_u32 == std::vector({1234, 5678})); + + REQUIRE(dom_sjid == std::vector({0, 99})); + REQUIRE(dom_u32 == std::vector({0, 9999})); + + REQUIRE(maxdom_sjid.size() == 2); + REQUIRE(maxdom_u32.size() == 2); + + REQUIRE(maxdom_u32[0] == 0); + REQUIRE(maxdom_u32[1] > 2000000000); + + sdf->close(); + + // Check shape after write + sdf = open(OpenMode::read); + expect = dim_infos[1].dim_max + 1; + actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); + + // Check can_upgrade_soma_joinid_shape + StatusAndReason check = sdf->can_upgrade_soma_joinid_shape( + 1, "testing"); + // Must fail since this is too small. + REQUIRE(check.first == false); + REQUIRE( + check.second == "testing: dataframe already has its domain set."); + + // Check can_upgrade_domain + std::unique_ptr + domain_schema = create_index_cols_info_schema(dim_infos); + auto domain_array = ArrowAdapter::make_arrow_array_parent( + dim_infos.size()); + // OK since there currently is no shape set: + domain_array->children[0] = ArrowAdapter::make_arrow_array_child( + std::vector({0, 0})); + domain_array->children[1] = ArrowAdapter::make_arrow_array_child( + std::vector({0, 0})); + auto domain_table = ArrowTable( + std::move(domain_array), std::move(domain_schema)); + + check = sdf->can_upgrade_soma_joinid_shape(1, "testing"); + // Must fail since this is too small. 
+ REQUIRE(check.first == false); + REQUIRE( + check.second == "testing: dataframe already has its domain set."); + + sdf->close(); - REQUIRE(ned_sjid == std::vector({1, 101})); - REQUIRE( - dom_sjid == - std::vector({0, SOMA_JOINID_RESIZE_DIM_MAX})); - REQUIRE(maxdom_sjid.size() == 2); - REQUIRE(maxdom_sjid[0] == 0); - REQUIRE(maxdom_sjid[1] > 2000000000); + // Resize + auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; + uint32_t new_u32_dim_max = (uint32_t)u32_dim_max * 2 + 1; - // Check can_resize_soma_joinid_shape - check = sdf->can_resize_soma_joinid_shape(1, "testing"); - // Must fail since this is too small. - REQUIRE(check.first == false); - REQUIRE( - check.second == - "testing: new soma_joinid shape 1 < existing shape " - "200"); - check = sdf->can_resize_soma_joinid_shape( - SOMA_JOINID_RESIZE_DIM_MAX + 1, "testing"); - REQUIRE(check.first == true); - REQUIRE(check.second == ""); + // Expect throw on write beyond current domain before resize + REQUIRE_THROWS(write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX)); - sdf->close(); + // Check shape after write + sdf = open(OpenMode::read); + expect = dim_infos[1].dim_max + 1; + actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); + sdf->close(); - // Check can_upgrade_domain - sdf->open(OpenMode::read); - domain_schema = create_index_cols_info_schema(dim_infos); - domain_array = ArrowAdapter::make_arrow_array_parent( + // Apply the domain change + if (test_upgrade_domain) { + std::unique_ptr + domain_schema = create_index_cols_info_schema(dim_infos); + auto domain_array = ArrowAdapter::make_arrow_array_parent( dim_infos.size()); domain_array->children[0] = ArrowAdapter::make_arrow_array_child( - std::vector({0, 0})); - domain_table = ArrowTable( + std::vector({0, new_u32_dim_max})); + domain_array->children[1] = ArrowAdapter::make_arrow_array_child( + std::vector({0, new_shape - 1})); + auto domain_table = ArrowTable( std::move(domain_array), 
std::move(domain_schema)); - // The dataframe now has a shape - check = sdf->can_upgrade_soma_joinid_shape(1, "testing"); - // Must fail since this is too small. - REQUIRE(check.first == false); - REQUIRE( - check.second == - "testing: dataframe already has its domain set."); + + // Not open for write + sdf = open(OpenMode::read); + REQUIRE_THROWS(sdf->change_domain(domain_table, "testing")); + sdf->close(); + + // Open for write + sdf = open(OpenMode::write); + sdf->change_domain(domain_table, "testing"); + sdf->close(); + + } else { + // Not open for write + sdf = open(OpenMode::read); + REQUIRE_THROWS(sdf->resize_soma_joinid_shape(new_shape, "testing")); + sdf->close(); + + // Open for write + sdf = open(OpenMode::write); + sdf->resize_soma_joinid_shape(new_shape, "testing"); sdf->close(); } + sdf->close(); + + // Implicitly we expect no throw + write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); + + // Check domainish accessors after resize + sdf->open(OpenMode::read); + + non_empty_domain = sdf->get_non_empty_domain(); + ned_sjid = ArrowAdapter::get_table_non_string_column_by_name( + non_empty_domain, "soma_joinid"); + ned_u32 = ArrowAdapter::get_table_non_string_column_by_name( + non_empty_domain, "myuint32"); + + soma_domain = sdf->get_soma_domain(); + dom_sjid = ArrowAdapter::get_table_non_string_column_by_name( + soma_domain, "soma_joinid"); + dom_u32 = ArrowAdapter::get_table_non_string_column_by_name( + soma_domain, "myuint32"); + + soma_maxdomain = sdf->get_soma_maxdomain(); + maxdom_sjid = ArrowAdapter::get_table_non_string_column_by_name< + int64_t>(soma_maxdomain, "soma_joinid"); + maxdom_u32 = ArrowAdapter::get_table_non_string_column_by_name< + uint32_t>(soma_maxdomain, "myuint32"); + + REQUIRE(ned_sjid == std::vector({1, 101})); + REQUIRE(ned_u32 == std::vector({1234, 5678})); + + REQUIRE(dom_sjid == std::vector({0, 199})); + if (test_upgrade_domain) { + REQUIRE(dom_u32 == std::vector({0, 19999})); + } else { + REQUIRE(dom_u32 == std::vector({0, 
9999})); + } + + REQUIRE(maxdom_sjid.size() == 2); + REQUIRE(maxdom_sjid[0] == 0); + REQUIRE(maxdom_sjid[1] > 2000000000); + + REQUIRE(maxdom_u32.size() == 2); + REQUIRE(maxdom_u32[0] == 0); + REQUIRE(maxdom_u32[1] > 2000000000); + + // Check can_resize_soma_joinid_shape + check = sdf->can_resize_soma_joinid_shape(1, "testing"); + // Must fail since this is too small. + REQUIRE(check.first == false); + REQUIRE( + check.second == + "testing: new soma_joinid shape 1 < existing shape " + "200"); + check = sdf->can_resize_soma_joinid_shape( + SOMA_JOINID_RESIZE_DIM_MAX + 1, "testing"); + REQUIRE(check.first == true); + REQUIRE(check.second == ""); + + sdf->close(); } } TEST_CASE_METHOD( VariouslyIndexedDataFrameFixture, - "SOMADataFrame: variant-indexed dataframe dim-u32-sjid attr-str", + "SOMADataFrame: variant-indexed dataframe dim-sjid-str attr-u32", "[SOMADataFrame]") { - auto use_current_domain = GENERATE(true); + auto specify_domain = GENERATE(false, true); std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; + section << "- specify_domain=" << specify_domain; SECTION(section.str()) { - std::string suffix1 = use_current_domain ? "true" : "false"; - // We have these: - // * upgrade_domain requires the user to specify values for all - // index - // columns. This is in the spec. - // * resize_soma_joinid_shape allows the user to specify only the - // desired soma_joinid shape. This is crucial for experiment-level - // resize as an internal method at the Python level. - // Both need testing. Each one adds a shape where there wasn't one - // before. So we need to test one or the other on a given run. auto test_upgrade_domain = GENERATE(false, true); - std::ostringstream section2; + std::ostringstream section3; section << "- test_upgrade_domain=" << test_upgrade_domain; - SECTION(section2.str()) { + SECTION(section3.str()) { + std::string suffix1 = specify_domain ? "true" : "false"; std::string suffix2 = test_upgrade_domain ? 
"true" : "false"; set_up( std::make_shared(), - "mem://unit-test-variant-indexed-dataframe-2-" + suffix1 + "-" + + "mem://unit-test-variant-indexed-dataframe-3-" + suffix1 + "-" + suffix2); + std::string string_lo = specify_domain ? "apple" : ""; + std::string string_hi = specify_domain ? "zebra" : ""; std::vector dim_infos( - {u32_dim_info(), i64_dim_info()}); - std::vector attr_infos({str_attr_info()}); + {i64_dim_info(), str_dim_info(string_lo, string_hi)}); + std::vector attr_infos({u32_attr_info()}); // Create create(dim_infos, attr_infos); @@ -779,22 +990,30 @@ TEST_CASE_METHOD( REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); NDRectangle ndrect = current_domain.ndrectangle(); - std::array u32_range = ndrect.range( + std::array i64_range = ndrect.range( dim_infos[0].name); - REQUIRE(u32_range[0] == (uint32_t)0); - REQUIRE(u32_range[1] == (uint32_t)dim_infos[0].dim_max); + REQUIRE(i64_range[0] == (int64_t)0); + REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max); - std::array i64_range = ndrect.range( + std::array str_range = ndrect.range( dim_infos[1].name); - REQUIRE(i64_range[0] == (int64_t)0); - REQUIRE(i64_range[1] == (int64_t)dim_infos[1].dim_max); + if (specify_domain) { + REQUIRE(str_range[0] == dim_infos[1].string_lo); + REQUIRE(str_range[1] == dim_infos[1].string_hi); + } else { + // Can we write empty strings in this range? + REQUIRE(str_range[0] <= ""); + REQUIRE(str_range[1] >= ""); + // Can we write ASCII values in this range? 
+ REQUIRE(str_range[0] < " "); + REQUIRE(str_range[1] > "~"); + } // Check shape before write - int64_t expect = dim_infos[1].dim_max + 1; + int64_t expect = dim_infos[0].dim_max + 1; std::optional actual = sdf->maybe_soma_joinid_shape(); REQUIRE(actual.has_value()); REQUIRE(actual.value() == expect); - sdf->close(); REQUIRE(sdf->nnz() == 0); @@ -806,77 +1025,73 @@ TEST_CASE_METHOD( write_sjid_u32_str_data_from(8); REQUIRE(sdf->nnz() == 4); - // Check domainish accessors before resize - sdf->open(OpenMode::read); + // Check shape after write + sdf = open(OpenMode::read); + expect = dim_infos[0].dim_max + 1; + actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(actual.has_value()); + REQUIRE(actual.value() == expect); + // Check domainish accessors before resize ArrowTable non_empty_domain = sdf->get_non_empty_domain(); std::vector ned_sjid = ArrowAdapter::get_table_non_string_column_by_name( non_empty_domain, "soma_joinid"); - std::vector ned_u32 = - ArrowAdapter::get_table_non_string_column_by_name( - non_empty_domain, "myuint32"); + std::vector + ned_str = ArrowAdapter::get_table_string_column_by_name( + non_empty_domain, "mystring"); ArrowTable soma_domain = sdf->get_soma_domain(); std::vector dom_sjid = ArrowAdapter::get_table_non_string_column_by_name( soma_domain, "soma_joinid"); - std::vector dom_u32 = - ArrowAdapter::get_table_non_string_column_by_name( - soma_domain, "myuint32"); + std::vector + dom_str = ArrowAdapter::get_table_string_column_by_name( + soma_domain, "mystring"); ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); std::vector maxdom_sjid = ArrowAdapter::get_table_non_string_column_by_name( soma_maxdomain, "soma_joinid"); - std::vector maxdom_u32 = - ArrowAdapter::get_table_non_string_column_by_name( - soma_maxdomain, "myuint32"); + std::vector + maxdom_str = ArrowAdapter::get_table_string_column_by_name( + soma_maxdomain, "mystring"); REQUIRE(ned_sjid == std::vector({1, 10})); - REQUIRE(ned_u32 == std::vector({1234, 5678})); + 
REQUIRE(ned_str == std::vector({"apple", "bat"})); REQUIRE(dom_sjid == std::vector({0, 99})); - REQUIRE(dom_u32 == std::vector({0, 9999})); - REQUIRE(maxdom_sjid.size() == 2); - REQUIRE(maxdom_u32.size() == 2); + if (specify_domain) { + REQUIRE(dom_str[0] == dim_infos[1].string_lo); + REQUIRE(dom_str[1] == dim_infos[1].string_hi); + } else { + REQUIRE(dom_str[0] == ""); + REQUIRE(dom_str[1] == ""); + } - REQUIRE(maxdom_u32[0] == 0); - REQUIRE(maxdom_u32[1] > 2000000000); + REQUIRE(maxdom_sjid[0] == 0); + REQUIRE(maxdom_sjid[1] > 2000000000); + REQUIRE(maxdom_str == std::vector({"", ""})); sdf->close(); - // Check shape after write - sdf = open(OpenMode::read); - expect = dim_infos[1].dim_max + 1; - actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(actual.has_value()); - REQUIRE(actual.value() == expect); - - // Check can_upgrade_soma_joinid_shape - StatusAndReason check = sdf->can_upgrade_soma_joinid_shape( - 1, "testing"); - // Must fail since this is too small. - REQUIRE(check.first == false); - REQUIRE( - check.second == - "testing: dataframe already has its domain set."); - // Check can_upgrade_domain + sdf = open(OpenMode::read); std::unique_ptr domain_schema = create_index_cols_info_schema(dim_infos); auto domain_array = ArrowAdapter::make_arrow_array_parent( dim_infos.size()); - // OK since there currently is no shape set: domain_array->children[0] = ArrowAdapter::make_arrow_array_child( - std::vector({0, 0})); - domain_array->children[1] = ArrowAdapter::make_arrow_array_child( std::vector({0, 0})); + domain_array + ->children[1] = ArrowAdapter::make_arrow_array_child_string( + std::vector({"a", "z"})); auto domain_table = ArrowTable( std::move(domain_array), std::move(domain_schema)); - check = sdf->can_upgrade_soma_joinid_shape(1, "testing"); + StatusAndReason check = sdf->can_upgrade_soma_joinid_shape( + 1, "testing"); // Must fail since this is too small. 
REQUIRE(check.first == false); REQUIRE( @@ -886,15 +1101,16 @@ TEST_CASE_METHOD( sdf->close(); // Resize + auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; - uint32_t new_u32_dim_max = (uint32_t)u32_dim_max * 2 + 1; - // Expect throw on write beyond current domain before resize + // Expect throw on write beyond current domain before + // resize REQUIRE_THROWS(write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX)); // Check shape after write sdf = open(OpenMode::read); - expect = dim_infos[1].dim_max + 1; + expect = dim_infos[0].dim_max + 1; actual = sdf->maybe_soma_joinid_shape(); REQUIRE(actual.has_value()); REQUIRE(actual.value() == expect); @@ -908,10 +1124,10 @@ TEST_CASE_METHOD( dim_infos.size()); domain_array ->children[0] = ArrowAdapter::make_arrow_array_child( - std::vector({0, new_u32_dim_max})); - domain_array - ->children[1] = ArrowAdapter::make_arrow_array_child( std::vector({0, new_shape - 1})); + domain_array + ->children[1] = ArrowAdapter::make_arrow_array_child_string( + std::vector({"", ""})); auto domain_table = ArrowTable( std::move(domain_array), std::move(domain_schema)); @@ -935,51 +1151,54 @@ TEST_CASE_METHOD( // Open for write sdf = open(OpenMode::write); sdf->resize_soma_joinid_shape(new_shape, "testing"); + sdf->close(); } - sdf->close(); + sdf->open(OpenMode::write); // Implicitly we expect no throw write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); + sdf->close(); // Check domainish accessors after resize - sdf->open(OpenMode::read); + sdf->open(OpenMode::read, TimestampRange(0, 2)); non_empty_domain = sdf->get_non_empty_domain(); ned_sjid = ArrowAdapter::get_table_non_string_column_by_name< int64_t>(non_empty_domain, "soma_joinid"); - ned_u32 = ArrowAdapter::get_table_non_string_column_by_name< - uint32_t>(non_empty_domain, "myuint32"); + ned_str = ArrowAdapter::get_table_string_column_by_name( + non_empty_domain, "mystring"); soma_domain = sdf->get_soma_domain(); dom_sjid = ArrowAdapter::get_table_non_string_column_by_name< 
int64_t>(soma_domain, "soma_joinid"); - dom_u32 = ArrowAdapter::get_table_non_string_column_by_name< - uint32_t>(soma_domain, "myuint32"); + dom_str = ArrowAdapter::get_table_string_column_by_name( + soma_domain, "mystring"); soma_maxdomain = sdf->get_soma_maxdomain(); maxdom_sjid = ArrowAdapter::get_table_non_string_column_by_name< int64_t>(soma_maxdomain, "soma_joinid"); - maxdom_u32 = ArrowAdapter::get_table_non_string_column_by_name< - uint32_t>(soma_maxdomain, "myuint32"); + maxdom_str = ArrowAdapter::get_table_string_column_by_name( + soma_maxdomain, "mystring"); - REQUIRE(ned_sjid == std::vector({1, 101})); - REQUIRE(ned_u32 == std::vector({1234, 5678})); + REQUIRE(ned_sjid == std::vector({0, 0})); + REQUIRE(ned_str == std::vector({"", ""})); - REQUIRE(dom_sjid == std::vector({0, 199})); - if (test_upgrade_domain) { - REQUIRE(dom_u32 == std::vector({0, 19999})); + REQUIRE(dom_sjid == std::vector({0, 99})); + + if (specify_domain) { + REQUIRE(dom_str[0] == dim_infos[1].string_lo); + REQUIRE(dom_str[1] == dim_infos[1].string_hi); } else { - REQUIRE(dom_u32 == std::vector({0, 9999})); + REQUIRE(dom_str == std::vector({"", ""})); } - REQUIRE(maxdom_sjid.size() == 2); REQUIRE(maxdom_sjid[0] == 0); REQUIRE(maxdom_sjid[1] > 2000000000); - REQUIRE(maxdom_u32.size() == 2); - REQUIRE(maxdom_u32[0] == 0); - REQUIRE(maxdom_u32[1] > 2000000000); + REQUIRE(maxdom_str == std::vector({"", ""})); + + REQUIRE(ned_str == std::vector({"", ""})); // Check can_resize_soma_joinid_shape check = sdf->can_resize_soma_joinid_shape(1, "testing"); @@ -988,7 +1207,7 @@ TEST_CASE_METHOD( REQUIRE( check.second == "testing: new soma_joinid shape 1 < existing shape " - "200"); + "100"); check = sdf->can_resize_soma_joinid_shape( SOMA_JOINID_RESIZE_DIM_MAX + 1, "testing"); REQUIRE(check.first == true); @@ -1001,500 +1220,217 @@ TEST_CASE_METHOD( TEST_CASE_METHOD( VariouslyIndexedDataFrameFixture, - "SOMADataFrame: variant-indexed dataframe dim-sjid-str attr-u32", + "SOMADataFrame: 
variant-indexed dataframe dim-str-u32 attr-sjid", "[SOMADataFrame]") { - auto use_current_domain = GENERATE(true); + auto specify_domain = GENERATE(false, true); std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; + section << "- specify_domain=" << specify_domain; SECTION(section.str()) { - auto specify_domain = GENERATE(false, true); - std::ostringstream section2; - section2 << "- specify_domain=" << specify_domain; - SECTION(section2.str()) { - auto test_upgrade_domain = GENERATE(false, true); - std::ostringstream section3; - section << "- test_upgrade_domain=" << test_upgrade_domain; - SECTION(section3.str()) { - std::string suffix1 = use_current_domain ? "true" : "false"; - std::string suffix2 = specify_domain ? "true" : "false"; - std::string suffix3 = test_upgrade_domain ? "true" : "false"; - set_up( - std::make_shared(), - "mem://unit-test-variant-indexed-dataframe-3-" + suffix1 + - "-" + suffix2 + "-" + suffix3); - - std::string string_lo = specify_domain ? "apple" : ""; - std::string string_hi = specify_domain ? "zebra" : ""; - std::vector dim_infos( - {i64_dim_info(), str_dim_info(string_lo, string_hi)}); - std::vector attr_infos({u32_attr_info()}); - - // Create - create(dim_infos, attr_infos); - - // Check current domain - auto sdf = open(OpenMode::read); - - CurrentDomain - current_domain = sdf->get_current_domain_for_test(); - REQUIRE(!current_domain.is_empty()); - REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); - NDRectangle ndrect = current_domain.ndrectangle(); - - std::array i64_range = ndrect.range( - dim_infos[0].name); - REQUIRE(i64_range[0] == (int64_t)0); - REQUIRE(i64_range[1] == (int64_t)dim_infos[0].dim_max); - - std::array - str_range = ndrect.range(dim_infos[1].name); - if (specify_domain) { - REQUIRE(str_range[0] == dim_infos[1].string_lo); - REQUIRE(str_range[1] == dim_infos[1].string_hi); - } else { - // Can we write empty strings in this range? 
- REQUIRE(str_range[0] <= ""); - REQUIRE(str_range[1] >= ""); - // Can we write ASCII values in this range? - REQUIRE(str_range[0] < " "); - REQUIRE(str_range[1] > "~"); - } - - // Check shape before write - int64_t expect = dim_infos[0].dim_max + 1; - std::optional actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(actual.has_value()); - REQUIRE(actual.value() == expect); - sdf->close(); - - REQUIRE(sdf->nnz() == 0); - - // Write - write_sjid_u32_str_data_from(0); - - REQUIRE(sdf->nnz() == 2); - write_sjid_u32_str_data_from(8); - REQUIRE(sdf->nnz() == 4); - - // Check shape after write - sdf->open(OpenMode::read); - expect = dim_infos[0].dim_max + 1; - actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(actual.has_value()); - REQUIRE(actual.value() == expect); - - // Check domainish accessors before resize - ArrowTable non_empty_domain = sdf->get_non_empty_domain(); - std::vector ned_sjid = - ArrowAdapter::get_table_non_string_column_by_name( - non_empty_domain, "soma_joinid"); - std::vector - ned_str = ArrowAdapter::get_table_string_column_by_name( - non_empty_domain, "mystring"); - - ArrowTable soma_domain = sdf->get_soma_domain(); - std::vector dom_sjid = - ArrowAdapter::get_table_non_string_column_by_name( - soma_domain, "soma_joinid"); - std::vector - dom_str = ArrowAdapter::get_table_string_column_by_name( - soma_domain, "mystring"); - - ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); - std::vector maxdom_sjid = - ArrowAdapter::get_table_non_string_column_by_name( - soma_maxdomain, "soma_joinid"); - std::vector - maxdom_str = ArrowAdapter::get_table_string_column_by_name( - soma_maxdomain, "mystring"); - - REQUIRE(ned_sjid == std::vector({1, 10})); - REQUIRE(ned_str == std::vector({"apple", "bat"})); - REQUIRE(dom_sjid == std::vector({0, 99})); - - if (specify_domain) { - REQUIRE(dom_str[0] == dim_infos[1].string_lo); - REQUIRE(dom_str[1] == dim_infos[1].string_hi); - } else { - REQUIRE(dom_str[0] == ""); - REQUIRE(dom_str[1] == ""); - } - - 
REQUIRE(maxdom_sjid[0] == 0); - REQUIRE(maxdom_sjid[1] > 2000000000); - REQUIRE(maxdom_str == std::vector({"", ""})); - - sdf->close(); - - // Check can_upgrade_domain - sdf->open(OpenMode::read); - std::unique_ptr - domain_schema = create_index_cols_info_schema(dim_infos); - auto domain_array = ArrowAdapter::make_arrow_array_parent( - dim_infos.size()); - domain_array - ->children[0] = ArrowAdapter::make_arrow_array_child( - std::vector({0, 0})); - domain_array - ->children[1] = ArrowAdapter::make_arrow_array_child_string( - std::vector({"a", "z"})); - auto domain_table = ArrowTable( - std::move(domain_array), std::move(domain_schema)); + auto test_upgrade_domain = GENERATE(false, true); + std::ostringstream section3; + section << "- test_upgrade_domain=" << test_upgrade_domain; + SECTION(section3.str()) { + std::string suffix1 = specify_domain ? "true" : "false"; + std::string suffix2 = test_upgrade_domain ? "true" : "false"; + set_up( + std::make_shared(), + "mem://unit-test-variant-indexed-dataframe-4-" + suffix1 + "-" + + suffix2); - StatusAndReason check = sdf->can_upgrade_soma_joinid_shape( - 1, "testing"); - // Must fail since this is too small. - REQUIRE(check.first == false); - REQUIRE( - check.second == - "testing: dataframe already has its domain set."); + std::string string_lo = specify_domain ? "apple" : ""; + std::string string_hi = specify_domain ? 
"zebra" : ""; + std::vector dim_infos( + {str_dim_info(string_lo, string_hi), u32_dim_info()}); + std::vector attr_infos({i64_attr_info()}); - sdf->close(); + // Create + create(dim_infos, attr_infos); - // Resize + // Check current domain + auto sdf = open(OpenMode::read); - auto new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; + CurrentDomain current_domain = sdf->get_current_domain_for_test(); + REQUIRE(!current_domain.is_empty()); + REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); + NDRectangle ndrect = current_domain.ndrectangle(); - // Expect throw on write beyond current domain before - // resize - REQUIRE_THROWS( - write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX)); - - // Check shape after write - sdf->open(OpenMode::read); - expect = dim_infos[0].dim_max + 1; - actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(actual.has_value()); - REQUIRE(actual.value() == expect); - sdf->close(); + std::array str_range = ndrect.range( + dim_infos[0].name); + if (specify_domain) { + REQUIRE(str_range[0] == dim_infos[0].string_lo); + REQUIRE(str_range[1] == dim_infos[0].string_hi); + } else { + // Can we write empty strings in this range? + REQUIRE(str_range[0] <= ""); + REQUIRE(str_range[1] >= ""); + // Can we write ASCII values in this range? 
+ REQUIRE(str_range[0] < " "); + REQUIRE(str_range[1] > "~"); + } - // Apply the domain change - if (test_upgrade_domain) { - std::unique_ptr - domain_schema = create_index_cols_info_schema( - dim_infos); - auto domain_array = ArrowAdapter::make_arrow_array_parent( - dim_infos.size()); - domain_array - ->children[0] = ArrowAdapter::make_arrow_array_child( - std::vector({0, new_shape - 1})); - domain_array->children[1] = - ArrowAdapter::make_arrow_array_child_string( - std::vector({"", ""})); - auto domain_table = ArrowTable( - std::move(domain_array), std::move(domain_schema)); - - // Not open for write - sdf->open(OpenMode::read); - REQUIRE_THROWS(sdf->change_domain(domain_table, "testing")); - sdf->close(); - - // Open for write - sdf->open(OpenMode::write); - sdf->change_domain(domain_table, "testing"); - sdf->close(); - - } else { - // Not open for write - sdf->open(OpenMode::read); - REQUIRE_THROWS( - sdf->resize_soma_joinid_shape(new_shape, "testing")); - sdf->close(); - - // Open for write - sdf->open(OpenMode::write); - sdf->resize_soma_joinid_shape(new_shape, "testing"); - - sdf->close(); - } - - sdf->open(OpenMode::write); - // Implicitly we expect no throw - write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); - sdf->close(); + std::array u32_range = ndrect.range( + dim_infos[1].name); + REQUIRE(u32_range[0] == (uint32_t)0); + REQUIRE(u32_range[1] == (uint32_t)dim_infos[1].dim_max); - // Check domainish accessors after resize - sdf->open(OpenMode::read, TimestampRange(0, 2)); + // Check shape before write + std::optional actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(!actual.has_value()); - non_empty_domain = sdf->get_non_empty_domain(); - ned_sjid = ArrowAdapter::get_table_non_string_column_by_name< - int64_t>(non_empty_domain, "soma_joinid"); + // Check domainish accessors before resize + ArrowTable non_empty_domain = sdf->get_non_empty_domain(); + std::vector ned_str = ArrowAdapter::get_table_string_column_by_name( non_empty_domain, "mystring"); - 
soma_domain = sdf->get_soma_domain(); - dom_sjid = ArrowAdapter::get_table_non_string_column_by_name< - int64_t>(soma_domain, "soma_joinid"); + ArrowTable soma_domain = sdf->get_soma_domain(); + std::vector dom_str = ArrowAdapter::get_table_string_column_by_name( soma_domain, "mystring"); - soma_maxdomain = sdf->get_soma_maxdomain(); - maxdom_sjid = ArrowAdapter::get_table_non_string_column_by_name< - int64_t>(soma_maxdomain, "soma_joinid"); + ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); + std::vector maxdom_str = ArrowAdapter::get_table_string_column_by_name( soma_maxdomain, "mystring"); - REQUIRE(ned_sjid == std::vector({0, 0})); - REQUIRE(ned_str == std::vector({"", ""})); - - REQUIRE(dom_sjid == std::vector({0, 99})); + REQUIRE(ned_str == std::vector({"", ""})); - if (specify_domain) { - REQUIRE(dom_str[0] == dim_infos[1].string_lo); - REQUIRE(dom_str[1] == dim_infos[1].string_hi); - } else { - REQUIRE(dom_str == std::vector({"", ""})); - } - - REQUIRE(maxdom_sjid[0] == 0); - REQUIRE(maxdom_sjid[1] > 2000000000); + if (specify_domain) { + REQUIRE(dom_str[0] == dim_infos[0].string_lo); + REQUIRE(dom_str[1] == dim_infos[0].string_hi); + } else { + REQUIRE(dom_str == std::vector({"", ""})); + } + REQUIRE(maxdom_str == std::vector({"", ""})); - REQUIRE(maxdom_str == std::vector({"", ""})); + sdf->close(); - REQUIRE(ned_str == std::vector({"", ""})); + REQUIRE(sdf->nnz() == 0); - // Check can_resize_soma_joinid_shape - check = sdf->can_resize_soma_joinid_shape(1, "testing"); - // Must fail since this is too small. 
- REQUIRE(check.first == false); - REQUIRE( - check.second == - "testing: new soma_joinid shape 1 < existing shape " - "100"); - check = sdf->can_resize_soma_joinid_shape( - SOMA_JOINID_RESIZE_DIM_MAX + 1, "testing"); - REQUIRE(check.first == true); - REQUIRE(check.second == ""); + // Write + write_sjid_u32_str_data_from(0); - sdf->close(); - } - } - } -} + REQUIRE(sdf->nnz() == 2); + write_sjid_u32_str_data_from(8); + // soma_joinid is not a dim here and so the second write is + // an overwrite of the first here + REQUIRE(sdf->nnz() == 2); -TEST_CASE_METHOD( - VariouslyIndexedDataFrameFixture, - "SOMADataFrame: variant-indexed dataframe dim-str-u32 attr-sjid", - "[SOMADataFrame]") { - auto use_current_domain = GENERATE(true); - std::ostringstream section; - section << "- use_current_domain=" << use_current_domain; - SECTION(section.str()) { - auto specify_domain = GENERATE(false, true); - std::ostringstream section2; - section2 << "- specify_domain=" << specify_domain; - SECTION(section2.str()) { - auto test_upgrade_domain = GENERATE(false, true); - std::ostringstream section3; - section << "- test_upgrade_domain=" << test_upgrade_domain; - SECTION(section3.str()) { - std::string suffix1 = use_current_domain ? "true" : "false"; - std::string suffix2 = specify_domain ? "true" : "false"; - std::string suffix3 = test_upgrade_domain ? "true" : "false"; - set_up( - std::make_shared(), - "mem://unit-test-variant-indexed-dataframe-4-" + suffix1 + - "-" + suffix2 + "-" + suffix3); - - std::string string_lo = specify_domain ? "apple" : ""; - std::string string_hi = specify_domain ? 
"zebra" : ""; - std::vector dim_infos( - {str_dim_info(string_lo, string_hi), u32_dim_info()}); - std::vector attr_infos({i64_attr_info()}); - - // Create - create(dim_infos, attr_infos); - - // Check current domain - auto sdf = open(OpenMode::read); - - CurrentDomain - current_domain = sdf->get_current_domain_for_test(); - REQUIRE(!current_domain.is_empty()); - REQUIRE(current_domain.type() == TILEDB_NDRECTANGLE); - NDRectangle ndrect = current_domain.ndrectangle(); - - std::array - str_range = ndrect.range(dim_infos[0].name); - if (specify_domain) { - REQUIRE(str_range[0] == dim_infos[0].string_lo); - REQUIRE(str_range[1] == dim_infos[0].string_hi); - } else { - // Can we write empty strings in this range? - REQUIRE(str_range[0] <= ""); - REQUIRE(str_range[1] >= ""); - // Can we write ASCII values in this range? - REQUIRE(str_range[0] < " "); - REQUIRE(str_range[1] > "~"); - } - - std::array u32_range = ndrect.range( - dim_infos[1].name); - REQUIRE(u32_range[0] == (uint32_t)0); - REQUIRE(u32_range[1] == (uint32_t)dim_infos[1].dim_max); - - // Check shape before write - std::optional actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(!actual.has_value()); - - // Check domainish accessors before resize - ArrowTable non_empty_domain = sdf->get_non_empty_domain(); - std::vector - ned_str = ArrowAdapter::get_table_string_column_by_name( - non_empty_domain, "mystring"); - - ArrowTable soma_domain = sdf->get_soma_domain(); - std::vector - dom_str = ArrowAdapter::get_table_string_column_by_name( - soma_domain, "mystring"); - - ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); - std::vector - maxdom_str = ArrowAdapter::get_table_string_column_by_name( - soma_maxdomain, "mystring"); - - REQUIRE(ned_str == std::vector({"", ""})); - - if (specify_domain) { - REQUIRE(dom_str[0] == dim_infos[0].string_lo); - REQUIRE(dom_str[1] == dim_infos[0].string_hi); - } else { - REQUIRE(dom_str == std::vector({"", ""})); - } - REQUIRE(maxdom_str == std::vector({"", ""})); + // Check 
shape after write + sdf = open(OpenMode::read); + actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(!actual.has_value()); + sdf->close(); - sdf->close(); + // Check can_upgrade_domain + sdf = open(OpenMode::read); + std::unique_ptr + domain_schema = create_index_cols_info_schema(dim_infos); + auto domain_array = ArrowAdapter::make_arrow_array_parent( + dim_infos.size()); + domain_array + ->children[0] = ArrowAdapter::make_arrow_array_child_string( + std::vector({"a", "z"})); + domain_array->children[1] = ArrowAdapter::make_arrow_array_child( + std::vector({0, 0})); + auto domain_table = ArrowTable( + std::move(domain_array), std::move(domain_schema)); - REQUIRE(sdf->nnz() == 0); + StatusAndReason check = sdf->can_upgrade_soma_joinid_shape( + 1, "testing"); + // Must fail since this is too small. + REQUIRE(check.first == false); + REQUIRE( + check.second == + "testing: dataframe already has its domain set."); - // Write - write_sjid_u32_str_data_from(0); + sdf->close(); - REQUIRE(sdf->nnz() == 2); - write_sjid_u32_str_data_from(8); - // soma_joinid is not a dim here and so the second write is - // an overwrite of the first here - REQUIRE(sdf->nnz() == 2); + // Resize + int64_t new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; + uint32_t new_u32_dim_max = u32_dim_max * 2 + 1; - // Check shape after write - sdf = open(OpenMode::read); - actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(!actual.has_value()); - sdf->close(); + // Check shape after write + sdf = open(OpenMode::read); + actual = sdf->maybe_soma_joinid_shape(); + REQUIRE(!actual.has_value()); + sdf->close(); - // Check can_upgrade_domain - sdf = open(OpenMode::read); + // Apply the domain change + if (test_upgrade_domain) { std::unique_ptr domain_schema = create_index_cols_info_schema(dim_infos); auto domain_array = ArrowAdapter::make_arrow_array_parent( dim_infos.size()); domain_array ->children[0] = ArrowAdapter::make_arrow_array_child_string( - std::vector({"a", "z"})); + std::vector({"", ""})); 
domain_array ->children[1] = ArrowAdapter::make_arrow_array_child( - std::vector({0, 0})); + std::vector({0, new_u32_dim_max})); auto domain_table = ArrowTable( std::move(domain_array), std::move(domain_schema)); - StatusAndReason check = sdf->can_upgrade_soma_joinid_shape( - 1, "testing"); - // Must fail since this is too small. - REQUIRE(check.first == false); - REQUIRE( - check.second == - "testing: dataframe already has its domain set."); - + // Not open for write + sdf = open(OpenMode::read); + REQUIRE_THROWS(sdf->change_domain(domain_table, "testing")); sdf->close(); - // Resize - int64_t new_shape = int64_t{SOMA_JOINID_RESIZE_DIM_MAX + 1}; - uint32_t new_u32_dim_max = u32_dim_max * 2 + 1; + // Open for write + sdf = open(OpenMode::write); + sdf->change_domain(domain_table, "testing"); + sdf->close(); - // Check shape after write + } else { + // Not open for write sdf = open(OpenMode::read); - actual = sdf->maybe_soma_joinid_shape(); - REQUIRE(!actual.has_value()); + REQUIRE_THROWS( + sdf->resize_soma_joinid_shape(new_shape, "testing")); sdf->close(); - // Apply the domain change - if (test_upgrade_domain) { - std::unique_ptr - domain_schema = create_index_cols_info_schema( - dim_infos); - auto domain_array = ArrowAdapter::make_arrow_array_parent( - dim_infos.size()); - domain_array->children[0] = - ArrowAdapter::make_arrow_array_child_string( - std::vector({"", ""})); - domain_array - ->children[1] = ArrowAdapter::make_arrow_array_child( - std::vector({0, new_u32_dim_max})); - auto domain_table = ArrowTable( - std::move(domain_array), std::move(domain_schema)); - - // Not open for write - sdf = open(OpenMode::read); - // REQUIRE_THROWS(sdf->change_domain(domain_table, - // "testing")); - sdf->close(); - - // Open for write - sdf = open(OpenMode::write); - sdf->change_domain(domain_table, "testing"); - sdf->close(); - - } else { - // Not open for write - sdf = open(OpenMode::read); - REQUIRE_THROWS( - sdf->resize_soma_joinid_shape(new_shape, "testing")); - 
sdf->close(); - - // Open for write - sdf = open(OpenMode::write); - sdf->resize_soma_joinid_shape(new_shape, "testing"); - sdf->close(); - } - + // Open for write sdf = open(OpenMode::write); - write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); + sdf->resize_soma_joinid_shape(new_shape, "testing"); sdf->close(); + } - // Check domainish accessors after resize - sdf->open(OpenMode::read, TimestampRange(0, 2)); + sdf = open(OpenMode::write); + write_sjid_u32_str_data_from(SOMA_JOINID_DIM_MAX); + sdf->close(); - non_empty_domain = sdf->get_non_empty_domain(); - ned_str = ArrowAdapter::get_table_string_column_by_name( - non_empty_domain, "mystring"); + // Check domainish accessors after resize + sdf->open(OpenMode::read, TimestampRange(0, 2)); - soma_domain = sdf->get_soma_domain(); - dom_str = ArrowAdapter::get_table_string_column_by_name( - soma_domain, "mystring"); + non_empty_domain = sdf->get_non_empty_domain(); + ned_str = ArrowAdapter::get_table_string_column_by_name( + non_empty_domain, "mystring"); - soma_maxdomain = sdf->get_soma_maxdomain(); - maxdom_str = ArrowAdapter::get_table_string_column_by_name( - soma_maxdomain, "mystring"); + soma_domain = sdf->get_soma_domain(); + dom_str = ArrowAdapter::get_table_string_column_by_name( + soma_domain, "mystring"); + + soma_maxdomain = sdf->get_soma_maxdomain(); + maxdom_str = ArrowAdapter::get_table_string_column_by_name( + soma_maxdomain, "mystring"); - REQUIRE(ned_str == std::vector({"", ""})); + REQUIRE(ned_str == std::vector({"", ""})); - if (specify_domain) { - REQUIRE(dom_str[0] == dim_infos[0].string_lo); - REQUIRE(dom_str[1] == dim_infos[0].string_hi); - } else { - REQUIRE(dom_str == std::vector({"", ""})); - } - REQUIRE(maxdom_str == std::vector({"", ""})); + if (specify_domain) { + REQUIRE(dom_str[0] == dim_infos[0].string_lo); + REQUIRE(dom_str[1] == dim_infos[0].string_hi); + } else { + REQUIRE(dom_str == std::vector({"", ""})); + } + REQUIRE(maxdom_str == std::vector({"", ""})); - // Check 
can_resize_soma_joinid_shape - check = sdf->can_resize_soma_joinid_shape(0, "testing"); + // Check can_resize_soma_joinid_shape + check = sdf->can_resize_soma_joinid_shape(0, "testing"); - // Must pass since soma_joinid isn't a dim in this case. - REQUIRE(check.first == true); - REQUIRE(check.second == ""); + // Must pass since soma_joinid isn't a dim in this case. + REQUIRE(check.first == true); + REQUIRE(check.second == ""); - sdf->close(); - } + sdf->close(); } } } diff --git a/libtiledbsoma/test/unit_soma_dense_ndarray.cc b/libtiledbsoma/test/unit_soma_dense_ndarray.cc index 5c8ff9c1fa..4d1aae7f9c 100644 --- a/libtiledbsoma/test/unit_soma_dense_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_dense_ndarray.cc @@ -38,143 +38,32 @@ TEST_CASE("SOMADenseNDArray: basic", "[SOMADenseNDArray]") { // 1000. We want to carefully and explicitly test here that there aren't any // off-by-one errors. int64_t dim_max = 999; - int64_t shape = 1000; - - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dense-ndarray-basic"; - std::string dim_name = "soma_dim_0"; - tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; - tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; - std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( - dim_tiledb_datatype); - std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( - attr_tiledb_datatype); - - REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = dim_tiledb_datatype, - .dim_max = dim_max, - .string_lo = "N/A", - .string_hi = "N/A"}}); - - auto index_columns = helper::create_column_index_info(dim_infos); - - if (helper::have_dense_current_domain_support()) { - SOMADenseNDArray::create( - uri, - dim_arrow_format, - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - PlatformConfig(), - 
TimestampRange(0, 2)); - - auto dnda = SOMADenseNDArray::open(uri, OpenMode::read, ctx); - REQUIRE(dnda->shape() == std::vector{dim_max + 1}); - dnda->close(); - } else { - REQUIRE_THROWS(SOMADenseNDArray::create( - uri, - dim_arrow_format, - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2))); - } - } -} - -TEST_CASE("SOMADenseNDArray: platform_config", "[SOMADenseNDArray]") { - int64_t dim_max = 999; - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dense-ndarray-platform-config"; - std::string dim_name = "soma_dim_0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - PlatformConfig platform_config; - platform_config.dense_nd_array_dim_zstd_level = 6; - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = dim_max, - .string_lo = "N/A", - .string_hi = "N/A"}}); - - auto index_columns = helper::create_column_index_info(dim_infos); - - if (helper::have_dense_current_domain_support()) { - SOMADenseNDArray::create( - uri, - arrow_format, - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - platform_config); - - auto dnda = SOMADenseNDArray::open(uri, OpenMode::read, ctx); - auto dim_filter = dnda->tiledb_schema() - ->domain() - .dimension(dim_name) - .filter_list() - .filter(0); - REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); - REQUIRE( - dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); - - dnda->close(); - - } else { - REQUIRE_THROWS(SOMADenseNDArray::create( - uri, - arrow_format, - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - platform_config)); - } - } -} -TEST_CASE("SOMADenseNDArray: metadata", "[SOMADenseNDArray]") 
{ - int64_t dim_max = 999; - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dense-ndarray"; - std::string dim_name = "soma_dim_0"; - tiledb_datatype_t tiledb_datatype = TILEDB_INT64; - std::string arrow_format = ArrowAdapter::tdb_to_arrow_type( - tiledb_datatype); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = tiledb_datatype, - .dim_max = dim_max, - .string_lo = "N/A", - .string_hi = "N/A"}}); - - auto index_columns = helper::create_column_index_info(dim_infos); - - SOMASparseNDArray::create( + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dense-ndarray-basic"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; + tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; + std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( + dim_tiledb_datatype); + std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( + attr_tiledb_datatype); + + REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = dim_tiledb_datatype, + .dim_max = dim_max, + .string_lo = "N/A", + .string_hi = "N/A"}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + + if (helper::have_dense_current_domain_support()) { + SOMADenseNDArray::create( uri, - arrow_format, + dim_arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), @@ -182,59 +71,151 @@ TEST_CASE("SOMADenseNDArray: metadata", "[SOMADenseNDArray]") { PlatformConfig(), TimestampRange(0, 2)); - auto dnda = SOMADenseNDArray::open( + auto dnda = SOMADenseNDArray::open(uri, OpenMode::read, ctx); + REQUIRE(dnda->shape() == std::vector{dim_max + 1}); + dnda->close(); + } else { + REQUIRE_THROWS(SOMADenseNDArray::create( uri, - OpenMode::write, + dim_arrow_format, + ArrowTable( + 
std::move(index_columns.first), + std::move(index_columns.second)), ctx, - {}, - ResultOrder::automatic, - std::pair(1, 1)); - - int32_t val = 100; - dnda->set_metadata("md", TILEDB_INT32, 1, &val); - dnda->close(); + PlatformConfig(), + TimestampRange(0, 2))); + } +} - // Read metadata - dnda->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(dnda->metadata_num() == 3); - REQUIRE(dnda->has_metadata("soma_object_type")); - REQUIRE(dnda->has_metadata("soma_encoding_version")); - REQUIRE(dnda->has_metadata("md")); - auto mdval = dnda->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - dnda->close(); +TEST_CASE("SOMADenseNDArray: platform_config", "[SOMADenseNDArray]") { + int64_t dim_max = 999; + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dense-ndarray-platform-config"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + PlatformConfig platform_config; + platform_config.dense_nd_array_dim_zstd_level = 6; + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .string_lo = "N/A", + .string_hi = "N/A"}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + + if (helper::have_dense_current_domain_support()) { + SOMADenseNDArray::create( + uri, + arrow_format, + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + platform_config); - // md should not be available at (2, 2) - dnda->open(OpenMode::read, TimestampRange(2, 2)); - REQUIRE(dnda->metadata_num() == 2); - REQUIRE(dnda->has_metadata("soma_object_type")); - REQUIRE(dnda->has_metadata("soma_encoding_version")); - REQUIRE(!dnda->has_metadata("md")); - dnda->close(); + auto dnda = SOMADenseNDArray::open(uri, OpenMode::read, ctx); + auto dim_filter = 
dnda->tiledb_schema() + ->domain() + .dimension(dim_name) + .filter_list() + .filter(0); + REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); + REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); - // Metadata should also be retrievable in write mode - dnda->open(OpenMode::write, TimestampRange(0, 2)); - REQUIRE(dnda->metadata_num() == 3); - REQUIRE(dnda->has_metadata("soma_object_type")); - REQUIRE(dnda->has_metadata("soma_encoding_version")); - REQUIRE(dnda->has_metadata("md")); - mdval = dnda->get_metadata("md"); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write - // mode - dnda->delete_metadata("md"); - mdval = dnda->get_metadata("md"); - REQUIRE(!mdval.has_value()); dnda->close(); - // Confirm delete in read mode - dnda->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(!dnda->has_metadata("md")); - REQUIRE(dnda->metadata_num() == 2); + } else { + REQUIRE_THROWS(SOMADenseNDArray::create( + uri, + arrow_format, + ArrowTable( + std::move(index_columns.first), + std::move(index_columns.second)), + ctx, + platform_config)); } } + +TEST_CASE("SOMADenseNDArray: metadata", "[SOMADenseNDArray]") { + int64_t dim_max = 999; + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dense-ndarray"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .string_lo = "N/A", + .string_hi = "N/A"}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + + SOMASparseNDArray::create( + uri, + arrow_format, + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + + auto dnda = SOMADenseNDArray::open( + uri, + OpenMode::write, + ctx, + {}, + 
ResultOrder::automatic, + std::pair(1, 1)); + + int32_t val = 100; + dnda->set_metadata("md", TILEDB_INT32, 1, &val); + dnda->close(); + + // Read metadata + dnda->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(dnda->metadata_num() == 3); + REQUIRE(dnda->has_metadata("soma_object_type")); + REQUIRE(dnda->has_metadata("soma_encoding_version")); + REQUIRE(dnda->has_metadata("md")); + auto mdval = dnda->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + dnda->close(); + + // md should not be available at (2, 2) + dnda->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(dnda->metadata_num() == 2); + REQUIRE(dnda->has_metadata("soma_object_type")); + REQUIRE(dnda->has_metadata("soma_encoding_version")); + REQUIRE(!dnda->has_metadata("md")); + dnda->close(); + + // Metadata should also be retrievable in write mode + dnda->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(dnda->metadata_num() == 3); + REQUIRE(dnda->has_metadata("soma_object_type")); + REQUIRE(dnda->has_metadata("soma_encoding_version")); + REQUIRE(dnda->has_metadata("md")); + mdval = dnda->get_metadata("md"); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write + // mode + dnda->delete_metadata("md"); + mdval = dnda->get_metadata("md"); + REQUIRE(!mdval.has_value()); + dnda->close(); + + // Confirm delete in read mode + dnda->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!dnda->has_metadata("md")); + REQUIRE(dnda->metadata_num() == 2); +} diff --git a/libtiledbsoma/test/unit_soma_geometry_dataframe.cc b/libtiledbsoma/test/unit_soma_geometry_dataframe.cc index e2b73d4a45..b0c40eff6b 100644 --- a/libtiledbsoma/test/unit_soma_geometry_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_geometry_dataframe.cc @@ -40,319 +40,296 @@ const int64_t SOMA_JOINID_DIM_MAX = 99; TEST_CASE("SOMAGeometryDataFrame: basic", 
"[SOMAGeometryDataFrame]") { - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - std::string uri{"mem://unit-test-geometry-basic"}; - PlatformConfig platform_config{}; - - std::vector dim_infos( - {helper::DimInfo( - {.name = "soma_joinid", - .tiledb_datatype = TILEDB_INT64, - .dim_max = SOMA_JOINID_DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}), - helper::DimInfo( - {.name = "soma_geometry", - .tiledb_datatype = TILEDB_GEOM_WKB, - .dim_max = 100, - .string_lo = "N/A", - .string_hi = "N/A"})}); - - std::vector spatial_dim_infos( - {helper::DimInfo( - {.name = "x", - .tiledb_datatype = TILEDB_FLOAT64, - .dim_max = 200, - .string_lo = "N/A", - .string_hi = "N/A"}), - helper::DimInfo( - {.name = "y", - .tiledb_datatype = TILEDB_FLOAT64, - .dim_max = 100, - .string_lo = "N/A", - .string_hi = "N/A"})}); - - std::vector attr_infos({helper::AttrInfo( - {.name = "quality", .tiledb_datatype = TILEDB_FLOAT64})}); - - // Check the geometry dataframe doesn't exist yet. - REQUIRE(!SOMAGeometryDataFrame::exists(uri, ctx)); - - // Create the geometry dataframe. - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - auto spatial_columns = helper::create_column_index_info( - spatial_dim_infos); - - SOMAGeometryDataFrame::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ArrowTable( - std::move(spatial_columns.first), - std::move(spatial_columns.second)), - ctx, - platform_config, - std::nullopt); - - // Check the geometry dataframe exists and it cannot be read as a - // different object. 
- REQUIRE(SOMAGeometryDataFrame::exists(uri, ctx)); - REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADataFrame::exists(uri, ctx)); - - auto soma_geometry = SOMAGeometryDataFrame::open( - uri, - OpenMode::read, - ctx, - {}, // column_names, - ResultOrder::automatic, - std::nullopt); - REQUIRE(soma_geometry->uri() == uri); - REQUIRE(soma_geometry->ctx() == ctx); - REQUIRE(soma_geometry->type() == "SOMAGeometryDataFrame"); - std::vector expected_index_column_names = { - dim_infos[0].name, - SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[0].name + - "__min", - SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[1].name + - "__min", - SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[0].name + - "__max", - SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[1].name + - "__max"}; - - std::vector expected_spatial_column_names = { - spatial_dim_infos[0].name, spatial_dim_infos[1].name}; - REQUIRE( - soma_geometry->index_column_names() == expected_index_column_names); - REQUIRE( - soma_geometry->spatial_column_names() == - expected_spatial_column_names); - REQUIRE(soma_geometry->nnz() == 0); - soma_geometry->close(); - - auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); - REQUIRE(soma_object->uri() == uri); - REQUIRE(soma_object->type() == "SOMAGeometryDataFrame"); - soma_object->close(); - } + auto ctx = std::make_shared(); + std::string uri{"mem://unit-test-geometry-basic"}; + PlatformConfig platform_config{}; + + std::vector dim_infos( + {helper::DimInfo( + {.name = "soma_joinid", + .tiledb_datatype = TILEDB_INT64, + .dim_max = SOMA_JOINID_DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}), + helper::DimInfo( + {.name = "soma_geometry", + .tiledb_datatype = TILEDB_GEOM_WKB, + .dim_max = 100, + .string_lo = "N/A", + .string_hi = "N/A"})}); + + std::vector spatial_dim_infos( + {helper::DimInfo( + {.name = "x", + .tiledb_datatype = TILEDB_FLOAT64, + .dim_max = 200, + .string_lo = "N/A", + 
.string_hi = "N/A"}), + helper::DimInfo( + {.name = "y", + .tiledb_datatype = TILEDB_FLOAT64, + .dim_max = 100, + .string_lo = "N/A", + .string_hi = "N/A"})}); + + std::vector attr_infos({helper::AttrInfo( + {.name = "quality", .tiledb_datatype = TILEDB_FLOAT64})}); + + // Check the geometry dataframe doesn't exist yet. + REQUIRE(!SOMAGeometryDataFrame::exists(uri, ctx)); + + // Create the geometry dataframe. + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + auto spatial_columns = helper::create_column_index_info(spatial_dim_infos); + + SOMAGeometryDataFrame::create( + uri, + std::move(schema), + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ArrowTable( + std::move(spatial_columns.first), + std::move(spatial_columns.second)), + ctx, + platform_config, + std::nullopt); + + // Check the geometry dataframe exists and it cannot be read as a + // different object. + REQUIRE(SOMAGeometryDataFrame::exists(uri, ctx)); + REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADataFrame::exists(uri, ctx)); + + auto soma_geometry = SOMAGeometryDataFrame::open( + uri, + OpenMode::read, + ctx, + {}, // column_names, + ResultOrder::automatic, + std::nullopt); + REQUIRE(soma_geometry->uri() == uri); + REQUIRE(soma_geometry->ctx() == ctx); + REQUIRE(soma_geometry->type() == "SOMAGeometryDataFrame"); + std::vector expected_index_column_names = { + dim_infos[0].name, + SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[0].name + "__min", + SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[1].name + "__min", + SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[0].name + "__max", + SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[1].name + "__max"}; + + std::vector expected_spatial_column_names = { + spatial_dim_infos[0].name, spatial_dim_infos[1].name}; + REQUIRE(soma_geometry->index_column_names() == expected_index_column_names); + 
REQUIRE( + soma_geometry->spatial_column_names() == expected_spatial_column_names); + REQUIRE(soma_geometry->nnz() == 0); + soma_geometry->close(); + + auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); + REQUIRE(soma_object->uri() == uri); + REQUIRE(soma_object->type() == "SOMAGeometryDataFrame"); + soma_object->close(); } TEST_CASE("SOMAGeometryDataFrame: Roundtrip", "[SOMAGeometryDataFrame]") { - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - std::string uri{"mem://unit-test-geometry-roundtrip"}; - PlatformConfig platform_config{}; - - std::vector dim_infos( - {helper::DimInfo( - {.name = "soma_joinid", - .tiledb_datatype = TILEDB_INT64, - .dim_max = SOMA_JOINID_DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}), - helper::DimInfo( - {.name = "soma_geometry", - .tiledb_datatype = TILEDB_GEOM_WKB, - .dim_max = 100, - .string_lo = "N/A", - .string_hi = "N/A"})}); - - std::vector spatial_dim_infos( - {helper::DimInfo( - {.name = "x", - .tiledb_datatype = TILEDB_FLOAT64, - .dim_max = 200, - .string_lo = "N/A", - .string_hi = "N/A"}), - helper::DimInfo( - {.name = "y", - .tiledb_datatype = TILEDB_FLOAT64, - .dim_max = 100, - .string_lo = "N/A", - .string_hi = "N/A"})}); - - std::vector attr_infos({helper::AttrInfo( - {.name = "quality", .tiledb_datatype = TILEDB_FLOAT64})}); - - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - auto spatial_columns = helper::create_column_index_info( - spatial_dim_infos); - - SOMAGeometryDataFrame::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ArrowTable( - std::move(spatial_columns.first), - std::move(spatial_columns.second)), - ctx, - platform_config, - std::nullopt); - - // Create table of data for writing - std::unique_ptr - data_schema = std::make_unique(ArrowSchema{}); - 
std::unique_ptr data_array = std::make_unique( - ArrowArray{}); - - nanoarrow::UniqueBuffer metadata_buffer; - ArrowMetadataBuilderInit(metadata_buffer.get(), nullptr); - ArrowMetadataBuilderAppend( - metadata_buffer.get(), - ArrowCharView("geometry_type"), - ArrowCharView("polygon_ring")); - - ArrowSchemaInitFromType(data_schema.get(), NANOARROW_TYPE_STRUCT); - ArrowSchemaAllocateChildren(data_schema.get(), 3); - ArrowSchemaInitFromType(data_schema->children[0], NANOARROW_TYPE_LIST); - ArrowSchemaSetMetadata( - data_schema->children[0], - std::string( - (char*)metadata_buffer->data, metadata_buffer->size_bytes) - .c_str()); - ArrowSchemaSetType( - data_schema->children[0]->children[0], NANOARROW_TYPE_DOUBLE); - ArrowSchemaSetName(data_schema->children[0], "soma_geometry"); - ArrowSchemaInitFromType(data_schema->children[1], NANOARROW_TYPE_INT64); - ArrowSchemaSetName(data_schema->children[1], "soma_joinid"); - ArrowSchemaInitFromType( - data_schema->children[2], NANOARROW_TYPE_DOUBLE); - ArrowSchemaSetName(data_schema->children[2], "quality"); - - ArrowArrayInitFromType(data_array.get(), NANOARROW_TYPE_STRUCT); - ArrowArrayAllocateChildren(data_array.get(), 3); - ArrowArrayInitFromType(data_array->children[0], NANOARROW_TYPE_LIST); - ArrowArrayInitFromType(data_array->children[1], NANOARROW_TYPE_INT64); - ArrowArrayInitFromType(data_array->children[2], NANOARROW_TYPE_DOUBLE); - ArrowArrayAllocateChildren(data_array->children[0], 1); - ArrowArrayInitFromType( - data_array->children[0]->children[0], NANOARROW_TYPE_DOUBLE); - ArrowArrayStartAppending(data_array->children[0]); - ArrowArrayStartAppending(data_array->children[0]->children[0]); - ArrowArrayStartAppending(data_array->children[1]); - ArrowArrayStartAppending(data_array->children[2]); - - geometry::GenericGeometry polygon = geometry::Polygon( - std::vector( - {geometry::BasePoint(0, 0), - geometry::BasePoint(1, 0), - geometry::BasePoint(0, 1)})); - NANOARROW_THROW_NOT_OK(ArrowBufferAppendUInt32( - 
ArrowArrayBuffer(data_array->children[0], 1), 0)); - data_array->children[0]->length = 1; - NANOARROW_THROW_NOT_OK( - ArrowArrayAppendDouble(data_array->children[0]->children[0], 0)); - NANOARROW_THROW_NOT_OK( - ArrowArrayAppendDouble(data_array->children[0]->children[0], 0)); - NANOARROW_THROW_NOT_OK( - ArrowArrayAppendDouble(data_array->children[0]->children[0], 1)); - NANOARROW_THROW_NOT_OK( - ArrowArrayAppendDouble(data_array->children[0]->children[0], 0)); - NANOARROW_THROW_NOT_OK( - ArrowArrayAppendDouble(data_array->children[0]->children[0], 0)); - NANOARROW_THROW_NOT_OK( - ArrowArrayAppendDouble(data_array->children[0]->children[0], 1)); - NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(data_array->children[1], 1)); - NANOARROW_THROW_NOT_OK( - ArrowArrayAppendDouble(data_array->children[2], 63)); - - NANOARROW_THROW_NOT_OK( - ArrowArrayFinishBuildingDefault(data_array->children[0], nullptr)); - NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault( - data_array->children[0]->children[0], nullptr)); - NANOARROW_THROW_NOT_OK( - ArrowArrayFinishBuildingDefault(data_array->children[1], nullptr)); - NANOARROW_THROW_NOT_OK( - ArrowArrayFinishBuildingDefault(data_array->children[2], nullptr)); - - // Write to point cloud. - auto soma_geometry = SOMAGeometryDataFrame::open( - uri, - OpenMode::write, - ctx, - {}, // column_names - ResultOrder::automatic, - std::nullopt); - - soma_geometry->set_array_data( - std::move(data_schema), std::move(data_array)); - soma_geometry->write(); - soma_geometry->close(); - - // Read back the data. 
- soma_geometry = SOMAGeometryDataFrame::open( - uri, - OpenMode::read, - ctx, - {}, // column_names, - ResultOrder::automatic, - std::nullopt); - - while (auto batch = soma_geometry->read_next()) { - auto arrbuf = batch.value(); - auto d0span = arrbuf->at(dim_infos[0].name)->data(); - auto d1span = arrbuf - ->at( - SOMA_GEOMETRY_DIMENSION_PREFIX + - spatial_dim_infos[0].name + "__min") - ->data(); - auto d2span = arrbuf - ->at( - SOMA_GEOMETRY_DIMENSION_PREFIX + - spatial_dim_infos[0].name + "__max") - ->data(); - auto d3span = arrbuf - ->at( - SOMA_GEOMETRY_DIMENSION_PREFIX + - spatial_dim_infos[1].name + "__min") - ->data(); - auto d4span = arrbuf - ->at( - SOMA_GEOMETRY_DIMENSION_PREFIX + - spatial_dim_infos[1].name + "__max") - ->data(); - auto wkbs = arrbuf->at(dim_infos[1].name)->binaries(); - auto a0span = arrbuf->at(attr_infos[0].name)->data(); - CHECK( - std::vector({1}) == - std::vector(d0span.begin(), d0span.end())); - CHECK( - std::vector({0}) == - std::vector(d1span.begin(), d1span.end())); - CHECK( - std::vector({1}) == - std::vector(d2span.begin(), d2span.end())); - CHECK( - std::vector({0}) == - std::vector(d3span.begin(), d3span.end())); - CHECK( - std::vector({1}) == - std::vector(d4span.begin(), d4span.end())); - CHECK(geometry::to_wkb(polygon) == wkbs[0]); - CHECK( - std::vector({63}) == - std::vector(a0span.begin(), a0span.end())); - } - soma_geometry->close(); - - auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); - REQUIRE(soma_object->uri() == uri); - REQUIRE(soma_object->type() == "SOMAGeometryDataFrame"); - soma_object->close(); + auto ctx = std::make_shared(); + std::string uri{"mem://unit-test-geometry-roundtrip"}; + PlatformConfig platform_config{}; + + std::vector dim_infos( + {helper::DimInfo( + {.name = "soma_joinid", + .tiledb_datatype = TILEDB_INT64, + .dim_max = SOMA_JOINID_DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}), + helper::DimInfo( + {.name = "soma_geometry", + .tiledb_datatype = TILEDB_GEOM_WKB, + 
.dim_max = 100, + .string_lo = "N/A", + .string_hi = "N/A"})}); + + std::vector spatial_dim_infos( + {helper::DimInfo( + {.name = "x", + .tiledb_datatype = TILEDB_FLOAT64, + .dim_max = 200, + .string_lo = "N/A", + .string_hi = "N/A"}), + helper::DimInfo( + {.name = "y", + .tiledb_datatype = TILEDB_FLOAT64, + .dim_max = 100, + .string_lo = "N/A", + .string_hi = "N/A"})}); + + std::vector attr_infos({helper::AttrInfo( + {.name = "quality", .tiledb_datatype = TILEDB_FLOAT64})}); + + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + auto spatial_columns = helper::create_column_index_info(spatial_dim_infos); + + SOMAGeometryDataFrame::create( + uri, + std::move(schema), + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ArrowTable( + std::move(spatial_columns.first), + std::move(spatial_columns.second)), + ctx, + platform_config, + std::nullopt); + + // Create table of data for writing + std::unique_ptr data_schema = std::make_unique( + ArrowSchema{}); + std::unique_ptr data_array = std::make_unique( + ArrowArray{}); + + nanoarrow::UniqueBuffer metadata_buffer; + ArrowMetadataBuilderInit(metadata_buffer.get(), nullptr); + ArrowMetadataBuilderAppend( + metadata_buffer.get(), + ArrowCharView("geometry_type"), + ArrowCharView("polygon_ring")); + + ArrowSchemaInitFromType(data_schema.get(), NANOARROW_TYPE_STRUCT); + ArrowSchemaAllocateChildren(data_schema.get(), 3); + ArrowSchemaInitFromType(data_schema->children[0], NANOARROW_TYPE_LIST); + ArrowSchemaSetMetadata( + data_schema->children[0], + std::string((char*)metadata_buffer->data, metadata_buffer->size_bytes) + .c_str()); + ArrowSchemaSetType( + data_schema->children[0]->children[0], NANOARROW_TYPE_DOUBLE); + ArrowSchemaSetName(data_schema->children[0], "soma_geometry"); + ArrowSchemaInitFromType(data_schema->children[1], NANOARROW_TYPE_INT64); + ArrowSchemaSetName(data_schema->children[1], "soma_joinid"); + 
ArrowSchemaInitFromType(data_schema->children[2], NANOARROW_TYPE_DOUBLE); + ArrowSchemaSetName(data_schema->children[2], "quality"); + + ArrowArrayInitFromType(data_array.get(), NANOARROW_TYPE_STRUCT); + ArrowArrayAllocateChildren(data_array.get(), 3); + ArrowArrayInitFromType(data_array->children[0], NANOARROW_TYPE_LIST); + ArrowArrayInitFromType(data_array->children[1], NANOARROW_TYPE_INT64); + ArrowArrayInitFromType(data_array->children[2], NANOARROW_TYPE_DOUBLE); + ArrowArrayAllocateChildren(data_array->children[0], 1); + ArrowArrayInitFromType( + data_array->children[0]->children[0], NANOARROW_TYPE_DOUBLE); + ArrowArrayStartAppending(data_array->children[0]); + ArrowArrayStartAppending(data_array->children[0]->children[0]); + ArrowArrayStartAppending(data_array->children[1]); + ArrowArrayStartAppending(data_array->children[2]); + + geometry::GenericGeometry polygon = geometry::Polygon( + std::vector( + {geometry::BasePoint(0, 0), + geometry::BasePoint(1, 0), + geometry::BasePoint(0, 1)})); + NANOARROW_THROW_NOT_OK(ArrowBufferAppendUInt32( + ArrowArrayBuffer(data_array->children[0], 1), 0)); + data_array->children[0]->length = 1; + NANOARROW_THROW_NOT_OK( + ArrowArrayAppendDouble(data_array->children[0]->children[0], 0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayAppendDouble(data_array->children[0]->children[0], 0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayAppendDouble(data_array->children[0]->children[0], 1)); + NANOARROW_THROW_NOT_OK( + ArrowArrayAppendDouble(data_array->children[0]->children[0], 0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayAppendDouble(data_array->children[0]->children[0], 0)); + NANOARROW_THROW_NOT_OK( + ArrowArrayAppendDouble(data_array->children[0]->children[0], 1)); + NANOARROW_THROW_NOT_OK(ArrowArrayAppendInt(data_array->children[1], 1)); + NANOARROW_THROW_NOT_OK(ArrowArrayAppendDouble(data_array->children[2], 63)); + + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuildingDefault(data_array->children[0], nullptr)); + 
NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault( + data_array->children[0]->children[0], nullptr)); + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuildingDefault(data_array->children[1], nullptr)); + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuildingDefault(data_array->children[2], nullptr)); + + // Write to the geometry dataframe. + auto soma_geometry = SOMAGeometryDataFrame::open( + uri, + OpenMode::write, + ctx, + {}, // column_names + ResultOrder::automatic, + std::nullopt); + + soma_geometry->set_array_data( + std::move(data_schema), std::move(data_array)); + soma_geometry->write(); + soma_geometry->close(); + + // Read back the data. + soma_geometry = SOMAGeometryDataFrame::open( + uri, + OpenMode::read, + ctx, + {}, // column_names, + ResultOrder::automatic, + std::nullopt); + + while (auto batch = soma_geometry->read_next()) { + auto arrbuf = batch.value(); + auto d0span = arrbuf->at(dim_infos[0].name)->data(); + auto d1span = arrbuf + ->at( + SOMA_GEOMETRY_DIMENSION_PREFIX + + spatial_dim_infos[0].name + "__min") + ->data(); + auto d2span = arrbuf + ->at( + SOMA_GEOMETRY_DIMENSION_PREFIX + + spatial_dim_infos[0].name + "__max") + ->data(); + auto d3span = arrbuf + ->at( + SOMA_GEOMETRY_DIMENSION_PREFIX + + spatial_dim_infos[1].name + "__min") + ->data(); + auto d4span = arrbuf + ->at( + SOMA_GEOMETRY_DIMENSION_PREFIX + + spatial_dim_infos[1].name + "__max") + ->data(); + auto wkbs = arrbuf->at(dim_infos[1].name)->binaries(); + auto a0span = arrbuf->at(attr_infos[0].name)->data(); + CHECK( + std::vector({1}) == + std::vector(d0span.begin(), d0span.end())); + CHECK( + std::vector({0}) == + std::vector(d1span.begin(), d1span.end())); + CHECK( + std::vector({1}) == + std::vector(d2span.begin(), d2span.end())); + CHECK( + std::vector({0}) == + std::vector(d3span.begin(), d3span.end())); + CHECK( + std::vector({1}) == + std::vector(d4span.begin(), d4span.end())); + CHECK(geometry::to_wkb(polygon) == wkbs[0]); + CHECK( + std::vector({63}) == + 
std::vector(a0span.begin(), a0span.end())); } + soma_geometry->close(); + + auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); + REQUIRE(soma_object->uri() == uri); + REQUIRE(soma_object->type() == "SOMAGeometryDataFrame"); + soma_object->close(); } diff --git a/libtiledbsoma/test/unit_soma_point_cloud_dataframe.cc b/libtiledbsoma/test/unit_soma_point_cloud_dataframe.cc index 8b03620f07..3c3da91258 100644 --- a/libtiledbsoma/test/unit_soma_point_cloud_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_point_cloud_dataframe.cc @@ -36,131 +36,120 @@ const int64_t SOMA_JOINID_DIM_MAX = 99; TEST_CASE("SOMAPointCloudDataFrame: basic", "[SOMAPointCloudDataFrame]") { - auto use_current_domain = GENERATE(true); + auto ctx = std::make_shared(); + std::string uri{"mem://unit-test-point-cloud-basic"}; + PlatformConfig platform_config{}; - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - std::string uri{"mem://unit-test-point-cloud-basic"}; - PlatformConfig platform_config{}; + std::vector dim_infos({ + helper::DimInfo( + {.name = "soma_joinid", + .tiledb_datatype = TILEDB_INT64, + .dim_max = SOMA_JOINID_DIM_MAX, + .string_lo = "N/A", + .string_hi = "N/A"}), + helper::DimInfo( + {.name = "x", + .tiledb_datatype = TILEDB_UINT32, + .dim_max = 100, + .string_lo = "N/A", + .string_hi = "N/A"}), + helper::DimInfo( + {.name = "y", + .tiledb_datatype = TILEDB_UINT32, + .dim_max = 100, + .string_lo = "N/A", + .string_hi = "N/A"}), + }); - std::vector dim_infos({ - helper::DimInfo( - {.name = "soma_joinid", - .tiledb_datatype = TILEDB_INT64, - .dim_max = SOMA_JOINID_DIM_MAX, - .string_lo = "N/A", - .string_hi = "N/A"}), - helper::DimInfo( - {.name = "x", - .tiledb_datatype = TILEDB_UINT32, - .dim_max = 100, - .string_lo = "N/A", - .string_hi = "N/A"}), - helper::DimInfo( - {.name = "y", - .tiledb_datatype = TILEDB_UINT32, - .dim_max = 100, - .string_lo = "N/A", - .string_hi = "N/A"}), - }); + std::vector 
attr_infos({helper::AttrInfo( + {.name = "radius", .tiledb_datatype = TILEDB_FLOAT64})}); - std::vector attr_infos({helper::AttrInfo( - {.name = "radius", .tiledb_datatype = TILEDB_FLOAT64})}); + // Check the point cloud doesn't exist yet. + REQUIRE(!SOMAPointCloudDataFrame::exists(uri, ctx)); - // Check the point cloud doesn't exist yet. - REQUIRE(!SOMAPointCloudDataFrame::exists(uri, ctx)); + // Create the point cloud. + auto [schema, index_columns] = + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); + SOMAPointCloudDataFrame::create( + uri, + std::move(schema), + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx, + platform_config, + std::nullopt); - // Create the point cloud. - auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns( - dim_infos, attr_infos); - SOMAPointCloudDataFrame::create( - uri, - std::move(schema), - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - platform_config, - std::nullopt); + // Check the point cloud exists and it cannot be read as a different + // object. + REQUIRE(SOMAPointCloudDataFrame::exists(uri, ctx)); + REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADataFrame::exists(uri, ctx)); - // Check the point cloud exists and it cannot be read as a different - // object. 
- REQUIRE(SOMAPointCloudDataFrame::exists(uri, ctx)); - REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADataFrame::exists(uri, ctx)); + auto soma_point_cloud = SOMAPointCloudDataFrame::open( + uri, + OpenMode::read, + ctx, + {}, // column_names, + ResultOrder::automatic, + std::nullopt); + REQUIRE(soma_point_cloud->uri() == uri); + REQUIRE(soma_point_cloud->ctx() == ctx); + REQUIRE(soma_point_cloud->type() == "SOMAPointCloudDataFrame"); + std::vector expected_index_column_names = { + dim_infos[0].name, dim_infos[1].name, dim_infos[2].name}; + REQUIRE( + soma_point_cloud->index_column_names() == expected_index_column_names); + REQUIRE(soma_point_cloud->nnz() == 0); + soma_point_cloud->close(); - auto soma_point_cloud = SOMAPointCloudDataFrame::open( - uri, - OpenMode::read, - ctx, - {}, // column_names, - ResultOrder::automatic, - std::nullopt); - REQUIRE(soma_point_cloud->uri() == uri); - REQUIRE(soma_point_cloud->ctx() == ctx); - REQUIRE(soma_point_cloud->type() == "SOMAPointCloudDataFrame"); - std::vector expected_index_column_names = { - dim_infos[0].name, dim_infos[1].name, dim_infos[2].name}; - REQUIRE( - soma_point_cloud->index_column_names() == - expected_index_column_names); - REQUIRE(soma_point_cloud->nnz() == 0); - soma_point_cloud->close(); + // Create vectors of data for writing. + std::vector d0(10); + std::iota(d0.begin(), d0.end(), 0); + std::vector d1(10); + std::iota(d1.begin(), d1.end(), 1); + std::vector d2(10, 10); + std::iota(d2.begin(), d2.end(), 0.0); + std::vector a0(10, 1.0); - // Create vectors of data for writing. - std::vector d0(10); - std::iota(d0.begin(), d0.end(), 0); - std::vector d1(10); - std::iota(d1.begin(), d1.end(), 1); - std::vector d2(10, 10); - std::iota(d2.begin(), d2.end(), 0.0); - std::vector a0(10, 1.0); + // Write to point cloud. 
+ soma_point_cloud = SOMAPointCloudDataFrame::open( + uri, + OpenMode::write, + ctx, + {}, // column_names + ResultOrder::automatic, + std::nullopt); + soma_point_cloud->set_column_data(dim_infos[0].name, d0.size(), d0.data()); + soma_point_cloud->set_column_data(dim_infos[1].name, d1.size(), d1.data()); + soma_point_cloud->set_column_data(dim_infos[2].name, d2.size(), d2.data()); + soma_point_cloud->set_column_data(attr_infos[0].name, a0.size(), a0.data()); + soma_point_cloud->write(); + soma_point_cloud->close(); - // Write to point cloud. - soma_point_cloud = SOMAPointCloudDataFrame::open( - uri, - OpenMode::write, - ctx, - {}, // column_names - ResultOrder::automatic, - std::nullopt); - soma_point_cloud->set_column_data( - dim_infos[0].name, d0.size(), d0.data()); - soma_point_cloud->set_column_data( - dim_infos[1].name, d1.size(), d1.data()); - soma_point_cloud->set_column_data( - dim_infos[2].name, d2.size(), d2.data()); - soma_point_cloud->set_column_data( - attr_infos[0].name, a0.size(), a0.data()); - soma_point_cloud->write(); - soma_point_cloud->close(); - - // Read back the data. 
- soma_point_cloud = SOMAPointCloudDataFrame::open( - uri, - OpenMode::read, - ctx, - {}, // column_names, - ResultOrder::automatic, - std::nullopt); - while (auto batch = soma_point_cloud->read_next()) { - auto arrbuf = batch.value(); - auto d0span = arrbuf->at(dim_infos[0].name)->data(); - auto d1span = arrbuf->at(dim_infos[1].name)->data(); - auto d2span = arrbuf->at(dim_infos[2].name)->data(); - auto a0span = arrbuf->at(attr_infos[0].name)->data(); - CHECK(d0 == std::vector(d0span.begin(), d0span.end())); - CHECK(d1 == std::vector(d1span.begin(), d1span.end())); - CHECK(d2 == std::vector(d2span.begin(), d2span.end())); - CHECK(a0 == std::vector(a0span.begin(), a0span.end())); - } - soma_point_cloud->close(); - - auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); - REQUIRE(soma_object->uri() == uri); - REQUIRE(soma_object->type() == "SOMAPointCloudDataFrame"); - soma_object->close(); + // Read back the data. + soma_point_cloud = SOMAPointCloudDataFrame::open( + uri, + OpenMode::read, + ctx, + {}, // column_names, + ResultOrder::automatic, + std::nullopt); + while (auto batch = soma_point_cloud->read_next()) { + auto arrbuf = batch.value(); + auto d0span = arrbuf->at(dim_infos[0].name)->data(); + auto d1span = arrbuf->at(dim_infos[1].name)->data(); + auto d2span = arrbuf->at(dim_infos[2].name)->data(); + auto a0span = arrbuf->at(attr_infos[0].name)->data(); + CHECK(d0 == std::vector(d0span.begin(), d0span.end())); + CHECK(d1 == std::vector(d1span.begin(), d1span.end())); + CHECK(d2 == std::vector(d2span.begin(), d2span.end())); + CHECK(a0 == std::vector(a0span.begin(), a0span.end())); } + soma_point_cloud->close(); + + auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx); + REQUIRE(soma_object->uri() == uri); + REQUIRE(soma_object->type() == "SOMAPointCloudDataFrame"); + soma_object->close(); } diff --git a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc index fa857add8e..b386443213 100644 --- 
a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc @@ -40,260 +40,243 @@ TEST_CASE("SOMASparseNDArray: basic", "[SOMASparseNDArray]") { int64_t dim_max = 999; int64_t shape = 1000; - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-sparse-ndarray-basic"; - std::string dim_name = "soma_dim_0"; - std::string attr_name = "soma_data"; - tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; - tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; - std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( - dim_tiledb_datatype); - std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( - attr_tiledb_datatype); - - REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = dim_tiledb_datatype, - .dim_max = dim_max, - .string_lo = "N/A", - .string_hi = "N/A"}}); - - auto index_columns = helper::create_column_index_info(dim_infos); - - SOMASparseNDArray::create( - uri, - attr_arrow_format, - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - REQUIRE(SOMASparseNDArray::exists(uri, ctx)); - REQUIRE(!SOMADataFrame::exists(uri, ctx)); - REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); - - auto snda = SOMASparseNDArray::open(uri, OpenMode::read, ctx); - REQUIRE(snda->uri() == uri); - REQUIRE(snda->ctx() == ctx); - REQUIRE(snda->type() == "SOMASparseNDArray"); - REQUIRE(snda->is_sparse() == true); - REQUIRE(snda->soma_data_type() == attr_arrow_format); - auto schema = snda->tiledb_schema(); - REQUIRE(schema->has_attribute(attr_name)); - REQUIRE(schema->array_type() == TILEDB_SPARSE); - REQUIRE(schema->domain().has_dimension(dim_name)); - REQUIRE(snda->ndim() == 1); - REQUIRE(snda->nnz() == 0); - - auto expect = std::vector({shape}); - 
REQUIRE(snda->shape() == expect); - - snda->close(); - - std::vector d0(10); - for (int j = 0; j < 10; j++) - d0[j] = j; - std::vector a0(10, 1); - - snda->open(OpenMode::write); - snda->set_column_data(dim_name, d0.size(), d0.data()); - snda->set_column_data(attr_name, a0.size(), a0.data()); - snda->write(); - snda->close(); - - snda->open(OpenMode::read); - while (auto batch = snda->read_next()) { - auto arrbuf = batch.value(); - auto d0span = arrbuf->at(dim_name)->data(); - auto a0span = arrbuf->at(attr_name)->data(); - REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); - REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); - } - snda->close(); - - std::vector d0b({dim_max, dim_max + 1}); - std::vector a0b({30, 40}); - - // Try out-of-bounds write before resize. - // * Without current domain support: this should throw since it's - // outside the (immutable) doqain. - // * With current domain support: this should throw since it's outside - // the (mutable) current domain. - snda = SOMASparseNDArray::open(uri, OpenMode::write, ctx); - snda->set_column_data(dim_name, d0b.size(), d0b.data()); - snda->set_column_data(attr_name, a0b.size(), a0b.data()); - REQUIRE_THROWS(snda->write()); - snda->close(); - - auto new_shape = std::vector({shape * 2}); - - snda = SOMASparseNDArray::open(uri, OpenMode::write, ctx); - // Should throw since this already has a shape (core current - // domain). - REQUIRE_THROWS(snda->upgrade_shape(new_shape, "testing")); - snda->resize(new_shape, "testing"); - snda->close(); - - // Try out-of-bounds write after resize. 
- snda->open(OpenMode::write); - snda->set_column_data(dim_name, d0b.size(), d0b.data()); - snda->set_column_data(attr_name, a0b.size(), a0b.data()); - // Implicitly checking for no throw - snda->write(); - snda->close(); - - snda->open(OpenMode::read); - REQUIRE(snda->shape() == new_shape); - snda->close(); + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-sparse-ndarray-basic"; + std::string dim_name = "soma_dim_0"; + std::string attr_name = "soma_data"; + tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; + tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; + std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( + dim_tiledb_datatype); + std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( + attr_tiledb_datatype); + + REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = dim_tiledb_datatype, + .dim_max = dim_max, + .string_lo = "N/A", + .string_hi = "N/A"}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + + SOMASparseNDArray::create( + uri, + attr_arrow_format, + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + + REQUIRE(SOMASparseNDArray::exists(uri, ctx)); + REQUIRE(!SOMADataFrame::exists(uri, ctx)); + REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); + + auto snda = SOMASparseNDArray::open(uri, OpenMode::read, ctx); + REQUIRE(snda->uri() == uri); + REQUIRE(snda->ctx() == ctx); + REQUIRE(snda->type() == "SOMASparseNDArray"); + REQUIRE(snda->is_sparse() == true); + REQUIRE(snda->soma_data_type() == attr_arrow_format); + auto schema = snda->tiledb_schema(); + REQUIRE(schema->has_attribute(attr_name)); + REQUIRE(schema->array_type() == TILEDB_SPARSE); + REQUIRE(schema->domain().has_dimension(dim_name)); + REQUIRE(snda->ndim() == 1); + REQUIRE(snda->nnz() == 0); + + auto expect = std::vector({shape}); + REQUIRE(snda->shape() == expect); + + 
snda->close(); + + std::vector d0(10); + for (int j = 0; j < 10; j++) + d0[j] = j; + std::vector a0(10, 1); + + snda->open(OpenMode::write); + snda->set_column_data(dim_name, d0.size(), d0.data()); + snda->set_column_data(attr_name, a0.size(), a0.data()); + snda->write(); + snda->close(); + + snda->open(OpenMode::read); + while (auto batch = snda->read_next()) { + auto arrbuf = batch.value(); + auto d0span = arrbuf->at(dim_name)->data(); + auto a0span = arrbuf->at(attr_name)->data(); + REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); + REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); } + snda->close(); + + std::vector d0b({dim_max, dim_max + 1}); + std::vector a0b({30, 40}); + + // Try out-of-bounds write before resize. + // * Without current domain support: this should throw since it's + // outside the (immutable) domain. + // * With current domain support: this should throw since it's outside + // the (mutable) current domain. + snda = SOMASparseNDArray::open(uri, OpenMode::write, ctx); + snda->set_column_data(dim_name, d0b.size(), d0b.data()); + snda->set_column_data(attr_name, a0b.size(), a0b.data()); + REQUIRE_THROWS(snda->write()); + snda->close(); + + auto new_shape = std::vector({shape * 2}); + + snda = SOMASparseNDArray::open(uri, OpenMode::write, ctx); + // Should throw since this already has a shape (core current + // domain). + REQUIRE_THROWS(snda->upgrade_shape(new_shape, "testing")); + snda->resize(new_shape, "testing"); + snda->close(); + + // Try out-of-bounds write after resize.
+ snda->open(OpenMode::write); + snda->set_column_data(dim_name, d0b.size(), d0b.data()); + snda->set_column_data(attr_name, a0b.size(), a0b.data()); + // Implicitly checking for no throw + snda->write(); + snda->close(); + + snda->open(OpenMode::read); + REQUIRE(snda->shape() == new_shape); + snda->close(); } TEST_CASE("SOMASparseNDArray: platform_config", "[SOMASparseNDArray]") { int64_t dim_max = 999; - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dataframe-platform-config"; - std::string dim_name = "soma_dim_0"; - tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; - tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; - std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( - dim_tiledb_datatype); - std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( - attr_tiledb_datatype); - - PlatformConfig platform_config; - platform_config.sparse_nd_array_dim_zstd_level = 6; - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = dim_tiledb_datatype, - .dim_max = dim_max, - .string_lo = "N/A", - .string_hi = "N/A"}}); - - auto index_columns = helper::create_column_index_info(dim_infos); - - SOMASparseNDArray::create( - uri, - attr_arrow_format, - ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - platform_config); - - auto soma_dataframe = SOMASparseNDArray::open(uri, OpenMode::read, ctx); - auto dim_filter = soma_dataframe->tiledb_schema() - ->domain() - .dimension(dim_name) - .filter_list() - .filter(0); - REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); - REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); - - soma_dataframe->close(); - } + auto ctx = std::make_shared(); + std::string uri = "mem://unit-test-dataframe-platform-config"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; + 
tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; + std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( + dim_tiledb_datatype); + std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( + attr_tiledb_datatype); + + PlatformConfig platform_config; + platform_config.sparse_nd_array_dim_zstd_level = 6; + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = dim_tiledb_datatype, + .dim_max = dim_max, + .string_lo = "N/A", + .string_hi = "N/A"}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + + SOMASparseNDArray::create( + uri, + attr_arrow_format, + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx, + platform_config); + + auto soma_dataframe = SOMASparseNDArray::open(uri, OpenMode::read, ctx); + auto dim_filter = soma_dataframe->tiledb_schema() + ->domain() + .dimension(dim_name) + .filter_list() + .filter(0); + REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); + REQUIRE(dim_filter.get_option(TILEDB_COMPRESSION_LEVEL) == 6); + + soma_dataframe->close(); } TEST_CASE("SOMASparseNDArray: metadata", "[SOMASparseNDArray]") { int64_t dim_max = 999; - auto use_current_domain = GENERATE(true); - - SECTION(std::format("- use_current_domain={}", use_current_domain)) { - auto ctx = std::make_shared(); - - std::string uri = "mem://unit-test-sparse-ndarray"; - std::string dim_name = "soma_dim_0"; - tiledb_datatype_t dim_tiledb_datatype = TILEDB_INT64; - tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; - std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( - dim_tiledb_datatype); - std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( - attr_tiledb_datatype); - - std::vector dim_infos( - {{.name = dim_name, - .tiledb_datatype = dim_tiledb_datatype, - .dim_max = dim_max, - .string_lo = "N/A", - .string_hi = "N/A"}}); - - auto index_columns = helper::create_column_index_info(dim_infos); - - SOMASparseNDArray::create( - uri, - attr_arrow_format, - 
ArrowTable( - std::move(index_columns.first), - std::move(index_columns.second)), - ctx, - PlatformConfig(), - TimestampRange(0, 2)); - - auto snda = SOMASparseNDArray::open( - uri, - OpenMode::write, - ctx, - {}, - ResultOrder::automatic, - std::pair(1, 1)); - - int32_t val = 100; - snda->set_metadata("md", TILEDB_INT32, 1, &val); - snda->close(); - - // Read metadata - snda->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(snda->metadata_num() == 3); - REQUIRE(snda->has_metadata("soma_object_type")); - REQUIRE(snda->has_metadata("soma_encoding_version")); - REQUIRE(snda->has_metadata("md")); - auto mdval = snda->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - snda->close(); - - // md should not be available at (2, 2) - snda->open(OpenMode::read, TimestampRange(2, 2)); - REQUIRE(snda->metadata_num() == 2); - REQUIRE(snda->has_metadata("soma_object_type")); - REQUIRE(snda->has_metadata("soma_encoding_version")); - REQUIRE(!snda->has_metadata("md")); - snda->close(); - - // Metadata should also be retrievable in write mode - snda->open(OpenMode::write, TimestampRange(0, 2)); - REQUIRE(snda->metadata_num() == 3); - REQUIRE(snda->has_metadata("soma_object_type")); - REQUIRE(snda->has_metadata("soma_encoding_version")); - REQUIRE(snda->has_metadata("md")); - mdval = snda->get_metadata("md"); - REQUIRE( - *((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write - // mode - snda->delete_metadata("md"); - mdval = snda->get_metadata("md"); - REQUIRE(!mdval.has_value()); - snda->close(); - - // Confirm delete in read mode - snda->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(!snda->has_metadata("md")); - REQUIRE(snda->metadata_num() == 2); - } + auto ctx = std::make_shared(); + + std::string uri = "mem://unit-test-sparse-ndarray"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t 
dim_tiledb_datatype = TILEDB_INT64; + tiledb_datatype_t attr_tiledb_datatype = TILEDB_INT32; + std::string dim_arrow_format = ArrowAdapter::tdb_to_arrow_type( + dim_tiledb_datatype); + std::string attr_arrow_format = ArrowAdapter::tdb_to_arrow_type( + attr_tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = dim_tiledb_datatype, + .dim_max = dim_max, + .string_lo = "N/A", + .string_hi = "N/A"}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + + SOMASparseNDArray::create( + uri, + attr_arrow_format, + ArrowTable( + std::move(index_columns.first), std::move(index_columns.second)), + ctx, + PlatformConfig(), + TimestampRange(0, 2)); + + auto snda = SOMASparseNDArray::open( + uri, + OpenMode::write, + ctx, + {}, + ResultOrder::automatic, + std::pair(1, 1)); + + int32_t val = 100; + snda->set_metadata("md", TILEDB_INT32, 1, &val); + snda->close(); + + // Read metadata + snda->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(snda->metadata_num() == 3); + REQUIRE(snda->has_metadata("soma_object_type")); + REQUIRE(snda->has_metadata("soma_encoding_version")); + REQUIRE(snda->has_metadata("md")); + auto mdval = snda->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); + REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + snda->close(); + + // md should not be available at (2, 2) + snda->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(snda->metadata_num() == 2); + REQUIRE(snda->has_metadata("soma_object_type")); + REQUIRE(snda->has_metadata("soma_encoding_version")); + REQUIRE(!snda->has_metadata("md")); + snda->close(); + + // Metadata should also be retrievable in write mode + snda->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(snda->metadata_num() == 3); + REQUIRE(snda->has_metadata("soma_object_type")); + REQUIRE(snda->has_metadata("soma_encoding_version")); + REQUIRE(snda->has_metadata("md")); + mdval = snda->get_metadata("md"); + 
REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write + // mode + snda->delete_metadata("md"); + mdval = snda->get_metadata("md"); + REQUIRE(!mdval.has_value()); + snda->close(); + + // Confirm delete in read mode + snda->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!snda->has_metadata("md")); + REQUIRE(snda->metadata_num() == 2); } TEST_CASE(