From f0942d9e7ce68fe6cce372be59c557759b949e22 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Fri, 5 Jul 2024 15:20:41 +0300 Subject: [PATCH] Wrap as_built function (#1994) --- tiledb/__init__.py | 1 + tiledb/core.cc | 27 ++++++++++----- tiledb/highlevel.py | 17 +++++++++ tiledb/tests/test_libtiledb.py | 63 +++++++++++++++++++++++++++++++++- 4 files changed, 98 insertions(+), 10 deletions(-) diff --git a/tiledb/__init__.py b/tiledb/__init__.py index d3ae82b81c..f7593356a7 100644 --- a/tiledb/__init__.py +++ b/tiledb/__init__.py @@ -82,6 +82,7 @@ from .highlevel import ( array_exists, array_fragments, + as_built, empty_like, from_numpy, open, diff --git a/tiledb/core.cc b/tiledb/core.cc index 7d3c9e1d82..8a5f778f7a 100644 --- a/tiledb/core.cc +++ b/tiledb/core.cc @@ -80,14 +80,12 @@ static std::unique_ptr g_stats; py::dtype tiledb_dtype(tiledb_datatype_t type, uint32_t cell_val_num); struct BufferInfo { - BufferInfo(std::string name, size_t data_nbytes, tiledb_datatype_t data_type, uint32_t cell_val_num, size_t offsets_num, size_t validity_num, bool isvar = false, bool isnullable = false) : name(name), type(data_type), cell_val_num(cell_val_num), isvar(isvar), isnullable(isnullable) { - try { dtype = tiledb_dtype(data_type, cell_val_num); elem_nbytes = tiledb_datatype_size(type); @@ -282,7 +280,6 @@ uint64_t count_zeros(py::array_t a) { } class PyAgg { - using ByteBuffer = py::array_t; using AggToBufferMap = std::map; using AttrToAggsMap = std::map; @@ -524,7 +521,6 @@ class PyAgg { }; class PyQuery { - private: Context ctx_; std::shared_ptr domain_; @@ -762,7 +758,6 @@ class PyQuery { bool is_sparse() { return array_->schema().array_type() == TILEDB_SPARSE; } void import_buffer(std::string name, py::array data, py::array offsets) { - tiledb_datatype_t type; uint32_t cell_val_num; std::tie(type, cell_val_num) = buffer_type(name); @@ -939,7 +934,6 @@ class PyQuery { auto offset_ptr = buf.offsets.mutable_data(); if (buf.isvar) { - if (offset_elem_num > 0) { // account for 'sm.var_offsets.extra_element' offset_elem_num -= (use_arrow_) ? 1 : 0; @@ -1120,7 +1114,6 @@ class PyQuery { } void allocate_buffers() { - // allocate buffers for dims // - we want to return dims first, if any requested for (size_t dim_idx = 0; dim_idx < domain_->ndim(); dim_idx++) { @@ -1260,7 +1253,6 @@ class PyQuery { py::array unpack_buffer(std::string name, py::array buf, py::array_t off) { - auto start = std::chrono::high_resolution_clock::now(); if (off.size() < 1) @@ -1673,6 +1665,22 @@ py::object python_internal_stats(bool dict = false) { } } +py::str as_built_dump() { + tiledb_string_t *s; + int rc = tiledb_as_built_dump(&s); + if (rc != TILEDB_OK) { + TPY_ERROR_LOC("Could not dump as built."); + } + const char *data_ptr; + py::size_t length; + + tiledb_string_view(s, &data_ptr, &length); + py::str res(data_ptr, length); + tiledb_string_free(&s); + + return res; +} + void init_core(py::module &m) { init_query_condition(m); @@ -1724,12 +1732,13 @@ void init_core(py::module &m) { m.def("array_to_buffer", &convert_np); m.def("init_stats", &init_stats); - m.def("disable_stats", &init_stats); + m.def("disable_stats", &disable_stats); m.def("python_internal_stats", &python_internal_stats, py::arg("dict") = false); m.def("increment_stat", &increment_stat); m.def("get_stats", &get_stats); m.def("use_stats", &use_stats); + m.def("as_built_dump", &as_built_dump); /* We need to make sure C++ TileDBError is translated to a correctly-typed py diff --git a/tiledb/highlevel.py b/tiledb/highlevel.py index 5a1b11b6b3..b29c89d535 100644 --- a/tiledb/highlevel.py +++ b/tiledb/highlevel.py @@ -1,3 +1,5 @@ +import json + import numpy as np import tiledb @@ -254,6 +256,21 @@ def is_ndarray_like(arr): return schema +def as_built(return_json_string=False): + """ + Dumps the TileDB build configuration to a dictionary or string. + + :param bool return_json_string: Return the output as a string instead of a dictionary + :return: dict or str + """ + res = tiledb.main.as_built_dump() + + if return_json_string: + return res + + return json.loads(res) + + def _schema_like_numpy( array, ctx, diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index bbfa614b9f..5641324be1 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -3829,7 +3829,7 @@ def test_offset_can_fit_data_var_size_cannot( tiledb.stats_disable() -class TestTest(DiskTestCase): +class TestPath(DiskTestCase): def test_path(self, pytestconfig): path = self.path("foo") if pytestconfig.getoption("vfs") == "s3": @@ -3843,3 +3843,64 @@ def test_path(self, pytestconfig): ) def test_no_output(self): print("this test should fail") + + +class TestAsBuilt(DiskTestCase): + def test_as_built(self): + dump = tiledb.as_built(return_json_string=True) + assert isinstance(dump, str) + # ensure we get a non-empty string + assert len(dump) > 0 + dump_dict = tiledb.as_built() + assert isinstance(dump_dict, dict) + # ensure we get a non-empty dict + assert len(dump_dict) > 0 + + # validate top-level key + assert "as_built" in dump_dict + assert isinstance(dump_dict["as_built"], dict) + assert len(dump_dict["as_built"]) > 0 + + # validate parameters key + assert "parameters" in dump_dict["as_built"] + assert isinstance(dump_dict["as_built"]["parameters"], dict) + assert len(dump_dict["as_built"]["parameters"]) > 0 + + # validate storage_backends key + assert "storage_backends" in dump_dict["as_built"]["parameters"] + assert isinstance(dump_dict["as_built"]["parameters"]["storage_backends"], dict) + assert len(dump_dict["as_built"]["parameters"]["storage_backends"]) > 0 + + x = dump_dict["as_built"]["parameters"]["storage_backends"] + + # validate storage_backends attributes + vfs = tiledb.VFS() + if vfs.supports("azure"): + assert x["azure"]["enabled"] == True + else: + assert x["azure"]["enabled"] == False + + if vfs.supports("gcs"): + assert x["gcs"]["enabled"] == True + else: + assert x["gcs"]["enabled"] == False + + if vfs.supports("hdfs"): + assert x["hdfs"]["enabled"] == True + else: + assert x["hdfs"]["enabled"] == False + + if vfs.supports("s3"): + assert x["s3"]["enabled"] == True + else: + assert x["s3"]["enabled"] == False + + # validate support key + assert "support" in dump_dict["as_built"]["parameters"] + assert isinstance(dump_dict["as_built"]["parameters"]["support"], dict) + assert len(dump_dict["as_built"]["parameters"]["support"]) > 0 + + # validate support attributes - check only if boolean + assert dump_dict["as_built"]["parameters"]["support"]["serialization"][ + "enabled" + ] in [True, False]