From 983f3491918ab1677ac8743b7d755153be01d189 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Wed, 21 Jun 2023 11:25:52 -0400
Subject: [PATCH 01/25] Changes to examples for testing

+ TODO: Revert this commit
---
 examples/quickstart_dimension_labels.py       |   8 +-
 .../quickstart_dimension_labels_string.py     | 179 ++++++++++++++++++
 examples/string_float_int_dimensions.py       |   3 +
 3 files changed, 186 insertions(+), 4 deletions(-)
 create mode 100644 examples/quickstart_dimension_labels_string.py

diff --git a/examples/quickstart_dimension_labels.py b/examples/quickstart_dimension_labels.py
index 4fade0bde9..998223799f 100644
--- a/examples/quickstart_dimension_labels.py
+++ b/examples/quickstart_dimension_labels.py
@@ -104,9 +104,9 @@ def read_array(uri: str):
     else:
 
         # Only create and write to the array if it doesn't already exist.
-        if tiledb.object_type(ARRAY_NAME) != "array":
-            create_array(ARRAY_NAME)
-            write_array(ARRAY_NAME)
-
+        if tiledb.object_type(ARRAY_NAME) == "array":
+            tiledb.Array.delete_array(ARRAY_NAME)
+        create_array(ARRAY_NAME)
+        write_array(ARRAY_NAME)
         # Read from the array and print output.
         read_array(ARRAY_NAME)
diff --git a/examples/quickstart_dimension_labels_string.py b/examples/quickstart_dimension_labels_string.py
new file mode 100644
index 0000000000..6e27c03005
--- /dev/null
+++ b/examples/quickstart_dimension_labels_string.py
@@ -0,0 +1,179 @@
+# quickstart_dense.py
+#
+# LICENSE
+#
+# The MIT License
+#
+# Copyright (c) 2023 TileDB, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# DESCRIPTION
+#
+# Please refer to the TileDB and TileDB-Py documentation for more information:
+#   https://docs.tiledb.com/main/how-to
+#   https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html
+#
+# When run, this program will create a simple 1D dense array with a dimension label, write
+# some data to it, and read a slice of the data back.
+#
+
+import numpy as np
+import tiledb
+
+def create_array(uri: str):
+    """Create array schema with dimension labels"""
+    dim1 = tiledb.Dim("d1", domain=(1, 4), dtype=np.int32)
+    dim2 = tiledb.Dim("d2", domain=(1, 5), dtype=np.int32)
+    # TODO: Using np.bytes_ here was not converting correctly in DataType.from_numpy()
+    # + dimension.py overrides bytes_->TILEDB_STRING_ASCII and does not use DataType.from_numpy()
+    dim_labels = {
+        0: {"l1": dim1.create_label_schema("increasing", "ascii")},
+        1: {
+            "l2": dim2.create_label_schema("increasing", np.int64),
+            "l3": dim2.create_label_schema("increasing", np.float64),
+        },
+    }
+    dom = tiledb.Domain(dim1, dim2)
+
+    # Var-sized attributes seems to work without any issues.
+    att1 = tiledb.Attr("a1", var=True, dtype=np.bytes_)
+    att2 = tiledb.Attr("a2", var=True, dtype=np.int64)
+    schema = tiledb.ArraySchema(sparse=False, domain=dom, attrs=(att1, att2), dim_labels=dim_labels)
+    tiledb.Array.create(uri, schema)
+
+
+def write_array(uri: str):
+    """Write attribute and label data to the array"""
+    a1_data = np.array(
+        [
+            "a",
+            "bb",
+            "ccc",
+            "dddd",
+            "eeeee",
+            "a",
+            "bb",
+            "ccc",
+            "d",
+            "eeeeeeeeee",
+            "a",
+            "bb",
+            "ccc",
+            "d",
+            "eeeeeeeeee",
+            "a",
+            "bb",
+            "ccc",
+            "d",
+            "eeeeeeeeee",
+            # "a",
+            # "bb",
+            # "ccc",
+            # "d",
+            # "eeeeeeeeee",
+        ]
+    ).reshape(4, 5)
+
+    a2_data = np.array(
+        [
+            np.repeat(1, 1).astype(np.int64),
+            np.repeat(2, 2).astype(np.int64),
+            np.repeat(3, 3).astype(np.int64),
+            np.repeat(4, 4).astype(np.int64),
+            np.repeat(5, 5).astype(np.int64),
+
+            np.repeat(1, 5).astype(np.int64),
+            np.repeat(2, 4).astype(np.int64),
+            np.repeat(3, 3).astype(np.int64),
+            np.repeat(4, 2).astype(np.int64),
+            np.repeat(5, 1).astype(np.int64),
+
+            np.repeat(1, 1).astype(np.int64),
+            np.repeat(2, 2).astype(np.int64),
+            np.repeat(3, 3).astype(np.int64),
+            np.repeat(4, 4).astype(np.int64),
+            np.repeat(5, 5).astype(np.int64),
+
+            np.repeat(1, 5).astype(np.int64),
+            np.repeat(2, 4).astype(np.int64),
+            np.repeat(3, 1).astype(np.int64),
+            np.repeat(4, 2).astype(np.int64),
+            np.repeat(5, 3).astype(np.int64),
+
+            # np.repeat(1, 1).astype(np.int64),
+            # np.repeat(2, 1).astype(np.int64),
+            # np.repeat(3, 5).astype(np.int64),
+            # np.repeat(4, 5).astype(np.int64),
+            # np.repeat(5, 10).astype(np.int64),
+        ], dtype=object
+    ).reshape(4, 5)
+
+    # l1_data = np.array(["a", "bb", "ccc", "dddd", "eeeee"])
+    l1_data = np.array(["a", "bb", "ccc", "ddd"])
+    l2_data = np.arange(-2, 3)
+    l3_data = np.linspace(-1.0, 1.0, 5)
+    with tiledb.open(uri, "w") as array:
+        array[:] = {"a1": a1_data, "a2": a2_data, "l1": l1_data, "l2": l2_data, "l3": l3_data}
+
+
+def read_array(uri: str):
+    """Read the array from the dimension label"""
+
+    with tiledb.open(uri, "r") as array:
+        # data1 = array.label_index(["l2"])[1, 1:2]
+        # print("Reading array on [[1, -1:1]] with label 'l2' on dim2")
+        # for name, value in data1.items():
+        #     print(f"  '{name}'={value}")
+
+        # data2 = array.label_index(["l1", "l2"])[4:5, -2:2]
+        # print("Reading array on [[4:5, -2:2]] with label 'l1' on dim1 and 'l2' on dim2")
+        # for name, value in data2.items():
+        #     print(f"  '{name}'={value}")
+
+        # Should read all data
+        print("Reading array on [['a':'ddd']] with label 'l1' on dim1")
+        data3 = array.label_index(["l1"])["a":"ddd"]
+        for name, value in data3.items():
+            print(f"  '{name}'={value}")
+
+
+if __name__ == "__main__":
+    # Name of the array to create.
+    ARRAY_NAME = "/home/shaun/Documents/Arrays/quickstart_labels_string_py"
+    conf = tiledb.Config({
+        "sm.io_concurrency_level": "1",
+        "sm.compute_concurrency_level": "1",
+    })
+    tiledb.default_ctx(conf)
+
+    LIBVERSION = tiledb.libtiledb.version()
+    vfs = tiledb.VFS()
+
+    if LIBVERSION[0] == 2 and LIBVERSION[1] < 15:
+        print(
+            f"Dimension labels requires libtiledb version >= 2.15.0. Current version is"
+            f" {LIBVERSION[0]}.{LIBVERSION[1]}.{LIBVERSION[2]}"
+        )
+    else:
+        if vfs.is_dir(ARRAY_NAME):
+            vfs.remove_dir(ARRAY_NAME)
+        create_array(ARRAY_NAME)
+        write_array(ARRAY_NAME)
+        read_array(ARRAY_NAME)
diff --git a/examples/string_float_int_dimensions.py b/examples/string_float_int_dimensions.py
index d1fb03b95d..edda458cc6 100644
--- a/examples/string_float_int_dimensions.py
+++ b/examples/string_float_int_dimensions.py
@@ -39,6 +39,9 @@
 import tiledb
 
 path = "sparse_mixed_demo"
+vfs = tiledb.VFS()
+if vfs.is_dir(path):
+    vfs.remove_dir(path)
 
 dom = tiledb.Domain(
     *[

From 0ec4a060803fcda28afbaf11355291bb1ebfc156 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Thu, 22 Jun 2023 16:17:27 -0400
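Subject: [PATCH 02/25] WIP: Read var-sized dimension labels (hard-coded POC)

Teach PyQuery to recognize dimension-label buffers and drop the
NotImplementedError raised for var-sized labels in label queries.
The var-sized label data buffer size (9 bytes) and the result offsets
([0, 1, 3, 6]) are still hard-coded for the
quickstart_dimension_labels_string.py example, and debug output is
printed from alloc_label_buffer.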
---
 tiledb/core.cc                   | 62 +++++++++++++++++++++++---------
 tiledb/dimension_label_schema.py |  3 ++
 tiledb/multirange_indexing.py    | 22 ++++++------
 3 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/tiledb/core.cc b/tiledb/core.cc
index 55fa95e14d..2da17792f7 100644
--- a/tiledb/core.cc
+++ b/tiledb/core.cc
@@ -467,6 +467,14 @@ class PyQuery {
     return array_schema_->has_attribute(name);
   }
 
+  bool is_dimension_label(std::string name) {
+#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
+    return ArraySchemaExperimental::has_dimension_label(ctx_, *array_schema_, name);
+#else
+    return false;
+#endif
+  }
+
   bool is_var(std::string name) {
     if (is_dimension(name)) {
       auto dim = domain_->dimension(name);
@@ -474,6 +482,12 @@ class PyQuery {
     } else if (is_attribute(name)) {
       auto attr = array_schema_->attribute(name);
       return attr.cell_val_num() == TILEDB_VAR_NUM;
+#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
+    } else if (is_dimension_label(name)) {
+      auto dim_label = ArraySchemaExperimental::dimension_label(ctx_,
+        *array_schema_, name);
+      return dim_label.label_cell_val_num() == TILEDB_VAR_NUM;
+#endif
     } else {
       TPY_ERROR_LOC("Unknown buffer type for is_var check (expected attribute "
                     "or dimension)")
@@ -481,7 +495,7 @@ class PyQuery {
   }
 
   bool is_nullable(std::string name) {
-    if (is_dimension(name)) {
+    if (is_dimension(name) || is_dimension_label(name)) {
       return false;
     }
 
@@ -498,6 +512,13 @@ class PyQuery {
     } else if (is_attribute(name)) {
       type = array_schema_->attribute(name).type();
       cell_val_num = array_schema_->attribute(name).cell_val_num();
+#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
+    } else if (is_dimension_label(name)) {
+      auto dim_label = ArraySchemaExperimental::dimension_label(ctx_,
+        *array_schema_, name);
+      type = dim_label.label_type();
+      cell_val_num = dim_label.label_cell_val_num();
+#endif
     } else {
       TPY_ERROR_LOC("Unknown buffer '" + name + "'");
     }
@@ -621,28 +642,34 @@ class PyQuery {
 
 #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
   void alloc_label_buffer(std::string &label_name, uint64_t ncells) {
-
     auto dim_label = ArraySchemaExperimental::dimension_label(
         ctx_, *array_schema_, label_name);
+    std::cout << "label_name = " << label_name << std::endl;
+    std::cout << "\tncells = " << ncells << std::endl;
 
     tiledb_datatype_t type = dim_label.label_type();
     uint32_t cell_val_num = dim_label.label_cell_val_num();
     uint64_t cell_nbytes = tiledb_datatype_size(type);
-    if (cell_val_num != TILEDB_VAR_NUM) {
+    std::cout << "\tcell_nbytes = " << cell_nbytes << std::endl;
+    bool var = cell_val_num == TILEDB_VAR_NUM;
+    bool nullable = false;
+    uint64_t buf_nbytes = 0;
+
+    if (!var) {
+      std::cout << "\tcell_val_num = " << cell_val_num << std::endl;
       cell_nbytes *= cell_val_num;
+      std::cout << "\tcell_nbytes *= cell_val_num = " << cell_nbytes << std::endl;
+      buf_nbytes = ncells * cell_nbytes;
+      std::cout << "\tbuf_nbytes = ncells * cell_nbytes = " << buf_nbytes << std::endl;
     } else {
-      throw TileDBError(
-          "reading variable length dimension labels is not yet supported");
+      // TODO: I think we still need est_result_size here.
+      // + Given range ['a', 'ddd'], I don't see another way to calculate the label data size between 'a' and 'ddd'.
+      buf_nbytes = 9; // Full label data for this hard-coded example is ['a', 'bb', 'ccc', 'ddd']
     }
-    auto dtype = tiledb_dtype(type, cell_val_num);
 
-    uint64_t buf_nbytes = ncells * cell_nbytes;
-    uint64_t offsets_num = 0;
+    uint64_t offsets_num = var ? ncells : 0;
     uint64_t validity_num = 0;
 
-    bool var = cell_val_num == TILEDB_VAR_NUM;
-    bool nullable = false;
-
     buffers_order_.push_back(label_name);
     buffers_.insert(
         {label_name, BufferInfo(label_name, buf_nbytes, type, cell_val_num,
@@ -763,16 +790,17 @@ class PyQuery {
 
       if ((Py_ssize_t)(buf.data_vals_read * buf.elem_nbytes) >
           (Py_ssize_t)buf.data.size()) {
-        throw TileDBError("After read query, data buffer out of bounds: " +
-                          name);
+        throw TileDBError("After read query, data buffer out of bounds: " + name + " ("
+                          + std::to_string(buf.data_vals_read * buf.elem_nbytes) + " > "
+                          + std::to_string(buf.data.size()) + ")");
       }
       if ((Py_ssize_t)buf.offsets_read > buf.offsets.size()) {
-        throw TileDBError("After read query, offsets buffer out of bounds: " +
-                          name);
+        throw TileDBError("After read query, offsets buffer out of bounds: " + name + " ("
+                          + std::to_string(buf.offsets_read) + " > " + std::to_string(buf.offsets.size()) + ")");
       }
       if ((Py_ssize_t)buf.validity_vals_read > buf.validity.size()) {
-        throw TileDBError("After read query, validity buffer out of bounds: " +
-                          name);
+        throw TileDBError("After read query, validity buffer out of bounds: " + name + " ("
+                          + std::to_string(buf.validity_vals_read) + " > " + std::to_string(buf.validity.size()) + ")");
       }
     }
   }
diff --git a/tiledb/dimension_label_schema.py b/tiledb/dimension_label_schema.py
index d6e953a9dd..fbb1cb964b 100644
--- a/tiledb/dimension_label_schema.py
+++ b/tiledb/dimension_label_schema.py
@@ -35,6 +35,9 @@ def __init__(
         # Get DataType and DataOrder objects
         _label_order = DataOrder[order]
         _label_dtype = DataType.from_numpy(label_dtype)
+        # TOOD: Fix from_numpy for np.bytes_ (?)
+        if _label_dtype.tiledb_type == lt.DataType.CHAR:
+            _label_dtype = DataType(np.bytes_, lt.DataType.STRING_ASCII, lt.TILEDB_VAR_NUM)
         _dim_dtype = DataType.from_numpy(dim_dtype)
 
         # Convert the tile extent (if set)
diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py
index 1b58c8d21b..e0b7a66d60 100644
--- a/tiledb/multirange_indexing.py
+++ b/tiledb/multirange_indexing.py
@@ -486,10 +486,6 @@ def __init__(
         self._labels: Dict[int, str] = {}
         for label_name in labels:
             dim_label = array.schema.dim_label(label_name)
-            if dim_label.isvar:
-                raise NotImplementedError(
-                    "querying by variable length labels is not yet implemented"
-                )
             dim_idx = dim_label.dim_index
             if dim_idx in self._labels:
                 raise TileDBError(
@@ -624,16 +620,22 @@ def _get_pyquery_results(
     pyquery: PyQuery, schema: ArraySchema
 ) -> Dict[str, np.ndarray]:
     result_dict = OrderedDict()
-    for name, item in pyquery.results().items():
+    res = pyquery.results()
+    # TODO: There are no offsets at item[1] for the label result buffer, resulting in exception from numpy in else case.
+    # + Var size labels should have len(item[1]) > 0; We should not hit the else case below.
+    for name, item in res.items():
         if len(item[1]) > 0:
             arr = pyquery.unpack_buffer(name, item[0], item[1])
         else:
             arr = item[0]
-            arr.dtype = (
-                schema.attr_or_dim_dtype(name)
-                if not schema.has_dim_label(name)
-                else schema.dim_label(name).dtype
-            )
+            if schema.has_dim_label(name):
+                if schema.dim_label(name).isvar:
+                    # arr.dtype = np.uint8  # TODO: Revert all changes here. This is just hard-coded for POC.
+                    arr = pyquery.unpack_buffer(name, item[0], [0, 1, 3, 6])
+                else:
+                    arr.dtype = schema.dim_label(name).dtype
+            else:
+                arr.dtype = schema.attr_or_dim_dtype(name)
         result_dict[name if name != "__attr" else ""] = arr
     return result_dict
 

From 7398457ac9f14b43e44b020a93bebba47e48298c Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Fri, 23 Jun 2023 14:02:36 -0400
Subject: [PATCH 03/25] Allocate var-sized label buffers from est_result_size_var

---
 .../quickstart_dimension_labels_string.py     | 23 ++++++++--------
 tiledb/cc/query.cc                            |  2 ++
 tiledb/core.cc                                | 27 +++++++------------
 tiledb/dimension_label_schema.py              |  4 +--
 tiledb/multirange_indexing.py                 | 12 +++++++--
 5 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/examples/quickstart_dimension_labels_string.py b/examples/quickstart_dimension_labels_string.py
index 6e27c03005..9a2c3df2ab 100644
--- a/examples/quickstart_dimension_labels_string.py
+++ b/examples/quickstart_dimension_labels_string.py
@@ -41,10 +41,9 @@ def create_array(uri: str):
     """Create array schema with dimension labels"""
     dim1 = tiledb.Dim("d1", domain=(1, 4), dtype=np.int32)
     dim2 = tiledb.Dim("d2", domain=(1, 5), dtype=np.int32)
-    # TODO: Using np.bytes_ here was not converting correctly in DataType.from_numpy()
-    # + dimension.py overrides bytes_->TILEDB_STRING_ASCII and does not use DataType.from_numpy()
+    # TODO: Test label query with N var-size labels.
     dim_labels = {
-        0: {"l1": dim1.create_label_schema("increasing", "ascii")},
+        0: {"l1": dim1.create_label_schema("increasing", np.bytes_)},
         1: {
             "l2": dim2.create_label_schema("increasing", np.int64),
             "l3": dim2.create_label_schema("increasing", np.float64),
@@ -137,15 +136,15 @@ def read_array(uri: str):
     """Read the array from the dimension label"""
 
     with tiledb.open(uri, "r") as array:
-        # data1 = array.label_index(["l2"])[1, 1:2]
-        # print("Reading array on [[1, -1:1]] with label 'l2' on dim2")
-        # for name, value in data1.items():
-        #     print(f"  '{name}'={value}")
-
-        # data2 = array.label_index(["l1", "l2"])[4:5, -2:2]
-        # print("Reading array on [[4:5, -2:2]] with label 'l1' on dim1 and 'l2' on dim2")
-        # for name, value in data2.items():
-        #     print(f"  '{name}'={value}")
+        data1 = array.label_index(["l2"])[1, 1:2]
+        print("Reading array on [[1, -1:1]] with label 'l2' on dim2")
+        for name, value in data1.items():
+            print(f"  '{name}'={value}")
+
+        data2 = array.label_index(["l1", "l2"])["a":"ddd", -2:2]
+        print("Reading array on [[4:5, -2:2]] with label 'l1' on dim1 and 'l2' on dim2")
+        for name, value in data2.items():
+            print(f"  '{name}'={value}")
 
         # Should read all data
         print("Reading array on [['a':'ddd']] with label 'l1' on dim1")
diff --git a/tiledb/cc/query.cc b/tiledb/cc/query.cc
index 7320084ac7..9d515dc13c 100644
--- a/tiledb/cc/query.cc
+++ b/tiledb/cc/query.cc
@@ -50,6 +50,8 @@ void init_query(py::module &m) {
 
       .def("has_results", &Query::has_results)
 
+      .def("est_result_size_var", &Query::est_result_size_var)
+
       .def("is_complete",
            [](const Query &query) {
              return query.query_status() == Query::Status::COMPLETE;
diff --git a/tiledb/core.cc b/tiledb/core.cc
index 2da17792f7..897ac35188 100644
--- a/tiledb/core.cc
+++ b/tiledb/core.cc
@@ -314,7 +314,7 @@ class PyQuery {
   tiledb_layout_t layout_ = TILEDB_ROW_MAJOR;
 
   // label buffer list
-  std::vector<std::pair<string, uint64_t>> label_input_buffer_data_;
+  std::vector<std::tuple<string, uint64_t, uint64_t>> label_input_buffer_data_;
 
   py::object pyschema_;
 
@@ -641,34 +641,27 @@ class PyQuery {
   }
 
 #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
-  void alloc_label_buffer(std::string &label_name, uint64_t ncells) {
+  void alloc_label_buffer(std::string &label_name, uint64_t ncells, uint64_t var_size) {
     auto dim_label = ArraySchemaExperimental::dimension_label(
         ctx_, *array_schema_, label_name);
-    std::cout << "label_name = " << label_name << std::endl;
-    std::cout << "\tncells = " << ncells << std::endl;
 
     tiledb_datatype_t type = dim_label.label_type();
     uint32_t cell_val_num = dim_label.label_cell_val_num();
     uint64_t cell_nbytes = tiledb_datatype_size(type);
-    std::cout << "\tcell_nbytes = " << cell_nbytes << std::endl;
     bool var = cell_val_num == TILEDB_VAR_NUM;
     bool nullable = false;
     uint64_t buf_nbytes = 0;
+    uint64_t offsets_num = 0;
+    uint64_t validity_num = 0;
 
     if (!var) {
-      std::cout << "\tcell_val_num = " << cell_val_num << std::endl;
       cell_nbytes *= cell_val_num;
-      std::cout << "\tcell_nbytes *= cell_val_num = " << cell_nbytes << std::endl;
       buf_nbytes = ncells * cell_nbytes;
-      std::cout << "\tbuf_nbytes = ncells * cell_nbytes = " << buf_nbytes << std::endl;
     } else {
-      // TODO: I think we still need est_result_size here.
-      // + Given range ['a', 'ddd'], I don't see another way to calculate the label data size between 'a' and 'ddd'.
-      buf_nbytes = 9; // Full label data for this hard-coded example is ['a', 'bb', 'ccc', 'ddd']
+      buf_nbytes = var_size;
+      offsets_num = ncells;
     }
 
-    uint64_t offsets_num = var ? ncells : 0;
-    uint64_t validity_num = 0;
 
     buffers_order_.push_back(label_name);
     buffers_.insert(
@@ -676,14 +669,14 @@ class PyQuery {
                                 offsets_num, validity_num, var, nullable)});
   }
 #else
-  void alloc_label_buffer(std::string &, uint64_t) {
+  void alloc_label_buffer(std::string &, uint64_t, uint64_t) {
     throw TileDBError(
         "Using dimension labels requires libtiledb version 2.15.0 or greater");
   }
 #endif
 
-  void add_label_buffer(std::string &label_name, uint64_t ncells) {
-    label_input_buffer_data_.push_back({label_name, ncells});
+  void add_label_buffer(std::string &label_name, uint64_t ncells, uint64_t var_size) {
+    label_input_buffer_data_.push_back({label_name, ncells, var_size});
   }
 
   py::object get_buffers() {
@@ -965,7 +958,7 @@ class PyQuery {
 
     // allocate buffers for label dimensions
     for (auto &label_data : label_input_buffer_data_) {
-      alloc_label_buffer(label_data.first, label_data.second);
+      alloc_label_buffer(std::get<0>(label_data), std::get<1>(label_data), std::get<2>(label_data));
     }
 
     // allocate buffers for attributes
diff --git a/tiledb/dimension_label_schema.py b/tiledb/dimension_label_schema.py
index fbb1cb964b..a93fa73dc9 100644
--- a/tiledb/dimension_label_schema.py
+++ b/tiledb/dimension_label_schema.py
@@ -35,9 +35,9 @@ def __init__(
         # Get DataType and DataOrder objects
         _label_order = DataOrder[order]
         _label_dtype = DataType.from_numpy(label_dtype)
-        # TOOD: Fix from_numpy for np.bytes_ (?)
-        if _label_dtype.tiledb_type == lt.DataType.CHAR:
+        if np.issubdtype(label_dtype, np.bytes_):
             _label_dtype = DataType(np.bytes_, lt.DataType.STRING_ASCII, lt.TILEDB_VAR_NUM)
+
         _dim_dtype = DataType.from_numpy(dim_dtype)
 
         # Convert the tile extent (if set)
diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py
index e0b7a66d60..b9f650e6b7 100644
--- a/tiledb/multirange_indexing.py
+++ b/tiledb/multirange_indexing.py
@@ -519,6 +519,12 @@ def _run_query(self) -> Dict[str, np.ndarray]:
         # query and update the pyquery with the actual dimensions.
         if self.label_query is not None and not self.label_query.is_complete():
             self.label_query.submit()
+
+            est_var_size = [0, 0]
+            for label_name in self._labels.values():
+                if self.array.schema.dim_label(label_name).isvar:
+                    est_var_size = self.label_query.est_result_size_var(label_name)
+
             if not self.label_query.is_complete():
                 raise TileDBError("failed to get dimension ranges from labels")
             label_subarray = self.label_query.subarray()
@@ -537,7 +543,7 @@ def _run_query(self) -> Dict[str, np.ndarray]:
             for dim_idx, label_name in self._labels.items():
                 if self.result_shape is None:
                     raise TileDBError("failed to compute subarray shape")
-                self.pyquery.add_label_buffer(label_name, self.result_shape[dim_idx])
+                self.pyquery.add_label_buffer(label_name, self.result_shape[dim_idx], est_var_size[1])
         return super()._run_query()
 
 
@@ -631,7 +637,9 @@ def _get_pyquery_results(
             if schema.has_dim_label(name):
                 if schema.dim_label(name).isvar:
                     # arr.dtype = np.uint8  # TODO: Revert all changes here. This is just hard-coded for POC.
-                    arr = pyquery.unpack_buffer(name, item[0], [0, 1, 3, 6])
+                    # arr = pyquery.unpack_buffer(name, item[0], [0, 1])  # 'bb'
+                    # arr = pyquery.unpack_buffer(name, item[0], [0, 1, 3])  # 'ccc'
+                    arr = pyquery.unpack_buffer(name, item[0], [0, 1, 3, 6])  # 'ddd'
                 else:
                     arr.dtype = schema.dim_label(name).dtype
             else:

From 33fd00aa2cf8e22c8e3f28a9c69058482dd22fea Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Fri, 23 Jun 2023 15:30:49 -0400
Subject: [PATCH 04/25] Merge alloc_buffer and alloc_label_buffer

---
 .../quickstart_dimension_labels_string.py     |   4 +-
 tiledb/core.cc                                | 132 ++++++++----------
 2 files changed, 61 insertions(+), 75 deletions(-)

diff --git a/examples/quickstart_dimension_labels_string.py b/examples/quickstart_dimension_labels_string.py
index 9a2c3df2ab..c361ce73ac 100644
--- a/examples/quickstart_dimension_labels_string.py
+++ b/examples/quickstart_dimension_labels_string.py
@@ -136,13 +136,13 @@ def read_array(uri: str):
     """Read the array from the dimension label"""
 
     with tiledb.open(uri, "r") as array:
-        data1 = array.label_index(["l2"])[1, 1:2]
+        data1 = array.label_index(["l2"])[1, -1:1]
         print("Reading array on [[1, -1:1]] with label 'l2' on dim2")
         for name, value in data1.items():
             print(f"  '{name}'={value}")
 
         data2 = array.label_index(["l1", "l2"])["a":"ddd", -2:2]
-        print("Reading array on [[4:5, -2:2]] with label 'l1' on dim1 and 'l2' on dim2")
+        print("Reading array on [['a':'ddd', -2:2]] with label 'l1' on dim1 and 'l2' on dim2")
         for name, value in data2.items():
             print(f"  '{name}'={value}")
 
diff --git a/tiledb/core.cc b/tiledb/core.cc
index 897ac35188..1c12e0a88a 100644
--- a/tiledb/core.cc
+++ b/tiledb/core.cc
@@ -314,7 +314,7 @@ class PyQuery {
   tiledb_layout_t layout_ = TILEDB_ROW_MAJOR;
 
   // label buffer list
-  std::vector<std::tuple<string, uint64_t, uint64_t>> label_input_buffer_data_;
+  std::unordered_map<string, std::pair<uint64_t, uint64_t>> label_input_buffer_data_;
 
   py::object pyschema_;
 
@@ -564,51 +564,72 @@ class PyQuery {
   }
 
   void alloc_buffer(std::string name) {
-
     tiledb_datatype_t type;
     uint32_t cell_val_num;
-    std::tie(type, cell_val_num) = buffer_type(name);
-    uint64_t cell_nbytes = tiledb_datatype_size(type);
-    if (cell_val_num != TILEDB_VAR_NUM)
-      cell_nbytes *= cell_val_num;
-    auto dtype = tiledb_dtype(type, cell_val_num);
-
+    uint64_t cell_nbytes;
+    bool var;
+    bool nullable;
     uint64_t buf_nbytes = 0;
     uint64_t offsets_num = 0;
     uint64_t validity_num = 0;
-
-    bool var = is_var(name);
-    bool nullable = is_nullable(name);
     bool dense = array_schema_->array_type() == TILEDB_DENSE;
+    if (is_dimension_label(name)) {
+#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
+      auto dim_label = ArraySchemaExperimental::dimension_label(ctx_, *array_schema_, name);
+      type = dim_label.label_type();
+      cell_val_num = dim_label.label_cell_val_num();
+      var = cell_val_num == TILEDB_VAR_NUM;
+      nullable = false;
 
-    if (retries_ < 1 && dense) {
-      // we must not call after submitting
-      if (nullable && var) {
-        auto sizes = query_->est_result_size_var_nullable(name);
-        offsets_num = sizes[0];
-        buf_nbytes = sizes[1];
-        validity_num = sizes[2] / sizeof(uint8_t);
-      } else if (nullable && !var) {
-        auto sizes = query_->est_result_size_nullable(name);
-        buf_nbytes = sizes[0];
-        validity_num = sizes[1] / sizeof(uint8_t);
-      } else if (!nullable && var) {
-        auto size_pair = query_->est_result_size_var(name);
+      cell_nbytes = tiledb_datatype_size(type);
+      uint64_t ncells = label_input_buffer_data_[name].first;
+
+      if (!var) {
+        cell_nbytes *= cell_val_num;
+        buf_nbytes = ncells * cell_nbytes;
+      } else {
+        buf_nbytes = label_input_buffer_data_[name].second;
+        offsets_num = ncells;
+      }
+#endif
+    } else {
+      std::tie(type, cell_val_num) = buffer_type(name);
+      cell_nbytes = tiledb_datatype_size(type);
+      if (cell_val_num != TILEDB_VAR_NUM) {
+        cell_nbytes *= cell_val_num;
+      }
+      var = is_var(name);
+      nullable = is_nullable(name);
+
+      if (retries_ < 1 && dense) {
+        // we must not call after submitting
+        if (nullable && var) {
+          auto sizes = query_->est_result_size_var_nullable(name);
+          offsets_num = sizes[0];
+          buf_nbytes = sizes[1];
+          validity_num = sizes[2] / sizeof(uint8_t);
+        } else if (nullable && !var) {
+          auto sizes = query_->est_result_size_nullable(name);
+          buf_nbytes = sizes[0];
+          validity_num = sizes[1] / sizeof(uint8_t);
+        } else if (!nullable && var) {
+          auto size_pair = query_->est_result_size_var(name);
 #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR < 2
-        buf_nbytes = size_pair.first;
-        offsets_num = size_pair.second;
+          buf_nbytes = size_pair.first;
+          offsets_num = size_pair.second;
 #else
-        buf_nbytes = size_pair[0];
-        offsets_num = size_pair[1];
+          buf_nbytes = size_pair[0];
+          offsets_num = size_pair[1];
 #endif
-      } else { // !nullable && !var
-        buf_nbytes = query_->est_result_size(name);
-      }
+        } else { // !nullable && !var
+          buf_nbytes = query_->est_result_size(name);
+        }
 
-      // Add extra offset to estimate in order to avoid incomplete resubmit
-      // libtiledb 2.7.* does not include extra element in estimate.
-      // Remove this section after resolution of SC-16301.
-      offsets_num += (var && use_arrow_) ? 1 : 0;
+        // Add extra offset to estimate in order to avoid incomplete resubmit
+        // libtiledb 2.7.* does not include extra element in estimate.
+        // Remove this section after resolution of SC-16301.
+        offsets_num += (var && use_arrow_) ? 1 : 0;
+      }
     }
 
     // - for sparse arrays: don't try to allocate more than alloc_max_bytes_
@@ -640,43 +661,8 @@ class PyQuery {
                           validity_num, var, nullable)});
   }
 
-#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
-  void alloc_label_buffer(std::string &label_name, uint64_t ncells, uint64_t var_size) {
-    auto dim_label = ArraySchemaExperimental::dimension_label(
-        ctx_, *array_schema_, label_name);
-
-    tiledb_datatype_t type = dim_label.label_type();
-    uint32_t cell_val_num = dim_label.label_cell_val_num();
-    uint64_t cell_nbytes = tiledb_datatype_size(type);
-    bool var = cell_val_num == TILEDB_VAR_NUM;
-    bool nullable = false;
-    uint64_t buf_nbytes = 0;
-    uint64_t offsets_num = 0;
-    uint64_t validity_num = 0;
-
-    if (!var) {
-      cell_nbytes *= cell_val_num;
-      buf_nbytes = ncells * cell_nbytes;
-    } else {
-      buf_nbytes = var_size;
-      offsets_num = ncells;
-    }
-
-
-    buffers_order_.push_back(label_name);
-    buffers_.insert(
-        {label_name, BufferInfo(label_name, buf_nbytes, type, cell_val_num,
-                                offsets_num, validity_num, var, nullable)});
-  }
-#else
-  void alloc_label_buffer(std::string &, uint64_t, uint64_t) {
-    throw TileDBError(
-        "Using dimension labels requires libtiledb version 2.15.0 or greater");
-  }
-#endif
-
   void add_label_buffer(std::string &label_name, uint64_t ncells, uint64_t var_size) {
-    label_input_buffer_data_.push_back({label_name, ncells, var_size});
+    label_input_buffer_data_[label_name] = {ncells, var_size};
   }
 
   py::object get_buffers() {
@@ -957,8 +943,8 @@ class PyQuery {
     }
 
     // allocate buffers for label dimensions
-    for (auto &label_data : label_input_buffer_data_) {
-      alloc_label_buffer(std::get<0>(label_data), std::get<1>(label_data), std::get<2>(label_data));
+    for (const auto &label_data : label_input_buffer_data_) {
+      alloc_buffer(label_data.first);
     }
 
     // allocate buffers for attributes

From 187f7ed71374ccc6079aa9778e316a8d87cee19f Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Mon, 26 Jun 2023 15:49:00 -0400
Subject: [PATCH 05/25] Fix missing offset buffer data

+ Bug was in core Query::result_buffer_elements_nullable
---
 .../quickstart_dimension_labels_string.py     |  4 ++--
 tiledb/multirange_indexing.py                 | 20 ++++++-------------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/examples/quickstart_dimension_labels_string.py b/examples/quickstart_dimension_labels_string.py
index c361ce73ac..b75b053dd5 100644
--- a/examples/quickstart_dimension_labels_string.py
+++ b/examples/quickstart_dimension_labels_string.py
@@ -141,8 +141,8 @@ def read_array(uri: str):
         for name, value in data1.items():
             print(f"  '{name}'={value}")
 
-        data2 = array.label_index(["l1", "l2"])["a":"ddd", -2:2]
-        print("Reading array on [['a':'ddd', -2:2]] with label 'l1' on dim1 and 'l2' on dim2")
+        data2 = array.label_index(["l1", "l2"])["a":"ccc", -2:2]
+        print("Reading array on [['a':'ccc', -2:2]] with label 'l1' on dim1 and 'l2' on dim2")
         for name, value in data2.items():
             print(f"  '{name}'={value}")
 
diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py
index b9f650e6b7..b0378dafad 100644
--- a/tiledb/multirange_indexing.py
+++ b/tiledb/multirange_indexing.py
@@ -626,24 +626,16 @@ def _get_pyquery_results(
     pyquery: PyQuery, schema: ArraySchema
 ) -> Dict[str, np.ndarray]:
     result_dict = OrderedDict()
-    res = pyquery.results()
-    # TODO: There are no offsets at item[1] for the label result buffer, resulting in exception from numpy in else case.
-    # + Var size labels should have len(item[1]) > 0; We should not hit the else case below.
-    for name, item in res.items():
+    for name, item in pyquery.results().items():
         if len(item[1]) > 0:
             arr = pyquery.unpack_buffer(name, item[0], item[1])
         else:
             arr = item[0]
-            if schema.has_dim_label(name):
-                if schema.dim_label(name).isvar:
-                    # arr.dtype = np.uint8  # TODO: Revert all changes here. This is just hard-coded for POC.
-                    # arr = pyquery.unpack_buffer(name, item[0], [0, 1])  # 'bb'
-                    # arr = pyquery.unpack_buffer(name, item[0], [0, 1, 3])  # 'ccc'
-                    arr = pyquery.unpack_buffer(name, item[0], [0, 1, 3, 6])  # 'ddd'
-                else:
-                    arr.dtype = schema.dim_label(name).dtype
-            else:
-                arr.dtype = schema.attr_or_dim_dtype(name)
+            arr.dtype = (
+                schema.attr_or_dim_dtype(name)
+                if not schema.has_dim_label(name)
+                else schema.dim_label(name).dtype
+            )
         result_dict[name if name != "__attr" else ""] = arr
     return result_dict
 

From 92dbe408155c8221cacea0f426bc14d78ae00d08 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Tue, 27 Jun 2023 11:42:09 -0400
Subject: [PATCH 06/25] Add unit tests for var-size dimension labels

---
 tiledb/dimension_label_schema.py     |   8 ++-
 tiledb/tests/test_dimension_label.py | 102 +++++++++++++++++++++++++--
 2 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/tiledb/dimension_label_schema.py b/tiledb/dimension_label_schema.py
index a93fa73dc9..dc1365de32 100644
--- a/tiledb/dimension_label_schema.py
+++ b/tiledb/dimension_label_schema.py
@@ -34,9 +34,11 @@ def __init__(
 
         # Get DataType and DataOrder objects
         _label_order = DataOrder[order]
-        _label_dtype = DataType.from_numpy(label_dtype)
-        if np.issubdtype(label_dtype, np.bytes_):
-            _label_dtype = DataType(np.bytes_, lt.DataType.STRING_ASCII, lt.TILEDB_VAR_NUM)
+        _label_dtype = DataType.from_numpy(
+            np.dtype(label_dtype) if label_dtype not in ("ascii", "blob") else label_dtype)
+        np_dtype = _label_dtype.np_dtype
+        if np.issubdtype(np_dtype, np.bytes_) or np.issubdtype(np_dtype, np.str_):
+            _label_dtype = DataType(np_dtype, lt.DataType.STRING_ASCII, lt.TILEDB_VAR_NUM)
 
         _dim_dtype = DataType.from_numpy(dim_dtype)
 
diff --git a/tiledb/tests/test_dimension_label.py b/tiledb/tests/test_dimension_label.py
index 6edc346597..ac99caca4b 100644
--- a/tiledb/tests/test_dimension_label.py
+++ b/tiledb/tests/test_dimension_label.py
@@ -148,12 +148,15 @@ def test_add_to_array_schema_dim_dtype_mismatch(self):
         tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15,
         reason="dimension labels requires libtiledb version 2.15 or greater",
     )
-    def test_dimension_label_round_trip_dense_array(self):
+    @pytest.mark.parametrize("var", [True, False])
+    def test_dimension_label_round_trip_dense_array(self, var):
         # Create array schema with dimension labels
         dim = tiledb.Dim("d1", domain=(1, 10))
         dom = tiledb.Domain(dim)
         att = tiledb.Attr("a1", dtype=np.int64)
         dim_labels = {0: {"l1": dim.create_label_schema("increasing", np.int64)}}
+        if var:
+            dim_labels = {0: {"l1": dim.create_label_schema("increasing", np.bytes_)}}
         schema = tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels)
 
         # Create array
@@ -163,6 +166,8 @@ def test_dimension_label_round_trip_dense_array(self):
         # Write data to the array and the label
         attr_data = np.arange(1, 11)
         label_data = np.arange(-9, 10, 2)
+        if var:
+            label_data = np.array([str(chr(ord('a') + c) * (10 - c)).encode("utf-8") for c in range(10)])
         with tiledb.open(uri, "w") as array:
             array[:] = {"a1": attr_data, "l1": label_data}
 
@@ -181,7 +186,8 @@ def test_dimension_label_round_trip_dense_array(self):
             indexer = array.label_index(["l1"])
 
             # Read full array
-            result = indexer[-100:100]
+            result = indexer[label_data[0]:label_data[-1]]
+
             np.testing.assert_array_equal(result["a1"], attr_data)
             np.testing.assert_array_equal(result["l1"], label_data)
 
@@ -192,11 +198,18 @@ def test_dimension_label_round_trip_dense_array(self):
                 assert result["a1"][0] == attr_data[index]
                 assert result["l1"][0] == label_index
 
+            for index in range(10):
+                label_index = label_data[index:]
+                result = indexer[label_index[0]:label_index[-1]]
+                np.testing.assert_array_equal(result["a1"], attr_data[index:])
+                np.testing.assert_array_equal(result["l1"], label_index)
+
     @pytest.mark.skipif(
         tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15,
         reason="dimension labels requires libtiledb version 2.15 or greater",
     )
-    def test_dimension_label_round_trip_multidim_dense_array(self):
+    @pytest.mark.parametrize("var", [True, False])
+    def test_dimension_label_round_trip_multidim_dense_array(self, var):
         # Create array schema with dimension labels
         dim1 = tiledb.Dim("x_index", domain=(1, 8))
         dim2 = tiledb.Dim("y_index", domain=(1, 8))
@@ -204,7 +217,7 @@ def test_dimension_label_round_trip_multidim_dense_array(self):
         att = tiledb.Attr("value", dtype=np.int64)
         dim_labels = {
             0: {
-                "x1": dim1.create_label_schema("increasing", np.float64),
+                "x1": dim1.create_label_schema("increasing", np.float64 if not var else "U"),
                 "x2": dim1.create_label_schema("decreasing", np.int64),
             },
             1: {
@@ -220,6 +233,8 @@ def test_dimension_label_round_trip_multidim_dense_array(self):
         # Write data to the array and the label
         attr_data = np.reshape(np.arange(1, 65), (8, 8))
         x1_data = np.linspace(-1.0, 1.0, 8)
+        if var:
+            x1_data = np.array([str(chr(ord('a') + c - 1) * c).encode('utf-8') for c in range(1, 9)])
         x2_data = np.arange(8, 0, -1)
         y1_data = np.arange(9, 17)
         with tiledb.open(uri, "w") as array:
@@ -233,7 +248,7 @@ def test_dimension_label_round_trip_multidim_dense_array(self):
         # Test querying by label
         with tiledb.open(uri, "r") as array:
             # Read full array: labels on both ranges
-            result = array.label_index(["x1", "y1"])[-1.0:1.0, 9:17]
+            result = array.label_index(["x1", "y1"])[x1_data[0]:x1_data[-1], 9:17]
             np.testing.assert_array_equal(result["value"], attr_data)
             np.testing.assert_array_equal(result["x1"], x1_data)
             np.testing.assert_array_equal(result["y1"], y1_data)
@@ -261,12 +276,13 @@ def test_dimension_label_round_trip_multidim_dense_array(self):
         tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15,
         reason="dimension labels requires libtiledb version 2.15 or greater",
     )
-    def test_dimension_label_round_trip_sparse_array(self):
+    @pytest.mark.parametrize("var", [True, False])
+    def test_dimension_label_round_trip_sparse_array(self, var):
         # Create array schema with dimension labels
         dim = tiledb.Dim("index", domain=(1, 10))
         dom = tiledb.Domain(dim)
         att = tiledb.Attr("value", dtype=np.int64)
-        dim_labels = {0: {"l1": dim.create_label_schema("increasing", np.int64)}}
+        dim_labels = {0: {"l1": dim.create_label_schema("increasing", np.int64 if not var else "ascii")}}
         schema = tiledb.ArraySchema(
             domain=dom, attrs=(att,), dim_labels=dim_labels, sparse=True
         )
@@ -279,6 +295,8 @@ def test_dimension_label_round_trip_sparse_array(self):
         index_data = np.arange(1, 11)
         attr_data = np.arange(11, 21)
         label_data = np.arange(-10, 0)
+        if var:
+            label_data = np.array([str(chr(ord('a') + c) * (10 - c)).encode('utf-8') for c in range(10)])
         with tiledb.open(uri, "w") as array:
             array[index_data] = {"value": attr_data, "l1": label_data}
 
@@ -290,3 +308,73 @@ def test_dimension_label_round_trip_sparse_array(self):
         with tiledb.open(dim_label.uri, "r") as label1:
             output_label_data = label1[:][dim_label.label_attr_name]
             np.testing.assert_array_equal(output_label_data, label_data)
+
+    @pytest.mark.skipif(
+        tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15,
+        reason="dimension labels requires libtiledb version 2.15 or greater",
+    )
+    def test_dimension_label_round_trip_dense_var(self):
+        # Create array schema with dimension labels
+        dims = [
+            tiledb.Dim("d1", domain=(1, 10), dtype=np.int64),
+            tiledb.Dim("d2", domain=(1, 10), dtype=np.int64),
+        ]
+        dom = tiledb.Domain(*dims)
+        att = tiledb.Attr("value", var=True, dtype="S")
+        dim_labels = {
+            0: {
+                "l1": dims[0].create_label_schema("increasing", np.float32),
+            },
+            1: {
+                "l2": dims[1].create_label_schema("decreasing", np.int32),
+                "l3": dims[1].create_label_schema("increasing", np.bytes_),
+            },
+        }
+
+        schema = tiledb.ArraySchema(
+            domain=dom, attrs=(att,), dim_labels=dim_labels, sparse=False
+        )
+
+        # Create array
+        uri = self.path("dense_array_with_var_label2")
+        tiledb.Array.create(uri, schema)
+
+        # Write data to the array and the label
+        attr_data = np.array(
+            [[str(chr(ord('z') - c) * (10 - c)).encode('utf-8') for c in range(10)] for i in range(10)])
+        l1_data = np.arange(10, dtype=np.float32)
+        l2_data = np.arange(10, 0, -1, dtype=np.int32)
+        l3_data = np.array([str(chr(ord('a') + c) * (c + 1)).encode('utf-8') for c in range(10)])
+
+        with tiledb.open(uri, "w") as array:
+            array[:, :] = {"value": attr_data, "l1": l1_data, "l2": l2_data, "l3": l3_data}
+
+        # Load the array schema and get the URI of the dimension label
+        schema = tiledb.ArraySchema.load(uri)
+        for label_name, label_data in {"l1": l1_data, "l2": l2_data, "l3": l3_data}.items():
+            dim_label = schema.dim_label(label_name)
+            # Read and check the data directly from the dimension label
+            with tiledb.open(dim_label.uri, "r") as label:
+                output_label_data = label[:][dim_label.label_attr_name]
+                np.testing.assert_array_equal(output_label_data, label_data)
+
+            with tiledb.open(uri, "r") as array:
+                indexer = array.label_index([label_name])
+                lower = min(label_data[0], label_data[-1])
+                upper = max(label_data[0], label_data[-1])
+                if label_name == "l1":
+                    all_data = indexer[lower:upper]
+                else:
+                    all_data = indexer[:, lower:upper]
+                np.testing.assert_array_equal(all_data[label_name], label_data)
+                np.testing.assert_array_equal(all_data["value"], attr_data)
+
+                # Slice array with varying sizes.
+                for index in range(10):
+                    label_index = label_data[index:]
+                    if label_name == "l1":
+                        result = indexer[lower:upper]
+                    else:
+                        result = indexer[:, lower:upper]
+                    np.testing.assert_array_equal(result["value"][index:], attr_data[index:])
+                    np.testing.assert_array_equal(result[label_name][index:], label_index)

From f1af354bb771ccdb11a6504608d20f043013ba3a Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Tue, 27 Jun 2023 11:43:33 -0400
Subject: [PATCH 07/25] Revert "Changes to examples for testing"

This reverts commit 983f3491918ab1677ac8743b7d755153be01d189.
---
 examples/quickstart_dimension_labels.py       |   8 +-
 .../quickstart_dimension_labels_string.py     | 178 ------------------
 examples/string_float_int_dimensions.py       |   3 -
 3 files changed, 4 insertions(+), 185 deletions(-)
 delete mode 100644 examples/quickstart_dimension_labels_string.py

diff --git a/examples/quickstart_dimension_labels.py b/examples/quickstart_dimension_labels.py
index 998223799f..4fade0bde9 100644
--- a/examples/quickstart_dimension_labels.py
+++ b/examples/quickstart_dimension_labels.py
@@ -104,9 +104,9 @@ def read_array(uri: str):
     else:
 
         # Only create and write to the array if it doesn't already exist.
-        if tiledb.object_type(ARRAY_NAME) == "array":
-            tiledb.Array.delete_array(ARRAY_NAME)
-        create_array(ARRAY_NAME)
-        write_array(ARRAY_NAME)
+        if tiledb.object_type(ARRAY_NAME) != "array":
+            create_array(ARRAY_NAME)
+            write_array(ARRAY_NAME)
+
         # Read from the array and print output.
         read_array(ARRAY_NAME)
diff --git a/examples/quickstart_dimension_labels_string.py b/examples/quickstart_dimension_labels_string.py
deleted file mode 100644
index b75b053dd5..0000000000
--- a/examples/quickstart_dimension_labels_string.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# quickstart_dense.py
-#
-# LICENSE
-#
-# The MIT License
-#
-# Copyright (c) 2023 TileDB, Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# DESCRIPTION
-#
-# Please refer to the TileDB and TileDB-Py documentation for more information:
-#   https://docs.tiledb.com/main/how-to
-#   https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html
-#
-# When run, this program will create a simple 1D dense array with a dimension label, write
-# some data to it, and read a slice of the data back.
-#
-
-import numpy as np
-import tiledb
-
-def create_array(uri: str):
-    """Create array schema with dimension labels"""
-    dim1 = tiledb.Dim("d1", domain=(1, 4), dtype=np.int32)
-    dim2 = tiledb.Dim("d2", domain=(1, 5), dtype=np.int32)
-    # TODO: Test label query with N var-size labels.
-    dim_labels = {
-        0: {"l1": dim1.create_label_schema("increasing", np.bytes_)},
-        1: {
-            "l2": dim2.create_label_schema("increasing", np.int64),
-            "l3": dim2.create_label_schema("increasing", np.float64),
-        },
-    }
-    dom = tiledb.Domain(dim1, dim2)
-
-    # Var-sized attributes seems to work without any issues.
-    att1 = tiledb.Attr("a1", var=True, dtype=np.bytes_)
-    att2 = tiledb.Attr("a2", var=True, dtype=np.int64)
-    schema = tiledb.ArraySchema(sparse=False, domain=dom, attrs=(att1, att2), dim_labels=dim_labels)
-    tiledb.Array.create(uri, schema)
-
-
-def write_array(uri: str):
-    """Write attribute and label data to the array"""
-    a1_data = np.array(
-        [
-            "a",
-            "bb",
-            "ccc",
-            "dddd",
-            "eeeee",
-            "a",
-            "bb",
-            "ccc",
-            "d",
-            "eeeeeeeeee",
-            "a",
-            "bb",
-            "ccc",
-            "d",
-            "eeeeeeeeee",
-            "a",
-            "bb",
-            "ccc",
-            "d",
-            "eeeeeeeeee",
-            # "a",
-            # "bb",
-            # "ccc",
-            # "d",
-            # "eeeeeeeeee",
-        ]
-    ).reshape(4, 5)
-
-    a2_data = np.array(
-        [
-            np.repeat(1, 1).astype(np.int64),
-            np.repeat(2, 2).astype(np.int64),
-            np.repeat(3, 3).astype(np.int64),
-            np.repeat(4, 4).astype(np.int64),
-            np.repeat(5, 5).astype(np.int64),
-
-            np.repeat(1, 5).astype(np.int64),
-            np.repeat(2, 4).astype(np.int64),
-            np.repeat(3, 3).astype(np.int64),
-            np.repeat(4, 2).astype(np.int64),
-            np.repeat(5, 1).astype(np.int64),
-
-            np.repeat(1, 1).astype(np.int64),
-            np.repeat(2, 2).astype(np.int64),
-            np.repeat(3, 3).astype(np.int64),
-            np.repeat(4, 4).astype(np.int64),
-            np.repeat(5, 5).astype(np.int64),
-
-            np.repeat(1, 5).astype(np.int64),
-            np.repeat(2, 4).astype(np.int64),
-            np.repeat(3, 1).astype(np.int64),
-            np.repeat(4, 2).astype(np.int64),
-            np.repeat(5, 3).astype(np.int64),
-
-            # np.repeat(1, 1).astype(np.int64),
-            # np.repeat(2, 1).astype(np.int64),
-            # np.repeat(3, 5).astype(np.int64),
-            # np.repeat(4, 5).astype(np.int64),
-            # np.repeat(5, 10).astype(np.int64),
-        ], dtype=object
-    ).reshape(4, 5)
-
-    # l1_data = np.array(["a", "bb", "ccc", "dddd", "eeeee"])
-    l1_data = np.array(["a", "bb", "ccc", "ddd"])
-    l2_data = np.arange(-2, 3)
-    l3_data = np.linspace(-1.0, 1.0, 5)
-    with tiledb.open(uri, "w") as array:
-        array[:] = {"a1": a1_data, "a2": a2_data, "l1": l1_data, "l2": l2_data, "l3": l3_data}
-
-
-def read_array(uri: str):
-    """Read the array from the dimension label"""
-
-    with tiledb.open(uri, "r") as array:
-        data1 = array.label_index(["l2"])[1, -1:1]
-        print("Reading array on [[1, -1:1]] with label 'l2' on dim2")
-        for name, value in data1.items():
-            print(f"  '{name}'={value}")
-
-        data2 = array.label_index(["l1", "l2"])["a":"ccc", -2:2]
-        print("Reading array on [['a':'ccc', -2:2]] with label 'l1' on dim1 and 'l2' on dim2")
-        for name, value in data2.items():
-            print(f"  '{name}'={value}")
-
-        # Should read all data
-        print("Reading array on [['a':'ddd']] with label 'l1' on dim1")
-        data3 = array.label_index(["l1"])["a":"ddd"]
-        for name, value in data3.items():
-            print(f"  '{name}'={value}")
-
-
-if __name__ == "__main__":
-    # Name of the array to create.
-    ARRAY_NAME = "/home/shaun/Documents/Arrays/quickstart_labels_string_py"
-    conf = tiledb.Config({
-        "sm.io_concurrency_level": "1",
-        "sm.compute_concurrency_level": "1",
-    })
-    tiledb.default_ctx(conf)
-
-    LIBVERSION = tiledb.libtiledb.version()
-    vfs = tiledb.VFS()
-
-    if LIBVERSION[0] == 2 and LIBVERSION[1] < 15:
-        print(
-            f"Dimension labels requires libtiledb version >= 2.15.0. Current version is"
-            f" {LIBVERSION[0]}.{LIBVERSION[1]}.{LIBVERSION[2]}"
-        )
-    else:
-        if vfs.is_dir(ARRAY_NAME):
-            vfs.remove_dir(ARRAY_NAME)
-        create_array(ARRAY_NAME)
-        write_array(ARRAY_NAME)
-        read_array(ARRAY_NAME)
diff --git a/examples/string_float_int_dimensions.py b/examples/string_float_int_dimensions.py
index edda458cc6..d1fb03b95d 100644
--- a/examples/string_float_int_dimensions.py
+++ b/examples/string_float_int_dimensions.py
@@ -39,9 +39,6 @@
 import tiledb
 
 path = "sparse_mixed_demo"
-vfs = tiledb.VFS()
-if vfs.is_dir(path):
-    vfs.remove_dir(path)
 
 dom = tiledb.Domain(
     *[

From 71388f785f3dc47106c473cd012a2c4983359c82 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Tue, 27 Jun 2023 11:59:41 -0400
Subject: [PATCH 08/25] Test with CI

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 6fc1dda9a7..5dbd9a6a20 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
 ### DO NOT USE ON CI
 
 # Target branch: Note that this should be set to the current core release, not `dev`
-TILEDB_VERSION = "2.15.4"
+TILEDB_VERSION = "smr/sc-29317/py-var-size-dim-labels"
 
 # allow overriding w/ environment variable
 TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION

From aef844d66e46e60eb61805871cc9aa6dd14d5f6d Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Tue, 27 Jun 2023 12:04:20 -0400
Subject: [PATCH 09/25] Format

---
 tiledb/dimension_label_schema.py     |  9 +++-
 tiledb/multirange_indexing.py        |  4 +-
 tiledb/tests/test_dimension_label.py | 61 +++++++++++++++++++++-------
 3 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/tiledb/dimension_label_schema.py b/tiledb/dimension_label_schema.py
index dc1365de32..f107dcb109 100644
--- a/tiledb/dimension_label_schema.py
+++ b/tiledb/dimension_label_schema.py
@@ -35,10 +35,15 @@ def __init__(
         # Get DataType and DataOrder objects
         _label_order = DataOrder[order]
         _label_dtype = DataType.from_numpy(
-            np.dtype(label_dtype) if label_dtype not in ("ascii", "blob") else label_dtype)
+            np.dtype(label_dtype)
+            if label_dtype not in ("ascii", "blob")
+            else label_dtype
+        )
         np_dtype = _label_dtype.np_dtype
         if np.issubdtype(np_dtype, np.bytes_) or np.issubdtype(np_dtype, np.str_):
-            _label_dtype = DataType(np_dtype, lt.DataType.STRING_ASCII, lt.TILEDB_VAR_NUM)
+            _label_dtype = DataType(
+                np_dtype, lt.DataType.STRING_ASCII, lt.TILEDB_VAR_NUM
+            )
 
         _dim_dtype = DataType.from_numpy(dim_dtype)
 
diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py
index b0378dafad..a93bccb513 100644
--- a/tiledb/multirange_indexing.py
+++ b/tiledb/multirange_indexing.py
@@ -543,7 +543,9 @@ def _run_query(self) -> Dict[str, np.ndarray]:
             for dim_idx, label_name in self._labels.items():
                 if self.result_shape is None:
                     raise TileDBError("failed to compute subarray shape")
-                self.pyquery.add_label_buffer(label_name, self.result_shape[dim_idx], est_var_size[1])
+                self.pyquery.add_label_buffer(
+                    label_name, self.result_shape[dim_idx], est_var_size[1]
+                )
         return super()._run_query()
 
 
diff --git a/tiledb/tests/test_dimension_label.py b/tiledb/tests/test_dimension_label.py
index ac99caca4b..a3e77e8515 100644
--- a/tiledb/tests/test_dimension_label.py
+++ b/tiledb/tests/test_dimension_label.py
@@ -167,7 +167,9 @@ def test_dimension_label_round_trip_dense_array(self, var):
         attr_data = np.arange(1, 11)
         label_data = np.arange(-9, 10, 2)
         if var:
-            label_data = np.array([str(chr(ord('a') + c) * (10 - c)).encode("utf-8") for c in range(10)])
+            label_data = np.array(
+                [str(chr(ord("a") + c) * (10 - c)).encode("utf-8") for c in range(10)]
+            )
         with tiledb.open(uri, "w") as array:
             array[:] = {"a1": attr_data, "l1": label_data}
 
@@ -186,7 +188,7 @@ def test_dimension_label_round_trip_dense_array(self, var):
             indexer = array.label_index(["l1"])
 
             # Read full array
-            result = indexer[label_data[0]:label_data[-1]]
+            result = indexer[label_data[0] : label_data[-1]]
 
             np.testing.assert_array_equal(result["a1"], attr_data)
             np.testing.assert_array_equal(result["l1"], label_data)
@@ -200,7 +202,7 @@ def test_dimension_label_round_trip_dense_array(self, var):
 
             for index in range(10):
                 label_index = label_data[index:]
-                result = indexer[label_index[0]:label_index[-1]]
+                result = indexer[label_index[0] : label_index[-1]]
                 np.testing.assert_array_equal(result["a1"], attr_data[index:])
                 np.testing.assert_array_equal(result["l1"], label_index)
 
@@ -217,7 +219,9 @@ def test_dimension_label_round_trip_multidim_dense_array(self, var):
         att = tiledb.Attr("value", dtype=np.int64)
         dim_labels = {
             0: {
-                "x1": dim1.create_label_schema("increasing", np.float64 if not var else "U"),
+                "x1": dim1.create_label_schema(
+                    "increasing", np.float64 if not var else "U"
+                ),
                 "x2": dim1.create_label_schema("decreasing", np.int64),
             },
             1: {
@@ -234,7 +238,9 @@ def test_dimension_label_round_trip_multidim_dense_array(self, var):
         attr_data = np.reshape(np.arange(1, 65), (8, 8))
         x1_data = np.linspace(-1.0, 1.0, 8)
         if var:
-            x1_data = np.array([str(chr(ord('a') + c - 1) * c).encode('utf-8') for c in range(1, 9)])
+            x1_data = np.array(
+                [str(chr(ord("a") + c - 1) * c).encode("utf-8") for c in range(1, 9)]
+            )
         x2_data = np.arange(8, 0, -1)
         y1_data = np.arange(9, 17)
         with tiledb.open(uri, "w") as array:
@@ -248,7 +254,7 @@ def test_dimension_label_round_trip_multidim_dense_array(self, var):
         # Test querying by label
         with tiledb.open(uri, "r") as array:
             # Read full array: labels on both ranges
-            result = array.label_index(["x1", "y1"])[x1_data[0]:x1_data[-1], 9:17]
+            result = array.label_index(["x1", "y1"])[x1_data[0] : x1_data[-1], 9:17]
             np.testing.assert_array_equal(result["value"], attr_data)
             np.testing.assert_array_equal(result["x1"], x1_data)
             np.testing.assert_array_equal(result["y1"], y1_data)
@@ -282,7 +288,13 @@ def test_dimension_label_round_trip_sparse_array(self, var):
         dim = tiledb.Dim("index", domain=(1, 10))
         dom = tiledb.Domain(dim)
         att = tiledb.Attr("value", dtype=np.int64)
-        dim_labels = {0: {"l1": dim.create_label_schema("increasing", np.int64 if not var else "ascii")}}
+        dim_labels = {
+            0: {
+                "l1": dim.create_label_schema(
+                    "increasing", np.int64 if not var else "ascii"
+                )
+            }
+        }
         schema = tiledb.ArraySchema(
             domain=dom, attrs=(att,), dim_labels=dim_labels, sparse=True
         )
@@ -296,7 +308,9 @@ def test_dimension_label_round_trip_sparse_array(self, var):
         attr_data = np.arange(11, 21)
         label_data = np.arange(-10, 0)
         if var:
-            label_data = np.array([str(chr(ord('a') + c) * (10 - c)).encode('utf-8') for c in range(10)])
+            label_data = np.array(
+                [str(chr(ord("a") + c) * (10 - c)).encode("utf-8") for c in range(10)]
+            )
         with tiledb.open(uri, "w") as array:
             array[index_data] = {"value": attr_data, "l1": label_data}
 
@@ -341,17 +355,32 @@ def test_dimension_label_round_trip_dense_var(self):
 
         # Write data to the array and the label
         attr_data = np.array(
-            [[str(chr(ord('z') - c) * (10 - c)).encode('utf-8') for c in range(10)] for i in range(10)])
+            [
+                [str(chr(ord("z") - c) * (10 - c)).encode("utf-8") for c in range(10)]
+                for i in range(10)
+            ]
+        )
         l1_data = np.arange(10, dtype=np.float32)
         l2_data = np.arange(10, 0, -1, dtype=np.int32)
-        l3_data = np.array([str(chr(ord('a') + c) * (c + 1)).encode('utf-8') for c in range(10)])
+        l3_data = np.array(
+            [str(chr(ord("a") + c) * (c + 1)).encode("utf-8") for c in range(10)]
+        )
 
         with tiledb.open(uri, "w") as array:
-            array[:, :] = {"value": attr_data, "l1": l1_data, "l2": l2_data, "l3": l3_data}
+            array[:, :] = {
+                "value": attr_data,
+                "l1": l1_data,
+                "l2": l2_data,
+                "l3": l3_data,
+            }
 
         # Load the array schema and get the URI of the dimension label
         schema = tiledb.ArraySchema.load(uri)
-        for label_name, label_data in {"l1": l1_data, "l2": l2_data, "l3": l3_data}.items():
+        for label_name, label_data in {
+            "l1": l1_data,
+            "l2": l2_data,
+            "l3": l3_data,
+        }.items():
             dim_label = schema.dim_label(label_name)
             # Read and check the data directly from the dimension label
             with tiledb.open(dim_label.uri, "r") as label:
@@ -376,5 +405,9 @@ def test_dimension_label_round_trip_dense_var(self):
                         result = indexer[lower:upper]
                     else:
                         result = indexer[:, lower:upper]
-                    np.testing.assert_array_equal(result["value"][index:], attr_data[index:])
-                    np.testing.assert_array_equal(result[label_name][index:], label_index)
+                    np.testing.assert_array_equal(
+                        result["value"][index:], attr_data[index:]
+                    )
+                    np.testing.assert_array_equal(
+                        result[label_name][index:], label_index
+                    )

From c7c5c9f8076e26aadeef75c46cabcd8631a9422a Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Tue, 27 Jun 2023 12:46:32 -0400
Subject: [PATCH 10/25] Fix UT

---
 setup.py                             |  2 +-
 tiledb/tests/test_dimension_label.py | 15 +++++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index 5dbd9a6a20..d98eadcfae 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
 ### DO NOT USE ON CI
 
 # Target branch: Note that this should be set to the current core release, not `dev`
-TILEDB_VERSION = "smr/sc-29317/py-var-size-dim-labels"
+TILEDB_VERSION = "4903c723c194d4ba948cb2958e45f65e8369f287"  # TODO: Revert
 
 # allow overriding w/ environment variable
 TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION
diff --git a/tiledb/tests/test_dimension_label.py b/tiledb/tests/test_dimension_label.py
index a3e77e8515..06ea4b2732 100644
--- a/tiledb/tests/test_dimension_label.py
+++ b/tiledb/tests/test_dimension_label.py
@@ -401,13 +401,20 @@ def test_dimension_label_round_trip_dense_var(self):
                 # Slice array with varying sizes.
                 for index in range(10):
                     label_index = label_data[index:]
+                    lower = min(label_index[0], label_index[-1])
+                    upper = max(label_index[0], label_index[-1])
                     if label_name == "l1":
                         result = indexer[lower:upper]
+                        # Check against dim1
+                        np.testing.assert_array_equal(
+                            result["value"], attr_data[index:, :]
+                        )
                     else:
                         result = indexer[:, lower:upper]
+                        # Check against dim2
+                        np.testing.assert_array_equal(
+                            result["value"], attr_data[:, index:]
+                        )
                     np.testing.assert_array_equal(
-                        result["value"][index:], attr_data[index:]
-                    )
-                    np.testing.assert_array_equal(
-                        result[label_name][index:], label_index
+                        result[label_name], label_index
                     )

From 1567ea94109bccbe2da59c2545c6f7cacbf582e6 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Tue, 27 Jun 2023 13:18:08 -0400
Subject: [PATCH 11/25] Format

---
 setup.py                             |  2 +-
 tiledb/core.cc                       | 37 +++++++++++++++++-----------
 tiledb/tests/test_dimension_label.py |  4 +--
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/setup.py b/setup.py
index d98eadcfae..3827fbefd9 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
 ### DO NOT USE ON CI
 
 # Target branch: Note that this should be set to the current core release, not `dev`
-TILEDB_VERSION = "4903c723c194d4ba948cb2958e45f65e8369f287"  # TODO: Revert
+TILEDB_VERSION = "107803e42eef7be9d9935b178d6a10d67eac14f3"  # TODO: Revert
 
 # allow overriding w/ environment variable
 TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION
diff --git a/tiledb/core.cc b/tiledb/core.cc
index 1c12e0a88a..6070f7e6f4 100644
--- a/tiledb/core.cc
+++ b/tiledb/core.cc
@@ -314,7 +314,8 @@ class PyQuery {
   tiledb_layout_t layout_ = TILEDB_ROW_MAJOR;
 
   // label buffer list
-  std::unordered_map<string, std::pair<uint64_t, uint64_t>> label_input_buffer_data_;
+  std::unordered_map<string, std::pair<uint64_t, uint64_t>>
+      label_input_buffer_data_;
 
   py::object pyschema_;
 
@@ -469,7 +470,8 @@ class PyQuery {
 
   bool is_dimension_label(std::string name) {
 #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
-    return ArraySchemaExperimental::has_dimension_label(ctx_, *array_schema_, name);
+    return ArraySchemaExperimental::has_dimension_label(ctx_, *array_schema_,
+                                                        name);
 #else
     return false;
 #endif
@@ -484,8 +486,8 @@ class PyQuery {
       return attr.cell_val_num() == TILEDB_VAR_NUM;
 #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
     } else if (is_dimension_label(name)) {
-      auto dim_label = ArraySchemaExperimental::dimension_label(ctx_,
-        *array_schema_, name);
+      auto dim_label =
+          ArraySchemaExperimental::dimension_label(ctx_, *array_schema_, name);
       return dim_label.label_cell_val_num() == TILEDB_VAR_NUM;
 #endif
     } else {
@@ -514,8 +516,8 @@ class PyQuery {
       cell_val_num = array_schema_->attribute(name).cell_val_num();
 #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
     } else if (is_dimension_label(name)) {
-      auto dim_label = ArraySchemaExperimental::dimension_label(ctx_,
-        *array_schema_, name);
+      auto dim_label =
+          ArraySchemaExperimental::dimension_label(ctx_, *array_schema_, name);
       type = dim_label.label_type();
       cell_val_num = dim_label.label_cell_val_num();
 #endif
@@ -575,7 +577,8 @@ class PyQuery {
     bool dense = array_schema_->array_type() == TILEDB_DENSE;
     if (is_dimension_label(name)) {
 #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 15
-      auto dim_label = ArraySchemaExperimental::dimension_label(ctx_, *array_schema_, name);
+      auto dim_label =
+          ArraySchemaExperimental::dimension_label(ctx_, *array_schema_, name);
       type = dim_label.label_type();
       cell_val_num = dim_label.label_cell_val_num();
       var = cell_val_num == TILEDB_VAR_NUM;
@@ -661,7 +664,8 @@ class PyQuery {
                           validity_num, var, nullable)});
   }
 
-  void add_label_buffer(std::string &label_name, uint64_t ncells, uint64_t var_size) {
+  void add_label_buffer(std::string &label_name, uint64_t ncells,
+                        uint64_t var_size) {
     label_input_buffer_data_[label_name] = {ncells, var_size};
   }
 
@@ -769,17 +773,20 @@ class PyQuery {
 
       if ((Py_ssize_t)(buf.data_vals_read * buf.elem_nbytes) >
           (Py_ssize_t)buf.data.size()) {
-        throw TileDBError("After read query, data buffer out of bounds: " + name + " ("
-                          + std::to_string(buf.data_vals_read * buf.elem_nbytes) + " > "
-                          + std::to_string(buf.data.size()) + ")");
+        throw TileDBError(
+            "After read query, data buffer out of bounds: " + name + " (" +
+            std::to_string(buf.data_vals_read * buf.elem_nbytes) + " > " +
+            std::to_string(buf.data.size()) + ")");
       }
       if ((Py_ssize_t)buf.offsets_read > buf.offsets.size()) {
-        throw TileDBError("After read query, offsets buffer out of bounds: " + name + " ("
-                          + std::to_string(buf.offsets_read) + " > " + std::to_string(buf.offsets.size()) + ")");
+        throw TileDBError("After read query, offsets buffer out of bounds: " +
+                          name + " (" + std::to_string(buf.offsets_read) +
+                          " > " + std::to_string(buf.offsets.size()) + ")");
       }
       if ((Py_ssize_t)buf.validity_vals_read > buf.validity.size()) {
-        throw TileDBError("After read query, validity buffer out of bounds: " + name + " ("
-                          + std::to_string(buf.validity_vals_read) + " > " + std::to_string(buf.validity.size()) + ")");
+        throw TileDBError("After read query, validity buffer out of bounds: " +
+                          name + " (" + std::to_string(buf.validity_vals_read) +
+                          " > " + std::to_string(buf.validity.size()) + ")");
       }
     }
   }
diff --git a/tiledb/tests/test_dimension_label.py b/tiledb/tests/test_dimension_label.py
index 06ea4b2732..0daeab2e36 100644
--- a/tiledb/tests/test_dimension_label.py
+++ b/tiledb/tests/test_dimension_label.py
@@ -415,6 +415,4 @@ def test_dimension_label_round_trip_dense_var(self):
                         np.testing.assert_array_equal(
                             result["value"], attr_data[:, index:]
                         )
-                    np.testing.assert_array_equal(
-                        result[label_name], label_index
-                    )
+                    np.testing.assert_array_equal(result[label_name], label_index)

From 045bd97fed2c33e4b037d605012761afe285960c Mon Sep 17 00:00:00 2001
From: "J.P. Dark" <24235303+jp-dark@users.noreply.github.com>
Date: Tue, 27 Jun 2023 14:50:18 -0400
Subject: [PATCH 12/25] Add string dimension label example

---
 examples/string_dimension_labels.py | 92 +++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 examples/string_dimension_labels.py

diff --git a/examples/string_dimension_labels.py b/examples/string_dimension_labels.py
new file mode 100644
index 0000000000..443896626e
--- /dev/null
+++ b/examples/string_dimension_labels.py
@@ -0,0 +1,92 @@
+# string_dimension_labels.py
+#
+# LICENSE
+#
+# The MIT License
+#
+# Copyright (c) 2023 TileDB, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# DESCRIPTION
+#
+# Please refer to the TileDB and TileDB-Py documentation for more information:
+#   https://docs.tiledb.com/main/how-to
+#   https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html
+#
+# When run, this program will create a simple 2D dense array with a string dimension
+# label on the column dimension, and read a slice back with the dimension label.
+#
+
+import numpy as np
+
+import tiledb
+
+
+def create_array(uri: str):
+    """Create array schema with a dimension label on the columns"""
+    dim1 = tiledb.Dim("row", domain=(1, 5))
+    dim2 = tiledb.Dim("column", domain=(1, 5))
+    dom = tiledb.Domain(dim1, dim2)
+    att = tiledb.Attr("a1", dtype=np.int64)
+    dim_labels = {1: {"name": dim2.create_label_schema("increasing", "ascii")}}
+    schema = tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels)
+    tiledb.Array.create(uri, schema)
+
+
+def write_array(uri: str):
+    """Write attribute and label data to the array"""
+    a1_data = np.reshape(np.arange(1, 26), (5, 5))
+    label_data = np.array(["alpha", "beta", "gamma", "kappa", "omega"])
+    with tiledb.open(uri, "w") as array:
+        array[:, :] = {"a1": a1_data, "name": label_data}
+
+
+def read_array(uri: str):
+    """Read the array from the dimension label"""
+
+    with tiledb.open(uri, "r") as array:
+        data = array.label_index(["name"])[1, "beta":"kappa"]
+        print(
+            "Reading array on [[1, 'beta':'kappa']] with label 'name' on dimension 'col'"
+        )
+        for name, value in data.items():
+            print(f"  '{name}'={value}")
+
+
+if __name__ == "__main__":
+    # Name of the array to create.
+    ARRAY_NAME = "string_dimension_labels"
+
+    LIBVERSION = tiledb.libtiledb.version()
+
+    if LIBVERSION[0] == 2 and LIBVERSION[1] < 15:
+        print(
+            f"Dimension labels requires libtiledb version >= 2.15.0. Current version is"
+            f" {LIBVERSION[0]}.{LIBVERSION[1]}.{LIBVERSION[2]}"
+        )
+
+    else:
+        # Only create and write to the array if it doesn't already exist.
+        if tiledb.object_type(ARRAY_NAME) != "array":
+            create_array(ARRAY_NAME)
+            write_array(ARRAY_NAME)
+
+        # Read from the array and print output.
+        read_array(ARRAY_NAME)

From 8dfea3eac3843a6d2a47bdf5ec4c9a81aa9e6d74 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Wed, 28 Jun 2023 13:00:27 -0400
Subject: [PATCH 13/25] Changes to use experimental APIs from core

---
 setup.py                      |  2 +-
 tiledb/cc/query.cc            | 15 +++++++++++++++
 tiledb/core.cc                |  4 +++-
 tiledb/multirange_indexing.py |  2 +-
 4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 3827fbefd9..f4a5a7b796 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
 ### DO NOT USE ON CI
 
 # Target branch: Note that this should be set to the current core release, not `dev`
-TILEDB_VERSION = "107803e42eef7be9d9935b178d6a10d67eac14f3"  # TODO: Revert
+TILEDB_VERSION = "88a73c8a14ebf41ed75b6e60ed86f7e835e5da91"  # TODO: Revert
 
 # allow overriding w/ environment variable
 TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION
diff --git a/tiledb/cc/query.cc b/tiledb/cc/query.cc
index 9d515dc13c..772fcec392 100644
--- a/tiledb/cc/query.cc
+++ b/tiledb/cc/query.cc
@@ -1,4 +1,5 @@
 #include <tiledb/tiledb> // C++
+#include <tiledb/tiledb_experimental>
 
 #include "common.h"
 
@@ -50,6 +51,20 @@ void init_query(py::module &m) {
 
       .def("has_results", &Query::has_results)
 
+#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 16
+      .def("est_result_size_var_label",
+            [](const Query & query, const std::string& attr_name, bool label_data) {
+              return QueryExperimental::est_result_size_var_label(query, attr_name, label_data);
+            })
+#else
+      .def("est_result_size_var_label",
+           [](const Query & query, const std::string& attr_name, bool label_data) {
+            throw TileDBError("Estimate result size for dimension label data queries requires libtiledb version 2.15.0 "
+            "or greater");
+           })
+#endif
+
+      // For dimension labels, experimental variant above adds support to retrieve underlying data query estimates.
       .def("est_result_size_var", &Query::est_result_size_var)
 
       .def("is_complete",
diff --git a/tiledb/core.cc b/tiledb/core.cc
index 6070f7e6f4..4dbd3ee064 100644
--- a/tiledb/core.cc
+++ b/tiledb/core.cc
@@ -722,7 +722,9 @@ class PyQuery {
   }
 
   void update_read_elem_num() {
-#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3
+#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 16
+    auto result_elements = QueryExperimental::result_buffer_elements_nullable(*query_);
+#elif TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3
     // needs https://github.com/TileDB-Inc/TileDB/pull/2238
     auto result_elements = query_->result_buffer_elements_nullable();
 #else
diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py
index a93bccb513..83126f0e2f 100644
--- a/tiledb/multirange_indexing.py
+++ b/tiledb/multirange_indexing.py
@@ -523,7 +523,7 @@ def _run_query(self) -> Dict[str, np.ndarray]:
             est_var_size = [0, 0]
             for label_name in self._labels.values():
                 if self.array.schema.dim_label(label_name).isvar:
-                    est_var_size = self.label_query.est_result_size_var(label_name)
+                    est_var_size = self.label_query.est_result_size_var_label(label_name, False)
 
             if not self.label_query.is_complete():
                 raise TileDBError("failed to get dimension ranges from labels")

From 38881e16f9953856459ee6259c28dfda0b49fe05 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Wed, 28 Jun 2023 13:08:29 -0400
Subject: [PATCH 14/25] Format

---
 tiledb/cc/query.cc            | 11 +++++++----
 tiledb/core.cc                |  5 +++--
 tiledb/multirange_indexing.py |  4 +++-
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tiledb/cc/query.cc b/tiledb/cc/query.cc
index 772fcec392..5301a8877b 100644
--- a/tiledb/cc/query.cc
+++ b/tiledb/cc/query.cc
@@ -53,9 +53,11 @@ void init_query(py::module &m) {
 
 #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 16
       .def("est_result_size_var_label",
-            [](const Query & query, const std::string& attr_name, bool label_data) {
-              return QueryExperimental::est_result_size_var_label(query, attr_name, label_data);
-            })
+           [](const Query &query, const std::string &attr_name,
+              bool label_data) {
+             return QueryExperimental::est_result_size_var_label(
+                 query, attr_name, label_data);
+           })
 #else
       .def("est_result_size_var_label",
            [](const Query & query, const std::string& attr_name, bool label_data) {
@@ -64,7 +66,8 @@ void init_query(py::module &m) {
            })
 #endif
 
-      // For dimension labels, experimental variant above adds support to retrieve underlying data query estimates.
+      // For dimension labels, experimental variant above adds support to
+      // retrieve underlying data query estimates.
       .def("est_result_size_var", &Query::est_result_size_var)
 
       .def("is_complete",
diff --git a/tiledb/core.cc b/tiledb/core.cc
index 4dbd3ee064..1d97114a90 100644
--- a/tiledb/core.cc
+++ b/tiledb/core.cc
@@ -723,7 +723,8 @@ class PyQuery {
 
   void update_read_elem_num() {
 #if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 16
-    auto result_elements = QueryExperimental::result_buffer_elements_nullable(*query_);
+    auto result_elements =
+        QueryExperimental::result_buffer_elements_nullable(*query_);
 #elif TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3
     // needs https://github.com/TileDB-Inc/TileDB/pull/2238
     auto result_elements = query_->result_buffer_elements_nullable();
@@ -1410,7 +1411,7 @@ class PyQuery {
     }
   }
 
-}; // class PyQuery
+}; // namespace tiledbpy
 
 void init_stats() {
   g_stats.reset(new StatsInfo());
diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py
index 83126f0e2f..6aae75dac9 100644
--- a/tiledb/multirange_indexing.py
+++ b/tiledb/multirange_indexing.py
@@ -523,7 +523,9 @@ def _run_query(self) -> Dict[str, np.ndarray]:
             est_var_size = [0, 0]
             for label_name in self._labels.values():
                 if self.array.schema.dim_label(label_name).isvar:
-                    est_var_size = self.label_query.est_result_size_var_label(label_name, False)
+                    est_var_size = self.label_query.est_result_size_var_label(
+                        label_name, False
+                    )
 
             if not self.label_query.is_complete():
                 raise TileDBError("failed to get dimension ranges from labels")

From 39b02c1b16a632a76cbe920917b2b54f86bd70d1 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Thu, 29 Jun 2023 11:41:42 -0400
Subject: [PATCH 15/25] Remove calls to est_result_size

---
 setup.py                      |  2 +-
 tiledb/cc/query.cc            | 19 -------------------
 tiledb/core.cc                | 13 +++++--------
 tiledb/multirange_indexing.py | 11 +----------
 4 files changed, 7 insertions(+), 38 deletions(-)

diff --git a/setup.py b/setup.py
index f4a5a7b796..4243ba6a69 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
 ### DO NOT USE ON CI
 
 # Target branch: Note that this should be set to the current core release, not `dev`
-TILEDB_VERSION = "88a73c8a14ebf41ed75b6e60ed86f7e835e5da91"  # TODO: Revert
+TILEDB_VERSION = "c1bf4e0eccf2b2fde72a32cdd240928b7da8a64a"  # TODO: Revert
 
 # allow overriding w/ environment variable
 TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION
diff --git a/tiledb/cc/query.cc b/tiledb/cc/query.cc
index 5301a8877b..145277c33f 100644
--- a/tiledb/cc/query.cc
+++ b/tiledb/cc/query.cc
@@ -51,25 +51,6 @@ void init_query(py::module &m) {
 
       .def("has_results", &Query::has_results)
 
-#if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 16
-      .def("est_result_size_var_label",
-           [](const Query &query, const std::string &attr_name,
-              bool label_data) {
-             return QueryExperimental::est_result_size_var_label(
-                 query, attr_name, label_data);
-           })
-#else
-      .def("est_result_size_var_label",
-           [](const Query & query, const std::string& attr_name, bool label_data) {
-            throw TileDBError("Estimate result size for dimension label data queries requires libtiledb version 2.15.0 "
-            "or greater");
-           })
-#endif
-
-      // For dimension labels, experimental variant above adds support to
-      // retrieve underlying data query estimates.
-      .def("est_result_size_var", &Query::est_result_size_var)
-
       .def("is_complete",
            [](const Query &query) {
              return query.query_status() == Query::Status::COMPLETE;
diff --git a/tiledb/core.cc b/tiledb/core.cc
index 1d97114a90..ee440baac0 100644
--- a/tiledb/core.cc
+++ b/tiledb/core.cc
@@ -314,8 +314,7 @@ class PyQuery {
   tiledb_layout_t layout_ = TILEDB_ROW_MAJOR;
 
   // label buffer list
-  std::unordered_map<string, std::pair<uint64_t, uint64_t>>
-      label_input_buffer_data_;
+  std::unordered_map<string, uint64_t> label_input_buffer_data_;
 
   py::object pyschema_;
 
@@ -585,15 +584,14 @@ class PyQuery {
       nullable = false;
 
       cell_nbytes = tiledb_datatype_size(type);
-      uint64_t ncells = label_input_buffer_data_[name].first;
+      uint64_t ncells = label_input_buffer_data_[name];
 
       if (!var) {
         cell_nbytes *= cell_val_num;
-        buf_nbytes = ncells * cell_nbytes;
       } else {
-        buf_nbytes = label_input_buffer_data_[name].second;
         offsets_num = ncells;
       }
+      buf_nbytes = ncells * cell_nbytes;
 #endif
     } else {
       std::tie(type, cell_val_num) = buffer_type(name);
@@ -664,9 +662,8 @@ class PyQuery {
                           validity_num, var, nullable)});
   }
 
-  void add_label_buffer(std::string &label_name, uint64_t ncells,
-                        uint64_t var_size) {
-    label_input_buffer_data_[label_name] = {ncells, var_size};
+  void add_label_buffer(std::string &label_name, uint64_t ncells) {
+    label_input_buffer_data_[label_name] = ncells;
   }
 
   py::object get_buffers() {
diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py
index 6aae75dac9..dc0354d8d3 100644
--- a/tiledb/multirange_indexing.py
+++ b/tiledb/multirange_indexing.py
@@ -520,13 +520,6 @@ def _run_query(self) -> Dict[str, np.ndarray]:
         if self.label_query is not None and not self.label_query.is_complete():
             self.label_query.submit()
 
-            est_var_size = [0, 0]
-            for label_name in self._labels.values():
-                if self.array.schema.dim_label(label_name).isvar:
-                    est_var_size = self.label_query.est_result_size_var_label(
-                        label_name, False
-                    )
-
             if not self.label_query.is_complete():
                 raise TileDBError("failed to get dimension ranges from labels")
             label_subarray = self.label_query.subarray()
@@ -545,9 +538,7 @@ def _run_query(self) -> Dict[str, np.ndarray]:
             for dim_idx, label_name in self._labels.items():
                 if self.result_shape is None:
                     raise TileDBError("failed to compute subarray shape")
-                self.pyquery.add_label_buffer(
-                    label_name, self.result_shape[dim_idx], est_var_size[1]
-                )
+                self.pyquery.add_label_buffer(label_name, self.result_shape[dim_idx])
         return super()._run_query()
 
 

From bb10cf7aeb1f3ca7078e16405a32a7a36047a531 Mon Sep 17 00:00:00 2001
From: Shaun Reed <shaunrd0@gmail.com>
Date: Thu, 6 Jul 2023 09:59:32 -0400
Subject: [PATCH 16/25] Update to use result_buffer_elements_nullable_labels
 rename

---
 setup.py       | 2 +-
 tiledb/core.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 4243ba6a69..1e97a5d140 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
 ### DO NOT USE ON CI
 
 # Target branch: Note that this should be set to the current core release, not `dev`
-TILEDB_VERSION = "c1bf4e0eccf2b2fde72a32cdd240928b7da8a64a"  # TODO: Revert
+TILEDB_VERSION = "a2ab3c2d29296cb78e51c87888a45b2ed901d8a3"  # TODO: Revert
 
 # allow overriding w/ environment variable
 TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION
diff --git a/tiledb/core.cc b/tiledb/core.cc
index ee440baac0..5047dbe1b1 100644
--- a/tiledb/core.cc
+++ b/tiledb/core.cc
@@ -721,7 +721,7 @@ class PyQuery {
   void update_read_elem_num() {
 #if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 16
     auto result_elements =
-        QueryExperimental::result_buffer_elements_nullable(*query_);
+        QueryExperimental::result_buffer_elements_nullable_labels(*query_);
 #elif TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 3
     // needs https://github.com/TileDB-Inc/TileDB/pull/2238
     auto result_elements = query_->result_buffer_elements_nullable();

From 6e8b4df11cd5401eccc49aa957bfa4f36059602b Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Wed, 12 Jul 2023 15:49:56 -0400
Subject: [PATCH 17/25] Test

---
 tiledb/tests/cc/test_cc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py
index 20ef3b88ee..366b261552 100644
--- a/tiledb/tests/cc/test_cc.py
+++ b/tiledb/tests/cc/test_cc.py
@@ -1,5 +1,6 @@
 import os
 import tempfile
+import time
 
 import numpy as np
 import pytest
@@ -141,6 +142,7 @@ def test_array():
 
     arrw = lt.Array(ctx, uri, lt.QueryType.WRITE)
     arrw.delete_metadata("key")
+    time.sleep(0.1)
     arrw.close()
 
     arr = lt.Array(ctx, uri, lt.QueryType.READ)

From 6247dbf81ac0894decd407fe0d73a2d5adb29a0a Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Wed, 12 Jul 2023 16:57:09 -0400
Subject: [PATCH 18/25] Use 2.16.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 1e97a5d140..bd692b0b6e 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
 ### DO NOT USE ON CI
 
 # Target branch: Note that this should be set to the current core release, not `dev`
-TILEDB_VERSION = "a2ab3c2d29296cb78e51c87888a45b2ed901d8a3"  # TODO: Revert
+TILEDB_VERSION = "2.16.0"  # TODO: Revert
 
 # allow overriding w/ environment variable
 TILEDB_VERSION = os.environ.get("TILEDB_VERSION") or TILEDB_VERSION

From 862fac06514ebf78a64d3aa5f97b872f833bf9d9 Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Thu, 13 Jul 2023 15:43:00 -0400
Subject: [PATCH 19/25] DEBUG

---
 tiledb/tests/cc/test_cc.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py
index 366b261552..29c723a80b 100644
--- a/tiledb/tests/cc/test_cc.py
+++ b/tiledb/tests/cc/test_cc.py
@@ -144,8 +144,15 @@ def test_array():
     arrw.delete_metadata("key")
     time.sleep(0.1)
     arrw.close()
+    time.sleep(0.1)
 
     arr = lt.Array(ctx, uri, lt.QueryType.READ)
+    try:
+        import subprocess
+
+        subprocess.check_output(["tree", uri])
+    except Exception:
+        pass
     with pytest.raises(KeyError):
         arr.get_metadata("key")
     assert not arr.has_metadata("key")[0]

From 58a748f00a0cc0a8cd617e4c70d70732fd9dc9fb Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Thu, 13 Jul 2023 16:14:55 -0400
Subject: [PATCH 20/25] DEBUG

---
 tiledb/tests/cc/test_cc.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py
index 29c723a80b..b70ce4da0a 100644
--- a/tiledb/tests/cc/test_cc.py
+++ b/tiledb/tests/cc/test_cc.py
@@ -150,8 +150,9 @@ def test_array():
     try:
         import subprocess
 
-        subprocess.check_output(["tree", uri])
+        print(subprocess.check_output(["tree", uri]).decode())
     except Exception:
+        print("failed")
         pass
     with pytest.raises(KeyError):
         arr.get_metadata("key")

From b6030d20e1ec38522bd2d5ca546c195acbc9ccf4 Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Thu, 13 Jul 2023 16:20:27 -0400
Subject: [PATCH 21/25] DEBUG

---
 tiledb/tests/cc/test_cc.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py
index b70ce4da0a..55318d8e48 100644
--- a/tiledb/tests/cc/test_cc.py
+++ b/tiledb/tests/cc/test_cc.py
@@ -148,9 +148,16 @@ def test_array():
 
     arr = lt.Array(ctx, uri, lt.QueryType.READ)
     try:
+        import base64
         import subprocess
 
         print(subprocess.check_output(["tree", uri]).decode())
+
+        print("--- starting ---")
+        subprocess.check_output(["tar", "czvf", "/tmp/array.tgz", uri])
+        with open("/tmp/array.tgz", "rb") as f:
+            print(base64.b64encode(f.read()))
+        print("--- ending encoded array tgz ---")
     except Exception:
         print("failed")
         pass

From fa5c35589f0482095c0243da3e7d8cbe48f57539 Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Thu, 13 Jul 2023 23:14:04 -0400
Subject: [PATCH 22/25] use tempfile

---
 tiledb/tests/cc/test_cc.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py
index 55318d8e48..255937c308 100644
--- a/tiledb/tests/cc/test_cc.py
+++ b/tiledb/tests/cc/test_cc.py
@@ -153,9 +153,10 @@ def test_array():
 
         print(subprocess.check_output(["tree", uri]).decode())
 
-        print("--- starting ---")
-        subprocess.check_output(["tar", "czvf", "/tmp/array.tgz", uri])
-        with open("/tmp/array.tgz", "rb") as f:
+        path = os.path.join(tempfile.mkdtemp(), "array.tgz")
+        print("--- starting --- ", path)
+        subprocess.check_output(["tar", "czvf", path, uri])
+        with open(path, "rb") as f:
             print(base64.b64encode(f.read()))
         print("--- ending encoded array tgz ---")
     except Exception:

From 1a296a883cba71e575ce299c96f8871451320dbe Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Thu, 13 Jul 2023 23:36:08 -0400
Subject: [PATCH 23/25] DEBUG: brew install tar in CI; print exception in test_array

---
 .github/workflows/ci.yml   | 2 +-
 tiledb/tests/cc/test_cc.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a5e4a46dde..65200dcf6e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,7 +52,7 @@ jobs:
       # Remove after upstream PR fully-deployed:
       # - https://github.com/actions/runner-images/pull/7125
       - name: "Install homebrew dependencies"
-        run: brew install pkg-config
+        run: brew install pkg-config tar
         if: matrix.os == 'macos-11'
 
       - name: "Install dependencies"
diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py
index 255937c308..3a94f4041f 100644
--- a/tiledb/tests/cc/test_cc.py
+++ b/tiledb/tests/cc/test_cc.py
@@ -159,8 +159,8 @@ def test_array():
         with open(path, "rb") as f:
             print(base64.b64encode(f.read()))
         print("--- ending encoded array tgz ---")
-    except Exception:
-        print("failed")
+    except Exception as exc:
+        print("failed: ", exc)
         pass
     with pytest.raises(KeyError):
         arr.get_metadata("key")

From 71a2dc0eaf6f04bce064c70137248e1d73eb84cd Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Fri, 14 Jul 2023 08:29:57 -0400
Subject: [PATCH 24/25] DEBUG: drop tar from homebrew dependencies in CI

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 65200dcf6e..6a22fc1f82 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,7 +29,7 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
-      
+
       - run: rm -rf $RUNNER_TOOL_CACHE/Python/3.7.17
         if: matrix.os == 'macos-11' && matrix.python-version == '3.7'
       - uses: actions/setup-python@v4
@@ -52,7 +52,7 @@ jobs:
       # Remove after upstream PR fully-deployed:
       # - https://github.com/actions/runner-images/pull/7125
       - name: "Install homebrew dependencies"
-        run: brew install pkg-config tar
+        run: brew install pkg-config
         if: matrix.os == 'macos-11'
 
       - name: "Install dependencies"

From 29b10e48c09d500141333e70fde5af4006151bee Mon Sep 17 00:00:00 2001
From: Isaiah Norton <isaiah@tiledb.io>
Date: Fri, 14 Jul 2023 08:47:18 -0400
Subject: [PATCH 25/25] DEBUG: handle tree failure separately in test_array

---
 tiledb/tests/cc/test_cc.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py
index 3a94f4041f..9529187603 100644
--- a/tiledb/tests/cc/test_cc.py
+++ b/tiledb/tests/cc/test_cc.py
@@ -151,7 +151,10 @@ def test_array():
         import base64
         import subprocess
 
-        print(subprocess.check_output(["tree", uri]).decode())
+        try:
+            print(subprocess.check_output(["tree", uri]).decode())
+        except Exception as e1:
+            print("tree got exception: ", e1)
 
         path = os.path.join(tempfile.mkdtemp(), "array.tgz")
         print("--- starting --- ", path)