diff --git a/tiledb/tests/fixtures.py b/tiledb/tests/fixtures.py
index ccb1f28628..96c77cfc33 100644
--- a/tiledb/tests/fixtures.py
+++ b/tiledb/tests/fixtures.py
@@ -8,7 +8,7 @@
 INTEGER_DTYPES = ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8"]
 
 
-@pytest.fixture(scope="module", params=["hilbert", "row-major"])
+@pytest.fixture(scope="module", params=["hilbert", "row-major", "col-major"])
 def sparse_cell_order(request):
     yield request.param
 
diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py
index ae26e4c6b0..cc80575319 100644
--- a/tiledb/tests/test_libtiledb.py
+++ b/tiledb/tests/test_libtiledb.py
@@ -2701,7 +2701,28 @@ def test_sparse_mixed_domain_uint_float64(self, sparse_cell_order):
             self.assertEqual(a_nonempty[0], (0, 49))
             self.assertEqual(a_nonempty[1], (-100.0, 100.0))
 
-    def test_sparse_string_domain(self, sparse_cell_order):
+    @pytest.mark.parametrize(
+        "coords, expected_ned, allows_duplicates",
+        [
+            ([b"aa", b"bbb", b"c", b"dddd"], [b"aa", b"dddd"], False),
+            ([b""], [b"", b""], True),
+            ([b"", b"", b"", b""], [b"", b""], True),
+        ],
+    )
+    def test_sparse_string_domain(
+        self, coords, expected_ned, allows_duplicates, sparse_cell_order
+    ):
+        """Round-trip string coordinates through a sparse array.
+
+        Writes ``coords`` (with matching int64 attribute data) and checks the
+        non-empty domain against ``expected_ned``; the read-back checks depend
+        on ``allows_duplicates`` and ``sparse_cell_order``.
+        """
+        # NOTE(review): a disabled xfail here cited a "known bug with legacy
+        # reader and empty strings" for libtiledb < 2.8. The default-layout
+        # read-back below is not asserted for duplicate-allowing arrays.
+        # TODO confirm whether the xfail should be restored or dropped.
+
         path = self.path("sparse_string_domain")
         dom = tiledb.Domain(tiledb.Dim(name="d", domain=(None, None), dtype=np.bytes_))
         att = tiledb.Attr(name="a", dtype=np.int64)
@@ -2710,22 +2731,41 @@ def test_sparse_string_domain(self, sparse_cell_order):
             attrs=(att,),
             sparse=True,
             cell_order=sparse_cell_order,
+            allows_duplicates=allows_duplicates,
             capacity=10000,
         )
         tiledb.SparseArray.create(path, schema)
 
-        data = [1, 2, 3, 4]
-        coords = [b"aa", b"bbb", b"c", b"dddd"]
+        # Trim the attribute values to one per coordinate.
+        data = [1, 2, 3, 4][: len(coords)]
 
         with tiledb.open(path, "w") as A:
             A[coords] = data
 
         with tiledb.open(path) as A:
             ned = A.nonempty_domain()[0]
-            res = A[ned[0] : ned[1]]
-            assert_array_equal(res["a"], data)
-            self.assertEqual(set(res["d"]), set(coords))
-            self.assertEqual(A.nonempty_domain(), ((b"aa", b"dddd"),))
+            self.assertEqual(A.nonempty_domain(), (tuple(expected_ned),))
+
+            # Default-layout read-back; not asserted for duplicate-allowing
+            # arrays in any of the listed cell orders.
+            if not (
+                sparse_cell_order in ("hilbert", "row-major", "col-major")
+                and allows_duplicates
+            ):
+                res = A[ned[0] : ned[1]]
+                assert_array_equal(res["a"], data)
+                self.assertEqual(set(res["d"]), set(coords))
+
+            # Unordered reads (order="U") are checked for duplicate-allowing,
+            # non-Hilbert arrays, via both multi_index and plain indexing.
+            if allows_duplicates and sparse_cell_order != "hilbert":
+                res_u1 = A.query(order="U").multi_index[ned[0] : ned[1]]
+                assert_array_equal(res_u1["a"], data)
+                self.assertEqual(set(res_u1["d"]), set(coords))
+
+                res_u2 = A.query(order="U")[ned[0] : ned[1]]
+                assert_array_equal(res_u2["a"], data)
+                self.assertEqual(set(res_u2["d"]), set(coords))
 
     def test_sparse_string_domain2(self, sparse_cell_order):
         path = self.path("sparse_string_domain2")