Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python] Fix last 2.27+Python+dense failing test case #3269

Merged
merged 3 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,15 @@ def read(
)

arrow_table = pa.concat_tables(arrow_tables)
return pa.Tensor.from_numpy(
arrow_table.column("soma_data").to_numpy().reshape(target_shape)
)
npval = arrow_table.column("soma_data").to_numpy()
# TODO: as currently coded we're looking at the non-empty domain upper
# bound but not its lower bound. That works fine if data are written at
# the start: e.g. domain (0, 99) and data written at 0,1,2,3,4. It
# doesn't work fine if data are written at say (40,41,42,43,44).
#
# This is tracked on https://github.com/single-cell-data/TileDB-SOMA/issues/3271
reshaped = npval.reshape(target_shape)
return pa.Tensor.from_numpy(reshaped)

def write(
self,
Expand Down
64 changes: 58 additions & 6 deletions libtiledbsoma/src/soma/managed_query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ void ManagedQuery::_fill_in_subarrays_if_dense(bool is_read) {
if (current_domain.is_empty()) {
_fill_in_subarrays_if_dense_without_new_shape(is_read);
} else {
_fill_in_subarrays_if_dense_with_new_shape(current_domain);
_fill_in_subarrays_if_dense_with_new_shape(current_domain, is_read);
}
LOG_TRACE("[ManagedQuery] _fill_in_subarrays exit");
}
Expand Down Expand Up @@ -231,7 +231,7 @@ void ManagedQuery::_fill_in_subarrays_if_dense_without_new_shape(bool is_read) {
array_shape = std::make_pair(ned[0], ned[1]);
}
} else {
// Non-empty d0main is not avaiable for access at write time.
// Non-empty domain is not avaiable for access at write time.
array_shape = schema.domain().dimension(0).domain<int64_t>();
}

Expand All @@ -246,7 +246,7 @@ void ManagedQuery::_fill_in_subarrays_if_dense_without_new_shape(bool is_read) {
}

void ManagedQuery::_fill_in_subarrays_if_dense_with_new_shape(
const CurrentDomain& current_domain) {
const CurrentDomain& current_domain, bool is_read) {
LOG_TRACE(
"[ManagedQuery] _fill_in_subarrays_if_dense_with_new_shape enter");
if (current_domain.type() != TILEDB_NDRECTANGLE) {
Expand Down Expand Up @@ -280,13 +280,65 @@ void ManagedQuery::_fill_in_subarrays_if_dense_with_new_shape(
}

std::array<int64_t, 2> lo_hi_arr = ndrect.range<int64_t>(dim_name);
std::pair<int64_t, int64_t> lo_hi_pair(lo_hi_arr[0], lo_hi_arr[1]);
int64_t cd_lo = lo_hi_arr[0];
int64_t cd_hi = lo_hi_arr[1];
int64_t lo = cd_lo;
int64_t hi = cd_hi;

LOG_TRACE(fmt::format(
"[ManagedQuery] _fill_in_subarrays_if_dense_with_new_shape dim "
"name {} current domain ({}, {})",
dim_name,
cd_lo,
cd_hi));

if (is_read) {
// Check if the array has been written to by using the C API as
// there is no way to to check for an empty domain using the current
// C++ API.
//
// Non-empty domain is not avaliable at write time (the core array
// isn't open for read) -- but, we don't need to do this calculation
// for writes anyway.
int32_t is_empty;
int64_t ned[2];
ctx_->handle_error(tiledb_array_get_non_empty_domain_from_name(
ctx_->ptr().get(),
array_->ptr().get(),
dim_name.c_str(),
&ned,
&is_empty));
if (is_empty == 1) {
LOG_TRACE(fmt::format(
"[ManagedQuery] _fill_in_subarrays_if_dense_with_new_shape "
"dim name {} non-empty domain is absent",
dim_name));
} else {
int64_t ned_lo = ned[0];
int64_t ned_hi = ned[1];
LOG_TRACE(fmt::format(
"[ManagedQuery] _fill_in_subarrays_if_dense_with_new_shape "
"dim name {} non-empty domain ({}, {})",
dim_name,
ned_lo,
ned_hi));
if (ned_lo > cd_lo) {
lo = ned_lo;
}
if (ned_hi < cd_hi) {
hi = ned_hi;
}
}
}

LOG_TRACE(fmt::format(
"[ManagedQuery] _fill_in_subarrays_if_dense_with_new_shape dim "
"name {} select ({}, {})",
dim_name,
lo_hi_pair.first,
lo_hi_pair.second));
lo,
hi));

std::pair<int64_t, int64_t> lo_hi_pair(lo, hi);
select_ranges(dim_name, std::vector({lo_hi_pair}));
}
}
Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/src/soma/managed_query.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ class ManagedQuery {
*/
void _fill_in_subarrays_if_dense(bool is_read);
void _fill_in_subarrays_if_dense_with_new_shape(
const CurrentDomain& current_domain);
const CurrentDomain& current_domain, bool is_read);
void _fill_in_subarrays_if_dense_without_new_shape(bool is_read);

// TileDB array being queried.
Expand Down