Skip to content

Commit

Permalink
fix: is_valid checks in header-only library (#3091)
Browse files Browse the repository at this point in the history
* feat: set an explicit index in Indexed and IndexedOption

Useful in particular for categorical builders

* test: new tests checking presence of a bug in is_valid

* style: pre-commit fixes

* test: renamed test file with pull request number

* test: use const instead of define

* test: use ! instead of not operator (issue with Windows)

* test: switch bug_fixed to  1, tests will fail until bug fixed

* fix: reimplement is_valid to check max of index_ against len of content

* test: print out error message

* fix: is_valid in IndexedOption now checks index_ is less than len of content

* test: IndexOption should have signed index to accommodate -1

* test: print out error from is_valid

* fix: reimplemented is_valid for Union, now checks index is below length of content for each tag

* feat: implement max_index tracking in Indexed

* fix: set max_index_ also in extend_index

* feat: append_valid() now calls append_valid(i)

* fix: set max_index_ in extend_valid

* feat: implement is_valid with max_index_

* feat: check index of Indexed is >0, if not set max_index_ to max int

* feat: custom error message for negative index

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
zonca and pre-commit-ci[bot] authored May 3, 2024
1 parent 7f1d261 commit 082e1b7
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 22 deletions.
93 changes: 71 additions & 22 deletions header-only/layout-builder/awkward/LayoutBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1121,7 +1121,8 @@ namespace awkward {
/// buffer, using `AWKWARD_LAYOUTBUILDER_DEFAULT_OPTIONS` for initializing the buffer.
Indexed()
: index_(
awkward::GrowableBuffer<PRIMITIVE>(AWKWARD_LAYOUTBUILDER_DEFAULT_OPTIONS)) {
awkward::GrowableBuffer<PRIMITIVE>(AWKWARD_LAYOUTBUILDER_DEFAULT_OPTIONS)),
max_index_(0) {
size_t id = 0;
set_id(id);
}
Expand All @@ -1132,7 +1133,8 @@ namespace awkward {
///
/// @param options Initial size configuration of a buffer.
Indexed(const awkward::BuilderOptions& options)
: index_(awkward::GrowableBuffer<PRIMITIVE>(options)) {
: index_(awkward::GrowableBuffer<PRIMITIVE>(options)),
max_index_(0) {
size_t id = 0;
set_id(id);
}
Expand All @@ -1151,6 +1153,19 @@ namespace awkward {
return content_;
}

/// @brief Inserts an explicit value in the `index` buffer and
/// returns the reference to the builder content.
BUILDER&
append_index(size_t i) noexcept {
index_.append(i);
if (i > max_index_) {
max_index_ = i;
} else if (i < 0) {
max_index_ = UINTMAX_MAX;
}
return content_;
}

/// @brief Inserts `size` number indices in the `index` buffer
/// and returns the reference to the builder content.
///
Expand All @@ -1159,6 +1174,9 @@ namespace awkward {
extend_index(size_t size) noexcept {
size_t start = content_.length();
size_t stop = start + size;
if (stop - 1 > max_index_) {
max_index_ = stop - 1;
}
for (size_t i = start; i < stop; i++) {
index_.append(i);
}
Expand Down Expand Up @@ -1189,6 +1207,7 @@ namespace awkward {
/// of the builder.
void
clear() noexcept {
max_index_ = 0;
index_.clear();
content_.clear();
}
Expand All @@ -1211,17 +1230,21 @@ namespace awkward {
/// @brief Checks for validity and consistency.
bool
is_valid(std::string& error) const noexcept {
if (content_.length() != index_.length()) {

if (max_index_ == UINTMAX_MAX) {
std::stringstream out;
out << "Indexed node" << id_ << " has content length "
<< content_.length() << " but index has length " << index_.length()
<< "\n";
out << "Indexed node" << id_ << " has a negative index\n";
error.append(out.str());
return false;
} else if (max_index_ >= content_.length()) {
std::stringstream out;
out << "Indexed node" << id_ << " has index " << max_index_
<< " but content has length "
<< content_.length() << "\n";
error.append(out.str());

return false;
} else {
return content_.is_valid(error);
}
return content_.is_valid(error);
}

/// @brief Copies and concatenates all the accumulated data in each of the
Expand Down Expand Up @@ -1290,6 +1313,9 @@ namespace awkward {

/// @brief Unique form ID.
size_t id_;

/// @brief Keep track of maximum index value.
size_t max_index_;
};

/// @class IndexedOption
Expand All @@ -1310,7 +1336,8 @@ namespace awkward {
IndexedOption()
: index_(
awkward::GrowableBuffer<PRIMITIVE>(AWKWARD_LAYOUTBUILDER_DEFAULT_OPTIONS)),
last_valid_(-1) {
last_valid_(-1),
max_index_(0) {
size_t id = 0;
set_id(id);
}
Expand All @@ -1322,7 +1349,8 @@ namespace awkward {
/// @param options Initial size configuration of a buffer.
IndexedOption(const awkward::BuilderOptions& options)
: index_(awkward::GrowableBuffer<PRIMITIVE>(options)),
last_valid_(-1) {
last_valid_(-1),
max_index_(0) {
size_t id = 0;
set_id(id);
}
Expand All @@ -1338,7 +1366,18 @@ namespace awkward {
BUILDER&
append_valid() noexcept {
last_valid_ = content_.length();
index_.append(last_valid_);
return append_valid(last_valid_);
}

/// @brief Inserts an explicit value in the `index` buffer and
/// returns the reference to the builder content.
BUILDER&
append_valid(size_t i) noexcept {
last_valid_ = content_.length();
index_.append(i);
if (i > max_index_) {
max_index_ = i;
}
return content_;
}

Expand All @@ -1351,6 +1390,9 @@ namespace awkward {
size_t start = content_.length();
size_t stop = start + size;
last_valid_ = stop - 1;
if (last_valid_ > max_index_) {
max_index_ = last_valid_;
}
for (size_t i = start; i < stop; i++) {
index_.append(i);
}
Expand Down Expand Up @@ -1411,17 +1453,16 @@ namespace awkward {
/// @brief Checks for validity and consistency.
bool
is_valid(std::string& error) const noexcept {
if (content_.length() != last_valid_ + 1) {
if (max_index_ >= content_.length()) {
std::stringstream out;
out << "IndexedOption node" << id_ << " has content length "
<< content_.length() << " but last valid index is " << last_valid_
<< "\n";
out << "IndexedOption node" << id_ << " has index " << max_index_
<< " but content has length "
<< content_.length() << "\n";
error.append(out.str());

return false;
} else {
return content_.is_valid(error);
}
return content_.is_valid(error);
}

/// @brief Retrieves the names and sizes (in bytes) of the buffers used
Expand Down Expand Up @@ -1502,6 +1543,9 @@ namespace awkward {

/// @brief Last valid index.
size_t last_valid_;

/// @brief Keep track of maximum index value.
size_t max_index_;
};

/// @class Unmasked
Expand Down Expand Up @@ -2298,11 +2342,16 @@ namespace awkward {
auto index_sequence((std::index_sequence_for<BUILDERS...>()));

std::vector<size_t> lengths = content_lengths(index_sequence);
for (size_t tag = 0; tag < contents_count_; tag++) {
if (lengths[tag] != last_valid_index_[tag] + 1) {
std::unique_ptr<INDEX[]> index_ptr(new INDEX[index_.length()]);
index_.concatenate(index_ptr.get());
std::unique_ptr<TAGS[]> tags_ptr(new TAGS[tags_.length()]);
tags_.concatenate(tags_ptr.get());
for (size_t i = 0; i < index_.length(); i++) {
if (index_ptr.get()[i] < 0 || index_ptr.get()[i] >= lengths[tags_ptr.get()[i]]) {
std::stringstream out;
out << "Union node" << id_ << " has content length " << lengths[tag]
<< " but index length " << last_valid_index_[tag] << "\n";
out << "Union node" << id_ << " has index " << index_ptr.get()[i]
<< " at position " << i << " but content has length "
<< lengths[tags_ptr.get()[i]] << "\n";
error.append(out.str());

return false;
Expand Down
4 changes: 4 additions & 0 deletions header-only/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ endmacro(addtest_nolibs)
addtest_nolibs(test_1494-layout-builder test_1494-layout-builder.cpp)
addtest_nolibs(test_1542-growable-buffer test_1542-growable-buffer.cpp)
addtest_nolibs(test_1560-builder-options test_1560-builder-options.cpp)
addtest_nolibs(test_3091-layout-builder-is-valid
test_3091-layout-builder-is-valid.cpp)

target_link_libraries(test_1494-layout-builder PRIVATE awkward::layout-builder)
target_link_libraries(test_1542-growable-buffer
PRIVATE awkward::growable-buffer)
target_link_libraries(test_1560-builder-options
PRIVATE awkward::builder-options)
target_link_libraries(test_3091-layout-builder-is-valid
PRIVATE awkward::layout-builder)
116 changes: 116 additions & 0 deletions header-only/tests/test_3091-layout-builder-is-valid.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

#include "awkward/LayoutBuilder.h"

// if BUG_FIXED is false, the tests confirm the presence of the bugs in is_valid
const bool BUG_FIXED=1;

template<class PRIMITIVE, class BUILDER>
using IndexedBuilder = awkward::LayoutBuilder::Indexed<PRIMITIVE, BUILDER>;

template<class PRIMITIVE, class BUILDER>
using IndexedOptionBuilder = awkward::LayoutBuilder::IndexedOption<PRIMITIVE, BUILDER>;

template<class PRIMITIVE>
using StringBuilder = awkward::LayoutBuilder::String<PRIMITIVE>;

void
test_Indexed_categorical() {
IndexedBuilder<uint32_t, StringBuilder<double>> builder;
assert(builder.length() == 0);
builder.set_parameters("\"__array__\": \"categorical\"");

auto& subbuilder = builder.append_index(0);
builder.append_index(1);

subbuilder.append("zero");
subbuilder.append("one");

std::string error;
assert(builder.is_valid(error) == true);

// index and content could have different lengths
builder.append_index(1);
assert(builder.is_valid(error) == BUG_FIXED);
}

void
test_Indexed_categorical_invalid_index() {
IndexedBuilder<uint32_t, StringBuilder<double>> builder;
assert(builder.length() == 0);
builder.set_parameters("\"__array__\": \"categorical\"");

auto& subbuilder = builder.append_index(0);
builder.append_index(1);

subbuilder.append("zero");
subbuilder.append("one");

std::string error;
assert(builder.is_valid(error) == true);

// index should be less than the length of content
subbuilder.append("two");
builder.append_index(9);
bool assertion = builder.is_valid(error) == !BUG_FIXED;
std::cout << error << std::endl;
assert(assertion);
}

void
test_IndexedOption_categorical() {
IndexedOptionBuilder<int32_t, StringBuilder<double>> builder;
assert(builder.length() == 0);
builder.set_parameters("\"__array__\": \"categorical\"");

builder.append_invalid();
auto& subbuilder = builder.append_valid(1);
subbuilder.append("zero");
builder.append_valid(1);
subbuilder.append("one");

std::string error;
bool assertion = builder.is_valid(error);
std::cout << error << std::endl;
assert(assertion);

// index and content could have different lengths
builder.append_valid(1);
builder.append_valid(1);
builder.append_valid(1);
assert(builder.is_valid(error) == BUG_FIXED);
}

void
test_IndexedOption_categorical_invalid_index() {
IndexedOptionBuilder<int32_t, StringBuilder<double>> builder;
assert(builder.length() == 0);
builder.set_parameters("\"__array__\": \"categorical\"");

builder.append_invalid();
auto& subbuilder = builder.append_valid(1);
subbuilder.append("zero");
builder.append_valid(1);
subbuilder.append("one");

std::string error;
bool assertion = builder.is_valid(error);
std::cout << error << std::endl;
assert(assertion);

// index should be less than the length of content
builder.append_valid(9);
subbuilder.append("two");
assertion = builder.is_valid(error) == !BUG_FIXED;
std::cout << error << std::endl;
assert(assertion);
}

int main(int /* argc */, char ** /* argv */) {
test_Indexed_categorical();
test_Indexed_categorical_invalid_index();
test_IndexedOption_categorical();
test_IndexedOption_categorical_invalid_index();

return 0;
}

0 comments on commit 082e1b7

Please sign in to comment.