Skip to content

Commit

Permalink
Workaround for bug from apache-arrow 5.0 in FixedSizeBinaryBuilder. (#…
Browse files Browse the repository at this point in the history
…510)

* Workaround for bug from apache-arrow 5.0 in FixedSizeBinaryBuilder.

The behavior of "Resize + Advance" of FixedSizeBinaryBuilder changed in
arrow 5.0, leads to bugs in vineyard codebase.

It should be a bug in apache arrow, and hopefully being fixed in upstream.

See also: apache/arrow@e990d177b1f1

Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com>
  • Loading branch information
sighingnow authored Sep 22, 2021
1 parent 19757b1 commit 7968057
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 3 deletions.
3 changes: 3 additions & 0 deletions modules/basic/ds/arrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ class FixedSizeBinaryArrayBuilder : public FixedSizeBinaryArrayBaseBuilder {
std::shared_ptr<arrow::FixedSizeBinaryArray> GetArray() { return array_; }

Status Build(Client& client) override {
VINEYARD_ASSERT(array_->length() == 0 || array_->values()->size() != 0,
"Invalid array values");

std::unique_ptr<BlobWriter> buffer_writer;
RETURN_ON_ERROR(client.CreateBlob(array_->values()->size(), buffer_writer));
memcpy(buffer_writer->data(), array_->values()->data(),
Expand Down
24 changes: 24 additions & 0 deletions modules/basic/ds/arrow_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,30 @@ class PodArrayBuilder : public arrow::FixedSizeBinaryBuilder {
return reinterpret_cast<T*>(
arrow::FixedSizeBinaryBuilder::GetMutableValue(i));
}

// the bahavior of `arrow::FixedSizeBinaryBuilder` has been changed in
// https://github.com/apache/arrow/commit/e990d177, and hopeful to be
// fixed in arrow 0.6.0.

arrow::Status Resize(int64_t capacity) override {
#if defined(ARROW_VERSION) && ARROW_VERSION < 5000000
return arrow::FixedSizeBinaryBuilder::Resize(capacity);
#else
auto status = arrow::FixedSizeBinaryBuilder::Resize(capacity);
if (!status.ok()) {
return status;
}
return arrow::FixedSizeBinaryBuilder::AppendEmptyValues(capacity);
#endif
}

arrow::Status Advance(int64_t elements) {
#if defined(ARROW_VERSION) && ARROW_VERSION < 5000000
return arrow::FixedSizeBinaryBuilder::Advance(elements);
#else
return arrow::Status::OK();
#endif
}
};

/**
Expand Down
1 change: 1 addition & 0 deletions modules/graph/fragment/arrow_fragment.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ limitations under the License.
#include "arrow/util/config.h"
#include "arrow/util/key_value_metadata.h"
#include "boost/algorithm/string.hpp"

#include "grape/graph/adj_list.h"
#include "grape/utils/vertex_array.h"

Expand Down
3 changes: 2 additions & 1 deletion modules/graph/fragment/property_graph_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ limitations under the License.

#include "arrow/builder.h"
#include "arrow/compute/api.h"
#include "arrow/util/config.h"
#include "arrow/util/key_value_metadata.h"
#include "boost/leaf/all.hpp"
#include "grape/utils/atomic_ops.h"

#include "grape/serialization/in_archive.h"
#include "grape/serialization/out_archive.h"
#include "grape/utils/atomic_ops.h"
#include "grape/utils/vertex_array.h"
#include "grape/worker/comm_spec.h"

Expand Down
3 changes: 3 additions & 0 deletions src/client/ds/blob.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ size_t Blob::size() const { return allocated_size(); }
size_t Blob::allocated_size() const { return size_; }

const char* Blob::data() const {
if (size_ == 0) {
return nullptr;
}
if (size_ > 0 && (buffer_ == nullptr || buffer_->size() == 0)) {
throw std::invalid_argument(
"The object might be a (partially) remote object and the payload data "
Expand Down
5 changes: 3 additions & 2 deletions src/server/server/vineyard_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -293,11 +293,13 @@ Status VineyardServer::CreateData(
const json& tree,
callback_t<const ObjectID, const Signature, const InstanceID> callback) {
ENSURE_VINEYARDD_READY();
ObjectID id = GenerateObjectID();
#if !defined(NDEBUG)
if (VLOG_IS_ON(10)) {
VLOG(10) << "Got request from client to create data:";
// NB: glog has limit on maximum lines.
std::cerr << tree.dump(4) << std::endl;
std::cerr << id << " " << ObjectIDToString(id) << " " << tree.dump(4)
<< std::endl;
VLOG(10) << "=========================================";
}
#endif
Expand All @@ -313,7 +315,6 @@ Status VineyardServer::CreateData(

RETURN_ON_ASSERT(type != "vineyard::Blob", "Blob has no metadata");

ObjectID id = GenerateObjectID();
// Check if instance_id information available
RETURN_ON_ASSERT(tree.contains("instance_id"),
"The instance_id filed must be presented");
Expand Down
17 changes: 17 additions & 0 deletions test/arrow_data_structure_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,23 @@ int main(int argc, char** argv) {
auto sliced_internal_array = r3->GetArray();
CHECK(sliced_internal_array->Equals(a3));

arrow::FixedSizeBinaryBuilder b4(arrow::fixed_size_binary(sizeof(S)));
#if defined(ARROW_VERSION) && ARROW_VERSION < 5000000
CHECK_ARROW_ERROR(b4.Resize(10));
#else
CHECK_ARROW_ERROR(b4.Resize(10));
CHECK_ARROW_ERROR(b4.AppendEmptyValues(10));
#endif
#if defined(ARROW_VERSION) && ARROW_VERSION < 5000000
CHECK_ARROW_ERROR(b4.Advance(10));
#endif
std::shared_ptr<arrow::FixedSizeBinaryArray> a4;
CHECK_ARROW_ERROR(b4.Finish(&a4));

CHECK_NE(a4->length(), 0);
CHECK_NE(a4->values()->size(), 0);
CHECK_EQ(a4->values()->size(), a4->length() * sizeof(S));

LOG(INFO) << "Passed binary array wrapper tests...";
}

Expand Down

0 comments on commit 7968057

Please sign in to comment.