Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/xuepanchen/arrow
Browse files Browse the repository at this point in the history
  • Loading branch information
xuepanchen committed Jan 22, 2018
2 parents d4bbd15 + 77f8f3c commit 360e601
Show file tree
Hide file tree
Showing 36 changed files with 463 additions and 257 deletions.
2 changes: 1 addition & 1 deletion c_glib/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ AC_CONFIG_FILES([
arrow-gpu-glib/arrow-gpu-glib.pc
doc/Makefile
doc/reference/Makefile
doc/reference/xml/Makefile
doc/reference/entities.xml
example/Makefile
example/lua/Makefile
tool/Makefile
Expand Down
4 changes: 1 addition & 3 deletions c_glib/doc/reference/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
# specific language governing permissions and limitations
# under the License.

SUBDIRS = \
xml

DOC_MODULE = arrow-glib

DOC_MAIN_SGML_FILE = $(DOC_MODULE)-docs.xml
Expand Down Expand Up @@ -72,4 +69,5 @@ CLEANFILES += \
$(DOC_MODULE).types

EXTRA_DIST += \
entities.xml.in \
meson.build
4 changes: 2 additions & 2 deletions c_glib/doc/reference/arrow-glib-docs.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd"
[
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
<!ENTITY % gtkdocentities SYSTEM "xml/gtkdocentities.ent">
<!ENTITY % gtkdocentities SYSTEM "entities.xml">
%gtkdocentities;
]>
<book id="index">
<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude">
<bookinfo>
<title>&package_name; Reference Manual</title>
<releaseinfo>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
specific language governing permissions and limitations
under the License.
-->
<!ENTITY package "@package@">
<!ENTITY package_bugreport "@package_bugreport@">
<!ENTITY package_name "@package_name@">
<!ENTITY package_string "@package_string@">
<!ENTITY package_url "@package_url@">
<!ENTITY package_version "@package_version@">
<!ENTITY package "@PACKAGE@">
<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@">
<!ENTITY package_name "@PACKAGE_NAME@">
<!ENTITY package_string "@PACKAGE_STRING@">
<!ENTITY package_url "@PACKAGE_URL@">
<!ENTITY package_version "@PACKAGE_VERSION@">
13 changes: 12 additions & 1 deletion c_glib/doc/reference/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,18 @@
# specific language governing permissions and limitations
# under the License.

subdir('xml')
# Collect the values used to fill in the 'entities.xml.in' template.
# NOTE(review): `version` is not defined in this hunk; presumably it is set
# by a parent meson.build -- confirm.
entities_conf = configuration_data()
entities_conf.set('PACKAGE', meson.project_name())
entities_conf.set('PACKAGE_BUGREPORT',
'https://issues.apache.org/jira/browse/ARROW')
entities_conf.set('PACKAGE_NAME', meson.project_name())
# PACKAGE_STRING is "<project name> <version>".
entities_conf.set('PACKAGE_STRING',
' '.join([meson.project_name(), version]))
entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/')
entities_conf.set('PACKAGE_VERSION', version)
# Generate 'entities.xml' from the template using the values set above.
configure_file(input: 'entities.xml.in',
output: 'entities.xml',
configuration: entities_conf)

private_headers = [
]
Expand Down
20 changes: 0 additions & 20 deletions c_glib/doc/reference/xml/Makefile.am

This file was deleted.

31 changes: 0 additions & 31 deletions c_glib/doc/reference/xml/meson.build

This file was deleted.

6 changes: 3 additions & 3 deletions ci/travis_lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ popd
# Fail fast on style checks
sudo pip install flake8

PYARROW_DIR=$TRAVIS_BUILD_DIR/python/pyarrow
PYTHON_DIR=$TRAVIS_BUILD_DIR/python

flake8 --count $PYARROW_DIR
flake8 --count $PYTHON_DIR/pyarrow

# Check Cython files with some checks turned off
flake8 --count --config=$PYTHON_DIR/.flake8.cython \
$PYARROW_DIR
$PYTHON_DIR/pyarrow
6 changes: 3 additions & 3 deletions cpp/src/arrow/python/numpy_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ static Status AppendObjectBinaries(PyArrayObject* arr, PyArrayObject* mask,
continue;
} else if (!PyBytes_Check(obj)) {
std::stringstream ss;
ss << "Error converting to Python objects to bytes: ";
ss << "Error converting from Python objects to bytes: ";
RETURN_NOT_OK(InvalidConversion(obj, "str, bytes", &ss));
return Status::Invalid(ss.str());
}
Expand Down Expand Up @@ -230,7 +230,7 @@ static Status AppendObjectStrings(PyArrayObject* arr, PyArrayObject* mask, int64
*have_bytes = true;
} else {
std::stringstream ss;
ss << "Error converting to Python objects to String/UTF8: ";
ss << "Error converting from Python objects to String/UTF8: ";
RETURN_NOT_OK(InvalidConversion(obj, "str, bytes", &ss));
return Status::Invalid(ss.str());
}
Expand Down Expand Up @@ -278,7 +278,7 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas
tmp_obj.reset(obj);
} else if (!PyBytes_Check(obj)) {
std::stringstream ss;
ss << "Error converting to Python objects to FixedSizeBinary: ";
ss << "Error converting from Python objects to FixedSizeBinary: ";
RETURN_NOT_OK(InvalidConversion(obj, "str, bytes", &ss));
return Status::Invalid(ss.str());
}
Expand Down
31 changes: 31 additions & 0 deletions cpp/src/arrow/table-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,21 @@ TEST_F(TestChunkedArray, EqualsDifferingLengths) {
ASSERT_TRUE(one_->Equals(*another_.get()));
}

// A slice must compare equal to an identical slice of the same range, whether
// that range is requested directly or reached through a chain of slices.
TEST_F(TestChunkedArray, SliceEquals) {
  // Three chunks of 100, 50 and 50 elements (200 in total).
  for (const int64_t chunk_length : {100, 50, 50}) {
    arrays_one_.push_back(MakeRandomArray<Int32Array>(chunk_length));
  }
  Construct();

  // Elements [125, 175) straddle the boundary between the last two chunks.
  const auto direct = one_->Slice(125, 50);
  ASSERT_EQ(direct->length(), 50);
  ASSERT_TRUE(direct->Equals(one_->Slice(125, 50)));

  // 75 + 25 + 25 == 125, so the chained slices cover the same range.
  const auto from_75 = one_->Slice(75);
  const auto from_100 = from_75->Slice(25);
  const auto chained = from_100->Slice(25, 50);
  ASSERT_EQ(chained->length(), 50);
  ASSERT_TRUE(chained->Equals(direct));
}

class TestColumn : public TestChunkedArray {
protected:
void Construct() override {
Expand Down Expand Up @@ -158,6 +173,22 @@ TEST_F(TestColumn, ChunksInhomogeneous) {
ASSERT_RAISES(Invalid, column_->ValidateData());
}

// Column counterpart of the chunked-array slice test: a column slice must
// equal an identical slice, whether taken directly or via chained slices.
TEST_F(TestColumn, SliceEquals) {
  // Three chunks of 100, 50 and 50 elements (200 in total).
  for (const int64_t chunk_length : {100, 50, 50}) {
    arrays_one_.push_back(MakeRandomArray<Int32Array>(chunk_length));
  }
  one_field_ = field("column", int32());
  Construct();

  // Elements [125, 175) straddle the boundary between the last two chunks.
  const auto direct = one_col_->Slice(125, 50);
  ASSERT_EQ(direct->length(), 50);
  ASSERT_TRUE(direct->Equals(one_col_->Slice(125, 50)));

  // 75 + 25 + 25 == 125, so the chained slices cover the same range.
  const auto from_75 = one_col_->Slice(75);
  const auto from_100 = from_75->Slice(25);
  const auto chained = from_100->Slice(25, 50);
  ASSERT_EQ(chained->length(), 50);
  ASSERT_TRUE(chained->Equals(direct));
}

TEST_F(TestColumn, Equals) {
std::vector<bool> null_bitmap(100, true);
std::vector<int32_t> data(100, 1);
Expand Down
24 changes: 24 additions & 0 deletions cpp/src/arrow/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,30 @@ bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
return Equals(*other.get());
}

// Build a new ChunkedArray covering `length` logical elements starting at
// logical position `offset`. Each chunk that overlaps the range contributes
// one sliced chunk to the result.
//
// `offset` must not exceed the total length (checked in debug builds). If
// fewer than `length` elements remain after `offset`, the result simply ends
// with the last chunk.
std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
  DCHECK_LE(offset, length_);

  // Skip the chunks that lie entirely before `offset`, turning `offset` into
  // a position relative to the first chunk that is kept. The bound on
  // curr_chunk is required: when offset == length_ (or there are no chunks)
  // every chunk is skipped, and an unbounded loop would index past the end.
  int curr_chunk = 0;
  while (curr_chunk < num_chunks() && offset >= chunk(curr_chunk)->length()) {
    offset -= chunk(curr_chunk)->length();
    curr_chunk++;
  }

  // Take pieces from successive chunks until `length` is exhausted or the
  // chunks run out. Only the first piece starts at a non-zero offset.
  // NOTE(review): this relies on Array::Slice clamping `length` to what is
  // left in the chunk -- confirm against Array::Slice.
  ArrayVector new_chunks;
  while (curr_chunk < num_chunks() && length > 0) {
    new_chunks.push_back(chunk(curr_chunk)->Slice(offset, length));
    length -= chunk(curr_chunk)->length() - offset;
    offset = 0;
    curr_chunk++;
  }

  return std::make_shared<ChunkedArray>(new_chunks);
}

// Slice from `offset` to the end of the chunked array. The total length is
// passed as an upper bound on the slice length; the two-argument overload
// stops collecting chunks once it runs out of them.
std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
return Slice(offset, length_);
}

Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks)
: field_(field) {
data_ = std::make_shared<ChunkedArray>(chunks);
Expand Down
36 changes: 35 additions & 1 deletion cpp/src/arrow/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class ARROW_EXPORT ChunkedArray {
/// \return the total length of the chunked array; computed on construction
int64_t length() const { return length_; }

/// \return the total number of nulls among all chunks
int64_t null_count() const { return null_count_; }

int num_chunks() const { return static_cast<int>(chunks_.size()); }
Expand All @@ -53,6 +54,20 @@ class ARROW_EXPORT ChunkedArray {

const ArrayVector& chunks() const { return chunks_; }

/// \brief Construct a zero-copy slice of the chunked array with the
/// indicated offset and length
///
/// \param[in] offset the position of the first element in the constructed
/// slice. Must not be greater than length(); this is only checked in debug
/// builds
/// \param[in] length the length of the slice. If there are not enough
/// elements in the chunked array, the length will be adjusted accordingly
///
/// \return a new object wrapped in std::shared_ptr<ChunkedArray>
std::shared_ptr<ChunkedArray> Slice(int64_t offset, int64_t length) const;

/// \brief Slice from offset until end of the chunked array
///
/// \param[in] offset the position of the first element in the constructed
/// slice
///
/// \return a new object wrapped in std::shared_ptr<ChunkedArray>
std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;

std::shared_ptr<DataType> type() const;

bool Equals(const ChunkedArray& other) const;
Expand All @@ -67,8 +82,9 @@ class ARROW_EXPORT ChunkedArray {
ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
};

/// \class Column
/// \brief An immutable column data structure consisting of a field (type
/// metadata) and a logical chunked data array
/// metadata) and a chunked data array
class ARROW_EXPORT Column {
public:
Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
Expand Down Expand Up @@ -97,6 +113,24 @@ class ARROW_EXPORT Column {
/// \return the column's data as a chunked logical array
std::shared_ptr<ChunkedArray> data() const { return data_; }

/// \brief Construct a zero-copy slice of the column with the indicated
/// offset and length
///
/// The slice keeps this column's field; only the chunked data is sliced, by
/// delegating to ChunkedArray::Slice.
///
/// \param[in] offset the position of the first element in the constructed
/// slice
/// \param[in] length the length of the slice. If there are not enough
/// elements in the column, the length will be adjusted accordingly
///
/// \return a new object wrapped in std::shared_ptr<Column>
std::shared_ptr<Column> Slice(int64_t offset, int64_t length) const {
return std::make_shared<Column>(field_, data_->Slice(offset, length));
}

/// \brief Slice from offset until end of the column
///
/// The slice keeps this column's field; only the chunked data is sliced, by
/// delegating to ChunkedArray::Slice.
///
/// \param[in] offset the position of the first element in the constructed
/// slice
///
/// \return a new object wrapped in std::shared_ptr<Column>
std::shared_ptr<Column> Slice(int64_t offset) const {
return std::make_shared<Column>(field_, data_->Slice(offset));
}

bool Equals(const Column& other) const;
bool Equals(const std::shared_ptr<Column>& other) const;

Expand Down
Loading

0 comments on commit 360e601

Please sign in to comment.