Skip to content

Commit

Permalink
Merge branch 'apache:main' into GAC-odbc-driver
Browse files Browse the repository at this point in the history
  • Loading branch information
alinaliBQ authored Nov 28, 2023
2 parents 39f8308 + 143b475 commit ef032c0
Show file tree
Hide file tree
Showing 25 changed files with 180 additions and 35 deletions.
2 changes: 1 addition & 1 deletion ci/scripts/PKGBUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
_realname=arrow
pkgbase=mingw-w64-${_realname}
pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
pkgver=14.0.0.9000
pkgver=14.0.1.9000
pkgrel=8000
pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
arch=("any")
Expand Down
14 changes: 7 additions & 7 deletions cpp/src/parquet/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -438,9 +438,9 @@ class TypedComparatorImpl
return Helper::Compare(type_length_, a, b);
}

bool Compare(const T& a, const T& b) override { return CompareInline(a, b); }
bool Compare(const T& a, const T& b) const override { return CompareInline(a, b); }

std::pair<T, T> GetMinMax(const T* values, int64_t length) override {
std::pair<T, T> GetMinMax(const T* values, int64_t length) const override {
DCHECK_GT(length, 0);

T min = Helper::DefaultMin();
Expand All @@ -457,7 +457,7 @@ class TypedComparatorImpl

std::pair<T, T> GetMinMaxSpaced(const T* values, int64_t length,
const uint8_t* valid_bits,
int64_t valid_bits_offset) override {
int64_t valid_bits_offset) const override {
DCHECK_GT(length, 0);

T min = Helper::DefaultMin();
Expand All @@ -477,7 +477,7 @@ class TypedComparatorImpl
return {min, max};
}

std::pair<T, T> GetMinMax(const ::arrow::Array& values) override {
std::pair<T, T> GetMinMax(const ::arrow::Array& values) const override {
ParquetException::NYI(values.type()->ToString());
}

Expand All @@ -491,7 +491,7 @@ class TypedComparatorImpl
template <>
std::pair<int32_t, int32_t>
TypedComparatorImpl</*is_signed=*/false, Int32Type>::GetMinMax(const int32_t* values,
int64_t length) {
int64_t length) const {
DCHECK_GT(length, 0);

const uint32_t* unsigned_values = reinterpret_cast<const uint32_t*>(values);
Expand Down Expand Up @@ -537,13 +537,13 @@ std::pair<ByteArray, ByteArray> GetMinMaxBinaryHelper(

template <>
std::pair<ByteArray, ByteArray> TypedComparatorImpl<true, ByteArrayType>::GetMinMax(
const ::arrow::Array& values) {
const ::arrow::Array& values) const {
return GetMinMaxBinaryHelper<true>(*this, values);
}

template <>
std::pair<ByteArray, ByteArray> TypedComparatorImpl<false, ByteArrayType>::GetMinMax(
const ::arrow::Array& values) {
const ::arrow::Array& values) const {
return GetMinMaxBinaryHelper<false>(*this, values);
}

Expand Down
8 changes: 4 additions & 4 deletions cpp/src/parquet/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,16 @@ class TypedComparator : public Comparator {

/// \brief Scalar comparison of two elements, return true if first
/// is strictly less than the second
virtual bool Compare(const T& a, const T& b) = 0;
virtual bool Compare(const T& a, const T& b) const = 0;

/// \brief Compute maximum and minimum elements in a batch of
/// elements without any nulls
virtual std::pair<T, T> GetMinMax(const T* values, int64_t length) = 0;
virtual std::pair<T, T> GetMinMax(const T* values, int64_t length) const = 0;

/// \brief Compute minimum and maximum elements from an Arrow array. Only
/// valid for certain Parquet Type / Arrow Type combinations, like BYTE_ARRAY
/// / arrow::BinaryArray
virtual std::pair<T, T> GetMinMax(const ::arrow::Array& values) = 0;
virtual std::pair<T, T> GetMinMax(const ::arrow::Array& values) const = 0;

/// \brief Compute maximum and minimum elements in a batch of
/// elements with accompanying bitmap indicating which elements are
Expand All @@ -96,7 +96,7 @@ class TypedComparator : public Comparator {
/// the first element in the sequence
virtual std::pair<T, T> GetMinMaxSpaced(const T* values, int64_t length,
const uint8_t* valid_bits,
int64_t valid_bits_offset) = 0;
int64_t valid_bits_offset) const = 0;
};

/// \brief Typed version of Comparator::Make
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
apache-arrow-apt-source (14.0.1-1) unstable; urgency=low

* New upstream release.

-- Raúl Cumplido <raulcumplido@gmail.com> Mon, 06 Nov 2023 22:23:27 -0000

apache-arrow-apt-source (14.0.0-1) unstable; urgency=low

* New upstream release.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ else
fi

%changelog
* Mon Nov 06 2023 Raúl Cumplido <raulcumplido@gmail.com> - 14.0.1-1
- New upstream release.

* Thu Oct 19 2023 Raúl Cumplido <raulcumplido@gmail.com> - 14.0.0-1
- New upstream release.

Expand Down
6 changes: 6 additions & 0 deletions dev/tasks/linux-packages/apache-arrow/debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
apache-arrow (14.0.1-1) unstable; urgency=low

* New upstream release.

-- Raúl Cumplido <raulcumplido@gmail.com> Mon, 06 Nov 2023 22:23:27 -0000

apache-arrow (14.0.0-1) unstable; urgency=low

* New upstream release.
Expand Down
3 changes: 3 additions & 0 deletions dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,9 @@ Documentation for Apache Parquet GLib.
%{_datadir}/gtk-doc/html/parquet-glib/

%changelog
* Mon Nov 06 2023 Raúl Cumplido <raulcumplido@gmail.com> - 14.0.1-1
- New upstream release.

* Thu Oct 19 2023 Raúl Cumplido <raulcumplido@gmail.com> - 14.0.0-1
- New upstream release.

Expand Down
5 changes: 5 additions & 0 deletions docs/source/_static/versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
"url": "https://arrow.apache.org/docs/",
"preferred": true
},
{
"name": "14.0",
"version": "14.0/",
"url": "https://arrow.apache.org/docs/14.0/"
},
{
"name": "13.0",
"version": "13.0/",
Expand Down
3 changes: 3 additions & 0 deletions docs/source/developers/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,9 @@ Be sure to go through on the following checklist:

.. code-block:: Bash
# You can run the script with BUMP_TAG=0 and BUMP_PUSH=0
# this will avoid default pushing to main and pushing the tag
# but you will require to push manually after reviewing the commits.
# dev/release/post-11-bump-versions.sh 10.0.0 11.0.0
dev/release/post-11-bump-versions.sh X.Y.Z NEXT_X.NEXT_Y.NEXT_Z
Expand Down
4 changes: 2 additions & 2 deletions docs/source/format/CDataInterface.rst
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,9 @@ names and types of child fields are read from the child arrays.
+------------------------+---------------------------------------------------+------------+
| ``+L`` | large list | |
+------------------------+---------------------------------------------------+------------+
| ``+lv`` | list-view | |
| ``+vl`` | list-view | |
+------------------------+---------------------------------------------------+------------+
| ``+Lv`` | large list-view | |
| ``+vL`` | large list-view | |
+------------------------+---------------------------------------------------+------------+
| ``+w:123`` | fixed-sized list [123 items] | |
+------------------------+---------------------------------------------------+------------+
Expand Down
2 changes: 1 addition & 1 deletion docs/source/format/Columnar.rst
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ A struct array has its own validity bitmap that is independent of its
child arrays' validity bitmaps. The validity bitmap for the struct
array might indicate a null when one or more of its child arrays has
a non-null value in its corresponding slot; or conversely, a child
array might have a null in its validity bitmap while the struct array's
array might indicate a null in its validity bitmap while the struct array's
validity bitmap shows a non-null value.

Therefore, to know whether a particular child entry is valid, one must
Expand Down
2 changes: 2 additions & 0 deletions go/arrow/array.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ type ArrayData interface {
// Dictionary returns the ArrayData object for the dictionary if this is a
// dictionary array, otherwise it will be nil.
Dictionary() ArrayData
// SizeInBytes returns the size of the ArrayData buffers and any children and/or dictionary in bytes.
SizeInBytes() uint64
}

// Array represents an immutable sequence of values using the Arrow in-memory format.
Expand Down
27 changes: 26 additions & 1 deletion go/arrow/array/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,34 @@ func (d *Data) SetDictionary(dict arrow.ArrayData) {
}
}

// SizeInBytes reports the total byte size of this Data, computed by
// recursively summing the sizes of its buffers, its children, and its
// dictionary (when present).
// The result is an upper bound, because the slice offset is ignored.
func (d *Data) SizeInBytes() uint64 {
	// Safe to call on a nil receiver: a nil Data occupies no bytes.
	if d == nil {
		return 0
	}

	total := uint64(0)

	// Bytes held directly by this node's buffers.
	for _, buf := range d.Buffers() {
		total += uint64(buf.Len())
	}
	// Recurse into nested child data.
	for _, child := range d.Children() {
		total += child.SizeInBytes()
	}
	// Recurse into the dictionary, if this is dictionary-encoded data.
	if dict := d.Dictionary(); dict != nil {
		total += dict.SizeInBytes()
	}

	return total
}

// NewSliceData returns a new slice that shares backing data with the input.
// The returned Data slice starts at i and extends j-i elements, such as:
// slice := data[i:j]
//
// slice := data[i:j]
//
// The returned value must be Release'd after use.
//
// NewSliceData panics if the slice is outside the valid range of the input Data.
Expand Down
75 changes: 75 additions & 0 deletions go/arrow/array/data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,78 @@ func TestDataReset(t *testing.T) {
data.Reset(&arrow.Int64Type{}, 5, data.Buffers(), nil, 1, 2)
}
}

// TestSizeInBytes verifies Data.SizeInBytes for plain buffers, nested
// children, dictionaries, and sliced data (where the offset is deliberately
// not subtracted, making the result an upper bound).
func TestSizeInBytes(t *testing.T) {
// Three 15-byte buffers => 45 bytes per Data node built from buffers1.
var buffers1 = make([]*memory.Buffer, 0, 3)

for i := 0; i < cap(buffers1); i++ {
buffers1 = append(buffers1, memory.NewBufferBytes([]byte("15-bytes-buffer")))
}
data := NewData(&arrow.StringType{}, 10, buffers1, nil, 0, 0)
var arrayData arrow.ArrayData = data
// Same buffers plus one child sharing them: 45 (own) + 45 (child) bytes.
dataWithChild := NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{arrayData}, 0, 0)

t.Run("buffers only", func(t *testing.T) {
expectedSize := uint64(45)
if actualSize := data.SizeInBytes(); actualSize != expectedSize {
t.Errorf("expected size %d, got %d", expectedSize, actualSize)
}
})

t.Run("buffers and child data", func(t *testing.T) {
// 45 bytes in buffers, 45 bytes in child data
expectedSize := uint64(90)
if actualSize := dataWithChild.SizeInBytes(); actualSize != expectedSize {
t.Errorf("expected size %d, got %d", expectedSize, actualSize)
}
})

t.Run("buffers and nested child data", func(t *testing.T) {
var dataWithChildArrayData arrow.ArrayData = dataWithChild
var dataWithNestedChild arrow.ArrayData = NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{dataWithChildArrayData}, 0, 0)
// 45 bytes in buffers, 90 bytes in nested child data
expectedSize := uint64(135)
if actualSize := dataWithNestedChild.SizeInBytes(); actualSize != expectedSize {
t.Errorf("expected size %d, got %d", expectedSize, actualSize)
}
})

t.Run("buffers and dictionary", func(t *testing.T) {
// Dictionary size is counted in addition to the node's own buffers.
dictData := data
dataWithDict := NewDataWithDictionary(&arrow.StringType{}, 10, buffers1, 0, 0, dictData)
// 45 bytes in buffers, 45 bytes in dictionary
expectedSize := uint64(90)
if actualSize := dataWithDict.SizeInBytes(); actualSize != expectedSize {
t.Errorf("expected size %d, got %d", expectedSize, actualSize)
}
})

t.Run("sliced data", func(t *testing.T) {
// A 2-element slice still reports the full backing-buffer size.
sliceData := NewSliceData(arrayData, 3, 5)
// offset is not taken into account in SizeInBytes()
expectedSize := uint64(45)
if actualSize := sliceData.SizeInBytes(); actualSize != expectedSize {
t.Errorf("expected size %d, got %d", expectedSize, actualSize)
}
})

t.Run("sliced data with children", func(t *testing.T) {
var dataWithChildArrayData arrow.ArrayData = dataWithChild
sliceData := NewSliceData(dataWithChildArrayData, 3, 5)
// offset is not taken into account in SizeInBytes()
expectedSize := uint64(90)
if actualSize := sliceData.SizeInBytes(); actualSize != expectedSize {
t.Errorf("expected size %d, got %d", expectedSize, actualSize)
}
})

t.Run("buffers with children which are sliced data", func(t *testing.T) {
// Sliced children contribute their full (unsliced) size as well.
sliceData := NewSliceData(arrayData, 3, 5)
dataWithSlicedChildren := NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{sliceData}, 0, 0)
// offset is not taken into account in SizeInBytes()
expectedSize := uint64(90)
if actualSize := dataWithSlicedChildren.SizeInBytes(); actualSize != expectedSize {
t.Errorf("expected size %d, got %d", expectedSize, actualSize)
}
})
}
6 changes: 3 additions & 3 deletions js/src/enum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
// v4 doesn't seem to be able to tree-shake the rest of those exports.
//
// We will have to keep these enums in sync when we re-generate the flatbuffers
// code from the shchemas. See js/DEVELOP.md for info on how to run flatbuffers
// code from the schemas. See js/DEVELOP.md for info on how to run flatbuffers
// code generation.
//
////
Expand Down Expand Up @@ -174,7 +174,7 @@ export enum Type {
FixedSizeBinary = 15, /** Fixed-size binary. Each value occupies the same number of bytes */
FixedSizeList = 16, /** Fixed-size list. Each value occupies the same number of bytes */
Map = 17, /** Map of named logical types */
Duration = 18, /** Measure of elapsed time in either seconds, miliseconds, microseconds or nanoseconds. */
Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. */

Dictionary = -1, /** Dictionary aka Category type */
Int8 = -2,
Expand Down Expand Up @@ -215,7 +215,7 @@ export enum BufferType {
OFFSET = 0,

/**
* actual data, either wixed width primitive types in slots or variable width delimited by an OFFSET vector
* actual data, either fixed width primitive types in slots or variable width delimited by an OFFSET vector
*/
DATA = 1,

Expand Down
2 changes: 1 addition & 1 deletion js/src/fb/timestamp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ import { TimeUnit } from './time-unit.js';
* no indication of how to map this information to a physical point in time.
* Naive date-times must be handled with care because of this missing
* information, and also because daylight saving time (DST) may make
* some values ambiguous or non-existent. A naive date-time may be
* some values ambiguous or nonexistent. A naive date-time may be
* stored as a struct with Date and Time fields. However, it may also be
* encoded into a Timestamp column with an empty timezone. The timestamp
* values should be computed "as if" the timezone of the date-time values
Expand Down
2 changes: 1 addition & 1 deletion js/src/ipc/reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ export class RecordBatchReader<T extends TypeMap = any> extends ReadableInterop<

//
// Since TS is a structural type system, we define the following subclass stubs
// so that concrete types exist to associate with with the interfaces below.
// so that concrete types exist to associate with the interfaces below.
//
// The implementation for each RecordBatchReader is hidden away in the set of
// `RecordBatchReaderImpl` classes in the second half of this file. This allows
Expand Down
4 changes: 2 additions & 2 deletions js/src/vector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,8 @@ export class Vector<T extends DataType = any> {
* values.
*
* Memoization is very useful when decoding a value is expensive such as
* Uft8. The memoization creates a cache of the size of the Vector and
* therfore increases memory usage.
* Utf8. The memoization creates a cache of the size of the Vector and
* therefore increases memory usage.
*
* @returns A new vector that memoizes calls to {@link get}.
*/
Expand Down
2 changes: 1 addition & 1 deletion js/src/visitor/builderctor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ export class GetBuilderCtor extends Visitor {
public visitDurationSecond() { return DurationSecondBuilder; }
public visitDurationMillisecond() { return DurationMillisecondBuilder; }
public visitDurationMicrosecond() { return DurationMicrosecondBuilder; }
public visistDurationNanosecond() { return DurationNanosecondBuilder; }
public visitDurationNanosecond() { return DurationNanosecondBuilder; }
public visitFixedSizeList() { return FixedSizeListBuilder; }
public visitMap() { return MapBuilder; }
}
Expand Down
2 changes: 1 addition & 1 deletion js/src/visitor/indexof.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ function indexOfValue<T extends DataType>(data: Data<T>, searchElement?: T['TVal
function indexOfUnion<T extends DataType>(data: Data<T>, searchElement?: T['TValue'] | null, fromIndex?: number): number {
// Unions are special -- they do have a nullBitmap, but so can their children.
// If the searchElement is null, we don't know whether it came from the Union's
// bitmap or one of its childrens'. So we don't interrogate the Union's bitmap,
// bitmap or one of its children's. So we don't interrogate the Union's bitmap,
// since that will report the wrong index if a child has a null before the Union.
const get = getVisitor.getVisitFn(data);
const compare = createElementComparator(searchElement);
Expand Down
4 changes: 2 additions & 2 deletions js/src/visitor/jsonvectorassembler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ export class JSONVectorAssembler extends Visitor {

/** @nocollapse */
public static assemble<T extends RecordBatch>(...batches: T[]) {
const assemlber = new JSONVectorAssembler();
const assembler = new JSONVectorAssembler();
return batches.map(({ schema, data }) => {
return assemlber.visitMany(schema.fields, data.children);
return assembler.visitMany(schema.fields, data.children);
});
}

Expand Down
2 changes: 1 addition & 1 deletion r/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: arrow
Title: Integration to 'Apache' 'Arrow'
Version: 14.0.0.9000
Version: 14.0.1.9000
Authors@R: c(
person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")),
person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")),
Expand Down
Loading

0 comments on commit ef032c0

Please sign in to comment.