Skip to content

Commit

Permalink
Added an interface for Iterable ValueVectors
Browse files Browse the repository at this point in the history
* The new interface indicates that a ValueVector is iterable
* Contains default methods for getting an Iterator and Iterable
  • Loading branch information
normanj-bitquill committed Jun 6, 2024
1 parent 396f42b commit 83be6e0
Show file tree
Hide file tree
Showing 51 changed files with 155 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@
import org.apache.arrow.dataset.scanner.Scanner;
import org.apache.arrow.dataset.source.Dataset;
import org.apache.arrow.dataset.source.DatasetFactory;
import org.apache.arrow.vector.ValueIterableVector;
import org.apache.arrow.vector.ipc.ArrowReader;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.Text;
import org.apache.arrow.vector.util.ValueVectorUtility;
import org.hamcrest.collection.IsIterableContainingInOrder;
import org.junit.ClassRule;
import org.junit.Test;
Expand Down Expand Up @@ -240,11 +240,13 @@ public void testRunExtendedExpressionsFilter() throws Exception {
int rowcount = 0;
while (reader.loadNextBatch()) {
rowcount += reader.getVectorSchemaRoot().getRowCount();
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("id")),
IsIterableContainingInOrder.contains(new Integer[] {19, 1, 11}));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("name")),
IsIterableContainingInOrder.contains(
new Text("value_19"), new Text("value_1"), new Text("value_11")));
final ValueIterableVector<Integer> idVector =
(ValueIterableVector<Integer>) reader.getVectorSchemaRoot().getVector("id");
assertThat(idVector.getValueIterable(), IsIterableContainingInOrder.contains(new Integer[] {19, 1, 11}));
final ValueIterableVector<Text> nameVector =
(ValueIterableVector<Text>) reader.getVectorSchemaRoot().getVector("name");
assertThat(nameVector.getValueIterable(), IsIterableContainingInOrder.contains(
new Text("value_19"), new Text("value_1"), new Text("value_11")));
}
assertEquals(3, rowcount);
}
Expand Down Expand Up @@ -337,9 +339,12 @@ public void testRunExtendedExpressionsProjection() throws Exception {
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("add_two_to_column_a")),
IsIterableContainingInOrder.contains(21, 3, 13, 23, 47));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b")),
final ValueIterableVector<Integer> sumVector =
(ValueIterableVector<Integer>) reader.getVectorSchemaRoot().getVector("add_two_to_column_a");
assertThat(sumVector.getValueIterable(), IsIterableContainingInOrder.contains(21, 3, 13, 23, 47));
final ValueIterableVector<Text> nameVector =
(ValueIterableVector<Text>) reader.getVectorSchemaRoot().getVector("concat_column_a_and_b");
assertThat(nameVector.getValueIterable(),
IsIterableContainingInOrder.contains(
new Text("value_19 - value_19"), new Text("value_1 - value_1"),
new Text("value_11 - value_11"), new Text("value_21 - value_21"),
Expand Down Expand Up @@ -378,9 +383,9 @@ public void testRunExtendedExpressionsProjectionWithFilterInsteadOfProjectionExc
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector(
"filter_id_lower_than_20")),
IsIterableContainingInOrder.contains(true, true, true, false, false));
final ValueIterableVector<Boolean> booleanVector =
(ValueIterableVector<Boolean>) reader.getVectorSchemaRoot().getVector("filter_id_lower_than_20");
assertThat(booleanVector.getValueIterable(), IsIterableContainingInOrder.contains(true, true, true, false, false));
rowcount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(5, rowcount);
Expand Down Expand Up @@ -450,9 +455,12 @@ public void testRunExtendedExpressionsProjectAndFilter() throws Exception {
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("add_two_to_column_a")),
IsIterableContainingInOrder.contains(21, 3, 13));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b")),
final ValueIterableVector<Integer> sumVector =
(ValueIterableVector<Integer>) reader.getVectorSchemaRoot().getVector("add_two_to_column_a");
assertThat(sumVector.getValueIterable(), IsIterableContainingInOrder.contains(21, 3, 13));
// Look up the projected string column by its exact name; it must match the
// "concat_column_a_and_b" name used by the sibling projection test and by the
// line this statement replaces, otherwise the lookup does not find the column.
final ValueIterableVector<Text> nameVector =
(ValueIterableVector<Text>) reader.getVectorSchemaRoot().getVector("concat_column_a_and_b");
assertThat(nameVector.getValueIterable(),
IsIterableContainingInOrder.contains(
new Text("value_19 - value_19"), new Text("value_1 - value_1"),
new Text("value_11 - value_11")));
Expand Down
4 changes: 3 additions & 1 deletion java/vector/src/main/codegen/templates/DenseUnionVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.arrow.vector.BaseValueVector;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueIterableVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.AbstractStructVector;
import org.apache.arrow.vector.complex.ListVector;
Expand Down Expand Up @@ -62,6 +63,7 @@
import org.apache.arrow.vector.util.CallBack;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
import org.apache.arrow.vector.BaseValueVector;
import org.apache.arrow.vector.ValueIterableVector;
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.apache.arrow.util.Preconditions;

Expand All @@ -84,7 +86,7 @@
* each time the vector is accessed.
* Source code generated using FreeMarker template ${.template_name}
*/
public class DenseUnionVector extends AbstractContainerVector implements FieldVector {
public class DenseUnionVector extends AbstractContainerVector implements FieldVector, ValueIterableVector<Object> {
int valueCount;

NonNullableStructVector internalStruct;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* integer values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class BigIntVector extends BaseFixedWidthVector implements BaseIntVector {
public final class BigIntVector extends BaseFixedWidthVector implements BaseIntVector, ValueIterableVector<Long> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* boolean values which could be null. Each value in the vector corresponds
* to a single bit in the underlying data stream backing the vector.
*/
public final class BitVector extends BaseFixedWidthVector {
public final class BitVector extends BaseFixedWidthVector implements ValueIterableVector<Boolean> {

private static final int HASH_CODE_FOR_ZERO = 17;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* date values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class DateDayVector extends BaseFixedWidthVector {
public final class DateDayVector extends BaseFixedWidthVector implements ValueIterableVector<Integer> {

public static final byte TYPE_WIDTH = 4;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
* date values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class DateMilliVector extends BaseFixedWidthVector {
public final class DateMilliVector extends BaseFixedWidthVector implements ValueIterableVector<LocalDateTime> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
* decimal values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class Decimal256Vector extends BaseFixedWidthVector {
public final class Decimal256Vector extends BaseFixedWidthVector implements ValueIterableVector<BigDecimal> {
public static final int MAX_PRECISION = 76;
public static final byte TYPE_WIDTH = 32;
private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
* decimal values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class DecimalVector extends BaseFixedWidthVector {
public final class DecimalVector extends BaseFixedWidthVector implements ValueIterableVector<BigDecimal> {
public static final int MAX_PRECISION = 38;
public static final byte TYPE_WIDTH = 16;
private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
* A validity buffer (bit vector) is maintained to track which elements in the
* vector are null.
*/
public final class DurationVector extends BaseFixedWidthVector {
public final class DurationVector extends BaseFixedWidthVector implements ValueIterableVector<Duration> {
public static final byte TYPE_WIDTH = 8;

private final TimeUnit unit;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
* binary values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public class FixedSizeBinaryVector extends BaseFixedWidthVector {
public class FixedSizeBinaryVector extends BaseFixedWidthVector implements ValueIterableVector<byte[]> {
private final int byteWidth;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
* short values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector {
public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector,
ValueIterableVector<Short> {
public static final byte TYPE_WIDTH = 2;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
* float values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class Float4Vector extends BaseFixedWidthVector implements FloatingPointVector {
public final class Float4Vector extends BaseFixedWidthVector implements FloatingPointVector,
ValueIterableVector<Float> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
* double values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class Float8Vector extends BaseFixedWidthVector implements FloatingPointVector {
public final class Float8Vector extends BaseFixedWidthVector implements FloatingPointVector,
ValueIterableVector<Double> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* integer values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class IntVector extends BaseFixedWidthVector implements BaseIntVector {
public final class IntVector extends BaseFixedWidthVector implements BaseIntVector, ValueIterableVector<Integer> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
* A validity buffer (bit vector) is maintained to track which elements in the
* vector are null.
*/
public final class IntervalDayVector extends BaseFixedWidthVector {
public final class IntervalDayVector extends BaseFixedWidthVector implements ValueIterableVector<Duration> {
public static final byte TYPE_WIDTH = 8;
private static final byte MILLISECOND_OFFSET = 4;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
* Month, day and nanoseconds are independent from one another and there
* is no specific limits imposed on their values.
*/
public final class IntervalMonthDayNanoVector extends BaseFixedWidthVector {
public final class IntervalMonthDayNanoVector extends BaseFixedWidthVector
implements ValueIterableVector<PeriodDuration> {
public static final byte TYPE_WIDTH = 16;
private static final byte DAY_OFFSET = 4;
private static final byte NANOSECOND_OFFSET = 8;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* interval (years and months) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class IntervalYearVector extends BaseFixedWidthVector {
public final class IntervalYearVector extends BaseFixedWidthVector implements ValueIterableVector<Period> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
* to track which elements in the vector are null.
* The size of the underlying buffer can be over 2GB.
*/
public final class LargeVarBinaryVector extends BaseLargeVariableWidthVector {
public final class LargeVarBinaryVector extends BaseLargeVariableWidthVector implements ValueIterableVector<byte[]> {

/**
* Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* The offset width of this vector is 8, so the underlying buffer can be larger than 2GB.
* </p>
*/
public final class LargeVarCharVector extends BaseLargeVariableWidthVector {
public final class LargeVarCharVector extends BaseLargeVariableWidthVector implements ValueIterableVector<Text> {

/**
* Instantiate a LargeVarCharVector. This doesn't allocate any memory for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
/**
* A null type vector.
*/
public class NullVector implements FieldVector {
public class NullVector implements FieldVector, ValueIterableVector<Object> {

private int valueCount;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* short values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class SmallIntVector extends BaseFixedWidthVector implements BaseIntVector {
public final class SmallIntVector extends BaseFixedWidthVector implements BaseIntVector, ValueIterableVector<Short> {
public static final byte TYPE_WIDTH = 2;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* A validity buffer (bit vector) is maintained to track which elements in the
* vector are null.
*/
public final class TimeMicroVector extends BaseFixedWidthVector {
public final class TimeMicroVector extends BaseFixedWidthVector implements ValueIterableVector<Long> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
* time (millisecond resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeMilliVector extends BaseFixedWidthVector {
public final class TimeMilliVector extends BaseFixedWidthVector implements ValueIterableVector<LocalDateTime> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* time (nanosecond resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeNanoVector extends BaseFixedWidthVector {
public final class TimeNanoVector extends BaseFixedWidthVector implements ValueIterableVector<Long> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* time (seconds resolution) values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class TimeSecVector extends BaseFixedWidthVector {
public final class TimeSecVector extends BaseFixedWidthVector implements ValueIterableVector<Integer> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* timestamp (microsecond resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeStampMicroTZVector extends TimeStampVector {
public final class TimeStampMicroTZVector extends TimeStampVector implements ValueIterableVector<Long> {
private final String timeZone;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* timestamp (microsecond resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeStampMicroVector extends TimeStampVector {
public final class TimeStampMicroVector extends TimeStampVector implements ValueIterableVector<LocalDateTime> {

/**
* Instantiate a TimeStampMicroVector. This doesn't allocate any memory for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* timestamp (millisecond resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeStampMilliTZVector extends TimeStampVector {
public final class TimeStampMilliTZVector extends TimeStampVector implements ValueIterableVector<Long> {
private final String timeZone;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* timestamp (millisecond resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeStampMilliVector extends TimeStampVector {
public final class TimeStampMilliVector extends TimeStampVector implements ValueIterableVector<LocalDateTime> {

/**
* Instantiate a TimeStampMilliVector. This doesn't allocate any memory for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* timestamp (nanosecond resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeStampNanoTZVector extends TimeStampVector {
public final class TimeStampNanoTZVector extends TimeStampVector implements ValueIterableVector<Long> {
private final String timeZone;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* timestamp (nanosecond resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeStampNanoVector extends TimeStampVector {
public final class TimeStampNanoVector extends TimeStampVector implements ValueIterableVector<LocalDateTime> {

/**
* Instantiate a TimeStampNanoVector. This doesn't allocate any memory for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* timestamp (seconds resolution) values which could be null. A validity buffer
* (bit vector) is maintained to track which elements in the vector are null.
*/
public final class TimeStampSecTZVector extends TimeStampVector {
public final class TimeStampSecTZVector extends TimeStampVector implements ValueIterableVector<Long> {
private final String timeZone;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* timestamp (seconds resolution) values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class TimeStampSecVector extends TimeStampVector {
public final class TimeStampSecVector extends TimeStampVector implements ValueIterableVector<LocalDateTime> {

/**
* Instantiate a TimeStampSecVector. This doesn't allocate any memory for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* byte values which could be null. A validity buffer (bit vector) is
* maintained to track which elements in the vector are null.
*/
public final class TinyIntVector extends BaseFixedWidthVector implements BaseIntVector {
public final class TinyIntVector extends BaseFixedWidthVector implements BaseIntVector, ValueIterableVector<Byte> {
public static final byte TYPE_WIDTH = 1;

/**
Expand Down
Loading

0 comments on commit 83be6e0

Please sign in to comment.