Skip to content

Commit

Permalink
GH-41287: [Java] ListViewVector Implementation (#41285)
Browse files Browse the repository at this point in the history
### Rationale for this change

The Apache Arrow format defines the ListView type, which has already been introduced in other language bindings. The objective of this PR is to provide initial ListView support in Java by adding `ListViewVector`.

### Non-Goals 

The following issues propose follow-up work that depends on this PR. They were split out to streamline the implementation process.

- [ ] #41272
- [ ] https://github.com/apache/arrow/issues/41286
- [ ] https://github.com/apache/arrow/issues/41290
- [ ] https://github.com/apache/arrow/issues/41288
- [ ] https://github.com/apache/arrow/issues/41289
- [ ] #41269
- [ ] #41291
- [ ] #41292
- [ ] #41270
- [ ] https://github.com/apache/arrow/issues/41293
- [ ] https://github.com/apache/arrow/issues/41294
- [ ] #41569
- [ ] #41570
- [ ] #41584
- [ ] #41585

### Are these changes tested?

Yes

### Are there any user-facing changes?

No

* GitHub Issue: #41287

Lead-authored-by: Vibhatha Abeykoon <vibhatha@gmail.com>
Co-authored-by: Vibhatha Lakmal Abeykoon <vibhatha@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
  • Loading branch information
vibhatha and vibhatha authored May 16, 2024
1 parent 1c15c88 commit 0574988
Show file tree
Hide file tree
Showing 14 changed files with 3,059 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -256,4 +256,9 @@ public ColumnBinder visit(ArrowType.Interval type) {
public ColumnBinder visit(ArrowType.Duration type) {
throw new UnsupportedOperationException("No column binder implemented for type " + type);
}

@Override
public ColumnBinder visit(ArrowType.ListView type) {
  // No ColumnBinder exists for ListView; reject the type explicitly.
  final String message = "No column binder implemented for type " + type;
  throw new UnsupportedOperationException(message);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.ListView;
import org.apache.arrow.vector.util.DataSizeRoundingUtil;

/**
Expand Down Expand Up @@ -328,4 +329,9 @@ public List<ArrowBuf> visit(ArrowType.Interval type) {
public List<ArrowBuf> visit(ArrowType.Duration type) {
return Arrays.asList(maybeImportBitmap(type), importFixedBytes(type, 1, DurationVector.TYPE_WIDTH));
}

@Override
public List<ArrowBuf> visit(ListView type) {
  // Buffer import is not implemented for ListView; fail with the offending type in the message.
  final String reason = "Importing buffers for view type: " + type + " not supported";
  throw new UnsupportedOperationException(reason);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,11 @@ public Boolean visit(ArrowType.Interval type) {
public Boolean visit(ArrowType.Duration type) {
return new DurationAvaticaParameterConverter(type).bindParameter(vector, typedValue, index);
}

@Override
public Boolean visit(ArrowType.ListView type) {
  // Parameter binding for ListView is not implemented; always throws.
  final String message = "Binding is not yet supported for type " + type;
  throw new UnsupportedOperationException(message);
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,11 @@ public AvaticaParameter visit(ArrowType.Interval type) {
public AvaticaParameter visit(ArrowType.Duration type) {
return new DurationAvaticaParameterConverter(type).createParameter(field);
}

@Override
public AvaticaParameter visit(ArrowType.ListView type) {
  // No AvaticaParameter mapping is implemented for ListView; always throws.
  final String message = "AvaticaParameter not yet supported for type " + type;
  throw new UnsupportedOperationException(message);
}
}

}
5 changes: 5 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@
name: "Duration",
fields: [{name: "unit", type: short, valueType: TimeUnit}],
complex: false
},
{
name: "ListView",
fields: [],
complex: true
}
]
}
24 changes: 23 additions & 1 deletion java/vector/src/main/codegen/templates/UnionListWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.math.BigDecimal;

<@pp.dropOutputFile />
<#list ["List", "LargeList"] as listName>
<#list ["List", "ListView", "LargeList"] as listName>
<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" />
Expand Down Expand Up @@ -59,6 +59,10 @@ public class Union${listName}Writer extends AbstractFieldWriter {
private static final int OFFSET_WIDTH = 4;
</#if>
<#if listName = "ListView">
private static final long SIZE_WIDTH = 4;
</#if>
public Union${listName}Writer(${listName}Vector vector) {
this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
}
Expand Down Expand Up @@ -193,6 +197,24 @@ public void endList() {
setPosition(idx() + 1);
listStarted = false;
}
<#elseif listName == "ListView">
/**
 * Begins a new list entry at the current writer index.
 *
 * Calls {@code vector.startNewValue(idx())}, then moves the child writer to the offset
 * stored in the offset buffer for this index and marks the list as started.
 */
@Override
public void startList() {
vector.startNewValue(idx());
// Child elements for this entry are written starting at the offset recorded for idx().
writer.setPosition(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH));
listStarted = true;
}
/**
 * Finishes the list entry at the current writer index.
 *
 * Stores the entry's size in the size buffer, computed as the child writer's position
 * minus the sum of the sizes of all earlier entries, then advances this writer by one
 * position and clears the started flag.
 */
@Override
public void endList() {
// Running sum of the sizes stored for every index before the current one.
int sizeUptoIdx = 0;
// NOTE(review): every call rescans all earlier sizes, so the work per call grows with idx().
// Presumably the offset read in startList() could replace this sum - confirm against
// startNewValue() before changing.
for (int i = 0; i < idx(); i++) {
sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH);
}
// Size of this entry = child elements written so far minus those owned by earlier entries.
vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx);
setPosition(idx() + 1);
listStarted = false;
}
<#else>
@Override
public void startList() {
Expand Down
2 changes: 1 addition & 1 deletion java/vector/src/main/codegen/templates/UnionReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {

private static final int NUM_SUPPORTED_TYPES = 48;
private static final int NUM_SUPPORTED_TYPES = 49;

private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES];
public UnionVector data;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,18 @@ public class BufferLayout {

/**
* Enumeration of the different logical types a buffer can have.
* Data buffer is common to most of the layouts.
* Offset buffer is used for variable width types.
* Validity buffer is used for nullable types.
* Type buffer is used for Union types.
* Size buffer is used for ListView and LargeListView types.
*/
public enum BufferType {
DATA("DATA"),
OFFSET("OFFSET"),
VALIDITY("VALIDITY"),
TYPE("TYPE_ID");
TYPE("TYPE_ID"),
SIZE("SIZE");

private final String name;

Expand All @@ -57,6 +63,7 @@ public String getName() {
private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32);
private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16);
private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8);
private static final BufferLayout SIZE_BUFFER = new BufferLayout(BufferType.SIZE, 32);

public static BufferLayout typeBuffer() {
return TYPE_BUFFER;
Expand All @@ -70,6 +77,10 @@ public static BufferLayout largeOffsetBuffer() {
return LARGE_OFFSET_BUFFER;
}

/**
 * Returns the shared layout for a size buffer (buffer type SIZE, 32 bits wide).
 *
 * @return the {@code SIZE_BUFFER} constant
 */
public static BufferLayout sizeBuffer() {
return SIZE_BUFFER;
}

/**
* Returns a databuffer for the given bitwidth. Only supports powers of two between 8 and 128
* inclusive.
Expand Down
20 changes: 18 additions & 2 deletions java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,24 @@ public TypeLayout visit(Timestamp type) {
}

@Override
public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
public TypeLayout visit(ArrowType.List type) {
List<BufferLayout> vectors = asList(
BufferLayout.validityVector(),
BufferLayout.offsetBuffer()
);
return new TypeLayout(vectors);
}

@Override
public TypeLayout visit(ArrowType.ListView type) {
  // ListView layout, in buffer order: validity, offsets, sizes.
  return new TypeLayout(asList(
      BufferLayout.validityVector(),
      BufferLayout.offsetBuffer(),
      BufferLayout.sizeBuffer()));
}

@Override
public TypeLayout visit(ArrowType.LargeList type) {
List<BufferLayout> vectors = asList(
Expand Down Expand Up @@ -312,11 +322,17 @@ public Integer visit(Timestamp type) {
}

@Override
public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
public Integer visit(ArrowType.List type) {
// validity buffer + offset buffer
return 2;
}

@Override
public Integer visit(ArrowType.ListView type) {
  // One buffer each for validity, offsets, and sizes.
  final int listViewBufferCount = 3;
  return listViewBufferCount;
}

@Override
public Integer visit(ArrowType.LargeList type) {
// validity buffer + offset buffer
Expand Down
Loading

0 comments on commit 0574988

Please sign in to comment.