Skip to content

Commit

Permalink
apacheGH-40339: [Java] StringView Initial Implementation (apache#40340)
Browse files Browse the repository at this point in the history
### Rationale for this change 

StringView implementation in Java. This PR only includes the core implementation of StringView

### What changes are included in this PR?

- [X] Adding ViewVarBinaryVector
- [X] Adding ViewVarCharVector
- [X] Adding corresponding test cases in the given scope
- [X] Including required implementation extensions with not supported warnings
- [X] Interface for Holders

### Non Goals of this PR

- [ ] apache#40937
- [ ] apache#40936
- [ ] apache#40932
- [ ] apache#40943
- [ ] apache#40944
- [ ] apache#40942
- [ ] https://github.com/apache/arrow/issues/40945
- [ ] https://github.com/apache/arrow/issues/40941
- [ ] https://github.com/apache/arrow/issues/40946

### Are these changes tested?

Yes. Existing test cases on `VarCharVector` and `VarBinaryVector` are verified with view implementations, and additional test cases have been added to check view functionality. Explicit tests have also been added to evaluate the view functionality with `ViewVarCharVector`.

### Are there any user-facing changes?

Yes, this introduces a new API, and some public methods have been included in an interface so that it can be extended to write custom functionality, as is done for views.

* GitHub Issue: apache#40339

Lead-authored-by: Vibhatha Abeykoon <vibhatha@gmail.com>
Co-authored-by: vibhatha <vibhatha@gmail.com>
Co-authored-by: Vibhatha Lakmal Abeykoon <vibhatha@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
  • Loading branch information
2 people authored and tolleybot committed May 2, 2024
1 parent e96aba7 commit 5b555ef
Show file tree
Hide file tree
Showing 40 changed files with 3,898 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ public ColumnBinder visit(ArrowType.Utf8 type) {
new VarCharBinder<>(varChar, jdbcType);
}

@Override
public ColumnBinder visit(ArrowType.Utf8View type) {
  // No binder is produced for this type here; reject it and name the type in the message.
  final String message = "Column binder implemented for type " + type + " is not supported";
  throw new UnsupportedOperationException(message);
}

@Override
public ColumnBinder visit(ArrowType.LargeUtf8 type) {
LargeVarCharVector varChar = (LargeVarCharVector) vector;
Expand All @@ -162,6 +167,11 @@ public ColumnBinder visit(ArrowType.Binary type) {
new VarBinaryBinder<>(varBinary, jdbcType);
}

@Override
public ColumnBinder visit(ArrowType.BinaryView type) {
  // No binder is produced for this type here; reject it and name the type in the message.
  final String message = "Column binder implemented for type " + type + " is not supported";
  throw new UnsupportedOperationException(message);
}

@Override
public ColumnBinder visit(ArrowType.LargeBinary type) {
LargeVarBinaryVector varBinary = (LargeVarBinaryVector) vector;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ public List<ArrowBuf> visit(ArrowType.Utf8 type) {
}
}

@Override
public List<ArrowBuf> visit(ArrowType.Utf8View type) {
  // Buffer import is rejected for this view type; the message carries the offending type.
  final String message = "Importing buffers for view type: " + type + " not supported";
  throw new UnsupportedOperationException(message);
}

@Override
public List<ArrowBuf> visit(ArrowType.LargeUtf8 type) {
try (ArrowBuf offsets = importOffsets(type, LargeVarCharVector.OFFSET_WIDTH)) {
Expand Down Expand Up @@ -237,6 +242,11 @@ public List<ArrowBuf> visit(ArrowType.Binary type) {
}
}

@Override
public List<ArrowBuf> visit(ArrowType.BinaryView type) {
  // Buffer import is rejected for this view type; the message carries the offending type.
  final String message = "Importing buffers for view type: " + type + " not supported";
  throw new UnsupportedOperationException(message);
}

@Override
public List<ArrowBuf> visit(ArrowType.LargeBinary type) {
try (ArrowBuf offsets = importOffsets(type, LargeVarBinaryVector.OFFSET_WIDTH)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.driver.jdbc.converter.impl;

import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.calcite.avatica.AvaticaParameter;
import org.apache.calcite.avatica.remote.TypedValue;

/**
 * AvaticaParameterConverter for BinaryView Arrow types.
 *
 * <p>Binding a value is not supported by this converter: {@link #bindParameter} always throws.
 * Creating the parameter description is delegated to the two-argument {@code createParameter}
 * overload.
 */
public class BinaryViewAvaticaParameterConverter extends BaseAvaticaParameterConverter {

  /**
   * Creates a converter for the given type.
   *
   * @param type the BinaryView Arrow type; not used by this constructor
   */
  public BinaryViewAvaticaParameterConverter(ArrowType.BinaryView type) {
  }

  /**
   * Always rejects the bind request.
   *
   * @param vector the target vector (unused)
   * @param typedValue the value to bind (unused)
   * @param index the target index (unused)
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) {
    // Name the type in the message, matching the wording used by the Utf8View converter.
    throw new UnsupportedOperationException("BinaryView not supported");
  }

  /**
   * Builds the parameter description for {@code field}.
   *
   * @param field the field to describe
   * @return the result of {@code createParameter(field, false)}
   */
  @Override
  public AvaticaParameter createParameter(Field field) {
    return createParameter(field, false);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.driver.jdbc.converter.impl;

import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.calcite.avatica.AvaticaParameter;
import org.apache.calcite.avatica.remote.TypedValue;

/**
 * AvaticaParameterConverter for Utf8View Arrow types.
 *
 * <p>{@link #bindParameter} always throws; parameter creation is delegated to the two-argument
 * {@code createParameter} overload.
 */
public class Utf8ViewAvaticaParameterConverter extends BaseAvaticaParameterConverter {

  /**
   * Creates a converter for the given type.
   *
   * @param type the Utf8View Arrow type; not used by this constructor
   */
  public Utf8ViewAvaticaParameterConverter(ArrowType.Utf8View type) {
    // Intentionally empty.
  }

  /**
   * Always rejects the bind request.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) {
    final String message = "Utf8View not supported";
    throw new UnsupportedOperationException(message);
  }

  /**
   * Builds the parameter description for {@code field}.
   *
   * @param field the field to describe
   * @return the result of {@code createParameter(field, false)}
   */
  @Override
  public AvaticaParameter createParameter(Field field) {
    return this.createParameter(field, false);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ public Boolean visit(ArrowType.Utf8 type) {
return new Utf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index);
}

@Override
public Boolean visit(ArrowType.Utf8View type) {
  // Binding is rejected outright for this type.
  final String message = "Utf8View is unsupported";
  throw new UnsupportedOperationException(message);
}

@Override
public Boolean visit(ArrowType.LargeUtf8 type) {
return new LargeUtf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index);
Expand All @@ -200,6 +205,11 @@ public Boolean visit(ArrowType.Binary type) {
return new BinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index);
}

@Override
public Boolean visit(ArrowType.BinaryView type) {
  // Binding is rejected outright for this type.
  final String message = "BinaryView is unsupported";
  throw new UnsupportedOperationException(message);
}

@Override
public Boolean visit(ArrowType.LargeBinary type) {
return new LargeBinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.stream.Stream;

import org.apache.arrow.driver.jdbc.converter.impl.BinaryAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.BinaryViewAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.BoolAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.DateAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.DecimalAvaticaParameterConverter;
Expand All @@ -43,6 +44,7 @@
import org.apache.arrow.driver.jdbc.converter.impl.TimestampAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.UnionAvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.Utf8AvaticaParameterConverter;
import org.apache.arrow.driver.jdbc.converter.impl.Utf8ViewAvaticaParameterConverter;
import org.apache.arrow.flight.sql.FlightSqlColumnMetadata;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
Expand Down Expand Up @@ -208,6 +210,11 @@ public AvaticaParameter visit(ArrowType.Utf8 type) {
return new Utf8AvaticaParameterConverter(type).createParameter(field);
}

@Override
public AvaticaParameter visit(ArrowType.Utf8View type) {
  // Delegate to the type-specific converter to describe the parameter for the current field.
  final Utf8ViewAvaticaParameterConverter converter = new Utf8ViewAvaticaParameterConverter(type);
  return converter.createParameter(field);
}

@Override
public AvaticaParameter visit(ArrowType.LargeUtf8 type) {
return new LargeUtf8AvaticaParameterConverter(type).createParameter(field);
Expand All @@ -218,6 +225,11 @@ public AvaticaParameter visit(ArrowType.Binary type) {
return new BinaryAvaticaParameterConverter(type).createParameter(field);
}

@Override
public AvaticaParameter visit(ArrowType.BinaryView type) {
  // Delegate to the type-specific converter to describe the parameter for the current field.
  final BinaryViewAvaticaParameterConverter converter =
      new BinaryViewAvaticaParameterConverter(type);
  return converter.createParameter(field);
}

@Override
public AvaticaParameter visit(ArrowType.LargeBinary type) {
return new LargeBinaryAvaticaParameterConverter(type).createParameter(field);
Expand Down
1 change: 1 addition & 0 deletions java/memory/memory-core/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@
requires jsr305;
requires org.immutables.value;
requires org.slf4j;
requires org.checkerframework.checker.qual;
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,6 @@ public interface ReusableBuffer<T> {
* @param len the number of bytes of the new data
*/
void set(ArrowBuf srcBytes, long start, long len);

/**
 * Overload of {@code set} that takes the source data as a byte array instead of an ArrowBuf.
 *
 * <p>NOTE(review): the parameter meanings below are assumed to mirror the ArrowBuf overload;
 * confirm against the implementations.
 *
 * @param srcBytes the array supplying the new data
 * @param start presumably the offset in {@code srcBytes} at which the new data begins
 * @param len presumably the number of bytes of the new data
 */
void set(byte[] srcBytes, long start, long len);
}
10 changes: 10 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@
fields: [],
complex: false
},
{
name: "Utf8View",
fields: [],
complex: false
},
{
name: "LargeUtf8",
fields: [],
Expand All @@ -75,6 +80,11 @@
fields: [],
complex: false
},
{
name: "BinaryView",
fields: [],
complex: false
},
{
name: "LargeBinary",
fields: [],
Expand Down
4 changes: 3 additions & 1 deletion java/vector/src/main/codegen/data/ValueVectorTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,9 @@
fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "ArrowBuf"}],
minor: [
{ class: "VarBinary" , friendlyType: "byte[]" },
{ class: "VarChar" , friendlyType: "Text" }
{ class: "VarChar" , friendlyType: "Text" },
{ class: "ViewVarBinary" , friendlyType: "byte[]" },
{ class: "ViewVarChar" , friendlyType: "Text" }
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions java/vector/src/main/codegen/templates/HolderReaderImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ public void read(Nullable${name}Holder h) {
</#if>
byte[] value = new byte [length];
holder.buffer.getBytes(holder.start, value, 0, length);
<#if minor.class == "VarBinary" || minor.class == "LargeVarBinary">
<#if minor.class == "VarBinary" || minor.class == "LargeVarBinary" || minor.class == "ViewVarBinary">
return value;
<#elseif minor.class == "VarChar" || minor.class == "LargeVarChar">
<#elseif minor.class == "VarChar" || minor.class == "LargeVarChar" || minor.class == "ViewVarChar">
Text text = new Text();
text.set(value);
return text;
Expand Down
2 changes: 1 addition & 1 deletion java/vector/src/main/codegen/templates/UnionReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {

private static final int NUM_SUPPORTED_TYPES = 46;
private static final int NUM_SUPPORTED_TYPES = 48;

private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES];
public UnionVector data;
Expand Down
10 changes: 3 additions & 7 deletions java/vector/src/main/codegen/templates/ValueHolders.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
package org.apache.arrow.vector.holders;

<#include "/@includes/vv_imports.ftl" />

/**
* Source code generated using FreeMarker template ${.template_name}
*/
Expand All @@ -40,11 +39,12 @@ public final class ${className} implements ValueHolder{
/** The last index (exclusive) into the Vector. **/
public int end;
/** The Vector holding the actual values. **/
public ${minor.class}Vector vector;
<#else>
public static final int WIDTH = ${type.width};
<#if mode.name == "Optional">public int isSet;
Expand All @@ -70,10 +70,6 @@ public String toString(){
throw new UnsupportedOperationException();
}
</#if>




}

</#list>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import org.apache.arrow.util.Preconditions;

/**
* Tuple class containing a vector and whether is was created.
* Tuple class containing a vector and whether it was created.
*
* @param <V> The type of vector the result is for.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@
/**
* BaseLargeVariableWidthVector is a base class providing functionality for large strings/large bytes types.
*/
public abstract class BaseLargeVariableWidthVector extends BaseValueVector
implements VariableWidthVector, FieldVector, VectorDefinitionSetter {
public abstract class BaseLargeVariableWidthVector extends BaseValueVector implements VariableWidthFieldVector {
private static final int DEFAULT_RECORD_BYTE_COUNT = 12;
private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT;
private int lastValueCapacity;
Expand Down Expand Up @@ -942,6 +941,7 @@ public void setValueCount(int valueCount) {
*
* @param index target index
*/
@Override
public void fillEmpties(int index) {
handleSafe(index, emptyByteArray.length);
fillHoles(index);
Expand All @@ -955,6 +955,7 @@ public void fillEmpties(int index) {
*
* @param value desired index of last non-null element.
*/
@Override
public void setLastSet(int value) {
  // Plain assignment: no validation or buffer work happens here.
  this.lastSet = value;
}
Expand All @@ -964,6 +965,7 @@ public void setLastSet(int value) {
*
* @return index of the last non-null element
*/
@Override
public int getLastSet() {
  // Returns the stored marker as-is.
  return this.lastSet;
}
Expand Down Expand Up @@ -1003,6 +1005,7 @@ public void setValueLengthSafe(int index, int length) {
* @param index position of element to get
* @return greater than 0 length for non-null element, 0 otherwise
*/
@Override
public int getValueLength(int index) {
assert index >= 0;
if (isSet(index) == 0) {
Expand All @@ -1021,6 +1024,7 @@ public int getValueLength(int index) {
* @param index position of the element to set
* @param value array of bytes to write
*/
@Override
public void set(int index, byte[] value) {
assert index >= 0;
fillHoles(index);
Expand All @@ -1037,6 +1041,7 @@ public void set(int index, byte[] value) {
* @param index position of the element to set
* @param value array of bytes to write
*/
@Override
public void setSafe(int index, byte[] value) {
assert index >= 0;
handleSafe(index, value.length);
Expand All @@ -1055,6 +1060,7 @@ public void setSafe(int index, byte[] value) {
* @param start start index in array of bytes
* @param length length of data in array of bytes
*/
@Override
public void set(int index, byte[] value, int start, int length) {
assert index >= 0;
fillHoles(index);
Expand Down Expand Up @@ -1091,6 +1097,7 @@ public void setSafe(int index, byte[] value, int start, int length) {
* @param start start index in ByteBuffer
* @param length length of data in ByteBuffer
*/
@Override
public void set(int index, ByteBuffer value, int start, int length) {
assert index >= 0;
fillHoles(index);
Expand Down
Loading

0 comments on commit 5b555ef

Please sign in to comment.