Skip to content

Commit

Permalink
GH-40943: [Java] Implement RangeEqualsVisitor for StringView (#41636)
Browse files Browse the repository at this point in the history
### Rationale for this change

Adding `RangeEqualsVisitor` for StringView as discussed in #40943.

### What changes are included in this PR?

This PR adds the `RangeEqualsVisitor` visit method for view vectors, along with test cases that validate it.

### Are these changes tested?

Yes

### Are there any user-facing changes?

No
* GitHub Issue: #40943

Authored-by: Vibhatha Abeykoon <vibhatha@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
  • Loading branch information
vibhatha authored May 16, 2024
1 parent 084387c commit 1c15c88
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
*/
public abstract class BaseVariableWidthViewVector extends BaseValueVector implements VariableWidthFieldVector {
// A single element of a view comprises 16 bytes
protected static final int ELEMENT_SIZE = 16;
public static final int ELEMENT_SIZE = 16;
public static final int INITIAL_VIEW_VALUE_ALLOCATION = 4096;
private static final int INITIAL_BYTE_COUNT = INITIAL_VIEW_VALUE_ALLOCATION * ELEMENT_SIZE;
private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE);
Expand All @@ -70,14 +70,14 @@ public abstract class BaseVariableWidthViewVector extends BaseValueVector implem
*
* */
// Values of at most 12 bytes are stored inline in the view; longer values live in the data buffers
protected static final int INLINE_SIZE = 12;
public static final int INLINE_SIZE = 12;
// The first 4 bytes of view are allocated for length
protected static final int LENGTH_WIDTH = 4;
public static final int LENGTH_WIDTH = 4;
// The second 4 bytes of view are allocated for prefix width
protected static final int PREFIX_WIDTH = 4;
public static final int PREFIX_WIDTH = 4;
// The third 4 bytes of view are allocated for buffer index
protected static final int BUF_INDEX_WIDTH = 4;
protected static final byte[] EMPTY_BYTE_ARRAY = new byte[]{};
public static final int BUF_INDEX_WIDTH = 4;
public static final byte[] EMPTY_BYTE_ARRAY = new byte[]{};
protected ArrowBuf validityBuffer;
// The view buffer is used to store the variable width view elements
protected ArrowBuf viewBuffer;
Expand Down Expand Up @@ -158,6 +158,15 @@ public ArrowBuf getDataBuffer() {
return viewBuffer;
}

/**
 * Returns the data buffers that back the views of this vector.
 *
 * <p>The list handed out is the vector's own {@code dataBuffers} field, not a copy.
 *
 * @return the list of data buffers held by this vector
 */
public List<ArrowBuf> getDataBuffers() {
  return this.dataBuffers;
}

/**
* BaseVariableWidthViewVector doesn't support offset buffer.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.List;
import java.util.function.BiFunction;

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.util.ByteFunctionHelpers;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BaseFixedWidthVector;
Expand Down Expand Up @@ -165,7 +166,10 @@ public Boolean visit(BaseLargeVariableWidthVector left, Range range) {

// Range comparison entry point for view vectors.
@Override
public Boolean visit(BaseVariableWidthViewVector left, Range range) {
// Old behaviour (the single line this hunk removes): view vectors were rejected outright.
throw new UnsupportedOperationException("View vectors are not supported.");
// New behaviour (the lines this hunk adds): report "not equal" when validate(left)
// fails, otherwise compare the requested range element by element.
if (!validate(left)) {
return false;
}
return compareBaseVariableWidthViewVectors(range);
}

@Override
Expand Down Expand Up @@ -450,6 +454,85 @@ protected boolean compareBaseLargeVariableWidthVectors(Range range) {
return true;
}

/**
 * Compares the elements of the left and right view vectors over the given range.
 *
 * <p>Two elements match when both are null, or when both are non-null, have the same
 * length and contain the same bytes. Each view occupies {@code ELEMENT_SIZE} bytes in the
 * view buffer and starts with the value length. Values of at most {@code INLINE_SIZE}
 * bytes are read directly from the view, right after the length field. Longer values are
 * read from the data buffer whose index is stored after the length and prefix fields, at
 * the offset stored after that index.
 *
 * @param range the start positions in the left and right vectors and the number of
 *     elements to compare
 * @return {@code true} if every element pair in the range matches, {@code false} otherwise
 */
protected boolean compareBaseVariableWidthViewVectors(Range range) {
  final BaseVariableWidthViewVector leftVector = (BaseVariableWidthViewVector) left;
  final BaseVariableWidthViewVector rightVector = (BaseVariableWidthViewVector) right;

  final ArrowBuf leftViewBuffer = leftVector.getDataBuffer();
  final ArrowBuf rightViewBuffer = rightVector.getDataBuffer();

  final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE;
  final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH;
  final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH;
  final int bufIndexWidth = BaseVariableWidthViewVector.BUF_INDEX_WIDTH;

  final List<ArrowBuf> leftDataBuffers = leftVector.getDataBuffers();
  final List<ArrowBuf> rightDataBuffers = rightVector.getDataBuffers();

  for (int i = 0; i < range.getLength(); i++) {
    final int leftIndex = range.getLeftStart() + i;
    final int rightIndex = range.getRightStart() + i;

    final boolean isNull = leftVector.isNull(leftIndex);
    if (isNull != rightVector.isNull(rightIndex)) {
      return false;
    }

    if (isNull) {
      // both sides are null: nothing else to compare for this element
      continue;
    }

    final int leftValueLength = leftVector.getValueLength(leftIndex);
    final int rightValueLength = rightVector.getValueLength(rightIndex);

    if (leftValueLength != rightValueLength) {
      return false;
    }

    // Widen before multiplying: index * ELEMENT_SIZE no longer fits in an int once the
    // index exceeds Integer.MAX_VALUE / 16, and the buffer accessors take long offsets.
    final long leftViewStart = (long) leftIndex * elementSize;
    final long rightViewStart = (long) rightIndex * elementSize;

    final int bytesEqual;
    if (leftValueLength > BaseVariableWidthViewVector.INLINE_SIZE) {
      // the value is stored in the dataBuffers; the view holds buffer index and offset
      final int leftDataBufferIndex =
          leftViewBuffer.getInt(leftViewStart + lengthWidth + prefixWidth);
      final int rightDataBufferIndex =
          rightViewBuffer.getInt(rightViewStart + lengthWidth + prefixWidth);

      // kept as long so that offset + length below cannot wrap around
      final long leftDataOffset =
          leftViewBuffer.getInt(leftViewStart + lengthWidth + prefixWidth + bufIndexWidth);
      final long rightDataOffset =
          rightViewBuffer.getInt(rightViewStart + lengthWidth + prefixWidth + bufIndexWidth);

      final ArrowBuf leftDataBuffer = leftDataBuffers.get(leftDataBufferIndex);
      final ArrowBuf rightDataBuffer = rightDataBuffers.get(rightDataBufferIndex);

      bytesEqual = ByteFunctionHelpers.equal(
          leftDataBuffer, leftDataOffset, leftDataOffset + leftValueLength,
          rightDataBuffer, rightDataOffset, rightDataOffset + rightValueLength);
    } else {
      // the value is stored in the view itself, immediately after the length field
      final long leftDataOffset = leftViewStart + lengthWidth;
      final long rightDataOffset = rightViewStart + lengthWidth;

      bytesEqual = ByteFunctionHelpers.equal(
          leftViewBuffer, leftDataOffset, leftDataOffset + leftValueLength,
          rightViewBuffer, rightDataOffset, rightDataOffset + rightValueLength);
    }

    // the helper reports 0 when the two byte ranges differ
    if (bytesEqual == 0) {
      return false;
    }
  }
  return true;
}

protected boolean compareListVectors(Range range) {
ListVector leftVector = (ListVector) left;
ListVector rightVector = (ListVector) right;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
package org.apache.arrow.vector.compare;

import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.nio.charset.Charset;
import java.util.Arrays;
Expand All @@ -33,6 +33,7 @@
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.LargeVarCharVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.ZeroVector;
import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers;
import org.apache.arrow.vector.complex.DenseUnionVector;
Expand All @@ -53,16 +54,16 @@
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

public class TestRangeEqualsVisitor {

private BufferAllocator allocator;

// Runs before each test: creates the RootAllocator that the test methods allocate from.
@Before
@BeforeEach
public void init() {
allocator = new RootAllocator(Long.MAX_VALUE);
}
Expand All @@ -71,8 +72,11 @@ public void init() {
private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
private static final byte[] STR4 = "12345678901234A".getBytes(utf8Charset);
private static final byte[] STR5 = "A2345678901234ABC".getBytes(utf8Charset);
private static final byte[] STR6 = "AB45678901234ABCD".getBytes(utf8Charset);

// Runs after each test: closes the allocator created in init().
@After
@AfterEach
public void terminate() throws Exception {
allocator.close();
}
Expand Down Expand Up @@ -132,6 +136,55 @@ public void testBaseVariableVectorRangeEquals() {
}
}

@Test
public void testBaseVariableViewVectorRangeEquals() {
  try (final ViewVarCharVector first = new ViewVarCharVector("varchar", allocator);
      final ViewVarCharVector second = new ViewVarCharVector("varchar", allocator)) {

    // Both vectors hold the same eleven values; STR4, STR5 and STR6 are longer than
    // 12 bytes, the remaining strings are shorter.
    setVector(first, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4);
    setVector(second, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4);

    final RangeEqualsVisitor visitor = new RangeEqualsVisitor(first, second);
    assertViewRangeExpectations(visitor);

    // Null out the same slots on both sides, then repeat exactly the same checks.
    for (int nullIndex : new int[] {1, 3, 5, 9}) {
      first.setNull(nullIndex);
      second.setNull(nullIndex);
    }
    assertViewRangeExpectations(visitor);
  }
}

// Range checks shared by the "no nulls" and "with nulls" phases of the test above.
private static void assertViewRangeExpectations(RangeEqualsVisitor visitor) {
  // long string in the middle of the range
  assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
  assertFalse(visitor.rangeEquals(new Range(0, 1, 4)));
  // long string at the start of the range
  assertTrue(visitor.rangeEquals(new Range(2, 2, 4)));
  assertFalse(visitor.rangeEquals(new Range(2, 5, 4)));
  // long string at the end of the range
  assertTrue(visitor.rangeEquals(new Range(4, 4, 4)));
  // ranges that start at different positions in the two vectors
  assertTrue(visitor.rangeEquals(new Range(8, 0, 3)));
  assertFalse(visitor.rangeEquals(new Range(4, 5, 3)));
}

@Test
public void testListVectorWithDifferentChild() {
try (final ListVector vector1 = ListVector.empty("list", allocator);
Expand Down Expand Up @@ -476,7 +529,7 @@ public void testDenseUnionVectorEquals() {
}
}

@Ignore
@Disabled
@Test
public void testEqualsWithOutTypeCheck() {
try (final IntVector intVector = new IntVector("int", allocator);
Expand Down

0 comments on commit 1c15c88

Please sign in to comment.