Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL version of unnest native druid function #13576

Merged
merged 30 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2e72fd3
Some changes for sql unnest
somu-imply Dec 12, 2022
812133f
Updating partial query
somu-imply Dec 13, 2022
80846d3
Updating a test case
somu-imply Dec 14, 2022
828bf95
Adding select project of left to top of correlate
somu-imply Dec 14, 2022
2a308be
Handling the correlate data type check to not blow up
somu-imply Dec 15, 2022
cce6aa8
temp
somu-imply Dec 15, 2022
f0ac1a3
Working version of sql unnest
somu-imply Dec 15, 2022
6385f16
temp changes to debug virtual column creation
somu-imply Dec 17, 2022
92e161a
Adding sql support for virtual columns
somu-imply Dec 21, 2022
6767913
Fixing some test cases
somu-imply Dec 21, 2022
de943e9
More test cases and some changes to support virtual columns and null …
somu-imply Jan 4, 2023
ff561dd
fixing group by on unnested virtual columns
somu-imply Jan 9, 2023
fbb5d5b
Making constructor for correlate slimmer
somu-imply Jan 11, 2023
f820307
Fixed for grouping by on virtual columns for unnest
somu-imply Jan 11, 2023
94f47a5
Refactoring code to use abstract class correlate instead of LogicalCo…
somu-imply Jan 12, 2023
3afd027
adding explain terms to fix the limit issue
somu-imply Jan 12, 2023
6356ba3
test case for unnest on top of a join datasource
somu-imply Jan 12, 2023
8a854dc
Adding javadocs and fixing one test
somu-imply Jan 13, 2023
4f99571
Removing two unused variables and one speel check
somu-imply Jan 13, 2023
0376170
Test with grouping on virtual column
somu-imply Jan 17, 2023
a1e0ea8
Fixing a test case to generate more line coverage
somu-imply Jan 18, 2023
06b3811
Addressing comments part 1
somu-imply Jan 19, 2023
8c03d38
Merge remote-tracking branch 'upstream/master' into sql_unnnest_error…
somu-imply Jan 19, 2023
2bb22fa
Updating the misc.xml
somu-imply Jan 19, 2023
df05cf4
Addressing review comments part 2
somu-imply Jan 19, 2023
533be68
Fixing one code comment
somu-imply Jan 19, 2023
16c750c
spotbugs fix
somu-imply Jan 19, 2023
bf0e27c
Removing throws clause in the clone method
somu-imply Jan 20, 2023
301cbaa
One last fix to reuse a columnName
somu-imply Jan 20, 2023
5320349
Updating the field expression to catch missing index and not pursuing…
somu-imply Jan 23, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,18 @@ public int hashCode()
{
return Objects.hash(base, column, outputName);
}

@Override
public String toString()
{
return "UnnestDataSource{" +
"base=" + base +
", column='" + column + '\'' +
", outputName='" + outputName + '\'' +
", allowList=" + allowList +
'}';
}

}


Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,31 @@ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
if (!outputName.equals(dimensionSpec.getDimension())) {
return baseColumnSelectorFactory.makeDimensionSelector(dimensionSpec);
}
throw new UOE("Unsupported dimension selector while using column value selector for column [%s]", outputName);
// this is done to support virtual columns
// In future a developer should move towards making sure that
// for all dictionary encoded cases we only get the dimension selector
return new BaseSingleValueDimensionSelector()
{
final ColumnValueSelector colSelector = makeColumnValueSelector(dimensionSpec.getDimension());

@Nullable
@Override
protected String getValue()
{
final Object returnedObj = colSelector.getObject();
if (returnedObj == null) {
return null;
} else {
return String.valueOf(returnedObj);
}
}

@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
colSelector.inspectRuntimeShape(inspector);
}
};
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public class UnnestDimensionCursor implements Cursor
private final BitSet allowedBitSet;
private final ColumnSelectorFactory baseColumnSelectorFactory;
private int index;
private IndexedInts indexedIntsForCurrentRow;
@Nullable private IndexedInts indexedIntsForCurrentRow;
private boolean needInitialization;
private SingleIndexInts indexIntsForRow;

Expand Down Expand Up @@ -181,7 +181,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
@Override
public Object getObject()
{
if (indexedIntsForCurrentRow == null) {
if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) {
return null;
}
if (allowedBitSet.isEmpty()) {
Expand Down Expand Up @@ -319,6 +319,7 @@ public void reset()
* This would also create a bitset for dictonary encoded columns to
* check for matching values specified in allowedList of UnnestDataSource.
*/
@Nullable
private void initialize()
{
IdLookup idLookup = dimSelector.idLookup();
Expand Down Expand Up @@ -409,7 +410,12 @@ public int size()
@Override
public int get(int idx)
{
return indexedIntsForCurrentRow.get(index);
// need to get value from the indexed ints
// only if it is non null and has at least 1 value
if (indexedIntsForCurrentRow != null && indexedIntsForCurrentRow.size() > 0) {
return indexedIntsForCurrentRow.get(index);
}
return 0;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

package org.apache.druid.segment;

import org.apache.druid.java.util.common.UOE;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
Expand Down Expand Up @@ -62,7 +61,7 @@ public void test_list_unnest_cursors()
int j = 0;
while (!unnestCursor.isDone()) {
Object colSelectorVal = unnestColumnValueSelector.getObject();
Assert.assertEquals(colSelectorVal.toString(), String.valueOf(j));
Assert.assertEquals(String.valueOf(j), colSelectorVal.toString());
j++;
unnestCursor.advance();
}
Expand Down Expand Up @@ -96,7 +95,7 @@ public void test_list_unnest_cursors_user_supplied_list()
int k = 0;
while (!unnestCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
k++;
unnestCursor.advance();
}
Expand All @@ -112,8 +111,6 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls()
Collections.singletonList(null)
);

List<String> expectedResults = Arrays.asList(null, null, null, null);

//Create base cursor
ListCursor listCursor = new ListCursor(inputList);

Expand Down Expand Up @@ -168,9 +165,9 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls()
while (!unnestCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
if (valueSelectorVal == null) {
Assert.assertEquals(null, expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), null);
} else {
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
}
k++;
unnestCursor.advance();
Expand Down Expand Up @@ -201,7 +198,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists()
int k = 0;
while (!unnestCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
k++;
unnestCursor.advance();
}
Expand Down Expand Up @@ -231,7 +228,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list()
int k = 0;
while (!unnestCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
k++;
unnestCursor.advance();
}
Expand Down Expand Up @@ -265,7 +262,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels()
int k = 0;
while (!unnestCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k).toString());
Assert.assertEquals(expectedResults.get(k).toString(), valueSelectorVal.toString());
k++;
unnestCursor.advance();
}
Expand Down Expand Up @@ -306,7 +303,7 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels()
int k = 0;
while (!parentCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k).toString());
Assert.assertEquals(expectedResults.get(k).toString(), valueSelectorVal.toString());
k++;
parentCursor.advance();
}
Expand Down Expand Up @@ -344,12 +341,12 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls()
if (valueSelectorVal == null) {
Assert.assertEquals(null, expectedResults.get(k));
} else {
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
}
k++;
unnestCursor.advance();
}
Assert.assertEquals(k, expectedResults.size());
Assert.assertEquals(expectedResults.size(), k);
}

@Test
Expand Down Expand Up @@ -382,7 +379,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups()
if (valueSelectorVal == null) {
Assert.assertEquals(null, expectedResults.get(k));
} else {
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
}
k++;
unnestCursor.advance();
Expand Down Expand Up @@ -420,7 +417,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_ignore_set()
if (valueSelectorVal == null) {
Assert.assertEquals(null, expectedResults.get(k));
} else {
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
}
k++;
unnestCursor.advance();
Expand Down Expand Up @@ -455,7 +452,7 @@ public void test_list_unnest_cursors_user_supplied_list_double()
int k = 0;
while (!unnestCursor.isDone()) {
Double valueSelectorVal = unnestColumnValueSelector.getDouble();
Assert.assertEquals(valueSelectorVal, expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal);
k++;
unnestCursor.advance();
}
Expand Down Expand Up @@ -489,7 +486,7 @@ public void test_list_unnest_cursors_user_supplied_list_float()
int k = 0;
while (!unnestCursor.isDone()) {
Float valueSelectorVal = unnestColumnValueSelector.getFloat();
Assert.assertEquals(valueSelectorVal, expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal);
k++;
unnestCursor.advance();
}
Expand Down Expand Up @@ -526,7 +523,7 @@ public void test_list_unnest_cursors_user_supplied_list_long()
Object obj = unnestColumnValueSelector.getObject();
Assert.assertNotNull(obj);
Long valueSelectorVal = unnestColumnValueSelector.getLong();
Assert.assertEquals(valueSelectorVal, expectedResults.get(k));
Assert.assertEquals(expectedResults.get(k), valueSelectorVal);
k++;
unnestCursor.advance();
}
Expand Down Expand Up @@ -561,7 +558,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m
int k = 0;
while (!unnestCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k).toString());
Assert.assertEquals(expectedResults.get(k).toString(), valueSelectorVal.toString());
k++;
unnestCursor.advance();
}
Expand All @@ -570,17 +567,16 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m
Assert.assertFalse(unnestCursor.isDoneOrInterrupted());
}

@Test(expected = UOE.class)
@Test
public void test_list_unnest_cursors_dimSelector()
{
List<Object> inputList = Arrays.asList(
Arrays.asList("a", "b", "c"),
Arrays.asList("e", "f", "g", "h", "i"),
Collections.singletonList("j")
Collections.singletonList(null)
);

List<String> expectedResults = Arrays.asList("a", "b", "c", "e", "f", "g", "h", "i", "j");

List<Object> expectedResults = Arrays.asList("a", "b", "c", "e", "f", "g", "h", "i");
//Create base cursor
ListCursor listCursor = new ListCursor(inputList);

Expand All @@ -592,7 +588,25 @@ public void test_list_unnest_cursors_dimSelector()
OUTPUT_NAME,
IGNORE_SET
);
unnestCursor.getColumnSelectorFactory().makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_NAME));
// should return a column value selector for this case
BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory()
.makeDimensionSelector(
DefaultDimensionSpec.of(
OUTPUT_NAME));
unnestDimSelector.inspectRuntimeShape(null);
int k = 0;
while (!unnestCursor.isDone()) {
if (k < 8) {
Assert.assertEquals(expectedResults.get(k).toString(), unnestDimSelector.getValue());
} else {
Assert.assertNull(unnestDimSelector.getValue());
}
k++;
unnestCursor.advance();
}
Assert.assertEquals(k, 9);
unnestCursor.reset();
Assert.assertNotNull(unnestDimSelector);
}

@Test
Expand Down Expand Up @@ -622,7 +636,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers()
int k = 0;
while (!unnestCursor.isDone()) {
Object valueSelectorVal = unnestColumnValueSelector.getObject();
Assert.assertEquals(valueSelectorVal.toString(), expectedResults.get(k).toString());
Assert.assertEquals(expectedResults.get(k).toString(), valueSelectorVal.toString());
k++;
unnestCursor.advance();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexFieldAccess;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
Expand All @@ -34,6 +35,7 @@
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable;
Expand All @@ -58,6 +60,7 @@
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.rel.CannotBuildQueryException;
import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;
import org.apache.druid.sql.calcite.table.RowSignatures;
import org.joda.time.Interval;
Expand Down Expand Up @@ -214,12 +217,51 @@ public static DruidExpression toDruidExpressionWithPostAggOperands(
return rexCallToDruidExpression(plannerContext, rowSignature, rexNode, postAggregatorVisitor);
} else if (kind == SqlKind.LITERAL) {
return literalToDruidExpression(plannerContext, rexNode);
} else if (kind == SqlKind.FIELD_ACCESS) {
return fieldAccessToDruidExpression(rowSignature, rexNode);
} else {
// Can't translate.
return null;
}
}

private static DruidExpression fieldAccessToDruidExpression(
final RowSignature rowSignature,
final RexNode rexNode
)
{
// Translate field references.
final RexFieldAccess ref = (RexFieldAccess) rexNode;
if (ref.getField().getIndex() > rowSignature.size()) {
// This case arises in the case of a correlation where the rexNode points to a table from the left subtree
// while the underlying datasource is the scan stub created from LogicalValuesRule
// In such a case we throw a CannotBuildQueryException so that Calcite does not go ahead with this path
// This exception is caught while returning false from isValidDruidQuery() method
throw new CannotBuildQueryException(StringUtils.format(
"Cannot build query as column name [%s] does not exist in row [%s]", ref.getField().getName(), rowSignature)
);
}

final String columnName = ref.getField().getName();
final int index = rowSignature.indexOf(columnName);

// This case arises when the rexNode has a name which is not in the underlying stub created using DruidUnnestDataSourceRule
// The column name has name ZERO with rowtype as LONG
// causes the index to be -1. In such a case we cannot build the query
// and throw an exception while returning false from isValidDruidQuery() method
if (index < 0) {
throw new CannotBuildQueryException(StringUtils.format(
"Expression referred to nonexistent index[%d] in row[%s]",
index,
rowSignature
));
}

final Optional<ColumnType> columnType = rowSignature.getColumnType(index);

return DruidExpression.ofColumn(columnType.get(), columnName);
}

private static DruidExpression inputRefToDruidExpression(
final RowSignature rowSignature,
final RexNode rexNode
Expand Down
Loading