From e8de6dcef29acfe6dc3f7b19e000b4b618d592fc Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 13 Feb 2023 16:24:33 -0800 Subject: [PATCH 01/15] Now unnest allows bound, in and selector filters on the unnested column --- .../UnnestColumnValueSelectorCursor.java | 67 ++++----- .../druid/segment/UnnestDimensionCursor.java | 109 ++++++-------- .../druid/segment/UnnestStorageAdapter.java | 30 ++-- .../UnnestColumnValueSelectorCursorTest.java | 76 +++------- .../segment/UnnestStorageAdapterTest.java | 140 ++---------------- 5 files changed, 127 insertions(+), 295 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 5d4340329897..b5f240895712 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -22,15 +22,17 @@ import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; -import java.util.LinkedHashSet; import java.util.List; /** @@ -65,7 +67,7 @@ public class UnnestColumnValueSelectorCursor implements Cursor private final ColumnValueSelector columnValueSelector; private final String columnName; private final String outputName; - private final LinkedHashSet allowSet; + private final ValueMatcher valueMatcher; private int index; private Object currentVal; private List unnestListForCurrentRow; @@ -76,7 +78,7 @@ public UnnestColumnValueSelectorCursor( ColumnSelectorFactory baseColumSelectorFactory, String columnName, String outputColumnName, - LinkedHashSet allowSet + @Nullable Filter filter ) { this.baseCursor = cursor; @@ -86,7 +88,11 @@ public UnnestColumnValueSelectorCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; + if (filter != null) { + this.valueMatcher = filter.makeMatcher(getColumnSelectorFactory()); + } else { + this.valueMatcher = BooleanValueMatcher.of(true); + } } @Override @@ -191,11 +197,7 @@ public boolean isNull() public Object getObject() { if (!unnestListForCurrentRow.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return unnestListForCurrentRow.get(index); - } else if (allowSet.contains((String) unnestListForCurrentRow.get(index))) { - return unnestListForCurrentRow.get(index); - } + return unnestListForCurrentRow.get(index); } return null; } @@ -243,9 +245,23 @@ public void advance() @Override public void advanceUninterruptibly() { - do { + // the index currently points to an element at position i ($e_i) + // while $e_i does not match the filter + // keep advancing the pointer to the first valid match + // calling advanceAndUpdate increments the index position so needs a call to matches() again for new match status + boolean match = valueMatcher.matches(); + if (match) { advanceAndUpdate(); - } while (matchAndProceed()); + match = valueMatcher.matches(); + } + while (!match) { + if (!baseCursor.isDone()) { + advanceAndUpdate(); + match = valueMatcher.matches(); + } else { + return; + } + } } @Override @@ -311,12 +327,11 @@ private void initialize() { this.unnestListForCurrentRow = new ArrayList<>(); getNextRow(needInitialization); - if (allowSet != null) { - if (!allowSet.isEmpty()) { - if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) { - advance(); - } - } + + // If the first value the index is pointing to does not match the filter + // advance the index to the first value which will match + if (!valueMatcher.matches()) { + advance(); } needInitialization = false; } @@ -339,22 +354,4 @@ private void advanceAndUpdate() index++; } } - - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if (allowSet == null || allowSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowSet.contains((String) unnestListForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index 93a56767bbfb..a9ca9084c67e 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -23,16 +23,18 @@ import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.filter.AndFilter; +import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; -import java.util.BitSet; -import java.util.LinkedHashSet; /** * The cursor to help unnest MVDs with dictionary encoding. @@ -79,11 +81,13 @@ public class UnnestDimensionCursor implements Cursor private final DimensionSelector dimSelector; private final String columnName; private final String outputName; - private final LinkedHashSet allowSet; - private final BitSet allowedBitSet; private final ColumnSelectorFactory baseColumnSelectorFactory; - private int index; - @Nullable private IndexedInts indexedIntsForCurrentRow; + private final ValueMatcher valueMatcher; + @Nullable + private final Filter allowFilter; + private int indexForRow; + @Nullable + private IndexedInts indexedIntsForCurrentRow; private boolean needInitialization; private SingleIndexInts indexIntsForRow; @@ -92,18 +96,22 @@ public UnnestDimensionCursor( ColumnSelectorFactory baseColumnSelectorFactory, String columnName, String outputColumnName, - LinkedHashSet allowSet + @Nullable Filter allowFilter ) { this.baseCursor = cursor; this.baseColumnSelectorFactory = baseColumnSelectorFactory; this.dimSelector = this.baseColumnSelectorFactory.makeDimensionSelector(DefaultDimensionSpec.of(columnName)); this.columnName = columnName; - this.index = 0; + this.indexForRow = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; - this.allowedBitSet = new BitSet(); + this.allowFilter = allowFilter; + if (allowFilter != null) { + this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); + } else { + this.valueMatcher = BooleanValueMatcher.of(true); + } } @Override @@ -154,7 +162,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean matches() { - return idForLookup == indexedIntsForCurrentRow.get(index); + return idForLookup == indexedIntsForCurrentRow.get(indexForRow); } @Override @@ -184,14 +192,7 @@ public Object getObject() if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) { return null; } - if (allowedBitSet.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - } else if (allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - return null; + return lookupName(indexedIntsForCurrentRow.get(indexForRow)); } @Override @@ -203,8 +204,10 @@ public Class classOfObject() @Override public int getValueCardinality() { - if (!allowedBitSet.isEmpty()) { - return allowedBitSet.cardinality(); + if (allowFilter instanceof InDimFilter) { + return ((InDimFilter) allowFilter).getValues().size(); + } else if (allowFilter instanceof AndFilter) { + return ((AndFilter) allowFilter).getFilters().size(); } return dimSelector.getValueCardinality(); } @@ -282,9 +285,21 @@ public void advance() @Override public void advanceUninterruptibly() { - do { - advanceAndUpdate(); - } while (matchAndProceed()); + if (!baseCursor.isDone()) { + boolean match = valueMatcher.matches(); + if (match) { + advanceAndUpdate(); + match = valueMatcher.matches(); + } + while (!match) { + if (!baseCursor.isDone()) { + advanceAndUpdate(); + match = valueMatcher.matches(); + } else { + return; + } + } + } } @Override @@ -308,7 +323,7 @@ public boolean isDoneOrInterrupted() @Override public void reset() { - index = 0; + indexForRow = 0; needInitialization = true; baseCursor.reset(); } @@ -322,22 +337,14 @@ public void reset() @Nullable private void initialize() { - IdLookup idLookup = dimSelector.idLookup(); + indexForRow = 0; this.indexIntsForRow = new SingleIndexInts(); - if (allowSet != null && !allowSet.isEmpty() && idLookup != null) { - for (String s : allowSet) { - if (idLookup.lookupId(s) >= 0) { - allowedBitSet.set(idLookup.lookupId(s)); - } - } - } + if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!allowedBitSet.isEmpty()) { - if (!allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - advance(); - } + if (!valueMatcher.matches()) { + advanceAndUpdate(); } needInitialization = false; } @@ -351,43 +358,25 @@ private void initialize() private void advanceAndUpdate() { if (indexedIntsForCurrentRow == null) { - index = 0; + indexForRow = 0; if (!baseCursor.isDone()) { baseCursor.advanceUninterruptibly(); } } else { - if (index >= indexedIntsForCurrentRow.size() - 1) { + if (indexForRow >= indexedIntsForCurrentRow.size() - 1) { if (!baseCursor.isDone()) { baseCursor.advanceUninterruptibly(); } if (!baseCursor.isDone()) { indexedIntsForCurrentRow = dimSelector.getRow(); } - index = 0; + indexForRow = 0; } else { - ++index; + ++indexForRow; } } } - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if ((allowSet == null || allowSet.isEmpty()) && allowedBitSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowedBitSet.get(indexedIntsForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } - // Helper class to help in returning // getRow from the dimensionSelector // This is set in the initialize method @@ -413,7 +402,7 @@ public int get(int idx) // need to get value from the indexed ints // only if it is non null and has at least 1 value if (indexedIntsForCurrentRow != null && indexedIntsForCurrentRow.size() > 0) { - return indexedIntsForCurrentRow.get(index); + return indexedIntsForCurrentRow.get(indexForRow); } return 0; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index f76ab89270af..4c596440f061 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -25,16 +25,13 @@ import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; -import org.apache.druid.segment.filter.AndFilter; import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; -import java.util.Arrays; import java.util.LinkedHashSet; import java.util.Objects; @@ -73,20 +70,19 @@ public Sequence makeCursors( @Nullable QueryMetrics queryMetrics ) { - Filter updatedFilter; - if (allowSet != null && !allowSet.isEmpty()) { - final InDimFilter allowListFilters; - allowListFilters = new InDimFilter(dimensionToUnnest, allowSet); - if (filter != null) { - updatedFilter = new AndFilter(Arrays.asList(filter, allowListFilters)); - } else { - updatedFilter = allowListFilters; - } + final Filter updatedFilter = filter; + + // the filter on the outer unnested column needs to be recreated on the unnested dimension and sent into + // the base cursor + + final Filter forBaseFilter; + if (updatedFilter == null) { + forBaseFilter = updatedFilter; } else { - updatedFilter = filter; + forBaseFilter = updatedFilter.getRequiredColumns().contains(outputColumnName) ? null : updatedFilter; } final Sequence baseCursorSequence = baseAdapter.makeCursors( - updatedFilter, + forBaseFilter, interval, virtualColumns, gran, @@ -107,7 +103,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - allowSet + updatedFilter ); } else { retVal = new UnnestColumnValueSelectorCursor( @@ -115,7 +111,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - allowSet + updatedFilter ); } } else { @@ -124,7 +120,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - allowSet + updatedFilter ); } return retVal; diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java index cf4a98c88035..dbe7d39761c4 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java @@ -27,14 +27,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandlingTest { private static String OUTPUT_NAME = "unnested-column"; - private static LinkedHashSet IGNORE_SET = null; - private static LinkedHashSet IGNORE_SET1 = new LinkedHashSet<>(Arrays.asList("b", "f")); @Test @@ -54,7 +51,7 @@ public void test_list_unnest_cursors() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -88,7 +85,7 @@ public void test_list_unnest_cursors_user_supplied_list() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -120,7 +117,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -157,7 +154,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -191,7 +188,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -221,7 +218,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -255,7 +252,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -289,14 +286,14 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor( childCursor, childCursor.getColumnSelectorFactory(), OUTPUT_NAME, "tmp-out", - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory() .makeColumnValueSelector("tmp-out"); @@ -331,7 +328,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -369,7 +366,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -387,44 +384,6 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() Assert.assertEquals(k, 10); } - @Test - public void test_list_unnest_cursors_user_supplied_list_with_ignore_set() - { - List inputList = Arrays.asList( - Arrays.asList("a", "b", "c"), - Arrays.asList("e", "f", "g", "h", "i"), - Collections.singletonList("j") - ); - - List expectedResults = Arrays.asList("b", "f"); - - //Create base cursor - ListCursor listCursor = new ListCursor(inputList); - - //Create unnest cursor - UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( - listCursor, - listCursor.getColumnSelectorFactory(), - "dummy", - OUTPUT_NAME, - IGNORE_SET1 - ); - ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() - .makeColumnValueSelector(OUTPUT_NAME); - int k = 0; - while (!unnestCursor.isDone()) { - Object valueSelectorVal = unnestColumnValueSelector.getObject(); - if (valueSelectorVal == null) { - Assert.assertEquals(null, expectedResults.get(k)); - } else { - Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString()); - } - k++; - unnestCursor.advance(); - } - Assert.assertEquals(k, 2); - } - @Test public void test_list_unnest_cursors_user_supplied_list_double() { @@ -445,7 +404,7 @@ public void test_list_unnest_cursors_user_supplied_list_double() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -479,7 +438,7 @@ public void test_list_unnest_cursors_user_supplied_list_float() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -513,7 +472,7 @@ public void test_list_unnest_cursors_user_supplied_list_long() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -550,7 +509,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -586,7 +545,7 @@ public void test_list_unnest_cursors_dimSelector() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); // should return a column value selector for this case BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory() @@ -629,7 +588,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -643,4 +602,3 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() Assert.assertEquals(k, 9); } } - diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 35d42b82d4b3..2b75334e912c 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -20,11 +20,14 @@ package org.apache.druid.segment; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.generator.GeneratorBasicSchemas; @@ -180,7 +183,6 @@ public void test_group_of_unnest_adapters_column_capabilities() @Test public void test_unnest_adapters_basic() { - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors( null, UNNEST_STORAGE_ADAPTER.getInterval(), @@ -203,11 +205,8 @@ public void test_unnest_adapters_basic() cursor.advance(); count++; } - /* - each row has 8 entries. - unnest 2 rows -> 16 rows after unnest - */ - Assert.assertEquals(count, 16); + + Assert.assertEquals(16, count); return null; }); @@ -249,151 +248,44 @@ public void test_two_levels_of_unnest_adapters() unnest 2 rows -> 16 entries also the value cardinality unnest of unnest -> 16*8 = 128 rows */ - Assert.assertEquals(count, 128); - Assert.assertEquals(dimSelector.getValueCardinality(), 16); + Assert.assertEquals(128, count); + Assert.assertEquals(16, dimSelector.getValueCardinality()); return null; }); } @Test - public void test_unnest_adapters_with_allowList() + public void test_unnest_adapters_basic_with_filter() { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), + Filter f = new InDimFilter(OUTPUT_COLUMN_NAME, ImmutableSet.of("1", "3", "5")); + Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors( + f, + UNNEST_STORAGE_ADAPTER.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null ); + List expectedResults = Arrays.asList("1", "3", "5"); cursorSequence.accumulate(null, (accumulated, cursor) -> { ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME); - int count = 0; while (!cursor.isDone()) { Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); if (dimSelectorVal == null) { Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); } + Assert.assertEquals(expectedResults.get(count), dimSelectorVal.toString()); cursor.advance(); count++; } - /* - each row has 8 distinct entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - */ - Assert.assertEquals(count, 3); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); + //As we are filtering on 1, 3 and 5 there should be 3 entries + Assert.assertEquals(3, count); return null; }); - } - @Test - public void test_two_levels_of_unnest_adapters_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER3.makeCursors( - null, - UNNEST_STORAGE_ADAPTER3.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER3; - Assert.assertEquals(adapter.getDimensionToUnnest(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertEquals(adapter.getMaxValue(columnName), adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertEquals(adapter.getMinValue(columnName), adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME1)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME1); - - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); - } - cursor.advance(); - count++; - } - /* - each row has 8 distinct entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - unnest of that unnest will have 3*3 = 9 entries - */ - Assert.assertEquals(count, 9); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_unnest_adapters_methods_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER1; - Assert.assertEquals(adapter.getDimensionToUnnest(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertEquals(adapter.getMaxValue(columnName), adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertEquals(adapter.getMinValue(columnName), adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - IdLookup idlookUp = dimSelector.idLookup(); - Assert.assertFalse(dimSelector.isNull()); - int[] indices = new int[]{1, 3, 5}; - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Assert.assertEquals(idlookUp.lookupId((String) dimSelectorVal), indices[count]); - // after unnest first entry in get row should equal the object - // and the row size will always be 1 - Assert.assertEquals(dimSelector.getRow().get(0), indices[count]); - Assert.assertEquals(dimSelector.getRow().size(), 1); - Assert.assertNotNull(dimSelector.makeValueMatcher(OUTPUT_COLUMN_NAME)); - cursor.advance(); - count++; - } - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - Assert.assertEquals(count, 3); - return null; - }); } } From 974391179a652366dc1357e9a72f3bba3abe04d5 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 15 Feb 2023 11:06:36 -0800 Subject: [PATCH 02/15] Filters on unnested columns over virtual columns --- .../druid/segment/UnnestStorageAdapter.java | 21 ++++--- .../query/scan/UnnestScanQueryRunnerTest.java | 59 ------------------- 2 files changed, 13 insertions(+), 67 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 4c596440f061..6ffe7b80199d 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -70,16 +70,21 @@ public Sequence makeCursors( @Nullable QueryMetrics queryMetrics ) { - final Filter updatedFilter = filter; - // the filter on the outer unnested column needs to be recreated on the unnested dimension and sent into // the base cursor + // currently testing it out for in filter and selector filter + // TBD: boun final Filter forBaseFilter; - if (updatedFilter == null) { - forBaseFilter = updatedFilter; + if (filter == null) { + forBaseFilter = filter; } else { - forBaseFilter = updatedFilter.getRequiredColumns().contains(outputColumnName) ? null : updatedFilter; + // if the filter has thw unnested column + // do not pass it into the base cursor + // if there is a filter as d2 > 1 and unnest-d2 < 10 + // Calcite would push the filter on d2 into the data source + // and only the filter on unnest-d2 < 10 will appear here + forBaseFilter = filter.getRequiredColumns().contains(outputColumnName) ? null : filter; } final Sequence baseCursorSequence = baseAdapter.makeCursors( forBaseFilter, @@ -103,7 +108,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - updatedFilter + filter ); } else { retVal = new UnnestColumnValueSelectorCursor( @@ -111,7 +116,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - updatedFilter + filter ); } } else { @@ -120,7 +125,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - updatedFilter + filter ); } return retVal; diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index 4de22cb00610..444068fbeafd 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -464,65 +464,6 @@ public void testUnnestRunnerWithOrdering() ScanQueryRunnerTest.verify(ascendingExpectedResults, results); } - @Test - public void testUnnestRunnerNonNullAllowSet() - { - ScanQuery query = newTestUnnestQueryWithAllowSet() - .intervals(I_0112_0114) - .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - .limit(3) - .build(); - - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - FACTORY, - new IncrementalIndexSegment( - index, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - - Iterable results = queryRunner.run(QueryPlus.wrap(query)).toList(); - - String[] columnNames; - if (legacy) { - columnNames = new String[]{ - getTimestampName() + ":TIME", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } else { - columnNames = new String[]{ - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } - String[] values; - if (legacy) { - values = new String[]{ - "2011-01-12T00:00:00.000Z\ta", - "2011-01-12T00:00:00.000Z\tb", - "2011-01-13T00:00:00.000Z\ta" - }; - } else { - values = new String[]{ - "a", - "b", - "a" - }; - } - - final List>> events = ScanQueryRunnerTest.toEvents(columnNames, legacy, values); - List expectedResults = toExpected( - events, - legacy - ? Lists.newArrayList(getTimestampName(), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - : Collections.singletonList(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST), - 0, - 3 - ); - ScanQueryRunnerTest.verify(expectedResults, results); - } - private String getTimestampName() { From 933ccbe0c3e356e7cba5c9a9d1d63cef845118d6 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 15 Feb 2023 15:25:43 -0800 Subject: [PATCH 03/15] Removing allowList to make way for filters --- .../apache/druid/query/UnnestDataSource.java | 28 ++++--------- .../druid/segment/UnnestSegmentReference.java | 8 +--- .../druid/segment/UnnestStorageAdapter.java | 5 +-- .../druid/query/QueryRunnerTestHelper.java | 3 +- .../groupby/UnnestGroupByQueryRunnerTest.java | 12 ++---- .../query/scan/UnnestScanQueryRunnerTest.java | 25 +----------- .../query/topn/UnnestTopNQueryRunnerTest.java | 6 +-- .../segment/UnnestStorageAdapterTest.java | 14 ++----- .../calcite/rel/DruidCorrelateUnnestRel.java | 3 +- .../calcite/rel/DruidUnnestDatasourceRel.java | 3 +- .../sql/calcite/CalciteArraysQueryTest.java | 39 +++++++------------ 11 files changed, 38 insertions(+), 108 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 407aea5c39b9..0db6e7ea397e 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -28,8 +28,6 @@ import org.apache.druid.segment.UnnestSegmentReference; import org.apache.druid.utils.JvmUtils; -import javax.annotation.Nullable; -import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -38,7 +36,7 @@ /** * The data source for representing an unnest operation. - * + *

* An unnest data source has the following: * a base data source which is to be unnested * the column name of the MVD which will be unnested @@ -50,30 +48,26 @@ public class UnnestDataSource implements DataSource private final DataSource base; private final String column; private final String outputName; - private final LinkedHashSet allowList; private UnnestDataSource( DataSource dataSource, String columnName, - String outputName, - LinkedHashSet allowList + String outputName ) { this.base = dataSource; this.column = columnName; this.outputName = outputName; - this.allowList = allowList; } @JsonCreator public static UnnestDataSource create( @JsonProperty("base") DataSource base, @JsonProperty("column") String columnName, - @JsonProperty("outputName") String outputName, - @Nullable @JsonProperty("allowList") LinkedHashSet allowList + @JsonProperty("outputName") String outputName ) { - return new UnnestDataSource(base, columnName, outputName, allowList); + return new UnnestDataSource(base, columnName, outputName); } @JsonProperty("base") @@ -94,12 +88,6 @@ public String getOutputName() return outputName; } - @JsonProperty("allowList") - public LinkedHashSet getAllowList() - { - return allowList; - } - @Override public Set getTableNames() { @@ -118,7 +106,7 @@ public DataSource withChildren(List children) if (children.size() != 1) { throw new IAE("Expected [1] child, got [%d]", children.size()); } - return new UnnestDataSource(children.get(0), column, outputName, allowList); + return new UnnestDataSource(children.get(0), column, outputName); } @Override @@ -162,8 +150,7 @@ public Function createSegmentMapFunction( new UnnestSegmentReference( segmentMapFn.apply(baseSegment), column, - outputName, - allowList + outputName ); } } @@ -174,7 +161,7 @@ public Function createSegmentMapFunction( @Override public DataSource withUpdatedDataSource(DataSource newSource) { - return new UnnestDataSource(newSource, column, outputName, allowList); + return new UnnestDataSource(newSource, column, outputName); } @Override @@ -223,7 +210,6 @@ public String toString() "base=" + base + ", column='" + column + '\'' + ", outputName='" + outputName + '\'' + - ", allowList=" + allowList + '}'; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java index 9da6b8132cbb..cd24ee5c0cf4 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java @@ -28,7 +28,6 @@ import javax.annotation.Nullable; import java.io.Closeable; import java.io.IOException; -import java.util.LinkedHashSet; import java.util.Optional; /** @@ -42,14 +41,12 @@ public class UnnestSegmentReference implements SegmentReference private final SegmentReference baseSegment; private final String dimension; private final String renamedOutputDimension; - private final LinkedHashSet allowSet; - public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName, LinkedHashSet allowList) + public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName) { this.baseSegment = baseSegment; this.dimension = dimension; this.renamedOutputDimension = outputName; - this.allowSet = allowList; } @Override @@ -102,8 +99,7 @@ public StorageAdapter asStorageAdapter() return new UnnestStorageAdapter( baseSegment.asStorageAdapter(), dimension, - renamedOutputDimension, - allowSet + renamedOutputDimension ); } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 6ffe7b80199d..46491839b08a 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -45,19 +45,16 @@ public class UnnestStorageAdapter implements StorageAdapter private final StorageAdapter baseAdapter; private final String dimensionToUnnest; private final String outputColumnName; - private final LinkedHashSet allowSet; public UnnestStorageAdapter( final StorageAdapter baseAdapter, final String dimension, - final String outputColumnName, - final LinkedHashSet allowSet + final String outputColumnName ) { this.baseAdapter = baseAdapter; this.dimensionToUnnest = dimension; this.outputColumnName = outputColumnName; - this.allowSet = allowSet; } @Override diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index 666af0d2e615..3edd8ccb59f8 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -105,8 +105,7 @@ public class QueryRunnerTestHelper public static final DataSource UNNEST_DATA_SOURCE = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST ); public static final Granularity DAY_GRAN = Granularities.DAY; diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index cc2a722d4606..0abbff8bbd2f 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -248,8 +248,7 @@ public void testGroupBy() .setDataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -467,8 +466,7 @@ public void testGroupByOnMissingColumn() .setDataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions( @@ -595,8 +593,7 @@ public void testGroupByOnUnnestedVirtualColumn() final DataSource unnestDataSource = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST ); GroupByQuery query = makeQueryBuilder() @@ -699,8 +696,7 @@ public void testGroupByOnUnnestedVirtualMultiColumn() final DataSource unnestDataSource = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST ); GroupByQuery query = makeQueryBuilder() diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index 444068fbeafd..e2906ab57eb6 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -45,9 +45,7 @@ import org.junit.runners.Parameterized; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -95,23 +93,6 @@ private Druids.ScanQueryBuilder newTestUnnestQuery() .legacy(legacy); } - private Druids.ScanQueryBuilder newTestUnnestQueryWithAllowSet() - { - List allowList = Arrays.asList("a", "b", "c"); - LinkedHashSet allowSet = new LinkedHashSet(allowList); - return Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - allowSet - )) - .columns(Collections.emptyList()) - .eternityInterval() - .limit(3) - .legacy(legacy); - } - @Test public void testScanOnUnnest() { @@ -179,8 +160,7 @@ public void testUnnestRunnerVirtualColumnsUsingSingleColumn() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST )) .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -253,8 +233,7 @@ public void testUnnestRunnerVirtualColumnsUsingMultipleColumn() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST )) .columns(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() diff --git a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java index cfb50d06823e..0433ca66e979 100644 --- a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java @@ -254,8 +254,7 @@ public void testTopNStringVirtualColumnUnnest() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .virtualColumns( @@ -341,8 +340,7 @@ public void testTopNStringVirtualMultiColumnUnnest() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .virtualColumns( diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 2b75334e912c..2d4fcab60e37 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -45,7 +45,6 @@ import org.junit.Test; import java.util.Arrays; -import java.util.LinkedHashSet; import java.util.List; public class UnnestStorageAdapterTest extends InitializedNullHandlingTest @@ -61,7 +60,6 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest private static String COLUMNNAME = "multi-string1"; private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1"; private static String OUTPUT_COLUMN_NAME1 = "unnested-multi-string1-again"; - private static LinkedHashSet IGNORE_SET = new LinkedHashSet<>(Arrays.asList("1", "3", "5")); @BeforeClass public static void setup() @@ -86,26 +84,22 @@ public static void setup() UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, COLUMNNAME, - OUTPUT_COLUMN_NAME, - null + OUTPUT_COLUMN_NAME ); UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, COLUMNNAME, - OUTPUT_COLUMN_NAME, - IGNORE_SET + OUTPUT_COLUMN_NAME ); UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, COLUMNNAME, - OUTPUT_COLUMN_NAME1, - null + OUTPUT_COLUMN_NAME1 ); UNNEST_STORAGE_ADAPTER3 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER1, COLUMNNAME, - OUTPUT_COLUMN_NAME1, - IGNORE_SET + OUTPUT_COLUMN_NAME1 ); ADAPTERS = ImmutableList.of( UNNEST_STORAGE_ADAPTER, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 1751201acc1f..912bb88bdce8 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -204,8 +204,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) UnnestDataSource.create( leftDataSource, dimOrExpToUnnest, - unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0), - null + unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0) ), rowSignature, getPlannerContext(), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java index eaa544d42002..5f857200f314 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java @@ -122,8 +122,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) RowSignature.builder().add("inline", ExpressionType.toColumnType(eval.type())).build() ), "inline", - druidQueryRel.getRowType().getFieldNames().get(0), - null + druidQueryRel.getRowType().getFieldNames().get(0) ); DruidQuery query = druidQueryRel.getPartialDruidQuery().build( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 119cae8634ec..4fc6a945958a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2659,8 +2659,7 @@ public void testUnnestInline() RowSignature.builder().add("inline", ColumnType.LONG_ARRAY).build() ), "inline", - "EXPR$0", - null + "EXPR$0" ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2696,8 +2695,7 @@ public void testUnnest() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0", - null + "EXPR$0" )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2748,8 +2746,7 @@ public void testUnnestWithGroupBy() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0", - null + "EXPR$0" )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_DEFAULT) @@ -2793,8 +2790,7 @@ public void testUnnestWithGroupByOrderBy() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0", - null + "EXPR$0" )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_DEFAULT) @@ -2849,8 +2845,7 @@ public void testUnnestWithGroupByOrderByWithLimit() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0", - null + "EXPR$0" )) .intervals(querySegmentSpec(Filtration.eternity())) .dimension(new DefaultDimensionSpec("EXPR$0", "_d0", ColumnType.STRING)) @@ -2893,8 +2888,7 @@ public void testUnnestWithLimit() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0", - null + "EXPR$0" )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2930,8 +2924,7 @@ public void testUnnestFirstQueryOnSelect() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0", - null + "EXPR$0" )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3013,8 +3006,7 @@ public void testUnnestWithFilters() .build() ), "dim3", - "EXPR$0", - null + "EXPR$0" )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3079,8 +3071,7 @@ public void testUnnestWithInFilters() .build() ), "dim3", - "EXPR$0", - null + "EXPR$0" )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3117,8 +3108,7 @@ public void testUnnestVirtualWithColumns() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "v0", - "EXPR$0", - null + "EXPR$0" )) .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) @@ -3159,8 +3149,7 @@ public void testUnnestWithGroupByOrderByOnVirtualColumn() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "v0", - "EXPR$0", - null + "EXPR$0" )) .setVirtualColumns(expressionVirtualColumn( "v0", @@ -3234,8 +3223,7 @@ public void testUnnestWithJoinOnTheLeft() JoinType.INNER ), "dim3", - "EXPR$0", - null + "EXPR$0" )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3292,8 +3280,7 @@ public void testUnnestWithConstant() RowSignature.builder().add("inline", ColumnType.LONG_ARRAY).build() ), "inline", - "EXPR$0", - null + "EXPR$0" ) ) .intervals(querySegmentSpec(Filtration.eternity())) From e77643aec5d2d0da883eb691823ecb1125f85aef Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 21 Feb 2023 10:24:17 -0800 Subject: [PATCH 04/15] Support for filters on unnested column in SQL part 1 --- .../druid/segment/UnnestStorageAdapter.java | 2 +- .../segment/UnnestStorageAdapterTest.java | 43 ++++++++++++- .../calcite/rel/DruidCorrelateUnnestRel.java | 24 ++++++- .../druid/sql/calcite/rel/DruidQuery.java | 25 ++++++++ .../calcite/rel/DruidUnnestDatasourceRel.java | 28 +++++++-- .../sql/calcite/rel/PartialDruidQuery.java | 62 +++++++++++++++---- .../calcite/rule/DruidFilterUnnestRule.java | 52 ++++++++++++++++ .../druid/sql/calcite/rule/DruidRules.java | 3 +- .../rule/DruidUnnestDatasourceRule.java | 1 + .../sql/calcite/CalciteArraysQueryTest.java | 37 +++++++++++ 10 files changed, 253 insertions(+), 24 deletions(-) create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 46491839b08a..cb57089b4eba 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -76,7 +76,7 @@ public Sequence makeCursors( if (filter == null) { forBaseFilter = filter; } else { - // if the filter has thw unnested column + // if the filter has the unnested column // do not pass it into the base cursor // if there is a filter as d2 > 1 and unnest-d2 < 10 // Calcite would push the filter on d2 into the data source diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 2d4fcab60e37..884ec6a5341b 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -26,10 +26,13 @@ import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.InDimFilter; +import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.filter.BoundFilter; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; @@ -249,7 +252,7 @@ public void test_two_levels_of_unnest_adapters() } @Test - public void test_unnest_adapters_basic_with_filter() + public void test_unnest_adapters_basic_with_in_filter() { Filter f = new InDimFilter(OUTPUT_COLUMN_NAME, ImmutableSet.of("1", "3", "5")); Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors( @@ -271,8 +274,44 @@ public void test_unnest_adapters_basic_with_filter() Object dimSelectorVal = dimSelector.getObject(); if (dimSelectorVal == null) { Assert.assertNull(dimSelectorVal); + } else { + Assert.assertEquals(expectedResults.get(count), dimSelectorVal.toString()); + } + cursor.advance(); + count++; + } + //As we are filtering on 1, 3 and 5 there should be 3 entries + Assert.assertEquals(3, count); + return null; + }); + } + + @Test + public void test_unnest_adapters_basic_with_bound_filter() + { + BoundDimFilter f = new BoundDimFilter(OUTPUT_COLUMN_NAME, "2", "6", true, true, null, null, StringComparators.NUMERIC); + Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors( + new BoundFilter(f), + UNNEST_STORAGE_ADAPTER.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + List expectedResults = Arrays.asList("3", "4", "5"); + cursorSequence.accumulate(null, (accumulated, cursor) -> { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + + DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); + int count = 0; + while (!cursor.isDone()) { + Object dimSelectorVal = dimSelector.getObject(); + if (dimSelectorVal == null) { + Assert.assertNull(dimSelectorVal); + } else { + Assert.assertEquals(expectedResults.get(count), dimSelectorVal.toString()); } - Assert.assertEquals(expectedResults.get(count), dimSelectorVal.toString()); cursor.advance(); count++; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 912bb88bdce8..559cb58d49ae 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; @@ -32,9 +33,11 @@ import org.apache.calcite.rel.core.Correlate; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexUtil; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.DataSource; import org.apache.druid.query.QueryDataSource; @@ -58,7 +61,7 @@ * Each correlate can be perceived as a join with the join type being inner * the left of a correlate as seen in the rule {@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule} * is the {@link DruidQueryRel} while the right will always be an {@link DruidUnnestDatasourceRel}. - * + *

* Since this is a subclass of DruidRel it is automatically considered by other rules that involves DruidRels. * Some example being SELECT_PROJECT and SORT_PROJECT rules in {@link org.apache.druid.sql.calcite.rule.DruidRules.DruidQueryRule} */ @@ -157,7 +160,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) unnestDatasourceRel.getUnnestProject().getProjects().get(0) ); - LogicalProject unnestProject = LogicalProject.create( + final LogicalProject unnestProject = LogicalProject.create( this, ImmutableList.of(unnestDatasourceRel.getUnnestProject() .getProjects() @@ -165,6 +168,18 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) unnestDatasourceRel.getUnnestProject().getRowType() ); + final Filter unnestFilterFound = unnestDatasourceRel.getUnnestFilter(); + final Filter logicalFilter; + if (unnestFilterFound != null) { + logicalFilter = LogicalFilter.create( + correlateRel, + RexUtil.shift(unnestFilterFound.getCondition(), rowSignature.size() - 1), + ImmutableSet.of(correlateRel.getCorrelationId()) + ); + } else { + logicalFilter = null; + } + // placeholder for dimension or expression to be unnested final String dimOrExpToUnnest; final VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create( @@ -199,7 +214,10 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) // add the unnest project to the partial query if required // This is necessary to handle the virtual columns on the unnestProject // Also create the unnest datasource to be used by the partial query - PartialDruidQuery partialDruidQuery = unnestProjectNeeded ? partialQuery.withUnnest(unnestProject) : partialQuery; + PartialDruidQuery partialDruidQuery = unnestProjectNeeded + ? partialQuery.withUnnestProject(unnestProject) + : partialQuery; + partialDruidQuery = partialDruidQuery.withUnnestFilter(logicalFilter); return partialDruidQuery.build( UnnestDataSource.create( leftDataSource, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index 5019f6fbaf7b..b29ec4bc7941 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -229,6 +229,15 @@ public static DruidQuery fromPartialQuery( virtualColumnRegistry ) ); + } else if (partialQuery.getUnnestFilter() != null) { + filter = Preconditions.checkNotNull( + computeUnnestFilter( + partialQuery, + plannerContext, + sourceRowSignature, + virtualColumnRegistry + ) + ); } else { filter = null; } @@ -309,6 +318,7 @@ public static DruidQuery fromPartialQuery( unnestProjection = null; } + return new DruidQuery( dataSource, plannerContext, @@ -335,6 +345,21 @@ private static DimFilter computeWhereFilter( return getDimFilter(plannerContext, rowSignature, virtualColumnRegistry, partialQuery.getWhereFilter()); } + @Nullable + private static DimFilter computeUnnestFilter( + final PartialDruidQuery partialQuery, + final PlannerContext plannerContext, + final RowSignature rowSignature, + final VirtualColumnRegistry virtualColumnRegistry + ) + { + final Filter unnestFilter = partialQuery.getUnnestFilter(); + if (unnestFilter == null) { + return null; + } + return getDimFilter(plannerContext, rowSignature, virtualColumnRegistry, unnestFilter); + } + @Nullable private static DimFilter computeHavingFilter( final PartialDruidQuery partialQuery, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java index 5f857200f314..ada7e2bdde79 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java @@ -20,6 +20,7 @@ package org.apache.druid.sql.calcite.rel; import org.apache.calcite.rel.RelWriter; +import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Uncollect; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.type.RelDataType; @@ -41,11 +42,11 @@ /** * The Rel node to capture the unnest (or uncollect) part in a query. This covers 2 cases: - * + *

* Case 1: * If this is an unnest on a constant and no input table is required, the final query is built using * an UnnestDataSource with a base InlineDataSource in this rel. - * + *

* Case 2: * If the unnest has an input table, this rel resolves the unnest part and delegates the rel to be consumed by other * rule ({@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule} @@ -55,11 +56,13 @@ public class DruidUnnestDatasourceRel extends DruidRel private final Uncollect uncollect; private final DruidQueryRel druidQueryRel; private final LogicalProject unnestProject; + private final Filter unnestFilter; public DruidUnnestDatasourceRel( Uncollect uncollect, DruidQueryRel queryRel, LogicalProject unnestProject, + Filter unnestFilter, PlannerContext plannerContext ) { @@ -67,6 +70,7 @@ public DruidUnnestDatasourceRel( this.uncollect = uncollect; this.druidQueryRel = queryRel; this.unnestProject = unnestProject; + this.unnestFilter = unnestFilter; } public LogicalProject getUnnestProject() @@ -74,6 +78,11 @@ public LogicalProject getUnnestProject() return unnestProject; } + public Filter getUnnestFilter() + { + return unnestFilter; + } + @Nullable @Override public PartialDruidQuery getPartialDruidQuery() @@ -88,6 +97,7 @@ public DruidUnnestDatasourceRel withPartialQuery(PartialDruidQuery newQueryBuild uncollect, druidQueryRel.withPartialQuery(newQueryBuilder), unnestProject, + unnestFilter, getPlannerContext() ); } @@ -149,14 +159,24 @@ public DruidUnnestDatasourceRel asDruidConvention() new Uncollect(getCluster(), traitSet.replace(DruidConvention.instance()), uncollect.getInput(), false), druidQueryRel.asDruidConvention(), unnestProject, + unnestFilter, getPlannerContext() ); } + public DruidUnnestDatasourceRel withFilter(Filter f) + { + return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, f, getPlannerContext()); + } + @Override public RelWriter explainTerms(RelWriter pw) { - return super.explainTerms(pw); + return super.explainTerms(pw) + .item("Uncollect", uncollect) + .item("Query", druidQueryRel) + .item("unnestProject", unnestProject) + .item("unnestFilter", unnestFilter); } @Override @@ -174,6 +194,6 @@ protected RelDataType deriveRowType() @Override protected DruidUnnestDatasourceRel clone() { - return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, getPlannerContext()); + return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, unnestFilter, getPlannerContext()); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java index 0cd71af8e5e7..eeb982731d81 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java @@ -61,6 +61,7 @@ public class PartialDruidQuery private final Sort sort; private final Project sortProject; private final Window window; + private final Filter unnestFilter; public enum Stage { @@ -83,7 +84,8 @@ public enum Stage // WINDOW may be present only together with SCAN. WINDOW, - UNNEST_PROJECT + UNNEST_PROJECT, + UNNEST_FILTER } private PartialDruidQuery( @@ -97,7 +99,8 @@ private PartialDruidQuery( final Sort sort, final Project sortProject, final Window window, - final Project unnestProject + final Project unnestProject, + final Filter unnestFilter ) { this.builderSupplier = Preconditions.checkNotNull(builderSupplier, "builderSupplier"); @@ -111,6 +114,7 @@ private PartialDruidQuery( this.sortProject = sortProject; this.window = window; this.unnestProject = unnestProject; + this.unnestFilter = unnestFilter; } public static PartialDruidQuery create(final RelNode scanRel) @@ -119,7 +123,7 @@ public static PartialDruidQuery create(final RelNode scanRel) scanRel.getCluster(), scanRel.getTable() != null ? scanRel.getTable().getRelOptSchema() : null ); - return new PartialDruidQuery(builderSupplier, scanRel, null, null, null, null, null, null, null, null, null); + return new PartialDruidQuery(builderSupplier, scanRel, null, null, null, null, null, null, null, null, null, null); } public RelNode getScan() @@ -142,6 +146,11 @@ public Project getUnnestProject() return unnestProject; } + public Filter getUnnestFilter() + { + return unnestFilter; + } + public Aggregate getAggregate() { return aggregate; @@ -186,7 +195,8 @@ public PartialDruidQuery withWhereFilter(final Filter newWhereFilter) sort, sortProject, window, - unnestProject + unnestProject, + unnestFilter ); } @@ -230,7 +240,8 @@ public PartialDruidQuery withSelectProject(final Project newSelectProject) sort, sortProject, window, - unnestProject + unnestProject, + unnestFilter ); } @@ -248,7 +259,8 @@ public PartialDruidQuery withAggregate(final Aggregate newAggregate) sort, sortProject, window, - unnestProject + unnestProject, + unnestFilter ); } @@ -266,7 +278,8 @@ public PartialDruidQuery withHavingFilter(final Filter newHavingFilter) sort, sortProject, window, - unnestProject + unnestProject, + unnestFilter ); } @@ -284,7 +297,8 @@ public PartialDruidQuery withAggregateProject(final Project newAggregateProject) sort, sortProject, window, - unnestProject + unnestProject, + unnestFilter ); } @@ -302,7 +316,8 @@ public PartialDruidQuery withSort(final Sort newSort) newSort, sortProject, window, - unnestProject + unnestProject, + unnestFilter ); } @@ -320,7 +335,8 @@ public PartialDruidQuery withSortProject(final Project newSortProject) sort, newSortProject, window, - unnestProject + unnestProject, + unnestFilter ); } @@ -338,11 +354,30 @@ public PartialDruidQuery withWindow(final Window newWindow) sort, sortProject, newWindow, - unnestProject + unnestProject, + unnestFilter + ); + } + + public PartialDruidQuery withUnnestProject(final Project newUnnestProject) + { + return new PartialDruidQuery( + builderSupplier, + scan, + whereFilter, + selectProject, + aggregate, + aggregateProject, + havingFilter, + sort, + sortProject, + window, + newUnnestProject, + unnestFilter ); } - public PartialDruidQuery withUnnest(final Project newUnnestProject) + public PartialDruidQuery withUnnestFilter(final Filter newUnnestFilter) { return new PartialDruidQuery( builderSupplier, @@ -355,7 +390,8 @@ public PartialDruidQuery withUnnest(final Project newUnnestProject) sort, sortProject, window, - newUnnestProject + unnestProject, + newUnnestFilter ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java new file mode 100644 index 000000000000..5e681dadfa02 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.DruidUnnestDatasourceRel; + +public class DruidFilterUnnestRule extends RelOptRule +{ + + private final PlannerContext plannerContext; + + public DruidFilterUnnestRule(PlannerContext plannerContext) + { + super( + operand( + Filter.class, + operand(DruidUnnestDatasourceRel.class, any()) + ) + ); + this.plannerContext = plannerContext; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final Filter filter = call.rel(0); + final DruidUnnestDatasourceRel unnestDatasourceRel = call.rel(1); + DruidUnnestDatasourceRel newRel = unnestDatasourceRel.withFilter(filter); + call.transformTo(newRel); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 4276a48b2f13..a6c7963d0305 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -99,7 +99,8 @@ public static List rules(PlannerContext plannerContext) DruidSortUnionRule.instance(), DruidJoinRule.instance(plannerContext), new DruidUnnestDatasourceRule(plannerContext), - new DruidCorrelateUnnestRule(plannerContext) + new DruidCorrelateUnnestRule(plannerContext), + new DruidFilterUnnestRule(plannerContext) ) ); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java index e8123fe0670c..52334279b961 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java @@ -95,6 +95,7 @@ public void onMatch(final RelOptRuleCall call) uncollectRel, druidQueryRel.withPartialQuery(druidQueryRel.getPartialDruidQuery().withSelectProject(queryProject)), logicalProject, + null, plannerContext ); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 4fc6a945958a..6e9ddf44fb85 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3328,4 +3328,41 @@ public void testUnnestWithConstant() ) ); } + + @Test + public void testUnnestWithFilterAfter() + { + // This tells the test to skip generating (vectorize = force) path + // Generates only 1 native query with vectorize = false + skipVectorize(); + // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization + // Generates 2 native queries with 2 different values of vectorize + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b')", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(new InDimFilter("EXPR$0", ImmutableList.of("a", "b"), null)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } } From ce5e5a676eac0c3b4ae034d456142e385b37ce58 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 21 Feb 2023 11:36:28 -0800 Subject: [PATCH 05/15] Adding a test on virtual column and bound filters --- .../calcite/rel/DruidCorrelateUnnestRel.java | 5 + .../sql/calcite/CalciteArraysQueryTest.java | 107 +++++++++++++++++- 2 files changed, 106 insertions(+), 6 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 559cb58d49ae..84d0b69d5208 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -171,6 +171,11 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) final Filter unnestFilterFound = unnestDatasourceRel.getUnnestFilter(); final Filter logicalFilter; if (unnestFilterFound != null) { + // The correlated value will be the last element in the row signature of correlate + // The filter points to $0 of the right data source e.g. OR(=($0, 'a'), =($0, 'b')) + // After the correlation the rowType becomes (left data source rowtype + 1) + // So the filter needs to be shifted to the last element of + // rowtype after the correlation for e.g OR(=($17, 'a'), =($17, 'b')) logicalFilter = LogicalFilter.create( correlateRel, RexUtil.shift(unnestFilterFound.getCondition(), rowSignature.size() - 1), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 6e9ddf44fb85..faca14783843 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3330,16 +3330,12 @@ public void testUnnestWithConstant() } @Test - public void testUnnestWithFilterAfter() + public void testUnnestWithInFilteringOnUnnestedCol() { - // This tells the test to skip generating (vectorize = force) path - // Generates only 1 native query with vectorize = false skipVectorize(); - // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization - // Generates 2 native queries with 2 different values of vectorize cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b')", + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b') ", ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( @@ -3365,4 +3361,103 @@ public void testUnnestWithFilterAfter() ) ); } + + @Test + public void testUnnestWithBoundFilteringOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where (d3>= 'b' AND d3 < 'd') ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(bound("EXPR$0", "b", "d", false, true, null, StringComparators.LEXICOGRAPHIC)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"} + ) + ); + } + + + @Test + public void testUnnestWithFilteringOnUnnestedVirtualCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d12 FROM druid.numfoo, UNNEST(ARRAY[m1, m2]) as unnested (d12) where d12 IN ('1','2') AND m1 < 10", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new QueryDataSource( + newScanQueryBuilder() + .dataSource( + new TableDataSource(CalciteTests.DATASOURCE3) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) + .columns( + "__time", + "cnt", + "d1", + "d2", + "dim1", + "dim2", + "dim3", + "dim4", + "dim5", + "dim6", + "f1", + "f2", + "l1", + "l2", + "m1", + "m2", + "unique_dim1" + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + "v0", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .virtualColumns(expressionVirtualColumn("v0", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY)) + .filters(new InDimFilter("EXPR$0", ImmutableList.of("1.0", "2.0"), null)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{1.0f}, + new Object[]{1.0f}, + new Object[]{2.0f}, + new Object[]{2.0f} + ) + ); + } } From a2ca449088a373b8aa413ce6c953211c8fc7d0ef Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 21 Feb 2023 13:13:48 -0800 Subject: [PATCH 06/15] Removing an older test that is now supported through sql --- .../segment/UnnestStorageAdapterTest.java | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 884ec6a5341b..bbe5b6614b4f 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -26,13 +26,10 @@ import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.dimension.DefaultDimensionSpec; -import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.InDimFilter; -import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; -import org.apache.druid.segment.filter.BoundFilter; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; @@ -285,40 +282,4 @@ public void test_unnest_adapters_basic_with_in_filter() return null; }); } - - @Test - public void test_unnest_adapters_basic_with_bound_filter() - { - BoundDimFilter f = new BoundDimFilter(OUTPUT_COLUMN_NAME, "2", "6", true, true, null, null, StringComparators.NUMERIC); - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors( - new BoundFilter(f), - UNNEST_STORAGE_ADAPTER.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - - List expectedResults = Arrays.asList("3", "4", "5"); - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else { - Assert.assertEquals(expectedResults.get(count), dimSelectorVal.toString()); - } - cursor.advance(); - count++; - } - //As we are filtering on 1, 3 and 5 there should be 3 entries - Assert.assertEquals(3, count); - return null; - }); - - } } From ff4d6ea216b815eb6a2701083800e171c43fbe61 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 21 Feb 2023 21:18:18 -0800 Subject: [PATCH 07/15] Fixing an issue with initialization in unnestDim cursor and adding negative test cases where filtering on values not present in the column --- .../druid/segment/UnnestDimensionCursor.java | 16 ++--- .../sql/calcite/CalciteArraysQueryTest.java | 64 ++++++++++++++++++- 2 files changed, 70 insertions(+), 10 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index a9ca9084c67e..d45ec1d50412 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -82,7 +82,6 @@ public class UnnestDimensionCursor implements Cursor private final String columnName; private final String outputName; private final ColumnSelectorFactory baseColumnSelectorFactory; - private final ValueMatcher valueMatcher; @Nullable private final Filter allowFilter; private int indexForRow; @@ -90,6 +89,7 @@ public class UnnestDimensionCursor implements Cursor private IndexedInts indexedIntsForCurrentRow; private boolean needInitialization; private SingleIndexInts indexIntsForRow; + private ValueMatcher valueMatcher; public UnnestDimensionCursor( Cursor cursor, @@ -107,11 +107,6 @@ public UnnestDimensionCursor( this.outputName = outputColumnName; this.needInitialization = true; this.allowFilter = allowFilter; - if (allowFilter != null) { - this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); - } else { - this.valueMatcher = BooleanValueMatcher.of(true); - } } @Override @@ -338,13 +333,18 @@ public void reset() private void initialize() { indexForRow = 0; + if (allowFilter != null) { + this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); + } else { + this.valueMatcher = BooleanValueMatcher.of(true); + } this.indexIntsForRow = new SingleIndexInts(); if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!valueMatcher.matches()) { - advanceAndUpdate(); + if (!valueMatcher.matches() && !baseCursor.isDone()) { + advance(); } needInitialization = false; } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index faca14783843..6f90b1436c45 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3330,7 +3330,7 @@ public void testUnnestWithConstant() } @Test - public void testUnnestWithInFilteringOnUnnestedCol() + public void testUnnestWithInFilterOnUnnestedCol() { skipVectorize(); cannotVectorize(); @@ -3363,7 +3363,67 @@ public void testUnnestWithInFilteringOnUnnestedCol() } @Test - public void testUnnestWithBoundFilteringOnUnnestedCol() + public void testUnnestWithInFilterOnUnnestedColWhereFilterIsNotOnFirstValue() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('d','c') ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(new InDimFilter("EXPR$0", ImmutableList.of("d", "c"), null)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"c"}, + new Object[]{"d"} + ) + ); + } + + @Test + public void testUnnestWithInFilterOnUnnestedColWhereValuesDoNotExist() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('foo','bar') ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(new InDimFilter("EXPR$0", ImmutableList.of("foo", "bar"), null)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + ImmutableList.of() + ); + } + + @Test + public void testUnnestWithBoundFilterOnUnnestedCol() { skipVectorize(); cannotVectorize(); From 663292a7cf70f253ddcc0fd489689faa91e46a98 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 22 Feb 2023 11:12:43 -0800 Subject: [PATCH 08/15] Adding support and unit tests for all kinds of selector filters over unnested columns --- .../druid/segment/UnnestDimensionCursor.java | 3 + .../calcite/rule/DruidFilterUnnestRule.java | 38 +++ .../druid/sql/calcite/rule/DruidRules.java | 3 +- .../sql/calcite/CalciteArraysQueryTest.java | 271 ++++++++++++++++++ 4 files changed, 314 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index d45ec1d50412..cc798b0e67d8 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -157,6 +157,9 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean matches() { + if (indexedIntsForCurrentRow.size() == 0) { + return false; + } return idForLookup == indexedIntsForCurrentRow.get(indexForRow); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java index 5e681dadfa02..587812f531ab 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java @@ -22,6 +22,7 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.DruidUnnestDatasourceRel; @@ -49,4 +50,41 @@ public void onMatch(RelOptRuleCall call) DruidUnnestDatasourceRel newRel = unnestDatasourceRel.withFilter(filter); call.transformTo(newRel); } + + // This is for a special case of handling selector filters + // on top of UnnestDataSourceRel when Calcite adds an extra + // LogicalProject on the LogicalFilter. For e.g. #122 here + // SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' + // 126:LogicalProject(d3=[$17]) + // 124:LogicalCorrelate(subset=[rel#125:Subset#6.NONE.[]], correlation=[$cor0], joinType=[inner], requiredColumns=[{3}]) + // 8:LogicalTableScan(subset=[rel#114:Subset#0.NONE.[]], table=[[druid, numfoo]]) + // 122:LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR]) + // 120:LogicalFilter(subset=[rel#121:Subset#4.NONE.[]], condition=[=($0, 'b')]) + // 118:Uncollect(subset=[rel#119:Subset#3.NONE.[]]) + // 116:LogicalProject(subset=[rel#117:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)]) + // 9:LogicalValues(subset=[rel#115:Subset#1.NONE.[0]], tuples=[[{ 0 }]]) + + // This logical project does a type cast only which Druid already has information about + // So we can skip this LogicalProject + // Extensive unit tests can be found in {@link CalciteArraysQueryTest} + + static class DruidProjectOnCorrelate extends RelOptRule + { + public DruidProjectOnCorrelate(PlannerContext plannerContext) + { + super( + operand( + Project.class, + operand(DruidUnnestDatasourceRel.class, any()) + ) + ); + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final DruidUnnestDatasourceRel unnestDatasourceRel = call.rel(1); + call.transformTo(unnestDatasourceRel); + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index a6c7963d0305..40ab3216a2ba 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -100,7 +100,8 @@ public static List rules(PlannerContext plannerContext) DruidJoinRule.instance(plannerContext), new DruidUnnestDatasourceRule(plannerContext), new DruidCorrelateUnnestRule(plannerContext), - new DruidFilterUnnestRule(plannerContext) + new DruidFilterUnnestRule(plannerContext), + new DruidFilterUnnestRule.DruidProjectOnCorrelate(plannerContext) ) ); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 6f90b1436c45..43d77af1778e 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3520,4 +3520,275 @@ public void testUnnestWithFilteringOnUnnestedVirtualCol() ) ); } + + @Test + public void testUnnestWithSelectorFilterOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(selector("EXPR$0", "b", null)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithSelectorFilterV1OnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3>='b' and d3<='b' ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(selector("EXPR$0", "b", null)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithSelectorFilterBadOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3=1 ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(bound("EXPR$0", "1", "1", false, false, null, StringComparators.NUMERIC)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + ImmutableList.of() + ); + } + + @Test + public void testUnnestWithNotSelectorFilterOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3!='b' ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(not(selector("EXPR$0", "b", null))) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{""}, + new Object[]{""}, + new Object[]{""} + ) : + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{""}, + new Object[]{null}, + new Object[]{null} + ) + ); + } + + @Test + public void testUnnestWithSelectorFilterOnUnnestedVirtualCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d12 FROM druid.numfoo, UNNEST(ARRAY[m1, m2]) as unnested (d12) where d12=4 AND m1 < 10", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new QueryDataSource( + newScanQueryBuilder() + .dataSource( + new TableDataSource(CalciteTests.DATASOURCE3) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) + .columns( + "__time", + "cnt", + "d1", + "d2", + "dim1", + "dim2", + "dim3", + "dim4", + "dim5", + "dim6", + "f1", + "f2", + "l1", + "l2", + "m1", + "m2", + "unique_dim1" + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + "v0", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .virtualColumns(expressionVirtualColumn("v0", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY)) + .filters(selector("EXPR$0", "4", null)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{4.0f}, + new Object[]{4.0f} + ) + ); + } + + @Test + public void testUnnestVirtualWithColumnsOnStringsWithSelectorFilter() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT strings FROM druid.numfoo, UNNEST(ARRAY[dim4, dim5]) as unnested (strings) where strings='aa'", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "v0", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(selector("EXPR$0", "aa", null)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + ImmutableList.of( + new Object[]{"aa"}, + new Object[]{"aa"} + ) + ); + } + + @Test + public void testUnnestVirtualWithColumnsOnStringsWithNotSelectorFilter() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT strings FROM druid.numfoo, UNNEST(ARRAY[dim4, dim5]) as unnested (strings) where strings!='aa'", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "v0", + "EXPR$0" + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .filters(not(selector("EXPR$0", "aa", null))) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"ab"}, + new Object[]{"a"}, + new Object[]{"ba"}, + new Object[]{"b"}, + new Object[]{"ad"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"ab"} + ) + ); + } } From f138c6d3414ed601027ca3b6de54639e3e636776 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 22 Feb 2023 13:23:50 -0800 Subject: [PATCH 09/15] Updating a md file for spellcheck to pass, although it is behaving flaky at times --- docs/tutorials/tutorial-jupyter-index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/tutorial-jupyter-index.md b/docs/tutorials/tutorial-jupyter-index.md index 012bb7e16cd3..96bff872e573 100644 --- a/docs/tutorials/tutorial-jupyter-index.md +++ b/docs/tutorials/tutorial-jupyter-index.md @@ -38,7 +38,7 @@ Make sure you meet the following requirements before starting the Jupyter-based pip3 install requests ``` -- JupyterLab (recommended) or Jupyter Notebook running on a non-default port. By default, Druid and Jupyter both try to use port `8888,` so start Jupyter on a different port. +- JupyterLab (recommended) or Jupyter Notebook running on a non-default port. By default, Druid and Jupyter both try to use port `8888`, so start Jupyter on a different port. - Install JupyterLab or Notebook: @@ -68,4 +68,4 @@ The notebooks are located in the [apache/druid repo](https://github.com/apache/d The links that follow are the raw GitHub URLs, so you can use them to download the notebook directly, such as with `wget`, or manually through your web browser. Note that if you save the file from your web browser, make sure to remove the `.txt` extension. -- [Introduction to the Druid API](https://raw.githubusercontent.com/apache/druid/master/examples/quickstart/jupyter-notebooks/api-tutorial.ipynb) walks you through some of the basics related to the Druid API and several endpoints. \ No newline at end of file +- [Introduction to the Druid API](https://raw.githubusercontent.com/apache/druid/master/examples/quickstart/jupyter-notebooks/api-tutorial.ipynb) walks you through some of the basics related to the Druid API and several endpoints. From e969e6a013fee22ad5ee9a148118c6bd0bd7ce46 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 27 Feb 2023 12:59:17 -0800 Subject: [PATCH 10/15] Simplifying the advance logic for cursors --- .../segment/UnnestColumnValueSelectorCursor.java | 12 +++--------- .../druid/segment/UnnestDimensionCursor.java | 16 ++++------------ 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index b5f240895712..398c2fdb6d71 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -249,16 +249,10 @@ public void advanceUninterruptibly() // while $e_i does not match the filter // keep advancing the pointer to the first valid match // calling advanceAndUpdate increments the index position so needs a call to matches() again for new match status - boolean match = valueMatcher.matches(); - if (match) { + while (true) { advanceAndUpdate(); - match = valueMatcher.matches(); - } - while (!match) { - if (!baseCursor.isDone()) { - advanceAndUpdate(); - match = valueMatcher.matches(); - } else { + boolean match = valueMatcher.matches(); + if (match || baseCursor.isDone()) { return; } } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index cc798b0e67d8..37a90dbf3232 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -283,19 +283,11 @@ public void advance() @Override public void advanceUninterruptibly() { - if (!baseCursor.isDone()) { + while (true) { + advanceAndUpdate(); boolean match = valueMatcher.matches(); - if (match) { - advanceAndUpdate(); - match = valueMatcher.matches(); - } - while (!match) { - if (!baseCursor.isDone()) { - advanceAndUpdate(); - match = valueMatcher.matches(); - } else { - return; - } + if (match || baseCursor.isDone()) { + return; } } } From 5031f9f7c51e0f6822ac49f5b67b5e7e24c4825f Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Sat, 4 Mar 2023 13:14:11 -0800 Subject: [PATCH 11/15] Changes for moving filter inside the unnest data source --- .../apache/druid/query/UnnestDataSource.java | 27 ++- .../UnnestColumnValueSelectorCursor.java | 2 +- .../druid/segment/UnnestSegmentReference.java | 8 +- .../druid/segment/UnnestStorageAdapter.java | 30 +-- .../druid/query/QueryRunnerTestHelper.java | 3 +- .../groupby/UnnestGroupByQueryRunnerTest.java | 12 +- .../query/scan/UnnestScanQueryRunnerTest.java | 6 +- .../query/topn/UnnestTopNQueryRunnerTest.java | 6 +- .../segment/UnnestStorageAdapterTest.java | 12 +- .../calcite/rel/DruidCorrelateUnnestRel.java | 48 +++- .../druid/sql/calcite/rel/DruidQuery.java | 26 +-- .../calcite/rel/DruidUnnestDatasourceRel.java | 9 +- .../sql/calcite/rel/PartialDruidQuery.java | 57 +---- .../sql/calcite/CalciteArraysQueryTest.java | 218 ++++++------------ 14 files changed, 197 insertions(+), 267 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 0db6e7ea397e..c5737d966bec 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -23,11 +23,13 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.planning.DataSourceAnalysis; import org.apache.druid.segment.SegmentReference; import org.apache.druid.segment.UnnestSegmentReference; import org.apache.druid.utils.JvmUtils; +import javax.annotation.Nullable; import java.util.List; import java.util.Objects; import java.util.Set; @@ -48,26 +50,30 @@ public class UnnestDataSource implements DataSource private final DataSource base; private final String column; private final String outputName; + private final DimFilter outputColumnFilter; private UnnestDataSource( DataSource dataSource, String columnName, - String outputName + String outputName, + DimFilter outputColumnFilter ) { this.base = dataSource; this.column = columnName; this.outputName = outputName; + this.outputColumnFilter = outputColumnFilter; } @JsonCreator public static UnnestDataSource create( @JsonProperty("base") DataSource base, @JsonProperty("column") String columnName, - @JsonProperty("outputName") String outputName + @JsonProperty("outputName") String outputName, + @Nullable @JsonProperty("outputColumnFilter") DimFilter outputColumnFilter ) { - return new UnnestDataSource(base, columnName, outputName); + return new UnnestDataSource(base, columnName, outputName, outputColumnFilter); } @JsonProperty("base") @@ -88,6 +94,11 @@ public String getOutputName() return outputName; } + @JsonProperty("outputColumnFilter") + public DimFilter getOutputColumnFilter() + { + return outputColumnFilter; + } @Override public Set getTableNames() { @@ -106,7 +117,7 @@ public DataSource withChildren(List children) if (children.size() != 1) { throw new IAE("Expected [1] child, got [%d]", children.size()); } - return new UnnestDataSource(children.get(0), column, outputName); + return new UnnestDataSource(children.get(0), column, outputName, outputColumnFilter); } @Override @@ -150,7 +161,8 @@ public Function createSegmentMapFunction( new UnnestSegmentReference( segmentMapFn.apply(baseSegment), column, - outputName + outputName, + outputColumnFilter ); } } @@ -161,7 +173,7 @@ public Function createSegmentMapFunction( @Override public DataSource withUpdatedDataSource(DataSource newSource) { - return new UnnestDataSource(newSource, column, outputName); + return new UnnestDataSource(newSource, column, outputName, outputColumnFilter); } @Override @@ -200,7 +212,7 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(base, column, outputName); + return Objects.hash(base, column, outputName, outputColumnFilter); } @Override @@ -210,6 +222,7 @@ public String toString() "base=" + base + ", column='" + column + '\'' + ", outputName='" + outputName + '\'' + + ", outputFilter='" + outputColumnFilter + '\'' + '}'; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 398c2fdb6d71..57940e512b6e 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -324,7 +324,7 @@ private void initialize() // If the first value the index is pointing to does not match the filter // advance the index to the first value which will match - if (!valueMatcher.matches()) { + if (!valueMatcher.matches() && !baseCursor.isDone()) { advance(); } needInitialization = false; diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java index cd24ee5c0cf4..92a3cb68a206 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java @@ -21,6 +21,7 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.timeline.SegmentId; import org.apache.druid.utils.CloseableUtils; import org.joda.time.Interval; @@ -41,12 +42,14 @@ public class UnnestSegmentReference implements SegmentReference private final SegmentReference baseSegment; private final String dimension; private final String renamedOutputDimension; + private final DimFilter outputColumnFilter; - public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName) + public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName, DimFilter outputColumnFilter) { this.baseSegment = baseSegment; this.dimension = dimension; this.renamedOutputDimension = outputName; + this.outputColumnFilter = outputColumnFilter; } @Override @@ -99,7 +102,8 @@ public StorageAdapter asStorageAdapter() return new UnnestStorageAdapter( baseSegment.asStorageAdapter(), dimension, - renamedOutputDimension + renamedOutputDimension, + outputColumnFilter ); } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index cb57089b4eba..8866213aef47 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.query.QueryMetrics; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.Indexed; @@ -45,16 +46,19 @@ public class UnnestStorageAdapter implements StorageAdapter private final StorageAdapter baseAdapter; private final String dimensionToUnnest; private final String outputColumnName; + private final DimFilter outputColumnFilter; public UnnestStorageAdapter( final StorageAdapter baseAdapter, final String dimension, - final String outputColumnName + final String outputColumnName, + final DimFilter outputColumnFilter ) { this.baseAdapter = baseAdapter; this.dimensionToUnnest = dimension; this.outputColumnName = outputColumnName; + this.outputColumnFilter = outputColumnFilter; } @Override @@ -67,24 +71,8 @@ public Sequence makeCursors( @Nullable QueryMetrics queryMetrics ) { - // the filter on the outer unnested column needs to be recreated on the unnested dimension and sent into - // the base cursor - // currently testing it out for in filter and selector filter - // TBD: boun - - final Filter forBaseFilter; - if (filter == null) { - forBaseFilter = filter; - } else { - // if the filter has the unnested column - // do not pass it into the base cursor - // if there is a filter as d2 > 1 and unnest-d2 < 10 - // Calcite would push the filter on d2 into the data source - // and only the filter on unnest-d2 < 10 will appear here - forBaseFilter = filter.getRequiredColumns().contains(outputColumnName) ? null : filter; - } final Sequence baseCursorSequence = baseAdapter.makeCursors( - forBaseFilter, + filter, interval, virtualColumns, gran, @@ -105,7 +93,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - filter + outputColumnFilter != null ? outputColumnFilter.toFilter() : null ); } else { retVal = new UnnestColumnValueSelectorCursor( @@ -113,7 +101,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - filter + outputColumnFilter != null ? outputColumnFilter.toFilter() : null ); } } else { @@ -122,7 +110,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - filter + outputColumnFilter != null ? outputColumnFilter.toFilter() : null ); } return retVal; diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index 3edd8ccb59f8..666af0d2e615 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -105,7 +105,8 @@ public class QueryRunnerTestHelper public static final DataSource UNNEST_DATA_SOURCE = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null ); public static final Granularity DAY_GRAN = Granularities.DAY; diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index 0abbff8bbd2f..cc2a722d4606 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -248,7 +248,8 @@ public void testGroupBy() .setDataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -466,7 +467,8 @@ public void testGroupByOnMissingColumn() .setDataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions( @@ -593,7 +595,8 @@ public void testGroupByOnUnnestedVirtualColumn() final DataSource unnestDataSource = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null ); GroupByQuery query = makeQueryBuilder() @@ -696,7 +699,8 @@ public void testGroupByOnUnnestedVirtualMultiColumn() final DataSource unnestDataSource = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null ); GroupByQuery query = makeQueryBuilder() diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index e2906ab57eb6..86b717734990 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -160,7 +160,8 @@ public void testUnnestRunnerVirtualColumnsUsingSingleColumn() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null )) .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -233,7 +234,8 @@ public void testUnnestRunnerVirtualColumnsUsingMultipleColumn() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null )) .columns(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() diff --git a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java index 0433ca66e979..cfb50d06823e 100644 --- a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java @@ -254,7 +254,8 @@ public void testTopNStringVirtualColumnUnnest() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .virtualColumns( @@ -340,7 +341,8 @@ public void testTopNStringVirtualMultiColumnUnnest() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + null )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .virtualColumns( diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index bbe5b6614b4f..a9e8efb4d63d 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -84,22 +84,26 @@ public static void setup() UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, COLUMNNAME, - OUTPUT_COLUMN_NAME + OUTPUT_COLUMN_NAME, + null ); UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, COLUMNNAME, - OUTPUT_COLUMN_NAME + OUTPUT_COLUMN_NAME, + null ); UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, COLUMNNAME, - OUTPUT_COLUMN_NAME1 + OUTPUT_COLUMN_NAME1, + null ); UNNEST_STORAGE_ADAPTER3 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER1, COLUMNNAME, - OUTPUT_COLUMN_NAME1 + OUTPUT_COLUMN_NAME1, + null ); ADAPTERS = ImmutableList.of( UNNEST_STORAGE_ADAPTER, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 84d0b69d5208..d46bb79b727b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -46,6 +46,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -73,8 +74,8 @@ public class DruidCorrelateUnnestRel extends DruidRel private final PartialDruidQuery partialQuery; private final PlannerConfig plannerConfig; private final Correlate correlateRel; - private RelNode left; - private RelNode right; + private final RelNode left; + private final RelNode right; private DruidCorrelateUnnestRel( RelOptCluster cluster, @@ -139,13 +140,22 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) final DruidRel druidQueryRel = (DruidRel) left; final DruidQuery leftQuery = Preconditions.checkNotNull((druidQueryRel).toDruidQuery(false), "leftQuery"); final DataSource leftDataSource; + final PartialDruidQuery partialQueryFromLeft = druidQueryRel.getPartialDruidQuery(); + final PartialDruidQuery corrPartialQuey; - if (DruidJoinQueryRel.computeLeftRequiresSubquery(druidQueryRel)) { + // If there is a LIMIT in the left query + // It should be honored before unnest + // Create a query data source in that case + + if (partialQueryFromLeft.getSort() != null || partialQueryFromLeft.getSortProject() != null) { leftDataSource = new QueryDataSource(leftQuery.getQuery()); + corrPartialQuey = partialQuery; } else { leftDataSource = leftQuery.getDataSource(); + corrPartialQuey = updateCorrPartialQueryFromLeft(partialQueryFromLeft); } + final DruidUnnestDatasourceRel unnestDatasourceRel = (DruidUnnestDatasourceRel) right; @@ -194,7 +204,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) ); // the unnest project is needed in case of a virtual column - // unnest(mv_to_array(dim_1)) is reconciled as unnesting a MVD dim_1 not requiring a virtual column + // unnest(mv_to_array(dim_1)) is reconciled as unnesting an MVD dim_1 not requiring a virtual column // while unnest(array(dim_2,dim_3)) is understood as unnesting a virtual column which is an array over dim_2 and dim_3 elements boolean unnestProjectNeeded = false; getPlannerContext().setJoinExpressionVirtualColumnRegistry(virtualColumnRegistry); @@ -220,14 +230,21 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) // This is necessary to handle the virtual columns on the unnestProject // Also create the unnest datasource to be used by the partial query PartialDruidQuery partialDruidQuery = unnestProjectNeeded - ? partialQuery.withUnnestProject(unnestProject) - : partialQuery; - partialDruidQuery = partialDruidQuery.withUnnestFilter(logicalFilter); + ? corrPartialQuey.withUnnestProject(unnestProject) + : corrPartialQuey; return partialDruidQuery.build( UnnestDataSource.create( leftDataSource, dimOrExpToUnnest, - unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0) + unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0), + // Filters from Calcite are received as bound Filters + // This piece optimizes multiple bounds to IN filters, Selector Filters etc. + logicalFilter != null ? Filtration.create(DruidQuery.getDimFilter( + getPlannerContext(), + rowSignature, + virtualColumnRegistry, + logicalFilter + )).optimizeFilterOnly(rowSignature).getDimFilter() : null ), rowSignature, getPlannerContext(), @@ -237,6 +254,21 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) ); } + private PartialDruidQuery updateCorrPartialQueryFromLeft(PartialDruidQuery partialQueryFromLeft) + { + PartialDruidQuery corrQuery = PartialDruidQuery.create(correlateRel); + corrQuery = corrQuery.withWhereFilter(partialQueryFromLeft.getWhereFilter()) + .withSelectProject(partialQuery.getSelectProject()); + if (partialQuery.getAggregate() != null) { + corrQuery = corrQuery.withAggregate(partialQuery.getAggregate()) + .withHavingFilter(partialQuery.getHavingFilter()); + } + if (partialQuery.getSort() != null || partialQuery.getSortProject() != null) { + corrQuery = corrQuery.withSort(partialQuery.getSort()); + } + return corrQuery; + } + @Override protected DruidCorrelateUnnestRel clone() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index b29ec4bc7941..6615fa4533f0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -229,15 +229,6 @@ public static DruidQuery fromPartialQuery( virtualColumnRegistry ) ); - } else if (partialQuery.getUnnestFilter() != null) { - filter = Preconditions.checkNotNull( - computeUnnestFilter( - partialQuery, - plannerContext, - sourceRowSignature, - virtualColumnRegistry - ) - ); } else { filter = null; } @@ -345,21 +336,6 @@ private static DimFilter computeWhereFilter( return getDimFilter(plannerContext, rowSignature, virtualColumnRegistry, partialQuery.getWhereFilter()); } - @Nullable - private static DimFilter computeUnnestFilter( - final PartialDruidQuery partialQuery, - final PlannerContext plannerContext, - final RowSignature rowSignature, - final VirtualColumnRegistry virtualColumnRegistry - ) - { - final Filter unnestFilter = partialQuery.getUnnestFilter(); - if (unnestFilter == null) { - return null; - } - return getDimFilter(plannerContext, rowSignature, virtualColumnRegistry, unnestFilter); - } - @Nullable private static DimFilter computeHavingFilter( final PartialDruidQuery partialQuery, @@ -378,7 +354,7 @@ private static DimFilter computeHavingFilter( } @Nonnull - private static DimFilter getDimFilter( + public static DimFilter getDimFilter( final PlannerContext plannerContext, final RowSignature rowSignature, @Nullable final VirtualColumnRegistry virtualColumnRegistry, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java index ada7e2bdde79..b941c347f209 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java @@ -33,6 +33,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -132,7 +133,13 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) RowSignature.builder().add("inline", ExpressionType.toColumnType(eval.type())).build() ), "inline", - druidQueryRel.getRowType().getFieldNames().get(0) + druidQueryRel.getRowType().getFieldNames().get(0), + unnestFilter != null ? Filtration.create(DruidQuery.getDimFilter( + getPlannerContext(), + druidQueryRel.getDruidTable().getRowSignature(), + virtualColumnRegistry, + unnestFilter + )).optimizeFilterOnly(druidQueryRel.getDruidTable().getRowSignature()).getDimFilter() : null ); DruidQuery query = druidQueryRel.getPartialDruidQuery().build( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java index eeb982731d81..0c7c596f3ea1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java @@ -61,7 +61,6 @@ public class PartialDruidQuery private final Sort sort; private final Project sortProject; private final Window window; - private final Filter unnestFilter; public enum Stage { @@ -99,8 +98,7 @@ private PartialDruidQuery( final Sort sort, final Project sortProject, final Window window, - final Project unnestProject, - final Filter unnestFilter + final Project unnestProject ) { this.builderSupplier = Preconditions.checkNotNull(builderSupplier, "builderSupplier"); @@ -114,7 +112,6 @@ private PartialDruidQuery( this.sortProject = sortProject; this.window = window; this.unnestProject = unnestProject; - this.unnestFilter = unnestFilter; } public static PartialDruidQuery create(final RelNode scanRel) @@ -123,7 +120,7 @@ public static PartialDruidQuery create(final RelNode scanRel) scanRel.getCluster(), scanRel.getTable() != null ? scanRel.getTable().getRelOptSchema() : null ); - return new PartialDruidQuery(builderSupplier, scanRel, null, null, null, null, null, null, null, null, null, null); + return new PartialDruidQuery(builderSupplier, scanRel, null, null, null, null, null, null, null, null, null); } public RelNode getScan() @@ -146,11 +143,6 @@ public Project getUnnestProject() return unnestProject; } - public Filter getUnnestFilter() - { - return unnestFilter; - } - public Aggregate getAggregate() { return aggregate; @@ -195,8 +187,7 @@ public PartialDruidQuery withWhereFilter(final Filter newWhereFilter) sort, sortProject, window, - unnestProject, - unnestFilter + unnestProject ); } @@ -240,8 +231,7 @@ public PartialDruidQuery withSelectProject(final Project newSelectProject) sort, sortProject, window, - unnestProject, - unnestFilter + unnestProject ); } @@ -259,8 +249,7 @@ public PartialDruidQuery withAggregate(final Aggregate newAggregate) sort, sortProject, window, - unnestProject, - unnestFilter + unnestProject ); } @@ -278,8 +267,7 @@ public PartialDruidQuery withHavingFilter(final Filter newHavingFilter) sort, sortProject, window, - unnestProject, - unnestFilter + unnestProject ); } @@ -297,8 +285,7 @@ public PartialDruidQuery withAggregateProject(final Project newAggregateProject) sort, sortProject, window, - unnestProject, - unnestFilter + unnestProject ); } @@ -316,8 +303,7 @@ public PartialDruidQuery withSort(final Sort newSort) newSort, sortProject, window, - unnestProject, - unnestFilter + unnestProject ); } @@ -335,8 +321,7 @@ public PartialDruidQuery withSortProject(final Project newSortProject) sort, newSortProject, window, - unnestProject, - unnestFilter + unnestProject ); } @@ -354,8 +339,7 @@ public PartialDruidQuery withWindow(final Window newWindow) sort, sortProject, newWindow, - unnestProject, - unnestFilter + unnestProject ); } @@ -372,26 +356,7 @@ public PartialDruidQuery withUnnestProject(final Project newUnnestProject) sort, sortProject, window, - newUnnestProject, - unnestFilter - ); - } - - public PartialDruidQuery withUnnestFilter(final Filter newUnnestFilter) - { - return new PartialDruidQuery( - builderSupplier, - scan, - whereFilter, - selectProject, - aggregate, - aggregateProject, - havingFilter, - sort, - sortProject, - window, - unnestProject, - newUnnestFilter + newUnnestProject ); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 43d77af1778e..6188496fec13 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -171,7 +171,7 @@ public void testSelectNonConstantArrayExpressionFromTableForMultival() try { ExpressionProcessing.initializeForTests(true); - // if nested arrays are allowed, dim3 is a multi-valued string column, so the automatic translation will turn this + // if nested arrays are allowed, dim3 is a multivalued string column, so the automatic translation will turn this // expression into // // `map((dim3) -> array(concat(dim3,'word'),'up'), dim3)` @@ -212,7 +212,7 @@ public void testSomeArrayFunctionsWithScanQuery() { // Yes these outputs are strange sometimes, arrays are in a partial state of existence so end up a bit // stringy for now this is because virtual column selectors are coercing values back to stringish so that - // multi-valued string dimensions can be grouped on. + // multivalued string dimensions can be grouped on. List expectedResults; if (useDefault) { expectedResults = ImmutableList.of( @@ -388,7 +388,7 @@ public void testSomeArrayFunctionsWithScanQuery() public void testSomeArrayFunctionsWithScanQueryNoStringify() { // when not stringifying arrays, some things are still stringified, because they are inferred to be typed as strings - // the planner context which controls stringification of arrays does not apply to multi-valued string columns, + // the planner context which controls stringification of arrays does not apply to multivalued string columns, // which will still always be stringified to ultimately adhere to the varchar type // as array support increases in the engine this will likely change since using explict array functions should // probably kick it into an array @@ -2649,7 +2649,7 @@ public void testUnnestInline() skipVectorize(); cannotVectorize(); testQuery( - "SELECT * FROM UNNEST(ARRAY[1,2,3])", + "SELECT * FROM UNNEST(ARRAY[1,2,3]) as unnested(d)", ImmutableList.of( Druids.newScanQueryBuilder() .dataSource( @@ -2659,7 +2659,8 @@ public void testUnnestInline() RowSignature.builder().add("inline", ColumnType.LONG_ARRAY).build() ), "inline", - "EXPR$0" + "EXPR$0", + null ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2695,7 +2696,8 @@ public void testUnnest() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2733,11 +2735,7 @@ public void testUnnest() @Test public void testUnnestWithGroupBy() { - // This tells the test to skip generating (vectorize = force) path - // Generates only 1 native query with vectorize = false skipVectorize(); - // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization - // Generates 2 native queries with 2 different values of vectorize cannotVectorize(); testQuery( "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) GROUP BY d3 ", @@ -2746,7 +2744,8 @@ public void testUnnestWithGroupBy() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + null )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_DEFAULT) @@ -2790,7 +2789,8 @@ public void testUnnestWithGroupByOrderBy() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + null )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_DEFAULT) @@ -2845,7 +2845,8 @@ public void testUnnestWithGroupByOrderByWithLimit() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + null )) .intervals(querySegmentSpec(Filtration.eternity())) .dimension(new DefaultDimensionSpec("EXPR$0", "_d0", ColumnType.STRING)) @@ -2888,7 +2889,8 @@ public void testUnnestWithLimit() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2924,7 +2926,8 @@ public void testUnnestFirstQueryOnSelect() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2973,44 +2976,15 @@ public void testUnnestWithFilters() ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( - new QueryDataSource( - newScanQueryBuilder() - .dataSource( - new TableDataSource(CalciteTests.DATASOURCE3) - ) - .intervals(querySegmentSpec(Filtration.eternity())) - .virtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .legacy(false) - .filters(new SelectorDimFilter("dim2", "a", null)) - .columns( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "l1", - "l2", - "m1", - "m2", - "unique_dim1", - "v0" - ) - .context(QUERY_CONTEXT_DEFAULT) - .build() - ), + new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) + .filters(new SelectorDimFilter("dim2", "a", null)) .context(QUERY_CONTEXT_DEFAULT) .columns(ImmutableList.of( "EXPR$0" @@ -3035,7 +3009,7 @@ public void testUnnestWithInFilters() // Generates 2 native queries with 2 different values of vectorize cannotVectorize(); testQuery( - "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc')), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc') LIMIT 2), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( @@ -3068,10 +3042,12 @@ public void testUnnestWithInFilters() "unique_dim1" ) .context(QUERY_CONTEXT_DEFAULT) + .limit(2) .build() ), "dim3", - "EXPR$0" + "EXPR$0", + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3085,9 +3061,7 @@ public void testUnnestWithInFilters() ImmutableList.of( new Object[]{"a"}, new Object[]{"b"}, - new Object[]{""}, - useDefault ? - new Object[]{""} : new Object[]{null} + new Object[]{""} ) ); } @@ -3108,7 +3082,8 @@ public void testUnnestVirtualWithColumns() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "v0", - "EXPR$0" + "EXPR$0", + null )) .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) @@ -3149,7 +3124,8 @@ public void testUnnestWithGroupByOrderByOnVirtualColumn() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "v0", - "EXPR$0" + "EXPR$0", + null )) .setVirtualColumns(expressionVirtualColumn( "v0", @@ -3223,7 +3199,8 @@ public void testUnnestWithJoinOnTheLeft() JoinType.INNER ), "dim3", - "EXPR$0" + "EXPR$0", + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3280,7 +3257,8 @@ public void testUnnestWithConstant() RowSignature.builder().add("inline", ColumnType.LONG_ARRAY).build() ), "inline", - "EXPR$0" + "EXPR$0", + null ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -3341,13 +3319,13 @@ public void testUnnestWithInFilterOnUnnestedCol() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + new InDimFilter("EXPR$0", ImmutableList.of("a", "b"), null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(new InDimFilter("EXPR$0", ImmutableList.of("a", "b"), null)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3374,13 +3352,13 @@ public void testUnnestWithInFilterOnUnnestedColWhereFilterIsNotOnFirstValue() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + new InDimFilter("EXPR$0", ImmutableList.of("d", "c"), null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(new InDimFilter("EXPR$0", ImmutableList.of("d", "c"), null)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3406,13 +3384,13 @@ public void testUnnestWithInFilterOnUnnestedColWhereValuesDoNotExist() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + new InDimFilter("EXPR$0", ImmutableList.of("foo", "bar"), null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(new InDimFilter("EXPR$0", ImmutableList.of("foo", "bar"), null)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3434,13 +3412,14 @@ public void testUnnestWithBoundFilterOnUnnestedCol() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + bound("EXPR$0", "b", "d", false, true, null, StringComparators.LEXICOGRAPHIC) + )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(bound("EXPR$0", "b", "d", false, true, null, StringComparators.LEXICOGRAPHIC)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3466,46 +3445,22 @@ public void testUnnestWithFilteringOnUnnestedVirtualCol() ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( - new QueryDataSource( - newScanQueryBuilder() - .dataSource( - new TableDataSource(CalciteTests.DATASOURCE3) - ) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .legacy(false) - .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) - .columns( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim2", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "l1", - "l2", - "m1", - "m2", - "unique_dim1" - ) - .context(QUERY_CONTEXT_DEFAULT) - .build() - ), + new TableDataSource(CalciteTests.DATASOURCE3), "v0", - "EXPR$0" + "EXPR$0", + new InDimFilter("EXPR$0", ImmutableList.of("1.0", "2.0"), null) + )) + .virtualColumns(expressionVirtualColumn( + "v0", + "array(\"dim2\",\"dim4\")", + ColumnType.STRING_ARRAY )) + .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) .virtualColumns(expressionVirtualColumn("v0", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY)) - .filters(new InDimFilter("EXPR$0", ImmutableList.of("1.0", "2.0"), null)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3533,13 +3488,13 @@ public void testUnnestWithSelectorFilterOnUnnestedCol() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + selector("EXPR$0", "b", null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(selector("EXPR$0", "b", null)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3565,13 +3520,13 @@ public void testUnnestWithSelectorFilterV1OnUnnestedCol() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + selector("EXPR$0", "b", null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(selector("EXPR$0", "b", null)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3597,13 +3552,14 @@ public void testUnnestWithSelectorFilterBadOnUnnestedCol() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + bound("EXPR$0", "1", "1", false, false, null, StringComparators.NUMERIC) + )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(bound("EXPR$0", "1", "1", false, false, null, StringComparators.NUMERIC)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3625,13 +3581,13 @@ public void testUnnestWithNotSelectorFilterOnUnnestedCol() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "dim3", - "EXPR$0" + "EXPR$0", + not(selector("EXPR$0", "b", null)) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(not(selector("EXPR$0", "b", null))) .columns(ImmutableList.of( "EXPR$0" )) @@ -3667,46 +3623,22 @@ public void testUnnestWithSelectorFilterOnUnnestedVirtualCol() ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( - new QueryDataSource( - newScanQueryBuilder() - .dataSource( - new TableDataSource(CalciteTests.DATASOURCE3) - ) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .legacy(false) - .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) - .columns( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim2", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "l1", - "l2", - "m1", - "m2", - "unique_dim1" - ) - .context(QUERY_CONTEXT_DEFAULT) - .build() - ), + new TableDataSource(CalciteTests.DATASOURCE3), "v0", - "EXPR$0" + "EXPR$0", + selector("EXPR$0", "4", null) )) + .virtualColumns(expressionVirtualColumn( + "v0", + "array(\"m1\",\"m2\")", + ColumnType.FLOAT_ARRAY + )) + .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) .virtualColumns(expressionVirtualColumn("v0", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY)) - .filters(selector("EXPR$0", "4", null)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3732,14 +3664,14 @@ public void testUnnestVirtualWithColumnsOnStringsWithSelectorFilter() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "v0", - "EXPR$0" + "EXPR$0", + selector("EXPR$0", "aa", null) )) .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(selector("EXPR$0", "aa", null)) .columns(ImmutableList.of( "EXPR$0" )) @@ -3764,14 +3696,14 @@ public void testUnnestVirtualWithColumnsOnStringsWithNotSelectorFilter() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), "v0", - "EXPR$0" + "EXPR$0", + not(selector("EXPR$0", "aa", null)) )) .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_DEFAULT) - .filters(not(selector("EXPR$0", "aa", null))) .columns(ImmutableList.of( "EXPR$0" )) From 737771b4e9fb345edce75d3463a040b89d35ece6 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Sat, 4 Mar 2023 13:32:43 -0800 Subject: [PATCH 12/15] Adding some comments and updating some javadocs --- .../druid/segment/UnnestColumnValueSelectorCursor.java | 2 +- .../org/apache/druid/segment/UnnestDimensionCursor.java | 6 +++--- .../druid/sql/calcite/rel/DruidCorrelateUnnestRel.java | 5 +++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 57940e512b6e..d127bf17d4aa 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -52,7 +52,7 @@ * unnestCursor.advance() -> 'e' *

*

- * The allowSet if available helps skip over elements which are not in the allowList by moving the cursor to + * The filter if available helps skip over elements which are not in the allowList by moving the cursor to * the next available match. *

* The index reference points to the index of each row that the unnest cursor is accessing through currentVal diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index 37a90dbf3232..d3ba15fb4bb3 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -60,9 +60,9 @@ *

* Total 5 advance calls above *

- * The allowSet, if available, helps skip over elements that are not in the allowList by moving the cursor to + * The filter, if available, helps skip over elements that are not in the allowList by moving the cursor to * the next available match. The hashSet is converted into a bitset (during initialization) for efficiency. - * If allowSet is ['c', 'd'] then the advance moves over to the next available match + * If filter is IN ('c', 'd') then the advance moves over to the next available match *

* advance() -> 2 -> 'c' * advance() -> 3 -> 'd' (advances base cursor first) @@ -395,7 +395,7 @@ public int size() public int get(int idx) { // need to get value from the indexed ints - // only if it is non null and has at least 1 value + // only if it is non-null and has at least 1 value if (indexedIntsForCurrentRow != null && indexedIntsForCurrentRow.size() > 0) { return indexedIntsForCurrentRow.get(indexForRow); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index d46bb79b727b..bff90fc74ecf 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -256,6 +256,11 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) private PartialDruidQuery updateCorrPartialQueryFromLeft(PartialDruidQuery partialQueryFromLeft) { + // The DruidCorrelateRule already creates the project and pushes it on the top level + // So get select project from partialQuery + // The filters are present on the partial query of the left + // The group by and having clauses would be on the top level + // Same for the sort PartialDruidQuery corrQuery = PartialDruidQuery.create(correlateRel); corrQuery = corrQuery.withWhereFilter(partialQueryFromLeft.getWhereFilter()) .withSelectProject(partialQuery.getSelectProject()); From 744fb7b3011aae7fbb76d3031fb27dc585d19306 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Sat, 4 Mar 2023 13:45:15 -0800 Subject: [PATCH 13/15] Do not need planner context in the new rules, using an instance method instead --- .../sql/calcite/rel/PartialDruidQuery.java | 3 +-- .../calcite/rule/DruidFilterUnnestRule.java | 23 +++++++++++++------ .../druid/sql/calcite/rule/DruidRules.java | 4 ++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java index 0c7c596f3ea1..61697bd03ba0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java @@ -83,8 +83,7 @@ public enum Stage // WINDOW may be present only together with SCAN. WINDOW, - UNNEST_PROJECT, - UNNEST_FILTER + UNNEST_PROJECT } private PartialDruidQuery( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java index 587812f531ab..41a1a4c32db6 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java @@ -23,15 +23,13 @@ import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Project; -import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.DruidUnnestDatasourceRel; public class DruidFilterUnnestRule extends RelOptRule { + private static final DruidFilterUnnestRule INSTANCE = new DruidFilterUnnestRule(); - private final PlannerContext plannerContext; - - public DruidFilterUnnestRule(PlannerContext plannerContext) + private DruidFilterUnnestRule() { super( operand( @@ -39,7 +37,11 @@ public DruidFilterUnnestRule(PlannerContext plannerContext) operand(DruidUnnestDatasourceRel.class, any()) ) ); - this.plannerContext = plannerContext; + } + + public static DruidFilterUnnestRule instance() + { + return INSTANCE; } @Override @@ -68,9 +70,11 @@ public void onMatch(RelOptRuleCall call) // So we can skip this LogicalProject // Extensive unit tests can be found in {@link CalciteArraysQueryTest} - static class DruidProjectOnCorrelate extends RelOptRule + static class DruidProjectOnCorrelateRule extends RelOptRule { - public DruidProjectOnCorrelate(PlannerContext plannerContext) + private static final DruidProjectOnCorrelateRule INSTANCE = new DruidProjectOnCorrelateRule(); + + private DruidProjectOnCorrelateRule() { super( operand( @@ -80,6 +84,11 @@ public DruidProjectOnCorrelate(PlannerContext plannerContext) ); } + public static DruidProjectOnCorrelateRule instance() + { + return INSTANCE; + } + @Override public void onMatch(RelOptRuleCall call) { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 40ab3216a2ba..2d238238fbd5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -100,8 +100,8 @@ public static List rules(PlannerContext plannerContext) DruidJoinRule.instance(plannerContext), new DruidUnnestDatasourceRule(plannerContext), new DruidCorrelateUnnestRule(plannerContext), - new DruidFilterUnnestRule(plannerContext), - new DruidFilterUnnestRule.DruidProjectOnCorrelate(plannerContext) + DruidFilterUnnestRule.instance(), + DruidFilterUnnestRule.DruidProjectOnCorrelateRule.instance() ) ); From 0bb44c554966dc27b5a44acb971fd63171e56b5c Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 7 Mar 2023 12:10:00 -0800 Subject: [PATCH 14/15] Tmp changes for filter matching --- .../druid/segment/UnnestDimensionCursor.java | 197 ++++++++++++++++++ .../rule/DruidCorrelateUnnestRule.java | 15 +- .../sql/calcite/CalciteArraysQueryTest.java | 3 +- 3 files changed, 207 insertions(+), 8 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index d3ba15fb4bb3..6af6715cdd25 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -20,9 +20,11 @@ package org.apache.druid.segment; import com.google.common.base.Predicate; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.dimension.LegacyDimensionSpec; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.ValueMatcher; @@ -30,11 +32,13 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.SingleIndexedInt; import org.apache.druid.segment.filter.AndFilter; import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; +import java.util.concurrent.atomic.AtomicInteger; /** * The cursor to help unnest MVDs with dictionary encoding. @@ -327,6 +331,199 @@ public void reset() @Nullable private void initialize() { + /* + for i=0 to baseColFactory.makeDimSelector.getValueCardinality() + match each item with the filter and populate bitset if there's a match + */ + + AtomicInteger idRef = new AtomicInteger(); + ValueMatcher myMatcher = allowFilter.makeMatcher(new ColumnSelectorFactory() + { + @Override + public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) + { + if (!outputName.equals(dimensionSpec.getDimension())) { + throw new ISE("Asked for bad dimension[%s]", dimensionSpec); + } + return new DimensionSelector() + { + private final IndexedInts myInts = new IndexedInts() + { + @Override + public int size() + { + return 1; + } + + @Override + public int get(int index) + { + return 1; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + }; + @Override + public IndexedInts getRow() + { + return myInts; + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + // Handle value is null + return new ValueMatcher() + { + @Override + public boolean matches() + { + return value.equals(lookupName(1)); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + }; + } + + @Override + public ValueMatcher makeValueMatcher(Predicate predicate) + { + return new ValueMatcher() + { + @Override + public boolean matches() + { + return predicate.apply(lookupName(1)); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + } + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + + @Nullable + @Override + public Object getObject() + { + return null; + } + + @Override + public Class classOfObject() + { + return null; + } + + @Override + public int getValueCardinality() + { + return dimSelector.getValueCardinality(); + } + + @Nullable + @Override + public String lookupName(int id) + { + return dimSelector.lookupName(idRef.get()); + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return dimSelector.nameLookupPossibleInAdvance(); + } + + @Nullable + @Override + public IdLookup idLookup() + { + return dimSelector.idLookup(); + } + } + } + + @Override + public ColumnValueSelector makeColumnValueSelector(String columnName) + { + return new ColumnValueSelector() + { + @Override + public double getDouble() + { + return Double.parseDouble(dimSelector.lookupName(idRef.get())); + } + + @Override + public float getFloat() + { + return 0; + } + + @Override + public long getLong() + { + return 0; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + + @Override + public boolean isNull() + { + return false; + } + + @Nullable + @Override + public Object getObject() + { + return null; + } + + @Override + public Class classOfObject() + { + return null; + } + } + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + + } + }); + + for (int i = 0; i < dimSelector.getValueCardinality(); ++i) { + idRef.set(i); + if (myMatcher.matches()) { + bitSet.set(i, true); + } + } + indexForRow = 0; if (allowFilter != null) { this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java index 1870e11dd75c..7a9c349828b0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java @@ -101,8 +101,9 @@ public void onMatch(RelOptRuleCall call) final RexBuilder rexBuilder = correlate.getCluster().getRexBuilder(); final Filter druidRelFilter; - final DruidRel newDruidRelFilter; + final DruidRel newDruidRel; final List newProjectExprs = new ArrayList<>(); + final List newWheres = new ArrayList<>(); final boolean isLeftDirectAccessPossible = enableLeftScanDirect && (druidRel instanceof DruidQueryRel); @@ -116,14 +117,14 @@ public void onMatch(RelOptRuleCall call) // Left-side projection expressions rewritten to be on top of the correlate. newProjectExprs.addAll(leftProject.getProjects()); - newDruidRelFilter = druidRel.withPartialQuery(PartialDruidQuery.create(leftScan)); + newDruidRel = druidRel.withPartialQuery(PartialDruidQuery.create(leftScan)); } else { // Leave druidRel as-is. Write input refs that do nothing. for (int i = 0; i < druidRel.getRowType().getFieldCount(); i++) { newProjectExprs.add(rexBuilder.makeInputRef(correlate.getRowType().getFieldList().get(i).getType(), i)); } - newDruidRelFilter = druidRel; - druidRelFilter = null; + newDruidRel = druidRel; + druidRelFilter = druidRel.getPartialDruidQuery().getWhereFilter(); } if (druidUnnestDatasourceRel.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT) { @@ -131,7 +132,7 @@ public void onMatch(RelOptRuleCall call) druidUnnestDatasourceRel.getPartialDruidQuery() .getSelectProject() .getProjects(), - newDruidRelFilter.getRowType().getFieldCount() + newDruidRel.getRowType().getFieldCount() )) { newProjectExprs.add(rexNode); } @@ -143,7 +144,7 @@ public void onMatch(RelOptRuleCall call) .getFieldList() .get(druidRel.getRowType().getFieldCount() + i) .getType(), - newDruidRelFilter.getRowType().getFieldCount() + i + newDruidRel.getRowType().getFieldCount() + i ) ); } @@ -152,7 +153,7 @@ public void onMatch(RelOptRuleCall call) final DruidCorrelateUnnestRel druidCorr = DruidCorrelateUnnestRel.create( correlate.copy( correlate.getTraitSet(), - newDruidRelFilter, + newDruidRel, druidUnnestDatasourceRel, correlate.getCorrelationId(), correlate.getRequiredColumns(), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 6188496fec13..13d828dde148 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3009,7 +3009,8 @@ public void testUnnestWithInFilters() // Generates 2 native queries with 2 different values of vectorize cannotVectorize(); testQuery( - "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc') LIMIT 2), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where dim2 IN ('a','b','ab','abc') LIMIT 2", +// "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc') LIMIT 2), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( From c9074b60f5ef377f75e242550608dd056aee4f7c Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 8 Mar 2023 16:52:32 -0800 Subject: [PATCH 15/15] Updating dimension cursor to use value matcher once and populate a bitset --- .../druid/segment/UnnestDimensionCursor.java | 346 +++++++++--------- .../sql/calcite/CalciteArraysQueryTest.java | 7 +- 2 files changed, 184 insertions(+), 169 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index 6af6715cdd25..dff6cdfe87dd 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -24,20 +24,16 @@ import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.query.dimension.LegacyDimensionSpec; import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.data.SingleIndexedInt; -import org.apache.druid.segment.filter.AndFilter; -import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; +import java.util.BitSet; import java.util.concurrent.atomic.AtomicInteger; /** @@ -93,7 +89,7 @@ public class UnnestDimensionCursor implements Cursor private IndexedInts indexedIntsForCurrentRow; private boolean needInitialization; private SingleIndexInts indexIntsForRow; - private ValueMatcher valueMatcher; + private BitSet matchBitSet; public UnnestDimensionCursor( Cursor cursor, @@ -111,6 +107,7 @@ public UnnestDimensionCursor( this.outputName = outputColumnName; this.needInitialization = true; this.allowFilter = allowFilter; + this.matchBitSet = new BitSet(); } @Override @@ -206,10 +203,8 @@ public Class classOfObject() @Override public int getValueCardinality() { - if (allowFilter instanceof InDimFilter) { - return ((InDimFilter) allowFilter).getValues().size(); - } else if (allowFilter instanceof AndFilter) { - return ((AndFilter) allowFilter).getFilters().size(); + if (!matchBitSet.isEmpty()) { + return matchBitSet.cardinality(); } return dimSelector.getValueCardinality(); } @@ -287,13 +282,9 @@ public void advance() @Override public void advanceUninterruptibly() { - while (true) { + do { advanceAndUpdate(); - boolean match = valueMatcher.matches(); - if (match || baseCursor.isDone()) { - return; - } - } + } while (matchAndProceed()); } @Override @@ -302,6 +293,10 @@ public boolean isDone() if (needInitialization && !baseCursor.isDone()) { initialize(); } + // If the filter does not match any dimensions + // No need to move cursor and do extra work + if (allowFilter != null && matchBitSet.isEmpty()) + return true; return baseCursor.isDone(); } @@ -322,6 +317,29 @@ public void reset() baseCursor.reset(); } + /** + * This advances the unnest cursor in cases where an allowList is specified + * and the current value at the unnest cursor is not in the allowList. + * The cursor in such cases is moved till the next match is found. + * + * @return a boolean to indicate whether to stay or move cursor + */ + private boolean matchAndProceed() + { + boolean matchStatus; + if ((allowFilter == null) && matchBitSet.isEmpty()) { + matchStatus = true; + } else { + if (indexedIntsForCurrentRow==null || indexedIntsForCurrentRow.size() == 0) { + matchStatus = false; + } + else { + matchStatus = matchBitSet.get(indexedIntsForCurrentRow.get(indexForRow)); + } + } + return !baseCursor.isDone() && !matchStatus; + } + /** * This initializes the unnest cursor and creates data structures * to start iterating over the values to be unnested. @@ -336,53 +354,30 @@ private void initialize() match each item with the filter and populate bitset if there's a match */ - AtomicInteger idRef = new AtomicInteger(); - ValueMatcher myMatcher = allowFilter.makeMatcher(new ColumnSelectorFactory() - { - @Override - public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) + if (allowFilter != null) { + AtomicInteger idRef = new AtomicInteger(); + ValueMatcher myMatcher = allowFilter.makeMatcher(new ColumnSelectorFactory() { - if (!outputName.equals(dimensionSpec.getDimension())) { - throw new ISE("Asked for bad dimension[%s]", dimensionSpec); - } - return new DimensionSelector() + @Override + public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) { - private final IndexedInts myInts = new IndexedInts() - { - @Override - public int size() - { - return 1; - } - - @Override - public int get(int index) - { - return 1; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - - } - }; - @Override - public IndexedInts getRow() - { - return myInts; + if (!outputName.equals(dimensionSpec.getDimension())) { + throw new ISE("Asked for bad dimension[%s]", dimensionSpec); } - - @Override - public ValueMatcher makeValueMatcher(@Nullable String value) + return new DimensionSelector() { - // Handle value is null - return new ValueMatcher() + private final IndexedInts myInts = new IndexedInts() { @Override - public boolean matches() + public int size() { - return value.equals(lookupName(1)); + return 1; + } + + @Override + public int get(int index) + { + return 1; } @Override @@ -391,152 +386,175 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } }; - } - @Override - public ValueMatcher makeValueMatcher(Predicate predicate) - { - return new ValueMatcher() + @Override + public IndexedInts getRow() { - @Override - public boolean matches() + return myInts; + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + // Handle value is null + return new ValueMatcher() { - return predicate.apply(lookupName(1)); - } + @Override + public boolean matches() + { + return value.equals(lookupName(1)); + } - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + }; + } + + @Override + public ValueMatcher makeValueMatcher(Predicate predicate) + { + return new ValueMatcher() { + @Override + public boolean matches() + { + return predicate.apply(lookupName(1)); + } - } + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + }; } - } - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { - } + } - @Nullable - @Override - public Object getObject() - { - return null; - } + @Nullable + @Override + public Object getObject() + { + return null; + } - @Override - public Class classOfObject() - { - return null; - } + @Override + public Class classOfObject() + { + return null; + } - @Override - public int getValueCardinality() - { - return dimSelector.getValueCardinality(); - } + @Override + public int getValueCardinality() + { + return dimSelector.getValueCardinality(); + } - @Nullable - @Override - public String lookupName(int id) - { - return dimSelector.lookupName(idRef.get()); - } + @Nullable + @Override + public String lookupName(int id) + { + return dimSelector.lookupName(idRef.get()); + } - @Override - public boolean nameLookupPossibleInAdvance() - { - return dimSelector.nameLookupPossibleInAdvance(); - } + @Override + public boolean nameLookupPossibleInAdvance() + { + return dimSelector.nameLookupPossibleInAdvance(); + } - @Nullable - @Override - public IdLookup idLookup() - { - return dimSelector.idLookup(); - } + @Nullable + @Override + public IdLookup idLookup() + { + return dimSelector.idLookup(); + } + }; } - } - @Override - public ColumnValueSelector makeColumnValueSelector(String columnName) - { - return new ColumnValueSelector() + @Override + public ColumnValueSelector makeColumnValueSelector(String columnName) { - @Override - public double getDouble() + return new ColumnValueSelector() { - return Double.parseDouble(dimSelector.lookupName(idRef.get())); - } + @Override + public double getDouble() + { + return Double.parseDouble(dimSelector.lookupName(idRef.get())); + } - @Override - public float getFloat() - { - return 0; - } + @Override + public float getFloat() + { + return 0; + } - @Override - public long getLong() - { - return 0; - } + @Override + public long getLong() + { + return 0; + } - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { - } + } - @Override - public boolean isNull() - { - return false; - } + @Override + public boolean isNull() + { + return false; + } - @Nullable - @Override - public Object getObject() - { - return null; - } + @Nullable + @Override + public Object getObject() + { + return null; + } - @Override - public Class classOfObject() - { - return null; - } + @Override + public Class classOfObject() + { + return null; + } + }; } - } - @Nullable - @Override - public ColumnCapabilities getColumnCapabilities(String column) - { - - } - }); + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + return getColumnSelectorFactory().getColumnCapabilities(column); + } + }); - for (int i = 0; i < dimSelector.getValueCardinality(); ++i) { - idRef.set(i); - if (myMatcher.matches()) { - bitSet.set(i, true); + for (int i = 0; i < dimSelector.getValueCardinality(); ++i) { + idRef.set(i); + if (myMatcher.matches()) { + matchBitSet.set(i); + } } } indexForRow = 0; - if (allowFilter != null) { - this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); - } else { - this.valueMatcher = BooleanValueMatcher.of(true); - } this.indexIntsForRow = new SingleIndexInts(); if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!valueMatcher.matches() && !baseCursor.isDone()) { - advance(); + if (!matchBitSet.isEmpty()) { + if (!matchBitSet.get(indexedIntsForCurrentRow.get(indexForRow))) { + advance(); + } } needInitialization = false; } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 13d828dde148..ff882b39f71f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3009,8 +3009,7 @@ public void testUnnestWithInFilters() // Generates 2 native queries with 2 different values of vectorize cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where dim2 IN ('a','b','ab','abc') LIMIT 2", -// "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc') LIMIT 2), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc') LIMIT 2), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( @@ -3451,7 +3450,7 @@ public void testUnnestWithFilteringOnUnnestedVirtualCol() "EXPR$0", new InDimFilter("EXPR$0", ImmutableList.of("1.0", "2.0"), null) )) - .virtualColumns(expressionVirtualColumn( + .virtualColumns(expressionVirtualColumn( "v0", "array(\"dim2\",\"dim4\")", ColumnType.STRING_ARRAY @@ -3600,7 +3599,6 @@ public void testUnnestWithNotSelectorFilterOnUnnestedCol() new Object[]{"c"}, new Object[]{"d"}, new Object[]{""}, - new Object[]{""}, new Object[]{""} ) : ImmutableList.of( @@ -3608,7 +3606,6 @@ public void testUnnestWithNotSelectorFilterOnUnnestedCol() new Object[]{"c"}, new Object[]{"d"}, new Object[]{""}, - new Object[]{null}, new Object[]{null} ) );