diff --git a/docs/tutorials/tutorial-jupyter-index.md b/docs/tutorials/tutorial-jupyter-index.md index d77e0d42b322..4f39699d2d2d 100644 --- a/docs/tutorials/tutorial-jupyter-index.md +++ b/docs/tutorials/tutorial-jupyter-index.md @@ -42,6 +42,7 @@ Make sure you meet the following requirements before starting the Jupyter-based pip3 install requests ``` + - JupyterLab (recommended) or Jupyter Notebook running on a non-default port. By default, Druid and Jupyter both try to use port `8888`, so start Jupyter on a different port. @@ -99,10 +100,12 @@ The notebooks are located in the [apache/druid repo](https://github.com/apache/d The links that follow are the raw GitHub URLs, so you can use them to download the notebook directly, such as with `wget`, or manually through your web browser. Note that if you save the file from your web browser, make sure to remove the `.txt` extension. + - [Introduction to the Druid REST API]( https://raw.githubusercontent.com/apache/druid/master/examples/quickstart/jupyter-notebooks/api-tutorial.ipynb) walks you through some of the basics related to the Druid REST API and several endpoints. - [Introduction to the Druid Python API]( https://raw.githubusercontent.com/apache/druid/master/examples/quickstart/jupyter-notebooks/Python_API_Tutorial.ipynb) walks you through some of the basics related to the Druid API using the Python wrapper API. + - [Introduction to Druid SQL](https://raw.githubusercontent.com/apache/druid/master/examples/quickstart/jupyter-notebooks/sql-tutorial.ipynb) covers the basics of Druid SQL. 
diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 407aea5c39b9..c5737d966bec 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -23,13 +23,13 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.planning.DataSourceAnalysis; import org.apache.druid.segment.SegmentReference; import org.apache.druid.segment.UnnestSegmentReference; import org.apache.druid.utils.JvmUtils; import javax.annotation.Nullable; -import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -38,7 +38,7 @@ /** * The data source for representing an unnest operation. - * + *

* An unnest data source has the following: * a base data source which is to be unnested * the column name of the MVD which will be unnested @@ -50,19 +50,19 @@ public class UnnestDataSource implements DataSource private final DataSource base; private final String column; private final String outputName; - private final LinkedHashSet allowList; + private final DimFilter outputColumnFilter; private UnnestDataSource( DataSource dataSource, String columnName, String outputName, - LinkedHashSet allowList + DimFilter outputColumnFilter ) { this.base = dataSource; this.column = columnName; this.outputName = outputName; - this.allowList = allowList; + this.outputColumnFilter = outputColumnFilter; } @JsonCreator @@ -70,10 +70,10 @@ public static UnnestDataSource create( @JsonProperty("base") DataSource base, @JsonProperty("column") String columnName, @JsonProperty("outputName") String outputName, - @Nullable @JsonProperty("allowList") LinkedHashSet allowList + @Nullable @JsonProperty("outputColumnFilter") DimFilter outputColumnFilter ) { - return new UnnestDataSource(base, columnName, outputName, allowList); + return new UnnestDataSource(base, columnName, outputName, outputColumnFilter); } @JsonProperty("base") @@ -94,12 +94,11 @@ public String getOutputName() return outputName; } - @JsonProperty("allowList") - public LinkedHashSet getAllowList() + @JsonProperty("outputColumnFilter") + public DimFilter getOutputColumnFilter() { - return allowList; + return outputColumnFilter; } - @Override public Set getTableNames() { @@ -118,7 +117,7 @@ public DataSource withChildren(List children) if (children.size() != 1) { throw new IAE("Expected [1] child, got [%d]", children.size()); } - return new UnnestDataSource(children.get(0), column, outputName, allowList); + return new UnnestDataSource(children.get(0), column, outputName, outputColumnFilter); } @Override @@ -163,7 +162,7 @@ public Function createSegmentMapFunction( segmentMapFn.apply(baseSegment), column, outputName, - 
allowList + outputColumnFilter ); } } @@ -174,7 +173,7 @@ public Function createSegmentMapFunction( @Override public DataSource withUpdatedDataSource(DataSource newSource) { - return new UnnestDataSource(newSource, column, outputName, allowList); + return new UnnestDataSource(newSource, column, outputName, outputColumnFilter); } @Override @@ -213,7 +212,7 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(base, column, outputName); + return Objects.hash(base, column, outputName, outputColumnFilter); } @Override @@ -223,7 +222,7 @@ public String toString() "base=" + base + ", column='" + column + '\'' + ", outputName='" + outputName + '\'' + - ", allowList=" + allowList + + ", outputFilter='" + outputColumnFilter + '\'' + '}'; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 5d4340329897..d127bf17d4aa 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -22,15 +22,17 @@ import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; -import java.util.LinkedHashSet; import java.util.List; /** @@ -50,7 +52,7 @@ * unnestCursor.advance() -> 'e' *

*

- * The allowSet if available helps skip over elements which are not in the allowList by moving the cursor to + * The filter, if available, helps skip over elements which do not match the filter by moving the cursor to * the next available match. *

* The index reference points to the index of each row that the unnest cursor is accessing through currentVal @@ -65,7 +67,7 @@ public class UnnestColumnValueSelectorCursor implements Cursor private final ColumnValueSelector columnValueSelector; private final String columnName; private final String outputName; - private final LinkedHashSet allowSet; + private final ValueMatcher valueMatcher; private int index; private Object currentVal; private List unnestListForCurrentRow; @@ -76,7 +78,7 @@ public UnnestColumnValueSelectorCursor( ColumnSelectorFactory baseColumSelectorFactory, String columnName, String outputColumnName, - LinkedHashSet allowSet + @Nullable Filter filter ) { this.baseCursor = cursor; @@ -86,7 +88,11 @@ public UnnestColumnValueSelectorCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; + if (filter != null) { + this.valueMatcher = filter.makeMatcher(getColumnSelectorFactory()); + } else { + this.valueMatcher = BooleanValueMatcher.of(true); + } } @Override @@ -191,11 +197,7 @@ public boolean isNull() public Object getObject() { if (!unnestListForCurrentRow.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return unnestListForCurrentRow.get(index); - } else if (allowSet.contains((String) unnestListForCurrentRow.get(index))) { - return unnestListForCurrentRow.get(index); - } + return unnestListForCurrentRow.get(index); } return null; } @@ -243,9 +245,17 @@ public void advance() @Override public void advanceUninterruptibly() { - do { + // the index currently points to an element at position i ($e_i) + // while $e_i does not match the filter + // keep advancing the pointer to the first valid match + // calling advanceAndUpdate increments the index position so needs a call to matches() again for new match status + while (true) { advanceAndUpdate(); - } while (matchAndProceed()); + boolean match = valueMatcher.matches(); + if (match || baseCursor.isDone()) { + return; + } + } } 
@Override @@ -311,12 +321,11 @@ private void initialize() { this.unnestListForCurrentRow = new ArrayList<>(); getNextRow(needInitialization); - if (allowSet != null) { - if (!allowSet.isEmpty()) { - if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) { - advance(); - } - } + + // If the first value the index is pointing to does not match the filter + // advance the index to the first value which will match + if (!valueMatcher.matches() && !baseCursor.isDone()) { + advance(); } needInitialization = false; } @@ -339,22 +348,4 @@ private void advanceAndUpdate() index++; } } - - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if (allowSet == null || allowSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowSet.contains((String) unnestListForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index 93a56767bbfb..dff6cdfe87dd 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -20,9 +20,11 @@ package org.apache.druid.segment; import com.google.common.base.Predicate; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; import 
org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; @@ -32,7 +34,7 @@ import javax.annotation.Nullable; import java.util.BitSet; -import java.util.LinkedHashSet; +import java.util.concurrent.atomic.AtomicInteger; /** * The cursor to help unnest MVDs with dictionary encoding. @@ -58,9 +60,9 @@ *

* Total 5 advance calls above *

- * The allowSet, if available, helps skip over elements that are not in the allowList by moving the cursor to + * The filter, if available, helps skip over elements that do not match the filter by moving the cursor to * the next available match. The hashSet is converted into a bitset (during initialization) for efficiency. - * If allowSet is ['c', 'd'] then the advance moves over to the next available match + * If filter is IN ('c', 'd') then the advance moves over to the next available match *

* advance() -> 2 -> 'c' * advance() -> 3 -> 'd' (advances base cursor first) @@ -79,31 +81,33 @@ public class UnnestDimensionCursor implements Cursor private final DimensionSelector dimSelector; private final String columnName; private final String outputName; - private final LinkedHashSet allowSet; - private final BitSet allowedBitSet; private final ColumnSelectorFactory baseColumnSelectorFactory; - private int index; - @Nullable private IndexedInts indexedIntsForCurrentRow; + @Nullable + private final Filter allowFilter; + private int indexForRow; + @Nullable + private IndexedInts indexedIntsForCurrentRow; private boolean needInitialization; private SingleIndexInts indexIntsForRow; + private BitSet matchBitSet; public UnnestDimensionCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, String columnName, String outputColumnName, - LinkedHashSet allowSet + @Nullable Filter allowFilter ) { this.baseCursor = cursor; this.baseColumnSelectorFactory = baseColumnSelectorFactory; this.dimSelector = this.baseColumnSelectorFactory.makeDimensionSelector(DefaultDimensionSpec.of(columnName)); this.columnName = columnName; - this.index = 0; + this.indexForRow = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; - this.allowedBitSet = new BitSet(); + this.allowFilter = allowFilter; + this.matchBitSet = new BitSet(); } @Override @@ -154,7 +158,10 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean matches() { - return idForLookup == indexedIntsForCurrentRow.get(index); + if (indexedIntsForCurrentRow.size() == 0) { + return false; + } + return idForLookup == indexedIntsForCurrentRow.get(indexForRow); } @Override @@ -184,14 +191,7 @@ public Object getObject() if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) { return null; } - if (allowedBitSet.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return 
lookupName(indexedIntsForCurrentRow.get(index)); - } - } else if (allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - return null; + return lookupName(indexedIntsForCurrentRow.get(indexForRow)); } @Override @@ -203,8 +203,8 @@ public Class classOfObject() @Override public int getValueCardinality() { - if (!allowedBitSet.isEmpty()) { - return allowedBitSet.cardinality(); + if (!matchBitSet.isEmpty()) { + return matchBitSet.cardinality(); } return dimSelector.getValueCardinality(); } @@ -293,6 +293,10 @@ public boolean isDone() if (needInitialization && !baseCursor.isDone()) { initialize(); } + // If the filter does not match any dimensions + // No need to move cursor and do extra work + if (allowFilter != null && matchBitSet.isEmpty()) + return true; return baseCursor.isDone(); } @@ -308,11 +312,34 @@ public boolean isDoneOrInterrupted() @Override public void reset() { - index = 0; + indexForRow = 0; needInitialization = true; baseCursor.reset(); } + /** + * This advances the unnest cursor in cases where an allowList is specified + * and the current value at the unnest cursor is not in the allowList. + * The cursor in such cases is moved till the next match is found. + * + * @return a boolean to indicate whether to stay or move cursor + */ + private boolean matchAndProceed() + { + boolean matchStatus; + if ((allowFilter == null) && matchBitSet.isEmpty()) { + matchStatus = true; + } else { + if (indexedIntsForCurrentRow==null || indexedIntsForCurrentRow.size() == 0) { + matchStatus = false; + } + else { + matchStatus = matchBitSet.get(indexedIntsForCurrentRow.get(indexForRow)); + } + } + return !baseCursor.isDone() && !matchStatus; + } + /** * This initializes the unnest cursor and creates data structures * to start iterating over the values to be unnested. 
@@ -322,20 +349,210 @@ public void reset() @Nullable private void initialize() { - IdLookup idLookup = dimSelector.idLookup(); - this.indexIntsForRow = new SingleIndexInts(); - if (allowSet != null && !allowSet.isEmpty() && idLookup != null) { - for (String s : allowSet) { - if (idLookup.lookupId(s) >= 0) { - allowedBitSet.set(idLookup.lookupId(s)); + /* + for i=0 to baseColFactory.makeDimSelector.getValueCardinality() + match each item with the filter and populate bitset if there's a match + */ + + if (allowFilter != null) { + AtomicInteger idRef = new AtomicInteger(); + ValueMatcher myMatcher = allowFilter.makeMatcher(new ColumnSelectorFactory() + { + @Override + public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) + { + if (!outputName.equals(dimensionSpec.getDimension())) { + throw new ISE("Asked for bad dimension[%s]", dimensionSpec); + } + return new DimensionSelector() + { + private final IndexedInts myInts = new IndexedInts() + { + @Override + public int size() + { + return 1; + } + + @Override + public int get(int index) + { + return 1; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + }; + + @Override + public IndexedInts getRow() + { + return myInts; + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + // Handle value is null + return new ValueMatcher() + { + @Override + public boolean matches() + { + return value.equals(lookupName(1)); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + }; + } + + @Override + public ValueMatcher makeValueMatcher(Predicate predicate) + { + return new ValueMatcher() + { + @Override + public boolean matches() + { + return predicate.apply(lookupName(1)); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + }; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + + @Nullable + @Override + 
public Object getObject() + { + return null; + } + + @Override + public Class classOfObject() + { + return null; + } + + @Override + public int getValueCardinality() + { + return dimSelector.getValueCardinality(); + } + + @Nullable + @Override + public String lookupName(int id) + { + return dimSelector.lookupName(idRef.get()); + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return dimSelector.nameLookupPossibleInAdvance(); + } + + @Nullable + @Override + public IdLookup idLookup() + { + return dimSelector.idLookup(); + } + }; + } + + @Override + public ColumnValueSelector makeColumnValueSelector(String columnName) + { + return new ColumnValueSelector() + { + @Override + public double getDouble() + { + return Double.parseDouble(dimSelector.lookupName(idRef.get())); + } + + @Override + public float getFloat() + { + return 0; + } + + @Override + public long getLong() + { + return 0; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + + @Override + public boolean isNull() + { + return false; + } + + @Nullable + @Override + public Object getObject() + { + return null; + } + + @Override + public Class classOfObject() + { + return null; + } + }; + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + return getColumnSelectorFactory().getColumnCapabilities(column); + } + }); + + for (int i = 0; i < dimSelector.getValueCardinality(); ++i) { + idRef.set(i); + if (myMatcher.matches()) { + matchBitSet.set(i); } } } + + indexForRow = 0; + this.indexIntsForRow = new SingleIndexInts(); + if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!allowedBitSet.isEmpty()) { - if (!allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { + if (!matchBitSet.isEmpty()) { + if (!matchBitSet.get(indexedIntsForCurrentRow.get(indexForRow))) { advance(); } } @@ -351,43 +568,25 @@ private void initialize() private void 
advanceAndUpdate() { if (indexedIntsForCurrentRow == null) { - index = 0; + indexForRow = 0; if (!baseCursor.isDone()) { baseCursor.advanceUninterruptibly(); } } else { - if (index >= indexedIntsForCurrentRow.size() - 1) { + if (indexForRow >= indexedIntsForCurrentRow.size() - 1) { if (!baseCursor.isDone()) { baseCursor.advanceUninterruptibly(); } if (!baseCursor.isDone()) { indexedIntsForCurrentRow = dimSelector.getRow(); } - index = 0; + indexForRow = 0; } else { - ++index; + ++indexForRow; } } } - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if ((allowSet == null || allowSet.isEmpty()) && allowedBitSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowedBitSet.get(indexedIntsForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } - // Helper class to help in returning // getRow from the dimensionSelector // This is set in the initialize method @@ -411,9 +610,9 @@ public int size() public int get(int idx) { // need to get value from the indexed ints - // only if it is non null and has at least 1 value + // only if it is non-null and has at least 1 value if (indexedIntsForCurrentRow != null && indexedIntsForCurrentRow.size() > 0) { - return indexedIntsForCurrentRow.get(index); + return indexedIntsForCurrentRow.get(indexForRow); } return 0; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java index 9da6b8132cbb..92a3cb68a206 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java 
@@ -21,6 +21,7 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.timeline.SegmentId; import org.apache.druid.utils.CloseableUtils; import org.joda.time.Interval; @@ -28,7 +29,6 @@ import javax.annotation.Nullable; import java.io.Closeable; import java.io.IOException; -import java.util.LinkedHashSet; import java.util.Optional; /** @@ -42,14 +42,14 @@ public class UnnestSegmentReference implements SegmentReference private final SegmentReference baseSegment; private final String dimension; private final String renamedOutputDimension; - private final LinkedHashSet allowSet; + private final DimFilter outputColumnFilter; - public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName, LinkedHashSet allowList) + public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName, DimFilter outputColumnFilter) { this.baseSegment = baseSegment; this.dimension = dimension; this.renamedOutputDimension = outputName; - this.allowSet = allowList; + this.outputColumnFilter = outputColumnFilter; } @Override @@ -103,7 +103,7 @@ public StorageAdapter asStorageAdapter() baseSegment.asStorageAdapter(), dimension, renamedOutputDimension, - allowSet + outputColumnFilter ); } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index f76ab89270af..8866213aef47 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -24,17 +24,15 @@ import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.query.QueryMetrics; +import org.apache.druid.query.filter.DimFilter; import 
org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; -import org.apache.druid.segment.filter.AndFilter; import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; -import java.util.Arrays; import java.util.LinkedHashSet; import java.util.Objects; @@ -48,19 +46,19 @@ public class UnnestStorageAdapter implements StorageAdapter private final StorageAdapter baseAdapter; private final String dimensionToUnnest; private final String outputColumnName; - private final LinkedHashSet allowSet; + private final DimFilter outputColumnFilter; public UnnestStorageAdapter( final StorageAdapter baseAdapter, final String dimension, final String outputColumnName, - final LinkedHashSet allowSet + final DimFilter outputColumnFilter ) { this.baseAdapter = baseAdapter; this.dimensionToUnnest = dimension; this.outputColumnName = outputColumnName; - this.allowSet = allowSet; + this.outputColumnFilter = outputColumnFilter; } @Override @@ -73,20 +71,8 @@ public Sequence makeCursors( @Nullable QueryMetrics queryMetrics ) { - Filter updatedFilter; - if (allowSet != null && !allowSet.isEmpty()) { - final InDimFilter allowListFilters; - allowListFilters = new InDimFilter(dimensionToUnnest, allowSet); - if (filter != null) { - updatedFilter = new AndFilter(Arrays.asList(filter, allowListFilters)); - } else { - updatedFilter = allowListFilters; - } - } else { - updatedFilter = filter; - } final Sequence baseCursorSequence = baseAdapter.makeCursors( - updatedFilter, + filter, interval, virtualColumns, gran, @@ -107,7 +93,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - allowSet + outputColumnFilter != null ? 
outputColumnFilter.toFilter() : null ); } else { retVal = new UnnestColumnValueSelectorCursor( @@ -115,7 +101,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - allowSet + outputColumnFilter != null ? outputColumnFilter.toFilter() : null ); } } else { @@ -124,7 +110,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), dimensionToUnnest, outputColumnName, - allowSet + outputColumnFilter != null ? outputColumnFilter.toFilter() : null ); } return retVal; diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index 4de22cb00610..86b717734990 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -45,9 +45,7 @@ import org.junit.runners.Parameterized; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -95,23 +93,6 @@ private Druids.ScanQueryBuilder newTestUnnestQuery() .legacy(legacy); } - private Druids.ScanQueryBuilder newTestUnnestQueryWithAllowSet() - { - List allowList = Arrays.asList("a", "b", "c"); - LinkedHashSet allowSet = new LinkedHashSet(allowList); - return Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - allowSet - )) - .columns(Collections.emptyList()) - .eternityInterval() - .limit(3) - .legacy(legacy); - } - @Test public void testScanOnUnnest() { @@ -464,65 +445,6 @@ public void testUnnestRunnerWithOrdering() ScanQueryRunnerTest.verify(ascendingExpectedResults, results); } - @Test - public void testUnnestRunnerNonNullAllowSet() - { - 
ScanQuery query = newTestUnnestQueryWithAllowSet() - .intervals(I_0112_0114) - .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - .limit(3) - .build(); - - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - FACTORY, - new IncrementalIndexSegment( - index, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - - Iterable results = queryRunner.run(QueryPlus.wrap(query)).toList(); - - String[] columnNames; - if (legacy) { - columnNames = new String[]{ - getTimestampName() + ":TIME", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } else { - columnNames = new String[]{ - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } - String[] values; - if (legacy) { - values = new String[]{ - "2011-01-12T00:00:00.000Z\ta", - "2011-01-12T00:00:00.000Z\tb", - "2011-01-13T00:00:00.000Z\ta" - }; - } else { - values = new String[]{ - "a", - "b", - "a" - }; - } - - final List>> events = ScanQueryRunnerTest.toEvents(columnNames, legacy, values); - List expectedResults = toExpected( - events, - legacy - ? 
Lists.newArrayList(getTimestampName(), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - : Collections.singletonList(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST), - 0, - 3 - ); - ScanQueryRunnerTest.verify(expectedResults, results); - } - private String getTimestampName() { diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java index cf4a98c88035..dbe7d39761c4 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java @@ -27,14 +27,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandlingTest { private static String OUTPUT_NAME = "unnested-column"; - private static LinkedHashSet IGNORE_SET = null; - private static LinkedHashSet IGNORE_SET1 = new LinkedHashSet<>(Arrays.asList("b", "f")); @Test @@ -54,7 +51,7 @@ public void test_list_unnest_cursors() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -88,7 +85,7 @@ public void test_list_unnest_cursors_user_supplied_list() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -120,7 +117,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() 
.makeColumnValueSelector(OUTPUT_NAME); @@ -157,7 +154,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -191,7 +188,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -221,7 +218,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -255,7 +252,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -289,14 +286,14 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor( childCursor, childCursor.getColumnSelectorFactory(), OUTPUT_NAME, "tmp-out", - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory() .makeColumnValueSelector("tmp-out"); @@ -331,7 +328,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = 
unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -369,7 +366,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -387,44 +384,6 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() Assert.assertEquals(k, 10); } - @Test - public void test_list_unnest_cursors_user_supplied_list_with_ignore_set() - { - List inputList = Arrays.asList( - Arrays.asList("a", "b", "c"), - Arrays.asList("e", "f", "g", "h", "i"), - Collections.singletonList("j") - ); - - List expectedResults = Arrays.asList("b", "f"); - - //Create base cursor - ListCursor listCursor = new ListCursor(inputList); - - //Create unnest cursor - UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( - listCursor, - listCursor.getColumnSelectorFactory(), - "dummy", - OUTPUT_NAME, - IGNORE_SET1 - ); - ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() - .makeColumnValueSelector(OUTPUT_NAME); - int k = 0; - while (!unnestCursor.isDone()) { - Object valueSelectorVal = unnestColumnValueSelector.getObject(); - if (valueSelectorVal == null) { - Assert.assertEquals(null, expectedResults.get(k)); - } else { - Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString()); - } - k++; - unnestCursor.advance(); - } - Assert.assertEquals(k, 2); - } - @Test public void test_list_unnest_cursors_user_supplied_list_double() { @@ -445,7 +404,7 @@ public void test_list_unnest_cursors_user_supplied_list_double() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -479,7 +438,7 @@ public void 
test_list_unnest_cursors_user_supplied_list_float() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -513,7 +472,7 @@ public void test_list_unnest_cursors_user_supplied_list_long() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -550,7 +509,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -586,7 +545,7 @@ public void test_list_unnest_cursors_dimSelector() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); // should return a column value selector for this case BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory() @@ -629,7 +588,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() listCursor.getColumnSelectorFactory(), "dummy", OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -643,4 +602,3 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() Assert.assertEquals(k, 9); } } - diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 35d42b82d4b3..a9e8efb4d63d 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ 
b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -20,11 +20,14 @@ package org.apache.druid.segment; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.generator.GeneratorBasicSchemas; @@ -42,7 +45,6 @@ import org.junit.Test; import java.util.Arrays; -import java.util.LinkedHashSet; import java.util.List; public class UnnestStorageAdapterTest extends InitializedNullHandlingTest @@ -58,7 +60,6 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest private static String COLUMNNAME = "multi-string1"; private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1"; private static String OUTPUT_COLUMN_NAME1 = "unnested-multi-string1-again"; - private static LinkedHashSet IGNORE_SET = new LinkedHashSet<>(Arrays.asList("1", "3", "5")); @BeforeClass public static void setup() @@ -90,7 +91,7 @@ public static void setup() INCREMENTAL_INDEX_STORAGE_ADAPTER, COLUMNNAME, OUTPUT_COLUMN_NAME, - IGNORE_SET + null ); UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, @@ -102,7 +103,7 @@ public static void setup() UNNEST_STORAGE_ADAPTER1, COLUMNNAME, OUTPUT_COLUMN_NAME1, - IGNORE_SET + null ); ADAPTERS = ImmutableList.of( UNNEST_STORAGE_ADAPTER, @@ -180,7 +181,6 @@ public void test_group_of_unnest_adapters_column_capabilities() @Test public void test_unnest_adapters_basic() { - Sequence cursorSequence = 
UNNEST_STORAGE_ADAPTER.makeCursors( null, UNNEST_STORAGE_ADAPTER.getInterval(), @@ -203,11 +203,8 @@ public void test_unnest_adapters_basic() cursor.advance(); count++; } - /* - each row has 8 entries. - unnest 2 rows -> 16 rows after unnest - */ - Assert.assertEquals(count, 16); + + Assert.assertEquals(16, count); return null; }); @@ -249,150 +246,43 @@ public void test_two_levels_of_unnest_adapters() unnest 2 rows -> 16 entries also the value cardinality unnest of unnest -> 16*8 = 128 rows */ - Assert.assertEquals(count, 128); - Assert.assertEquals(dimSelector.getValueCardinality(), 16); + Assert.assertEquals(128, count); + Assert.assertEquals(16, dimSelector.getValueCardinality()); return null; }); } @Test - public void test_unnest_adapters_with_allowList() + public void test_unnest_adapters_basic_with_in_filter() { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), + Filter f = new InDimFilter(OUTPUT_COLUMN_NAME, ImmutableSet.of("1", "3", "5")); + Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors( + f, + UNNEST_STORAGE_ADAPTER.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null ); + List expectedResults = Arrays.asList("1", "3", "5"); cursorSequence.accumulate(null, (accumulated, cursor) -> { ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME); - - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); - } - cursor.advance(); - count++; - } - /* - each row has 8 distinct 
entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - */ - Assert.assertEquals(count, 3); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_two_levels_of_unnest_adapters_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER3.makeCursors( - null, - UNNEST_STORAGE_ADAPTER3.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER3; - Assert.assertEquals(adapter.getDimensionToUnnest(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertEquals(adapter.getMaxValue(columnName), adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertEquals(adapter.getMinValue(columnName), adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME1)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME1); - int count = 0; while (!cursor.isDone()) { Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); if (dimSelectorVal == null) { Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); + } else { + Assert.assertEquals(expectedResults.get(count), dimSelectorVal.toString()); } cursor.advance(); count++; } - /* - each row has 8 distinct entries. 
- allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - unnest of that unnest will have 3*3 = 9 entries - */ - Assert.assertEquals(count, 9); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_unnest_adapters_methods_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER1; - Assert.assertEquals(adapter.getDimensionToUnnest(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertEquals(adapter.getMaxValue(columnName), adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertEquals(adapter.getMinValue(columnName), adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - IdLookup idlookUp = dimSelector.idLookup(); - Assert.assertFalse(dimSelector.isNull()); - int[] indices = new int[]{1, 3, 5}; - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Assert.assertEquals(idlookUp.lookupId((String) dimSelectorVal), indices[count]); - // after unnest first entry in get row should equal the object - // and the row size will always be 1 - Assert.assertEquals(dimSelector.getRow().get(0), indices[count]); - Assert.assertEquals(dimSelector.getRow().size(), 1); - Assert.assertNotNull(dimSelector.makeValueMatcher(OUTPUT_COLUMN_NAME)); - cursor.advance(); - count++; - } - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - Assert.assertEquals(count, 
3); + //As we are filtering on 1, 3 and 5 there should be 3 entries + Assert.assertEquals(3, count); return null; }); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 847c41b63ede..89d02df0ff3e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; @@ -32,9 +33,11 @@ import org.apache.calcite.rel.core.Correlate; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexUtil; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.DataSource; import org.apache.druid.query.QueryDataSource; @@ -43,6 +46,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -58,7 +62,7 @@ * Each correlate can be perceived as a join with the join type being inner * the left of a correlate as seen in the rule {@link 
org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule} * is the {@link DruidQueryRel} while the right will always be an {@link DruidUnnestDatasourceRel}. - * + *

* Since this is a subclass of DruidRel it is automatically considered by other rules that involves DruidRels. * Some example being SELECT_PROJECT and SORT_PROJECT rules in {@link org.apache.druid.sql.calcite.rule.DruidRules.DruidQueryRule} */ @@ -70,8 +74,8 @@ public class DruidCorrelateUnnestRel extends DruidRel private final PartialDruidQuery partialQuery; private final PlannerConfig plannerConfig; private final Correlate correlateRel; - private RelNode left; - private RelNode right; + private final RelNode left; + private final RelNode right; private DruidCorrelateUnnestRel( RelOptCluster cluster, @@ -136,13 +140,23 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) final DruidRel druidQueryRel = (DruidRel) left; final DruidQuery leftQuery = Preconditions.checkNotNull((druidQueryRel).toDruidQuery(false), "leftQuery"); final DataSource leftDataSource; + final PartialDruidQuery partialQueryFromLeft = druidQueryRel.getPartialDruidQuery(); + final PartialDruidQuery corrPartialQuey; + + // If there is a LIMIT in the left query + // It should be honored before unnest + // Create a query data source in that case + - if (DruidJoinQueryRel.computeLeftRequiresSubquery(getPlannerContext(), druidQueryRel)) { + if (partialQueryFromLeft.getSort() != null || partialQueryFromLeft.getSortProject() != null) { leftDataSource = new QueryDataSource(leftQuery.getQuery()); + corrPartialQuey = partialQuery; } else { leftDataSource = leftQuery.getDataSource(); + corrPartialQuey = updateCorrPartialQueryFromLeft(partialQueryFromLeft); } + final DruidUnnestDatasourceRel unnestDatasourceRel = (DruidUnnestDatasourceRel) right; @@ -157,7 +171,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) unnestDatasourceRel.getUnnestProject().getProjects().get(0) ); - LogicalProject unnestProject = LogicalProject.create( + final LogicalProject unnestProject = LogicalProject.create( this, ImmutableList.of(unnestDatasourceRel.getUnnestProject() .getProjects() @@ -165,6 +179,23 
@@ public DruidQuery toDruidQuery(boolean finalizeAggregations) unnestDatasourceRel.getUnnestProject().getRowType() ); + final Filter unnestFilterFound = unnestDatasourceRel.getUnnestFilter(); + final Filter logicalFilter; + if (unnestFilterFound != null) { + // The correlated value will be the last element in the row signature of correlate + // The filter points to $0 of the right data source e.g. OR(=($0, 'a'), =($0, 'b')) + // After the correlation the rowType becomes (left data source rowtype + 1) + // So the filter needs to be shifted to the last element of + // rowtype after the correlation for e.g OR(=($17, 'a'), =($17, 'b')) + logicalFilter = LogicalFilter.create( + correlateRel, + RexUtil.shift(unnestFilterFound.getCondition(), rowSignature.size() - 1), + ImmutableSet.of(correlateRel.getCorrelationId()) + ); + } else { + logicalFilter = null; + } + // placeholder for dimension or expression to be unnested final String dimOrExpToUnnest; final VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create( @@ -174,7 +205,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) ); // the unnest project is needed in case of a virtual column - // unnest(mv_to_array(dim_1)) is reconciled as unnesting a MVD dim_1 not requiring a virtual column + // unnest(mv_to_array(dim_1)) is reconciled as unnesting an MVD dim_1 not requiring a virtual column // while unnest(array(dim_2,dim_3)) is understood as unnesting a virtual column which is an array over dim_2 and dim_3 elements boolean unnestProjectNeeded = false; getPlannerContext().setJoinExpressionVirtualColumnRegistry(virtualColumnRegistry); @@ -199,13 +230,22 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) // add the unnest project to the partial query if required // This is necessary to handle the virtual columns on the unnestProject // Also create the unnest datasource to be used by the partial query - PartialDruidQuery partialDruidQuery = unnestProjectNeeded ? 
partialQuery.withUnnest(unnestProject) : partialQuery; + PartialDruidQuery partialDruidQuery = unnestProjectNeeded + ? corrPartialQuey.withUnnestProject(unnestProject) + : corrPartialQuey; return partialDruidQuery.build( UnnestDataSource.create( leftDataSource, dimOrExpToUnnest, unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0), - null + // Filters from Calcite are received as bound Filters + // This piece optimizes multiple bounds to IN filters, Selector Filters etc. + logicalFilter != null ? Filtration.create(DruidQuery.getDimFilter( + getPlannerContext(), + rowSignature, + virtualColumnRegistry, + logicalFilter + )).optimizeFilterOnly(rowSignature).getDimFilter() : null ), rowSignature, getPlannerContext(), @@ -215,6 +255,26 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) ); } + private PartialDruidQuery updateCorrPartialQueryFromLeft(PartialDruidQuery partialQueryFromLeft) + { + // The DruidCorrelateRule already creates the project and pushes it on the top level + // So get select project from partialQuery + // The filters are present on the partial query of the left + // The group by and having clauses would be on the top level + // Same for the sort + PartialDruidQuery corrQuery = PartialDruidQuery.create(correlateRel); + corrQuery = corrQuery.withWhereFilter(partialQueryFromLeft.getWhereFilter()) + .withSelectProject(partialQuery.getSelectProject()); + if (partialQuery.getAggregate() != null) { + corrQuery = corrQuery.withAggregate(partialQuery.getAggregate()) + .withHavingFilter(partialQuery.getHavingFilter()); + } + if (partialQuery.getSort() != null || partialQuery.getSortProject() != null) { + corrQuery = corrQuery.withSort(partialQuery.getSort()); + } + return corrQuery; + } + @Override protected DruidCorrelateUnnestRel clone() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index dee970655871..91002dd80690 
100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -309,6 +309,7 @@ public static DruidQuery fromPartialQuery( unnestProjection = null; } + return new DruidQuery( dataSource, plannerContext, @@ -353,7 +354,7 @@ private static DimFilter computeHavingFilter( } @Nonnull - private static DimFilter getDimFilter( + public static DimFilter getDimFilter( final PlannerContext plannerContext, final RowSignature rowSignature, @Nullable final VirtualColumnRegistry virtualColumnRegistry, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java index cb01a003eae8..0ba9a64a6b51 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java @@ -20,6 +20,7 @@ package org.apache.druid.sql.calcite.rel; import org.apache.calcite.rel.RelWriter; +import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Uncollect; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.type.RelDataType; @@ -32,6 +33,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -41,11 +43,11 @@ /** * The Rel node to capture the unnest (or uncollect) part in a query. This covers 2 cases: - * + *

* Case 1: * If this is an unnest on a constant and no input table is required, the final query is built using * an UnnestDataSource with a base InlineDataSource in this rel. - * + *

* Case 2: * If the unnest has an input table, this rel resolves the unnest part and delegates the rel to be consumed by other * rule ({@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule} @@ -55,11 +57,13 @@ public class DruidUnnestDatasourceRel extends DruidRel private final Uncollect uncollect; private final DruidQueryRel druidQueryRel; private final LogicalProject unnestProject; + private final Filter unnestFilter; public DruidUnnestDatasourceRel( Uncollect uncollect, DruidQueryRel queryRel, LogicalProject unnestProject, + Filter unnestFilter, PlannerContext plannerContext ) { @@ -67,6 +71,7 @@ public DruidUnnestDatasourceRel( this.uncollect = uncollect; this.druidQueryRel = queryRel; this.unnestProject = unnestProject; + this.unnestFilter = unnestFilter; } public LogicalProject getUnnestProject() @@ -74,6 +79,11 @@ public LogicalProject getUnnestProject() return unnestProject; } + public Filter getUnnestFilter() + { + return unnestFilter; + } + @Nullable @Override public PartialDruidQuery getPartialDruidQuery() @@ -88,6 +98,7 @@ public DruidUnnestDatasourceRel withPartialQuery(PartialDruidQuery newQueryBuild uncollect, druidQueryRel.withPartialQuery(newQueryBuilder), unnestProject, + unnestFilter, getPlannerContext() ); } @@ -123,7 +134,12 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) ), "inline", druidQueryRel.getRowType().getFieldNames().get(0), - null + unnestFilter != null ? 
Filtration.create(DruidQuery.getDimFilter( + getPlannerContext(), + druidQueryRel.getDruidTable().getRowSignature(), + virtualColumnRegistry, + unnestFilter + )).optimizeFilterOnly(druidQueryRel.getDruidTable().getRowSignature()).getDimFilter() : null ); DruidQuery query = druidQueryRel.getPartialDruidQuery().build( @@ -150,14 +166,24 @@ public DruidUnnestDatasourceRel asDruidConvention() new Uncollect(getCluster(), traitSet.replace(DruidConvention.instance()), uncollect.getInput(), false), druidQueryRel.asDruidConvention(), unnestProject, + unnestFilter, getPlannerContext() ); } + public DruidUnnestDatasourceRel withFilter(Filter f) + { + return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, f, getPlannerContext()); + } + @Override public RelWriter explainTerms(RelWriter pw) { - return super.explainTerms(pw); + return super.explainTerms(pw) + .item("Uncollect", uncollect) + .item("Query", druidQueryRel) + .item("unnestProject", unnestProject) + .item("unnestFilter", unnestFilter); } @Override @@ -175,6 +201,6 @@ protected RelDataType deriveRowType() @Override protected DruidUnnestDatasourceRel clone() { - return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, getPlannerContext()); + return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, unnestFilter, getPlannerContext()); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java index 0cd71af8e5e7..61697bd03ba0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java @@ -342,7 +342,7 @@ public PartialDruidQuery withWindow(final Window newWindow) ); } - public PartialDruidQuery withUnnest(final Project newUnnestProject) + public PartialDruidQuery withUnnestProject(final Project newUnnestProject) { return new PartialDruidQuery( 
builderSupplier, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java index 1870e11dd75c..7a9c349828b0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java @@ -101,8 +101,9 @@ public void onMatch(RelOptRuleCall call) final RexBuilder rexBuilder = correlate.getCluster().getRexBuilder(); final Filter druidRelFilter; - final DruidRel newDruidRelFilter; + final DruidRel newDruidRel; final List newProjectExprs = new ArrayList<>(); + final List newWheres = new ArrayList<>(); final boolean isLeftDirectAccessPossible = enableLeftScanDirect && (druidRel instanceof DruidQueryRel); @@ -116,14 +117,14 @@ public void onMatch(RelOptRuleCall call) // Left-side projection expressions rewritten to be on top of the correlate. newProjectExprs.addAll(leftProject.getProjects()); - newDruidRelFilter = druidRel.withPartialQuery(PartialDruidQuery.create(leftScan)); + newDruidRel = druidRel.withPartialQuery(PartialDruidQuery.create(leftScan)); } else { // Leave druidRel as-is. Write input refs that do nothing. 
for (int i = 0; i < druidRel.getRowType().getFieldCount(); i++) { newProjectExprs.add(rexBuilder.makeInputRef(correlate.getRowType().getFieldList().get(i).getType(), i)); } - newDruidRelFilter = druidRel; - druidRelFilter = null; + newDruidRel = druidRel; + druidRelFilter = druidRel.getPartialDruidQuery().getWhereFilter(); } if (druidUnnestDatasourceRel.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT) { @@ -131,7 +132,7 @@ public void onMatch(RelOptRuleCall call) druidUnnestDatasourceRel.getPartialDruidQuery() .getSelectProject() .getProjects(), - newDruidRelFilter.getRowType().getFieldCount() + newDruidRel.getRowType().getFieldCount() )) { newProjectExprs.add(rexNode); } @@ -143,7 +144,7 @@ public void onMatch(RelOptRuleCall call) .getFieldList() .get(druidRel.getRowType().getFieldCount() + i) .getType(), - newDruidRelFilter.getRowType().getFieldCount() + i + newDruidRel.getRowType().getFieldCount() + i ) ); } @@ -152,7 +153,7 @@ public void onMatch(RelOptRuleCall call) final DruidCorrelateUnnestRel druidCorr = DruidCorrelateUnnestRel.create( correlate.copy( correlate.getTraitSet(), - newDruidRelFilter, + newDruidRel, druidUnnestDatasourceRel, correlate.getCorrelationId(), correlate.getRequiredColumns(), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java new file mode 100644 index 000000000000..41a1a4c32db6 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.druid.sql.calcite.rel.DruidUnnestDatasourceRel; + +public class DruidFilterUnnestRule extends RelOptRule +{ + private static final DruidFilterUnnestRule INSTANCE = new DruidFilterUnnestRule(); + + private DruidFilterUnnestRule() + { + super( + operand( + Filter.class, + operand(DruidUnnestDatasourceRel.class, any()) + ) + ); + } + + public static DruidFilterUnnestRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final Filter filter = call.rel(0); + final DruidUnnestDatasourceRel unnestDatasourceRel = call.rel(1); + DruidUnnestDatasourceRel newRel = unnestDatasourceRel.withFilter(filter); + call.transformTo(newRel); + } + + // This is for a special case of handling selector filters + // on top of UnnestDataSourceRel when Calcite adds an extra + // LogicalProject on the LogicalFilter. For e.g. 
#122 here + // SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' + // 126:LogicalProject(d3=[$17]) + // 124:LogicalCorrelate(subset=[rel#125:Subset#6.NONE.[]], correlation=[$cor0], joinType=[inner], requiredColumns=[{3}]) + // 8:LogicalTableScan(subset=[rel#114:Subset#0.NONE.[]], table=[[druid, numfoo]]) + // 122:LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR]) + // 120:LogicalFilter(subset=[rel#121:Subset#4.NONE.[]], condition=[=($0, 'b')]) + // 118:Uncollect(subset=[rel#119:Subset#3.NONE.[]]) + // 116:LogicalProject(subset=[rel#117:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)]) + // 9:LogicalValues(subset=[rel#115:Subset#1.NONE.[0]], tuples=[[{ 0 }]]) + + // This logical project does a type cast only which Druid already has information about + // So we can skip this LogicalProject + // Extensive unit tests can be found in {@link CalciteArraysQueryTest} + + static class DruidProjectOnCorrelateRule extends RelOptRule + { + private static final DruidProjectOnCorrelateRule INSTANCE = new DruidProjectOnCorrelateRule(); + + private DruidProjectOnCorrelateRule() + { + super( + operand( + Project.class, + operand(DruidUnnestDatasourceRel.class, any()) + ) + ); + } + + public static DruidProjectOnCorrelateRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final DruidUnnestDatasourceRel unnestDatasourceRel = call.rel(1); + call.transformTo(unnestDatasourceRel); + } + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 4276a48b2f13..2d238238fbd5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -99,7 +99,9 @@ public static List rules(PlannerContext plannerContext) DruidSortUnionRule.instance(), DruidJoinRule.instance(plannerContext), 
new DruidUnnestDatasourceRule(plannerContext), - new DruidCorrelateUnnestRule(plannerContext) + new DruidCorrelateUnnestRule(plannerContext), + DruidFilterUnnestRule.instance(), + DruidFilterUnnestRule.DruidProjectOnCorrelateRule.instance() ) ); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java index e8123fe0670c..52334279b961 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java @@ -95,6 +95,7 @@ public void onMatch(final RelOptRuleCall call) uncollectRel, druidQueryRel.withPartialQuery(druidQueryRel.getPartialDruidQuery().withSelectProject(queryProject)), logicalProject, + null, plannerContext ); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 119cae8634ec..ff882b39f71f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -171,7 +171,7 @@ public void testSelectNonConstantArrayExpressionFromTableForMultival() try { ExpressionProcessing.initializeForTests(true); - // if nested arrays are allowed, dim3 is a multi-valued string column, so the automatic translation will turn this + // if nested arrays are allowed, dim3 is a multivalued string column, so the automatic translation will turn this // expression into // // `map((dim3) -> array(concat(dim3,'word'),'up'), dim3)` @@ -212,7 +212,7 @@ public void testSomeArrayFunctionsWithScanQuery() { // Yes these outputs are strange sometimes, arrays are in a partial state of existence so end up a bit // stringy for now this is because virtual column selectors are coercing values back to stringish so that - // multi-valued string 
dimensions can be grouped on. + // multivalued string dimensions can be grouped on. List expectedResults; if (useDefault) { expectedResults = ImmutableList.of( @@ -388,7 +388,7 @@ public void testSomeArrayFunctionsWithScanQuery() public void testSomeArrayFunctionsWithScanQueryNoStringify() { // when not stringifying arrays, some things are still stringified, because they are inferred to be typed as strings - // the planner context which controls stringification of arrays does not apply to multi-valued string columns, + // the planner context which controls stringification of arrays does not apply to multivalued string columns, // which will still always be stringified to ultimately adhere to the varchar type // as array support increases in the engine this will likely change since using explict array functions should // probably kick it into an array @@ -2649,7 +2649,7 @@ public void testUnnestInline() skipVectorize(); cannotVectorize(); testQuery( - "SELECT * FROM UNNEST(ARRAY[1,2,3])", + "SELECT * FROM UNNEST(ARRAY[1,2,3]) as unnested(d)", ImmutableList.of( Druids.newScanQueryBuilder() .dataSource( @@ -2735,11 +2735,7 @@ public void testUnnest() @Test public void testUnnestWithGroupBy() { - // This tells the test to skip generating (vectorize = force) path - // Generates only 1 native query with vectorize = false skipVectorize(); - // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization - // Generates 2 native queries with 2 different values of vectorize cannotVectorize(); testQuery( "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) GROUP BY d3 ", @@ -2980,38 +2976,7 @@ public void testUnnestWithFilters() ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( - new QueryDataSource( - newScanQueryBuilder() - .dataSource( - new TableDataSource(CalciteTests.DATASOURCE3) - ) - .intervals(querySegmentSpec(Filtration.eternity())) - 
.virtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .legacy(false) - .filters(new SelectorDimFilter("dim2", "a", null)) - .columns( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "l1", - "l2", - "m1", - "m2", - "unique_dim1", - "v0" - ) - .context(QUERY_CONTEXT_DEFAULT) - .build() - ), + new TableDataSource(CalciteTests.DATASOURCE3), "dim3", "EXPR$0", null @@ -3019,6 +2984,7 @@ public void testUnnestWithFilters() .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) + .filters(new SelectorDimFilter("dim2", "a", null)) .context(QUERY_CONTEXT_DEFAULT) .columns(ImmutableList.of( "EXPR$0" @@ -3043,7 +3009,7 @@ public void testUnnestWithInFilters() // Generates 2 native queries with 2 different values of vectorize cannotVectorize(); testQuery( - "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc')), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc') LIMIT 2), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( @@ -3076,6 +3042,7 @@ public void testUnnestWithInFilters() "unique_dim1" ) .context(QUERY_CONTEXT_DEFAULT) + .limit(2) .build() ), "dim3", @@ -3094,9 +3061,7 @@ public void testUnnestWithInFilters() ImmutableList.of( new Object[]{"a"}, new Object[]{"b"}, - new Object[]{""}, - useDefault ? 
- new Object[]{""} : new Object[]{null} + new Object[]{""} ) ); } @@ -3341,4 +3306,419 @@ public void testUnnestWithConstant() ) ); } + + @Test + public void testUnnestWithInFilterOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b') ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0", + new InDimFilter("EXPR$0", ImmutableList.of("a", "b"), null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithInFilterOnUnnestedColWhereFilterIsNotOnFirstValue() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('d','c') ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0", + new InDimFilter("EXPR$0", ImmutableList.of("d", "c"), null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"c"}, + new Object[]{"d"} + ) + ); + } + + @Test + public void testUnnestWithInFilterOnUnnestedColWhereValuesDoNotExist() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('foo','bar') ", + ImmutableList.of( + Druids.newScanQueryBuilder() + 
.dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0", + new InDimFilter("EXPR$0", ImmutableList.of("foo", "bar"), null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + ImmutableList.of() + ); + } + + @Test + public void testUnnestWithBoundFilterOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where (d3>= 'b' AND d3 < 'd') ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0", + bound("EXPR$0", "b", "d", false, true, null, StringComparators.LEXICOGRAPHIC) + + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"} + ) + ); + } + + + @Test + public void testUnnestWithFilteringOnUnnestedVirtualCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d12 FROM druid.numfoo, UNNEST(ARRAY[m1, m2]) as unnested (d12) where d12 IN ('1','2') AND m1 < 10", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "v0", + "EXPR$0", + new InDimFilter("EXPR$0", ImmutableList.of("1.0", "2.0"), null) + )) + .virtualColumns(expressionVirtualColumn( + "v0", + "array(\"dim2\",\"dim4\")", + ColumnType.STRING_ARRAY + )) + .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) + .intervals(querySegmentSpec(Filtration.eternity())) + 
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .virtualColumns(expressionVirtualColumn("v0", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{1.0f}, + new Object[]{1.0f}, + new Object[]{2.0f}, + new Object[]{2.0f} + ) + ); + } + + @Test + public void testUnnestWithSelectorFilterOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0", + selector("EXPR$0", "b", null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithSelectorFilterV1OnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3>='b' and d3<='b' ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0", + selector("EXPR$0", "b", null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithSelectorFilterBadOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + 
"SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3=1 ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0", + bound("EXPR$0", "1", "1", false, false, null, StringComparators.NUMERIC) + + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + ImmutableList.of() + ); + } + + @Test + public void testUnnestWithNotSelectorFilterOnUnnestedCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3!='b' ", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "dim3", + "EXPR$0", + not(selector("EXPR$0", "b", null)) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + useDefault ? 
+ ImmutableList.of( + new Object[]{"a"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{""}, + new Object[]{""} + ) : + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{""}, + new Object[]{null} + ) + ); + } + + @Test + public void testUnnestWithSelectorFilterOnUnnestedVirtualCol() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d12 FROM druid.numfoo, UNNEST(ARRAY[m1, m2]) as unnested (d12) where d12=4 AND m1 < 10", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "v0", + "EXPR$0", + selector("EXPR$0", "4", null) + )) + .virtualColumns(expressionVirtualColumn( + "v0", + "array(\"m1\",\"m2\")", + ColumnType.FLOAT_ARRAY + )) + .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .virtualColumns(expressionVirtualColumn("v0", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY)) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + + ImmutableList.of( + new Object[]{4.0f}, + new Object[]{4.0f} + ) + ); + } + + @Test + public void testUnnestVirtualWithColumnsOnStringsWithSelectorFilter() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT strings FROM druid.numfoo, UNNEST(ARRAY[dim4, dim5]) as unnested (strings) where strings='aa'", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "v0", + "EXPR$0", + selector("EXPR$0", "aa", null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + 
.context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + ImmutableList.of( + new Object[]{"aa"}, + new Object[]{"aa"} + ) + ); + } + + @Test + public void testUnnestVirtualWithColumnsOnStringsWithNotSelectorFilter() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT strings FROM druid.numfoo, UNNEST(ARRAY[dim4, dim5]) as unnested (strings) where strings!='aa'", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + "v0", + "EXPR$0", + not(selector("EXPR$0", "aa", null)) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_DEFAULT) + .columns(ImmutableList.of( + "EXPR$0" + )) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"ab"}, + new Object[]{"a"}, + new Object[]{"ba"}, + new Object[]{"b"}, + new Object[]{"ad"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"ab"} + ) + ); + } }