diff --git a/docs/tutorials/tutorial-jupyter-index.md b/docs/tutorials/tutorial-jupyter-index.md
index d77e0d42b322..4f39699d2d2d 100644
--- a/docs/tutorials/tutorial-jupyter-index.md
+++ b/docs/tutorials/tutorial-jupyter-index.md
@@ -42,6 +42,7 @@ Make sure you meet the following requirements before starting the Jupyter-based
pip3 install requests
```
+
- JupyterLab (recommended) or Jupyter Notebook running on a non-default port. By default, Druid
and Jupyter both try to use port `8888`, so start Jupyter on a different port.
@@ -99,10 +100,12 @@ The notebooks are located in the [apache/druid repo](https://github.com/apache/d
The links that follow are the raw GitHub URLs, so you can use them to download the notebook directly, such as with `wget`, or manually through your web browser. Note that if you save the file from your web browser, make sure to remove the `.txt` extension.
+
- [Introduction to the Druid REST API](
https://raw.githubusercontent.com/apache/druid/master/examples/quickstart/jupyter-notebooks/api-tutorial.ipynb)
walks you through some of the basics related to the Druid REST API and several endpoints.
- [Introduction to the Druid Python API](
https://raw.githubusercontent.com/apache/druid/master/examples/quickstart/jupyter-notebooks/Python_API_Tutorial.ipynb)
walks you through some of the basics related to the Druid API using the Python wrapper API.
+
- [Introduction to Druid SQL](https://raw.githubusercontent.com/apache/druid/master/examples/quickstart/jupyter-notebooks/sql-tutorial.ipynb) covers the basics of Druid SQL.
diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java
index 407aea5c39b9..c5737d966bec 100644
--- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java
+++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java
@@ -23,13 +23,13 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.UnnestSegmentReference;
import org.apache.druid.utils.JvmUtils;
import javax.annotation.Nullable;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
@@ -38,7 +38,7 @@
/**
* The data source for representing an unnest operation.
- *
+ *
* An unnest data source has the following:
* a base data source which is to be unnested
* the column name of the MVD which will be unnested
@@ -50,19 +50,19 @@ public class UnnestDataSource implements DataSource
private final DataSource base;
private final String column;
private final String outputName;
- private final LinkedHashSet allowList;
+ private final DimFilter outputColumnFilter;
private UnnestDataSource(
DataSource dataSource,
String columnName,
String outputName,
- LinkedHashSet allowList
+ DimFilter outputColumnFilter
)
{
this.base = dataSource;
this.column = columnName;
this.outputName = outputName;
- this.allowList = allowList;
+ this.outputColumnFilter = outputColumnFilter;
}
@JsonCreator
@@ -70,10 +70,10 @@ public static UnnestDataSource create(
@JsonProperty("base") DataSource base,
@JsonProperty("column") String columnName,
@JsonProperty("outputName") String outputName,
- @Nullable @JsonProperty("allowList") LinkedHashSet allowList
+ @Nullable @JsonProperty("outputColumnFilter") DimFilter outputColumnFilter
)
{
- return new UnnestDataSource(base, columnName, outputName, allowList);
+ return new UnnestDataSource(base, columnName, outputName, outputColumnFilter);
}
@JsonProperty("base")
@@ -94,12 +94,11 @@ public String getOutputName()
return outputName;
}
- @JsonProperty("allowList")
- public LinkedHashSet getAllowList()
+ @JsonProperty("outputColumnFilter")
+ public DimFilter getOutputColumnFilter()
{
- return allowList;
+ return outputColumnFilter;
}
-
@Override
public Set getTableNames()
{
@@ -118,7 +117,7 @@ public DataSource withChildren(List children)
if (children.size() != 1) {
throw new IAE("Expected [1] child, got [%d]", children.size());
}
- return new UnnestDataSource(children.get(0), column, outputName, allowList);
+ return new UnnestDataSource(children.get(0), column, outputName, outputColumnFilter);
}
@Override
@@ -163,7 +162,7 @@ public Function createSegmentMapFunction(
segmentMapFn.apply(baseSegment),
column,
outputName,
- allowList
+ outputColumnFilter
);
}
}
@@ -174,7 +173,7 @@ public Function createSegmentMapFunction(
@Override
public DataSource withUpdatedDataSource(DataSource newSource)
{
- return new UnnestDataSource(newSource, column, outputName, allowList);
+ return new UnnestDataSource(newSource, column, outputName, outputColumnFilter);
}
@Override
@@ -213,7 +212,7 @@ public boolean equals(Object o)
@Override
public int hashCode()
{
- return Objects.hash(base, column, outputName);
+ return Objects.hash(base, column, outputName, outputColumnFilter);
}
@Override
@@ -223,7 +222,7 @@ public String toString()
"base=" + base +
", column='" + column + '\'' +
", outputName='" + outputName + '\'' +
- ", allowList=" + allowList +
+ ", outputFilter='" + outputColumnFilter + '\'' +
'}';
}
diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java
index 5d4340329897..d127bf17d4aa 100644
--- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java
+++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java
@@ -22,15 +22,17 @@
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.filter.BooleanValueMatcher;
import org.joda.time.DateTime;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.LinkedHashSet;
import java.util.List;
/**
@@ -50,7 +52,7 @@
* unnestCursor.advance() -> 'e'
*
*
- * The allowSet if available helps skip over elements which are not in the allowList by moving the cursor to
+ * The filter, if available, helps skip over elements that do not satisfy it by moving the cursor to
* the next available match.
*
* The index reference points to the index of each row that the unnest cursor is accessing through currentVal
@@ -65,7 +67,7 @@ public class UnnestColumnValueSelectorCursor implements Cursor
private final ColumnValueSelector columnValueSelector;
private final String columnName;
private final String outputName;
- private final LinkedHashSet allowSet;
+ private final ValueMatcher valueMatcher;
private int index;
private Object currentVal;
private List unnestListForCurrentRow;
@@ -76,7 +78,7 @@ public UnnestColumnValueSelectorCursor(
ColumnSelectorFactory baseColumSelectorFactory,
String columnName,
String outputColumnName,
- LinkedHashSet allowSet
+ @Nullable Filter filter
)
{
this.baseCursor = cursor;
@@ -86,7 +88,11 @@ public UnnestColumnValueSelectorCursor(
this.index = 0;
this.outputName = outputColumnName;
this.needInitialization = true;
- this.allowSet = allowSet;
+ if (filter != null) {
+ this.valueMatcher = filter.makeMatcher(getColumnSelectorFactory());
+ } else {
+ this.valueMatcher = BooleanValueMatcher.of(true);
+ }
}
@Override
@@ -191,11 +197,7 @@ public boolean isNull()
public Object getObject()
{
if (!unnestListForCurrentRow.isEmpty()) {
- if (allowSet == null || allowSet.isEmpty()) {
- return unnestListForCurrentRow.get(index);
- } else if (allowSet.contains((String) unnestListForCurrentRow.get(index))) {
- return unnestListForCurrentRow.get(index);
- }
+ return unnestListForCurrentRow.get(index);
}
return null;
}
@@ -243,9 +245,17 @@ public void advance()
@Override
public void advanceUninterruptibly()
{
- do {
+ // The index currently points to the element at position i ($e_i).
+ // While $e_i does not match the filter, keep advancing the cursor
+ // until the first matching element is found. Each advanceAndUpdate()
+ // moves the index, so matches() must be re-evaluated for the new position.
+ while (true) {
advanceAndUpdate();
- } while (matchAndProceed());
+ boolean match = valueMatcher.matches();
+ if (match || baseCursor.isDone()) {
+ return;
+ }
+ }
}
@Override
@@ -311,12 +321,11 @@ private void initialize()
{
this.unnestListForCurrentRow = new ArrayList<>();
getNextRow(needInitialization);
- if (allowSet != null) {
- if (!allowSet.isEmpty()) {
- if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) {
- advance();
- }
- }
+
+ // If the first value the index is pointing to does not match the filter
+ // advance the index to the first value which will match
+ if (!valueMatcher.matches() && !baseCursor.isDone()) {
+ advance();
}
needInitialization = false;
}
@@ -339,22 +348,4 @@ private void advanceAndUpdate()
index++;
}
}
-
- /**
- * This advances the unnest cursor in cases where an allowList is specified
- * and the current value at the unnest cursor is not in the allowList.
- * The cursor in such cases is moved till the next match is found.
- *
- * @return a boolean to indicate whether to stay or move cursor
- */
- private boolean matchAndProceed()
- {
- boolean matchStatus;
- if (allowSet == null || allowSet.isEmpty()) {
- matchStatus = true;
- } else {
- matchStatus = allowSet.contains((String) unnestListForCurrentRow.get(index));
- }
- return !baseCursor.isDone() && !matchStatus;
- }
}
diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
index 93a56767bbfb..dff6cdfe87dd 100644
--- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
+++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java
@@ -20,9 +20,11 @@
package org.apache.druid.segment;
import com.google.common.base.Predicate;
+import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.DimensionSpec;
+import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
@@ -32,7 +34,7 @@
import javax.annotation.Nullable;
import java.util.BitSet;
-import java.util.LinkedHashSet;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* The cursor to help unnest MVDs with dictionary encoding.
@@ -58,9 +60,9 @@
*
* Total 5 advance calls above
*
- * The allowSet, if available, helps skip over elements that are not in the allowList by moving the cursor to
+ * The filter, if available, helps skip over elements that do not match it by moving the cursor to
* the next available match. The hashSet is converted into a bitset (during initialization) for efficiency.
- * If allowSet is ['c', 'd'] then the advance moves over to the next available match
+ * If filter is IN ('c', 'd') then the advance moves over to the next available match
*
* advance() -> 2 -> 'c'
* advance() -> 3 -> 'd' (advances base cursor first)
@@ -79,31 +81,33 @@ public class UnnestDimensionCursor implements Cursor
private final DimensionSelector dimSelector;
private final String columnName;
private final String outputName;
- private final LinkedHashSet allowSet;
- private final BitSet allowedBitSet;
private final ColumnSelectorFactory baseColumnSelectorFactory;
- private int index;
- @Nullable private IndexedInts indexedIntsForCurrentRow;
+ @Nullable
+ private final Filter allowFilter;
+ private int indexForRow;
+ @Nullable
+ private IndexedInts indexedIntsForCurrentRow;
private boolean needInitialization;
private SingleIndexInts indexIntsForRow;
+ private BitSet matchBitSet;
public UnnestDimensionCursor(
Cursor cursor,
ColumnSelectorFactory baseColumnSelectorFactory,
String columnName,
String outputColumnName,
- LinkedHashSet allowSet
+ @Nullable Filter allowFilter
)
{
this.baseCursor = cursor;
this.baseColumnSelectorFactory = baseColumnSelectorFactory;
this.dimSelector = this.baseColumnSelectorFactory.makeDimensionSelector(DefaultDimensionSpec.of(columnName));
this.columnName = columnName;
- this.index = 0;
+ this.indexForRow = 0;
this.outputName = outputColumnName;
this.needInitialization = true;
- this.allowSet = allowSet;
- this.allowedBitSet = new BitSet();
+ this.allowFilter = allowFilter;
+ this.matchBitSet = new BitSet();
}
@Override
@@ -154,7 +158,10 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
@Override
public boolean matches()
{
- return idForLookup == indexedIntsForCurrentRow.get(index);
+ if (indexedIntsForCurrentRow.size() == 0) {
+ return false;
+ }
+ return idForLookup == indexedIntsForCurrentRow.get(indexForRow);
}
@Override
@@ -184,14 +191,7 @@ public Object getObject()
if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) {
return null;
}
- if (allowedBitSet.isEmpty()) {
- if (allowSet == null || allowSet.isEmpty()) {
- return lookupName(indexedIntsForCurrentRow.get(index));
- }
- } else if (allowedBitSet.get(indexedIntsForCurrentRow.get(index))) {
- return lookupName(indexedIntsForCurrentRow.get(index));
- }
- return null;
+ return lookupName(indexedIntsForCurrentRow.get(indexForRow));
}
@Override
@@ -203,8 +203,8 @@ public Class> classOfObject()
@Override
public int getValueCardinality()
{
- if (!allowedBitSet.isEmpty()) {
- return allowedBitSet.cardinality();
+ if (!matchBitSet.isEmpty()) {
+ return matchBitSet.cardinality();
}
return dimSelector.getValueCardinality();
}
@@ -293,6 +293,10 @@ public boolean isDone()
if (needInitialization && !baseCursor.isDone()) {
initialize();
}
+ // If the filter matches no dimension values at all, the cursor is done:
+ // there is no need to advance the base cursor and do extra work.
+ if (allowFilter != null && matchBitSet.isEmpty())
+ return true;
return baseCursor.isDone();
}
@@ -308,11 +312,34 @@ public boolean isDoneOrInterrupted()
@Override
public void reset()
{
- index = 0;
+ indexForRow = 0;
needInitialization = true;
baseCursor.reset();
}
+ /**
+ * Determines whether the unnest cursor should keep advancing because the
+ * current value does not match the configured filter. The caller moves the
+ * cursor until the next matching value is found.
+ *
+ * @return true if the cursor should advance (base cursor not done and current value is not a match)
+ */
+ private boolean matchAndProceed()
+ {
+ boolean matchStatus;
+ if ((allowFilter == null) && matchBitSet.isEmpty()) {
+ matchStatus = true;
+ } else {
+ if (indexedIntsForCurrentRow==null || indexedIntsForCurrentRow.size() == 0) {
+ matchStatus = false;
+ }
+ else {
+ matchStatus = matchBitSet.get(indexedIntsForCurrentRow.get(indexForRow));
+ }
+ }
+ return !baseCursor.isDone() && !matchStatus;
+ }
+
/**
* This initializes the unnest cursor and creates data structures
* to start iterating over the values to be unnested.
@@ -322,20 +349,210 @@ public void reset()
@Nullable
private void initialize()
{
- IdLookup idLookup = dimSelector.idLookup();
- this.indexIntsForRow = new SingleIndexInts();
- if (allowSet != null && !allowSet.isEmpty() && idLookup != null) {
- for (String s : allowSet) {
- if (idLookup.lookupId(s) >= 0) {
- allowedBitSet.set(idLookup.lookupId(s));
+ /*
+ for i=0 to baseColFactory.makeDimSelector.getValueCardinality()
+ match each item with the filter and populate bitset if there's a match
+ */
+
+ if (allowFilter != null) {
+ AtomicInteger idRef = new AtomicInteger();
+ ValueMatcher myMatcher = allowFilter.makeMatcher(new ColumnSelectorFactory()
+ {
+ @Override
+ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec)
+ {
+ if (!outputName.equals(dimensionSpec.getDimension())) {
+ throw new ISE("Asked for bad dimension[%s]", dimensionSpec);
+ }
+ return new DimensionSelector()
+ {
+ private final IndexedInts myInts = new IndexedInts()
+ {
+ @Override
+ public int size()
+ {
+ return 1;
+ }
+
+ @Override
+ public int get(int index)
+ {
+ return 1;
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+
+ }
+ };
+
+ @Override
+ public IndexedInts getRow()
+ {
+ return myInts;
+ }
+
+ @Override
+ public ValueMatcher makeValueMatcher(@Nullable String value)
+ {
+ // TODO(review): handle null value -- value.equals(...) below throws NPE when value is null; use Objects.equals
+ return new ValueMatcher()
+ {
+ @Override
+ public boolean matches()
+ {
+ return value.equals(lookupName(1));
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+
+ }
+ };
+ }
+
+ @Override
+ public ValueMatcher makeValueMatcher(Predicate predicate)
+ {
+ return new ValueMatcher()
+ {
+ @Override
+ public boolean matches()
+ {
+ return predicate.apply(lookupName(1));
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+
+ }
+ };
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return null;
+ }
+
+ @Override
+ public Class> classOfObject()
+ {
+ return null;
+ }
+
+ @Override
+ public int getValueCardinality()
+ {
+ return dimSelector.getValueCardinality();
+ }
+
+ @Nullable
+ @Override
+ public String lookupName(int id)
+ {
+ return dimSelector.lookupName(idRef.get());
+ }
+
+ @Override
+ public boolean nameLookupPossibleInAdvance()
+ {
+ return dimSelector.nameLookupPossibleInAdvance();
+ }
+
+ @Nullable
+ @Override
+ public IdLookup idLookup()
+ {
+ return dimSelector.idLookup();
+ }
+ };
+ }
+
+ @Override
+ public ColumnValueSelector makeColumnValueSelector(String columnName)
+ {
+ return new ColumnValueSelector()
+ {
+ @Override
+ public double getDouble()
+ {
+ return Double.parseDouble(dimSelector.lookupName(idRef.get()));
+ }
+
+ @Override
+ public float getFloat()
+ {
+ return 0;
+ }
+
+ @Override
+ public long getLong()
+ {
+ return 0;
+ }
+
+ @Override
+ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+ {
+
+ }
+
+ @Override
+ public boolean isNull()
+ {
+ return false;
+ }
+
+ @Nullable
+ @Override
+ public Object getObject()
+ {
+ return null;
+ }
+
+ @Override
+ public Class classOfObject()
+ {
+ return null;
+ }
+ };
+ }
+
+ @Nullable
+ @Override
+ public ColumnCapabilities getColumnCapabilities(String column)
+ {
+ return getColumnSelectorFactory().getColumnCapabilities(column);
+ }
+ });
+
+ for (int i = 0; i < dimSelector.getValueCardinality(); ++i) {
+ idRef.set(i);
+ if (myMatcher.matches()) {
+ matchBitSet.set(i);
}
}
}
+
+ indexForRow = 0;
+ this.indexIntsForRow = new SingleIndexInts();
+
if (dimSelector.getObject() != null) {
this.indexedIntsForCurrentRow = dimSelector.getRow();
}
- if (!allowedBitSet.isEmpty()) {
- if (!allowedBitSet.get(indexedIntsForCurrentRow.get(index))) {
+ if (!matchBitSet.isEmpty()) {
+ if (!matchBitSet.get(indexedIntsForCurrentRow.get(indexForRow))) {
advance();
}
}
@@ -351,43 +568,25 @@ private void initialize()
private void advanceAndUpdate()
{
if (indexedIntsForCurrentRow == null) {
- index = 0;
+ indexForRow = 0;
if (!baseCursor.isDone()) {
baseCursor.advanceUninterruptibly();
}
} else {
- if (index >= indexedIntsForCurrentRow.size() - 1) {
+ if (indexForRow >= indexedIntsForCurrentRow.size() - 1) {
if (!baseCursor.isDone()) {
baseCursor.advanceUninterruptibly();
}
if (!baseCursor.isDone()) {
indexedIntsForCurrentRow = dimSelector.getRow();
}
- index = 0;
+ indexForRow = 0;
} else {
- ++index;
+ ++indexForRow;
}
}
}
- /**
- * This advances the unnest cursor in cases where an allowList is specified
- * and the current value at the unnest cursor is not in the allowList.
- * The cursor in such cases is moved till the next match is found.
- *
- * @return a boolean to indicate whether to stay or move cursor
- */
- private boolean matchAndProceed()
- {
- boolean matchStatus;
- if ((allowSet == null || allowSet.isEmpty()) && allowedBitSet.isEmpty()) {
- matchStatus = true;
- } else {
- matchStatus = allowedBitSet.get(indexedIntsForCurrentRow.get(index));
- }
- return !baseCursor.isDone() && !matchStatus;
- }
-
// Helper class to help in returning
// getRow from the dimensionSelector
// This is set in the initialize method
@@ -411,9 +610,9 @@ public int size()
public int get(int idx)
{
// need to get value from the indexed ints
- // only if it is non null and has at least 1 value
+ // only if it is non-null and has at least 1 value
if (indexedIntsForCurrentRow != null && indexedIntsForCurrentRow.size() > 0) {
- return indexedIntsForCurrentRow.get(index);
+ return indexedIntsForCurrentRow.get(indexForRow);
}
return 0;
}
diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java
index 9da6b8132cbb..92a3cb68a206 100644
--- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java
+++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java
@@ -21,6 +21,7 @@
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.utils.CloseableUtils;
import org.joda.time.Interval;
@@ -28,7 +29,6 @@
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
-import java.util.LinkedHashSet;
import java.util.Optional;
/**
@@ -42,14 +42,14 @@ public class UnnestSegmentReference implements SegmentReference
private final SegmentReference baseSegment;
private final String dimension;
private final String renamedOutputDimension;
- private final LinkedHashSet allowSet;
+ private final DimFilter outputColumnFilter;
- public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName, LinkedHashSet allowList)
+ public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName, DimFilter outputColumnFilter)
{
this.baseSegment = baseSegment;
this.dimension = dimension;
this.renamedOutputDimension = outputName;
- this.allowSet = allowList;
+ this.outputColumnFilter = outputColumnFilter;
}
@Override
@@ -103,7 +103,7 @@ public StorageAdapter asStorageAdapter()
baseSegment.asStorageAdapter(),
dimension,
renamedOutputDimension,
- allowSet
+ outputColumnFilter
);
}
diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
index f76ab89270af..8866213aef47 100644
--- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
+++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java
@@ -24,17 +24,15 @@
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.query.QueryMetrics;
+import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.Filter;
-import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
-import org.apache.druid.segment.filter.AndFilter;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import javax.annotation.Nullable;
-import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Objects;
@@ -48,19 +46,19 @@ public class UnnestStorageAdapter implements StorageAdapter
private final StorageAdapter baseAdapter;
private final String dimensionToUnnest;
private final String outputColumnName;
- private final LinkedHashSet allowSet;
+ private final DimFilter outputColumnFilter;
public UnnestStorageAdapter(
final StorageAdapter baseAdapter,
final String dimension,
final String outputColumnName,
- final LinkedHashSet allowSet
+ final DimFilter outputColumnFilter
)
{
this.baseAdapter = baseAdapter;
this.dimensionToUnnest = dimension;
this.outputColumnName = outputColumnName;
- this.allowSet = allowSet;
+ this.outputColumnFilter = outputColumnFilter;
}
@Override
@@ -73,20 +71,8 @@ public Sequence makeCursors(
@Nullable QueryMetrics> queryMetrics
)
{
- Filter updatedFilter;
- if (allowSet != null && !allowSet.isEmpty()) {
- final InDimFilter allowListFilters;
- allowListFilters = new InDimFilter(dimensionToUnnest, allowSet);
- if (filter != null) {
- updatedFilter = new AndFilter(Arrays.asList(filter, allowListFilters));
- } else {
- updatedFilter = allowListFilters;
- }
- } else {
- updatedFilter = filter;
- }
final Sequence baseCursorSequence = baseAdapter.makeCursors(
- updatedFilter,
+ filter,
interval,
virtualColumns,
gran,
@@ -107,7 +93,7 @@ public Sequence makeCursors(
retVal.getColumnSelectorFactory(),
dimensionToUnnest,
outputColumnName,
- allowSet
+ outputColumnFilter != null ? outputColumnFilter.toFilter() : null
);
} else {
retVal = new UnnestColumnValueSelectorCursor(
@@ -115,7 +101,7 @@ public Sequence makeCursors(
retVal.getColumnSelectorFactory(),
dimensionToUnnest,
outputColumnName,
- allowSet
+ outputColumnFilter != null ? outputColumnFilter.toFilter() : null
);
}
} else {
@@ -124,7 +110,7 @@ public Sequence makeCursors(
retVal.getColumnSelectorFactory(),
dimensionToUnnest,
outputColumnName,
- allowSet
+ outputColumnFilter != null ? outputColumnFilter.toFilter() : null
);
}
return retVal;
diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java
index 4de22cb00610..86b717734990 100644
--- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java
+++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java
@@ -45,9 +45,7 @@
import org.junit.runners.Parameterized;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -95,23 +93,6 @@ private Druids.ScanQueryBuilder newTestUnnestQuery()
.legacy(legacy);
}
- private Druids.ScanQueryBuilder newTestUnnestQueryWithAllowSet()
- {
- List allowList = Arrays.asList("a", "b", "c");
- LinkedHashSet allowSet = new LinkedHashSet(allowList);
- return Druids.newScanQueryBuilder()
- .dataSource(UnnestDataSource.create(
- new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
- QueryRunnerTestHelper.PLACEMENTISH_DIMENSION,
- QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST,
- allowSet
- ))
- .columns(Collections.emptyList())
- .eternityInterval()
- .limit(3)
- .legacy(legacy);
- }
-
@Test
public void testScanOnUnnest()
{
@@ -464,65 +445,6 @@ public void testUnnestRunnerWithOrdering()
ScanQueryRunnerTest.verify(ascendingExpectedResults, results);
}
- @Test
- public void testUnnestRunnerNonNullAllowSet()
- {
- ScanQuery query = newTestUnnestQueryWithAllowSet()
- .intervals(I_0112_0114)
- .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST)
- .limit(3)
- .build();
-
- final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn(
- FACTORY,
- new IncrementalIndexSegment(
- index,
- QueryRunnerTestHelper.SEGMENT_ID
- ),
- query,
- "rtIndexvc"
- );
-
- Iterable results = queryRunner.run(QueryPlus.wrap(query)).toList();
-
- String[] columnNames;
- if (legacy) {
- columnNames = new String[]{
- getTimestampName() + ":TIME",
- QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST
- };
- } else {
- columnNames = new String[]{
- QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST
- };
- }
- String[] values;
- if (legacy) {
- values = new String[]{
- "2011-01-12T00:00:00.000Z\ta",
- "2011-01-12T00:00:00.000Z\tb",
- "2011-01-13T00:00:00.000Z\ta"
- };
- } else {
- values = new String[]{
- "a",
- "b",
- "a"
- };
- }
-
- final List>> events = ScanQueryRunnerTest.toEvents(columnNames, legacy, values);
- List expectedResults = toExpected(
- events,
- legacy
- ? Lists.newArrayList(getTimestampName(), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST)
- : Collections.singletonList(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST),
- 0,
- 3
- );
- ScanQueryRunnerTest.verify(expectedResults, results);
- }
-
private String getTimestampName()
{
diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java
index cf4a98c88035..dbe7d39761c4 100644
--- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java
@@ -27,14 +27,11 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.LinkedHashSet;
import java.util.List;
public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandlingTest
{
private static String OUTPUT_NAME = "unnested-column";
- private static LinkedHashSet IGNORE_SET = null;
- private static LinkedHashSet IGNORE_SET1 = new LinkedHashSet<>(Arrays.asList("b", "f"));
@Test
@@ -54,7 +51,7 @@ public void test_list_unnest_cursors()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -88,7 +85,7 @@ public void test_list_unnest_cursors_user_supplied_list()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -120,7 +117,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -157,7 +154,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -191,7 +188,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -221,7 +218,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -255,7 +252,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -289,14 +286,14 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor(
childCursor,
childCursor.getColumnSelectorFactory(),
OUTPUT_NAME,
"tmp-out",
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory()
.makeColumnValueSelector("tmp-out");
@@ -331,7 +328,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -369,7 +366,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -387,44 +384,6 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups()
Assert.assertEquals(k, 10);
}
- @Test
- public void test_list_unnest_cursors_user_supplied_list_with_ignore_set()
- {
- List inputList = Arrays.asList(
- Arrays.asList("a", "b", "c"),
- Arrays.asList("e", "f", "g", "h", "i"),
- Collections.singletonList("j")
- );
-
- List expectedResults = Arrays.asList("b", "f");
-
- //Create base cursor
- ListCursor listCursor = new ListCursor(inputList);
-
- //Create unnest cursor
- UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor(
- listCursor,
- listCursor.getColumnSelectorFactory(),
- "dummy",
- OUTPUT_NAME,
- IGNORE_SET1
- );
- ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
- .makeColumnValueSelector(OUTPUT_NAME);
- int k = 0;
- while (!unnestCursor.isDone()) {
- Object valueSelectorVal = unnestColumnValueSelector.getObject();
- if (valueSelectorVal == null) {
- Assert.assertEquals(null, expectedResults.get(k));
- } else {
- Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString());
- }
- k++;
- unnestCursor.advance();
- }
- Assert.assertEquals(k, 2);
- }
-
@Test
public void test_list_unnest_cursors_user_supplied_list_double()
{
@@ -445,7 +404,7 @@ public void test_list_unnest_cursors_user_supplied_list_double()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -479,7 +438,7 @@ public void test_list_unnest_cursors_user_supplied_list_float()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -513,7 +472,7 @@ public void test_list_unnest_cursors_user_supplied_list_long()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -550,7 +509,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -586,7 +545,7 @@ public void test_list_unnest_cursors_dimSelector()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
// should return a column value selector for this case
BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory()
@@ -629,7 +588,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers()
listCursor.getColumnSelectorFactory(),
"dummy",
OUTPUT_NAME,
- IGNORE_SET
+ null
);
ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory()
.makeColumnValueSelector(OUTPUT_NAME);
@@ -643,4 +602,3 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers()
Assert.assertEquals(k, 9);
}
}
-
diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
index 35d42b82d4b3..a9e8efb4d63d 100644
--- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java
@@ -20,11 +20,14 @@
package org.apache.druid.segment;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
+import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.filter.InDimFilter;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
@@ -42,7 +45,6 @@
import org.junit.Test;
import java.util.Arrays;
-import java.util.LinkedHashSet;
import java.util.List;
public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
@@ -58,7 +60,6 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest
private static String COLUMNNAME = "multi-string1";
private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1";
private static String OUTPUT_COLUMN_NAME1 = "unnested-multi-string1-again";
- private static LinkedHashSet IGNORE_SET = new LinkedHashSet<>(Arrays.asList("1", "3", "5"));
@BeforeClass
public static void setup()
@@ -90,7 +91,7 @@ public static void setup()
INCREMENTAL_INDEX_STORAGE_ADAPTER,
COLUMNNAME,
OUTPUT_COLUMN_NAME,
- IGNORE_SET
+ null
);
UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter(
UNNEST_STORAGE_ADAPTER,
@@ -102,7 +103,7 @@ public static void setup()
UNNEST_STORAGE_ADAPTER1,
COLUMNNAME,
OUTPUT_COLUMN_NAME1,
- IGNORE_SET
+ null
);
ADAPTERS = ImmutableList.of(
UNNEST_STORAGE_ADAPTER,
@@ -180,7 +181,6 @@ public void test_group_of_unnest_adapters_column_capabilities()
@Test
public void test_unnest_adapters_basic()
{
-
Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors(
null,
UNNEST_STORAGE_ADAPTER.getInterval(),
@@ -203,11 +203,8 @@ public void test_unnest_adapters_basic()
cursor.advance();
count++;
}
- /*
- each row has 8 entries.
- unnest 2 rows -> 16 rows after unnest
- */
- Assert.assertEquals(count, 16);
+
+ Assert.assertEquals(16, count);
return null;
});
@@ -249,150 +246,43 @@ public void test_two_levels_of_unnest_adapters()
unnest 2 rows -> 16 entries also the value cardinality
unnest of unnest -> 16*8 = 128 rows
*/
- Assert.assertEquals(count, 128);
- Assert.assertEquals(dimSelector.getValueCardinality(), 16);
+ Assert.assertEquals(128, count);
+ Assert.assertEquals(16, dimSelector.getValueCardinality());
return null;
});
}
@Test
- public void test_unnest_adapters_with_allowList()
+ public void test_unnest_adapters_basic_with_in_filter()
{
- final String columnName = "multi-string1";
-
- Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors(
- null,
- UNNEST_STORAGE_ADAPTER1.getInterval(),
+ Filter f = new InDimFilter(OUTPUT_COLUMN_NAME, ImmutableSet.of("1", "3", "5"));
+ Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors(
+ f,
+ UNNEST_STORAGE_ADAPTER.getInterval(),
VirtualColumns.EMPTY,
Granularities.ALL,
false,
null
);
+ List expectedResults = Arrays.asList("1", "3", "5");
cursorSequence.accumulate(null, (accumulated, cursor) -> {
ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME));
- ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME);
-
- int count = 0;
- while (!cursor.isDone()) {
- Object dimSelectorVal = dimSelector.getObject();
- Object valueSelectorVal = valueSelector.getObject();
- if (dimSelectorVal == null) {
- Assert.assertNull(dimSelectorVal);
- } else if (valueSelectorVal == null) {
- Assert.assertNull(valueSelectorVal);
- }
- cursor.advance();
- count++;
- }
- /*
- each row has 8 distinct entries.
- allowlist has 3 entries also the value cardinality
- unnest will have 3 distinct entries
- */
- Assert.assertEquals(count, 3);
- Assert.assertEquals(dimSelector.getValueCardinality(), 3);
- return null;
- });
- }
-
- @Test
- public void test_two_levels_of_unnest_adapters_with_allowList()
- {
- final String columnName = "multi-string1";
-
- Sequence cursorSequence = UNNEST_STORAGE_ADAPTER3.makeCursors(
- null,
- UNNEST_STORAGE_ADAPTER3.getInterval(),
- VirtualColumns.EMPTY,
- Granularities.ALL,
- false,
- null
- );
- UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER3;
- Assert.assertEquals(adapter.getDimensionToUnnest(), columnName);
- Assert.assertEquals(
- adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(),
- ColumnCapabilities.Capable.TRUE
- );
- Assert.assertEquals(adapter.getMaxValue(columnName), adapter.getMaxValue(OUTPUT_COLUMN_NAME));
- Assert.assertEquals(adapter.getMinValue(columnName), adapter.getMinValue(OUTPUT_COLUMN_NAME));
-
- cursorSequence.accumulate(null, (accumulated, cursor) -> {
- ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
-
- DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME1));
- ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME1);
-
int count = 0;
while (!cursor.isDone()) {
Object dimSelectorVal = dimSelector.getObject();
- Object valueSelectorVal = valueSelector.getObject();
if (dimSelectorVal == null) {
Assert.assertNull(dimSelectorVal);
- } else if (valueSelectorVal == null) {
- Assert.assertNull(valueSelectorVal);
+ } else {
+ Assert.assertEquals(expectedResults.get(count), dimSelectorVal.toString());
}
cursor.advance();
count++;
}
- /*
- each row has 8 distinct entries.
- allowlist has 3 entries also the value cardinality
- unnest will have 3 distinct entries
- unnest of that unnest will have 3*3 = 9 entries
- */
- Assert.assertEquals(count, 9);
- Assert.assertEquals(dimSelector.getValueCardinality(), 3);
- return null;
- });
- }
-
- @Test
- public void test_unnest_adapters_methods_with_allowList()
- {
- final String columnName = "multi-string1";
-
- Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors(
- null,
- UNNEST_STORAGE_ADAPTER1.getInterval(),
- VirtualColumns.EMPTY,
- Granularities.ALL,
- false,
- null
- );
- UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER1;
- Assert.assertEquals(adapter.getDimensionToUnnest(), columnName);
- Assert.assertEquals(
- adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(),
- ColumnCapabilities.Capable.TRUE
- );
- Assert.assertEquals(adapter.getMaxValue(columnName), adapter.getMaxValue(OUTPUT_COLUMN_NAME));
- Assert.assertEquals(adapter.getMinValue(columnName), adapter.getMinValue(OUTPUT_COLUMN_NAME));
-
- cursorSequence.accumulate(null, (accumulated, cursor) -> {
- ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
-
- DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME));
- IdLookup idlookUp = dimSelector.idLookup();
- Assert.assertFalse(dimSelector.isNull());
- int[] indices = new int[]{1, 3, 5};
- int count = 0;
- while (!cursor.isDone()) {
- Object dimSelectorVal = dimSelector.getObject();
- Assert.assertEquals(idlookUp.lookupId((String) dimSelectorVal), indices[count]);
- // after unnest first entry in get row should equal the object
- // and the row size will always be 1
- Assert.assertEquals(dimSelector.getRow().get(0), indices[count]);
- Assert.assertEquals(dimSelector.getRow().size(), 1);
- Assert.assertNotNull(dimSelector.makeValueMatcher(OUTPUT_COLUMN_NAME));
- cursor.advance();
- count++;
- }
- Assert.assertEquals(dimSelector.getValueCardinality(), 3);
- Assert.assertEquals(count, 3);
+ // As we are filtering on 1, 3, and 5, there should be 3 entries
+ Assert.assertEquals(3, count);
return null;
});
}
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java
index 847c41b63ede..89d02df0ff3e 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java
@@ -22,6 +22,7 @@
import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
@@ -32,9 +33,11 @@
import org.apache.calcite.rel.core.Correlate;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.logical.LogicalFilter;
import org.apache.calcite.rel.logical.LogicalProject;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexUtil;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.QueryDataSource;
@@ -43,6 +46,7 @@
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
+import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.table.RowSignatures;
@@ -58,7 +62,7 @@
* Each correlate can be perceived as a join with the join type being inner
* the left of a correlate as seen in the rule {@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule}
* is the {@link DruidQueryRel} while the right will always be an {@link DruidUnnestDatasourceRel}.
- *
+ *
* Since this is a subclass of DruidRel it is automatically considered by other rules that involves DruidRels.
* Some example being SELECT_PROJECT and SORT_PROJECT rules in {@link org.apache.druid.sql.calcite.rule.DruidRules.DruidQueryRule}
*/
@@ -70,8 +74,8 @@ public class DruidCorrelateUnnestRel extends DruidRel
private final PartialDruidQuery partialQuery;
private final PlannerConfig plannerConfig;
private final Correlate correlateRel;
- private RelNode left;
- private RelNode right;
+ private final RelNode left;
+ private final RelNode right;
private DruidCorrelateUnnestRel(
RelOptCluster cluster,
@@ -136,13 +140,23 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations)
final DruidRel> druidQueryRel = (DruidRel>) left;
final DruidQuery leftQuery = Preconditions.checkNotNull((druidQueryRel).toDruidQuery(false), "leftQuery");
final DataSource leftDataSource;
+ final PartialDruidQuery partialQueryFromLeft = druidQueryRel.getPartialDruidQuery();
+ final PartialDruidQuery corrPartialQuey;
+
+ // If there is a LIMIT (or ORDER BY) in the left query,
+ // it should be honored before the unnest.
+ // Create a query data source in that case.
+
- if (DruidJoinQueryRel.computeLeftRequiresSubquery(getPlannerContext(), druidQueryRel)) {
+ if (partialQueryFromLeft.getSort() != null || partialQueryFromLeft.getSortProject() != null) {
leftDataSource = new QueryDataSource(leftQuery.getQuery());
+ corrPartialQuey = partialQuery;
} else {
leftDataSource = leftQuery.getDataSource();
+ corrPartialQuey = updateCorrPartialQueryFromLeft(partialQueryFromLeft);
}
+
final DruidUnnestDatasourceRel unnestDatasourceRel = (DruidUnnestDatasourceRel) right;
@@ -157,7 +171,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations)
unnestDatasourceRel.getUnnestProject().getProjects().get(0)
);
- LogicalProject unnestProject = LogicalProject.create(
+ final LogicalProject unnestProject = LogicalProject.create(
this,
ImmutableList.of(unnestDatasourceRel.getUnnestProject()
.getProjects()
@@ -165,6 +179,23 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations)
unnestDatasourceRel.getUnnestProject().getRowType()
);
+ final Filter unnestFilterFound = unnestDatasourceRel.getUnnestFilter();
+ final Filter logicalFilter;
+ if (unnestFilterFound != null) {
+ // The correlated value will be the last element in the row signature of correlate
+ // The filter points to $0 of the right data source e.g. OR(=($0, 'a'), =($0, 'b'))
+ // After the correlation the rowType becomes (left data source rowtype + 1)
+ // So the filter needs to be shifted to the last element of
+ // rowtype after the correlation, e.g. OR(=($17, 'a'), =($17, 'b'))
+ logicalFilter = LogicalFilter.create(
+ correlateRel,
+ RexUtil.shift(unnestFilterFound.getCondition(), rowSignature.size() - 1),
+ ImmutableSet.of(correlateRel.getCorrelationId())
+ );
+ } else {
+ logicalFilter = null;
+ }
+
// placeholder for dimension or expression to be unnested
final String dimOrExpToUnnest;
final VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create(
@@ -174,7 +205,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations)
);
// the unnest project is needed in case of a virtual column
- // unnest(mv_to_array(dim_1)) is reconciled as unnesting a MVD dim_1 not requiring a virtual column
+ // unnest(mv_to_array(dim_1)) is reconciled as unnesting an MVD dim_1 not requiring a virtual column
// while unnest(array(dim_2,dim_3)) is understood as unnesting a virtual column which is an array over dim_2 and dim_3 elements
boolean unnestProjectNeeded = false;
getPlannerContext().setJoinExpressionVirtualColumnRegistry(virtualColumnRegistry);
@@ -199,13 +230,22 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations)
// add the unnest project to the partial query if required
// This is necessary to handle the virtual columns on the unnestProject
// Also create the unnest datasource to be used by the partial query
- PartialDruidQuery partialDruidQuery = unnestProjectNeeded ? partialQuery.withUnnest(unnestProject) : partialQuery;
+ PartialDruidQuery partialDruidQuery = unnestProjectNeeded
+ ? corrPartialQuey.withUnnestProject(unnestProject)
+ : corrPartialQuey;
return partialDruidQuery.build(
UnnestDataSource.create(
leftDataSource,
dimOrExpToUnnest,
unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0),
- null
+ // Filters from Calcite are received as bound Filters
+ // This piece optimizes multiple bounds to IN filters, Selector Filters etc.
+ logicalFilter != null ? Filtration.create(DruidQuery.getDimFilter(
+ getPlannerContext(),
+ rowSignature,
+ virtualColumnRegistry,
+ logicalFilter
+ )).optimizeFilterOnly(rowSignature).getDimFilter() : null
),
rowSignature,
getPlannerContext(),
@@ -215,6 +255,26 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations)
);
}
+ private PartialDruidQuery updateCorrPartialQueryFromLeft(PartialDruidQuery partialQueryFromLeft)
+ {
+ // The DruidCorrelateRule already creates the project and pushes it on the top level
+ // So get select project from partialQuery
+ // The filters are present on the partial query of the left
+ // The group by and having clauses would be on the top level
+ // Same for the sort
+ PartialDruidQuery corrQuery = PartialDruidQuery.create(correlateRel);
+ corrQuery = corrQuery.withWhereFilter(partialQueryFromLeft.getWhereFilter())
+ .withSelectProject(partialQuery.getSelectProject());
+ if (partialQuery.getAggregate() != null) {
+ corrQuery = corrQuery.withAggregate(partialQuery.getAggregate())
+ .withHavingFilter(partialQuery.getHavingFilter());
+ }
+ if (partialQuery.getSort() != null || partialQuery.getSortProject() != null) {
+ corrQuery = corrQuery.withSort(partialQuery.getSort());
+ }
+ return corrQuery;
+ }
+
@Override
protected DruidCorrelateUnnestRel clone()
{
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
index dee970655871..91002dd80690 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
@@ -309,6 +309,7 @@ public static DruidQuery fromPartialQuery(
unnestProjection = null;
}
+
return new DruidQuery(
dataSource,
plannerContext,
@@ -353,7 +354,7 @@ private static DimFilter computeHavingFilter(
}
@Nonnull
- private static DimFilter getDimFilter(
+ public static DimFilter getDimFilter(
final PlannerContext plannerContext,
final RowSignature rowSignature,
@Nullable final VirtualColumnRegistry virtualColumnRegistry,
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java
index cb01a003eae8..0ba9a64a6b51 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java
@@ -20,6 +20,7 @@
package org.apache.druid.sql.calcite.rel;
import org.apache.calcite.rel.RelWriter;
+import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Uncollect;
import org.apache.calcite.rel.logical.LogicalProject;
import org.apache.calcite.rel.type.RelDataType;
@@ -32,6 +33,7 @@
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
+import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.table.RowSignatures;
@@ -41,11 +43,11 @@
/**
* The Rel node to capture the unnest (or uncollect) part in a query. This covers 2 cases:
- *
+ *
* Case 1:
* If this is an unnest on a constant and no input table is required, the final query is built using
* an UnnestDataSource with a base InlineDataSource in this rel.
- *
+ *
* Case 2:
* If the unnest has an input table, this rel resolves the unnest part and delegates the rel to be consumed by other
* rule ({@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule}
@@ -55,11 +57,13 @@ public class DruidUnnestDatasourceRel extends DruidRel
private final Uncollect uncollect;
private final DruidQueryRel druidQueryRel;
private final LogicalProject unnestProject;
+ private final Filter unnestFilter;
public DruidUnnestDatasourceRel(
Uncollect uncollect,
DruidQueryRel queryRel,
LogicalProject unnestProject,
+ Filter unnestFilter,
PlannerContext plannerContext
)
{
@@ -67,6 +71,7 @@ public DruidUnnestDatasourceRel(
this.uncollect = uncollect;
this.druidQueryRel = queryRel;
this.unnestProject = unnestProject;
+ this.unnestFilter = unnestFilter;
}
public LogicalProject getUnnestProject()
@@ -74,6 +79,11 @@ public LogicalProject getUnnestProject()
return unnestProject;
}
+ public Filter getUnnestFilter()
+ {
+ return unnestFilter;
+ }
+
@Nullable
@Override
public PartialDruidQuery getPartialDruidQuery()
@@ -88,6 +98,7 @@ public DruidUnnestDatasourceRel withPartialQuery(PartialDruidQuery newQueryBuild
uncollect,
druidQueryRel.withPartialQuery(newQueryBuilder),
unnestProject,
+ unnestFilter,
getPlannerContext()
);
}
@@ -123,7 +134,12 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations)
),
"inline",
druidQueryRel.getRowType().getFieldNames().get(0),
- null
+ unnestFilter != null ? Filtration.create(DruidQuery.getDimFilter(
+ getPlannerContext(),
+ druidQueryRel.getDruidTable().getRowSignature(),
+ virtualColumnRegistry,
+ unnestFilter
+ )).optimizeFilterOnly(druidQueryRel.getDruidTable().getRowSignature()).getDimFilter() : null
);
DruidQuery query = druidQueryRel.getPartialDruidQuery().build(
@@ -150,14 +166,24 @@ public DruidUnnestDatasourceRel asDruidConvention()
new Uncollect(getCluster(), traitSet.replace(DruidConvention.instance()), uncollect.getInput(), false),
druidQueryRel.asDruidConvention(),
unnestProject,
+ unnestFilter,
getPlannerContext()
);
}
+ public DruidUnnestDatasourceRel withFilter(Filter f)
+ {
+ return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, f, getPlannerContext());
+ }
+
@Override
public RelWriter explainTerms(RelWriter pw)
{
- return super.explainTerms(pw);
+ return super.explainTerms(pw)
+ .item("Uncollect", uncollect)
+ .item("Query", druidQueryRel)
+ .item("unnestProject", unnestProject)
+ .item("unnestFilter", unnestFilter);
}
@Override
@@ -175,6 +201,6 @@ protected RelDataType deriveRowType()
@Override
protected DruidUnnestDatasourceRel clone()
{
- return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, getPlannerContext());
+ return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, unnestFilter, getPlannerContext());
}
}
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java
index 0cd71af8e5e7..61697bd03ba0 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java
@@ -342,7 +342,7 @@ public PartialDruidQuery withWindow(final Window newWindow)
);
}
- public PartialDruidQuery withUnnest(final Project newUnnestProject)
+ public PartialDruidQuery withUnnestProject(final Project newUnnestProject)
{
return new PartialDruidQuery(
builderSupplier,
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java
index 1870e11dd75c..7a9c349828b0 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java
@@ -101,8 +101,9 @@ public void onMatch(RelOptRuleCall call)
final RexBuilder rexBuilder = correlate.getCluster().getRexBuilder();
final Filter druidRelFilter;
- final DruidRel> newDruidRelFilter;
+ final DruidRel> newDruidRel;
final List newProjectExprs = new ArrayList<>();
+ final List newWheres = new ArrayList<>();
final boolean isLeftDirectAccessPossible = enableLeftScanDirect && (druidRel instanceof DruidQueryRel);
@@ -116,14 +117,14 @@ public void onMatch(RelOptRuleCall call)
// Left-side projection expressions rewritten to be on top of the correlate.
newProjectExprs.addAll(leftProject.getProjects());
- newDruidRelFilter = druidRel.withPartialQuery(PartialDruidQuery.create(leftScan));
+ newDruidRel = druidRel.withPartialQuery(PartialDruidQuery.create(leftScan));
} else {
// Leave druidRel as-is. Write input refs that do nothing.
for (int i = 0; i < druidRel.getRowType().getFieldCount(); i++) {
newProjectExprs.add(rexBuilder.makeInputRef(correlate.getRowType().getFieldList().get(i).getType(), i));
}
- newDruidRelFilter = druidRel;
- druidRelFilter = null;
+ newDruidRel = druidRel;
+ druidRelFilter = druidRel.getPartialDruidQuery().getWhereFilter();
}
if (druidUnnestDatasourceRel.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT) {
@@ -131,7 +132,7 @@ public void onMatch(RelOptRuleCall call)
druidUnnestDatasourceRel.getPartialDruidQuery()
.getSelectProject()
.getProjects(),
- newDruidRelFilter.getRowType().getFieldCount()
+ newDruidRel.getRowType().getFieldCount()
)) {
newProjectExprs.add(rexNode);
}
@@ -143,7 +144,7 @@ public void onMatch(RelOptRuleCall call)
.getFieldList()
.get(druidRel.getRowType().getFieldCount() + i)
.getType(),
- newDruidRelFilter.getRowType().getFieldCount() + i
+ newDruidRel.getRowType().getFieldCount() + i
)
);
}
@@ -152,7 +153,7 @@ public void onMatch(RelOptRuleCall call)
final DruidCorrelateUnnestRel druidCorr = DruidCorrelateUnnestRel.create(
correlate.copy(
correlate.getTraitSet(),
- newDruidRelFilter,
+ newDruidRel,
druidUnnestDatasourceRel,
correlate.getCorrelationId(),
correlate.getRequiredColumns(),
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java
new file mode 100644
index 000000000000..41a1a4c32db6
--- /dev/null
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.sql.calcite.rule;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Project;
+import org.apache.druid.sql.calcite.rel.DruidUnnestDatasourceRel;
+
+public class DruidFilterUnnestRule extends RelOptRule
+{
+ private static final DruidFilterUnnestRule INSTANCE = new DruidFilterUnnestRule();
+
+ private DruidFilterUnnestRule()
+ {
+ super(
+ operand(
+ Filter.class,
+ operand(DruidUnnestDatasourceRel.class, any())
+ )
+ );
+ }
+
+ public static DruidFilterUnnestRule instance()
+ {
+ return INSTANCE;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call)
+ {
+ final Filter filter = call.rel(0);
+ final DruidUnnestDatasourceRel unnestDatasourceRel = call.rel(1);
+ DruidUnnestDatasourceRel newRel = unnestDatasourceRel.withFilter(filter);
+ call.transformTo(newRel);
+ }
+
+ // This is for a special case of handling selector filters
+ // on top of DruidUnnestDatasourceRel when Calcite adds an extra
+ // LogicalProject on the LogicalFilter. See, for example, node #122 in the plan below:
+ // SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b'
+ // 126:LogicalProject(d3=[$17])
+ // 124:LogicalCorrelate(subset=[rel#125:Subset#6.NONE.[]], correlation=[$cor0], joinType=[inner], requiredColumns=[{3}])
+ // 8:LogicalTableScan(subset=[rel#114:Subset#0.NONE.[]], table=[[druid, numfoo]])
+ // 122:LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR])
+ // 120:LogicalFilter(subset=[rel#121:Subset#4.NONE.[]], condition=[=($0, 'b')])
+ // 118:Uncollect(subset=[rel#119:Subset#3.NONE.[]])
+ // 116:LogicalProject(subset=[rel#117:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)])
+ // 9:LogicalValues(subset=[rel#115:Subset#1.NONE.[0]], tuples=[[{ 0 }]])
+
+ // This logical project does a type cast only which Druid already has information about
+ // So we can skip this LogicalProject
+ // Extensive unit tests can be found in {@link CalciteArraysQueryTest}
+
+ static class DruidProjectOnCorrelateRule extends RelOptRule
+ {
+ private static final DruidProjectOnCorrelateRule INSTANCE = new DruidProjectOnCorrelateRule();
+
+ private DruidProjectOnCorrelateRule()
+ {
+ super(
+ operand(
+ Project.class,
+ operand(DruidUnnestDatasourceRel.class, any())
+ )
+ );
+ }
+
+ public static DruidProjectOnCorrelateRule instance()
+ {
+ return INSTANCE;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call)
+ {
+ final DruidUnnestDatasourceRel unnestDatasourceRel = call.rel(1);
+ call.transformTo(unnestDatasourceRel);
+ }
+ }
+}
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java
index 4276a48b2f13..2d238238fbd5 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java
@@ -99,7 +99,9 @@ public static List rules(PlannerContext plannerContext)
DruidSortUnionRule.instance(),
DruidJoinRule.instance(plannerContext),
new DruidUnnestDatasourceRule(plannerContext),
- new DruidCorrelateUnnestRule(plannerContext)
+ new DruidCorrelateUnnestRule(plannerContext),
+ DruidFilterUnnestRule.instance(),
+ DruidFilterUnnestRule.DruidProjectOnCorrelateRule.instance()
)
);
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java
index e8123fe0670c..52334279b961 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java
@@ -95,6 +95,7 @@ public void onMatch(final RelOptRuleCall call)
uncollectRel,
druidQueryRel.withPartialQuery(druidQueryRel.getPartialDruidQuery().withSelectProject(queryProject)),
logicalProject,
+ null,
plannerContext
);
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
index 119cae8634ec..ff882b39f71f 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
@@ -171,7 +171,7 @@ public void testSelectNonConstantArrayExpressionFromTableForMultival()
try {
ExpressionProcessing.initializeForTests(true);
- // if nested arrays are allowed, dim3 is a multi-valued string column, so the automatic translation will turn this
+ // if nested arrays are allowed, dim3 is a multivalued string column, so the automatic translation will turn this
// expression into
//
// `map((dim3) -> array(concat(dim3,'word'),'up'), dim3)`
@@ -212,7 +212,7 @@ public void testSomeArrayFunctionsWithScanQuery()
{
// Yes these outputs are strange sometimes, arrays are in a partial state of existence so end up a bit
// stringy for now this is because virtual column selectors are coercing values back to stringish so that
- // multi-valued string dimensions can be grouped on.
+ // multivalued string dimensions can be grouped on.
List expectedResults;
if (useDefault) {
expectedResults = ImmutableList.of(
@@ -388,7 +388,7 @@ public void testSomeArrayFunctionsWithScanQuery()
public void testSomeArrayFunctionsWithScanQueryNoStringify()
{
// when not stringifying arrays, some things are still stringified, because they are inferred to be typed as strings
- // the planner context which controls stringification of arrays does not apply to multi-valued string columns,
+ // the planner context which controls stringification of arrays does not apply to multivalued string columns,
// which will still always be stringified to ultimately adhere to the varchar type
// as array support increases in the engine this will likely change since using explict array functions should
// probably kick it into an array
@@ -2649,7 +2649,7 @@ public void testUnnestInline()
skipVectorize();
cannotVectorize();
testQuery(
- "SELECT * FROM UNNEST(ARRAY[1,2,3])",
+ "SELECT * FROM UNNEST(ARRAY[1,2,3]) as unnested(d)",
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(
@@ -2735,11 +2735,7 @@ public void testUnnest()
@Test
public void testUnnestWithGroupBy()
{
- // This tells the test to skip generating (vectorize = force) path
- // Generates only 1 native query with vectorize = false
skipVectorize();
- // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization
- // Generates 2 native queries with 2 different values of vectorize
cannotVectorize();
testQuery(
"SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) GROUP BY d3 ",
@@ -2980,38 +2976,7 @@ public void testUnnestWithFilters()
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
- new QueryDataSource(
- newScanQueryBuilder()
- .dataSource(
- new TableDataSource(CalciteTests.DATASOURCE3)
- )
- .intervals(querySegmentSpec(Filtration.eternity()))
- .virtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING))
- .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
- .legacy(false)
- .filters(new SelectorDimFilter("dim2", "a", null))
- .columns(
- "__time",
- "cnt",
- "d1",
- "d2",
- "dim1",
- "dim3",
- "dim4",
- "dim5",
- "dim6",
- "f1",
- "f2",
- "l1",
- "l2",
- "m1",
- "m2",
- "unique_dim1",
- "v0"
- )
- .context(QUERY_CONTEXT_DEFAULT)
- .build()
- ),
+ new TableDataSource(CalciteTests.DATASOURCE3),
"dim3",
"EXPR$0",
null
@@ -3019,6 +2984,7 @@ public void testUnnestWithFilters()
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
+ .filters(new SelectorDimFilter("dim2", "a", null))
.context(QUERY_CONTEXT_DEFAULT)
.columns(ImmutableList.of(
"EXPR$0"
@@ -3043,7 +3009,7 @@ public void testUnnestWithInFilters()
// Generates 2 native queries with 2 different values of vectorize
cannotVectorize();
testQuery(
- "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc')), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)",
+ "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc') LIMIT 2), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)",
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
@@ -3076,6 +3042,7 @@ public void testUnnestWithInFilters()
"unique_dim1"
)
.context(QUERY_CONTEXT_DEFAULT)
+ .limit(2)
.build()
),
"dim3",
@@ -3094,9 +3061,7 @@ public void testUnnestWithInFilters()
ImmutableList.of(
new Object[]{"a"},
new Object[]{"b"},
- new Object[]{""},
- useDefault ?
- new Object[]{""} : new Object[]{null}
+ new Object[]{""}
)
);
}
@@ -3341,4 +3306,419 @@ public void testUnnestWithConstant()
)
);
}
+
+ @Test
+ public void testUnnestWithInFilterOnUnnestedCol()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b') ",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "dim3",
+ "EXPR$0",
+ new InDimFilter("EXPR$0", ImmutableList.of("a", "b"), null)
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+
+ ImmutableList.of(
+ new Object[]{"a"},
+ new Object[]{"b"},
+ new Object[]{"b"}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestWithInFilterOnUnnestedColWhereFilterIsNotOnFirstValue()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('d','c') ",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "dim3",
+ "EXPR$0",
+ new InDimFilter("EXPR$0", ImmutableList.of("d", "c"), null)
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+
+ ImmutableList.of(
+ new Object[]{"c"},
+ new Object[]{"d"}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestWithInFilterOnUnnestedColWhereValuesDoNotExist()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('foo','bar') ",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "dim3",
+ "EXPR$0",
+ new InDimFilter("EXPR$0", ImmutableList.of("foo", "bar"), null)
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+ ImmutableList.of()
+ );
+ }
+
+ @Test
+ public void testUnnestWithBoundFilterOnUnnestedCol()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where (d3>= 'b' AND d3 < 'd') ",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "dim3",
+ "EXPR$0",
+ bound("EXPR$0", "b", "d", false, true, null, StringComparators.LEXICOGRAPHIC)
+
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+
+ ImmutableList.of(
+ new Object[]{"b"},
+ new Object[]{"b"},
+ new Object[]{"c"}
+ )
+ );
+ }
+
+
+ @Test
+ public void testUnnestWithFilteringOnUnnestedVirtualCol()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d12 FROM druid.numfoo, UNNEST(ARRAY[m1, m2]) as unnested (d12) where d12 IN ('1','2') AND m1 < 10",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "v0",
+ "EXPR$0",
+ new InDimFilter("EXPR$0", ImmutableList.of("1.0", "2.0"), null)
+ ))
+                  .virtualColumns(expressionVirtualColumn(
+                      "v0",
+                      "array(\"m1\",\"m2\")",
+                      ColumnType.FLOAT_ARRAY
+                  ))
+                  .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .virtualColumns(expressionVirtualColumn("v0", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY))
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+
+ ImmutableList.of(
+ new Object[]{1.0f},
+ new Object[]{1.0f},
+ new Object[]{2.0f},
+ new Object[]{2.0f}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestWithSelectorFilterOnUnnestedCol()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "dim3",
+ "EXPR$0",
+ selector("EXPR$0", "b", null)
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+
+ ImmutableList.of(
+ new Object[]{"b"},
+ new Object[]{"b"}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestWithSelectorFilterV1OnUnnestedCol()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3>='b' and d3<='b' ",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "dim3",
+ "EXPR$0",
+ selector("EXPR$0", "b", null)
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+
+ ImmutableList.of(
+ new Object[]{"b"},
+ new Object[]{"b"}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestWithSelectorFilterBadOnUnnestedCol()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3=1 ",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "dim3",
+ "EXPR$0",
+ bound("EXPR$0", "1", "1", false, false, null, StringComparators.NUMERIC)
+
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+ ImmutableList.of()
+ );
+ }
+
+ @Test
+ public void testUnnestWithNotSelectorFilterOnUnnestedCol()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3!='b' ",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "dim3",
+ "EXPR$0",
+ not(selector("EXPR$0", "b", null))
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+ useDefault ?
+ ImmutableList.of(
+ new Object[]{"a"},
+ new Object[]{"c"},
+ new Object[]{"d"},
+ new Object[]{""},
+ new Object[]{""}
+ ) :
+ ImmutableList.of(
+ new Object[]{"a"},
+ new Object[]{"c"},
+ new Object[]{"d"},
+ new Object[]{""},
+ new Object[]{null}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestWithSelectorFilterOnUnnestedVirtualCol()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT d12 FROM druid.numfoo, UNNEST(ARRAY[m1, m2]) as unnested (d12) where d12=4 AND m1 < 10",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "v0",
+ "EXPR$0",
+ selector("EXPR$0", "4", null)
+ ))
+ .virtualColumns(expressionVirtualColumn(
+ "v0",
+ "array(\"m1\",\"m2\")",
+ ColumnType.FLOAT_ARRAY
+ ))
+ .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .virtualColumns(expressionVirtualColumn("v0", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY))
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+
+ ImmutableList.of(
+ new Object[]{4.0f},
+ new Object[]{4.0f}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestVirtualWithColumnsOnStringsWithSelectorFilter()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT strings FROM druid.numfoo, UNNEST(ARRAY[dim4, dim5]) as unnested (strings) where strings='aa'",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "v0",
+ "EXPR$0",
+ selector("EXPR$0", "aa", null)
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{"aa"},
+ new Object[]{"aa"}
+ )
+ );
+ }
+
+ @Test
+ public void testUnnestVirtualWithColumnsOnStringsWithNotSelectorFilter()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "SELECT strings FROM druid.numfoo, UNNEST(ARRAY[dim4, dim5]) as unnested (strings) where strings!='aa'",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(UnnestDataSource.create(
+ new TableDataSource(CalciteTests.DATASOURCE3),
+ "v0",
+ "EXPR$0",
+ not(selector("EXPR$0", "aa", null))
+ ))
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY))
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .legacy(false)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .columns(ImmutableList.of(
+ "EXPR$0"
+ ))
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{"a"},
+ new Object[]{"a"},
+ new Object[]{"ab"},
+ new Object[]{"a"},
+ new Object[]{"ba"},
+ new Object[]{"b"},
+ new Object[]{"ad"},
+ new Object[]{"b"},
+ new Object[]{"b"},
+ new Object[]{"ab"}
+ )
+ );
+ }
}