From 6dfd9aabace55ebe818f5ce0e2d685d5555966ca Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Mon, 22 Oct 2018 21:30:44 +0200 Subject: [PATCH 1/2] SQL: Implement null handling for `IN(v1, v2, ...)` Implemented null handling for both the value tested but also for values inside the list of values tested against. The null handling is implemented for local processors, painless scripts and Lucene Terms queries making it available for `IN` expressions occuring in `SELECT`, `WHERE` and `HAVING` clauses. Closes: #34582 --- .../xpack/sql/type/DataType.java | 2 + .../xpack/sql/expression/Foldables.java | 8 ++- .../xpack/sql/expression/predicate/In.java | 31 +++++++----- .../operator/comparison/InProcessor.java | 6 ++- .../xpack/sql/querydsl/query/TermsQuery.java | 2 +- .../predicate/InProcessorTests.java | 11 ++++ .../sql/expression/predicate/InTests.java | 50 +++++++++++++++++++ .../sql/planner/QueryTranslatorTests.java | 26 ++++++++++ x-pack/qa/sql/src/main/resources/agg.sql-spec | 6 +++ .../qa/sql/src/main/resources/filter.sql-spec | 5 ++ .../qa/sql/src/main/resources/select.csv-spec | 37 +++++++++++++- 11 files changed, 168 insertions(+), 16 deletions(-) create mode 100644 x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InTests.java diff --git a/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java b/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java index 1c08c6e1c9fa1..7c7e2ce0946d9 100644 --- a/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java +++ b/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java @@ -229,6 +229,8 @@ public static DataType fromEsType(String esType) { public boolean isCompatibleWith(DataType other) { if (this == other) { return true; + } else if (this == NULL || other == NULL) { + return true; } else if (isString() && other.isString()) { return true; } else if (isNumeric() && other.isNumeric()) { diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java index 6e06a1d1c8581..6826c8497eb87 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java @@ -46,9 +46,15 @@ public static double doubleValueOf(Expression e) { } public static List valuesOf(List list, DataType to) { + return valuesOf(list, to, false); + } + + public static List valuesOf(List list, DataType to, boolean removeNulls) { List l = new ArrayList<>(list.size()); for (Expression e : list) { - l.add(valueOf(e, to)); + if (!removeNulls || e.dataType() != DataType.NULL) { + l.add(valueOf(e, to)); + } } return l; } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java index a820833d1a013..25d053032bc2c 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java @@ -82,16 +82,20 @@ public boolean foldable() { } @Override - public Object fold() { + public Boolean fold() { Object foldedLeftValue = value.fold(); - + Boolean result = false; for (Expression rightValue : list) { Boolean compResult = Comparisons.eq(foldedLeftValue, rightValue.fold()); - if (compResult != null && compResult) { + if (compResult == null) { +// if (value().dataType() == DataType.NULL || rightValue.dataType() == DataType.NULL) { + result = null; +// } + } else if (compResult) { return true; } } - return false; + return result; } @Override @@ -118,15 +122,18 @@ public ScriptTemplate asScript() { String scriptPrefix = leftScript + "=="; LinkedHashSet values = list.stream().map(Expression::fold).collect(Collectors.toCollection(LinkedHashSet::new)); for (Object valueFromList : values) { - if (valueFromList instanceof Expression) { - ScriptTemplate rightScript = asScript((Expression) valueFromList); - sj.add(scriptPrefix + rightScript.template()); - rightParams.add(rightScript.params()); - } else { - if (valueFromList instanceof String) { - sj.add(scriptPrefix + '"' + valueFromList + '"'); + // if checked against null => false + if (valueFromList != null) { + if (valueFromList instanceof Expression) { + ScriptTemplate rightScript = asScript((Expression) valueFromList); + sj.add(scriptPrefix + rightScript.template()); + rightParams.add(rightScript.params()); } else { - sj.add(scriptPrefix + valueFromList.toString()); + if (valueFromList instanceof String) { + sj.add(scriptPrefix + '"' + valueFromList + '"'); + } else { + sj.add(scriptPrefix + valueFromList.toString()); + } } } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java index 5ebf8870965b5..575bc29e27bad 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java @@ -40,14 +40,18 @@ public final void writeTo(StreamOutput out) throws IOException { @Override public Object process(Object input) { Object leftValue = processsors.get(processsors.size() - 1).process(input); + Boolean result = false; for (int i = 0; i < processsors.size() - 1; i++) { Boolean compResult = Comparisons.eq(leftValue, processsors.get(i).process(input)); + if (compResult == null) { + result = null; + } if (compResult != null && compResult) { return true; } } - return false; + return result; } @Override diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java index 412df4e8ca682..3914ada653554 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java @@ -24,7 +24,7 @@ public class TermsQuery extends LeafQuery { public TermsQuery(Location location, String term, List values) { super(location); this.term = term; - this.values = new LinkedHashSet<>(Foldables.valuesOf(values, values.get(0).dataType())); + this.values = new LinkedHashSet<>(Foldables.valuesOf(values, values.get(0).dataType(), true)); } @Override diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InProcessorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InProcessorTests.java index 3e71ac90f8127..12bba003115f4 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InProcessorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InProcessorTests.java @@ -22,6 +22,7 @@ public class InProcessorTests extends AbstractWireSerializingTestCase")); } + + public void testTranslateInExpression_HavingClauseAndNullHandling_Painless() { + LogicalPlan p = plan("SELECT keyword, max(int) FROM test GROUP BY keyword HAVING max(int) in (10, null, 20, null, 30 - 10)"); + assertTrue(p instanceof Project); + assertTrue(p.children().get(0) instanceof Filter); + Expression condition = ((Filter) p.children().get(0)).condition(); + assertFalse(condition.foldable()); + QueryTranslation translation = QueryTranslator.toQuery(condition, false); + assertTrue(translation.query instanceof ScriptQuery); + ScriptQuery sq = (ScriptQuery) translation.query; + assertEquals("InternalSqlScriptUtils.nullSafeFilter(params.a0==10 || params.a0==20)", sq.script().toString()); + assertThat(sq.script().params().toString(), startsWith("[{a=MAX(int){a->")); + } } diff --git a/x-pack/qa/sql/src/main/resources/agg.sql-spec b/x-pack/qa/sql/src/main/resources/agg.sql-spec index 2c6248059f5fb..110882dc21ed0 100644 --- a/x-pack/qa/sql/src/main/resources/agg.sql-spec +++ b/x-pack/qa/sql/src/main/resources/agg.sql-spec @@ -450,3 +450,9 @@ selectHireDateGroupByHireDate SELECT hire_date HD, COUNT(*) c FROM test_emp GROUP BY hire_date ORDER BY hire_date DESC; selectSalaryGroupBySalary SELECT salary, COUNT(*) c FROM test_emp GROUP BY salary ORDER BY salary DESC; + +// filter with IN +aggMultiWithHavingUsingInAndNullHandling +SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING max IN(74999, null, 74600) ORDER BY gender; +aggMultiGroupByMultiWithHavingUsingInAndNullHandling +SELECT MIN(salary) min, MAX(salary) max, gender g, languages l, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g, languages HAVING max IN (74500, null, 74600) ORDER BY gender, languages; diff --git a/x-pack/qa/sql/src/main/resources/filter.sql-spec b/x-pack/qa/sql/src/main/resources/filter.sql-spec index 1a564ecb9ad82..79b3836b95994 100644 --- a/x-pack/qa/sql/src/main/resources/filter.sql-spec +++ b/x-pack/qa/sql/src/main/resources/filter.sql-spec @@ -96,3 +96,8 @@ SELECT last_name l FROM "test_emp" WHERE emp_no NOT IN (10000, 10001, 10002, 999 whereWithInAndComplexFunctions SELECT last_name l FROM "test_emp" WHERE emp_no NOT IN (10000, abs(2 - 10003), 10002, 999) AND lcase(first_name) IN ('sumant', 'mary', 'patricio', 'No''Match') ORDER BY emp_no LIMIT 5; + +whereWithInAndNullHandling1 +SELECT last_name l FROM "test_emp" WHERE birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) AND (emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040) ORDER BY emp_no; +whereWithInAndNullHandling2 +SELECT last_name l FROM "test_emp" WHERE birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) AND (emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040) ORDER BY emp_no; diff --git a/x-pack/qa/sql/src/main/resources/select.csv-spec b/x-pack/qa/sql/src/main/resources/select.csv-spec index b3888abd47bf3..bf208c62026df 100644 --- a/x-pack/qa/sql/src/main/resources/select.csv-spec +++ b/x-pack/qa/sql/src/main/resources/select.csv-spec @@ -25,6 +25,22 @@ false |true ; +inWithNullHandling +SELECT 2 IN (1, null, 3), 3 IN (1, null, 3), null IN (1, null, 3), null IN (1, 2, 3); + + 2 IN (1, null, 3) | 3 IN (1, null, 3) | null IN (1, null, 3) | null IN (1, 2, 3) +--------------------+--------------------+-----------------------+------------------- +null |true |null | null +; + +inWithNullHandlingAndNegation +SELECT NOT 2 IN (1, null, 3), NOT 3 IN (1, null, 3), NOT null IN (1, null, 3), NOT null IN (1, 2, 3); + + NOT 2 IN (1, null, 3) | NOT 3 IN (1, null, 3) | NOT null IN (1, null, 3) | null IN (1, 2, 3) +------------------------+------------------------+---------------------------+-------------------- +null |false |null | null +; + // // SELECT with IN and table columns // @@ -64,4 +80,23 @@ SELECT 1 IN (1, abs(2 - 4), 3) OR emp_no NOT IN (10000, 10000 + 1, 10002) FROM t 10003 10004 10005 -; \ No newline at end of file +; + +inWithTableColumnAndNullHandling +SELECT emp_no, birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)), birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) FROM test_emp WHERE emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040 ORDER BY 1; + + emp_no | birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) | birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) +--------+-------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------ +10038 | true | true +10039 | null | null +10040 | false | null + + +inWithTableColumnAndNullHandlingAndNegation +SELECT emp_no, NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)), NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) FROM test_emp WHERE emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040 ORDER BY 1; + + emp_no | NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) | NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) +--------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------ +10038 | false | false +10039 | null | null +10040 | true | null \ No newline at end of file From a0bd76c2548b650a804585541bd423ee3b3e09be Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Wed, 24 Oct 2018 11:23:27 +0200 Subject: [PATCH 2/2] Address comments --- .../org/elasticsearch/xpack/sql/type/DataType.java | 13 ++++--------- .../xpack/sql/expression/Foldables.java | 8 +------- .../xpack/sql/expression/predicate/In.java | 4 +--- .../predicate/operator/comparison/InProcessor.java | 3 +-- .../xpack/sql/querydsl/query/TermsQuery.java | 5 ++++- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java b/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java index 7c7e2ce0946d9..2622c3be63c01 100644 --- a/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java +++ b/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java @@ -229,14 +229,9 @@ public static DataType fromEsType(String esType) { public boolean isCompatibleWith(DataType other) { if (this == other) { return true; - } else if (this == NULL || other == NULL) { - return true; - } else if (isString() && other.isString()) { - return true; - } else if (isNumeric() && other.isNumeric()) { - return true; - } else { - return false; - } + } else return + (this == NULL || other == NULL) || + (isString() && other.isString()) || + (isNumeric() && other.isNumeric()); } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java index 6826c8497eb87..8c672ed162ee6 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java @@ -46,15 +46,9 @@ public static double doubleValueOf(Expression e) { } public static List valuesOf(List list, DataType to) { - return valuesOf(list, to, false); - } - - public static List valuesOf(List list, DataType to, boolean removeNulls) { List l = new ArrayList<>(list.size()); for (Expression e : list) { - if (!removeNulls || e.dataType() != DataType.NULL) { - l.add(valueOf(e, to)); - } + l.add(valueOf(e, to)); } return l; } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java index 25d053032bc2c..1574e406a1e10 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java @@ -88,9 +88,7 @@ public Boolean fold() { for (Expression rightValue : list) { Boolean compResult = Comparisons.eq(foldedLeftValue, rightValue.fold()); if (compResult == null) { -// if (value().dataType() == DataType.NULL || rightValue.dataType() == DataType.NULL) { - result = null; -// } + result = null; } else if (compResult) { return true; } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java index 575bc29e27bad..0a901b5b5e6fe 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java @@ -46,8 +46,7 @@ public Object process(Object input) { Boolean compResult = Comparisons.eq(leftValue, processsors.get(i).process(input)); if (compResult == null) { result = null; - } - if (compResult != null && compResult) { + } else if (compResult) { return true; } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java index 3914ada653554..4366e2d404c13 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java @@ -9,6 +9,7 @@ import org.elasticsearch.xpack.sql.expression.Expression; import org.elasticsearch.xpack.sql.expression.Foldables; import org.elasticsearch.xpack.sql.tree.Location; +import org.elasticsearch.xpack.sql.type.DataType; import java.util.LinkedHashSet; import java.util.List; @@ -24,7 +25,9 @@ public class TermsQuery extends LeafQuery { public TermsQuery(Location location, String term, List values) { super(location); this.term = term; - this.values = new LinkedHashSet<>(Foldables.valuesOf(values, values.get(0).dataType(), true)); + values.removeIf(e -> e.dataType() == DataType.NULL); + this.values = new LinkedHashSet<>(Foldables.valuesOf(values, values.get(0).dataType())); + this.values.removeIf(Objects::isNull); } @Override