From fbc72a41ef66d33c4650171260ddf8c8702903ca Mon Sep 17 00:00:00 2001 From: Forest Vey <36905077+forestmvey@users.noreply.github.com> Date: Wed, 12 Apr 2023 14:22:35 -0700 Subject: [PATCH] Support nested function in SELECT clause (#1490) Users can now query nested fields in an index. --------- Signed-off-by: forestmvey --- .../org/opensearch/sql/analysis/Analyzer.java | 8 + .../sql/analysis/ExpressionAnalyzer.java | 11 - .../sql/analysis/NestedAnalyzer.java | 111 ++++++ .../org/opensearch/sql/executor/Explain.java | 7 + .../org/opensearch/sql/expression/DSL.java | 4 + .../sql/expression/ReferenceExpression.java | 8 +- .../function/BuiltinFunctionName.java | 2 + .../function/OpenSearchFunctions.java | 33 ++ .../sql/planner/DefaultImplementor.java | 7 + .../sql/planner/logical/LogicalNested.java | 49 +++ .../sql/planner/logical/LogicalPlanDSL.java | 8 + .../logical/LogicalPlanNodeVisitor.java | 4 + .../optimizer/LogicalPlanOptimizer.java | 1 + .../planner/optimizer/pattern/Patterns.java | 8 + .../rule/read/TableScanPushDown.java | 6 + .../sql/planner/physical/NestedOperator.java | 284 +++++++++++++++ .../sql/planner/physical/PhysicalPlanDSL.java | 8 + .../physical/PhysicalPlanNodeVisitor.java | 4 + .../sql/storage/read/TableScanBuilder.java | 12 + .../opensearch/sql/analysis/AnalyzerTest.java | 233 ++++++++++++- .../sql/analysis/ExpressionAnalyzerTest.java | 11 - .../org/opensearch/sql/config/TestConfig.java | 5 + .../opensearch/sql/executor/ExplainTest.java | 71 ++-- .../expression/ReferenceExpressionTest.java | 36 +- .../function/OpenSearchFunctionsTest.java | 20 +- .../sql/planner/DefaultImplementorTest.java | 112 +++--- .../logical/LogicalPlanNodeVisitorTest.java | 18 + .../optimizer/LogicalPlanOptimizerTest.java | 24 ++ .../planner/physical/NestedOperatorTest.java | 328 ++++++++++++++++++ .../physical/PhysicalPlanNodeVisitorTest.java | 10 + docs/user/beyond/partiql.rst | 10 +- docs/user/dql/functions.rst | 23 ++ docs/user/dql/metadata.rst | 3 +- doctest/test_data/nested_objects.json | 4 + doctest/test_docs.py | 4 +- doctest/test_mapping/nested_objects.json | 47 +++ .../sql/legacy/ObjectFieldSelectIT.java | 3 +- .../sql/legacy/PrettyFormatResponseIT.java | 2 +- .../sql/legacy/SQLIntegTestCase.java | 15 +- .../opensearch/sql/legacy/TestsConstants.java | 4 + .../java/org/opensearch/sql/sql/NestedIT.java | 260 ++++++++++++++ .../indexDefinitions/multi_nested.json | 42 +++ .../test/resources/multi_nested_objects.json | 10 + .../nested_objects_without_arrays.json | 10 + .../src/test/resources/nested_with_nulls.json | 24 ++ .../value/OpenSearchExprValueFactory.java | 25 +- .../OpenSearchExecutionProtector.java | 10 + .../request/OpenSearchRequestBuilder.java | 85 ++++- .../response/OpenSearchResponse.java | 8 +- .../scan/OpenSearchIndexScanBuilder.java | 6 + .../scan/OpenSearchIndexScanQueryBuilder.java | 13 + .../client/OpenSearchNodeClientTest.java | 1 + .../client/OpenSearchRestClientTest.java | 1 + .../value/OpenSearchExprValueFactoryTest.java | 19 + .../OpenSearchExecutionProtectorTest.java | 19 +- .../request/OpenSearchRequestBuilderTest.java | 110 +++++- .../response/OpenSearchResponseTest.java | 27 ++ .../OpenSearchIndexScanOptimizationTest.java | 42 +++ sql/src/main/antlr/OpenSearchSQLParser.g4 | 5 + .../sql/sql/parser/AstExpressionBuilder.java | 1 + .../sql/sql/antlr/SQLSyntaxParserTest.java | 12 + .../sql/sql/parser/AstBuilderTest.java | 9 - 62 files changed, 2162 insertions(+), 135 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/analysis/NestedAnalyzer.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/physical/NestedOperator.java create mode 100644 core/src/test/java/org/opensearch/sql/planner/physical/NestedOperatorTest.java create mode 100644 doctest/test_data/nested_objects.json create mode 100644 doctest/test_mapping/nested_objects.json create mode 100644 integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java create mode 100644 integ-test/src/test/resources/indexDefinitions/multi_nested.json create mode 100644 integ-test/src/test/resources/multi_nested_objects.json create mode 100644 integ-test/src/test/resources/nested_objects_without_arrays.json create mode 100644 integ-test/src/test/resources/nested_with_nulls.json diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index b7c03db6d4..4ea298a589 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -366,6 +366,14 @@ public LogicalPlan visitProject(Project node, AnalysisContext context) { List namedExpressions = selectExpressionAnalyzer.analyze(node.getProjectList(), context, new ExpressionReferenceOptimizer(expressionAnalyzer.getRepository(), child)); + + for (UnresolvedExpression expr : node.getProjectList()) { + NestedAnalyzer nestedAnalyzer = new NestedAnalyzer( + namedExpressions, expressionAnalyzer, child + ); + child = nestedAnalyzer.analyze(expr, context); + } + // new context context.push(); TypeEnvironment newEnv = context.peek(); diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index 436c26374c..43155a868a 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -416,17 +416,6 @@ private Expression visitIdentifier(String ident, AnalysisContext context) { ReferenceExpression ref = DSL.ref(ident, typeEnv.resolve(new Symbol(Namespace.FIELD_NAME, ident))); - // Fall back to old engine too if type is not supported semantically - if (isTypeNotSupported(ref.type())) { - throw new SyntaxCheckException(String.format( - "Identifier [%s] of type [%s] is not supported yet", ident, ref.type())); - } return ref; } - - // Array type is not supporte yet. - private boolean isTypeNotSupported(ExprType type) { - return "array".equalsIgnoreCase(type.typeName()); - } - } diff --git a/core/src/main/java/org/opensearch/sql/analysis/NestedAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/NestedAnalyzer.java new file mode 100644 index 0000000000..756c1f20b3 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/analysis/NestedAnalyzer.java @@ -0,0 +1,111 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.analysis; + +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import lombok.RequiredArgsConstructor; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Alias; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.planner.logical.LogicalNested; +import org.opensearch.sql.planner.logical.LogicalPlan; + +/** + * Analyze the Nested Function in the {@link AnalysisContext} to construct the {@link + * LogicalPlan}. + */ +@RequiredArgsConstructor +public class NestedAnalyzer extends AbstractNodeVisitor { + private final List namedExpressions; + private final ExpressionAnalyzer expressionAnalyzer; + private final LogicalPlan child; + + public LogicalPlan analyze(UnresolvedExpression projectItem, AnalysisContext context) { + LogicalPlan nested = projectItem.accept(this, context); + return (nested == null) ? child : nested; + } + + @Override + public LogicalPlan visitAlias(Alias node, AnalysisContext context) { + return node.getDelegated().accept(this, context); + } + + @Override + public LogicalPlan visitFunction(Function node, AnalysisContext context) { + if (node.getFuncName().equalsIgnoreCase(BuiltinFunctionName.NESTED.name())) { + + List expressions = node.getFuncArgs(); + validateArgs(expressions); + ReferenceExpression nestedField = + (ReferenceExpression)expressionAnalyzer.analyze(expressions.get(0), context); + Map args; + if (expressions.size() == 2) { + args = Map.of( + "field", nestedField, + "path", (ReferenceExpression)expressionAnalyzer.analyze(expressions.get(1), context) + ); + } else { + args = Map.of( + "field", (ReferenceExpression)expressionAnalyzer.analyze(expressions.get(0), context), + "path", generatePath(nestedField.toString()) + ); + } + if (child instanceof LogicalNested) { + ((LogicalNested)child).addFields(args); + return child; + } else { + return new LogicalNested(child, new ArrayList<>(Arrays.asList(args)), namedExpressions); + } + } + return null; + } + + /** + * Validate each parameter used in nested function in SELECT clause. Any supplied parameter + * for a nested function in a SELECT statement must be a valid qualified name, and the field + * parameter must be nested at least one level. + * @param args : Arguments in nested function. + */ + private void validateArgs(List args) { + if (args.size() < 1 || args.size() > 2) { + throw new IllegalArgumentException( + "on nested object only allowed 2 parameters (field,path) or 1 parameter (field)" + ); + } + + for (int i = 0; i < args.size(); i++) { + if (!(args.get(i) instanceof QualifiedName)) { + throw new IllegalArgumentException( + String.format("Illegal nested field name: %s", args.get(i).toString()) + ); + } + if (i == 0 && ((QualifiedName)args.get(i)).getParts().size() < 2) { + throw new IllegalArgumentException( + String.format("Illegal nested field name: %s", args.get(i).toString()) + ); + } + } + } + + /** + * Generate nested path dynamically. Assumes at least one level of nesting in supplied string. + * @param field : Nested field to generate path of. + * @return : Path of field derived from last level of nesting. + */ + private ReferenceExpression generatePath(String field) { + return new ReferenceExpression(field.substring(0, field.lastIndexOf(".")), STRING); + } +} diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index db2f4bdb11..7c16e0b720 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -23,6 +23,7 @@ import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.LimitOperator; +import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanNodeVisitor; import org.opensearch.sql.planner.physical.ProjectOperator; @@ -142,6 +143,12 @@ public ExplainResponseNode visitLimit(LimitOperator node, Object context) { "limit", node.getLimit(), "offset", node.getOffset()))); } + @Override + public ExplainResponseNode visitNested(NestedOperator node, Object context) { + return explain(node, context, explanNode -> explanNode.setDescription(ImmutableMap.of( + "nested", node.getFields()))); + } + protected ExplainResponseNode explain(PhysicalPlan node, Object context, Consumer doExplain) { ExplainResponseNode explainNode = new ExplainResponseNode(getOperatorName(node)); diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index b0440acb6e..f9ef20e305 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -624,6 +624,10 @@ public static FunctionExpression xor(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.XOR, expressions); } + public static FunctionExpression nested(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.NESTED, expressions); + } + public static FunctionExpression not(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.NOT, expressions); } diff --git a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java index 94bb4e067d..3c5b2af23c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/ReferenceExpression.java @@ -15,6 +15,7 @@ import lombok.RequiredArgsConstructor; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.env.Environment; @@ -100,7 +101,12 @@ public ExprValue resolve(ExprTupleValue value) { } private ExprValue resolve(ExprValue value, List paths) { - final ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, paths)); + ExprValue wholePathValue = value.keyValue(String.join(PATH_SEP, paths)); + // For array types only first index currently supported. + if (value.type().equals(ExprCoreType.ARRAY)) { + wholePathValue = value.collectionValue().get(0).keyValue(paths.get(0)); + } + if (!wholePathValue.isMissing() || paths.size() == 1) { return wholePathValue; } else { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 890d4b48bf..728712f537 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -189,6 +189,8 @@ public enum BuiltinFunctionName { STDDEV_POP(FunctionName.of("stddev_pop")), // take top documents from aggregation bucket. TAKE(FunctionName.of("take")), + // Not always an aggregation query + NESTED(FunctionName.of("nested")), /** * Text Functions. diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 9a50aca344..c5fcb010f5 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -12,6 +12,7 @@ import lombok.Getter; import lombok.Setter; import lombok.experimental.UtilityClass; +import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.Expression; @@ -47,6 +48,8 @@ public void register(BuiltinFunctionRepository repository) { repository.register(score(BuiltinFunctionName.SCORE)); repository.register(score(BuiltinFunctionName.SCOREQUERY)); repository.register(score(BuiltinFunctionName.SCORE_QUERY)); + // Functions supported in SELECT clause + repository.register(nested()); } private static FunctionResolver match_bool_prefix() { @@ -93,6 +96,36 @@ private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery return new RelevanceFunctionResolver(funcName); } + private static FunctionResolver nested() { + return new FunctionResolver() { + @Override + public Pair resolve( + FunctionSignature unresolvedSignature) { + return Pair.of(unresolvedSignature, + (functionProperties, arguments) -> + new FunctionExpression(BuiltinFunctionName.NESTED.getName(), arguments) { + @Override + public ExprValue valueOf(Environment valueEnv) { + return valueEnv.resolve(getArguments().get(0)); + } + + @Override + public ExprType type() { + return getArguments().get(0).type(); + } + }); + } + + @Override + public FunctionName getFunctionName() { + return BuiltinFunctionName.NESTED.getName(); + } + }; + } + + + + private static FunctionResolver score(BuiltinFunctionName score) { FunctionName funcName = score.getName(); return new RelevanceFunctionResolver(funcName); diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index 4a6d4d8222..d4cdb528fa 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -11,6 +11,7 @@ import org.opensearch.sql.planner.logical.LogicalEval; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanNodeVisitor; import org.opensearch.sql.planner.logical.LogicalProject; @@ -26,6 +27,7 @@ import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.LimitOperator; +import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.ProjectOperator; import org.opensearch.sql.planner.physical.RareTopNOperator; @@ -94,6 +96,11 @@ public PhysicalPlan visitEval(LogicalEval node, C context) { return new EvalOperator(visitChild(node, context), node.getExpressions()); } + @Override + public PhysicalPlan visitNested(LogicalNested node, C context) { + return new NestedOperator(visitChild(node, context), node.getFields()); + } + @Override public PhysicalPlan visitSort(LogicalSort node, C context) { return new SortOperator(visitChild(node, context), node.getSortList()); diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java new file mode 100644 index 0000000000..3e0e167cf3 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java @@ -0,0 +1,49 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.ReferenceExpression; + +/** + * Logical Nested plan. + */ +@EqualsAndHashCode(callSuper = true) +@Getter +@ToString +public class LogicalNested extends LogicalPlan { + private List> fields; + private final List projectList; + + /** + * Constructor of LogicalNested. + * + */ + public LogicalNested( + LogicalPlan childPlan, + List> fields, + List projectList + ) { + super(Collections.singletonList(childPlan)); + this.fields = fields; + this.projectList = projectList; + } + + public void addFields(Map fields) { + this.fields.add(fields); + } + + @Override + public R accept(LogicalPlanNodeVisitor visitor, C context) { + return visitor.visitNested(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index a192966287..411d9a51be 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -74,6 +74,14 @@ public LogicalPlan highlight(LogicalPlan input, Expression field, return new LogicalHighlight(input, field, arguments); } + + public static LogicalPlan nested( + LogicalPlan input, + List> nestedArgs, + List projectList) { + return new LogicalNested(input, nestedArgs, projectList); + } + public static LogicalPlan remove(LogicalPlan input, ReferenceExpression... fields) { return new LogicalRemove(input, ImmutableSet.copyOf(fields)); } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index 9a41072fe7..d7ab75f869 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -73,6 +73,10 @@ public R visitEval(LogicalEval plan, C context) { return visitNode(plan, context); } + public R visitNested(LogicalNested plan, C context) { + return visitNode(plan, context); + } + public R visitSort(LogicalSort plan, C context) { return visitNode(plan, context); } diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java index 70847b869b..097c5ff8ce 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java @@ -56,6 +56,7 @@ public static LogicalPlanOptimizer create() { TableScanPushDown.PUSH_DOWN_SORT, TableScanPushDown.PUSH_DOWN_LIMIT, TableScanPushDown.PUSH_DOWN_HIGHLIGHT, + TableScanPushDown.PUSH_DOWN_NESTED, TableScanPushDown.PUSH_DOWN_PROJECT, new CreateTableWriteBuilder())); } diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java index 856d8df7ea..8f5ac86580 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java @@ -16,6 +16,7 @@ import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalProject; import org.opensearch.sql.planner.logical.LogicalRelation; @@ -65,6 +66,13 @@ public static Pattern highlight(Patter return Pattern.typeOf(LogicalHighlight.class).with(source(pattern)); } + /** + * Logical nested operator with a given pattern on inner field. + */ + public static Pattern nested(Pattern pattern) { + return Pattern.typeOf(LogicalNested.class).with(source(pattern)); + } + /** * Logical project operator with a given pattern on inner field. */ diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java index 556a12bb34..de2b47d403 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/read/TableScanPushDown.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.planner.optimizer.pattern.Patterns.filter; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.highlight; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.limit; +import static org.opensearch.sql.planner.optimizer.pattern.Patterns.nested; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.project; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.scanBuilder; import static org.opensearch.sql.planner.optimizer.pattern.Patterns.sort; @@ -74,6 +75,11 @@ public class TableScanPushDown implements Rule { scanBuilder())) .apply((highlight, scanBuilder) -> scanBuilder.pushDownHighlight(highlight)); + public static final Rule PUSH_DOWN_NESTED = + match( + nested( + scanBuilder())) + .apply((nested, scanBuilder) -> scanBuilder.pushDownNested(nested)); /** Pattern that matches a plan node. */ private final WithPattern pattern; diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/NestedOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/NestedOperator.java new file mode 100644 index 0000000000..049e9fd16e --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/NestedOperator.java @@ -0,0 +1,284 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import static java.util.stream.Collectors.mapping; +import static java.util.stream.Collectors.toList; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.sql.data.model.ExprCollectionValue; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.ReferenceExpression; + +/** + * The NestedOperator evaluates the {@link NestedOperator#fields} and + * generates {@link NestedOperator#nonNestedFields} to form the + * {@link NestedOperator#result} output. Resolve two nested fields + * with differing paths will result in a cartesian product(inner join). + */ +@EqualsAndHashCode(callSuper = false) +public class NestedOperator extends PhysicalPlan { + @Getter + private final PhysicalPlan input; + @Getter + private final Set fields; // Needs to be a Set to match legacy implementation + @Getter + private final Map> groupedPathsAndFields; + @EqualsAndHashCode.Exclude + private List> result = new ArrayList<>(); + @EqualsAndHashCode.Exclude + private List nonNestedFields = new ArrayList<>(); + @EqualsAndHashCode.Exclude + private ListIterator> flattenedResult = result.listIterator(); + + /** + * Constructor for NestedOperator with list of map as arg. + * @param input : PhysicalPlan input. + * @param fields : List of all fields and paths for nested fields. + */ + public NestedOperator(PhysicalPlan input, List> fields) { + this.input = input; + this.fields = fields.stream() + .map(m -> m.get("field").toString()) + .collect(Collectors.toSet()); + this.groupedPathsAndFields = fields.stream().collect( + Collectors.groupingBy( + m -> m.get("path").toString(), + mapping( + m -> m.get("field").toString(), + toList() + ) + ) + ); + } + + /** + * Constructor for NestedOperator with Set of fields. + * @param input : PhysicalPlan input. + * @param fields : List of all fields for nested fields. + * @param groupedPathsAndFields : Map of fields grouped by their path. + */ + public NestedOperator( + PhysicalPlan input, + Set fields, + Map> groupedPathsAndFields + ) { + this.input = input; + this.fields = fields; + this.groupedPathsAndFields = groupedPathsAndFields; + } + + @Override + public R accept(PhysicalPlanNodeVisitor visitor, C context) { + return visitor.visitNested(this, context); + } + + @Override + public List getChild() { + return Collections.singletonList(input); + } + + @Override + public boolean hasNext() { + return input.hasNext() || flattenedResult.hasNext(); + } + + + @Override + public ExprValue next() { + if (!flattenedResult.hasNext()) { + result.clear(); + nonNestedFields.clear(); + + ExprValue inputValue = input.next(); + generateNonNestedFieldsMap(inputValue); + // Add all nested fields to result map + for (String field : fields) { + result = flatten(field, inputValue, result); + } + + // Add all non-nested fields to result map + for (String nonNestedField : nonNestedFields) { + result = flatten(nonNestedField, inputValue, result); + } + + if (result.isEmpty()) { + flattenedResult = result.listIterator(); + return new ExprTupleValue(new LinkedHashMap<>()); + } + + flattenedResult = result.listIterator(); + } + return new ExprTupleValue(new LinkedHashMap<>(flattenedResult.next())); + } + + /** + * Generate list of non-nested fields that are in inputMap, but not in the member variable + * fields list. + * @param inputMap : Row to parse non-nested fields. + */ + public void generateNonNestedFieldsMap(ExprValue inputMap) { + for (Map.Entry inputField : inputMap.tupleValue().entrySet()) { + boolean foundNestedField = + this.fields.stream().anyMatch( + field -> field.split("\\.")[0].equalsIgnoreCase(inputField.getKey()) + ); + + if (!foundNestedField) { + this.nonNestedFields.add(inputField.getKey()); + } + } + } + + + /** + * Simplifies the structure of row's source Map by flattening it, + * making the full path of an object the key + * and the Object it refers to the value. + * + *

Sample input: + * keys = ['comments.likes'] + * row = comments: { + * likes: 2 + * } + * + *

Return: + * flattenedRow = {comment.likes: 2} + * + * @param nestedField : Field to query in row. + * @param row : Row returned from OS. + * @param prevList : List of previous nested calls. + * @return : List of nested select items or cartesian product of nested calls. + */ + private List> flatten( + String nestedField, + ExprValue row, + List> prevList + ) { + List> copy = new ArrayList<>(); + List> newList = new ArrayList<>(); + + ExprValue nestedObj = null; + getNested(nestedField, nestedField, row, copy, nestedObj); + + // Only one field in select statement + if (prevList.size() == 0) { + return copy; + } + + if (containSamePath(copy.get(0))) { + var resultIt = this.result.iterator(); + Map resultVal = resultIt.next(); + var copyIt = copy.iterator(); + Map copyVal = copyIt.next(); + for (int i = 0; i < this.result.size(); i++) { + resultVal.putAll(copyVal); + if (copyIt.hasNext()) { + copyVal = copyIt.next(); + } + if (resultIt.hasNext()) { + resultVal = resultIt.next(); + } + } + return this.result; + } else { + // Generate cartesian product + for (Map prevMap : prevList) { + for (Map newMap : copy) { + newList.add(Stream.of(newMap, prevMap) + .flatMap(map -> map.entrySet().stream()) + .collect(Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue))); + } + } + return newList; + } + } + + /** + * Check if newMap field has any sharing paths in prevMap. + * @param newMap : New map to add to result set. + * @return : true if there is already a field added to result set with same path. + */ + boolean containSamePath(Map newMap) { + String newKey = newMap.keySet().iterator().next(); + Map resultMap = this.result.iterator().next(); + for (var entry : this.groupedPathsAndFields.entrySet()) { + if (entry.getValue().contains(newKey)) { + for (var map : resultMap.entrySet()) { + if (entry.getValue().contains(map.getKey())) { + return true; + } + } + } + } + return false; + } + + + /** + * Retrieve nested field(s) in row. + * + * @param field : Path for nested field. + * @param nestedField : Current level to nested field path. + * @param row : Row to resolve nested field. + * @param ret : List to add nested field to. + * @param nestedObj : Object at current nested level. + * @return : Object at current nested level. + */ + private void getNested( + String field, String nestedField, ExprValue row, + List> ret, ExprValue nestedObj + ) { + ExprValue currentObj = (nestedObj == null) ? row : nestedObj; + String[] splitKeys = nestedField.split("\\."); + + if (currentObj instanceof ExprTupleValue) { + ExprTupleValue currentMap = (ExprTupleValue) currentObj; + if (currentMap.tupleValue().containsKey(splitKeys[0])) { + currentObj = currentMap.tupleValue().get(splitKeys[0]); + } else { + currentObj = null; + ret.add(new LinkedHashMap<>(Map.of(field, ExprNullValue.of()))); + } + } else if (currentObj instanceof ExprCollectionValue) { + ExprValue arrayObj = currentObj; + for (int x = 0; x < arrayObj.collectionValue().size(); x++) { + currentObj = arrayObj.collectionValue().get(x); + getNested(field, nestedField, row, ret, currentObj); + currentObj = null; + } + } else { + currentObj = null; + } + + // Return final nested result + if (currentObj != null + && (StringUtils.substringAfterLast(field, ".").equals(nestedField) + || !field.contains(".")) + ) { + ret.add(new LinkedHashMap<>(Map.of(field, currentObj))); + } else if (currentObj != null) { + getNested(field, nestedField.substring(nestedField.indexOf(".") + 1), + row, ret, currentObj); + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index e6e59990c8..8c10c91fb6 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -11,6 +11,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; import lombok.experimental.UtilityClass; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -105,4 +106,11 @@ public ValuesOperator values(List... values) { public static LimitOperator limit(PhysicalPlan input, Integer limit, Integer offset) { return new LimitOperator(input, limit, offset); } + + public static NestedOperator nested( + PhysicalPlan input, + Set args, + Map> groupedFieldsByPath) { + return new NestedOperator(input, args, groupedFieldsByPath); + } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java index d4bc4a1ea9..cb488700a0 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java @@ -57,6 +57,10 @@ public R visitEval(EvalOperator node, C context) { return visitNode(node, context); } + public R visitNested(NestedOperator node, C context) { + return visitNode(node, context); + } + public R visitDedupe(DedupeOperator node, C context) { return visitNode(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java b/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java index c0fdf36e70..9af66e219f 100644 --- a/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java +++ b/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java @@ -10,6 +10,7 @@ import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanNodeVisitor; import org.opensearch.sql.planner.logical.LogicalProject; @@ -104,6 +105,17 @@ public boolean pushDownHighlight(LogicalHighlight highlight) { return false; } + /** + * Can a given nested operator be pushed down to table scan builder. Assume no such support + * by default unless subclass override this. + * + * @param nested logical nested operator + * @return true if pushed down, otherwise false + */ + public boolean pushDownNested(LogicalNested nested) { + return false; + } + @Override public R accept(LogicalPlanNodeVisitor visitor, C context) { return visitor.visitTableScanBuilder(this, context); diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index f711c2362d..e8a1c6eb3f 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -79,11 +79,12 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.UnresolvedPlan; -import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.HighlightExpression; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.expression.window.WindowDefinition; import org.opensearch.sql.planner.logical.LogicalAD; @@ -531,6 +532,236 @@ public void project_source() { AstDSL.alias("double_value", AstDSL.field("double_value")))); } + @Test + public void project_nested_field_arg() { + List> nestedArgs = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING)), + null) + ); + + assertAnalyzeEqual( + LogicalPlanDSL.project( + LogicalPlanDSL.nested( + LogicalPlanDSL.relation("schema", table), + nestedArgs, + projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING))) + ), + AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message.info", + function("nested", qualifiedName("message", "info")), null) + ) + ); + } + + @Test + public void project_nested_field_and_path_args() { + List> nestedArgs = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING), DSL.ref("message", STRING)), + null) + ); + + assertAnalyzeEqual( + LogicalPlanDSL.project( + LogicalPlanDSL.nested( + LogicalPlanDSL.relation("schema", table), + nestedArgs, + projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING), DSL.ref("message", STRING))) + ), + AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message.info", + function( + "nested", + qualifiedName("message", "info"), + qualifiedName("message") + ), + null + ) + ) + ); + } + + @Test + public void project_nested_deep_field_arg() { + List> nestedArgs = + List.of( + Map.of( + "field", new ReferenceExpression("message.info.id", STRING), + "path", new ReferenceExpression("message.info", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression( + "message.info.id", + DSL.nested(DSL.ref("message.info.id", STRING)), + null) + ); + + assertAnalyzeEqual( + LogicalPlanDSL.project( + LogicalPlanDSL.nested( + LogicalPlanDSL.relation("schema", table), + nestedArgs, + projectList), + DSL.named("message.info.id", + DSL.nested(DSL.ref("message.info.id", STRING))) + ), + AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message.info.id", + function("nested", qualifiedName("message", "info", "id")), null) + ) + ); + } + + @Test + public void project_multiple_nested() { + List> nestedArgs = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ), + Map.of( + "field", new ReferenceExpression("comment.data", STRING), + "path", new ReferenceExpression("comment", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING)), + null), + new NamedExpression( + "comment.data", + DSL.nested(DSL.ref("comment.data", STRING)), + null) + ); + + assertAnalyzeEqual( + LogicalPlanDSL.project( + LogicalPlanDSL.nested( + LogicalPlanDSL.relation("schema", table), + nestedArgs, + projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING))), + DSL.named("comment.data", + DSL.nested(DSL.ref("comment.data", STRING))) + ), + AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message.info", + function("nested", qualifiedName("message", "info")), null), + AstDSL.alias("comment.data", + function("nested", qualifiedName("comment", "data")), null) + ) + ); + } + + @Test + public void project_nested_invalid_field_throws_exception() { + var exception = assertThrows( + IllegalArgumentException.class, + () -> analyze(AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message", + function("nested", qualifiedName("message")), null) + ) + ) + ); + assertEquals(exception.getMessage(), "Illegal nested field name: message"); + } + + @Test + public void project_nested_invalid_arg_type_throws_exception() { + var exception = assertThrows( + IllegalArgumentException.class, + () -> analyze(AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message", + function("nested", stringLiteral("message")), null) + ) + ) + ); + assertEquals(exception.getMessage(), "Illegal nested field name: message"); + } + + @Test + public void project_nested_no_args_throws_exception() { + var exception = assertThrows( + IllegalArgumentException.class, + () -> analyze(AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message", + function("nested"), null) + ) + ) + ); + assertEquals(exception.getMessage(), + "on nested object only allowed 2 parameters (field,path) or 1 parameter (field)" + ); + } + + @Test + public void project_nested_too_many_args_throws_exception() { + var exception = assertThrows( + IllegalArgumentException.class, + () -> analyze(AstDSL.projectWithArg( + AstDSL.relation("schema"), + AstDSL.defaultFieldsArgs(), + AstDSL.alias("message", + function("nested", + stringLiteral("message.info"), + stringLiteral("message"), + stringLiteral("message")), + null) + ) + ) + ); + assertEquals(exception.getMessage(), + "on nested object only allowed 2 parameters (field,path) or 1 parameter (field)" + ); + } + @Test public void project_highlight() { Map args = new HashMap<>(); diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index c7cd8d0556..5a05c79132 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -287,17 +287,6 @@ public void case_clause() { AstDSL.stringLiteral("test")))); } - @Test - public void skip_array_data_type() { - SyntaxCheckException exception = - assertThrows(SyntaxCheckException.class, - () -> analyze(qualifiedName("array_value"))); - assertEquals( - "Identifier [array_value] of type [ARRAY] is not supported yet", - exception.getMessage() - ); - } - @Test public void undefined_var_semantic_check_failed() { SemanticCheckException exception = assertThrows(SemanticCheckException.class, diff --git a/core/src/test/java/org/opensearch/sql/config/TestConfig.java b/core/src/test/java/org/opensearch/sql/config/TestConfig.java index 74dde6c2e9..6179f020c2 100644 --- a/core/src/test/java/org/opensearch/sql/config/TestConfig.java +++ b/core/src/test/java/org/opensearch/sql/config/TestConfig.java @@ -56,6 +56,11 @@ public class TestConfig { .put("timestamp_value", ExprCoreType.TIMESTAMP) .put("field_value1", ExprCoreType.STRING) .put("field_value2", ExprCoreType.STRING) + .put("message", ExprCoreType.STRING) + .put("message.info", ExprCoreType.STRING) + .put("message.info.id", ExprCoreType.STRING) + .put("comment", ExprCoreType.STRING) + .put("comment.data", ExprCoreType.STRING) .build(); protected StorageEngine storageEngine() { diff --git a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java index c2763e7120..7d438c870d 100644 --- a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java @@ -22,6 +22,7 @@ import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.eval; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.filter; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.limit; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.nested; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.project; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.rareTopN; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.remove; @@ -30,10 +31,9 @@ import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.values; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.window; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.DisplayNameGeneration; @@ -83,20 +83,20 @@ void can_explain_project_filter_table_scan() { new ExplainResponse( new ExplainResponseNode( "ProjectOperator", - ImmutableMap.of("fields", "[name, age]"), + Map.of("fields", "[name, age]"), singletonList(new ExplainResponseNode( "FilterOperator", - ImmutableMap.of("conditions", "and(=(balance, 10000), >(age, 30))"), + Map.of("conditions", "and(=(balance, 10000), >(age, 30))"), singletonList(tableScan.explainNode()))))), explain.apply(plan)); } @Test void can_explain_aggregations() { - List aggExprs = ImmutableList.of(ref("balance", DOUBLE)); - List aggList = ImmutableList.of( + List aggExprs = List.of(ref("balance", DOUBLE)); + List aggList = List.of( named("avg(balance)", DSL.avg(aggExprs.toArray(new Expression[0])))); - List groupByList = ImmutableList.of( + List groupByList = List.of( named("state", ref("state", STRING))); PhysicalPlan plan = agg(new FakeTableScan(), aggList, groupByList); @@ -104,7 +104,7 @@ void can_explain_aggregations() { new ExplainResponse( new ExplainResponseNode( "AggregationOperator", - ImmutableMap.of( + Map.of( "aggregators", "[avg(balance)]", "groupBy", "[state]"), singletonList(tableScan.explainNode()))), @@ -120,7 +120,7 @@ void can_explain_rare_top_n() { new ExplainResponse( new ExplainResponseNode( "RareTopNOperator", - ImmutableMap.of( + Map.of( "commandType", TOP, "noOfResults", 10, "fields", "[state]", @@ -131,8 +131,8 @@ void can_explain_rare_top_n() { @Test void can_explain_window() { - List partitionByList = ImmutableList.of(DSL.ref("state", STRING)); - List> sortList = ImmutableList.of( + List partitionByList = List.of(DSL.ref("state", STRING)); + List> sortList = List.of( ImmutablePair.of(DEFAULT_ASC, ref("age", INTEGER))); PhysicalPlan plan = window(tableScan, named(DSL.rank()), @@ -142,12 +142,12 @@ void can_explain_window() { new ExplainResponse( new ExplainResponseNode( "WindowOperator", - ImmutableMap.of( + Map.of( "function", "rank()", - "definition", ImmutableMap.of( + "definition", Map.of( "partitionBy", "[state]", - "sortList", ImmutableMap.of( - "age", ImmutableMap.of( + "sortList", Map.of( + "age", Map.of( "sortOrder", "ASC", "nullOrder", "NULL_FIRST")))), singletonList(tableScan.explainNode()))), @@ -157,14 +157,14 @@ void can_explain_window() { @Test void can_explain_other_operators() { ReferenceExpression[] removeList = {ref("state", STRING)}; - Map renameMapping = ImmutableMap.of( + Map renameMapping = Map.of( ref("state", STRING), ref("s", STRING)); Pair evalExprs = ImmutablePair.of( ref("age", INTEGER), DSL.add(ref("age", INTEGER), literal(2))); Expression[] dedupeList = {ref("age", INTEGER)}; Pair sortList = ImmutablePair.of( DEFAULT_ASC, ref("age", INTEGER)); - List values = ImmutableList.of(literal("WA"), literal(30)); + List values = List.of(literal("WA"), literal(30)); PhysicalPlan plan = remove( @@ -183,30 +183,30 @@ void can_explain_other_operators() { new ExplainResponse( new ExplainResponseNode( "RemoveOperator", - ImmutableMap.of("removeList", "[state]"), + Map.of("removeList", "[state]"), singletonList(new ExplainResponseNode( "RenameOperator", - ImmutableMap.of("mapping", ImmutableMap.of("state", "s")), + Map.of("mapping", Map.of("state", "s")), singletonList(new ExplainResponseNode( "EvalOperator", - ImmutableMap.of("expressions", ImmutableMap.of("age", "+(age, 2)")), + Map.of("expressions", Map.of("age", "+(age, 2)")), singletonList(new ExplainResponseNode( "DedupeOperator", - ImmutableMap.of( + Map.of( "dedupeList", "[age]", "allowedDuplication", 1, "keepEmpty", false, "consecutive", false), singletonList(new ExplainResponseNode( "SortOperator", - ImmutableMap.of( - "sortList", ImmutableMap.of( - "age", ImmutableMap.of( + Map.of( + "sortList", Map.of( + "age", Map.of( "sortOrder", "ASC", "nullOrder", "NULL_FIRST"))), singletonList(new ExplainResponseNode( "ValuesOperator", - ImmutableMap.of("values", ImmutableList.of(values)), + Map.of("values", List.of(values)), emptyList()))))))))))) ), explain.apply(plan) @@ -220,7 +220,24 @@ void can_explain_limit() { new ExplainResponse( new ExplainResponseNode( "LimitOperator", - ImmutableMap.of("limit", 10, "offset", 5), + Map.of("limit", 10, "offset", 5), + singletonList(tableScan.explainNode()))), + explain.apply(plan) + ); + } + + @Test + void can_explain_nested() { + Set nestedOperatorArgs = Set.of("message.info", "message"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + PhysicalPlan plan = nested(tableScan, nestedOperatorArgs, groupedFieldsByPath); + + assertEquals( + new ExplainResponse( + new ExplainResponseNode( + "NestedOperator", + Map.of("nested", Set.of("message.info", "message")), singletonList(tableScan.explainNode()))), explain.apply(plan) ); @@ -246,7 +263,7 @@ public String toString() { public ExplainResponseNode explainNode() { return new ExplainResponseNode( "FakeTableScan", - ImmutableMap.of("request", "Fake DSL request"), + Map.of("request", "Fake DSL request"), emptyList()); } diff --git a/core/src/test/java/org/opensearch/sql/expression/ReferenceExpressionTest.java b/core/src/test/java/org/opensearch/sql/expression/ReferenceExpressionTest.java index d3b44fe6a1..46aae069bb 100644 --- a/core/src/test/java/org/opensearch/sql/expression/ReferenceExpressionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/ReferenceExpressionTest.java @@ -35,6 +35,7 @@ import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.ExprCollectionValue; import org.opensearch.sql.data.model.ExprIntegerValue; import org.opensearch.sql.data.model.ExprStringValue; import org.opensearch.sql.data.model.ExprTupleValue; @@ -126,6 +127,16 @@ public void innner_none_object_field_contain_dot() { assertEquals(1990, actualValue.integerValue()); } + @Test + public void array_with_multiple_path_value() { + ReferenceExpression expr = new ReferenceExpression("message.info", STRING); + ExprValue actualValue = expr.resolve(tuple()); + + assertEquals(STRING, actualValue.type()); + // Array of object, only first index is used + assertEquals("First message in array", actualValue.stringValue()); + } + /** * { * "name": "bob smith" @@ -140,7 +151,11 @@ public void innner_none_object_field_contain_dot() { * }, * "address.local": { * "state": "WA", - * } + * }, + * "message": [ + * { "info": "message in array" }, + * { "info": "Only first index of array used" } + * ] * } */ private ExprTupleValue tuple() { @@ -151,12 +166,29 @@ private ExprTupleValue tuple() { ExprValueUtils.tupleValue(ImmutableMap.of("year", 2020)); ExprValue addressLocal = ExprValueUtils.tupleValue(ImmutableMap.of("state", "WA")); + ExprValue messageCollectionValue = + new ExprCollectionValue( + ImmutableList.of( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "info", stringValue("First message in array") + ) + ), + ExprValueUtils.tupleValue( + ImmutableMap.of( + "info", stringValue("Only first index of array used") + ) + ) + ) + ); + ExprTupleValue tuple = ExprTupleValue.fromExprValueMap(ImmutableMap.of( "name", new ExprStringValue("bob smith"), "project.year", new ExprIntegerValue(1990), "project", project, "address", address, - "address.local", addressLocal + "address.local", addressLocal, + "message", messageCollectionValue )); return tuple; } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java index 6e4fff2fb0..d90d8295c4 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/OpenSearchFunctionsTest.java @@ -8,24 +8,28 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; -import com.google.common.collect.ImmutableMap; import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import org.junit.jupiter.api.Test; import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.ExpressionTestBase; import org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.env.Environment; public class OpenSearchFunctionsTest extends ExpressionTestBase { private final NamedArgumentExpression field = new NamedArgumentExpression( "field", DSL.literal("message")); private final NamedArgumentExpression fields = new NamedArgumentExpression( - "fields", DSL.literal(new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( + "fields", DSL.literal(new ExprTupleValue(new LinkedHashMap<>(Map.of( "title", ExprValueUtils.floatValue(1.F), "body", ExprValueUtils.floatValue(.3F)))))); private final NamedArgumentExpression query = new NamedArgumentExpression( @@ -205,4 +209,16 @@ void wildcard_query() { field.getValue(), query.getValue()), expr.toString()); } + + @Test + void nested_query() { + FunctionExpression expr = DSL.nested(DSL.ref("message.info", STRING)); + assertEquals(String.format("FunctionExpression(functionName=%s, arguments=[message.info])", + BuiltinFunctionName.NESTED.getName()), + expr.toString()); + Environment nestedTuple = ExprValueUtils.tupleValue( + Map.of("message", Map.of("info", "result"))).bindingTuples(); + assertEquals(expr.valueOf(nestedTuple), ExprValueUtils.stringValue("result")); + assertEquals(expr.type(), STRING); + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 017cfb60ea..a717c4ed8f 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -18,6 +18,7 @@ import static org.opensearch.sql.planner.logical.LogicalPlanDSL.eval; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; +import static org.opensearch.sql.planner.logical.LogicalPlanDSL.nested; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.project; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.rareTopN; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.remove; @@ -31,6 +32,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.Test; @@ -99,59 +101,81 @@ public void visitShouldReturnDefaultPhysicalOperator() { ImmutablePair.of(Sort.SortOption.DEFAULT_ASC, ref("name1", STRING)); Integer limit = 1; Integer offset = 1; + List> nestedArgs = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + List nestedProjectList = + List.of( + new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING)), + null + ) + ); + Set nestedOperatorArgs = Set.of("message.info"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + LogicalPlan plan = project( - limit( - LogicalPlanDSL.dedupe( - rareTopN( - sort( - eval( - remove( - rename( - aggregation( - filter(values(emptyList()), filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), - CommandType.TOP, - topByExprs, - rareTopNField), - dedupeField), - limit, - offset), + nested( + limit( + LogicalPlanDSL.dedupe( + rareTopN( + sort( + eval( + remove( + rename( + aggregation( + filter(values(emptyList()), filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topByExprs, + rareTopNField), + dedupeField), + limit, + offset), + nestedArgs, nestedProjectList), include); PhysicalPlan actual = plan.accept(implementor, null); assertEquals( PhysicalPlanDSL.project( - PhysicalPlanDSL.limit( - PhysicalPlanDSL.dedupe( - PhysicalPlanDSL.rareTopN( - PhysicalPlanDSL.sort( - PhysicalPlanDSL.eval( - PhysicalPlanDSL.remove( - PhysicalPlanDSL.rename( - PhysicalPlanDSL.agg( - PhysicalPlanDSL.filter( - PhysicalPlanDSL.values(emptyList()), - filterExpr), - aggregators, - groupByExprs), - mappings), - exclude), - newEvalField), - sortField), - CommandType.TOP, - topByExprs, - rareTopNField), - dedupeField), - limit, - offset), + PhysicalPlanDSL.nested( + PhysicalPlanDSL.limit( + PhysicalPlanDSL.dedupe( + PhysicalPlanDSL.rareTopN( + PhysicalPlanDSL.sort( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.remove( + PhysicalPlanDSL.rename( + PhysicalPlanDSL.agg( + PhysicalPlanDSL.filter( + PhysicalPlanDSL.values(emptyList()), + filterExpr), + aggregators, + groupByExprs), + mappings), + exclude), + newEvalField), + sortField), + CommandType.TOP, + topByExprs, + rareTopNField), + dedupeField), + limit, + offset), + nestedOperatorArgs, groupedFieldsByPath), include), actual); } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java index 341bcbc29e..fe76589066 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java @@ -8,12 +8,14 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.expression.DSL.named; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; @@ -29,6 +31,7 @@ import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.LiteralExpression; +import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.Aggregator; import org.opensearch.sql.expression.window.WindowDefinition; @@ -152,6 +155,21 @@ public TableWriteOperator build(PhysicalPlan child) { assertNull(highlight.accept(new LogicalPlanNodeVisitor() { }, null)); + List> nestedArgs = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, nestedArgs, projectList); + assertNull(nested.accept(new LogicalPlanNodeVisitor() { + }, null)); + LogicalPlan mlCommons = new LogicalMLCommons(LogicalPlanDSL.relation("schema", table), "kmeans", ImmutableMap.builder() diff --git a/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java b/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java index 7516aa1809..d220f599f8 100644 --- a/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java @@ -14,10 +14,12 @@ import static org.opensearch.sql.data.model.ExprValueUtils.longValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.LONG; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.aggregation; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.highlight; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; +import static org.opensearch.sql.planner.logical.LogicalPlanDSL.nested; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.project; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.relation; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.sort; @@ -26,6 +28,7 @@ import com.google.common.collect.ImmutableList; import java.util.Collections; +import java.util.List; import java.util.Map; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; @@ -38,6 +41,8 @@ import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.Table; @@ -253,6 +258,25 @@ void table_scan_builder_support_highlight_push_down_can_apply_its_rule() { ); } + @Test + void table_scan_builder_support_nested_push_down_can_apply_its_rule() { + when(tableScanBuilder.pushDownNested(any())).thenReturn(true); + + assertEquals( + tableScanBuilder, + optimize( + nested( + relation("schema", table), + List.of(Map.of("field", new ReferenceExpression("message.info", STRING))), + List.of(new NamedExpression( + "message.info", + DSL.nested(DSL.ref("message.info", STRING)), + null)) + ) + ) + ); + } + @Test void table_not_support_scan_builder_should_not_be_impact() { Mockito.reset(table, tableScanBuilder); diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/NestedOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/NestedOperatorTest.java new file mode 100644 index 0000000000..5d8b893869 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/physical/NestedOperatorTest.java @@ -0,0 +1,328 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.model.ExprValueUtils.collectionValue; +import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.ReferenceExpression; + +@ExtendWith(MockitoExtension.class) +class NestedOperatorTest extends PhysicalPlanTestBase { + @Mock + private PhysicalPlan inputPlan; + + private final ExprValue testData = tupleValue( + Map.of( + "message", + collectionValue( + List.of( + Map.of("info", "a"), + Map.of("info", "b"), + Map.of("info", "c") + ) + ), + "comment", + collectionValue( + List.of( + Map.of("data", "1"), + Map.of("data", "2"), + Map.of("data", "3") + ) + ) + ) + ); + + + private final ExprValue testDataWithSamePath = tupleValue( + Map.of( + "message", + collectionValue( + List.of( + Map.of("info", "a", "id", "1"), + Map.of("info", "b", "id", "2"), + Map.of("info", "c", "id", "3") + ) + ) + ) + ); + + private final ExprValue nonNestedTestData = tupleValue( + Map.of( + "message", "val" + ) + ); + + private final ExprValue missingArrayData = tupleValue( + Map.of( + "missing", + collectionValue( + List.of("value") + ) + ) + ); + + @Test + public void nested_one_nested_field() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(testData); + + Set fields = Set.of("message.info"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + + assertThat( + execute(new NestedOperator(inputPlan, fields, groupedFieldsByPath)), + contains( + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "a"); + put("comment", collectionValue( + new ArrayList<>() {{ + add(new LinkedHashMap<>() {{ + put("data", "1"); + }} + ); + add(new LinkedHashMap<>() {{ + put("data", "2"); + }} + ); + add(new LinkedHashMap<>() {{ + put("data", "3"); + }} + ); + }} + )); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "b"); + put("comment", collectionValue( + new ArrayList<>() {{ + add(new LinkedHashMap<>() {{ + put("data", "1"); + }} + ); + add(new LinkedHashMap<>() {{ + put("data", "2"); + }} + ); + add(new LinkedHashMap<>() {{ + put("data", "3"); + }} + ); + }} + )); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "c"); + put("comment", collectionValue( + new ArrayList<>() {{ + add(new LinkedHashMap<>() {{ + put("data", "1"); + }} + ); + add(new LinkedHashMap<>() {{ + put("data", "2"); + }} + ); + add(new LinkedHashMap<>() {{ + put("data", "3"); + }} + ); + }} + )); + }} + ) + ) + ); + } + + @Test + public void nested_two_nested_field() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(testData); + + List> fields = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING)), + Map.of( + "field", new ReferenceExpression("comment.data", STRING), + "path", new ReferenceExpression("comment", STRING)) + ); + assertThat( + execute(new NestedOperator(inputPlan, fields)), + contains( + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "a"); + put("comment.data", "1"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "a"); + put("comment.data", "2"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "a"); + put("comment.data", "3"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "b"); + put("comment.data", "1"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "b"); + put("comment.data", "2"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "b"); + put("comment.data", "3"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "c"); + put("comment.data", "1"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "c"); + put("comment.data", "2"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "c"); + put("comment.data", "3"); + }} + ) + ) + ); + } + + @Test + public void nested_two_nested_fields_with_same_path() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(testDataWithSamePath); + + List> fields = + List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING)), + Map.of( + "field", new ReferenceExpression("message.id", STRING), + "path", new ReferenceExpression("message", STRING)) + ); + assertThat( + execute(new NestedOperator(inputPlan, fields)), + contains( + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "a"); + put("message.id", "1"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "b"); + put("message.id", "2"); + }} + ), + tupleValue( + new LinkedHashMap<>() {{ + put("message.info", "c"); + put("message.id", "3"); + }} + ) + ) + ); + } + + @Test + public void non_nested_field_tests() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(nonNestedTestData); + + Set fields = Set.of("message"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + assertThat( + execute(new NestedOperator(inputPlan, fields, groupedFieldsByPath)), + contains( + tupleValue(new LinkedHashMap<>(Map.of("message", "val"))) + ) + ); + } + + @Test + public void nested_missing_tuple_field() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(tupleValue(Map.of())); + Set fields = Set.of("message.val"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.val")); + assertThat( + execute(new NestedOperator(inputPlan, fields, groupedFieldsByPath)), + contains( + tupleValue(new LinkedHashMap<>(Map.of("message.val", ExprNullValue.of()))) + ) + ); + } + + @Test + public void nested_missing_array_field() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(missingArrayData); + Set fields = Set.of("missing.data"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.data")); + assertTrue( + execute(new NestedOperator(inputPlan, fields, groupedFieldsByPath)) + .get(0) + .tupleValue() + .size() == 0 + ); + } +} diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java index 735b914d3e..fb687277ce 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java @@ -16,6 +16,9 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -132,6 +135,13 @@ public void test_PhysicalPlanVisitor_should_return_null() { PhysicalPlan limit = PhysicalPlanDSL.limit(plan, 1, 1); assertNull(limit.accept(new PhysicalPlanNodeVisitor() { }, null)); + + Set nestedArgs = Set.of("nested.test"); + Map> groupedFieldsByPath = + Map.of("nested", List.of("nested.test")); + PhysicalPlan nested = new NestedOperator(plan, nestedArgs, groupedFieldsByPath); + assertNull(nested.accept(new PhysicalPlanNodeVisitor() { + }, null)); } @Test diff --git a/docs/user/beyond/partiql.rst b/docs/user/beyond/partiql.rst index 6ad93ddeaf..76fec8405d 100644 --- a/docs/user/beyond/partiql.rst +++ b/docs/user/beyond/partiql.rst @@ -202,11 +202,11 @@ Selecting top level for object fields, object fields of array value and nested f os> SELECT city, accounts, projects FROM people; fetched rows / total rows = 1/1 - +-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------+ - | city | accounts | projects | - |-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------| - | {'name': 'Seattle', 'location': {'latitude': 10.5}} | [{'id': 1},{'id': 2}] | [{'name': 'AWS Redshift Spectrum querying'},{'name': 'AWS Redshift security'},{'name': 'AWS Aurora security'}] | - +-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------+ + | city | accounts | projects | + |-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------| + | {'name': 'Seattle', 'location': {'latitude': 10.5}} | {'id': 1} | [{'name': 'AWS Redshift Spectrum querying'},{'name': 'AWS Redshift security'},{'name': 'AWS Aurora security'}] | + +-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------+ Example 2: Selecting Deeper Levels ---------------------------------- diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index 5603c29534..d117226c4d 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -4341,6 +4341,29 @@ Another example to show how to set custom values for the optional parameters:: | tEsT wIlDcArD sensitive cases | +-------------------------------------------+ +NESTED +------ + +Description +>>>>>>>>>>> + +``nested(field | [field, path])`` + +The ``nested`` function maps to the ``nested`` query used in search engine. It returns nested field types in documents that match the provided specified field(s). +If the user does not provide the ``path`` parameter it will be generated dynamically. For example the ``field`` ``user.office.cubicle`` would dynamically generate the path +``user.office``. + +Example with ``field`` and ``path`` parameters:: + + os> SELECT nested(message.info, message) FROM nested; + fetched rows / total rows = 2/2 + +---------------------------------+ + | nested(message.info, message) | + |---------------------------------| + | a | + | b | + +---------------------------------+ + System Functions ================ diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index 22a635cf38..a02bcf096a 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 8/8 + fetched rows / total rows = 9/9 +----------------+---------------+-----------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+---------------+-----------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------| @@ -44,6 +44,7 @@ SQL query:: | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | wildcard | BASE TABLE | null | null | null | null | null | null | diff --git a/doctest/test_data/nested_objects.json b/doctest/test_data/nested_objects.json new file mode 100644 index 0000000000..fc5f56b4c5 --- /dev/null +++ b/doctest/test_data/nested_objects.json @@ -0,0 +1,4 @@ +{"index":{"_id":"1"}} +{"message":{"info":"a","author":"e","dayOfWeek":1},"comment":{"data":"ab","likes":3},"myNum":1,"someField":"b"} +{"index":{"_id":"2"}} +{"message":{"info":"b","author":"f","dayOfWeek":2},"comment":{"data":"aa","likes":2},"myNum":2,"someField":"a"} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index c517b2756c..1fedbdf49e 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -27,6 +27,7 @@ BOOKS = "books" APACHE = "apache" WILDCARD = "wildcard" +NESTED = "nested" DATASOURCES = ".ql-datasources" @@ -95,6 +96,7 @@ def set_up_test_indices(test): load_file("books.json", index_name=BOOKS) load_file("apache.json", index_name=APACHE) load_file("wildcard.json", index_name=WILDCARD) + load_file("nested_objects.json", index_name=NESTED) load_file("datasources.json", index_name=DATASOURCES) @@ -124,7 +126,7 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], ignore_unavailable=True) + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED], ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, diff --git a/doctest/test_mapping/nested_objects.json b/doctest/test_mapping/nested_objects.json new file mode 100644 index 0000000000..4f0ed97433 --- /dev/null +++ b/doctest/test_mapping/nested_objects.json @@ -0,0 +1,47 @@ +{ + "mappings": { + "properties": { + "message": { + "type": "nested", + "properties": { + "info": { + "type": "keyword", + "index": "true" + }, + "author": { + "type": "keyword", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "index": "true" + }, + "dayOfWeek": { + "type": "long" + } + } + }, + "comment": { + "type": "nested", + "properties": { + "data": { + "type": "keyword", + "index": "true" + }, + "likes": { + "type": "long" + } + } + }, + "myNum": { + "type": "long" + }, + "someField": { + "type": "keyword", + "index": "true" + } + } + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java index bddaa22772..b1db21a2ff 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java @@ -70,8 +70,7 @@ public void testSelectObjectInnerFields() { public void testSelectNestedFieldItself() { JSONObject response = new JSONObject(query("SELECT projects FROM %s")); - // Nested field is absent in OpenSearch Get Field Mapping response either hence "object" used - verifySchema(response, schema("projects", null, "object")); + verifySchema(response, schema("projects", null, "nested")); // Expect nested field itself is returned in a single cell verifyDataRows(response, diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java index 1e2073acbd..200c300f3b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/PrettyFormatResponseIT.java @@ -196,7 +196,7 @@ public void selectNestedFields() throws IOException { String.format(Locale.ROOT, "SELECT nested(message.info), someField FROM %s", TestsConstants.TEST_INDEX_NESTED_TYPE)); - List fields = Arrays.asList("message.info", "someField"); + List fields = Arrays.asList("nested(message.info)", "someField"); assertContainsColumns(getSchema(response), fields); assertContainsData(getDataRows(response), fields); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 360497300e..cb86ed6a11 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -505,6 +505,10 @@ public enum Index { "nestedType", getNestedTypeIndexMapping(), "src/test/resources/nested_objects.json"), + NESTED_WITHOUT_ARRAYS(TestsConstants.TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS, + "nestedTypeWithoutArrays", + getNestedTypeIndexMapping(), + "src/test/resources/nested_objects_without_arrays.json"), NESTED_WITH_QUOTES(TestsConstants.TEST_INDEX_NESTED_WITH_QUOTES, "nestedType", getNestedTypeIndexMapping(), @@ -593,11 +597,18 @@ public enum Index { "wildcard", getMappingFile("wildcard_index_mappings.json"), "src/test/resources/wildcard.json"), - DATASOURCES(TestsConstants.DATASOURCES, "datasource", getMappingFile("datasources_index_mappings.json"), - "src/test/resources/datasources.json"); + "src/test/resources/datasources.json"), + MULTI_NESTED(TestsConstants.TEST_INDEX_MULTI_NESTED_TYPE, + "multi_nested", + getMappingFile("multi_nested.json"), + "src/test/resources/multi_nested_objects.json"), + NESTED_WITH_NULLS(TestsConstants.TEST_INDEX_NESTED_WITH_NULLS, + "multi_nested", + getNestedTypeIndexMapping(), + "src/test/resources/nested_with_nulls.json"); private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index e46993cd17..c3af98b794 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -31,6 +31,8 @@ public class TestsConstants { public final static String TEST_INDEX_LOCATION = TEST_INDEX + "_location"; public final static String TEST_INDEX_LOCATION2 = TEST_INDEX + "_location2"; public final static String TEST_INDEX_NESTED_TYPE = TEST_INDEX + "_nested_type"; + public final static String TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS = + TEST_INDEX + "_nested_type_without_arrays"; public final static String TEST_INDEX_NESTED_SIMPLE = TEST_INDEX + "_nested_simple"; public final static String TEST_INDEX_NESTED_WITH_QUOTES = TEST_INDEX + "_nested_type_with_quotes"; @@ -55,6 +57,8 @@ public class TestsConstants { public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing"; public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs"; public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard"; + public final static String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; + public final static String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; public final static String DATASOURCES = ".ql-datasources"; public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java new file mode 100644 index 0000000000..a7b5d46234 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java @@ -0,0 +1,260 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MULTI_NESTED_TYPE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_WITH_NULLS; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.Test; +import org.junit.jupiter.api.Disabled; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class NestedIT extends SQLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(Index.MULTI_NESTED); + loadIndex(Index.NESTED); + loadIndex(Index.NESTED_WITHOUT_ARRAYS); + loadIndex(Index.EMPLOYEE_NESTED); + loadIndex(Index.NESTED_WITH_NULLS); + } + + @Test + public void nested_function_with_array_of_nested_field_test() { + String query = "SELECT nested(message.info), nested(comment.data) FROM " + TEST_INDEX_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + + assertEquals(6, result.getInt("total")); + verifyDataRows(result, + rows("c", "ab"), + rows("a", "ab"), + rows("b", "aa"), + rows("c", "aa"), + rows("a", "ab"), + rows("zz", new JSONArray(List.of("aa", "bb")))); + } + + @Test + public void nested_function_in_select_test() { + String query = "SELECT nested(message.info), nested(comment.data), " + + "nested(message.dayOfWeek) FROM " + + TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; + JSONObject result = executeJdbcRequest(query); + + assertEquals(5, result.getInt("total")); + verifySchema(result, + schema("nested(message.info)", null, "keyword"), + schema("nested(comment.data)", null, "keyword"), + schema("nested(message.dayOfWeek)", null, "long")); + verifyDataRows(result, + rows("a", "ab", 1), + rows("b", "aa", 2), + rows("c", "aa", 1), + rows("c", "ab", 4), + rows("zz", "bb", 6)); + } + + // Has to be tested with JSON format when https://github.com/opensearch-project/sql/issues/1317 + // gets resolved + @Disabled // TODO fix me when aggregation is supported + public void nested_function_in_an_aggregate_function_in_select_test() { + String query = "SELECT sum(nested(message.dayOfWeek)) FROM " + + TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows(14)); + } + + // TODO Enable me when nested aggregation is supported + @Disabled + public void nested_function_with_arrays_in_an_aggregate_function_in_select_test() { + String query = "SELECT sum(nested(message.dayOfWeek)) FROM " + + TEST_INDEX_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + verifyDataRows(result, rows(19)); + } + + // TODO not currently supported by legacy, should we add implementation in AstBuilder? + @Disabled + public void nested_function_in_a_function_in_select_test() { + String query = "SELECT upper(nested(message.info)) FROM " + + TEST_INDEX_NESTED_TYPE_WITHOUT_ARRAYS; + JSONObject result = executeJdbcRequest(query); + + verifyDataRows(result, + rows("A"), + rows("B"), + rows("C"), + rows("C"), + rows("ZZ")); + } + + @Test + public void nested_function_with_array_of_multi_nested_field_test() { + String query = "SELECT nested(message.author.name) FROM " + TEST_INDEX_MULTI_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + + assertEquals(6, result.getInt("total")); + verifyDataRows(result, + rows("e"), + rows("f"), + rows("g"), + rows("h"), + rows("p"), + rows("yy")); + } + + @Test + public void nested_function_with_null_and_missing_fields_test() { + String query = "SELECT nested(message.info), nested(comment.data) FROM " + + TEST_INDEX_NESTED_WITH_NULLS; + JSONObject result = executeJdbcRequest(query); + + assertEquals(10, result.getInt("total")); + verifyDataRows(result, + rows(null, "hh"), + rows("b", "aa"), + rows("c", "aa"), + rows("c", "ab"), + rows("a", "ab"), + rows("zz", new JSONArray(List.of("aa", "bb"))), + rows("zz", new JSONArray(List.of("aa", "bb"))), + rows(null, "ee"), + rows("a", "ab"), + rows("rr", new JSONArray(List.of("asdf", "sdfg")))); + } + + @Test + public void nested_function_multiple_fields_with_matched_and_mismatched_paths_test() { + String query = + "SELECT nested(message.author), nested(message.dayOfWeek), nested(message.info), nested(comment.data), " + + "nested(comment.likes) FROM " + TEST_INDEX_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + + assertEquals(6, result.getInt("total")); + verifyDataRows(result, + rows("e", 1, "a", "ab", 3), + rows("f", 2, "b", "aa", 2), + rows("g", 1, "c", "aa", 3), + rows("h", 4, "c", "ab", 1), + rows("i", 5, "a", "ab", 1), + rows("zz", 6, "zz", new JSONArray(List.of("aa", "bb")), 10)); + } + + @Test + public void nested_function_mixed_with_non_nested_type_test() { + String query = + "SELECT nested(message.info), someField FROM " + TEST_INDEX_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + + assertEquals(6, result.getInt("total")); + verifyDataRows(result, + rows("a", "b"), + rows("b", "a"), + rows("c", "a"), + rows("c", "b"), + rows("a", "b"), + rows("zz", "a")); + } + + @Test + public void nested_function_mixed_with_non_nested_types_test() { + String query = + "SELECT nested(message.info), office, office.west FROM " + TEST_INDEX_MULTI_NESTED_TYPE; + JSONObject result = executeJdbcRequest(query); + + assertEquals(6, result.getInt("total")); + verifyDataRows(result, + rows("a", + new JSONObject(Map.of("south", 3, "west", "ab")), "ab"), + rows("b", + new JSONObject(Map.of("south", 5, "west", "ff")), "ff"), + rows("c", + new JSONObject(Map.of("south", 3, "west", "ll")), "ll"), + rows("d", null, null), + rows("i", null, null), + rows("zz", null, null)); + } + + @Test + public void nested_function_with_relevance_query() { + String query = + "SELECT nested(message.info), highlight(someField) FROM " + + TEST_INDEX_NESTED_TYPE + " WHERE match(someField, 'b')"; + JSONObject result = executeJdbcRequest(query); + + assertEquals(3, result.getInt("total")); + verifyDataRows(result, + rows("a", new JSONArray(List.of("b"))), + rows("c", new JSONArray(List.of("b"))), + rows("a", new JSONArray(List.of("b")))); + } + + @Test + public void nested_with_non_nested_type_test() { + String query = "SELECT nested(someField) FROM " + TEST_INDEX_NESTED_TYPE; + + Exception exception = assertThrows(RuntimeException.class, + () -> executeJdbcRequest(query)); + assertTrue(exception.getMessage().contains( + "{\n" + + " \"error\": {\n" + + " \"reason\": \"Invalid SQL query\",\n" + + " \"details\": \"Illegal nested field name: someField\",\n" + + " \"type\": \"IllegalArgumentException\"\n" + + " },\n" + + " \"status\": 400\n" + + "}" + )); + } + + @Test + public void nested_missing_path() { + String query = "SELECT nested(message.invalid) FROM " + TEST_INDEX_MULTI_NESTED_TYPE; + + Exception exception = assertThrows(RuntimeException.class, + () -> executeJdbcRequest(query)); + assertTrue(exception.getMessage().contains("" + + "{\n" + + " \"error\": {\n" + + " \"reason\": \"Invalid SQL query\",\n" + + " \"details\": \"can't resolve Symbol(namespace=FIELD_NAME, name=message.invalid) in type env\",\n" + + " \"type\": \"SemanticCheckException\"\n" + + " },\n" + + " \"status\": 400\n" + + "}" + )); + } + + @Test + public void nested_missing_path_argument() { + String query = "SELECT nested(message.author.name, invalid) FROM " + TEST_INDEX_MULTI_NESTED_TYPE; + + Exception exception = assertThrows(RuntimeException.class, + () -> executeJdbcRequest(query)); + assertTrue(exception.getMessage().contains("" + + "{\n" + + " \"error\": {\n" + + " \"reason\": \"Invalid SQL query\",\n" + + " \"details\": \"can't resolve Symbol(namespace=FIELD_NAME, name=invalid) in type env\",\n" + + " \"type\": \"SemanticCheckException\"\n" + + " },\n" + + " \"status\": 400\n" + + "}" + )); + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/multi_nested.json b/integ-test/src/test/resources/indexDefinitions/multi_nested.json new file mode 100644 index 0000000000..d2da21d24c --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/multi_nested.json @@ -0,0 +1,42 @@ +{ + "mappings": { + "properties": { + "message": { + "type": "nested", + "properties": { + "info": { + "type": "keyword" + }, + "author": { + "type": "nested", + "properties": { + "name": { + "type": "keyword", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "address": { + "type": "nested", + "properties": { + "street": { + "type": "keyword" + }, + "number": { + "type": "integer" + } + } + } + } + }, + "dayOfWeek": { + "type": "long" + } + } + } + } + } +} diff --git a/integ-test/src/test/resources/multi_nested_objects.json b/integ-test/src/test/resources/multi_nested_objects.json new file mode 100644 index 0000000000..c8cf91162a --- /dev/null +++ b/integ-test/src/test/resources/multi_nested_objects.json @@ -0,0 +1,10 @@ +{"index":{"_id":"1"}} +{"message":{"info":"a","author":{"name": "e", "address": {"street": "bc", "number": 1}},"dayOfWeek":1},"office":{"west":"ab","south":3}} +{"index":{"_id":"2"}} +{"message":{"info":"b","author":{"name": "f", "address": {"street": "ab", "number": 2}},"dayOfWeek":2},"office":{"west":"ff","south":5}} +{"index":{"_id":"3"}} +{"message":{"info":"c","author":{"name": "g", "address": {"street": "sk", "number": 3}},"dayOfWeek":1},"office":{"west":"ll","south":3}} +{"index":{"_id":"4"}} +{"message":[{"info":"d","author":{"name": "h", "address": {"street": "mb", "number": 4}},"dayOfWeek":4},{"info":"i","author":{"name": "p", "address": {"street": "on", "number": 5}},"dayOfWeek":5}]} +{"index":{"_id":"5"}} +{"message": [{"info":"zz","author":{"name": "yy", "address": {"street": "qc", "number": 6}},"dayOfWeek":6}]} diff --git a/integ-test/src/test/resources/nested_objects_without_arrays.json b/integ-test/src/test/resources/nested_objects_without_arrays.json new file mode 100644 index 0000000000..626e63e079 --- /dev/null +++ b/integ-test/src/test/resources/nested_objects_without_arrays.json @@ -0,0 +1,10 @@ +{"index":{"_id":"1"}} +{"message":{"info":"a","author":"e","dayOfWeek":1},"comment":{"data":"ab","likes":3},"myNum":1,"someField":"b"} +{"index":{"_id":"2"}} +{"message":{"info":"b","author":"f","dayOfWeek":2},"comment":{"data":"aa","likes":2},"myNum":2,"someField":"a"} +{"index":{"_id":"3"}} +{"message":{"info":"c","author":"g","dayOfWeek":1},"comment":{"data":"aa","likes":3},"myNum":3,"someField":"a"} +{"index":{"_id":"4"}} +{"message":{"info":"c","author":"h","dayOfWeek":4},"comment":{"data":"ab","likes":1},"myNum":4,"someField":"b"} +{"index":{"_id":"5"}} +{"message": {"info":"zz","author":"zz","dayOfWeek":6},"comment":{"data":"bb","likes":10},"myNum":3,"someField":"a"} diff --git a/integ-test/src/test/resources/nested_with_nulls.json b/integ-test/src/test/resources/nested_with_nulls.json new file mode 100644 index 0000000000..b02a8ab110 --- /dev/null +++ b/integ-test/src/test/resources/nested_with_nulls.json @@ -0,0 +1,24 @@ +{"index":{"_id":"1"}} +{"message":{"author":"e","dayOfWeek":5},"comment":{"data":"hh","likes":5},"myNum":7,"someField":"a"} +{"index":{"_id":"2"}} +{"message":{"info":"b","author":"f","dayOfWeek":2},"comment":{"data":"aa","likes":2},"myNum":2,"someField":"a"} +{"index":{"_id":"3"}} +{"message":{"info":"c","author":"g","dayOfWeek":1},"comment":{"data":"aa","likes":3},"myNum":3,"someField":"a"} +{"index":{"_id":"4"}} +{"message":[{"info":"c","author":"h","dayOfWeek":4},{"info":"a","author":"i","dayOfWeek":5}],"comment":{"data":"ab","likes":1},"myNum":4,"someField":"b"} +{"index":{"_id":"5"}} +{"message": [{"info":"zz","author":"zz","dayOfWeek":6}],"comment":{"data":["aa","bb"],"likes":10},"myNum":[3,4],"someField":"a"} +{"index":{"_id":"7"}} +{"message":[{"info":"zz", "author":"z\"z", "dayOfWeek":6}], "comment":{"data":["aa","bb"], "likes":10}, "myNum":[3,4], "someField":"a"} +{"index":{"_id":"8"}} +{"message":{"info":null,"author":"e","dayOfWeek":7},"comment":{"data":"ee","likes":6},"myNum":6,"someField":"a"} +{"index":{"_id":"9"}} +{"message":{"info":"a","author":"e","dayOfWeek":1},"comment":{"data":"ab","likes":3},"myNum":1,"someField":"b"} +{"index":{"_id":"10"}} +{"message":[{"info":"rr", "author":"this \"value\" contains quotes", "dayOfWeek":3}], "comment":{"data":["asdf","sdfg"], "likes":56}, "myNum":[1,2,4], "someField":"ert"} +{"index":{"_id":"11"}} +{"comment":{"data":"jj","likes":1},"myNum":8,"someField":"a"} +{"index":{"_id":"12"}} +{"message":null,"comment":{"data":"kk","likes":0},"myNum":9,"someField":"a"} +{"index":{"_id":"13"}} +{} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 034f9227ee..0c4548a368 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -15,8 +15,10 @@ import static org.opensearch.sql.utils.DateTimeFormatters.DATE_TIME_FORMATTER; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterators; import java.time.Instant; import java.time.format.DateTimeParseException; import java.util.ArrayList; @@ -234,15 +236,20 @@ private ExprValue parseStruct(Content content, String prefix) { */ private ExprValue parseArray(Content content, String prefix) { List result = new ArrayList<>(); - content.array().forEachRemaining(v -> { - // ExprCoreType.ARRAY does not indicate inner elements type. OpenSearch nested will be an - // array of structs, otherwise parseArray currently only supports array of strings. - if (v.isString()) { - result.add(parse(v, prefix, Optional.of(OpenSearchDataType.of(STRING)))); - } else { - result.add(parse(v, prefix, Optional.of(STRUCT))); - } - }); + // ExprCoreType.ARRAY does not indicate inner elements type. + if (Iterators.size(content.array()) == 1 && content.objectValue() instanceof JsonNode) { + result.add(parse(content, prefix, Optional.of(STRUCT))); + } else { + content.array().forEachRemaining(v -> { + // ExprCoreType.ARRAY does not indicate inner elements type. OpenSearch nested will be an + // array of structs, otherwise parseArray currently only supports array of strings. + if (v.isString()) { + result.add(parse(v, prefix, Optional.of(OpenSearchDataType.of(STRING)))); + } else { + result.add(parse(v, prefix, Optional.of(STRUCT))); + } + }); + } return new ExprCollectionValue(result); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index f06ecb8576..9d71cee8c9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -16,6 +16,7 @@ import org.opensearch.sql.planner.physical.EvalOperator; import org.opensearch.sql.planner.physical.FilterOperator; import org.opensearch.sql.planner.physical.LimitOperator; +import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.ProjectOperator; import org.opensearch.sql.planner.physical.RareTopNOperator; @@ -87,6 +88,15 @@ public PhysicalPlan visitEval(EvalOperator node, Object context) { return new EvalOperator(visitInput(node.getInput(), context), node.getExpressionList()); } + @Override + public PhysicalPlan visitNested(NestedOperator node, Object context) { + return doProtect( + new NestedOperator( + visitInput(node.getInput(), context), node.getFields(), node.getGroupedPathsAndFields() + ) + ); + } + @Override public PhysicalPlan visitDedupe(DedupeOperator node, Object context) { return new DedupeOperator(visitInput(node.getInput(), context), node.getDedupeList(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 95f9fa39b0..9f1b588af9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -6,10 +6,14 @@ package org.opensearch.sql.opensearch.request; +import static java.util.stream.Collectors.mapping; +import static java.util.stream.Collectors.toList; +import static org.opensearch.index.query.QueryBuilders.boolQuery; +import static org.opensearch.index.query.QueryBuilders.matchAllQuery; +import static org.opensearch.index.query.QueryBuilders.nestedQuery; import static org.opensearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME; import static org.opensearch.search.sort.SortOrder.ASC; -import com.google.common.collect.Lists; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -19,14 +23,17 @@ import lombok.Getter; import lombok.ToString; import org.apache.commons.lang3.tuple.Pair; +import org.apache.lucene.search.join.ScoreMode; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.InnerHitBuilder; +import org.opensearch.index.query.NestedQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.FetchSourceContext; import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; -import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.sql.ast.expression.Literal; @@ -37,6 +44,7 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; +import org.opensearch.sql.planner.logical.LogicalNested; /** * OpenSearch search request builder. @@ -242,4 +250,77 @@ private boolean isSortByDocOnly() { } return false; } + + /** + * Push down nested to sourceBuilder. + * @param nestedArgs : Nested arguments to push down. + */ + public void pushDownNested(List> nestedArgs) { + initBoolQueryFilter(); + groupFieldNamesByPath(nestedArgs).forEach( + (path, fieldNames) -> buildInnerHit( + fieldNames, createEmptyNestedQuery(path) + ) + ); + } + + /** + * Initialize bool query for push down. + */ + private void initBoolQueryFilter() { + if (sourceBuilder.query() == null) { + sourceBuilder.query(QueryBuilders.boolQuery()); + } else { + sourceBuilder.query(QueryBuilders.boolQuery().must(sourceBuilder.query())); + } + + sourceBuilder.query(QueryBuilders.boolQuery().filter(sourceBuilder.query())); + } + + /** + * Map all field names in nested queries that use same path. + * @param fields : Fields for nested queries. + * @return : Map of path and associated field names. + */ + private Map> groupFieldNamesByPath( + List> fields) { + // TODO filter out reverse nested when supported - .filter(not(isReverseNested())) + return fields.stream().collect( + Collectors.groupingBy( + m -> m.get("path").toString(), + mapping( + m -> m.get("field").toString(), + toList() + ) + ) + ); + } + + /** + * Build inner hits portion to nested query. + * @param paths : Set of all paths used in nested queries. + * @param query : Current pushDown query. + */ + private void buildInnerHit(List paths, NestedQueryBuilder query) { + query.innerHit(new InnerHitBuilder().setFetchSourceContext( + new FetchSourceContext(true, paths.toArray(new String[0]), null) + )); + } + + /** + * Create a nested query with match all filter to place inner hits. + */ + private NestedQueryBuilder createEmptyNestedQuery(String path) { + NestedQueryBuilder nestedQuery = nestedQuery(path, matchAllQuery(), ScoreMode.None); + ((BoolQueryBuilder) query().filter().get(0)).must(nestedQuery); + return nestedQuery; + } + + /** + * Return current query. + * @return : Current source builder query. + */ + private BoolQueryBuilder query() { + return (BoolQueryBuilder) sourceBuilder.query(); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index 85a6b503f6..204a6bca22 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -126,7 +126,13 @@ public Iterator iterator() { ExprValue docData = exprValueFactory.construct(source); ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); - builder.putAll(docData.tupleValue()); + if (hit.getInnerHits() == null || hit.getInnerHits().isEmpty()) { + builder.putAll(docData.tupleValue()); + } else { + Map rowSource = hit.getSourceAsMap(); + builder.putAll(ExprValueUtils.tupleValue(rowSource).tupleValue()); + } + metaDataFieldSet.forEach(metaDataField -> { if (metaDataField.equals(METADATA_FIELD_INDEX)) { builder.put(METADATA_FIELD_INDEX, new ExprStringValue(hit.getIndex())); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index d7483cfcf0..8e6c57d7d5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -13,6 +13,7 @@ import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalProject; import org.opensearch.sql.planner.logical.LogicalSort; import org.opensearch.sql.storage.TableScanOperator; @@ -97,6 +98,11 @@ public boolean pushDownHighlight(LogicalHighlight highlight) { return delegate.pushDownHighlight(highlight); } + @Override + public boolean pushDownNested(LogicalNested nested) { + return delegate.pushDownNested(nested); + } + private boolean sortByFieldsOnly(LogicalSort sort) { return sort.getSortList().stream() .map(sortItem -> sortItem.getRight() instanceof ReferenceExpression) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java index d5a0c72f20..f20556ccc5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java @@ -29,6 +29,7 @@ import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalProject; import org.opensearch.sql.planner.logical.LogicalSort; import org.opensearch.sql.storage.TableScanOperator; @@ -116,6 +117,18 @@ private boolean trackScoresFromOpenSearchFunction(Expression condition) { return false; } + @Override + public boolean pushDownNested(LogicalNested nested) { + indexScan.getRequestBuilder().pushDownNested(nested.getFields()); + indexScan.getRequestBuilder().pushDownProjects( + findReferenceExpressions(nested.getProjectList())); + // Return false intentionally to keep the original nested operator + // Since we return false we need to pushDownProject here as it won't be + // pushed down due to no matching push down rule. + // TODO: improve LogicalPlanOptimizer pushdown api. + return false; + } + /** * Find reference expression from expression. * @param expressions a list of expression. diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java index aa603157a8..e573b8305c 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java @@ -309,6 +309,7 @@ void search() { new TotalHits(1L, TotalHits.Relation.EQUAL_TO), 1.0F)); when(searchHit.getSourceAsString()).thenReturn("{\"id\", 1}"); + when(searchHit.getInnerHits()).thenReturn(null); when(factory.construct(any())).thenReturn(exprTupleValue); // Mock second scroll request followed diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java index a86399ed32..dd5bfd4e6f 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java @@ -290,6 +290,7 @@ void search() throws IOException { new TotalHits(1L, TotalHits.Relation.EQUAL_TO), 1.0F)); when(searchHit.getSourceAsString()).thenReturn("{\"id\", 1}"); + when(searchHit.getInnerHits()).thenReturn(null); when(factory.construct(any())).thenReturn(exprTupleValue); // Mock second scroll request followed diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 1b9a8b7e65..8f2c954f65 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -34,6 +34,8 @@ import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.time.Instant; @@ -94,6 +96,23 @@ public void constructNullValue() { assertTrue(new OpenSearchJsonContent(null).isNull()); } + @Test + public void iterateArrayValue() throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + var arrayIt = new OpenSearchJsonContent(mapper.readTree("[\"zz\",\"bb\"]")).array(); + assertTrue(arrayIt.next().stringValue().equals("zz")); + assertTrue(arrayIt.next().stringValue().equals("bb")); + assertTrue(!arrayIt.hasNext()); + } + + @Test + public void iterateArrayValueWithOneElement() throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + var arrayIt = new OpenSearchJsonContent(mapper.readTree("[\"zz\"]")).array(); + assertTrue(arrayIt.next().stringValue().equals("zz")); + assertTrue(!arrayIt.hasNext()); + } + @Test public void constructNullArrayValue() { assertEquals(nullValue(), tupleValue("{\"intV\":[]}").get("intV")); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java index 857ff601e1..f1fcaf677f 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java @@ -28,6 +28,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; @@ -59,6 +60,7 @@ import org.opensearch.sql.opensearch.planner.physical.MLOperator; import org.opensearch.sql.opensearch.setting.OpenSearchSettings; import org.opensearch.sql.opensearch.storage.OpenSearchIndexScan; +import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanDSL; @@ -104,10 +106,8 @@ public void testProtectIndexScan() { ImmutableMap.of(ref("name", STRING), ref("lastname", STRING)); Pair newEvalField = ImmutablePair.of(ref("name1", STRING), ref("name", STRING)); - Integer sortCount = 100; Pair sortField = ImmutablePair.of(DEFAULT_ASC, ref("name1", STRING)); - Integer size = 200; Integer limit = 10; Integer offset = 10; @@ -314,6 +314,21 @@ public void testVisitML() { executionProtector.visitML(mlOperator, null)); } + @Test + public void testVisitNested() { + Set args = Set.of("message.info"); + Map> groupedFieldsByPath = + Map.of("message", List.of("message.info")); + NestedOperator nestedOperator = + new NestedOperator( + values(emptyList()), + args, + groupedFieldsByPath); + + assertEquals(executionProtector.doProtect(nestedOperator), + executionProtector.visitNested(nestedOperator, values(emptyList()))); + } + PhysicalPlan resourceMonitor(PhysicalPlan input) { return new ResourceMonitorPlan(input, resourceMonitor); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java index 85e259a400..187f319d44 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java @@ -9,39 +9,47 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opensearch.index.query.QueryBuilders.matchAllQuery; +import static org.opensearch.index.query.QueryBuilders.nestedQuery; import static org.opensearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME; import static org.opensearch.search.sort.SortOrder.ASC; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.lang3.tuple.Pair; +import org.apache.lucene.search.join.ScoreMode; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.InnerHitBuilder; +import org.opensearch.index.query.NestedQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.FetchSourceContext; import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.sql.common.setting.Settings; -import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; +import org.opensearch.sql.planner.logical.LogicalNested; @ExtendWith(MockitoExtension.class) public class OpenSearchRequestBuilderTest { @@ -216,6 +224,106 @@ void testPushDownProject() { requestBuilder.getSourceBuilder()); } + @Test + void testPushDownNested() { + List> args = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, args, projectList); + requestBuilder.pushDownNested(nested.getFields()); + + NestedQueryBuilder nestedQuery = nestedQuery("message", matchAllQuery(), ScoreMode.None) + .innerHit(new InnerHitBuilder().setFetchSourceContext( + new FetchSourceContext(true, new String[]{"message.info"}, null))); + + assertEquals( + new SearchSourceBuilder() + .query(QueryBuilders.boolQuery().filter(QueryBuilders.boolQuery().must(nestedQuery))) + .from(DEFAULT_OFFSET) + .size(DEFAULT_LIMIT) + .timeout(DEFAULT_QUERY_TIMEOUT), + requestBuilder.getSourceBuilder()); + } + + @Test + void testPushDownMultipleNestedWithSamePath() { + List> args = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ), + Map.of( + "field", new ReferenceExpression("message.from", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null), + new NamedExpression("message.from", DSL.nested(DSL.ref("message.from", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, args, projectList); + requestBuilder.pushDownNested(nested.getFields()); + + NestedQueryBuilder nestedQuery = nestedQuery("message", matchAllQuery(), ScoreMode.None) + .innerHit(new InnerHitBuilder().setFetchSourceContext( + new FetchSourceContext(true, new String[]{"message.info", "message.from"}, null))); + assertEquals( + new SearchSourceBuilder() + .query(QueryBuilders.boolQuery().filter(QueryBuilders.boolQuery().must(nestedQuery))) + .from(DEFAULT_OFFSET) + .size(DEFAULT_LIMIT) + .timeout(DEFAULT_QUERY_TIMEOUT), + requestBuilder.getSourceBuilder()); + } + + @Test + void testPushDownNestedWithFilter() { + List> args = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, args, projectList); + requestBuilder.getSourceBuilder().query(QueryBuilders.rangeQuery("myNum").gt(3)); + requestBuilder.pushDownNested(nested.getFields()); + + NestedQueryBuilder nestedQuery = nestedQuery("message", matchAllQuery(), ScoreMode.None) + .innerHit(new InnerHitBuilder().setFetchSourceContext( + new FetchSourceContext(true, new String[]{"message.info"}, null))); + + assertEquals( + new SearchSourceBuilder() + .query( + QueryBuilders.boolQuery().filter( + QueryBuilders.boolQuery() + .must(QueryBuilders.rangeQuery("myNum").gt(3)) + .must(nestedQuery) + ) + ) + .from(DEFAULT_OFFSET) + .size(DEFAULT_LIMIT) + .timeout(DEFAULT_QUERY_TIMEOUT), + requestBuilder.getSourceBuilder()); + } + @Test void testPushTypeMapping() { Map typeMapping = Map.of("intA", OpenSearchDataType.of(INTEGER)); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java index 92b47bc7da..65568cf5f1 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java @@ -107,6 +107,8 @@ void iterator() { when(searchHit1.getSourceAsString()).thenReturn("{\"id1\", 1}"); when(searchHit2.getSourceAsString()).thenReturn("{\"id1\", 2}"); + when(searchHit1.getInnerHits()).thenReturn(null); + when(searchHit2.getInnerHits()).thenReturn(null); when(factory.construct(any())).thenReturn(exprTupleValue1).thenReturn(exprTupleValue2); int i = 0; @@ -237,6 +239,31 @@ void iterator_metafields_scoreNaN() { } } + @Test + void iterator_with_inner_hits() { + when(searchResponse.getHits()) + .thenReturn( + new SearchHits( + new SearchHit[] {searchHit1}, + new TotalHits(2L, TotalHits.Relation.EQUAL_TO), + 1.0F)); + when(searchHit1.getSourceAsString()).thenReturn("{\"id1\", 1}"); + when(searchHit1.getSourceAsMap()).thenReturn(Map.of("id1", 1)); + when(searchHit1.getInnerHits()).thenReturn( + Map.of( + "innerHit", + new SearchHits( + new SearchHit[] {searchHit1}, + new TotalHits(2L, TotalHits.Relation.EQUAL_TO), + 1.0F))); + + when(factory.construct(any())).thenReturn(exprTupleValue1); + + for (ExprValue hit : new OpenSearchResponse(searchResponse, factory, includes)) { + assertEquals(exprTupleValue1, hit); + } + } + @Test void response_is_aggregation_when_aggregation_not_empty() { when(searchResponse.getAggregations()).thenReturn(aggregations); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 852a5a71bc..fa98f5a3b9 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -23,6 +23,7 @@ import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.highlight; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; +import static org.opensearch.sql.planner.logical.LogicalPlanDSL.nested; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.project; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.relation; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.sort; @@ -30,6 +31,7 @@ import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_FILTER; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_HIGHLIGHT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_LIMIT; +import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_NESTED; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_PROJECT; import static org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown.PUSH_DOWN_SORT; @@ -68,6 +70,7 @@ import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.FunctionExpression; import org.opensearch.sql.expression.HighlightExpression; +import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; @@ -78,6 +81,7 @@ import org.opensearch.sql.opensearch.storage.OpenSearchIndexScan; import org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder; import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalNested; import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.optimizer.LogicalPlanOptimizer; import org.opensearch.sql.planner.optimizer.rule.read.CreateTableScanBuilder; @@ -384,6 +388,39 @@ void test_highlight_push_down() { ); } + @Test + void test_nested_push_down() { + List> args = List.of( + Map.of( + "field", new ReferenceExpression("message.info", STRING), + "path", new ReferenceExpression("message", STRING) + ) + ); + + List projectList = + List.of( + new NamedExpression("message.info", DSL.nested(DSL.ref("message.info", STRING)), null) + ); + + LogicalNested nested = new LogicalNested(null, args, projectList); + + assertEqualsAfterOptimization( + project( + nested( + indexScanBuilder( + withNestedPushedDown(nested.getFields())), args, projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING))) + ), + project( + nested( + relation("schema", table), args, projectList), + DSL.named("message.info", + DSL.nested(DSL.ref("message.info", STRING))) + ) + ); + } + /** * SELECT avg(intV) FROM schema WHERE intV = 1 GROUP BY string_value. */ @@ -715,6 +752,10 @@ private Runnable withHighlightPushedDown(String field, Map argu return () -> verify(requestBuilder, times(1)).pushDownHighlight(field, arguments); } + private Runnable withNestedPushedDown(List> fields) { + return () -> verify(requestBuilder, times(1)).pushDownNested(fields); + } + private Runnable withTrackedScoresPushedDown(boolean trackScores) { return () -> verify(requestBuilder, times(1)).pushDownTrackedScore(trackScores); } @@ -749,6 +790,7 @@ private LogicalPlan optimize(LogicalPlan plan) { PUSH_DOWN_SORT, PUSH_DOWN_LIMIT, PUSH_DOWN_HIGHLIGHT, + PUSH_DOWN_NESTED, PUSH_DOWN_PROJECT)); return optimizer.optimize(plan); } diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 69fcdbe787..ebc2b8747e 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -388,6 +388,7 @@ scalarFunctionName | textFunctionName | flowControlFunctionName | systemFunctionName + | nestedFunctionName ; specificFunction @@ -561,6 +562,10 @@ systemFunctionName : TYPEOF ; +nestedFunctionName + : NESTED + ; + scoreRelevanceFunctionName : SCORE | SCOREQUERY | SCORE_QUERY ; diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index ad660f1e33..bad0543e02 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.between; import static org.opensearch.sql.ast.dsl.AstDSL.not; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; +import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE; diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index 5912a76f28..39fe8811b5 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -635,6 +635,18 @@ public void can_parse_wildcard_query_relevance_function() { + "boost=1.5, case_insensitive=true, rewrite=\"scoring_boolean\")")); } + @Test + public void can_parse_nested_function() { + assertNotNull( + parser.parse("SELECT NESTED(FIELD.DAYOFWEEK) FROM TEST")); + assertNotNull( + parser.parse("SELECT NESTED('FIELD.DAYOFWEEK') FROM TEST")); + assertNotNull( + parser.parse("SELECT SUM(NESTED(FIELD.SUBFIELD)) FROM TEST")); + assertNotNull( + parser.parse("SELECT NESTED(FIELD.DAYOFWEEK, PATH) FROM TEST")); + } + @Test public void can_parse_yearweek_function() { assertNotNull(parser.parse("SELECT yearweek('1987-01-01')")); diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java index 64a7445dc8..2f19fc1f3f 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java @@ -33,21 +33,13 @@ import com.google.common.collect.ImmutableList; import java.util.HashMap; -import java.util.List; import java.util.Map; -import java.util.stream.Stream; -import org.antlr.v4.runtime.tree.ParseTree; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.antlr.SyntaxCheckException; -import org.opensearch.sql.sql.antlr.SQLSyntaxParser; class AstBuilderTest extends AstBuilderTestBase { @@ -696,5 +688,4 @@ public void can_build_string_literal_highlight() { buildAST("SELECT highlight(\"fieldA\") FROM test") ); } - }