From 02bbd510e1a5f3264e40d9a0d38708f910a06d6f Mon Sep 17 00:00:00 2001 From: Torin Sandall Date: Wed, 8 Jul 2020 11:41:03 -0400 Subject: [PATCH] ast: Disqualify indexing if candidate found in nested comprehension In v0.20. we added support for indexing and caching of comprehensions in certain cases. There was a bug in the index construction that allowed for comprehensions to be indexed and cached incorrectly. Specifically, if any of the candidate keys appear in nested comprehensions, indexing should not be performed because the evaluator does not close over variable bindings from the outer scope when evaluating the comprehension (because this would require cache invalidation if there were multiple assignments to the variables in the outer scope). This fix updates the compiler to check if the candidates appear inside of any nested comprehensions. If a match is found, no indexing is performed. Fixes #2497 Signed-off-by: Torin Sandall --- ast/compile.go | 55 ++++++++++++++++++++++++++---- ast/compile_test.go | 15 ++++++++ docs/content/policy-performance.md | 10 +++++- 3 files changed, 73 insertions(+), 7 deletions(-) diff --git a/ast/compile.go b/ast/compile.go index 214e6d5706..6248a0676d 100644 --- a/ast/compile.go +++ b/ast/compile.go @@ -1601,6 +1601,7 @@ func getComprehensionIndex(arity func(Ref) int, candidates VarSet, expr *Expr) * outputs := outputVarsForBody(body, arity, ReservedVars) unsafe := body.Vars(SafetyCheckVisitorParams).Diff(outputs).Diff(ReservedVars) + if len(unsafe) > 0 { return nil } @@ -1608,20 +1609,30 @@ func getComprehensionIndex(arity func(Ref) int, candidates VarSet, expr *Expr) * // Similarly, ignore comprehensions that contain references with output variables // that intersect with the candidates. Indexing these comprehensions could worsen // performance. - vis := newComprehensionIndexRegressionCheckVisitor(candidates) - vis.Walk(body) - if vis.worse { + regressionVis := newComprehensionIndexRegressionCheckVisitor(candidates) + regressionVis.Walk(body) + if regressionVis.worse { return nil } - indexVars := candidates.Intersect(outputs) - if len(indexVars) == 0 { + // Check if any nested comprehensions close over candidates. If any intersection is found + // the comprehension cannot be cached because it would require closing over the candidates + // which the evaluator does not support today. + nestedVis := newComprehensionIndexNestedCandidateVisitor(candidates) + nestedVis.Walk(body) + if nestedVis.found { return nil } // Make a sorted set of variable names that will serve as the index key set. // Sort to ensure deterministic indexing. In future this could be relaxed - // if we can decide that one ordering is better than another. + // if we can decide that one ordering is better than another. If the set is + // empty, there is no indexing to do. + indexVars := candidates.Intersect(outputs) + if len(indexVars) == 0 { + return nil + } + result := make([]*Term, 0, len(indexVars)) for v := range indexVars { @@ -1690,6 +1701,38 @@ func (vis *comprehensionIndexRegressionCheckVisitor) assertEmptyIntersection(vs } } +type comprehensionIndexNestedCandidateVisitor struct { + candidates VarSet + nested bool + found bool +} + +func newComprehensionIndexNestedCandidateVisitor(candidates VarSet) *comprehensionIndexNestedCandidateVisitor { + return &comprehensionIndexNestedCandidateVisitor{ + candidates: candidates, + } +} + +func (vis *comprehensionIndexNestedCandidateVisitor) Walk(x interface{}) { + NewGenericVisitor(vis.visit).Walk(x) +} + +func (vis *comprehensionIndexNestedCandidateVisitor) visit(x interface{}) bool { + + if vis.found { + return true + } + + if v, ok := x.(Value); ok && IsComprehension(v) { + varVis := NewVarVisitor().WithParams(VarVisitorParams{SkipRefHead: true}) + varVis.Walk(v) + vis.found = len(varVis.Vars().Intersect(vis.candidates)) > 0 + return true + } + + return false +} + // ModuleTreeNode represents a node in the module tree. The module // tree is keyed by the package path. type ModuleTreeNode struct { diff --git a/ast/compile_test.go b/ast/compile_test.go index c67f28e61f..83f3444bd0 100644 --- a/ast/compile_test.go +++ b/ast/compile_test.go @@ -3526,6 +3526,21 @@ func TestCompilerBuildComprehensionIndexKeySet(t *testing.T) { ys = [y | y = input[j]] }`, }, + { + note: "skip: due to nested comprehension containing candidate", + module: ` + package test + + p { + x = input[i] # 'x' is a candidate + y = 2 # 'y' is a candidate + z = [1 | + x = data.foo[j] # 'x' is an index key + t = [1 | data.bar[k] = y] # 'y' disqualifies indexing because it is nested inside a comprehension + ] + } + `, + }, { note: "skip: avoid increasing runtime (func arg)", module: ` diff --git a/docs/content/policy-performance.md b/docs/content/policy-performance.md index d2b8116bb9..7bd169c4e6 100644 --- a/docs/content/policy-performance.md +++ b/docs/content/policy-performance.md @@ -228,7 +228,7 @@ In order to be indexed, comprehensions must meet the following conditions: 1. The expression containing the comprehension does not include a `with` statement. 1. The expression containing the comprehension is not negated. 1. The comprehension body is safe when considered independent from the outer query. -1. The comprehension body closes over at least one variable in the outer query and none of these variables appear as outputs in references or `walk()` calls. +1. The comprehension body closes over at least one variable in the outer query and none of these variables appear as outputs in references or `walk()` calls or inside nested comprehensions. The following examples show cases that are NOT indexed: @@ -262,6 +262,14 @@ not_indexed_because_reference_operand_closure { x := input[y].x ys := [y | x == input[y].z[_]] } + +not_indexed_because_nested_closure { + x = 1 + y = 2 + _ = [i | + x == input.foo[i] + _ = [j | y == input.bar[j]]] +} ``` > The 4th and 5th restrictions may be relaxed in the future.