[release-20.0-rc] feat: add a LIMIT 1 on EXISTS subqueries to limit n…

…etwork overhead (#16153) (#16192) Signed-off-by: Andres Taylor <andres@planetscale.com> Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com>
vitessio · Jun 15, 2024 · 6986d74 · 6986d74
1 parent 1f308de
commit 6986d74
Show file tree

Hide file tree

Showing 9 changed files with 485 additions and 89 deletions.
diff --git a/go/vt/vtgate/planbuilder/operators/query_planning.go b/go/vt/vtgate/planbuilder/operators/query_planning.go
@@ -19,6 +19,7 @@ package operators
 import (
 	"fmt"
 	"io"
+	"strconv"
 
 	"vitess.io/vitess/go/vt/sqlparser"
 	"vitess.io/vitess/go/vt/vterrors"
@@ -244,8 +245,170 @@ func tryPushLimit(ctx *plancontext.PlanningContext, in *Limit) (Operator, *Apply
 		}
 
 		return src, Rewrote(fmt.Sprintf("push limit to %s of apply join", side))
+	case *Limit:
+		combinedLimit := mergeLimits(in.AST, src.AST)
+		if combinedLimit == nil {
+			break
+		}
+		// we can remove the other LIMIT
+		in.AST = combinedLimit
+		in.Source = src.Source
+		return in, Rewrote("merged two limits")
+
+	}
+	return setUpperLimit(in)
+}
+
+func mergeLimits(l1, l2 *sqlparser.Limit) *sqlparser.Limit {
+	// To merge two relational LIMIT operators with LIMIT and OFFSET, we need to combine their
+	// LIMIT and OFFSET values appropriately.
+	// Let's denote the first LIMIT operator as LIMIT_1 with LIMIT_1 and OFFSET_1,
+	// and the second LIMIT operator as LIMIT_2 with LIMIT_2 and OFFSET_2.
+	// The second LIMIT operator receives the output of the first LIMIT operator, meaning the first LIMIT and
+	// OFFSET are applied first, and then the second LIMIT and OFFSET are applied to the resulting subset.
+	//
+	// The goal is to determine the effective combined LIMIT and OFFSET values when applying these two operators sequentially.
+	//
+	// Combined Offset:
+	// The combined offset (OFFSET_combined) is the sum of the two offsets because you need to skip OFFSET_1 rows first,
+	// and then apply the second offset OFFSET_2 to the result.
+	// OFFSET_combined = OFFSET_1 + OFFSET_2
+
+	// Combined Limit:
+	// The combined limit (LIMIT_combined) needs to account for both limits. The effective limit should not exceed the rows returned by the first limit,
+	// so it is the minimum of the remaining rows after the first offset and the second limit.
+	// LIMIT_combined = min(LIMIT_2, LIMIT_1 - OFFSET_2)
+
+	// Note: If LIMIT_1 - OFFSET_2 is negative or zero, it means there are no rows left to limit, so LIMIT_combined should be zero.
+
+	// Example:
+	// First LIMIT operator: LIMIT 10 OFFSET 5 (LIMIT_1 = 10, OFFSET_1 = 5)
+	// Second LIMIT operator: LIMIT 7 OFFSET 3 (LIMIT_2 = 7, OFFSET_2 = 3)
+
+	// Calculations:
+	// Combined OFFSET:
+	// OFFSET_combined = 5 + 3 = 8
+
+	// Combined LIMIT:
+	// remaining rows after OFFSET_2 = 10 - 3 = 7
+	// LIMIT_combined = min(7, 7) = 7
+
+	// So, the combined result would be:
+	// LIMIT 7 OFFSET 8
+
+	// This method ensures that the final combined LIMIT and OFFSET correctly reflect the sequential application of the two original operators.
+	combinedLimit, failed := mergeLimitExpressions(l1.Rowcount, l2.Rowcount, l2.Offset)
+	if failed {
+		return nil
+	}
+	combinedOffset, failed := mergeOffsetExpressions(l1.Offset, l2.Offset)
+	if failed {
+		return nil
+	}
+
+	return &sqlparser.Limit{
+		Offset:   combinedOffset,
+		Rowcount: combinedLimit,
+	}
+}
+
+func mergeOffsetExpressions(e1, e2 sqlparser.Expr) (expr sqlparser.Expr, failed bool) {
+	switch {
+	case e1 == nil && e2 == nil:
+		return nil, false
+	case e1 == nil:
+		return e2, false
+	case e2 == nil:
+		return e1, false
 	default:
-		return setUpperLimit(in)
+		v1str, ok := e1.(*sqlparser.Literal)
+		if !ok {
+			return nil, true
+		}
+		v2str, ok := e2.(*sqlparser.Literal)
+		if !ok {
+			return nil, true
+		}
+		v1, _ := strconv.Atoi(v1str.Val)
+		v2, _ := strconv.Atoi(v2str.Val)
+		return sqlparser.NewIntLiteral(strconv.Itoa(v1 + v2)), false
+	}
+}
+
+// mergeLimitExpressions merges two LIMIT expressions with an OFFSET expression.
+// l1: First LIMIT expression.
+// l2: Second LIMIT expression.
+// off2: Second OFFSET expression.
+// Returns the merged LIMIT expression and a boolean indicating if the merge failed.
+func mergeLimitExpressions(l1, l2, off2 sqlparser.Expr) (expr sqlparser.Expr, failed bool) {
+	switch {
+	// If both limits are nil, there's nothing to merge, return nil without failure.
+	case l1 == nil && l2 == nil:
+		return nil, false
+
+	// If the first limit is nil, the second limit determines the final limit.
+	case l1 == nil:
+		return l2, false
+
+	// If the second limit is nil, calculate the remaining limit after applying the offset to the first limit.
+	case l2 == nil:
+		if off2 == nil {
+			// No offset, so the first limit is used directly.
+			return l1, false
+		}
+		off2, ok := off2.(*sqlparser.Literal)
+		if !ok {
+			// If the offset is not a literal, fail the merge.
+			return nil, true
+		}
+		lim1str, ok := l1.(*sqlparser.Literal)
+		if !ok {
+			// If the first limit is not a literal, return the first limit without failing.
+			return nil, false
+		}
+		// Calculate the remaining limit after the offset.
+		off2int, _ := strconv.Atoi(off2.Val)
+		l1int, _ := strconv.Atoi(lim1str.Val)
+		lim := l1int - off2int
+		if lim < 0 {
+			lim = 0
+		}
+		return sqlparser.NewIntLiteral(strconv.Itoa(lim)), false
+
+	default:
+		v1str, ok1 := l1.(*sqlparser.Literal)
+		if ok1 && v1str.Val == "1" {
+			// If the first limit is "1", it dominates, so return it.
+			return l1, false
+		}
+		v2str, ok2 := l2.(*sqlparser.Literal)
+		if ok2 && v2str.Val == "1" {
+			// If the second limit is "1", it dominates, so return it.
+			return l2, false
+		}
+		if !ok1 || !ok2 {
+			// If either limit is not a literal, fail the merge.
+			return nil, true
+		}
+
+		var off2int int
+		if off2 != nil {
+			off2, ok := off2.(*sqlparser.Literal)
+			if !ok {
+				// If the offset is not a literal, fail the merge.
+				return nil, true
+			}
+			off2int, _ = strconv.Atoi(off2.Val)
+		}
+
+		v1, _ := strconv.Atoi(v1str.Val)
+		v2, _ := strconv.Atoi(v2str.Val)
+		lim := min(v2, v1-off2int)
+		if lim < 0 {
+			// If the combined limit is negative, set it to zero.
+			lim = 0
+		}
+		return sqlparser.NewIntLiteral(strconv.Itoa(lim)), false
 	}
 }
 

diff --git a/go/vt/vtgate/planbuilder/operators/query_planning_test.go b/go/vt/vtgate/planbuilder/operators/query_planning_test.go
@@ -0,0 +1,185 @@
+/*
+Copyright 2024 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package operators
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"vitess.io/vitess/go/vt/sqlparser"
+)
+
+func TestMergeOffsetExpressions(t *testing.T) {
+	tests := []struct {
+		name           string
+		offset1        sqlparser.Expr
+		offset2        sqlparser.Expr
+		expectedExpr   sqlparser.Expr
+		expectedFailed bool
+	}{
+		{
+			name:           "both offsets are integers",
+			offset1:        sqlparser.NewIntLiteral("5"),
+			offset2:        sqlparser.NewIntLiteral("3"),
+			expectedExpr:   sqlparser.NewIntLiteral("8"),
+			expectedFailed: false,
+		},
+		{
+			name:           "first offset is nil",
+			offset1:        nil,
+			offset2:        sqlparser.NewIntLiteral("3"),
+			expectedExpr:   sqlparser.NewIntLiteral("3"),
+			expectedFailed: false,
+		},
+		{
+			name:           "second offset is nil",
+			offset1:        sqlparser.NewIntLiteral("5"),
+			offset2:        nil,
+			expectedExpr:   sqlparser.NewIntLiteral("5"),
+			expectedFailed: false,
+		},
+		{
+			name:           "both offsets are nil",
+			offset1:        nil,
+			offset2:        nil,
+			expectedExpr:   nil,
+			expectedFailed: false,
+		},
+		{
+			name:           "first offset is argument",
+			offset1:        sqlparser.NewArgument("offset1"),
+			offset2:        sqlparser.NewIntLiteral("3"),
+			expectedExpr:   nil,
+			expectedFailed: true,
+		},
+		{
+			name:           "second offset is argument",
+			offset1:        sqlparser.NewIntLiteral("5"),
+			offset2:        sqlparser.NewArgument("offset2"),
+			expectedExpr:   nil,
+			expectedFailed: true,
+		},
+		{
+			name:           "both offsets are arguments",
+			offset1:        sqlparser.NewArgument("offset1"),
+			offset2:        sqlparser.NewArgument("offset2"),
+			expectedExpr:   nil,
+			expectedFailed: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			expr, failed := mergeOffsetExpressions(tt.offset1, tt.offset2)
+			assert.Equal(t, tt.expectedExpr, expr)
+			assert.Equal(t, tt.expectedFailed, failed, "failed")
+		})
+	}
+}
+
+func TestMergeLimitExpressions(t *testing.T) {
+	tests := []struct {
+		name           string
+		limit1         sqlparser.Expr
+		limit2         sqlparser.Expr
+		offset2        sqlparser.Expr
+		expectedExpr   sqlparser.Expr
+		expectedFailed bool
+	}{
+		{
+			name:           "valid limits and offset",
+			limit1:         sqlparser.NewIntLiteral("10"),
+			limit2:         sqlparser.NewIntLiteral("7"),
+			offset2:        sqlparser.NewIntLiteral("3"),
+			expectedExpr:   sqlparser.NewIntLiteral("7"),
+			expectedFailed: false,
+		},
+		{
+			name:           "remaining rows after offset2 is zero",
+			limit1:         sqlparser.NewIntLiteral("3"),
+			limit2:         sqlparser.NewIntLiteral("7"),
+			offset2:        sqlparser.NewIntLiteral("5"),
+			expectedExpr:   sqlparser.NewIntLiteral("0"),
+			expectedFailed: false,
+		},
+		{
+			name:           "first limit is nil",
+			limit1:         nil,
+			limit2:         sqlparser.NewIntLiteral("7"),
+			offset2:        sqlparser.NewIntLiteral("3"),
+			expectedExpr:   sqlparser.NewIntLiteral("7"),
+			expectedFailed: false,
+		},
+		{
+			name:           "second limit is nil",
+			limit1:         sqlparser.NewIntLiteral("10"),
+			limit2:         nil,
+			offset2:        sqlparser.NewIntLiteral("3"),
+			expectedExpr:   sqlparser.NewIntLiteral("7"),
+			expectedFailed: false,
+		},
+		{
+			name:           "offset2 is nil",
+			limit1:         sqlparser.NewIntLiteral("10"),
+			limit2:         sqlparser.NewIntLiteral("7"),
+			offset2:        nil,
+			expectedExpr:   sqlparser.NewIntLiteral("7"),
+			expectedFailed: false,
+		},
+		{
+			name:           "first limit is argument",
+			limit1:         sqlparser.NewArgument("limit1"),
+			limit2:         sqlparser.NewIntLiteral("7"),
+			offset2:        sqlparser.NewIntLiteral("3"),
+			expectedExpr:   nil,
+			expectedFailed: true,
+		},
+		{
+			name:           "second limit is argument",
+			limit1:         sqlparser.NewIntLiteral("10"),
+			limit2:         sqlparser.NewArgument("limit2"),
+			offset2:        sqlparser.NewIntLiteral("3"),
+			expectedExpr:   nil,
+			expectedFailed: true,
+		},
+		{
+			name:           "offset2 is argument",
+			limit1:         sqlparser.NewIntLiteral("10"),
+			limit2:         sqlparser.NewIntLiteral("7"),
+			offset2:        sqlparser.NewArgument("offset2"),
+			expectedExpr:   nil,
+			expectedFailed: true,
+		},
+		{
+			name:           "all are arguments",
+			limit1:         sqlparser.NewArgument("limit1"),
+			limit2:         sqlparser.NewArgument("limit2"),
+			offset2:        sqlparser.NewArgument("offset2"),
+			expectedExpr:   nil,
+			expectedFailed: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			expr, failed := mergeLimitExpressions(tt.limit1, tt.limit2, tt.offset2)
+			assert.Equal(t, tt.expectedExpr, expr)
+			assert.Equal(t, tt.expectedFailed, failed, "failed")
+		})
+	}
+}
diff --git a/go/vt/vtgate/planbuilder/operators/subquery.go b/go/vt/vtgate/planbuilder/operators/subquery.go
@@ -228,11 +228,21 @@ func (sq *SubQuery) settle(ctx *plancontext.PlanningContext, outer Operator) Ope
 var correlatedSubqueryErr = vterrors.VT12001("correlated subquery is only supported for EXISTS")
 var subqueryNotAtTopErr = vterrors.VT12001("unmergable subquery can not be inside complex expression")
 
+func (sq *SubQuery) addLimit() {
+	// for a correlated subquery, we can add a limit 1 to the subquery
+	sq.Subquery = &Limit{
+		Source: sq.Subquery,
+		AST:    &sqlparser.Limit{Rowcount: sqlparser.NewIntLiteral("1")},
+		Top:    true,
+	}
+}
+
 func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operator) Operator {
 	if len(sq.Predicates) > 0 {
 		if sq.FilterType != opcode.PulloutExists {
 			panic(correlatedSubqueryErr)
 		}
+		sq.addLimit()
 		return outer
 	}
 
@@ -260,8 +270,10 @@ func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operato
 	var predicates []sqlparser.Expr
 	switch sq.FilterType {
 	case opcode.PulloutExists:
+		sq.addLimit()
 		predicates = append(predicates, sqlparser.NewArgument(hasValuesArg()))
 	case opcode.PulloutNotExists:
+		sq.addLimit()
 		sq.FilterType = opcode.PulloutExists // it's the same pullout as EXISTS, just with a NOT in front of the predicate
 		predicates = append(predicates, sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())))
 	case opcode.PulloutIn: