From 21d66b4258028ef403b9127a6f51c4dca8c2fcbb Mon Sep 17 00:00:00 2001 From: Spas Bojanov Date: Thu, 28 May 2020 15:16:30 -0400 Subject: [PATCH 1/2] roachtest/version-upgrade: don't run schema change workload on 19.2 releases Fixes #47024. Release note (bug fix): The schema change workload is meant for testing the behavior of schema changes on clusters with nodes with min version 19.2. It will deadlock on earlier versions. --- pkg/cmd/roachtest/acceptance.go | 6 +----- .../roachtest/mixed_version_schemachange.go | 21 +++++++++++++------ pkg/cmd/roachtest/versionupgrade.go | 11 +++++++--- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/pkg/cmd/roachtest/acceptance.go b/pkg/cmd/roachtest/acceptance.go index 592e9b2f5ea5..5f4229383176 100644 --- a/pkg/cmd/roachtest/acceptance.go +++ b/pkg/cmd/roachtest/acceptance.go @@ -51,11 +51,7 @@ func registerAcceptance(r *testRegistry) { { name: "version-upgrade", fn: func(ctx context.Context, t *test, c *cluster) { - predV, err := PredecessorVersion(r.buildVersion) - if err != nil { - t.Fatal(err) - } - runVersionUpgrade(ctx, t, c, predV) + runVersionUpgrade(ctx, t, c, r.buildVersion) }, // This test doesn't like running on old versions because it upgrades to // the latest released version and then it tries to "head", where head is diff --git a/pkg/cmd/roachtest/mixed_version_schemachange.go b/pkg/cmd/roachtest/mixed_version_schemachange.go index 2995e49e38d4..8660dca40485 100644 --- a/pkg/cmd/roachtest/mixed_version_schemachange.go +++ b/pkg/cmd/roachtest/mixed_version_schemachange.go @@ -13,6 +13,8 @@ package main import ( "context" "fmt" + + "github.com/cockroachdb/cockroach/pkg/util/version" ) func registerSchemaChangeMixedVersions(r *testRegistry) { @@ -25,15 +27,11 @@ func registerSchemaChangeMixedVersions(r *testRegistry) { MinVersion: "v20.1.0", Cluster: makeClusterSpec(4), Run: func(ctx context.Context, t *test, c *cluster) { - predV, err := PredecessorVersion(r.buildVersion) - if err != nil { - 
t.Fatal(err) - } maxOps := 100 if local { maxOps = 10 } - runSchemaChangeMixedVersions(ctx, t, c, maxOps, predV) + runSchemaChangeMixedVersions(ctx, t, c, maxOps, r.buildVersion) }, }) } @@ -63,12 +61,23 @@ func runSchemaChangeWorkloadStep(maxOps int) versionStep { } func runSchemaChangeMixedVersions( - ctx context.Context, t *test, c *cluster, maxOps int, predecessorVersion string, + ctx context.Context, t *test, c *cluster, maxOps int, buildVersion version.Version, ) { + predecessorVersion, err := PredecessorVersion(buildVersion) + if err != nil { + t.Fatal(err) + } + // An empty string will lead to the cockroach binary specified by flag // `cockroach` to be used. const mainVersion = "" schemaChangeStep := runSchemaChangeWorkloadStep(maxOps) + if buildVersion.Major() < 20 { + // Schema change workload is meant to run only on versions 19.2 or higher. + // If the main version is below 20.1 then the predecessor version will be + // below 19.2. + schemaChangeStep = nil + } u := newVersionUpgradeTest(c, uploadAndStartFromCheckpointFixture(c.All(), predecessorVersion), diff --git a/pkg/cmd/roachtest/versionupgrade.go b/pkg/cmd/roachtest/versionupgrade.go index 19861239a572..02dcee1f0c71 100644 --- a/pkg/cmd/roachtest/versionupgrade.go +++ b/pkg/cmd/roachtest/versionupgrade.go @@ -72,7 +72,11 @@ DROP TABLE test.t; `), } -func runVersionUpgrade(ctx context.Context, t *test, c *cluster, predecessorVersion string) { +func runVersionUpgrade(ctx context.Context, t *test, c *cluster, buildVersion version.Version) { + predecessorVersion, err := PredecessorVersion(buildVersion) + if err != nil { + t.Fatal(err) + } // This test uses fixtures and we do not have encrypted fixtures right now. 
c.encryptDefault = false @@ -159,8 +163,9 @@ func (u *versionUpgradeTest) run(ctx context.Context, t *test) { }() for _, step := range u.steps { - step(ctx, t, u) - + if step != nil { + step(ctx, t, u) + } } } From e039beb45d49620e9c71bb9f20d8a6abe76a9048 Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Fri, 22 May 2020 17:07:51 -0700 Subject: [PATCH 2/2] opt: create library that determines how joins affect input rows Previously, there was no simple way to determine whether all rows from a join input will be included in its output, nor whether input rows will be duplicated by the join. This patch adds a library that constructs a Multiplicity struct for join operators. The Multiplicity can be queried for information about how a join will affect its input rows (e.g. duplicated, filtered and/or null-extended). The existing SimplifyLeftJoinWithFilters rule has been refactored to use this library. The Multiplicity library will also be useful for future join elimination and limit pushdown rules. 
Release note: None --- pkg/sql/logictest/testdata/logic_test/fk | 21 + pkg/sql/logictest/testdata/logic_test/join | 19 + pkg/sql/opt/exec/execbuilder/testdata/enums | 1 - pkg/sql/opt/memo/expr_format.go | 6 + pkg/sql/opt/memo/multiplicity_builder.go | 426 +++++++++++++ pkg/sql/opt/memo/testdata/logprops/join | 600 ++++++++++++++++++ pkg/sql/opt/memo/testdata/logprops/scalar | 1 + pkg/sql/opt/memo/testdata/logprops/upsert | 6 + .../opt/memo/testdata/logprops/virtual-scan | 1 + pkg/sql/opt/memo/testdata/memo | 2 +- pkg/sql/opt/metadata.go | 11 - pkg/sql/opt/norm/join_funcs.go | 260 +------- pkg/sql/opt/norm/rules/join.opt | 4 +- pkg/sql/opt/norm/testdata/rules/join | 23 + pkg/sql/opt/norm/testdata/rules/with | 4 + pkg/sql/opt/props/logical.go | 30 +- pkg/sql/opt/props/multiplicity.go | 178 ++++++ pkg/sql/opt/props/multiplicity_test.go | 150 +++++ pkg/sql/opt/testutils/opttester/opt_tester.go | 3 + pkg/sql/opt/xform/testdata/rules/groupby | 6 +- pkg/sql/opt/xform/testdata/rules/join | 8 +- pkg/sql/opt/xform/testdata/rules/join_order | 5 +- pkg/sql/opt/xform/testdata/rules/limit | 2 +- pkg/sql/opt/xform/testdata/rules/select | 2 +- 24 files changed, 1475 insertions(+), 294 deletions(-) create mode 100644 pkg/sql/opt/memo/multiplicity_builder.go create mode 100644 pkg/sql/opt/props/multiplicity.go create mode 100644 pkg/sql/opt/props/multiplicity_test.go diff --git a/pkg/sql/logictest/testdata/logic_test/fk b/pkg/sql/logictest/testdata/logic_test/fk index f5b2f9fff4b6..6b3aec41f747 100644 --- a/pkg/sql/logictest/testdata/logic_test/fk +++ b/pkg/sql/logictest/testdata/logic_test/fk @@ -3198,3 +3198,24 @@ ALTER TABLE t2 DROP CONSTRAINT fk1; ALTER TABLE t2 DROP CONSTRAINT fk2; TRUNCATE statement ok DROP TABLE t2 CASCADE; DROP TABLE t1 CASCADE + +# Regression test for #49628. 
+statement ok +CREATE TABLE xyz (x INT, y INT, z INT, PRIMARY KEY (x, y, z)); +CREATE TABLE fk_ref +( + a INT NOT NULL, + b INT, + c INT NOT NULL, + FOREIGN KEY (a, b, c) REFERENCES xyz (x, y, z) +); +INSERT INTO fk_ref (VALUES (1, NULL, 1)); + +query IIIIII +SELECT * FROM fk_ref LEFT JOIN xyz ON a = x +---- +1 NULL 1 NULL NULL NULL + +statement ok +DROP TABLE fk_ref; +DROP TABLE xyz; diff --git a/pkg/sql/logictest/testdata/logic_test/join b/pkg/sql/logictest/testdata/logic_test/join index a4eeeb4a2905..8b4804340bdd 100644 --- a/pkg/sql/logictest/testdata/logic_test/join +++ b/pkg/sql/logictest/testdata/logic_test/join @@ -1093,3 +1093,22 @@ CREATE TABLE t44746_1(c1 INT) # Note: an "error parsing regexp" would also be acceptable here. statement ok SELECT * FROM t44746_0 FULL JOIN t44746_1 ON (SUBSTRING('', ')') = '') = (c1 > 0) + +# Regression test for #49630. +statement ok +DROP TABLE empty; +CREATE TABLE xy (x INT PRIMARY KEY, y INT); +CREATE TABLE fk_ref (r INT NOT NULL REFERENCES xy (x)); +CREATE TABLE empty (v INT); +INSERT INTO xy (VALUES (1, 1)); +INSERT INTO fk_ref (VALUES (1)); + +query IIII +SELECT * FROM fk_ref LEFT JOIN (SELECT * FROM xy INNER JOIN empty ON True) ON r = x +---- +1 NULL NULL NULL + +statement ok +DROP TABLE empty; +DROP TABLE fk_ref; +DROP TABLE xy; diff --git a/pkg/sql/opt/exec/execbuilder/testdata/enums b/pkg/sql/opt/exec/execbuilder/testdata/enums index 2c48f04e995d..05d76e6ca15b 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/enums +++ b/pkg/sql/opt/exec/execbuilder/testdata/enums @@ -53,4 +53,3 @@ scan t └── constraint: /1 ├── [/'hello' - /'hello'] └── [/'hi' - /'hi'] - diff --git a/pkg/sql/opt/memo/expr_format.go b/pkg/sql/opt/memo/expr_format.go index 107f871ae9ba..d317beec2e8f 100644 --- a/pkg/sql/opt/memo/expr_format.go +++ b/pkg/sql/opt/memo/expr_format.go @@ -686,6 +686,12 @@ func (f *ExprFmtCtx) formatRelational(e RelExpr, tp treeprinter.Node) { if r.JoinSize > 1 { tp.Childf("join-size: %d", r.JoinSize) } + switch e.Op() { 
+ case opt.InnerJoinOp, opt.LeftJoinOp, opt.FullJoinOp: + if s := r.MultiplicityProps.String(); (r.Available&props.MultiplicityProps) != 0 && s != "" { + tp.Childf("multiplicity: %s", s) + } + } if withUses := relational.Shared.Rule.WithUses; len(withUses) > 0 { n := tp.Childf("cte-uses") ids := make([]opt.WithID, 0, len(withUses)) diff --git a/pkg/sql/opt/memo/multiplicity_builder.go b/pkg/sql/opt/memo/multiplicity_builder.go new file mode 100644 index 000000000000..caf4d9c3544f --- /dev/null +++ b/pkg/sql/opt/memo/multiplicity_builder.go @@ -0,0 +1,426 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package memo + +import ( + "github.com/cockroachdb/cockroach/pkg/sql/opt" + "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" + "github.com/cockroachdb/cockroach/pkg/sql/opt/props" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/errors" +) + +// DeriveJoinMultiplicity returns a JoinMultiplicity struct that describes how a +// join operator will affect the rows of its left and right inputs (e.g. +// duplicated and/or filtered). When the function is called on an operator other +// than an InnerJoin, a LeftJoin, or a FullJoin, it simply populates the +// UnfilteredCols field of the JoinMultiplicity for that operator and leaves the +// join fields unchanged. +// +// DeriveJoinMultiplicity recursively derives the UnfilteredCols field and +// populates the props.Relational.Rule.MultiplicityProps field as it goes to +// make future calls faster. +func DeriveJoinMultiplicity(in RelExpr) props.JoinMultiplicity { + // If the MultiplicityProps property has already been derived, return it + // immediately. 
+ relational := in.Relational() + if relational.IsAvailable(props.MultiplicityProps) { + return relational.Rule.MultiplicityProps + } + relational.Rule.Available |= props.MultiplicityProps + var multiplicity props.JoinMultiplicity + + // Derive MultiplicityProps now. + switch t := in.(type) { + case *ScanExpr: + // All un-limited, unconstrained output columns are unfiltered columns. + if t.HardLimit == 0 && t.Constraint == nil { + multiplicity.UnfilteredCols = relational.OutputCols + } + + case *ProjectExpr: + // Project never filters rows, so it passes through unfiltered columns. + unfilteredCols := DeriveJoinMultiplicity(t.Input).UnfilteredCols + multiplicity.UnfilteredCols = unfilteredCols.Intersection(relational.OutputCols) + + case *InnerJoinExpr, *LeftJoinExpr, *FullJoinExpr: + left := t.Child(0).(RelExpr) + right := t.Child(1).(RelExpr) + filters := *t.Child(2).(*FiltersExpr) + multiplicity = GetJoinMultiplicityFromInputs(t.Op(), left, right, filters) + + // Use the JoinMultiplicity to determine whether unfiltered columns can be + // passed through. + if multiplicity.JoinPreservesLeftRows() { + multiplicity.UnfilteredCols.UnionWith(DeriveJoinMultiplicity(left).UnfilteredCols) + } + if multiplicity.JoinPreservesRightRows() { + multiplicity.UnfilteredCols.UnionWith(DeriveJoinMultiplicity(right).UnfilteredCols) + } + + default: + // An empty JoinMultiplicity is returned. + } + relational.Rule.MultiplicityProps = multiplicity + return relational.Rule.MultiplicityProps +} + +// GetJoinMultiplicityFromInputs returns a JoinMultiplicity that describes how a +// join of the given type with the given inputs and filters will affect the rows +// of its inputs. When possible, DeriveJoinMultiplicity should be called instead +// because GetJoinMultiplicityFromInputs cannot take advantage of a previously +// calculated JoinMultiplicity. 
+func GetJoinMultiplicityFromInputs( + joinOp opt.Operator, left, right RelExpr, filters FiltersExpr, +) props.JoinMultiplicity { + + switch joinOp { + case opt.InnerJoinOp, opt.LeftJoinOp, opt.FullJoinOp: + + default: + panic(errors.AssertionFailedf("invalid operator: %v", joinOp)) + } + + isLeftOuter := joinOp == opt.LeftJoinOp || joinOp == opt.FullJoinOp + isRightOuter := joinOp == opt.FullJoinOp + + leftMultiplicity := getJoinLeftMultiplicityVal(left, right, filters, isLeftOuter) + rightMultiplicity := getJoinLeftMultiplicityVal(right, left, filters, isRightOuter) + + return props.JoinMultiplicity{ + LeftMultiplicity: leftMultiplicity, + RightMultiplicity: rightMultiplicity, + } +} + +// getJoinLeftMultiplicityVal returns a MultiplicityValue that describes whether +// a join with the given properties would duplicate or filter the rows of its +// left input. +// +// The duplicated and filtered flags will be set unless it can be statically +// proven that no rows will be duplicated or filtered respectively. +func getJoinLeftMultiplicityVal( + left, right RelExpr, filters FiltersExpr, isLeftOuter bool, +) props.MultiplicityValue { + multiplicity := props.MultiplicityIndeterminateVal + if filtersMatchLeftRowsAtMostOnce(left, right, filters) { + multiplicity |= props.MultiplicityNotDuplicatedVal + } + if isLeftOuter || filtersMatchAllLeftRows(left, right, filters) { + multiplicity |= props.MultiplicityPreservedVal + } + return multiplicity +} + +// filtersMatchLeftRowsAtMostOnce returns true if a join expression with the +// given ON filters is guaranteed to match every left row at most once. This is +// the case when either of the following conditions is satisfied: +// +// 1. The join is a cross join and the right input has zero or one rows. +// +// 2. The equivalence closure of the left columns over the filter functional +// dependencies forms a lax key over the right columns. 
+// +// Why is condition #2 sufficient to ensure that no left rows are matched more +// than once? +// * It implies that left columns are being equated with a lax key from the +// right input. +// * A lax key means that the right rows being equated are unique apart from +// nulls. +// * Equalities are null-rejecting and the right rows are otherwise unique, so +// no left row can be equal to more than one right row on the filters. +// * Therefore, no left row will be matched more than once. +// +// As an example: +// +// CREATE TABLE x_tab (x INT); +// CREATE TABLE a_tab (a INT UNIQUE); +// +// x a +// ---- ---- +// NULL NULL +// 1 1 +// 1 2 +// 2 3 +// +// SELECT * FROM x_tab INNER JOIN a_tab ON x = a; +// => +// x a +// --- +// 1 1 +// 1 1 +// 2 2 +// +// In this example, no rows from x are duplicated, while the '1' row from a is +// duplicated. +func filtersMatchLeftRowsAtMostOnce(left, right RelExpr, filters FiltersExpr) bool { + // Condition #1. + if len(filters) == 0 && right.Relational().Cardinality.IsZeroOrOne() { + return true + } + + // Condition #2. + filtersFDs := getFiltersFDs(filters) + closure := filtersFDs.ComputeEquivClosure(left.Relational().OutputCols) + return right.Relational().FuncDeps.ColsAreLaxKey(closure) +} + +// filtersMatchAllLeftRows returns true when each row in the given join's left +// input can be guaranteed to match at least one row from the right input, +// according to the join filters. This is true when the following conditions are +// satisfied: +// +// 1. If this is a cross join (there are no filters), then either: +// a. The minimum cardinality of the right input is greater than zero. There +// must be at least one right row for the left rows to be preserved. +// b. There is a not-null foreign key column in the left input that references +// an unfiltered column from the right input. +// +// 2. If this is not a cross join, every filter falls under one of these two +// cases: +// a. 
The self-join case: an equality between ColumnIDs that come from the +// same column on the same base table. +// b. The foreign-key case: an equality between a foreign key column on the +// left and the column it references from the right. +// +// In both the self-join and the foreign key cases, the left columns must be +// not-null, and the right columns must be unfiltered. +// +// Why do the left columns have to be not-null and the right columns +// unfiltered? +// * In both the self-join and the foreign-key cases, a non-null value in +// the left column guarantees a corresponding value in the right column. As +// long as no nulls have been added to the left column and no values have +// been removed from the right, this property will be valid. +// +// Note: in the foreign key case, if the key's match method is match simple, all +// columns in the foreign key must be not-null in order to guarantee that all +// rows will have a match in the referenced table. +func filtersMatchAllLeftRows(left, right RelExpr, filters FiltersExpr) bool { + md := left.Memo().Metadata() + + // Cross join case. + if len(filters) == 0 { + if !right.Relational().Cardinality.CanBeZero() { + // Case 1a: this is a cross join and there's at least one row in the right + // input, so every left row is guaranteed to match at least once. + return true + } + // Case 1b: if there is at least one not-null foreign key column referencing + // the unfiltered right columns, return true. Otherwise, false. + return makeForeignKeyMap( + md, left.Relational().NotNullCols, DeriveJoinMultiplicity(right).UnfilteredCols) != nil + } + + leftColIDs := left.Relational().NotNullCols + rightColIDs := DeriveJoinMultiplicity(right).UnfilteredCols + if rightColIDs.Empty() { + // Right input has no unfiltered columns. + return false + } + + var fkColMap map[opt.ColumnID]opt.ColumnID + + for i := range filters { + eq, _ := filters[i].Condition.(*EqExpr) + if eq == nil { + // Conjunct is not an equality comparison. 
+ return false + } + + leftVar, _ := eq.Left.(*VariableExpr) + rightVar, _ := eq.Right.(*VariableExpr) + if leftVar == nil || rightVar == nil { + // Conjunct does not directly compare two columns. + return false + } + + leftColID := leftVar.Col + rightColID := rightVar.Col + + // Normalize leftColID to come from leftColIDs. + if !leftColIDs.Contains(leftColID) { + leftColID, rightColID = rightColID, leftColID + } + if !leftColIDs.Contains(leftColID) || !rightColIDs.Contains(rightColID) { + // Columns don't come from both sides of join, left column is nullable or + // right column is filtered. + return false + } + + leftTab := md.ColumnMeta(leftColID).Table + rightTab := md.ColumnMeta(rightColID).Table + if leftTab == 0 || rightTab == 0 { + // Columns don't come from base tables. + return false + } + + if md.TableMeta(leftTab).Table == md.TableMeta(rightTab).Table { + // Case 2a: check self-join case. + leftColOrd := leftTab.ColumnOrdinal(leftColID) + rightColOrd := rightTab.ColumnOrdinal(rightColID) + if leftColOrd != rightColOrd { + // Left and right column ordinals do not match. + return false + } + } else { + // Case 2b: check foreign-key case. + if fkColMap == nil { + // Lazily construct a map from all not-null foreign key columns on the + // left to all unfiltered referenced columns on the right. + fkColMap = makeForeignKeyMap(md, leftColIDs, rightColIDs) + if fkColMap == nil { + // No valid foreign key relations were found. + return false + } + } + if refCol, ok := fkColMap[leftColID]; !ok || refCol != rightColID { + // There is no valid foreign key relation from leftColID to + // rightColID. + return false + } + } + } + + return true +} + +// makeForeignKeyMap returns a map from left foreign key columns to right +// referenced columns. The given left columns should not be nullable and the +// right columns should be guaranteed to be unfiltered, or the foreign key +// relation may not hold. 
If the key's match method isn't match full, all +// foreign key columns must be not-null, or the key relation is not guaranteed +// to have a match for each row. If no valid foreign key relations are found, +// fkColMap is nil. +func makeForeignKeyMap( + md *opt.Metadata, leftNotNullCols, rightUnfilteredCols opt.ColSet, +) map[opt.ColumnID]opt.ColumnID { + var tableIDMap map[cat.StableID]opt.TableID + var fkColMap map[opt.ColumnID]opt.ColumnID + var lastSeen opt.TableID + + // Walk through the left columns and add foreign key and referenced columns to + // the output mapping if they come from the leftNotNullCols and + // rightUnfilteredCols ColSets respectively. + for col, ok := leftNotNullCols.Next(0); ok; col, ok = leftNotNullCols.Next(col + 1) { + fkTableID := md.ColumnMeta(col).Table + if fkTableID < 1 { + // The column does not come from a base table. + continue + } + if fkTableID == lastSeen { + // We have already encountered this TableID. (This works because ColumnIDs + // with the same TableID are clustered together). + continue + } + lastSeen = fkTableID + fkTableMeta := md.TableMeta(fkTableID) + if fkTableMeta.IgnoreForeignKeys { + // We are not allowed to use any of this table's foreign keys. + continue + } + fkTable := fkTableMeta.Table + for i, cnt := 0, fkTable.OutboundForeignKeyCount(); i < cnt; i++ { + fk := fkTable.OutboundForeignKey(i) + if !fk.Validated() { + // The data is not guaranteed to follow the foreign key constraint. + continue + } + if tableIDMap == nil { + // Lazily initialize tableIDMap. + tableIDMap = makeStableTableIDMap(md, rightUnfilteredCols) + if len(tableIDMap) == 0 { + // No valid tables were found from the right side. + break + } + } + refTableID, ok := tableIDMap[fk.ReferencedTableID()] + if !ok { + // There is no valid right table corresponding to the referenced table. 
+ continue + } + var leftCols, rightCols []opt.ColumnID + fkValid := true + for j, numCols := 0, fk.ColumnCount(); j < numCols; j++ { + leftOrd := fk.OriginColumnOrdinal(fkTable, j) + rightOrd := fk.ReferencedColumnOrdinal(md.Table(refTableID), j) + leftCol := fkTableID.ColumnID(leftOrd) + rightCol := refTableID.ColumnID(rightOrd) + if !leftNotNullCols.Contains(leftCol) { + // Not all FK columns are part of the equality conditions. There are two + // cases: + // 1. MATCH SIMPLE/PARTIAL: if this column is nullable, rows from this + // foreign key are not guaranteed to match. + // 2. MATCH FULL: FK rows are still guaranteed to match because the + // non-present columns can only be NULL if all FK columns are NULL. + if fk.MatchMethod() != tree.MatchFull { + fkValid = false + break + } + continue + } + if !rightUnfilteredCols.Contains(rightCol) { + continue + } + leftCols = append(leftCols, leftCol) + rightCols = append(rightCols, rightCol) + } + if !fkValid { + // The foreign key relations should only be added to the mapping if the + // foreign key is guaranteed a match for every row. + continue + } + for i := range leftCols { + // Add any valid foreign key relations to the mapping. + if fkColMap == nil { + // Lazily initialize fkColMap + fkColMap = map[opt.ColumnID]opt.ColumnID{} + } + fkColMap[leftCols[i]] = rightCols[i] + } + } + } + return fkColMap +} + +// makeStableTableIDMap creates a mapping from the StableIDs of the base tables +// to the meta TableIDs for the given columns. 
+func makeStableTableIDMap(md *opt.Metadata, cols opt.ColSet) map[cat.StableID]opt.TableID { + idMap := map[cat.StableID]opt.TableID{} + for col, ok := cols.Next(0); ok; col, ok = cols.Next(col + 1) { + metaTableID := md.ColumnMeta(col).Table + if metaTableID == 0 { + continue + } + stableTableID := md.Table(metaTableID).ID() + if prevID, ok := idMap[stableTableID]; ok && prevID != metaTableID { + // Avoid dealing with cases where multiple meta tables reference the same + // base table so that only one TableID has to be stored. + return map[cat.StableID]opt.TableID{} + } + idMap[stableTableID] = metaTableID + } + return idMap +} + +// getFiltersFDs returns a FuncDepSet with the FDs from the FiltersItems in +// the given FiltersExpr. +func getFiltersFDs(filters FiltersExpr) props.FuncDepSet { + if len(filters) == 1 { + return filters[0].ScalarProps().FuncDeps + } + + filtersFDs := props.FuncDepSet{} + for i := range filters { + filtersFDs.AddFrom(&filters[i].ScalarProps().FuncDeps) + } + return filtersFDs +} diff --git a/pkg/sql/opt/memo/testdata/logprops/join b/pkg/sql/opt/memo/testdata/logprops/join index f4aac48b725d..643079c2beb5 100644 --- a/pkg/sql/opt/memo/testdata/logprops/join +++ b/pkg/sql/opt/memo/testdata/logprops/join @@ -10,6 +10,28 @@ exec-ddl CREATE TABLE mn (m INT PRIMARY KEY, n INT, UNIQUE (n)) ---- +exec-ddl +CREATE TABLE fk ( + k INT PRIMARY KEY, + v INT, + r1 INT NOT NULL REFERENCES xysd(x), + r2 INT REFERENCES xysd(x) +) +---- + +exec-ddl +CREATE TABLE abc (a INT, b INT, c INT, PRIMARY KEY (a, b, c)) +---- + +exec-ddl +CREATE TABLE ref ( + r1 INT NOT NULL, + r2 INT, + r3 INT NOT NULL, + FOREIGN KEY (r1, r2, r3) REFERENCES abc(a, b, c) +) +---- + # Inner-join. 
build SELECT *, rowid FROM xysd INNER JOIN uv ON x=u @@ -20,6 +42,7 @@ inner-join (hash) ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6), (1)==(5), (5)==(1) ├── prune: (2-4,6,7) ├── interesting orderings: (+1) (-3,+4,+1) (+7) + ├── multiplicity: left-rows(zero-or-more), right-rows(one-or-zero) ├── scan xysd │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ ├── key: (1) @@ -121,6 +144,7 @@ project │ │ ├── prune: (2-4) │ │ ├── reject-nulls: (6,9) │ │ ├── interesting orderings: (+1) (-3,+4,+1) (+9) + │ │ ├── multiplicity: left-rows(one-or-more), right-rows(one-or-zero) │ │ ├── scan xysd │ │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ │ │ ├── key: (1) @@ -131,6 +155,7 @@ project │ │ │ ├── columns: v:6(int!null) n:9(int!null) │ │ │ ├── fd: (6)==(9), (9)==(6) │ │ │ ├── interesting orderings: (+9) + │ │ │ ├── multiplicity: left-rows(one-or-zero), right-rows(zero-or-more) │ │ │ ├── scan uv │ │ │ │ ├── columns: v:6(int!null) │ │ │ │ └── prune: (6) @@ -240,6 +265,7 @@ left-join (hash) ├── prune: (2-4,6,7) ├── reject-nulls: (5-7) ├── interesting orderings: (+1) (-3,+4,+1) (+7) + ├── multiplicity: left-rows(one-or-more), right-rows(one-or-zero) ├── scan xysd │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ ├── key: (1) @@ -284,6 +310,7 @@ project │ │ ├── prune: (2-4) │ │ ├── reject-nulls: (5) │ │ ├── interesting orderings: (+1) (-3,+4,+1) + │ │ ├── multiplicity: left-rows(one-or-more), right-rows(one-or-zero) │ │ ├── scan xysd │ │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ │ │ ├── key: (1) @@ -350,6 +377,7 @@ full-join (hash) ├── prune: (2-4,6,7) ├── reject-nulls: (1-7) ├── interesting orderings: (+1) (-3,+4,+1) (+7) + ├── multiplicity: left-rows(one-or-more), right-rows(exactly-one) ├── scan xysd │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ ├── key: (1) @@ -598,6 +626,7 @@ select ├── key: () ├── fd: ()-->(5,6) ├── prune: (6) + ├── multiplicity: 
left-rows(one-or-zero), right-rows(one-or-zero) ├── project │ ├── columns: x:5(int) │ ├── outer: (1) @@ -644,6 +673,7 @@ project ├── cardinality: [0 - 1] ├── key: () ├── fd: ()-->(5) + ├── multiplicity: left-rows(one-or-zero), right-rows(one-or-zero) ├── select │ ├── columns: count_rows:5(int!null) │ ├── cardinality: [0 - 1] @@ -792,6 +822,7 @@ left-join (cross) ├── cardinality: [3 - 6] ├── prune: (1-3) ├── reject-nulls: (2,3) + ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) ├── values │ ├── columns: column1:1(int!null) │ ├── cardinality: [3 - 3] @@ -868,6 +899,7 @@ full-join (cross) ├── cardinality: [2 - 4] ├── prune: (1,2) ├── reject-nulls: (1,2) + ├── multiplicity: left-rows(one-or-more), right-rows(one-or-more) ├── values │ ├── columns: column1:1(unknown) │ ├── cardinality: [2 - 2] @@ -896,6 +928,7 @@ full-join (hash) ├── cardinality: [1 - 2] ├── prune: (2,4) ├── reject-nulls: (1-4) + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) ├── values │ ├── columns: column1:1(int!null) column2:2(int!null) │ ├── cardinality: [1 - 1] @@ -928,6 +961,7 @@ full-join (cross) ├── cardinality: [2 - 4] ├── prune: (1,2) ├── reject-nulls: (1,2) + ├── multiplicity: left-rows(one-or-more), right-rows(one-or-more) ├── values │ ├── columns: column1:1(unknown) │ ├── cardinality: [2 - 2] @@ -959,6 +993,7 @@ full-join (cross) ├── prune: (1-5) ├── reject-nulls: (1-5) ├── interesting orderings: (+1) (-3,+4,+1) + ├── multiplicity: left-rows(one-or-more), right-rows(one-or-more) ├── scan xysd │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ ├── key: (1) @@ -986,6 +1021,7 @@ full-join (cross) ├── prune: (1-8) ├── reject-nulls: (1-8) ├── interesting orderings: (+1) (-3,+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) ├── limit │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ ├── cardinality: [0 - 1] @@ -1030,6 +1066,7 @@ left-join (cross) ├── prune: (1-4,8) ├── 
reject-nulls: (5,8) ├── interesting orderings: (+1) (-3,+4,+1) + ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) ├── scan xysd │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ ├── key: (1) @@ -1070,6 +1107,7 @@ left-join (cross) ├── prune: (1-4,8) ├── reject-nulls: (5,8) ├── interesting orderings: (+1) (-3,+4,+1) + ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) ├── scan xysd │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ ├── key: (1) @@ -1206,6 +1244,7 @@ full-join (cross) ├── prune: (1-4,8) ├── reject-nulls: (1-5,8) ├── interesting orderings: (+1) (-3,+4,+1) + ├── multiplicity: left-rows(one-or-more), right-rows(one-or-more) ├── scan xysd │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) │ ├── key: (1) @@ -1246,6 +1285,7 @@ full-join (cross) ├── prune: (4-8) ├── reject-nulls: (1,4-8) ├── interesting orderings: (+5) (-7,+8,+5) + ├── multiplicity: left-rows(one-or-more), right-rows(one-or-more) ├── group-by │ ├── columns: u:1(int) sum:4(decimal!null) │ ├── grouping columns: u:1(int) @@ -1475,6 +1515,7 @@ full-join (cross) ├── cardinality: [2 - 2] ├── prune: (1) ├── reject-nulls: (1) + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) ├── values │ ├── columns: column1:1(unknown) │ ├── cardinality: [1 - 1] @@ -1519,6 +1560,7 @@ full-join (hash) ├── lax-key: (1,4) ├── fd: (1)~~>(2), (4)~~>(5), (1,4)~~>(2,5) ├── reject-nulls: (1,2,4,5) + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) ├── select │ ├── columns: t1.x:1(int) t1.y:2(int!null) │ ├── key: (1) @@ -1563,3 +1605,561 @@ full-join (hash) └── eq [type=bool, outer=(1,4), constraints=(/1: (/NULL - ]; /4: (/NULL - ]), fd=(1)==(4), (4)==(1)] ├── variable: t1.x:1 [type=int] └── variable: t2.x:4 [type=int] + +# InnerJoin with an equality between one key column and one non-key column. +# Neither input is guaranteed a match for every row. 
Rows from uv will not be +# duplicated because the x column is unique. Rows from xysd may be duplicated +# because the v column is not unique. +norm +SELECT * FROM xysd INNER JOIN uv ON x=v +---- +inner-join (hash) + ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) + ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (1)==(6), (6)==(1) + ├── prune: (2-5) + ├── interesting orderings: (+1) (-3,+4,+1) + ├── multiplicity: left-rows(zero-or-more), right-rows(one-or-zero) + ├── scan xysd + │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (-3,+4,+1) + ├── scan uv + │ ├── columns: u:5(int) v:6(int!null) + │ └── prune: (5,6) + └── filters + └── eq [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + ├── variable: x:1 [type=int] + └── variable: v:6 [type=int] + +# InnerJoin with a not-null foreign key equality. Since the foreign key is +# not-null, rows from the fk table are guaranteed a match. Since x is a key +# column, rows from the fk table will not be duplicated. 
+norm +SELECT * FROM fk INNER JOIN xysd ON x = r1 +---- +inner-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + ├── key: (1) + ├── fd: (1)-->(2-4), (5)-->(6-8), (7,8)~~>(5,6), (3)==(5), (5)==(3) + ├── prune: (1,2,4,6-8) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── scan xysd + │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + │ ├── key: (5) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-8) + │ └── interesting orderings: (+5) (-7,+8,+5) + └── filters + └── eq [type=bool, outer=(3,5), constraints=(/3: (/NULL - ]; /5: (/NULL - ]), fd=(3)==(5), (5)==(3)] + ├── variable: x:5 [type=int] + └── variable: r1:3 [type=int] + +# InnerJoin with a nullable foreign key equality condition. 
+norm +SELECT * FROM fk INNER JOIN xysd ON x = r2 +---- +inner-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int!null) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + ├── key: (1) + ├── fd: (1)-->(2-4), (5)-->(6-8), (7,8)~~>(5,6), (4)==(5), (5)==(4) + ├── prune: (1-3,6-8) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(one-or-zero), right-rows(zero-or-more) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── scan xysd + │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + │ ├── key: (5) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-8) + │ └── interesting orderings: (+5) (-7,+8,+5) + └── filters + └── eq [type=bool, outer=(4,5), constraints=(/4: (/NULL - ]; /5: (/NULL - ]), fd=(4)==(5), (5)==(4)] + ├── variable: x:5 [type=int] + └── variable: r2:4 [type=int] + +# Cross join. Rows from fk are guaranteed matches because the not-null foreign +# key implies that xysd has at least one row whenever fk does. 
+norm +SELECT * FROM fk CROSS JOIN xysd +---- +inner-join (cross) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + ├── key: (1,5) + ├── fd: (1)-->(2-4), (5)-->(6-8), (7,8)~~>(5,6) + ├── prune: (1-8) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── scan xysd + │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + │ ├── key: (5) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-8) + │ └── interesting orderings: (+5) (-7,+8,+5) + └── filters (true) + +# LeftJoin case with a not-null foreign key. Since fk rows are all guaranteed +# exactly one match, xysd will not be null-extended and the LeftJoin can +# therefore be simplified. 
+norm +SELECT * FROM fk LEFT JOIN xysd ON x = r1 +---- +inner-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + ├── key: (1) + ├── fd: (1)-->(2-4), (5)-->(6-8), (7,8)~~>(5,6), (3)==(5), (5)==(3) + ├── prune: (1,2,4,6-8) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── scan xysd + │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + │ ├── key: (5) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-8) + │ └── interesting orderings: (+5) (-7,+8,+5) + └── filters + └── eq [type=bool, outer=(3,5), constraints=(/3: (/NULL - ]; /5: (/NULL - ]), fd=(3)==(5), (5)==(3)] + ├── variable: x:5 [type=int] + └── variable: r1:3 [type=int] + + +# LeftJoin case with a nullable foreign key. The LeftJoin cannot be simplified +# because a nullable foreign key is not guaranteed matches. 
+norm +SELECT * FROM fk LEFT JOIN xysd ON x = r2 +---- +left-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:5(int) y:6(int) s:7(string) d:8(decimal) + ├── key: (1) + ├── fd: (1)-->(2-8), (5)-->(6-8), (7,8)~~>(5,6) + ├── prune: (1-3,6-8) + ├── reject-nulls: (5-8) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── scan xysd + │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + │ ├── key: (5) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-8) + │ └── interesting orderings: (+5) (-7,+8,+5) + └── filters + └── eq [type=bool, outer=(4,5), constraints=(/4: (/NULL - ]; /5: (/NULL - ]), fd=(4)==(5), (5)==(4)] + ├── variable: x:5 [type=int] + └── variable: r2:4 [type=int] + +# FullJoin with equality between key columns. The FullJoin adds back any rows +# that are filtered out, and the equality between key columns ensures that no +# rows are duplicated. Note that both sides may be null-extended. 
+norm +SELECT * FROM mn FULL JOIN xysd ON m = x +---- +full-join (hash) + ├── columns: m:1(int) n:2(int) x:3(int) y:4(int) s:5(string) d:6(decimal) + ├── key: (1,3) + ├── fd: (1)-->(2), (2)~~>(1), (3)-->(4-6), (5,6)~~>(3,4) + ├── prune: (2,4-6) + ├── reject-nulls: (1-6) + ├── interesting orderings: (+1) (+2,+1) (+3) (-5,+6,+3) + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) + ├── scan mn + │ ├── columns: m:1(int!null) n:2(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2), (2)~~>(1) + │ ├── prune: (1,2) + │ └── interesting orderings: (+1) (+2,+1) + ├── scan xysd + │ ├── columns: x:3(int!null) y:4(int) s:5(string) d:6(decimal!null) + │ ├── key: (3) + │ ├── fd: (3)-->(4-6), (5,6)~~>(3,4) + │ ├── prune: (3-6) + │ └── interesting orderings: (+3) (-5,+6,+3) + └── filters + └── eq [type=bool, outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)] + ├── variable: m:1 [type=int] + └── variable: x:3 [type=int] + +# Self-join case. Since the condition is equating a key column with itself, +# every row from both inputs is guaranteed to be included in the join output +# exactly once. 
+norm +SELECT * FROM xysd INNER JOIN xysd AS a ON xysd.x = a.x +---- +inner-join (hash) + ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + ├── key: (5) + ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (5)-->(6-8), (7,8)~~>(5,6), (1)==(5), (5)==(1) + ├── prune: (2-4,6-8) + ├── interesting orderings: (+1) (-3,+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (-3,+4,+1) + ├── scan a + │ ├── columns: a.x:5(int!null) a.y:6(int) a.s:7(string) a.d:8(decimal!null) + │ ├── key: (5) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-8) + │ └── interesting orderings: (+5) (-7,+8,+5) + └── filters + └── eq [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] + ├── variable: xysd.x:1 [type=int] + └── variable: a.x:5 [type=int] + +# Case with a values cross join in the input of an InnerJoin. 
+norm +SELECT * FROM +fk INNER JOIN (SELECT * FROM xysd CROSS JOIN (VALUES (1), (2))) ON r1 = x +---- +inner-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) column1:9(int!null) + ├── fd: (1)-->(2-4), (5)-->(6-8), (7,8)~~>(5,6), (3)==(5), (5)==(3) + ├── prune: (1,2,4,6-9) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── inner-join (cross) + │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) column1:9(int!null) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-9) + │ ├── interesting orderings: (+5) (-7,+8,+5) + │ ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) + │ ├── scan xysd + │ │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + │ │ ├── key: (5) + │ │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ │ ├── prune: (5-8) + │ │ └── interesting orderings: (+5) (-7,+8,+5) + │ ├── values + │ │ ├── columns: column1:9(int!null) + │ │ ├── cardinality: [2 - 2] + │ │ ├── prune: (9) + │ │ ├── tuple [type=tuple{int}] + │ │ │ └── const: 1 [type=int] + │ │ └── tuple [type=tuple{int}] + │ │ └── const: 2 [type=int] + │ └── filters (true) + └── filters + └── eq [type=bool, outer=(3,5), constraints=(/3: (/NULL - ]; /5: (/NULL - ]), fd=(3)==(5), (5)==(3)] + ├── variable: r1:3 [type=int] + └── variable: x:5 [type=int] + +# Case with a self-join in the input of an InnerJoin. 
+norm +SELECT * FROM fk +INNER JOIN (SELECT * FROM xysd INNER JOIN xysd AS a ON xysd.x = a.x) f(x) ON r1 = f.x +---- +inner-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) x:9(int!null) y:10(int) s:11(string) d:12(decimal!null) + ├── key: (1) + ├── fd: (1)-->(2-4), (5)-->(6-8), (7,8)~~>(5,6), (9)-->(10-12), (11,12)~~>(9,10), (5)==(3,9), (9)==(3,5), (3)==(5,9) + ├── prune: (1,2,4,6-8,10-12) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (-7,+8,+5) (+9) (-11,+12,+9) + ├── multiplicity: left-rows(one-or-zero), right-rows(zero-or-more) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── inner-join (hash) + │ ├── columns: xysd.x:5(int!null) xysd.y:6(int) xysd.s:7(string) xysd.d:8(decimal!null) a.x:9(int!null) a.y:10(int) a.s:11(string) a.d:12(decimal!null) + │ ├── key: (9) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6), (9)-->(10-12), (11,12)~~>(9,10), (5)==(9), (9)==(5) + │ ├── prune: (6-8,10-12) + │ ├── interesting orderings: (+5) (-7,+8,+5) (+9) (-11,+12,+9) + │ ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) + │ ├── scan xysd + │ │ ├── columns: xysd.x:5(int!null) xysd.y:6(int) xysd.s:7(string) xysd.d:8(decimal!null) + │ │ ├── key: (5) + │ │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ │ ├── prune: (5-8) + │ │ └── interesting orderings: (+5) (-7,+8,+5) + │ ├── scan a + │ │ ├── columns: a.x:9(int!null) a.y:10(int) a.s:11(string) a.d:12(decimal!null) + │ │ ├── key: (9) + │ │ ├── fd: (9)-->(10-12), (11,12)~~>(9,10) + │ │ ├── prune: (9-12) + │ │ └── interesting orderings: (+9) (-11,+12,+9) + │ └── filters + │ └── eq [type=bool, outer=(5,9), constraints=(/5: (/NULL - ]; /9: (/NULL - ]), fd=(5)==(9), (9)==(5)] + │ ├── variable: xysd.x:5 [type=int] + │ └── variable: a.x:9 [type=int] + └── filters + └── eq [type=bool, outer=(3,5), 
constraints=(/3: (/NULL - ]; /5: (/NULL - ]), fd=(3)==(5), (5)==(3)] + ├── variable: r1:3 [type=int] + └── variable: xysd.x:5 [type=int] + +# Case with an equality with a synthesized column. +norm +SELECT * FROM mn LEFT JOIN xysd ON y = (n * 2) +---- +project + ├── columns: m:1(int!null) n:2(int) x:3(int) y:4(int) s:5(string) d:6(decimal) + ├── key: (1,3) + ├── fd: (1)-->(2), (2)~~>(1), (3)-->(4-6), (5,6)~~>(3,4) + ├── prune: (1-6) + ├── reject-nulls: (3-6) + ├── interesting orderings: (+1) (+2,+1) (+3) (-5,+6,+3) + └── left-join (hash) + ├── columns: m:1(int!null) n:2(int) x:3(int) y:4(int) s:5(string) d:6(decimal) column7:7(int) + ├── key: (1,3) + ├── fd: (1)-->(2), (2)~~>(1), (2)-->(7), (3)-->(4-6), (5,6)~~>(3,4) + ├── prune: (1-3,5,6) + ├── reject-nulls: (3-6) + ├── interesting orderings: (+1) (+2,+1) (+3) (-5,+6,+3) + ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) + ├── project + │ ├── columns: column7:7(int) m:1(int!null) n:2(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2), (2)~~>(1), (2)-->(7) + │ ├── prune: (1,2,7) + │ ├── interesting orderings: (+1) (+2,+1) + │ ├── scan mn + │ │ ├── columns: m:1(int!null) n:2(int) + │ │ ├── key: (1) + │ │ ├── fd: (1)-->(2), (2)~~>(1) + │ │ ├── prune: (1,2) + │ │ └── interesting orderings: (+1) (+2,+1) + │ └── projections + │ └── mult [as=column7:7, type=int, outer=(2)] + │ ├── variable: n:2 [type=int] + │ └── const: 2 [type=int] + ├── scan xysd + │ ├── columns: x:3(int!null) y:4(int) s:5(string) d:6(decimal!null) + │ ├── key: (3) + │ ├── fd: (3)-->(4-6), (5,6)~~>(3,4) + │ ├── prune: (3-6) + │ └── interesting orderings: (+3) (-5,+6,+3) + └── filters + └── eq [type=bool, outer=(4,7), constraints=(/4: (/NULL - ]; /7: (/NULL - ]), fd=(4)==(7), (7)==(4)] + ├── variable: column7:7 [type=int] + └── variable: y:4 [type=int] + +# Case with columns that don't come from base tables. 
+norm +SELECT * FROM (SELECT * FROM uv UNION (SELECT * FROM uv)) f(v1, v2) INNER JOIN xysd ON v2 = x +---- +inner-join (hash) + ├── columns: v1:7(int) v2:8(int!null) x:9(int!null) y:10(int) s:11(string) d:12(decimal!null) + ├── key: (7,9) + ├── fd: (9)-->(10-12), (11,12)~~>(9,10), (8)==(9), (9)==(8) + ├── prune: (10-12) + ├── interesting orderings: (+9) (-11,+12,+9) + ├── multiplicity: left-rows(one-or-zero), right-rows(zero-or-more) + ├── union + │ ├── columns: u:7(int) v:8(int!null) + │ ├── left columns: uv.u:1(int) uv.v:2(int) + │ ├── right columns: uv.u:4(int) uv.v:5(int) + │ ├── key: (7,8) + │ ├── scan uv + │ │ ├── columns: uv.u:1(int) uv.v:2(int!null) + │ │ └── prune: (1,2) + │ └── scan uv + │ ├── columns: uv.u:4(int) uv.v:5(int!null) + │ └── prune: (4,5) + ├── scan xysd + │ ├── columns: x:9(int!null) y:10(int) s:11(string) d:12(decimal!null) + │ ├── key: (9) + │ ├── fd: (9)-->(10-12), (11,12)~~>(9,10) + │ ├── prune: (9-12) + │ └── interesting orderings: (+9) (-11,+12,+9) + └── filters + └── eq [type=bool, outer=(8,9), constraints=(/8: (/NULL - ]; /9: (/NULL - ]), fd=(8)==(9), (9)==(8)] + ├── variable: v:8 [type=int] + └── variable: x:9 [type=int] + +# Self-join case with different columns. 
+norm +SELECT * FROM xysd INNER JOIN xysd AS a ON xysd.x = a.y +---- +inner-join (hash) + ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) x:5(int!null) y:6(int!null) s:7(string) d:8(decimal!null) + ├── key: (5) + ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (5)-->(6-8), (7,8)~~>(5,6), (1)==(6), (6)==(1) + ├── prune: (2-5,7,8) + ├── interesting orderings: (+1) (-3,+4,+1) (+5) (-7,+8,+5) + ├── multiplicity: left-rows(zero-or-more), right-rows(one-or-zero) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (-3,+4,+1) + ├── scan a + │ ├── columns: a.x:5(int!null) a.y:6(int) a.s:7(string) a.d:8(decimal!null) + │ ├── key: (5) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-8) + │ └── interesting orderings: (+5) (-7,+8,+5) + └── filters + └── eq [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + ├── variable: xysd.x:1 [type=int] + └── variable: a.y:6 [type=int] + +# Case with an equality between a not-null foreign key and an unreferenced +# column. 
+norm +SELECT * FROM fk INNER JOIN xysd ON r1 = y +---- +inner-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:5(int!null) y:6(int!null) s:7(string) d:8(decimal!null) + ├── key: (1,5) + ├── fd: (1)-->(2-4), (5)-->(6-8), (7,8)~~>(5,6), (3)==(6), (6)==(3) + ├── prune: (1,2,4,5,7,8) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (-7,+8,+5) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── scan xysd + │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) + │ ├── key: (5) + │ ├── fd: (5)-->(6-8), (7,8)~~>(5,6) + │ ├── prune: (5-8) + │ └── interesting orderings: (+5) (-7,+8,+5) + └── filters + └── eq [type=bool, outer=(3,6), constraints=(/3: (/NULL - ]; /6: (/NULL - ]), fd=(3)==(6), (6)==(3)] + ├── variable: r1:3 [type=int] + └── variable: y:6 [type=int] + +# Case where left table has a foreign key that references a table that isn't +# from the right input. 
+norm +SELECT * FROM fk INNER JOIN mn ON k = m +---- +inner-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) m:5(int!null) n:6(int) + ├── key: (5) + ├── fd: (1)-->(2-4), (5)-->(6), (6)~~>(5), (1)==(5), (5)==(1) + ├── prune: (2-4,6) + ├── interesting orderings: (+1) (+3,+1) (+4,+1) (+5) (+6,+5) + ├── multiplicity: left-rows(one-or-zero), right-rows(one-or-zero) + ├── scan fk + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── prune: (1-4) + │ └── interesting orderings: (+1) (+3,+1) (+4,+1) + ├── scan mn + │ ├── columns: m:5(int!null) n:6(int) + │ ├── key: (5) + │ ├── fd: (5)-->(6), (6)~~>(5) + │ ├── prune: (5,6) + │ └── interesting orderings: (+5) (+6,+5) + └── filters + └── eq [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] + ├── variable: k:1 [type=int] + └── variable: m:5 [type=int] + +# Case with a match-simple foreign key with one nullable column. +norm +SELECT * +FROM ref +INNER JOIN abc +ON (r1, r2, r3) = (a, b, c) +---- +inner-join (hash) + ├── columns: r1:1(int!null) r2:2(int!null) r3:3(int!null) a:5(int!null) b:6(int!null) c:7(int!null) + ├── fd: (1)==(5), (5)==(1), (2)==(6), (6)==(2), (3)==(7), (7)==(3) + ├── interesting orderings: (+1,+2,+3) (+5,+6,+7) + ├── multiplicity: left-rows(one-or-zero), right-rows(zero-or-more) + ├── scan ref + │ ├── columns: r1:1(int!null) r2:2(int) r3:3(int!null) + │ ├── prune: (1-3) + │ └── interesting orderings: (+1,+2,+3) + ├── scan abc + │ ├── columns: a:5(int!null) b:6(int!null) c:7(int!null) + │ ├── key: (5-7) + │ ├── prune: (5-7) + │ └── interesting orderings: (+5,+6,+7) + └── filters + ├── eq [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] + │ ├── variable: r1:1 [type=int] + │ └── variable: a:5 [type=int] + ├── eq [type=bool, outer=(2,6), constraints=(/2: (/NULL - ]; /6: (/NULL - ]), fd=(2)==(6), (6)==(2)] + │ ├── variable: r2:2 [type=int] + 
│ └── variable: b:6 [type=int] + └── eq [type=bool, outer=(3,7), constraints=(/3: (/NULL - ]; /7: (/NULL - ]), fd=(3)==(7), (7)==(3)] + ├── variable: r3:3 [type=int] + └── variable: c:7 [type=int] + +# Case with a not-null multi-column foreign key. +norm +SELECT * +FROM (SELECT r1, r2, r3 FROM ref WHERE r2 IS NOT NULL) +INNER JOIN abc +ON (r1, r2, r3) = (a, b, c) +---- +inner-join (hash) + ├── columns: r1:1(int!null) r2:2(int!null) r3:3(int!null) a:5(int!null) b:6(int!null) c:7(int!null) + ├── fd: (1)==(5), (5)==(1), (2)==(6), (6)==(2), (3)==(7), (7)==(3) + ├── interesting orderings: (+1,+2,+3) (+5,+6,+7) + ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + ├── select + │ ├── columns: r1:1(int!null) r2:2(int!null) r3:3(int!null) + │ ├── prune: (1,3) + │ ├── interesting orderings: (+1,+2,+3) + │ ├── scan ref + │ │ ├── columns: r1:1(int!null) r2:2(int) r3:3(int!null) + │ │ ├── prune: (1-3) + │ │ └── interesting orderings: (+1,+2,+3) + │ └── filters + │ └── is-not [type=bool, outer=(2), constraints=(/2: (/NULL - ]; tight)] + │ ├── variable: r2:2 [type=int] + │ └── null [type=unknown] + ├── scan abc + │ ├── columns: a:5(int!null) b:6(int!null) c:7(int!null) + │ ├── key: (5-7) + │ ├── prune: (5-7) + │ └── interesting orderings: (+5,+6,+7) + └── filters + ├── eq [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] + │ ├── variable: r1:1 [type=int] + │ └── variable: a:5 [type=int] + ├── eq [type=bool, outer=(2,6), constraints=(/2: (/NULL - ]; /6: (/NULL - ]), fd=(2)==(6), (6)==(2)] + │ ├── variable: r2:2 [type=int] + │ └── variable: b:6 [type=int] + └── eq [type=bool, outer=(3,7), constraints=(/3: (/NULL - ]; /7: (/NULL - ]), fd=(3)==(7), (7)==(3)] + ├── variable: r3:3 [type=int] + └── variable: c:7 [type=int] diff --git a/pkg/sql/opt/memo/testdata/logprops/scalar b/pkg/sql/opt/memo/testdata/logprops/scalar index d3b40dd45146..31d8578985f8 100644 --- a/pkg/sql/opt/memo/testdata/logprops/scalar +++ 
b/pkg/sql/opt/memo/testdata/logprops/scalar @@ -202,6 +202,7 @@ group-by │ ├── fd: (1)-->(2,3), (1)==(4), (4)==(1) │ ├── prune: (2,3,5) │ ├── interesting orderings: (+1) + │ ├── multiplicity: left-rows(zero-or-more), right-rows(one-or-zero) │ ├── project │ │ ├── columns: div:3(decimal) x:1(int!null) y:2(int) │ │ ├── side-effects diff --git a/pkg/sql/opt/memo/testdata/logprops/upsert b/pkg/sql/opt/memo/testdata/logprops/upsert index 4978071f2c70..228c68dfc19e 100644 --- a/pkg/sql/opt/memo/testdata/logprops/upsert +++ b/pkg/sql/opt/memo/testdata/logprops/upsert @@ -81,6 +81,7 @@ project │ │ │ ├── prune: (10,13) │ │ │ ├── reject-nulls: (10-13) │ │ │ ├── interesting orderings: (+13) (+10) (+11,+12,+13) + │ │ │ ├── multiplicity: left-rows(exactly-one), right-rows(one-or-zero) │ │ │ ├── ensure-upsert-distinct-on │ │ │ │ ├── columns: x:5(int!null) y:6(int!null) column8:8(int) column9:9(int!null) │ │ │ │ ├── grouping columns: y:6(int!null) column9:9(int!null) @@ -247,6 +248,7 @@ project │ │ ├── prune: (18,21) │ │ ├── reject-nulls: (18-21) │ │ ├── interesting orderings: (+21) (+18) (+19,+20,+21) + │ │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) │ │ ├── upsert-distinct-on │ │ │ ├── columns: x:5(int!null) y:6(int) column8:8(int) column9:9(int) │ │ │ ├── grouping columns: x:5(int!null) @@ -274,6 +276,7 @@ project │ │ │ │ │ ├── prune: (15-17) │ │ │ │ │ ├── reject-nulls: (14-17) │ │ │ │ │ ├── interesting orderings: (+17) (+14) (+15,+16,+17) + │ │ │ │ │ ├── multiplicity: left-rows(exactly-one), right-rows(one-or-zero) │ │ │ │ │ ├── upsert-distinct-on │ │ │ │ │ │ ├── columns: x:5(int!null) y:6(int) column8:8(int) column9:9(int) │ │ │ │ │ │ ├── grouping columns: column8:8(int) @@ -302,6 +305,7 @@ project │ │ │ │ │ │ │ │ ├── prune: (5,6,9-12) │ │ │ │ │ │ │ │ ├── reject-nulls: (10-13) │ │ │ │ │ │ │ │ ├── interesting orderings: (+5) (+6) (+13) (+10) (+11,+12,+13) + │ │ │ │ │ │ │ │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) │ │ │ │ │ │ │ │ 
├── project │ │ │ │ │ │ │ │ │ ├── columns: column9:9(int) x:5(int!null) y:6(int) column8:8(int) │ │ │ │ │ │ │ │ │ ├── side-effects @@ -469,6 +473,7 @@ project │ │ │ ├── prune: (9-11) │ │ │ ├── reject-nulls: (9-12) │ │ │ ├── interesting orderings: (+12) (+9) (+10,+11,+12) + │ │ │ ├── multiplicity: left-rows(exactly-one), right-rows(one-or-zero) │ │ │ ├── ensure-upsert-distinct-on │ │ │ │ ├── columns: column1:5(int!null) column6:6(int!null) column7:7(int) column8:8(int!null) │ │ │ │ ├── grouping columns: column7:7(int) @@ -614,6 +619,7 @@ upsert abc │ │ │ ├── prune: (12-14) │ │ │ ├── reject-nulls: (11-14) │ │ │ ├── interesting orderings: (+14) (+11) (+12,+13,+14) + │ │ │ ├── multiplicity: left-rows(exactly-one), right-rows(one-or-zero) │ │ │ ├── ensure-upsert-distinct-on │ │ │ │ ├── columns: y:6(int!null) column8:8(int!null) column9:9(int) column10:10(int!null) │ │ │ │ ├── grouping columns: y:6(int!null) diff --git a/pkg/sql/opt/memo/testdata/logprops/virtual-scan b/pkg/sql/opt/memo/testdata/logprops/virtual-scan index e5d62d753716..6c939239ace1 100644 --- a/pkg/sql/opt/memo/testdata/logprops/virtual-scan +++ b/pkg/sql/opt/memo/testdata/logprops/virtual-scan @@ -13,6 +13,7 @@ project ├── prune: (4-6,9-12) ├── reject-nulls: (6-12) ├── interesting orderings: (+6) + ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) ├── project │ ├── columns: catalog_name:2(string!null) schema_name:3(string!null) default_character_set_name:4(string) sql_path:5(string) │ ├── fd: ()-->(3) diff --git a/pkg/sql/opt/memo/testdata/memo b/pkg/sql/opt/memo/testdata/memo index b4be9f94dbbe..9f372d32de89 100644 --- a/pkg/sql/opt/memo/testdata/memo +++ b/pkg/sql/opt/memo/testdata/memo @@ -370,7 +370,7 @@ memo (optimized, ~9KB, required=[presentation: field:6]) memo SELECT DISTINCT tag FROM [SHOW TRACE FOR SESSION] ---- -memo (optimized, ~6KB, required=[presentation: tag:11]) +memo (optimized, ~7KB, required=[presentation: tag:11]) ├── G1: (distinct-on G2 G3 cols=(11)) │ └── 
[presentation: tag:11] │ ├── best: (distinct-on G2 G3 cols=(11)) diff --git a/pkg/sql/opt/metadata.go b/pkg/sql/opt/metadata.go index 07a9393493e4..d35f3216208f 100644 --- a/pkg/sql/opt/metadata.go +++ b/pkg/sql/opt/metadata.go @@ -369,17 +369,6 @@ func (md *Metadata) AllTables() []TableMeta { return md.tables } -// TableByStableID looks up the catalog table associated with the given -// StableID (unique across all tables and stable across queries). -func (md *Metadata) TableByStableID(id cat.StableID) cat.Table { - for _, mdTab := range md.tables { - if mdTab.Table.ID() == id { - return mdTab.Table - } - } - return nil -} - // AddColumn assigns a new unique id to a column within the query and records // its alias and type. If the alias is empty, a "column" alias is created. func (md *Metadata) AddColumn(alias string, typ *types.T) ColumnID { diff --git a/pkg/sql/opt/norm/join_funcs.go b/pkg/sql/opt/norm/join_funcs.go index 7bf5428e25fb..67beae08247b 100644 --- a/pkg/sql/opt/norm/join_funcs.go +++ b/pkg/sql/opt/norm/join_funcs.go @@ -443,263 +443,17 @@ func (c *CustomFuncs) GetEquivFD( return equivFD } -// eqConditionsToColMap returns a map of left columns to right columns -// that are being equated in the specified conditions. leftCols is used -// to identify which column is a left column. 
-func (c *CustomFuncs) eqConditionsToColMap( - filters memo.FiltersExpr, leftCols opt.ColSet, -) map[opt.ColumnID]opt.ColumnID { - eqColMap := make(map[opt.ColumnID]opt.ColumnID) - - for i := range filters { - eq, _ := filters[i].Condition.(*memo.EqExpr) - if eq == nil { - continue - } - - leftVarExpr, _ := eq.Left.(*memo.VariableExpr) - rightVarExpr, _ := eq.Right.(*memo.VariableExpr) - if leftVarExpr == nil || rightVarExpr == nil { - continue - } - - if leftCols.Contains(leftVarExpr.Col) { - eqColMap[leftVarExpr.Col] = rightVarExpr.Col - } else { - eqColMap[rightVarExpr.Col] = leftVarExpr.Col - } - } - - return eqColMap -} - // JoinFiltersMatchAllLeftRows returns true when each row in the given join's // left input matches at least one row from the right input, according to the -// join filters. This is true when the following conditions are satisfied: -// -// 1. Each conjunct in the join condition is an equality between a not-null -// column from the left input and a not-null column from the right input. -// 2. All left input equality columns come from a single table (called its -// "equality table"), as do all right input equality columns (can be -// different table). -// 3. The right input contains every row from its equality table. There may be -// a subset of columns from the table, and/or duplicate rows, but every row -// must be present. -// 4. If the left equality table is the same as the right equality table, then -// it's the self-join case. The columns in each equality pair must have the -// same ordinal position in the table. -// 5. If the left equality table is different than the right equality table, -// then it's the foreign-key case. The left equality columns must map to -// a foreign key on the left equality table, and the right equality columns -// to the corresponding referenced columns in the right equality table. -// +// join filters. 
func (c *CustomFuncs) JoinFiltersMatchAllLeftRows( - left, right memo.RelExpr, filters memo.FiltersExpr, + left, right memo.RelExpr, on memo.FiltersExpr, ) bool { - unfilteredCols := c.deriveUnfilteredCols(right) - if unfilteredCols.Empty() { - // Condition #3: right input has no columns which contain values from - // every row. - return false - } - - leftColIDs := left.Relational().NotNullCols - rightColIDs := right.Relational().NotNullCols - - md := c.f.Metadata() - - var leftTab, rightTab opt.TableID - - // Any left columns that don't match conditions 1-4 end up in this set. - var remainingLeftColIDs opt.ColSet - - for i := range filters { - eq, _ := filters[i].Condition.(*memo.EqExpr) - if eq == nil { - // Condition #1: conjunct is not an equality comparison. - return false - } - - leftVar, _ := eq.Left.(*memo.VariableExpr) - rightVar, _ := eq.Right.(*memo.VariableExpr) - if leftVar == nil || rightVar == nil { - // Condition #1: conjunct does not compare two columns. - return false - } - - leftColID := leftVar.Col - rightColID := rightVar.Col - - // Normalize leftColID to come from leftColIDs. - if !leftColIDs.Contains(leftColID) { - leftColID, rightColID = rightColID, leftColID - } - if !leftColIDs.Contains(leftColID) || !rightColIDs.Contains(rightColID) { - // Condition #1: columns don't come from both sides of join, or - // columns are nullable. - return false - } - - if !unfilteredCols.Contains(rightColID) { - // Condition #3: right column doesn't contain values from every row. - return false - } - - if leftTab == 0 { - leftTab = md.ColumnMeta(leftColID).Table - rightTab = md.ColumnMeta(rightColID).Table - if leftTab == 0 || rightTab == 0 { - // Condition #2: Columns don't come from base tables. - return false - } - } else if md.ColumnMeta(leftColID).Table != leftTab { - // Condition #2: All left columns don't come from same table. 
- return false - } else if md.ColumnMeta(rightColID).Table != rightTab { - // Condition #2: All right columns don't come from same table. - return false - } - - if md.TableMeta(leftTab).Table == md.TableMeta(rightTab).Table { - // Check self-join case. - leftColOrd := leftTab.ColumnOrdinal(leftColID) - rightColOrd := rightTab.ColumnOrdinal(rightColID) - if leftColOrd != rightColOrd { - // Condition #4: Left and right column ordinals do not match. - return false - } - } else { - // Column could be a potential foreign key match so save it. - remainingLeftColIDs.Add(leftColID) - } - } - - if remainingLeftColIDs.Empty() { - return true - } - - var leftRightColMap map[opt.ColumnID]opt.ColumnID - // Condition #5: All remaining left columns correspond to a validated foreign - // key relation. - leftTabMeta := md.TableMeta(leftTab) - if leftTabMeta.IgnoreForeignKeys { - // We are not allowed to use any of the left table's outbound foreign keys. - return false - } - rightTabMeta := md.TableMeta(rightTab) - - // Search for validated foreign key references from the left table to the - // right table. - for i, cnt := 0, leftTabMeta.Table.OutboundForeignKeyCount(); i < cnt; i++ { - fkRef := leftTabMeta.Table.OutboundForeignKey(i) - if fkRef.ReferencedTableID() != rightTabMeta.Table.ID() || !fkRef.Validated() { - continue - } - fkTable := md.TableByStableID(fkRef.ReferencedTableID()) - if fkTable == nil { - continue - } - - var leftIndexCols opt.ColSet - numCols := fkRef.ColumnCount() - for j := 0; j < numCols; j++ { - ord := fkRef.OriginColumnOrdinal(leftTabMeta.Table, j) - leftIndexCols.Add(leftTab.ColumnID(ord)) - } - - if !remainingLeftColIDs.SubsetOf(leftIndexCols) { - continue - } - - // Build a mapping of left to right columns as specified - // in the filter conditions - this is used to detect - // whether the filter conditions follow the foreign key - // constraint exactly. 
- if leftRightColMap == nil { - leftRightColMap = c.eqConditionsToColMap(filters, leftColIDs) - } - - // Loop through all columns in fk index that also exist in LHS of match condition, - // and ensure that they correspond to the correct RHS column according to the - // foreign key relation. In other words, each LHS column's index ordinal - // in the foreign key index matches that of the RHS column (in the index being - // referenced) that it's being equated to. - fkMatch := true - for j := 0; j < numCols; j++ { - indexLeftCol := leftTab.ColumnID(fkRef.OriginColumnOrdinal(leftTabMeta.Table, j)) - - // Not every fk column needs to be in the equality conditions. - if !remainingLeftColIDs.Contains(indexLeftCol) { - continue - } - - indexRightCol := rightTab.ColumnID(fkRef.ReferencedColumnOrdinal(fkTable, j)) - - if rightCol, ok := leftRightColMap[indexLeftCol]; !ok || rightCol != indexRightCol { - fkMatch = false - break - } - } - - // Condition #5 satisfied. - if fkMatch { - return true - } - } - - return false -} - -// deriveUnfilteredCols returns the subset of the given input expression's -// output columns that have values for every row in their owner table. In other -// words, columns from tables that have had none of their rows filtered (but -// it's OK if rows have been duplicated). -// -// deriveUnfilteredCols recursively derives the property, and populates the -// props.Relational.Rule.UnfilteredCols field as it goes to make future calls -// faster. -func (c *CustomFuncs) deriveUnfilteredCols(in memo.RelExpr) opt.ColSet { - // If the UnfilteredCols property has already been derived, return it - // immediately. - relational := in.Relational() - if relational.IsAvailable(props.UnfilteredCols) { - return relational.Rule.UnfilteredCols - } - relational.Rule.Available |= props.UnfilteredCols - - // Derive the UnfilteredCols property now. - // TODO(andyk): Could add other cases, such as outer joins and union. 
- switch t := in.(type) { - case *memo.ScanExpr: - // All un-limited, unconstrained output columns are unfiltered columns. - if t.HardLimit == 0 && t.Constraint == nil { - relational.Rule.UnfilteredCols = relational.OutputCols - } - - case *memo.ProjectExpr: - // Project never filters rows, so it passes through unfiltered columns. - unfilteredCols := c.deriveUnfilteredCols(t.Input) - relational.Rule.UnfilteredCols = unfilteredCols.Intersection(relational.OutputCols) - - case *memo.InnerJoinExpr, *memo.InnerJoinApplyExpr: - left := t.Child(0).(memo.RelExpr) - right := t.Child(1).(memo.RelExpr) - on := *t.Child(2).(*memo.FiltersExpr) - - // Cross join always preserves left/right rows. - isCrossJoin := on.IsTrue() - - // Inner joins may preserve left/right rows, according to - // JoinFiltersMatchAllLeftRows conditions. - if isCrossJoin || c.JoinFiltersMatchAllLeftRows(left, right, on) { - relational.Rule.UnfilteredCols.UnionWith(c.deriveUnfilteredCols(left)) - } - if isCrossJoin || c.JoinFiltersMatchAllLeftRows(right, left, on) { - relational.Rule.UnfilteredCols.UnionWith(c.deriveUnfilteredCols(right)) - } - } - - return relational.Rule.UnfilteredCols + // Asking whether a join will match all left rows is the same as asking + // whether an inner join with the same inputs would filter any rows from its + // left input. 
+ multiplicity := memo.GetJoinMultiplicityFromInputs(opt.InnerJoinOp, left, right, on) + return multiplicity.JoinPreservesLeftRows() } // CanExtractJoinEquality returns true if: diff --git a/pkg/sql/opt/norm/rules/join.opt b/pkg/sql/opt/norm/rules/join.opt index 880710eb71ff..f4dcc81a08fc 100644 --- a/pkg/sql/opt/norm/rules/join.opt +++ b/pkg/sql/opt/norm/rules/join.opt @@ -341,7 +341,7 @@ (LeftJoin | LeftJoinApply | FullJoin $left:* $right:* - $on:^[] & (JoinFiltersMatchAllLeftRows $left $right $on) + $on:* & (JoinFiltersMatchAllLeftRows $left $right $on) $private:* ) => @@ -355,7 +355,7 @@ (FullJoin $left:* $right:* - $on:^[] & (JoinFiltersMatchAllLeftRows $right $left $on) + $on:* & (JoinFiltersMatchAllLeftRows $right $left $on) $private:* ) => diff --git a/pkg/sql/opt/norm/testdata/rules/join b/pkg/sql/opt/norm/testdata/rules/join index d3250f47b04b..2284face87eb 100644 --- a/pkg/sql/opt/norm/testdata/rules/join +++ b/pkg/sql/opt/norm/testdata/rules/join @@ -1900,6 +1900,29 @@ inner-join (hash) └── filters └── d.z:3 = c.z:6 [outer=(3,6), constraints=(/3: (/NULL - ]; /6: (/NULL - ]), fd=(3)==(6), (6)==(3)] +# Cross join case. The presence of a not-null foreign key implies that there +# will be at least one right row when there is at least one left row, so left +# rows will always be matched at least once. +norm expect=SimplifyLeftJoinWithFilters +SELECT * +FROM d +LEFT OUTER JOIN c +ON True +---- +inner-join (cross) + ├── columns: x:1!null y:2!null z:3!null x:4!null y:5!null z:6!null + ├── key: (1,4) + ├── fd: (1)-->(2,3), (4)-->(5,6) + ├── scan d + │ ├── columns: d.x:1!null d.y:2!null d.z:3!null + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + ├── scan c + │ ├── columns: c.x:4!null c.y:5!null c.z:6!null + │ ├── key: (4) + │ └── fd: (4)-->(5,6) + └── filters (true) + # Can't simplify: joins on non-foreign keys. 
norm expect-not=(SimplifyRightJoinWithFilters,SimplifyLeftJoinWithFilters) SELECT * diff --git a/pkg/sql/opt/norm/testdata/rules/with b/pkg/sql/opt/norm/testdata/rules/with index d5df298c9cc2..2aee2a975649 100644 --- a/pkg/sql/opt/norm/testdata/rules/with +++ b/pkg/sql/opt/norm/testdata/rules/with @@ -83,6 +83,7 @@ inner-join (cross) ├── key: () ├── fd: ()-->(2,3) ├── prune: (2,3) + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) ├── values │ ├── columns: "?column?":2(int!null) │ ├── cardinality: [1 - 1] @@ -302,6 +303,7 @@ with &1 (foo) ├── key: () ├── fd: ()-->(3-6) ├── prune: (3-6) + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) ├── cte-uses │ ├── &1: count=2 used-columns=(1) │ └── &2: count=2 used-columns=(2) @@ -314,6 +316,7 @@ with &1 (foo) │ ├── fd: ()-->(3-5) │ ├── prune: (3-5) │ ├── join-size: 3 + │ ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) │ ├── cte-uses │ │ ├── &1: count=1 used-columns=(1) │ │ └── &2: count=2 used-columns=(2) @@ -326,6 +329,7 @@ with &1 (foo) │ │ ├── fd: ()-->(3,4) │ │ ├── prune: (3,4) │ │ ├── join-size: 2 + │ │ ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) │ │ ├── cte-uses │ │ │ ├── &1: count=1 used-columns=(1) │ │ │ └── &2: count=1 used-columns=(2) diff --git a/pkg/sql/opt/props/logical.go b/pkg/sql/opt/props/logical.go index 7cd4c3b014d8..44a9e9471c3c 100644 --- a/pkg/sql/opt/props/logical.go +++ b/pkg/sql/opt/props/logical.go @@ -31,10 +31,6 @@ const ( // field is populated. InterestingOrderings - // UnfilteredCols is set when the Relational.Rule.UnfilteredCols field is - // populated. - UnfilteredCols - // HasHoistableSubquery is set when the Scalar.Rule.HasHoistableSubquery // is populated. HasHoistableSubquery @@ -42,6 +38,10 @@ const ( // JoinSize is set when the Relational.Rule.JoinSize field is populated. JoinSize + // MultiplicityProps is set when the Relational.Rule.MultiplicityProps + // field is populated. 
+ MultiplicityProps + // WithUses is set when the Shared.Rule.WithUses field is populated. WithUses ) @@ -331,21 +331,21 @@ type Relational struct { // been set. InterestingOrderings opt.OrderingSet - // UnfilteredCols is the set of output columns that have values for every - // row in their owner table. Rows may be duplicated, but no rows can be - // missing. For example, an unconstrained, unlimited Scan operator can - // add all of its output columns to this property, but a Select operator - // cannot add any columns, as it may have filtered rows. - // - // UnfilteredCols is lazily populated by the SimplifyLeftJoinWithFilters - // and SimplifyRightJoinWithFilters rules. It is only valid once the - // Rule.Available.UnfilteredCols bit has been set. - UnfilteredCols opt.ColSet - // JoinSize is the number of relations being *inner* joined underneath // this node. It is used to only reorder joins via AssociateJoin up to // a certain limit. JoinSize int + + // MultiplicityProps is a struct that describes how rows from the input of + // a join are affected by the join. Rows from the left or right input are + // described as being duplicated and/or filtered. + // MultiplicityProps also contains a ColSet that contains columns from base + // tables that are guaranteed not to have been filtered. This ColSet is used + // in non-join operators as well. + // + // MultiplicityProps is lazily populated by multiplicity_builder.go. It is + // only valid once the Rule.Available.MultiplicityProps bit has been set. + MultiplicityProps JoinMultiplicity } } diff --git a/pkg/sql/opt/props/multiplicity.go b/pkg/sql/opt/props/multiplicity.go new file mode 100644 index 000000000000..989664a28d03 --- /dev/null +++ b/pkg/sql/opt/props/multiplicity.go @@ -0,0 +1,178 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package props + +import ( + "bytes" + + "github.com/cockroachdb/cockroach/pkg/sql/opt" +) + +// MultiplicityValue is a bitfield that describes whether a join duplicates +// and/or filters rows from a particular input. +type MultiplicityValue uint8 + +const ( + // MultiplicityIndeterminateVal indicates that no guarantees can be made about + // the effect the join will have on its input rows. + MultiplicityIndeterminateVal MultiplicityValue = 0 + + // MultiplicityNotDuplicatedVal indicates that the join will not include input + // rows in its output more than once. + MultiplicityNotDuplicatedVal MultiplicityValue = 1 << (iota - 1) + + // MultiplicityPreservedVal indicates that the join will include all input + // rows in its output. + MultiplicityPreservedVal +) + +// JoinMultiplicity answers queries about how a join will affect the rows from +// its inputs. Left and right input rows can be duplicated and/or filtered by +// the join. As an example: +// +// CREATE TABLE xy (x INT PRIMARY KEY, y INT); +// CREATE TABLE uv (u INT PRIMARY KEY, v INT); +// SELECT * FROM xy FULL JOIN uv ON x=u; +// +// 1. Are rows from xy or uv being duplicated by the join? +// 2. Are any rows being filtered from the join output? +// +// A JoinMultiplicity constructed for the join is able to answer either of the +// above questions by checking one of the MultiplicityValue bit flags. The +// not-duplicated and preserved flags are always unset for a join unless it can +// be statically proven that no rows from the given input will be duplicated or +// filtered respectively. 
As an example, take the following query: +// +// SELECT * FROM xy INNER JOIN uv ON y = v; +// +// At execution time, it may be that every row from xy will be included in the +// join output exactly once. However, since this cannot be proven before +// runtime, the not-duplicated and preserved flags must remain unset. +// +// When it is stored in the Relational properties of an operator other than a +// join, JoinMultiplicity is simply used to bubble up unfiltered output columns. +// +// After initial construction by multiplicity_builder.go, JoinMultiplicity +// should be considered immutable. +type JoinMultiplicity struct { + // UnfilteredCols contains all columns from the operator's input(s) that are + // guaranteed to never have been filtered. Row duplication is allowed and + // other columns from the same base table need not be included. This allows + // the validity of properties from the base table to be verified (for example, + // a foreign-key relation). + // + // UnfilteredCols can be populated for non-join as well as join operators + // because the UnfilteredCols fields of a join's inputs are used in the + // construction of its JoinMultiplicity. + // + // UnfilteredCols should only be used by MultiplicityBuilder to aid in + // initializing the other fields. Other callers should only use the property + // methods (e.g. JoinPreservesLeftRows). + UnfilteredCols opt.ColSet + + // LeftMultiplicity and RightMultiplicity describe how the left and right + // input rows respectively will be affected by the join operator. + // As an example, using the query from above: + // + // SELECT * FROM xy FULL JOIN uv ON x=u; + // + // MultiplicityNotDuplicatedVal: both LeftMultiplicity and RightMultiplicity + // would set the not-duplicated flag because the equality is between key + // columns, which means that no row can match more than once. 
+ // + // MultiplicityPreservedVal: both fields would set the preserved flag because + // the FullJoin will add back any rows that don't match on the filter + // conditions. + LeftMultiplicity MultiplicityValue + RightMultiplicity MultiplicityValue +} + +// JoinDoesNotDuplicateLeftRows returns true when rows from the left input will +// not be included in the join output more than once. +func (mp *JoinMultiplicity) JoinDoesNotDuplicateLeftRows() bool { + return mp.LeftMultiplicity&MultiplicityNotDuplicatedVal != 0 +} + +// JoinDoesNotDuplicateRightRows returns true when rows from the right input +// will not be included in the join output more than once. +func (mp *JoinMultiplicity) JoinDoesNotDuplicateRightRows() bool { + return mp.RightMultiplicity&MultiplicityNotDuplicatedVal != 0 +} + +// JoinPreservesLeftRows returns true when all rows from the left input are +// guaranteed to be included in the join output. +func (mp *JoinMultiplicity) JoinPreservesLeftRows() bool { + return mp.LeftMultiplicity&MultiplicityPreservedVal != 0 +} + +// JoinPreservesRightRows returns true when all rows from the right input are +// guaranteed to be included in the join output. +func (mp *JoinMultiplicity) JoinPreservesRightRows() bool { + return mp.RightMultiplicity&MultiplicityPreservedVal != 0 +} + +// String returns a formatted string containing flags for the left and right +// inputs that indicate how many times any given input row can be guaranteed to +// show up in the join output. 
+func (mp *JoinMultiplicity) String() string { + if !mp.isInteresting() { + return "" + } + + var buf bytes.Buffer + const zeroOrMore = "zero-or-more" + const oneOrZero = "one-or-zero" + const oneOrMore = "one-or-more" + const exactlyOne = "exactly-one" + + isFirstFlag := true + + writeFlag := func(name string) { + if !isFirstFlag { + buf.WriteString(", ") + } + buf.WriteString(name) + isFirstFlag = false + } + + outputFlag := func(doesNotDuplicateRows, preservesRows bool) { + if doesNotDuplicateRows { + if preservesRows { + writeFlag(exactlyOne) + } else { + writeFlag(oneOrZero) + } + } else { + if preservesRows { + writeFlag(oneOrMore) + } else { + writeFlag(zeroOrMore) + } + } + } + + buf.WriteString("left-rows(") + outputFlag(mp.JoinDoesNotDuplicateLeftRows(), mp.JoinPreservesLeftRows()) + + isFirstFlag = true + buf.WriteString("), right-rows(") + outputFlag(mp.JoinDoesNotDuplicateRightRows(), mp.JoinPreservesRightRows()) + buf.WriteString(")") + + return buf.String() +} + +// isInteresting returns true when rows from either of the inputs are guaranteed +// not to be duplicated or filtered. +func (mp *JoinMultiplicity) isInteresting() bool { + return mp.JoinDoesNotDuplicateLeftRows() || mp.JoinDoesNotDuplicateRightRows() || + mp.JoinPreservesLeftRows() || mp.JoinPreservesRightRows() +} diff --git a/pkg/sql/opt/props/multiplicity_test.go b/pkg/sql/opt/props/multiplicity_test.go new file mode 100644 index 000000000000..b22355a9df5d --- /dev/null +++ b/pkg/sql/opt/props/multiplicity_test.go @@ -0,0 +1,150 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package props + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/sql/opt" + "github.com/stretchr/testify/require" +) + +var bothIndeterminate = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityIndeterminateVal, + RightMultiplicity: MultiplicityIndeterminateVal, +} + +var bothNoDup = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityNotDuplicatedVal, + RightMultiplicity: MultiplicityNotDuplicatedVal, +} + +var bothPreserved = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityPreservedVal, + RightMultiplicity: MultiplicityPreservedVal, +} + +var bothNoDupBothPreserved = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityNotDuplicatedVal | MultiplicityPreservedVal, + RightMultiplicity: MultiplicityNotDuplicatedVal | MultiplicityPreservedVal, +} + +var leftIndeterminateRightPreserved = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityIndeterminateVal, + RightMultiplicity: MultiplicityPreservedVal, +} + +var leftIndeterminateRightNoDup = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityIndeterminateVal, + RightMultiplicity: MultiplicityNotDuplicatedVal, +} + +var rightIndeterminateLeftPreserved = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityPreservedVal, + RightMultiplicity: MultiplicityIndeterminateVal, +} + +var rightIndeterminateLeftNoDup = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityNotDuplicatedVal, + RightMultiplicity: MultiplicityIndeterminateVal, +} + +var bothNoDupLeftPreserved = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityNotDuplicatedVal | MultiplicityPreservedVal, + RightMultiplicity: MultiplicityNotDuplicatedVal, +} + +var bothPreservedLeftNoDup = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: 
MultiplicityPreservedVal | MultiplicityNotDuplicatedVal, + RightMultiplicity: MultiplicityPreservedVal, +} + +var bothNoDupRightPreserved = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityNotDuplicatedVal, + RightMultiplicity: MultiplicityNotDuplicatedVal | MultiplicityPreservedVal, +} + +var bothPreservedRightNoDup = JoinMultiplicity{ + UnfilteredCols: opt.ColSet{}, + LeftMultiplicity: MultiplicityPreservedVal, + RightMultiplicity: MultiplicityPreservedVal | MultiplicityNotDuplicatedVal, +} + +func TestJoinMultiplicity_JoinDoesNotDuplicateLeftRows(t *testing.T) { + require.Equal(t, false, bothIndeterminate.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, true, bothNoDup.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, false, bothPreserved.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, true, bothNoDupBothPreserved.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, false, leftIndeterminateRightPreserved.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, false, leftIndeterminateRightNoDup.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, false, rightIndeterminateLeftPreserved.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, true, rightIndeterminateLeftNoDup.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, true, bothNoDupLeftPreserved.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, true, bothPreservedLeftNoDup.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, true, bothNoDupRightPreserved.JoinDoesNotDuplicateLeftRows()) + require.Equal(t, false, bothPreservedRightNoDup.JoinDoesNotDuplicateLeftRows()) +} + +func TestJoinMultiplicity_JoinDoesNotDuplicateRightRows(t *testing.T) { + require.Equal(t, false, bothIndeterminate.JoinDoesNotDuplicateRightRows()) + require.Equal(t, true, bothNoDup.JoinDoesNotDuplicateRightRows()) + require.Equal(t, false, bothPreserved.JoinDoesNotDuplicateRightRows()) + require.Equal(t, true, bothNoDupBothPreserved.JoinDoesNotDuplicateRightRows()) + require.Equal(t, false, 
leftIndeterminateRightPreserved.JoinDoesNotDuplicateRightRows()) + require.Equal(t, true, leftIndeterminateRightNoDup.JoinDoesNotDuplicateRightRows()) + require.Equal(t, false, rightIndeterminateLeftPreserved.JoinDoesNotDuplicateRightRows()) + require.Equal(t, false, rightIndeterminateLeftNoDup.JoinDoesNotDuplicateRightRows()) + require.Equal(t, true, bothNoDupLeftPreserved.JoinDoesNotDuplicateRightRows()) + require.Equal(t, false, bothPreservedLeftNoDup.JoinDoesNotDuplicateRightRows()) + require.Equal(t, true, bothNoDupRightPreserved.JoinDoesNotDuplicateRightRows()) + require.Equal(t, true, bothPreservedRightNoDup.JoinDoesNotDuplicateRightRows()) +} + +func TestJoinMultiplicity_JoinPreservesLeftRows(t *testing.T) { + require.Equal(t, false, bothIndeterminate.JoinPreservesLeftRows()) + require.Equal(t, false, bothNoDup.JoinPreservesLeftRows()) + require.Equal(t, true, bothPreserved.JoinPreservesLeftRows()) + require.Equal(t, true, bothNoDupBothPreserved.JoinPreservesLeftRows()) + require.Equal(t, false, leftIndeterminateRightPreserved.JoinPreservesLeftRows()) + require.Equal(t, false, leftIndeterminateRightNoDup.JoinPreservesLeftRows()) + require.Equal(t, true, rightIndeterminateLeftPreserved.JoinPreservesLeftRows()) + require.Equal(t, false, rightIndeterminateLeftNoDup.JoinPreservesLeftRows()) + require.Equal(t, true, bothNoDupLeftPreserved.JoinPreservesLeftRows()) + require.Equal(t, true, bothPreservedLeftNoDup.JoinPreservesLeftRows()) + require.Equal(t, false, bothNoDupRightPreserved.JoinPreservesLeftRows()) + require.Equal(t, true, bothPreservedRightNoDup.JoinPreservesLeftRows()) +} + +func TestJoinMultiplicity_JoinPreservesRightRows(t *testing.T) { + require.Equal(t, false, bothIndeterminate.JoinPreservesRightRows()) + require.Equal(t, false, bothNoDup.JoinPreservesRightRows()) + require.Equal(t, true, bothPreserved.JoinPreservesRightRows()) + require.Equal(t, true, bothNoDupBothPreserved.JoinPreservesRightRows()) + require.Equal(t, true, 
leftIndeterminateRightPreserved.JoinPreservesRightRows()) + require.Equal(t, false, leftIndeterminateRightNoDup.JoinPreservesRightRows()) + require.Equal(t, false, rightIndeterminateLeftPreserved.JoinPreservesRightRows()) + require.Equal(t, false, rightIndeterminateLeftNoDup.JoinPreservesRightRows()) + require.Equal(t, false, bothNoDupLeftPreserved.JoinPreservesRightRows()) + require.Equal(t, true, bothPreservedLeftNoDup.JoinPreservesRightRows()) + require.Equal(t, true, bothNoDupRightPreserved.JoinPreservesRightRows()) + require.Equal(t, true, bothPreservedRightNoDup.JoinPreservesRightRows()) +} diff --git a/pkg/sql/opt/testutils/opttester/opt_tester.go b/pkg/sql/opt/testutils/opttester/opt_tester.go index ba6ae12bfb10..25fd6d65f5eb 100644 --- a/pkg/sql/opt/testutils/opttester/opt_tester.go +++ b/pkg/sql/opt/testutils/opttester/opt_tester.go @@ -607,6 +607,9 @@ func fillInLazyProps(e opt.Expr) { // Make sure the interesting orderings are calculated. xform.DeriveInterestingOrderings(rel) + + // Make sure the multiplicity is populated. 
+ memo.DeriveJoinMultiplicity(rel) } for i, n := 0, e.ChildCount(); i < n; i++ { diff --git a/pkg/sql/opt/xform/testdata/rules/groupby b/pkg/sql/opt/xform/testdata/rules/groupby index bb686abee482..b0d71ec85ec0 100644 --- a/pkg/sql/opt/xform/testdata/rules/groupby +++ b/pkg/sql/opt/xform/testdata/rules/groupby @@ -1333,7 +1333,7 @@ memo (optimized, ~10KB, required=[presentation: array_agg:5]) memo SELECT sum(k) FROM (SELECT * FROM kuvw WHERE u=v) GROUP BY u,w ---- -memo (optimized, ~9KB, required=[presentation: sum:5]) +memo (optimized, ~10KB, required=[presentation: sum:5]) ├── G1: (project G2 G3 sum) │ └── [presentation: sum:5] │ ├── best: (project G2 G3 sum) @@ -1445,7 +1445,7 @@ memo (optimized, ~5KB, required=[presentation: u:2,v:3,w:4]) memo SELECT DISTINCT ON (u, v) u, v, w FROM kuvw ---- -memo (optimized, ~4KB, required=[presentation: u:2,v:3,w:4]) +memo (optimized, ~5KB, required=[presentation: u:2,v:3,w:4]) ├── G1: (distinct-on G2 G3 cols=(2,3)) (distinct-on G2 G3 cols=(2,3),ordering=+2,+3) (distinct-on G2 G3 cols=(2,3),ordering=+3) │ └── [presentation: u:2,v:3,w:4] │ ├── best: (distinct-on G2="[ordering: +2,+3]" G3 cols=(2,3),ordering=+2,+3) @@ -1718,7 +1718,7 @@ memo (optimized, ~4KB, required=[presentation: u:2,v:3,w:4] [ordering: +4]) memo SELECT (SELECT w FROM kuvw WHERE v=1 AND x=u) FROM xyz ORDER BY x+1, x ---- -memo (optimized, ~24KB, required=[presentation: w:8] [ordering: +9,+1]) +memo (optimized, ~25KB, required=[presentation: w:8] [ordering: +9,+1]) ├── G1: (project G2 G3 x) │ ├── [presentation: w:8] [ordering: +9,+1] │ │ ├── best: (sort G1) diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index d06b316b3899..f8e34a81e85b 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -354,7 +354,7 @@ memo (optimized, ~12KB, required=[presentation: a:1,b:2,c:3,x:5,y:6,z:7]) memo SELECT * FROM abc INNER HASH JOIN xyz ON a=x ---- -memo (optimized, ~8KB, 
required=[presentation: a:1,b:2,c:3,x:5,y:6,z:7]) +memo (optimized, ~9KB, required=[presentation: a:1,b:2,c:3,x:5,y:6,z:7]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) │ └── [presentation: a:1,b:2,c:3,x:5,y:6,z:7] │ ├── best: (inner-join G2 G3 G4) @@ -1861,7 +1861,7 @@ inner-join (lookup pqr) memo SELECT q,r,s FROM pqr WHERE q = 1 AND r = 2 ---- -memo (optimized, ~15KB, required=[presentation: q:2,r:3,s:4]) +memo (optimized, ~16KB, required=[presentation: q:2,r:3,s:4]) ├── G1: (select G2 G3) (lookup-join G4 G5 pqr,keyCols=[1],outCols=(2-4)) (select G6 G7) (select G8 G9) (select G10 G9) │ └── [presentation: q:2,r:3,s:4] │ ├── best: (lookup-join G4 G5 pqr,keyCols=[1],outCols=(2-4)) @@ -2071,7 +2071,7 @@ select memo SELECT p,q,r,s FROM pqr WHERE q = 1 AND r = 1 AND s = 'foo' ---- -memo (optimized, ~31KB, required=[presentation: p:1,q:2,r:3,s:4]) +memo (optimized, ~32KB, required=[presentation: p:1,q:2,r:3,s:4]) ├── G1: (select G2 G3) (lookup-join G4 G5 pqr,keyCols=[1],outCols=(1-4)) (zigzag-join G3 pqr@q pqr@s) (zigzag-join G3 pqr@q pqr@rs) (lookup-join G6 G7 pqr,keyCols=[1],outCols=(1-4)) (select G8 G9) (select G10 G11) (select G12 G7) (select G13 G7) │ └── [presentation: p:1,q:2,r:3,s:4] │ ├── best: (zigzag-join G3 pqr@q pqr@s) @@ -2929,7 +2929,7 @@ inner-join (merge) memo expect=AssociateJoin SELECT * FROM abc, stu, xyz WHERE abc.a=stu.s AND stu.s=xyz.x ---- -memo (optimized, ~37KB, required=[presentation: a:1,b:2,c:3,s:5,t:6,u:7,x:8,y:9,z:10]) +memo (optimized, ~38KB, required=[presentation: a:1,b:2,c:3,s:5,t:6,u:7,x:8,y:9,z:10]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (merge-join G2 G3 G5 inner-join,+1,+5) (merge-join G3 G2 G5 inner-join,+5,+1) (lookup-join G3 G5 abc@ab,keyCols=[5],outCols=(1-3,5-10)) (inner-join G6 G7 G8) (inner-join G9 G10 G11) (inner-join G7 G6 G8) (merge-join G6 G7 G5 inner-join,+5,+1) (inner-join G10 G9 G11) (merge-join G9 G10 G5 inner-join,+8,+5) (merge-join G7 G6 G5 inner-join,+1,+5) (lookup-join G7 G5 
stu,keyCols=[1],outCols=(1-3,5-10)) (inner-join G9 G12 G13) (merge-join G10 G9 G5 inner-join,+5,+8) (lookup-join G10 G5 xyz@xy,keyCols=[5],outCols=(1-3,5-10)) (inner-join G12 G9 G13) (merge-join G9 G12 G5 inner-join,+8,+1) (merge-join G12 G9 G5 inner-join,+1,+8) (lookup-join G12 G5 xyz@xy,keyCols=[1],outCols=(1-3,5-10)) │ └── [presentation: a:1,b:2,c:3,s:5,t:6,u:7,x:8,y:9,z:10] │ ├── best: (merge-join G6="[ordering: +5]" G7="[ordering: +(1|8)]" G5 inner-join,+5,+1) diff --git a/pkg/sql/opt/xform/testdata/rules/join_order b/pkg/sql/opt/xform/testdata/rules/join_order index 33f782e42c8e..891ee4261366 100644 --- a/pkg/sql/opt/xform/testdata/rules/join_order +++ b/pkg/sql/opt/xform/testdata/rules/join_order @@ -135,7 +135,7 @@ inner-join (lookup bx) memo join-limit=3 SELECT * FROM bx, cy, abc WHERE a = 1 AND abc.b = bx.b AND abc.c = cy.c ---- -memo (optimized, ~32KB, required=[presentation: b:1,x:2,c:3,y:4,a:5,b:6,c:7,d:8]) +memo (optimized, ~33KB, required=[presentation: b:1,x:2,c:3,y:4,a:5,b:6,c:7,d:8]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (merge-join G2 G3 G5 inner-join,+1,+6) (lookup-join G3 G5 bx,keyCols=[6],outCols=(1-8)) (inner-join G6 G7 G8) (inner-join G9 G10 G11) (inner-join G7 G6 G8) (merge-join G6 G7 G5 inner-join,+3,+7) (inner-join G10 G9 G11) (lookup-join G7 G5 cy,keyCols=[7],outCols=(1-8)) (lookup-join G12 G11 abc,keyCols=[11],outCols=(1-8)) │ └── [presentation: b:1,x:2,c:3,y:4,a:5,b:6,c:7,d:8] │ ├── best: (lookup-join G3 G5 bx,keyCols=[6],outCols=(1-8)) @@ -250,6 +250,7 @@ inner-join (cross) ├── fd: ()-->(1-4), (5)-->(6), (7)-->(8), (9)-->(10) ├── prune: (2-10) ├── interesting orderings: (+7) (+9) (+5) (+1) + ├── multiplicity: left-rows(one-or-zero), right-rows(zero-or-more) ├── inner-join (cross) │ ├── columns: t.public.bx.b:5(int!null) t.public.bx.x:6(int) t.public.cy.c:7(int!null) t.public.cy.y:8(int) t.public.dz.d:9(int!null) t.public.dz.z:10(int) │ ├── stats: [rows=1e+09] @@ -312,7 +313,7 @@ inner-join (cross) memo join-limit=1 
SELECT * FROM bx, cy, dz, abc WHERE a = 1 ---- -memo (optimized, ~12KB, required=[presentation: b:1,x:2,c:3,y:4,d:5,z:6,a:7,b:8,c:9,d:10]) +memo (optimized, ~13KB, required=[presentation: b:1,x:2,c:3,y:4,d:5,z:6,a:7,b:8,c:9,d:10]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) │ └── [presentation: b:1,x:2,c:3,y:4,d:5,z:6,a:7,b:8,c:9,d:10] │ ├── best: (inner-join G3 G2 G4) diff --git a/pkg/sql/opt/xform/testdata/rules/limit b/pkg/sql/opt/xform/testdata/rules/limit index c932c224f03f..a4b4a6d2a46e 100644 --- a/pkg/sql/opt/xform/testdata/rules/limit +++ b/pkg/sql/opt/xform/testdata/rules/limit @@ -103,7 +103,7 @@ limit memo SELECT s FROM a WHERE s='foo' LIMIT 1 ---- -memo (optimized, ~6KB, required=[presentation: s:4]) +memo (optimized, ~7KB, required=[presentation: s:4]) ├── G1: (limit G2 G3) (scan a@s_idx,cols=(4),constrained,lim=1) (scan a@si_idx,cols=(4),constrained,lim=1) │ └── [presentation: s:4] │ ├── best: (scan a@s_idx,cols=(4),constrained,lim=1) diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index aaf92aa3c638..1b1f9de8643d 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -759,7 +759,7 @@ project memo SELECT k FROM b WHERE j @> '{"a": "b"}' ---- -memo (optimized, ~6KB, required=[presentation: k:1]) +memo (optimized, ~7KB, required=[presentation: k:1]) ├── G1: (project G2 G3 k) │ └── [presentation: k:1] │ ├── best: (project G2 G3 k)