Skip to content

Commit

Permalink
opt: fix missing filters after join reordering
Browse files Browse the repository at this point in the history
This commit eliminates logic in the `assoc`, `leftAsscom`, and
`rightAsscom` functions in the join order builder that aimed to prevent
generating "orphaned" predicates, where one or more referenced relations
are not in a join's input. In rare cases, this logic had the side effect
of creating invalid conflict rules for edges, which could prevent valid
predicates from being added to reordered join trees.

It is safe to remove these conditionals because they are unnecessary.
The CD-C algorithm already prevents generation of orphaned predicates by
checking that the total eligibility set (TES) is a subset of a join's
input vertices. In our implementation, this is handled by the
`checkNonInnerJoin` and `checkInnerJoin` functions.

Fixes cockroachdb#76522

Release note (bug fix): A bug has been fixed which caused the query optimizer
to omit join filters in rare cases when reordering joins, which could
result in incorrect query results. This bug was present since v20.2.
  • Loading branch information
mgartner committed Feb 15, 2022
1 parent df90f09 commit 824f767
Show file tree
Hide file tree
Showing 5 changed files with 309 additions and 138 deletions.
195 changes: 95 additions & 100 deletions pkg/sql/opt/exec/execbuilder/testdata/join
Original file line number Diff line number Diff line change
Expand Up @@ -431,120 +431,115 @@ vectorized: true
│ render relname: relname
└── • hash join (inner)
│ columns: (attrelid, attname, attnum, attrelid, attname, attnum, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname, generate_series, oid, relname, relnamespace, oid, nspname, objid, refobjid, oid, relname, relkind)
│ columns: (oid, nspname, oid, relname, relnamespace, attrelid, attname, attnum, attrelid, attname, attnum, objid, refobjid, oid, relname, relkind, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname, generate_series)
│ estimated row count: 110,908 (missing stats)
│ equality: (oid) = (objid)
│ equality: (oid) = (attrelid)
├── • hash join (inner)
│ │ columns: (attrelid, attname, attnum, attrelid, attname, attnum, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname, generate_series, oid, relname, relnamespace, oid, nspname)
│ │ estimated row count: 114,302 (missing stats)
│ │ equality: (relnamespace) = (oid)
│ │ columns: (oid, nspname, oid, relname, relnamespace)
│ │ estimated row count: 9,801 (missing stats)
│ │ equality: (oid) = (relnamespace)
│ │
│ ├── • hash join (inner)
│ │ │ columns: (attrelid, attname, attnum, attrelid, attname, attnum, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname, generate_series, oid, relname, relnamespace)
│ │ │ estimated row count: 11,557 (missing stats)
│ │ │ equality: (attrelid) = (oid)
│ │ │ pred: attnum = confkey[generate_series]
│ │ │
│ │ ├── • hash join (inner)
│ │ │ │ columns: (attrelid, attname, attnum, attrelid, attname, attnum, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname, generate_series)
│ │ │ │ estimated row count: 3,502 (missing stats)
│ │ │ │ equality: (attrelid) = (confrelid)
│ │ │ │
│ │ │ ├── • virtual table
│ │ │ │ columns: (attrelid, attname, attnum)
│ │ │ │ estimated row count: 1,000 (missing stats)
│ │ │ │ table: pg_attribute@primary
│ │ │ │
│ │ │ └── • cross join (inner)
│ │ │ │ columns: (attrelid, attname, attnum, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname, generate_series)
│ │ │ │ estimated row count: 354 (missing stats)
│ │ │ │ pred: attnum = conkey[generate_series]
│ │ │ │
│ │ │ ├── • hash join (inner)
│ │ │ │ │ columns: (attrelid, attname, attnum, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname)
│ │ │ │ │ estimated row count: 107 (missing stats)
│ │ │ │ │ equality: (relnamespace) = (oid)
│ │ │ │ │
│ │ │ │ ├── • merge join (inner)
│ │ │ │ │ │ columns: (attrelid, attname, attnum, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace)
│ │ │ │ │ │ estimated row count: 105 (missing stats)
│ │ │ │ │ │ equality: (attrelid) = (oid)
│ │ │ │ │ │ merge ordering: +"(attrelid=oid)"
│ │ │ │ │ │
│ │ │ │ │ ├── • virtual table
│ │ │ │ │ │ columns: (attrelid, attname, attnum)
│ │ │ │ │ │ ordering: +attrelid
│ │ │ │ │ │ estimated row count: 1,000 (missing stats)
│ │ │ │ │ │ table: pg_attribute@pg_attribute_attrelid_idx
│ │ │ │ │ │
│ │ │ │ │ └── • virtual table lookup join (inner)
│ │ │ │ │ │ columns: (oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace)
│ │ │ │ │ │ ordering: +conrelid
│ │ │ │ │ │ estimated row count: 10 (missing stats)
│ │ │ │ │ │ table: pg_class@pg_class_oid_idx
│ │ │ │ │ │ equality: (conrelid) = (oid)
│ │ │ │ │ │ pred: relname = 'orders'
│ │ │ │ │ │
│ │ │ │ │ └── • filter
│ │ │ │ │ │ columns: (oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey)
│ │ │ │ │ │ ordering: +conrelid
│ │ │ │ │ │ estimated row count: 10 (missing stats)
│ │ │ │ │ │ filter: contype = 'f'
│ │ │ │ │ │
│ │ │ │ │ └── • virtual table
│ │ │ │ │ columns: (oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey)
│ │ │ │ │ ordering: +conrelid
│ │ │ │ │ estimated row count: 1,000 (missing stats)
│ │ │ │ │ table: pg_constraint@pg_constraint_conrelid_idx
│ │ │ │ │
│ │ │ │ └── • filter
│ │ │ │ │ columns: (oid, nspname)
│ │ │ │ │ estimated row count: 10 (missing stats)
│ │ │ │ │ filter: nspname = 'public'
│ │ │ │ │
│ │ │ │ └── • virtual table
│ │ │ │ columns: (oid, nspname)
│ │ │ │ estimated row count: 1,000 (missing stats)
│ │ │ │ table: pg_namespace@primary
│ │ │ │
│ │ │ └── • project set
│ │ │ │ columns: (generate_series)
│ │ │ │ estimated row count: 10
│ │ │ │ render 0: generate_series(1, 32)
│ │ │ │
│ │ │ └── • emptyrow
│ │ │ columns: ()
│ │ │
│ │ └── • virtual table
│ │ columns: (oid, relname, relnamespace)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: pg_class@primary
│ ├── • virtual table
│ │ columns: (oid, nspname)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: pg_namespace@primary
│ │
│ └── • virtual table
│ columns: (oid, nspname)
│ columns: (oid, relname, relnamespace)
│ estimated row count: 1,000 (missing stats)
│ table: pg_namespace@primary
│ table: pg_class@primary
└── • hash join (inner)
│ columns: (objid, refobjid, oid, relname, relkind)
│ estimated row count: 99 (missing stats)
│ equality: (refobjid) = (oid)
│ columns: (attrelid, attname, attnum, attrelid, attname, attnum, objid, refobjid, oid, relname, relkind, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname, generate_series)
│ estimated row count: 1,779 (missing stats)
│ equality: (attrelid) = (oid)
│ pred: attnum = conkey[generate_series]
├── • virtual table
│ columns: (objid, refobjid)
│ columns: (attrelid, attname, attnum)
│ estimated row count: 1,000 (missing stats)
│ table: pg_depend@primary
│ table: pg_attribute@primary
└── • filter
│ columns: (oid, relname, relkind)
│ estimated row count: 10 (missing stats)
filter: relkind = 'i'
└── • cross join (inner)
│ columns: (attrelid, attname, attnum, objid, refobjid, oid, relname, relkind, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname, generate_series)
│ estimated row count: 539 (missing stats)
pred: attnum = confkey[generate_series]
└── • virtual table
columns: (oid, relname, relkind)
estimated row count: 1,000 (missing stats)
table: pg_class@primary
├── • hash join (inner)
│ │ columns: (attrelid, attname, attnum, objid, refobjid, oid, relname, relkind, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname)
│ │ estimated row count: 163 (missing stats)
│ │ equality: (attrelid) = (confrelid)
│ │
│ ├── • virtual table
│ │ columns: (attrelid, attname, attnum)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: pg_attribute@primary
│ │
│ └── • hash join (inner)
│ │ columns: (objid, refobjid, oid, relname, relkind, oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname)
│ │ estimated row count: 17 (missing stats)
│ │ equality: (objid) = (oid)
│ │
│ ├── • hash join (inner)
│ │ │ columns: (objid, refobjid, oid, relname, relkind)
│ │ │ estimated row count: 99 (missing stats)
│ │ │ equality: (refobjid) = (oid)
│ │ │
│ │ ├── • virtual table
│ │ │ columns: (objid, refobjid)
│ │ │ estimated row count: 1,000 (missing stats)
│ │ │ table: pg_depend@primary
│ │ │
│ │ └── • filter
│ │ │ columns: (oid, relname, relkind)
│ │ │ estimated row count: 10 (missing stats)
│ │ │ filter: relkind = 'i'
│ │ │
│ │ └── • virtual table
│ │ columns: (oid, relname, relkind)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: pg_class@primary
│ │
│ └── • hash join (inner)
│ │ columns: (oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace, oid, nspname)
│ │ estimated row count: 11 (missing stats)
│ │ equality: (relnamespace) = (oid)
│ │
│ ├── • virtual table lookup join (inner)
│ │ │ columns: (oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey, oid, relname, relnamespace)
│ │ │ estimated row count: 10 (missing stats)
│ │ │ table: pg_class@pg_class_oid_idx
│ │ │ equality: (conrelid) = (oid)
│ │ │ pred: relname = 'orders'
│ │ │
│ │ └── • filter
│ │ │ columns: (oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey)
│ │ │ estimated row count: 10 (missing stats)
│ │ │ filter: contype = 'f'
│ │ │
│ │ └── • virtual table
│ │ columns: (oid, conname, contype, condeferrable, condeferred, conrelid, confrelid, confupdtype, confdeltype, conkey, confkey)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: pg_constraint@primary
│ │
│ └── • filter
│ │ columns: (oid, nspname)
│ │ estimated row count: 10 (missing stats)
│ │ filter: nspname = 'public'
│ │
│ └── • virtual table
│ columns: (oid, nspname)
│ estimated row count: 1,000 (missing stats)
│ table: pg_namespace@primary
└── • project set
│ columns: (generate_series)
│ estimated row count: 10
│ render 0: generate_series(1, 32)
└── • emptyrow
columns: ()

# Ensure that left joins on non-null foreign keys turn into inner joins
statement ok
Expand Down
28 changes: 0 additions & 28 deletions pkg/sql/opt/xform/join_order_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1328,24 +1328,6 @@ func commute(op opt.Operator) bool {
// ON x = a
//
func assoc(edgeA, edgeB *edge) bool {
if edgeB.ses.intersects(edgeA.op.leftVertexes) || edgeA.ses.intersects(edgeB.op.rightVertexes) {
// Ensure that application of the associative property would not lead to
// 'orphaned' predicates, where one or more referenced relations are not in
// the resulting join's inputs. Take as an example this reordering that
// results from applying the associative property:
//
// SELECT * FROM (SELECT * FROM xy INNER JOIN ab ON y = a)
// INNER JOIN uv
// ON x = u
// =>
// SELECT * FROM xy
// INNER JOIN (SELECT * FROM ab INNER JOIN uv ON x = u)
// ON y = a
//
// Note that the x = u predicate references the xy relation, which is not
// in that join's inputs. Therefore, this transformation is invalid.
return false
}
return checkProperty(assocTable, edgeA, edgeB)
}

Expand All @@ -1368,11 +1350,6 @@ func assoc(edgeA, edgeB *edge) bool {
// INNER JOIN ab ON x = a
//
func leftAsscom(edgeA, edgeB *edge) bool {
if edgeB.ses.intersects(edgeA.op.rightVertexes) || edgeA.ses.intersects(edgeB.op.rightVertexes) {
// Ensure that application of the left-asscom property would not lead to
// 'orphaned' predicates. See the assoc() comment for why this is necessary.
return false
}
return checkProperty(leftAsscomTable, edgeA, edgeB)
}

Expand All @@ -1397,11 +1374,6 @@ func leftAsscom(edgeA, edgeB *edge) bool {
// ON x = a
//
func rightAsscom(edgeA, edgeB *edge) bool {
if edgeB.ses.intersects(edgeA.op.leftVertexes) || edgeA.ses.intersects(edgeB.op.leftVertexes) {
// Ensure that application of the right-asscom property would not lead to
// 'orphaned' predicates. See the assoc() comment for why this is necessary.
return false
}
return checkProperty(rightAsscomTable, edgeA, edgeB)
}

Expand Down
16 changes: 16 additions & 0 deletions pkg/sql/opt/xform/join_order_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,22 @@ func TestJoinOrderBuilder_CalcTES(t *testing.T) {
expectedTES: "ABC",
expectedRules: "",
},
{ // 20
// SELECT * FROM (
// SELECT * FROM (
// SELECT * FROM A
// INNER JOIN B ON A.u = B.u
// ) INNER JOIN C ON B.v = C.v
// ) INNER JOIN D ON A.w = D.w
rootEdge: testEdge{joinOp: opt.InnerJoinOp, left: "ABC", right: "D", ses: "AD", notNull: "AD"},
leftChildEdges: []testEdge{
{joinOp: opt.InnerJoinOp, left: "AB", right: "C", ses: "BC", notNull: "BC"},
{joinOp: opt.InnerJoinOp, left: "A", right: "B", ses: "AB", notNull: "AB"},
},
rightChildEdges: []testEdge{},
expectedTES: "AD",
expectedRules: "",
},
}

for i, tc := range testCases {
Expand Down
Loading

0 comments on commit 824f767

Please sign in to comment.