Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: calculate number of rows processed when costing joins #40248

Merged
merged 2 commits into from
Sep 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 33 additions & 35 deletions pkg/sql/opt/exec/execbuilder/testdata/lookup_join
Original file line number Diff line number Diff line change
Expand Up @@ -217,25 +217,25 @@ ALTER TABLE authors INJECT STATISTICS '[
query TTTTT colnames
EXPLAIN (VERBOSE) SELECT DISTINCT authors.name FROM books AS b1, books2 AS b2, authors WHERE b1.title = b2.title AND authors.book = b1.title AND b1.shelf <> b2.shelf
----
tree field description columns ordering
distinct · · (name) weak-key(name)
distinct on name · ·
└── render · · (name) ·
render 0 name · ·
└── hash-join · · (title, shelf, title, shelf, name, book) ·
type inner · ·
equality (title) = (book) · ·
├── lookup-join · · (title, shelf, title, shelf) ·
table books2@primary · ·
│ │ type inner · ·
equality (title) = (title) · ·
pred @2 != @4 · ·
── scan · · (title, shelf) ·
table books@primary · ·
spans ALL · ·
└── scan · · (name, book) ·
· table authors@primary · ·
· spans ALL · ·
tree field description columns ordering
distinct · · (name) weak-key(name)
│ distinct on name · ·
└── render · · (name) ·
│ render 0 name · ·
└── lookup-join · · (name, book, title, shelf, title, shelf) ·
table books2@primary · ·
type inner · ·
equality (title) = (title) · ·
pred @4 != @6 · ·
└── hash-join · · (name, book, title, shelf) ·
type inner · ·
equality (book) = (title) · ·
── scan · · (name, book) ·
table authors@primary · ·
spans ALL · ·
└── scan · · (title, shelf) ·
· table books@primary · ·
· spans ALL · ·

# Verify data placement.
query TTTI colnames
Expand All @@ -244,12 +244,10 @@ SELECT start_key, end_key, replicas, lease_holder from [SHOW RANGES FROM TABLE b
start_key end_key replicas lease_holder
NULL NULL {5} 5

# TODO(radu): this doesn't seem to be a lookup join, but it should be.

query T
SELECT url FROM [EXPLAIN (DISTSQL) SELECT DISTINCT authors.name FROM books AS b1, books2 AS b2, authors WHERE b1.title = b2.title AND authors.book = b1.title AND b1.shelf <> b2.shelf]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJyck89q3DAQh-99CndOCSjYku0cDAEdemhK2ZS0t7IHxZruqnE0RpKhJey7F9mlWZu10s1Rf76Zb362nsGSxo16Qg_Nd-DAoIYtg95Ri96Ti9vTpVv9C5qCgbH9EOL2lkFLDqF5hmBCh9DAhq6oz2tgoDEo043XDgxoCC-QD2qH0Fwf2FFhni78TT10eI9Ko8uLWXnonXlS7rd8IHr0wOBrr6xvsitgcDeEJpOcyRLWLPg5Fp_I2L8SPCEhgMFnosehz36SsRnZaBF9NpkU2fubTNbHdoLJksl61VG8NSlxWlINYR-_7OmsxKpHuerx0n6w5DQ61Mt_4PUrJ4b5qPw-ho4uL-ezdPgjXEh-eePMbh8upLj8N8N6kNU5QX4wPhjbhryad5Z8tX49q__KW7lH35P1-F-PpYjpoN7hlLanwbX4xVE7tpmWdyM3bmj0YTq9nha3djqKgscwT8IiDYskXM1gvoTLM2CxhKskXKe16yRcLODt4d2fAAAA__8CwK1z
https://cockroachdb.github.io/distsqlplan/decode.html#eJyck89q3DAQh-99CnVOCSjsyn9SEAR06KEpZbekvZU9KNZ0V42jMZIMLWHfvcgubGxipZujpPlG3_xsPYEjgxv9iAHkDxDAoYYdh85TgyGQT9tj0a35DXLNwbquj2l7x6EhjyCfINrYIkjY0BV1qxo4GIzatkPZkQP18QSFqPcI8vrInzUW-cbf9X2Ld6gN-tV60h46bx-1_6N0Hw_Jl8O3Trsg2RVw2PZRMiW4KmDJQ7zVQ7zscU_0sGRRLloUixany3tH3qBHM8_39ZIXRvmkw-EzWYd-VUwnafFnvFDF5Y23-0O8UOJykiRXJVfV4iTlOXkmgX9xlpk4k-AXooe-Y7_IOkZOMpWA7Yapir2_YerDyXFRrTpH7aMN0bomrqqpWKZ_Pen_ylu5w9CRC_hfj2WdviCaPY5_RKDeN_jVUzNcMy63AzdsGAxxPL0eF7duPEqCz2GRhcsJLOZwcQZczOEyC1d57SoL13m4zsLrGbw7vvsbAAD__5P3rXQ=

query TTTTT colnames
EXPLAIN (VERBOSE) SELECT a.name FROM authors AS a JOIN books2 AS b2 ON a.book = b2.title ORDER BY a.name
Expand Down Expand Up @@ -675,18 +673,18 @@ render · · (a, b, c) ·
query TTTTT
EXPLAIN (VERBOSE) SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=f AND c=e)
----
render · · (a, b, c) ·
│ render 0 a · ·
│ render 1 b · ·
│ render 2 c · ·
└── lookup-join · · (a, b, c, e, f) ·
│ table def@primary · ·
│ type inner · ·
│ equality (a, c) = (f, e) · ·
│ equality cols are key · · ·
└── scan · · (a, b, c) ·
· table abc@primary · ·
· spans ALL · ·
render · · (a, b, c) ·
│ render 0 a · ·
│ render 1 b · ·
│ render 2 c · ·
└── lookup-join · · (a, b, c) ·
│ table def@primary · ·
│ type semi · ·
│ equality (a, c) = (f, e) · ·
│ equality cols are key · · ·
└── scan · · (a, b, c) ·
· table abc@primary · ·
· spans ALL · ·

query TTTTT
EXPLAIN (VERBOSE) SELECT * from abc WHERE NOT EXISTS (SELECT * FROM def WHERE a=f AND c=e)
Expand Down Expand Up @@ -741,7 +739,7 @@ SELECT url FROM [ EXPLAIN (DISTSQL)
SELECT a,b from small WHERE EXISTS (SELECT a FROM data WHERE small.a=data.a) ORDER BY a
]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzMlEFr2zAUx-_7FOJdJ-NIdtLUp1w6SOmS0eY2ctCiR-fN0TOSDCsl3304HqtdWlngQ3q0lD__X35PvGcwpHGjjuig-A4COEjgkAGHHDjMYc-htnRA58i2P-kCa_0HihmH0tSNb4_3HA5kEYpn8KWvEArYqR8V3qPSaNMZcNDoVVmda2pbHpV9Wrmjqirg8FAr4wqWpIIpo5lg5H-iBQ7bxhdsJfhKwv7EgRr_0ue8ekQoxInHM91Saf4hzYdIu6caC3Z382XHHm6-rtntdr0B_p9UK6-Awx3R76Zmv6g0jEyLFgcp34V8YSOr0aIeYq3EZ9if3vgnG0qoTsVQ7Hv12aBexM9NRM0tFUkqp05uhKo3ucXlJifj1ck4dTJJs6nqRqh66q4upy6LV5fFqcuSNJ-qboSqp255OXV5vLo8Tl2epPOp6kaoeuquP8aqfQPyHl1NxmHUFp21exj1I3Z721FjD_jN0uFc031uz7nzgUbnu1vRfaxNd9UC9sMiGJaDsHgdluHmkeosmM7D4XwK9zwYXoSbF1Oar4LhZbh5OaX5Ojyr2cgzCT-y193706e_AQAA__8Shyy4
https://cockroachdb.github.io/distsqlplan/decode.html#eJzEll9v2jAUxd_3KdB92lSjxE7Cn0iTeNhLJ62dur1NPLjEg0g0RraRVlV89wkyKSKBe-0F6Fsp-XGPfc65yhtUulAP8kVZyH8BBwYCGCTAIAUGGcwZbIxeKGu12T9SA_fFH8hjBmW12br9v-cMFtooyN_AlW6tIIef8nmtnpQslIliYFAoJ8v1YczGlC_SvM7si1yvgcGPjaxsPhhGfCCrYsAH2q2UAQaPW5cPZpzNBMx3DPTW_ZvXjHl-HaykXR0PmHGY7-YMrJNLBTnfsf_TnZ3WXUgnu7JFLds2uluiG0EiRNCX0rqyWriIx51TMng0hTKqQMclZ8c1U3T9O-0Jd_VFnteUBWkK8y3tp5s1T22rU8-dPNo3ZZbqqy4rZSI-Ov7dtfrtPs743afPplyu6j-xmDZHyS5uwYMe6k0keOtEp8ePjsZz_wZwr-ZGfBiJW3Q3QPnIp7uIcM_yEoqaovCLlJe_QwnG1ymB8PdS-KVQDKPkFikMUD72SuF54Z4pJBQ1KRQXSaF4hxROrpPCxN_LxC-FyTBKb5HCAOUTrxSeF-6ZQkJRk8LkIilM3iGF0-ukMPX3MvVLYTqMslukMED51CuFe-HBySNUNMlLL5K89PbJE_H1X0VPSHhSdqMrq7zeMuP9IVSxVPW9WL01C_Xd6MVhTP3x8cAdXmQKZV39bVp_uK_qr_YC_eFpH5gnvehxH1pwnOZtOj6ij-C4DQsUJk6dBLglwuBpH7jlVig97kO33OrQKXrhGX7hGW414fWoT7lwmCgXDlPlImiiXDhNlWuM3vgEv_BJn3JN-wSF91qkBE2YTdCU2xRO2E3g5DLFtyknKsZ77VPeWS1BnndWS5DnOE15jtOk5wROeY7jpOf4WuUjwvPOhgnyHN8wlOedFRPkOU5TnuM06TmBU57jOOk5vl5FTLw2dbZMiOcCXzJtz-e7D38DAAD__xe8Uow=

query T
SELECT url FROM [ EXPLAIN (DISTSQL)
Expand Down
26 changes: 26 additions & 0 deletions pkg/sql/opt/memo/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,32 @@ func ExtractJoinEqualityColumns(
return leftEq, rightEq
}

// ExtractJoinEqualityFilters returns the filters containing pairs of columns
// (one from the left side, one from the right side) which are constrained to
// be equal in a join (and have equivalent types).
func ExtractJoinEqualityFilters(leftCols, rightCols opt.ColSet, on FiltersExpr) FiltersExpr {
// We want to avoid allocating a new slice unless strictly necessary.
var newFilters FiltersExpr
for i := range on {
condition := on[i].Condition
ok, _, _ := isJoinEquality(leftCols, rightCols, condition)
if ok {
if newFilters != nil {
newFilters = append(newFilters, on[i])
}
} else {
if newFilters == nil {
newFilters = make(FiltersExpr, i, len(on)-1)
copy(newFilters, on[:i])
}
}
}
if newFilters != nil {
return newFilters
}
return on
}

func isVarEquality(condition opt.ScalarExpr) (leftVar, rightVar *VariableExpr, ok bool) {
if eq, ok := condition.(*EqExpr); ok {
if leftVar, ok := eq.Left.(*VariableExpr); ok {
Expand Down
14 changes: 14 additions & 0 deletions pkg/sql/opt/memo/logical_props_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1586,6 +1586,14 @@ func ensureLookupJoinInputProps(join *LookupJoinExpr, sb *statisticsBuilder) *pr
if relational.OutputCols.Empty() {
md := join.Memo().Metadata()
relational.OutputCols = join.Cols.Difference(join.Input.Relational().OutputCols)

// Include the key columns in the output columns.
index := md.Table(join.Table).Index(join.Index)
for i := range join.KeyCols {
indexColID := join.Table.ColumnID(index.Column(i).Ordinal)
relational.OutputCols.Add(indexColID)
}

relational.NotNullCols = tableNotNullCols(md, join.Table)
relational.NotNullCols.IntersectionWith(relational.OutputCols)
relational.Cardinality = props.AnyCardinality
Expand Down Expand Up @@ -1679,6 +1687,8 @@ type joinPropsHelper struct {
filterNotNullCols opt.ColSet
filterIsTrue bool
filterIsFalse bool

selfJoinCols opt.ColSet
}

func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) {
Expand All @@ -1702,6 +1712,10 @@ func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) {
h.filterNotNullCols.Add(colID)
h.filterNotNullCols.Add(indexColID)
h.filtersFD.AddEquivalency(colID, indexColID)
if colID == indexColID {
// This can happen if an index join was converted into a lookup join.
h.selfJoinCols.Add(colID)
}
}

// Lookup join has implicit equality conditions on KeyCols.
Expand Down
11 changes: 11 additions & 0 deletions pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,17 @@ func (m *Memo) RequestColStat(
return nil, false
}

// RowsProcessed calculates and returns the number of rows processed by the
// relational expression. It is currently only supported for joins.
func (m *Memo) RowsProcessed(expr RelExpr) (_ float64, ok bool) {
// When SetRoot is called, the statistics builder may have been cleared.
// If this happens, we can't serve the request anymore.
if m.logPropsBuilder.sb.md != nil {
return m.logPropsBuilder.sb.rowsProcessed(expr), true
}
return 0, false
}

// NextWithID returns a not-yet-assigned identifier for a WITH expression.
func (m *Memo) NextWithID() opt.WithID {
m.curWithID++
Expand Down
Loading