44130: sql: postgresql dollar-quoted string support r=knz a=damienhollis

Added support for PostgreSQL dollar-quoted strings in the scanner. A
dollar-quoted string constant consists of a dollar sign ($), an
optional "tag" of zero or more characters, another dollar sign, an
arbitrary sequence of characters that makes up the string content, a
dollar sign, the same tag that began this dollar quote, and a final
dollar sign.

The scanner uses the existing SCONST token type for dollar-quoted
strings. As a result, when the AST is formatted back to a string, there
is no record that the original input was dollar-quoted, so the value is
rendered as either a plain string or an escaped string (depending on
its content).

Fixes cockroachdb#41777.

Release Note (sql change): CockroachDB now supports string and byte
array literals using the dollar-quoted notation, as documented here:
https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
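
For illustration only (this example is not part of the patch), the same
string constant written three equivalent ways, following the PostgreSQL
syntax rules linked above:

SELECT 'Dianne''s horse';
SELECT $$Dianne's horse$$;
SELECT $SomeTag$Dianne's horse$SomeTag$;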

45589: sql: update error message for primary key change on an interleave parent r=otan a=rohany

Fixes cockroachdb#45537.

This PR updates the error message raised when attempting a primary key
change on an interleaved parent so that it includes the name of the
table as well as the names of its interleaved children.

Release note: None
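
For illustration, mirroring the logic test added in this change (table
child is interleaved into parent):

ALTER TABLE parent ALTER PRIMARY KEY USING COLUMNS (x)
-- error: pq: cannot change primary key of table parent because table(s) [child] are interleaved into it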

45604: opt: factor limit hints into scan and lookup join costs r=rytaft a=rytaft

This PR is a continuation of cockroachdb#43415. The first commit is copied directly
from that PR, and the second commit includes a minor fix as well as a 
number of test cases.

Fixes cockroachdb#34811; the example query in this issue now chooses a lookup join
as desired. The coster now takes limit hints into account when costing
scans and lookup joins, and propagates limit hints through lookup joins.

Release note (sql change): The optimizer now takes into account the
number of rows an operator is likely to need to provide, and may choose
a different query plan based on this estimate. In particular, the
optimizer may prefer lookup joins over alternatives in situations where
the join will probably not need to produce all of its rows.
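
As a sketch of the kind of query affected (hypothetical tables and
index, not the query from the issue): assuming an index on b(a_id), the
plan for a query like the one below can stop after roughly 10 output
rows, so the coster may now prefer a lookup join over a join that reads
all of b.

SELECT a.k, b.v FROM a JOIN b ON b.a_id = a.k LIMIT 10;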

Co-authored-by: damien.hollis <damien.hollis@unimarket.com>
Co-authored-by: Rohan Yadav <rohany@alumni.cmu.edu>
Co-authored-by: Céline O'Neil <celineloneil@gmail.com>
Co-authored-by: Rebecca Taft <becca@cockroachlabs.com>
5 people committed Mar 3, 2020
4 parents b0be21a + 80894c3 + 4caee85 + 7e0ba7c commit ada086e
Showing 18 changed files with 739 additions and 123 deletions.
27 changes: 26 additions & 1 deletion pkg/sql/alter_table.go
@@ -15,6 +15,7 @@ import (
	"context"
	gojson "encoding/json"
	"fmt"
+	"strings"

	"github.com/cockroachdb/cockroach/pkg/clusterversion"
	"github.com/cockroachdb/cockroach/pkg/keys"
@@ -405,7 +406,31 @@ func (n *alterTableNode) startExec(params runParams) error {

	// Disable primary key changes on tables that are interleaved parents.
	if len(n.tableDesc.PrimaryIndex.InterleavedBy) != 0 {
-		return errors.New("cannot change the primary key of an interleaved parent")
+		var sb strings.Builder
+		sb.WriteString("[")
+		comma := ", "
+		for i := range n.tableDesc.PrimaryIndex.InterleavedBy {
+			interleave := &n.tableDesc.PrimaryIndex.InterleavedBy[i]
+			if i != 0 {
+				sb.WriteString(comma)
+			}
+			childTable, err := params.p.Tables().getTableVersionByID(
+				params.ctx,
+				params.p.Txn(),
+				interleave.Table,
+				tree.ObjectLookupFlags{},
+			)
+			if err != nil {
+				return err
+			}
+			sb.WriteString(childTable.Name)
+		}
+		sb.WriteString("]")
+		return errors.Newf(
+			"cannot change primary key of table %s because table(s) %s are interleaved into it",
+			n.tableDesc.Name,
+			sb.String(),
+		)
	}

	nameExists := func(name string) bool {
8 changes: 7 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/alter_primary_key
@@ -130,7 +130,13 @@ SELECT * FROM child WHERE x >= 1 AND x < 5 AND y >= 2 AND y <= 6
1 2 3
4 5 6

-statement error pq: cannot change the primary key of an interleaved parent
+statement error pq: cannot change primary key of table parent because table\(s\) \[child\] are interleaved into it
ALTER TABLE parent ALTER PRIMARY KEY USING COLUMNS (x)

+statement ok
+CREATE TABLE child2 (x INT, y INT, z INT, PRIMARY KEY (x, y, z)) INTERLEAVE IN PARENT parent (x, y)
+
+statement error pq: cannot change primary key of table parent because table\(s\) \[child, child2\] are interleaved into it
+ALTER TABLE parent ALTER PRIMARY KEY USING COLUMNS (x)
+
statement error pq: unimplemented: "parent" is interleaved by table "child"
23 changes: 13 additions & 10 deletions pkg/sql/opt/exec/execbuilder/testdata/aggregate
@@ -751,16 +751,19 @@ group · · (min int) ·
query TTTTT
EXPLAIN (TYPES) SELECT min(v) FROM opt_test WHERE k <> 4
----
-· distributed false · ·
-· vectorized true · ·
-group · · (min int) ·
-│ aggregate 0 min(v) · ·
-│ scalar · · ·
-└── render · · (v int) ·
-│ render 0 (v)[int] · ·
-└── scan · · (k int, v int) ·
-· table opt_test@primary · ·
-· spans -/3/# /5- · ·
+· distributed false · ·
+· vectorized true · ·
+group · · (min int) ·
+│ aggregate 0 any_not_null(v) · ·
+│ scalar · · ·
+└── render · · (v int) ·
+│ render 0 (v)[int] · ·
+└── limit · · (k int, v int) +v
+│ count (1)[int] · ·
+└── scan · · (k int, v int) +v
+· table opt_test@v · ·
+· spans /!NULL- · ·
+· filter ((k)[int] != (4)[int])[bool] · ·

# Check that the optimization doesn't work when the argument is non-trivial (we
# can't in general guarantee an ordering on a synthesized column).
23 changes: 11 additions & 12 deletions pkg/sql/opt/exec/execbuilder/testdata/limit
@@ -148,18 +148,17 @@ filter · · (k) ·
query TTTTT
EXPLAIN (VERBOSE) SELECT k, w FROM t WHERE v >= 1 AND v <= 100 LIMIT 10
----
-· distributed false · ·
-· vectorized true · ·
-render · · (k, w) ·
-│ render 0 k · ·
-│ render 1 w · ·
-└── index-join · · (k, v, w) ·
-│ table t@primary · ·
-│ key columns k · ·
-└── scan · · (k, v) ·
-· table t@t_v_idx · ·
-· spans /1-/101 · ·
-· limit 10 · ·
+· distributed false · ·
+· vectorized true · ·
+render · · (k, w) ·
+│ render 0 k · ·
+│ render 1 w · ·
+└── limit · · (k, v, w) ·
+│ count 10 · ·
+└── scan · · (k, v, w) ·
+· table t@primary · ·
+· spans ALL · ·
+· filter (v >= 1) AND (v <= 100) · ·

query TTTTT
EXPLAIN (VERBOSE) SELECT k, w FROM t WHERE v >= 1 AND v <= 100 ORDER BY v LIMIT 10
58 changes: 30 additions & 28 deletions pkg/sql/opt/exec/execbuilder/testdata/subquery
@@ -45,34 +45,36 @@ root · ·
query TTTTT
EXPLAIN (VERBOSE) SELECT * FROM abc WHERE a = (SELECT max(a) FROM abc WHERE EXISTS(SELECT * FROM abc WHERE c=a+3))
----
-· distributed false · ·
-· vectorized false · ·
-root · · (a, b, c) ·
-├── scan · · (a, b, c) ·
-│ table abc@primary · ·
-│ spans ALL · ·
-│ filter a = @S2 · ·
-├── subquery · · (a, b, c) ·
-│ │ id @S1 · ·
-│ │ original sql EXISTS (SELECT * FROM abc WHERE c = (a + 3)) · ·
-│ │ exec mode exists · ·
-│ └── limit · · (a, b, c) ·
-│ │ count 1 · ·
-│ └── scan · · (a, b, c) ·
-│ table abc@primary · ·
-│ spans ALL · ·
-│ filter c = (a + 3) · ·
-└── subquery · · (a, b, c) ·
-│ id @S2 · ·
-│ original sql (SELECT max(a) FROM abc WHERE EXISTS (SELECT * FROM abc WHERE c = (a + 3))) · ·
-│ exec mode one row · ·
-└── group · · (max) ·
-│ aggregate 0 max(a) · ·
-│ scalar · · ·
-└── scan · · (a) ·
-· table abc@primary · ·
-· spans ALL · ·
-· filter @S1 · ·
+· distributed false · ·
+· vectorized false · ·
+root · · (a, b, c) ·
+├── scan · · (a, b, c) ·
+│ table abc@primary · ·
+│ spans ALL · ·
+│ filter a = @S2 · ·
+├── subquery · · (a, b, c) ·
+│ │ id @S1 · ·
+│ │ original sql EXISTS (SELECT * FROM abc WHERE c = (a + 3)) · ·
+│ │ exec mode exists · ·
+│ └── limit · · (a, b, c) ·
+│ │ count 1 · ·
+│ └── scan · · (a, b, c) ·
+│ table abc@primary · ·
+│ spans ALL · ·
+│ filter c = (a + 3) · ·
+└── subquery · · (a, b, c) ·
+│ id @S2 · ·
+│ original sql (SELECT max(a) FROM abc WHERE EXISTS (SELECT * FROM abc WHERE c = (a + 3))) · ·
+│ exec mode one row · ·
+└── group · · (any_not_null) ·
+│ aggregate 0 any_not_null(a) · ·
+│ scalar · · ·
+└── limit · · (a) -a
+│ count 1 · ·
+└── revscan · · (a) -a
+· table abc@primary · ·
+· spans ALL · ·
+· filter @S1 · ·

# IN expression transformed into semi-join.
query TTTTT
6 changes: 3 additions & 3 deletions pkg/sql/opt/memo/testdata/memo
@@ -148,7 +148,7 @@ memo (optimized, ~18KB, required=[presentation: y:2,x:3,c:6] [ordering: +2])
├── G5: (const 10)
├── G6: (plus G11 G12)
├── G7: (project G13 G14 y)
-│ ├── [ordering: +2]
+│ ├── [ordering: +2] [limit hint: 100.00]
│ │ ├── best: (sort G7)
│ │ └── cost: 1119.26
│ ├── [ordering: +5]
@@ -169,7 +169,7 @@ memo (optimized, ~18KB, required=[presentation: y:2,x:3,c:6] [ordering: +2])
├── G11: (variable y)
├── G12: (const 1)
├── G13: (select G16 G17)
-│ ├── [ordering: +2]
+│ ├── [ordering: +2] [limit hint: 100.00]
│ │ ├── best: (sort G13)
│ │ └── cost: 1112.58
│ └── []
@@ -178,7 +178,7 @@ memo (optimized, ~18KB, required=[presentation: y:2,x:3,c:6] [ordering: +2])
├── G14: (projections G18)
├── G15: (eq G19 G20)
├── G16: (scan a)
-│ ├── [ordering: +2]
+│ ├── [ordering: +2] [limit hint: 300.00]
│ │ ├── best: (sort G16)
│ │ └── cost: 1259.35
│ └── []
4 changes: 2 additions & 2 deletions pkg/sql/opt/optgen/exprgen/testdata/limit
@@ -14,13 +14,13 @@ limit
├── internal-ordering: +1
├── cardinality: [0 - 10]
├── stats: [rows=10]
-├── cost: 1050.13
+├── cost: 21.13
├── prune: (2)
├── interesting orderings: (+1,+2)
├── scan t.public.abc@ab
│ ├── columns: t.public.abc.a:1(int) t.public.abc.b:2(int)
│ ├── stats: [rows=1000]
-│ ├── cost: 1050.02
+│ ├── cost: 21.02
│ ├── ordering: +1
│ ├── limit hint: 10.00
│ ├── prune: (1,2)
41 changes: 37 additions & 4 deletions pkg/sql/opt/xform/coster.go
@@ -107,6 +107,14 @@ const (
	// justification for this constant.
	lookupJoinRetrieveRowCost = 2 * seqIOCostFactor

+	// Input rows to a join are processed in batches of this size.
+	// See joinreader.go.
+	joinReaderBatchSize = 100.0
+
+	// In the case of a limit hint, a scan will read this multiple of the expected
+	// number of rows. See scanNode.limitHint.
+	scanSoftLimitMultiplier = 2.0
+
	// latencyCostFactor represents the throughput impact of doing scans on an
	// index that may be remotely located in a different locality. If latencies
	// are higher, then overall cluster throughput will suffer somewhat, as there
@@ -174,7 +182,7 @@ func (c *coster) ComputeCost(candidate memo.RelExpr, required *physical.Required
		cost = c.computeIndexJoinCost(candidate.(*memo.IndexJoinExpr))

	case opt.LookupJoinOp:
-		cost = c.computeLookupJoinCost(candidate.(*memo.LookupJoinExpr))
+		cost = c.computeLookupJoinCost(candidate.(*memo.LookupJoinExpr), required)

	case opt.ZigzagJoinOp:
		cost = c.computeZigzagJoinCost(candidate.(*memo.ZigzagJoinExpr))
@@ -281,6 +289,10 @@ func (c *coster) computeScanCost(scan *memo.ScanExpr, required *physical.Required
	rowCount := scan.Relational().Stats.RowCount
	perRowCost := c.rowScanCost(scan.Table, scan.Index, scan.Cols.Len())

+	if required.LimitHint != 0 {
+		rowCount = math.Min(rowCount, required.LimitHint*scanSoftLimitMultiplier)
+	}
+
	if ordering.ScanIsReverse(scan, &required.Ordering) {
		if rowCount > 1 {
			// Need to do binary search to seek to the previous row.
@@ -394,8 +406,18 @@ func (c *coster) computeIndexJoinCost(join *memo.IndexJoinExpr) memo.Cost {
	return memo.Cost(leftRowCount) * perRowCost
}

-func (c *coster) computeLookupJoinCost(join *memo.LookupJoinExpr) memo.Cost {
-	leftRowCount := join.Input.Relational().Stats.RowCount
+func (c *coster) computeLookupJoinCost(
+	join *memo.LookupJoinExpr, required *physical.Required,
+) memo.Cost {
+	lookupCount := join.Input.Relational().Stats.RowCount
+
+	// Lookup joins can return early if enough rows have been found. An otherwise
+	// expensive lookup join might have a lower cost if its limit hint estimates
+	// that most rows will not be needed.
+	if required.LimitHint != 0 {
+		outputRows := join.Relational().Stats.RowCount
+		lookupCount = lookupJoinInputLimitHint(lookupCount, outputRows, required.LimitHint)
+	}

	// The rows in the (left) input are used to probe into the (right) table.
	// Since the matching rows in the table may not all be in the same range, this
@@ -409,7 +431,7 @@ func (c *coster) computeLookupJoinCost(join *memo.LookupJoinExpr) memo.Cost {
		// slower.
		perLookupCost *= 5
	}
-	cost := memo.Cost(leftRowCount) * perLookupCost
+	cost := memo.Cost(lookupCount) * perLookupCost

	// Each lookup might retrieve many rows; add the IO cost of retrieving the
	// rows (relevant when we expect many resulting rows per lookup) and the CPU
@@ -766,3 +788,14 @@ func localityMatchScore(zone cat.Zone, locality roachpb.Locality) float64 {
	// Weight the constraintScore twice as much as the lease score.
	return (constraintScore*2 + leaseScore) / 3
}
+
+// lookupJoinInputLimitHint calculates an appropriate limit hint for the input
+// to a lookup join.
+func lookupJoinInputLimitHint(inputRowCount, outputRowCount, outputLimitHint float64) float64 {
+	// Estimate the number of lookups needed to output LimitHint rows.
+	expectedLookupCount := outputLimitHint * inputRowCount / outputRowCount
+
+	// Round up to the nearest multiple of a batch.
+	expectedLookupCount = math.Ceil(expectedLookupCount/joinReaderBatchSize) * joinReaderBatchSize
+	return math.Min(inputRowCount, expectedLookupCount)
+}
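
To illustrate the arithmetic in lookupJoinInputLimitHint (numbers chosen
purely for illustration): with inputRowCount = 1000, outputRowCount =
10000, and outputLimitHint = 100, the expected lookup count is
100 * 1000 / 10000 = 10, which rounds up to a single joinReaderBatchSize
batch of 100, so the function returns min(1000, 100) = 100 as the input
limit hint.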