From ae649523e6da9f2ec5d18a953abcadea224fb60b Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Thu, 2 May 2019 16:00:28 -0700 Subject: [PATCH] exec: template out rank and dense rank into separate operators Release note: None --- pkg/sql/exec/execgen/cmd/execgen/rank_gen.go | 54 +++- pkg/sql/exec/vecbuiltins/rank.eg.go | 287 +++++++++++++++--- pkg/sql/exec/vecbuiltins/rank.go | 76 ++--- pkg/sql/exec/vecbuiltins/rank_tmpl.go | 106 ++++--- .../logictest/testdata/logic_test/exec_window | 1 - 5 files changed, 389 insertions(+), 135 deletions(-) diff --git a/pkg/sql/exec/execgen/cmd/execgen/rank_gen.go b/pkg/sql/exec/execgen/cmd/execgen/rank_gen.go index 5db6322000f6..613dd0f9ec72 100644 --- a/pkg/sql/exec/execgen/cmd/execgen/rank_gen.go +++ b/pkg/sql/exec/execgen/cmd/execgen/rank_gen.go @@ -15,11 +15,48 @@ package main import ( + "fmt" "io" "io/ioutil" + "regexp" + "strings" "text/template" ) +type rankTmplInfo struct { + Dense bool + HasPartition bool +} + +func (r rankTmplInfo) UpdateRank() string { + switch r.Dense { + case true: + return fmt.Sprintf( + `r.rank++`, + ) + case false: + return fmt.Sprintf( + `r.rank += r.rankIncrement +r.rankIncrement = 1`, + ) + default: + panic("third value of boolean?") + } +} + +func (r rankTmplInfo) UpdateRankIncrement() string { + switch r.Dense { + case true: + return `` + case false: + return fmt.Sprintf( + `r.rankIncrement++`, + ) + default: + panic("third value of boolean?") + } +} + func genRankOps(wr io.Writer) error { d, err := ioutil.ReadFile("pkg/sql/exec/vecbuiltins/rank_tmpl.go") if err != nil { @@ -28,8 +65,13 @@ func genRankOps(wr io.Writer) error { s := string(d) - nextRank := makeFunctionRegex("_NEXT_RANK", 1) - s = nextRank.ReplaceAllString(s, `{{template "nextRank" buildDict "Global" $ "HasPartition" $1 }}`) + s = strings.Replace(s, "_DENSE", "{{.Dense}}", -1) + s = strings.Replace(s, "_PARTITION", "{{.HasPartition}}", -1) + + updateRankRe := regexp.MustCompile(`_UPDATE_RANK\(\)`) + s = updateRankRe.ReplaceAllString(s, "{{.UpdateRank}}") + updateRankIncrementRe := regexp.MustCompile(`_UPDATE_RANK_INCREMENT\(\)`) + s = updateRankIncrementRe.ReplaceAllString(s, "{{.UpdateRankIncrement}}") // Now, generate the op, from the template. tmpl, err := template.New("rank_op").Funcs(template.FuncMap{"buildDict": buildDict}).Parse(s) @@ -37,7 +79,13 @@ func genRankOps(wr io.Writer) error { return err } - return tmpl.Execute(wr, struct{}{}) + rankTmplInfos := []rankTmplInfo{ + {Dense: false, HasPartition: false}, + {Dense: false, HasPartition: true}, + {Dense: true, HasPartition: false}, + {Dense: true, HasPartition: true}, + } + return tmpl.Execute(wr, rankTmplInfos) } func init() { diff --git a/pkg/sql/exec/vecbuiltins/rank.eg.go b/pkg/sql/exec/vecbuiltins/rank.eg.go index ba953b3bfd23..2470e85e7a19 100644 --- a/pkg/sql/exec/vecbuiltins/rank.eg.go +++ b/pkg/sql/exec/vecbuiltins/rank.eg.go @@ -12,9 +12,116 @@ package vecbuiltins -import "github.com/cockroachdb/cockroach/pkg/sql/exec/types" +import ( + "context" -func (r *rankOp) nextBodyWithPartition() { + "github.com/cockroachdb/cockroach/pkg/sql/exec" + "github.com/cockroachdb/cockroach/pkg/sql/exec/coldata" + "github.com/cockroachdb/cockroach/pkg/sql/exec/types" +) + +type rankDense_false_HasPartition_false_Op struct { + input exec.Operator + batch coldata.Batch + // distinctCol is the output column of the chain of ordered distinct + // operators in which true will indicate that a new rank needs to be assigned + // to the corresponding tuple. + distinctCol []bool + outputColIdx int + partitionColIdx int + + // rank indicates which rank should be assigned to the next tuple. + rank int64 + // rankIncrement indicates by how much rank should be incremented when a + // tuple distinct from the previous one on the ordering columns is seen. It + // is used only in case of a regular rank function (i.e. not dense). + rankIncrement int64 +} + +var _ exec.Operator = &rankDense_false_HasPartition_false_Op{} + +func (r *rankDense_false_HasPartition_false_Op) Init() { + r.input.Init() + // RANK and DENSE_RANK start counting from 1. Before we assign the rank to a + // tuple in the batch, we first increment r.rank, so setting this + // rankIncrement to 1 will update r.rank to 1 on the very first tuple (as + // desired). + r.rankIncrement = 1 +} + +func (r *rankDense_false_HasPartition_false_Op) Next(ctx context.Context) coldata.Batch { + r.batch = r.input.Next(ctx) + if r.batch.Length() == 0 { + return r.batch + } + + if r.outputColIdx == r.batch.Width() { + r.batch.AppendCol(types.Int64) + } else if r.outputColIdx > r.batch.Width() { + panic("unexpected: column outputColIdx is neither present nor the next to be appended") + } + rankCol := r.batch.ColVec(r.outputColIdx).Int64() + sel := r.batch.Selection() + if sel != nil { + for i := uint16(0); i < r.batch.Length(); i++ { + if r.distinctCol[sel[i]] { + r.rank += r.rankIncrement + r.rankIncrement = 1 + rankCol[sel[i]] = r.rank + } else { + rankCol[sel[i]] = r.rank + r.rankIncrement++ + } + } + } else { + for i := uint16(0); i < r.batch.Length(); i++ { + if r.distinctCol[i] { + r.rank += r.rankIncrement + r.rankIncrement = 1 + rankCol[i] = r.rank + } else { + rankCol[i] = r.rank + r.rankIncrement++ + } + } + } + return r.batch +} + +type rankDense_false_HasPartition_true_Op struct { + input exec.Operator + batch coldata.Batch + // distinctCol is the output column of the chain of ordered distinct + // operators in which true will indicate that a new rank needs to be assigned + // to the corresponding tuple. + distinctCol []bool + outputColIdx int + partitionColIdx int + + // rank indicates which rank should be assigned to the next tuple. + rank int64 + // rankIncrement indicates by how much rank should be incremented when a + // tuple distinct from the previous one on the ordering columns is seen. It + // is used only in case of a regular rank function (i.e. not dense). + rankIncrement int64 +} + +var _ exec.Operator = &rankDense_false_HasPartition_true_Op{} + +func (r *rankDense_false_HasPartition_true_Op) Init() { + r.input.Init() + // RANK and DENSE_RANK start counting from 1. Before we assign the rank to a + // tuple in the batch, we first increment r.rank, so setting this + // rankIncrement to 1 will update r.rank to 1 on the very first tuple (as + // desired). + r.rankIncrement = 1 +} + +func (r *rankDense_false_HasPartition_true_Op) Next(ctx context.Context) coldata.Batch { + r.batch = r.input.Next(ctx) + if r.batch.Length() == 0 { + return r.batch + } if r.partitionColIdx == r.batch.Width() { r.batch.AppendCol(types.Bool) @@ -39,20 +146,12 @@ func (r *rankOp) nextBodyWithPartition() { continue } if r.distinctCol[sel[i]] { - // TODO(yuzefovich): template this part out to generate two different - // rank operators. - if r.dense { - r.rank++ - } else { - r.rank += r.rankIncrement - r.rankIncrement = 1 - } + r.rank += r.rankIncrement + r.rankIncrement = 1 rankCol[sel[i]] = r.rank } else { rankCol[sel[i]] = r.rank - if !r.dense { - r.rankIncrement++ - } + r.rankIncrement++ } } } else { @@ -64,26 +163,125 @@ func (r *rankOp) nextBodyWithPartition() { continue } if r.distinctCol[i] { - // TODO(yuzefovich): template this part out to generate two different - // rank operators. - if r.dense { - r.rank++ - } else { - r.rank += r.rankIncrement - r.rankIncrement = 1 - } + r.rank += r.rankIncrement + r.rankIncrement = 1 + rankCol[i] = r.rank + } else { + rankCol[i] = r.rank + r.rankIncrement++ + } + } + } + return r.batch +} + +type rankDense_true_HasPartition_false_Op struct { + input exec.Operator + batch coldata.Batch + // distinctCol is the output column of the chain of ordered distinct + // operators in which true will indicate that a new rank needs to be assigned + // to the corresponding tuple. + distinctCol []bool + outputColIdx int + partitionColIdx int + + // rank indicates which rank should be assigned to the next tuple. + rank int64 + // rankIncrement indicates by how much rank should be incremented when a + // tuple distinct from the previous one on the ordering columns is seen. It + // is used only in case of a regular rank function (i.e. not dense). + rankIncrement int64 +} + +var _ exec.Operator = &rankDense_true_HasPartition_false_Op{} + +func (r *rankDense_true_HasPartition_false_Op) Init() { + r.input.Init() + // RANK and DENSE_RANK start counting from 1. Before we assign the rank to a + // tuple in the batch, we first increment r.rank, so setting this + // rankIncrement to 1 will update r.rank to 1 on the very first tuple (as + // desired). + r.rankIncrement = 1 +} + +func (r *rankDense_true_HasPartition_false_Op) Next(ctx context.Context) coldata.Batch { + r.batch = r.input.Next(ctx) + if r.batch.Length() == 0 { + return r.batch + } + + if r.outputColIdx == r.batch.Width() { + r.batch.AppendCol(types.Int64) + } else if r.outputColIdx > r.batch.Width() { + panic("unexpected: column outputColIdx is neither present nor the next to be appended") + } + rankCol := r.batch.ColVec(r.outputColIdx).Int64() + sel := r.batch.Selection() + if sel != nil { + for i := uint16(0); i < r.batch.Length(); i++ { + if r.distinctCol[sel[i]] { + r.rank++ + rankCol[sel[i]] = r.rank + } else { + rankCol[sel[i]] = r.rank + + } + } + } else { + for i := uint16(0); i < r.batch.Length(); i++ { + if r.distinctCol[i] { + r.rank++ rankCol[i] = r.rank } else { rankCol[i] = r.rank - if !r.dense { - r.rankIncrement++ - } + } } } + return r.batch } -func (r *rankOp) nextBodyNoPartition() { +type rankDense_true_HasPartition_true_Op struct { + input exec.Operator + batch coldata.Batch + // distinctCol is the output column of the chain of ordered distinct + // operators in which true will indicate that a new rank needs to be assigned + // to the corresponding tuple. + distinctCol []bool + outputColIdx int + partitionColIdx int + + // rank indicates which rank should be assigned to the next tuple. + rank int64 + // rankIncrement indicates by how much rank should be incremented when a + // tuple distinct from the previous one on the ordering columns is seen. It + // is used only in case of a regular rank function (i.e. not dense). + rankIncrement int64 +} + +var _ exec.Operator = &rankDense_true_HasPartition_true_Op{} + +func (r *rankDense_true_HasPartition_true_Op) Init() { + r.input.Init() + // RANK and DENSE_RANK start counting from 1. Before we assign the rank to a + // tuple in the batch, we first increment r.rank, so setting this + // rankIncrement to 1 will update r.rank to 1 on the very first tuple (as + // desired). + r.rankIncrement = 1 +} + +func (r *rankDense_true_HasPartition_true_Op) Next(ctx context.Context) coldata.Batch { + r.batch = r.input.Next(ctx) + if r.batch.Length() == 0 { + return r.batch + } + + if r.partitionColIdx == r.batch.Width() { + r.batch.AppendCol(types.Bool) + } else if r.partitionColIdx > r.batch.Width() { + panic("unexpected: column partitionColIdx is neither present nor the next to be appended") + } + partitionCol := r.batch.ColVec(r.partitionColIdx).Bool() if r.outputColIdx == r.batch.Width() { r.batch.AppendCol(types.Int64) @@ -94,41 +292,36 @@ func (r *rankOp) nextBodyNoPartition() { sel := r.batch.Selection() if sel != nil { for i := uint16(0); i < r.batch.Length(); i++ { + if partitionCol[sel[i]] { + r.rank = 1 + r.rankIncrement = 1 + rankCol[i] = 1 + continue + } if r.distinctCol[sel[i]] { - // TODO(yuzefovich): template this part out to generate two different - // rank operators. - if r.dense { - r.rank++ - } else { - r.rank += r.rankIncrement - r.rankIncrement = 1 - } + r.rank++ rankCol[sel[i]] = r.rank } else { rankCol[sel[i]] = r.rank - if !r.dense { - r.rankIncrement++ - } + } } } else { for i := uint16(0); i < r.batch.Length(); i++ { + if partitionCol[i] { + r.rank = 1 + r.rankIncrement = 1 + rankCol[i] = 1 + continue + } if r.distinctCol[i] { - // TODO(yuzefovich): template this part out to generate two different - // rank operators. - if r.dense { - r.rank++ - } else { - r.rank += r.rankIncrement - r.rankIncrement = 1 - } + r.rank++ rankCol[i] = r.rank } else { rankCol[i] = r.rank - if !r.dense { - r.rankIncrement++ - } + } } } + return r.batch } diff --git a/pkg/sql/exec/vecbuiltins/rank.go b/pkg/sql/exec/vecbuiltins/rank.go index 702748078233..16de9d3c4a57 100644 --- a/pkg/sql/exec/vecbuiltins/rank.go +++ b/pkg/sql/exec/vecbuiltins/rank.go @@ -15,34 +15,10 @@ package vecbuiltins import ( - "context" - "github.com/cockroachdb/cockroach/pkg/sql/exec" - "github.com/cockroachdb/cockroach/pkg/sql/exec/coldata" "github.com/cockroachdb/cockroach/pkg/sql/exec/types" ) -type rankOp struct { - input exec.Operator - batch coldata.Batch - dense bool - // distinctCol is the output column of the chain of ordered distinct - // operators in which true will indicate that a new rank needs to be assigned - // to the corresponding tuple. - distinctCol []bool - outputColIdx int - partitionColIdx int - - // rank indicates which rank should be assigned to the next tuple. - rank int64 - // rankIncrement indicates by how much rank should be incremented when a - // tuple distinct from the previous one on the ordering columns is seen. It - // is used only in case of a regular rank function (i.e. not dense). - rankIncrement int64 -} - -var _ exec.Operator = &rankOp{} - // NewRankOperator creates a new exec.Operator that computes window function // RANK or DENSE_RANK. dense distinguishes between the two functions. input // *must* already be ordered on orderingCols (which should not be empty). @@ -63,27 +39,37 @@ func NewRankOperator( if err != nil { return nil, err } - return &rankOp{input: op, dense: dense, distinctCol: outputCol, outputColIdx: outputColIdx, partitionColIdx: partitionColIdx}, nil -} - -func (r *rankOp) Init() { - r.input.Init() - // RANK and DENSE_RANK start counting from 1. Before we assign the rank to a - // tuple in the batch, we first increment r.rank, so setting this - // rankIncrement to 1 will update r.rank to 1 on the very first tuple (as - // desired). - r.rankIncrement = 1 -} - -func (r *rankOp) Next(ctx context.Context) coldata.Batch { - r.batch = r.input.Next(ctx) - if r.batch.Length() == 0 { - return r.batch - } - if r.partitionColIdx != -1 { - r.nextBodyWithPartition() + if dense { + if partitionColIdx != -1 { + return &rankDense_true_HasPartition_true_Op{ + input: op, + distinctCol: outputCol, + outputColIdx: outputColIdx, + partitionColIdx: partitionColIdx, + }, nil + } else { + return &rankDense_true_HasPartition_false_Op{ + input: op, + distinctCol: outputCol, + outputColIdx: outputColIdx, + partitionColIdx: partitionColIdx, + }, nil + } } else { - r.nextBodyNoPartition() + if partitionColIdx != -1 { + return &rankDense_false_HasPartition_true_Op{ + input: op, + distinctCol: outputCol, + outputColIdx: outputColIdx, + partitionColIdx: partitionColIdx, + }, nil + } else { + return &rankDense_false_HasPartition_false_Op{ + input: op, + distinctCol: outputCol, + outputColIdx: outputColIdx, + partitionColIdx: partitionColIdx, + }, nil + } } - return r.batch } diff --git a/pkg/sql/exec/vecbuiltins/rank_tmpl.go b/pkg/sql/exec/vecbuiltins/rank_tmpl.go index 5d1551cdf16d..062882b1466b 100644 --- a/pkg/sql/exec/vecbuiltins/rank_tmpl.go +++ b/pkg/sql/exec/vecbuiltins/rank_tmpl.go @@ -23,13 +23,68 @@ package vecbuiltins -import "github.com/cockroachdb/cockroach/pkg/sql/exec/types" +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/sql/exec" + "github.com/cockroachdb/cockroach/pkg/sql/exec/coldata" + "github.com/cockroachdb/cockroach/pkg/sql/exec/types" +) // {{/* -func _NEXT_RANK(hasPartition bool) { // */}} - // {{define "nextRank"}} - // {{ if $.HasPartition }} +// _UPDATE_RANK is the template function for updating the state of rank +// operators. +func _UPDATE_RANK() { + panic("") +} + +// _UPDATE_RANK_INCREMENT is the template function for updating the state of +// rank operators. +func _UPDATE_RANK_INCREMENT() { + panic("") +} + +// */}} + +// {{range .}} + +type rankDense__DENSE_HasPartition__PARTITION_Op struct { + input exec.Operator + batch coldata.Batch + // distinctCol is the output column of the chain of ordered distinct + // operators in which true will indicate that a new rank needs to be assigned + // to the corresponding tuple. + distinctCol []bool + outputColIdx int + partitionColIdx int + + // rank indicates which rank should be assigned to the next tuple. + rank int64 + // rankIncrement indicates by how much rank should be incremented when a + // tuple distinct from the previous one on the ordering columns is seen. It + // is used only in case of a regular rank function (i.e. not dense). + rankIncrement int64 +} + +var _ exec.Operator = &rankDense__DENSE_HasPartition__PARTITION_Op{} + +func (r *rankDense__DENSE_HasPartition__PARTITION_Op) Init() { + r.input.Init() + // RANK and DENSE_RANK start counting from 1. Before we assign the rank to a + // tuple in the batch, we first increment r.rank, so setting this + // rankIncrement to 1 will update r.rank to 1 on the very first tuple (as + // desired). + r.rankIncrement = 1 +} + +func (r *rankDense__DENSE_HasPartition__PARTITION_Op) Next(ctx context.Context) coldata.Batch { + r.batch = r.input.Next(ctx) + if r.batch.Length() == 0 { + return r.batch + } + + // {{ if .HasPartition }} if r.partitionColIdx == r.batch.Width() { r.batch.AppendCol(types.Bool) } else if r.partitionColIdx > r.batch.Width() { @@ -47,7 +102,7 @@ func _NEXT_RANK(hasPartition bool) { // */}} sel := r.batch.Selection() if sel != nil { for i := uint16(0); i < r.batch.Length(); i++ { - // {{ if $.HasPartition }} + // {{ if .HasPartition }} if partitionCol[sel[i]] { r.rank = 1 r.rankIncrement = 1 @@ -56,25 +111,16 @@ func _NEXT_RANK(hasPartition bool) { // */}} } // {{end}} if r.distinctCol[sel[i]] { - // TODO(yuzefovich): template this part out to generate two different - // rank operators. - if r.dense { - r.rank++ - } else { - r.rank += r.rankIncrement - r.rankIncrement = 1 - } + _UPDATE_RANK() rankCol[sel[i]] = r.rank } else { rankCol[sel[i]] = r.rank - if !r.dense { - r.rankIncrement++ - } + _UPDATE_RANK_INCREMENT() } } } else { for i := uint16(0); i < r.batch.Length(); i++ { - // {{ if $.HasPartition }} + // {{ if .HasPartition }} if partitionCol[i] { r.rank = 1 r.rankIncrement = 1 @@ -83,33 +129,15 @@ func _NEXT_RANK(hasPartition bool) { // */}} } // {{end}} if r.distinctCol[i] { - // TODO(yuzefovich): template this part out to generate two different - // rank operators. - if r.dense { - r.rank++ - } else { - r.rank += r.rankIncrement - r.rankIncrement = 1 - } + _UPDATE_RANK() rankCol[i] = r.rank } else { rankCol[i] = r.rank - if !r.dense { - r.rankIncrement++ - } + _UPDATE_RANK_INCREMENT() } } } - // {{end}} - // {{/* + return r.batch } -// */}} - -func (r *rankOp) nextBodyWithPartition() { - _NEXT_RANK(true) -} - -func (r *rankOp) nextBodyNoPartition() { - _NEXT_RANK(false) -} +// {{end}} diff --git a/pkg/sql/logictest/testdata/logic_test/exec_window b/pkg/sql/logictest/testdata/logic_test/exec_window index 17b332b714d2..e697a52c3882 100644 --- a/pkg/sql/logictest/testdata/logic_test/exec_window +++ b/pkg/sql/logictest/testdata/logic_test/exec_window @@ -75,7 +75,6 @@ SELECT a, b, dense_rank() OVER () FROM t ORDER BY b, a 0 b 1 1 b 1 - query ITI SELECT a, b, dense_rank() OVER (ORDER BY a) FROM t ORDER BY b, a ----