-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
processors_sql.proto
1091 lines (975 loc) · 49.9 KB
/
processors_sql.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.
//
// Processor definitions for distributed SQL APIs. See
// docs/RFCS/distributed_sql.md.
// All the concepts here are "physical plan" concepts.
syntax = "proto2";
// Beware! This package name must not be changed, even though it doesn't match
// the Go package name, because it defines the Protobuf message names which
// can't be changed without breaking backward compatibility.
package cockroach.sql.distsqlrun;
option go_package = "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb";
import "gogoproto/gogo.proto";
import "roachpb/data.proto";
import "sql/catalog/descpb/structured.proto";
import "sql/catalog/descpb/join_type.proto";
import "sql/catalog/descpb/locking.proto";
import "sql/catalog/fetchpb/index_fetch.proto";
import "sql/types/types.proto";
import "sql/execinfrapb/data.proto";
import "sql/execinfrapb/processors_base.proto";
import "sql/inverted/span_expression.proto";
import "util/hlc/timestamp.proto";
// ValuesCoreSpec is the core of a processor that has no inputs and generates
// "pre-canned" rows. This is not intended to be used for very large datasets.
message ValuesCoreSpec {
// There is one DatumInfo for each element in a row. Can be empty, in which
// case raw_bytes will be empty.
// Note: (gogoproto.nullable) = false makes the generated Go field a value
// rather than a pointer.
repeated DatumInfo columns = 1 [(gogoproto.nullable) = false];
// The number of rows is especially useful when we have zero columns.
optional uint64 num_rows = 3 [(gogoproto.nullable) = false];
// Each raw block encodes one row; each datum is encoded according to the
// corresponding DatumInfo. As an optimization, if columns is empty, this will
// be empty rather than containing empty byte strings.
repeated bytes raw_bytes = 2;
}
// TableReaderSpec is the specification for a "table reader". A table reader
// performs KV operations to retrieve rows for a table and outputs the desired
// columns of the rows that pass a filter expression.
//
// The "internal columns" of a TableReader (see ProcessorSpec) correspond to the
// FetchSpec.FetchedColumns.
message TableReaderSpec {
// Describes the index to scan and the columns to fetch. The processor's
// "internal columns" correspond to FetchSpec.FetchedColumns (see the
// message-level comment above).
optional sqlbase.IndexFetchSpec fetch_spec = 20 [(gogoproto.nullable) = false];
// TableModificationTime is the timestamp of the transaction which last
// modified the table descriptor.
// TODO(radu, otan): take this into account during planning and remove it from
// this spec.
optional util.hlc.Timestamp table_descriptor_modification_time = 21 [(gogoproto.nullable) = false];
// If set, the spans are scanned in reverse (descending key) order.
optional bool reverse = 3 [(gogoproto.nullable) = false];
// The key spans to read.
repeated roachpb.Span spans = 18 [(gogoproto.nullable) = false];
// A hint for how many rows the consumer of the table reader output might
// need. This is used to size the initial KV batches to try to avoid reading
// many more rows than needed by the processor receiving the output.
//
// Not used if there is a limit set in the PostProcessSpec of this processor
// (that value will be used for sizing batches instead).
optional int64 limit_hint = 5 [(gogoproto.nullable) = false];
// If set, the TableReader can read all the spans in parallel, without any
// batch limits. This should only be the case when there is a known upper
// bound on the number of rows we can read, and when there is no limit or
// limit hint.
optional bool parallelize = 12 [(gogoproto.nullable) = false];
// batch_bytes_limit, if non-zero, controls the TargetBytes limits that the
// TableReader will use for its scans. If zero, then the server-side default
// is used. If parallelize is set, this cannot be set.
optional int64 batch_bytes_limit = 17 [(gogoproto.nullable) = false];
// If non-zero, this enables inconsistent historical scanning where different
// batches can be read with different timestamps. This is used for
// long-running table statistics which may outlive the TTL. Using this setting
// will cause inconsistencies across rows and even within rows.
//
// The value is a duration (in nanoseconds), which is the maximum "age" of the
// timestamp. If the scan takes long enough for the timestamp to become older,
// the timestamp is advanced by however much time passed.
//
// Example:
//
//  current time:    10
//  initial timestamp: 0
//  max timestamp age: 30
//
//  time
//  10:    start scan, timestamp=0
//  10-29: continue scanning at timestamp=0
//  30:    bump timestamp to 20
//  30-49: continue scanning at timestamp=20
//  50:    bump timestamp to 40
//  ...
//
// Note: it is an error to perform a historical read at an initial timestamp
// older than this value.
//
optional uint64 max_timestamp_age_nanos = 9 [(gogoproto.nullable) = false];
// Indicates the row-level locking strength to be used by the scan. If set to
// FOR_NONE, no row-level locking should be performed.
optional sqlbase.ScanLockingStrength locking_strength = 10 [(gogoproto.nullable) = false];
// Indicates the policy to be used by the scan for handling conflicting locks
// held by other active transactions when attempting to lock rows. Always set
// to BLOCK when locking_strength is FOR_NONE.
optional sqlbase.ScanLockingWaitPolicy locking_wait_policy = 11 [(gogoproto.nullable) = false];
// Indicates the row-level locking durability to be used by the scan.
optional sqlbase.ScanLockingDurability locking_durability = 23 [(gogoproto.nullable) = false];
// Indicates that misplanned ranges metadata should not be sent back to the
// DistSQLReceiver. This will be set to true for the scan with a hard limit
// (in which case we create a single processor that is placed at the
// leaseholder of the beginning of the key spans to be scanned).
optional bool ignore_misplanned_ranges = 22 [(gogoproto.nullable) = false];
// Field numbers retired by previous versions of this spec; they must not be
// reused for new fields.
reserved 1, 2, 4, 6, 7, 8, 13, 14, 15, 16, 19;
}
// FiltererSpec is the specification for a processor that filters input rows
// according to a boolean expression.
message FiltererSpec {
// A filtering expression which references the internal columns of the
// processor via ordinal references (@1, @2, etc). Rows that do not pass the
// filter are discarded.
optional Expression filter = 1 [(gogoproto.nullable) = false];
}
// JoinReaderSpec is the specification for a "join reader". A join reader
// performs KV operations to retrieve specific rows that correspond to the
// values in the input stream (join by lookup). The output can optionally
// preserve the order of the input rows.
//
// The "internal columns" of a JoinReader (see ProcessorSpec) are either:
// - the fetched columns (see IndexFetchSpec), if we are performing an index
// join (no lookup columns) or if we are performing a semi or anti join, or
// - the concatenation of the columns of the input stream with the fetched
// columns.
//
// Internally, only the values for the columns needed by the post-processing
// stage are populated.
//
// Example:
// Input stream columns: | a | b | Fetched columns: | c | d | e |
//
// If performing a lookup join on a = c (lookup columns is [0]):
// Internal columns: | a | b | c | d | e |
//
// If performing an index join (where a = c and b = d) (lookup columns is []):
// Internal columns: | c | d | e |
//
// There is a special case when a "join reader" is used as the second join in
// a pair of joins to accomplish a LEFT_OUTER, LEFT_SEMI or LEFT_ANTI join.
// The first join in this pair of joins is unable to precisely evaluate the
// join condition and produces false positives. This is typical when the first
// join is an inverted join (see InvertedJoinerSpec), but can also be the case
// when the first join is being evaluated over an index that does not have all
// the columns needed to evaluate the join condition. The first join outputs
// rows in sorted order of the original left columns. The input stream columns
// for the second join are a combination of the original left columns and the
// lookup columns. The first join additionally adds a continuation column that
// demarcates a group of successive rows that correspond to an original left
// row. The first row in a group contains false (since it is not a
// continuation of the group) and successive rows contain true.
//
// The mapping from the original join to the pair of joins is:
// LEFT_OUTER => LEFT_OUTER, LEFT_OUTER
// LEFT_SEMI => INNER, LEFT_SEMI (better than doing INNER, INNER, SORT, DISTINCT)
// LEFT_ANTI => LEFT_OUTER, LEFT_ANTI.
// where the first join always preserves order.
//
// More specifically, consider a lookup join example where the input stream
// columns are: | a | b | c | d | cont |.
// The lookup column is | d |. And the fetched columns are | e | f | with
// d = e.
// This join reader can see input of the form
// a1, b1, c1, d1, false
// a1, b1, c1, d2, true
// a1, b2, c1, null, false // when the first join is LEFT_OUTER
// a2, b1, c1, d3, false
// a2, b1, c1, d4, true
//
// Say both the results for (a1, b1, c1) are false positives, and the first
// of the (a2, b1, c1) result is a false positive.
// The output for LEFT_OUTER:
// a1, b1, c1, d1, false, null, null
// a1, b2, c1, null, false, null, null
// a2, b1, c1, d4, true, d4, f1
// The d, cont columns are not part of the original left row, so will be
// projected away after the join.
//
// The output for LEFT_ANTI:
// a1, b1, c1, d1, false
// a1, b2, c1, null, false
// Again, the d, cont columns will be projected away after the join.
//
// The output for LEFT_SEMI:
// a2, b1, c1, d4, true
// Again, the d, cont columns will be projected away after the join.
//
// The example above is for a lookup join as the second join in the
// paired-joins. The lookup join can also be the first join in the
// paired-joins, which is indicated by both
// OutputGroupContinuationForLeftRow and MaintainOrdering set to true.
message JoinReaderSpec {
// Describes the index to perform lookups in and the columns to fetch.
optional sqlbase.IndexFetchSpec fetch_spec = 19 [(gogoproto.nullable) = false];
// SplitFamilyIDs indicates that spans which fully constrain the index should
// be split into single-family spans for the given families. Unset if
// splitting is not possible.
//
// See span.MakeSplitter for the conditions that must hold for splitting to be
// allowed. It is recommended to use span.MakeSplitter() followed by
// splitter.FamilyIDs() to populate this field.
repeated uint32 split_family_ids = 20 [(gogoproto.customname) = "SplitFamilyIDs",
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb.FamilyID"];
// Field numbers retired by previous versions of this spec; they must not be
// reused for new fields.
reserved 1, 2;
// LookupExpr represents the part of the join condition used to perform the
// lookup into the index. It should only be set when LookupColumns is empty.
// LookupExpr is used instead of LookupColumns when the lookup condition is
// more complicated than a simple equality between input columns and index
// columns. In this case, LookupExpr specifies the expression that will be
// used to construct the spans for each lookup. Currently, the only
// expressions supported are conjunctions (AND expressions) of equality, IN
// expressions, and simple inequalities, specifically:
// 1. equalities between two variables (one from the input and one from the
// index) representing the equi-join condition(s),
// 2. equalities between an index column and a constant, and
// 3. IN expressions between an index column and a tuple of constants.
// 4. LT,GT,GE,LE between an index var and a constant.
//
// Variables in this expression are assigned in the same way as the ON
// condition below. Assuming that the input stream has N columns and the fetch
// spec has M fetched columns, in this expression variables @1 to @N refer to
// columns of the input stream and variables @(N+1) to @(N+M) refer to fetched
// columns.
//
// For example, a valid LookupExpr for N=2 and M=2 might be:
// @3 IN (10, 20) AND @2 = @4.
optional Expression lookup_expr = 16 [(gogoproto.nullable) = false];
// If RemoteLookupExpr is set, this is a locality optimized lookup join. In
// this case, LookupExpr contains the lookup join conditions targeting ranges
// located on local nodes (relative to the gateway region), and
// RemoteLookupExpr contains the lookup join conditions targeting remote
// nodes. The optimizer will only plan a locality optimized lookup join if it
// is known that each lookup returns at most one row. This fact allows the
// joinReader to use the local conditions in LookupExpr first, and if a match
// is found locally for each input row, there is no need to search remote
// nodes. If a local match is not found for all input rows, the joinReader
// uses RemoteLookupExpr to search remote nodes.
//
// The same restrictions on supported expressions that apply to LookupExpr
// also apply to RemoteLookupExpr. See the comment above LookupExpr for more
// details.
optional Expression remote_lookup_expr = 17 [(gogoproto.nullable) = false];
// LookupColumns, like LookupExpr, represents the part of the join condition
// used to perform the lookup into the index. It is used as an optimization
// for the common case where the join conditions are all simple equalities
// between input and index columns (i.e., only the first of the supported
// expression types listed above for LookupExpr). LookupColumns should only
// be set when LookupExpr is empty.
//
// LookupColumns contains the column indexes in the input stream that match
// with the index columns. These are the equality columns of the join. For
// example, if there are 3 input columns and 2 fetched columns, LookupColumns
// {0, 2} is equivalent to the LookupExpr @1 = @4 AND @3 = @5.
//
// If LookupExpr is empty, LookupColumns can be interpreted as follows:
//
// If empty (index join), the start of the input stream schema is assumed to
// match the index columns. The joinReader will perform an index join and the
// "internal columns" will be the fetched columns.
//
// If populated (lookup join), the `joinReader` will perform a lookup join
// and the "internal columns" will be the concatenation of the input stream
// columns followed by the fetched columns (except for semi/anti join, which
// don't output any fetched columns).
// TODO(rytaft): remove this field and use LookupExpr for all cases. This
// requires ensuring that cases currently using LookupColumns do not regress.
repeated uint32 lookup_columns = 3 [packed = true];
// If set, the lookup columns form a key in the target table and thus each
// lookup has at most one result.
optional bool lookup_columns_are_key = 8 [(gogoproto.nullable) = false];
// "ON" expression (in addition to the conditions in LookupExpr and/or
// equality constraints captured by the LookupColumns). Assuming that the
// input stream has N columns and the fetch spec has M fetched columns, in
// this expression variables @1 to @N refer to columns of the input stream and
// variables @(N+1) to @(N+M) refer to fetched columns.
optional Expression on_expr = 4 [(gogoproto.nullable) = false];
// This used to be used for an extra index filter expression. It was removed
// in DistSQL version 24.
reserved 5;
// For lookup joins. Only JoinType_INNER and JoinType_LEFT_OUTER are
// supported.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// This field used to be a visibility level of the columns that should be
// produced. We now produce the columns in the FetchSpec.
reserved 7;
// Indicates the row-level locking strength to be used by the join. If set to
// FOR_NONE, no row-level locking should be performed.
optional sqlbase.ScanLockingStrength locking_strength = 9 [(gogoproto.nullable) = false];
// Indicates the policy to be used by the join for handling conflicting locks
// held by other active transactions when attempting to lock rows. Always set
// to BLOCK when locking_strength is FOR_NONE.
optional sqlbase.ScanLockingWaitPolicy locking_wait_policy = 10 [(gogoproto.nullable) = false];
// Indicates the row-level locking durability to be used by the join.
optional sqlbase.ScanLockingDurability locking_durability = 24 [(gogoproto.nullable) = false];
// Indicates that the join reader should maintain the ordering of the input
// stream. This is applicable to both lookup joins and index joins. For lookup
// joins, maintaining order is expensive because it requires buffering. For
// index joins buffering is not required, but still, if ordering is not
// required, we'll change the output order to allow for some Pebble
// optimizations.
optional bool maintain_ordering = 11 [(gogoproto.nullable) = false];
// Field numbers retired by previous versions of this spec; they must not be
// reused for new fields.
reserved 12, 13;
// LeftJoinWithPairedJoiner is used when a left {outer,anti,semi} join is
// being achieved by pairing two joins, and this is the second join. See
// the comment above.
optional bool left_join_with_paired_joiner = 14 [(gogoproto.nullable) = false];
// OutputGroupContinuationForLeftRow indicates that this join is the first
// join in the paired-joins. At most one of OutputGroupContinuationForLeftRow
// and LeftJoinWithPairedJoiner must be true. Additionally, if
// OutputGroupContinuationForLeftRow is true, MaintainOrdering must also
// be true.
optional bool output_group_continuation_for_left_row = 15 [(gogoproto.nullable) = false];
// lookup_batch_bytes_limit, if non-zero, controls the TargetBytes limits that
// the joiner will use for its lookups. If zero, then the server-side default
// is used. Note that, regardless of this setting, bytes limits are not always
// used for lookups - it depends on whether the joiner decides it wants
// DistSender-parallelism or not.
optional int64 lookup_batch_bytes_limit = 18 [(gogoproto.nullable) = false];
// A hint for how many rows the consumer of the join reader output might
// need. This is used to size the initial batches of input rows to try to
// avoid reading many more rows than needed by the processor receiving the
// output.
//
// Not used if there is a limit set in the PostProcessSpec of this processor
// (that value will be used for sizing batches instead).
optional int64 limit_hint = 21 [(gogoproto.nullable) = false];
// Indicates that for each input row, the join reader should return looked-up
// rows in sorted order. This is only applicable to lookup joins for which
// more than one lookup row may be associated with a given input row. It can
// only be set to true if maintain_ordering is also true.
// maintain_lookup_ordering can be used if the output needs to be ordered by
// a prefix of input columns followed by index (lookup) columns without
// requiring a (buffered) sort.
optional bool maintain_lookup_ordering = 22 [(gogoproto.nullable) = false];
// RemoteOnlyLookups is true when this join is defined with only lookups
// that read into remote regions, though the lookups are defined in
// LookupExpr, not RemoteLookupExpr.
optional bool remote_only_lookups = 23 [(gogoproto.nullable) = false];
}
// SorterSpec is the specification for a "sorting aggregator". A sorting
// processor sorts elements in the input stream providing a certain output
// order guarantee regardless of the input ordering. The output ordering is
// according to a configurable set of columns.
//
// The "internal columns" of a Sorter (see ProcessorSpec) are the same as the
// input columns.
message SorterSpec {
// The ordering that the output of the sorter must satisfy (see the
// message-level comment above).
optional Ordering output_ordering = 1 [(gogoproto.nullable) = false];
// Ordering match length, specifying that the input is already sorted by the
// first 'n' output ordering columns, can be optionally specified for
// possible speed-ups taking advantage of the partial orderings.
optional uint32 ordering_match_len = 2 [(gogoproto.nullable) = false];
// The limit of a top-k sort is the number of rows that the sorter should
// output. If limit is 0, then all rows should be output.
optional int64 limit = 3 [(gogoproto.nullable) = false];
}
// DistinctSpec is the specification for a distinct processor, which
// eliminates rows with duplicate values on the distinct columns from the
// input stream.
message DistinctSpec {
// The ordered columns in the input stream can be optionally specified for
// possible optimizations. The specific ordering (ascending/descending) of
// the column itself is not important nor is the order in which the columns
// are specified. The ordered columns must be a subset of the distinct
// columns.
repeated uint32 ordered_columns = 1;
// The distinct columns in the input stream are those columns on which we
// check for distinct rows. If A,B,C are in distinct_columns and there is a
// 4th column D which is not included in distinct_columns, its values are not
// considered, so rows A1,B1,C1,D1 and A1,B1,C1,D2 are considered equal and
// only one of them (the first) is output.
repeated uint32 distinct_columns = 2;
// If true, then NULL values are treated as not equal to one another. Each NULL
// value will cause a new row group to be created. For example:
//
// c
// ----
// NULL
// NULL
//
// A distinct operation on column "c" will result in one output row if
// NullsAreDistinct is false, or two output rows if true. This is set to true
// for UPSERT and INSERT..ON CONFLICT statements, since they must treat NULL
// values as distinct.
optional bool nulls_are_distinct = 3 [(gogoproto.nullable) = false];
// If not empty, then an error with this text will be raised if there are two
// rows with duplicate distinct column values. This is used to implement the
// UPSERT and INSERT..ON CONFLICT statements, both of which prohibit the same
// row from being changed twice.
optional string error_on_dup = 4 [(gogoproto.nullable) = false];
// OutputOrdering specifies the required ordering of the output produced by
// the distinct. The input to the processor *must* already be ordered
// according to it.
optional Ordering output_ordering = 5 [(gogoproto.nullable) = false];
}
// The specification for a WITH ORDINALITY processor. It adds a new column to
// each resulting row that contains the ordinal number of the row. Since there
// are no arguments for this operator, the spec is empty.
message OrdinalitySpec {
// Currently empty: the operator takes no arguments (see the message-level
// comment above), so there is nothing to configure.
}
// ZigzagJoinerSpec is the specification for a zigzag join processor. The
// processor's current implementation fetches the rows using internal
// rowFetchers.
//
// The "internal columns" of a ZigzagJoiner (see ProcessorSpec) are the
// concatenation of all the fetch columns of all the sides.
message ZigzagJoinerSpec {
// Side describes one input side of the zigzag join: the index to fetch
// from, its equality columns, and any fixed prefix values.
message Side {
// Describes the index and the columns to fetch for this side.
optional sqlbase.IndexFetchSpec fetch_spec = 1 [(gogoproto.nullable) = false];
// EqColumns contains the equality columns for this side (as fetched column
// ordinals). All sides have the same number of equality columns.
optional Columns eq_columns = 2 [(gogoproto.nullable) = false];
// Fixed values, corresponding to a prefix of the index key columns.
optional ValuesCoreSpec fixed_values = 3 [(gogoproto.nullable) = false];
// Indicates the row-level locking strength to be used by the scan. If set to
// FOR_NONE, no row-level locking should be performed.
optional sqlbase.ScanLockingStrength locking_strength = 4 [(gogoproto.nullable) = false];
// Indicates the policy to be used by the scan for handling conflicting locks
// held by other active transactions when attempting to lock rows. Always set
// to BLOCK when locking_strength is FOR_NONE.
optional sqlbase.ScanLockingWaitPolicy locking_wait_policy = 5 [(gogoproto.nullable) = false];
// Indicates the row-level locking durability to be used by the scan.
optional sqlbase.ScanLockingDurability locking_durability = 8 [(gogoproto.nullable) = false];
}
// The sides participating in the join; each side's rows are fetched with an
// internal rowFetcher (see the message-level comment above).
repeated Side sides = 7 [(gogoproto.nullable) = false];
// "ON" expression (in addition to the equality constraints captured by the
// equality columns). Assuming that the left side has N columns and the
// right side has M columns, in this expression ordinal references @1 to @N
// refer to columns of the left side and variables @(N+1) to @(N+M) refer to
// columns in the right side.
optional Expression on_expr = 4 [(gogoproto.nullable) = false];
// The type of join to perform.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// Field numbers retired by previous versions of this spec; they must not be
// reused for new fields.
reserved 1, 2, 3, 5;
}
// MergeJoinerSpec is the specification for a merge join processor. The processor
// has two inputs and one output. The inputs must have the same ordering on the
// columns that have equality constraints. For example:
// SELECT * FROM T1 INNER JOIN T2 ON T1.C1 = T2.C5 AND T1.C2 = T2.C4
//
// To perform a merge join, the streams corresponding to T1 and T2 must have the
// same ordering on columns C1, C2 and C5, C4 respectively. For example: C1+,C2-
// and C5+,C4-.
//
// The "internal columns" of a MergeJoiner (see ProcessorSpec) are:
// - for INNER, LEFT_OUTER, RIGHT_OUTER, FULL_OUTER - the concatenation of left
// input columns and right input columns.
// If the left input has N columns and the right input has M columns, the
// first N columns contain values from the left side and the following M
// columns contain values from the right side.
// - for LEFT_SEMI, LEFT_ANTI, INTERSECT_ALL, EXCEPT_ALL - the left input
// columns.
// - for RIGHT_SEMI, RIGHT_ANTI - the right input columns.
//
// Note that, regardless of the join type, an optional ON expression can refer
// to columns from both inputs.
message MergeJoinerSpec {
// The streams must be ordered according to the columns that have equality
// constraints. The first column of the left ordering is constrained to be
// equal to the first column in the right ordering and so on. The ordering
// lengths and directions must match.
// In the example above, left ordering describes C1+,C2- and right ordering
// describes C5+,C4-.
optional Ordering left_ordering = 1 [(gogoproto.nullable) = false];
optional Ordering right_ordering = 2 [(gogoproto.nullable) = false];
// "ON" expression (in addition to the equality constraints captured by the
// orderings). Assuming that the left stream has N columns and the right
// stream has M columns, in this expression ordinal references @1 to @N refer
// to columns of the left stream and variables @(N+1) to @(N+M) refer to
// columns in the right stream.
optional Expression on_expr = 5 [(gogoproto.nullable) = false];
// The type of join to perform; it determines the output ("internal")
// columns as described in the message-level comment above.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// NullEquality indicates that NULL = NULL should be considered true.
// This allows OUTER JOINs to consider NULL values meaningfully. An
// example of this is during SCRUB checks on secondary indexes.
optional bool null_equality = 7 [(gogoproto.nullable) = false];
// If true, it is guaranteed that the left equality columns form a key for
// the left input. In other words, no two rows from the left input have the
// same set of values on the left equality columns.
optional bool left_eq_columns_are_key = 8 [(gogoproto.nullable) = false];
// If true, it is guaranteed that the right equality columns form a key for
// the right input. In other words, no two rows from the right input have the
// same set of values on the right equality columns.
optional bool right_eq_columns_are_key = 9 [(gogoproto.nullable) = false];
}
// HashJoinerSpec is the specification for a hash join processor. The processor
// has two inputs and one output.
//
// The processor works by reading the entire right input and putting it in a hash
// table. Thus, there is no guarantee on the ordering of results that stem only
// from the right input (in the case of RIGHT_OUTER, FULL_OUTER). However, it is
// guaranteed that results that involve the left stream preserve the ordering;
// i.e. all results that stem from left row (i) precede results that stem from
// left row (i+1).
//
// The "internal columns" of a HashJoiner (see ProcessorSpec) are:
// - for INNER, LEFT_OUTER, RIGHT_OUTER, FULL_OUTER - the concatenation of left
// input columns and right input columns.
// If the left input has N columns and the right input has M columns, the
// first N columns contain values from the left side and the following M
// columns contain values from the right side.
// - for LEFT_SEMI, LEFT_ANTI, INTERSECT_ALL, EXCEPT_ALL - the left input
// columns.
// - for RIGHT_SEMI, RIGHT_ANTI - the right input columns.
//
// Note that, regardless of the join type, an optional ON expression can refer
// to columns from both inputs.
message HashJoinerSpec {
// The join constraints certain columns from the left stream to equal
// corresponding columns on the right stream. These must have the same length.
repeated uint32 left_eq_columns = 1 [packed = true];
repeated uint32 right_eq_columns = 2 [packed = true];
// "ON" expression (in addition to the equality constraints captured by the
// orderings). Assuming that the left stream has N columns and the right
// stream has M columns, in this expression variables @1 to @N refer to
// columns of the left stream and variables @(N+1) to @(N+M) refer to columns
// in the right stream.
optional Expression on_expr = 5 [(gogoproto.nullable) = false];
// The type of join to perform; it determines the output ("internal")
// columns as described in the message-level comment above.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// If true, it is guaranteed that the left equality columns form a key for
// the left input. In other words, no two rows from the left input have the
// same set of values on the left equality columns.
optional bool left_eq_columns_are_key = 8 [(gogoproto.nullable) = false];
// If true, it is guaranteed that the right equality columns form a key for
// the right input. In other words, no two rows from the right input have the
// same set of values on the right equality columns.
optional bool right_eq_columns_are_key = 9 [(gogoproto.nullable) = false];
// Retired field number; it must not be reused for new fields.
reserved 7;
}
// InvertedJoinerSpec is the specification for an inverted join. The processor
// has one input and one output and performs lookups in an inverted index.
//
// The processor uses the inverted index to join with one or more columns of the
// input, according to an inverted expression. In addition to the InvertedExpr,
// the processor also evaluates an OnExpr on the joined rows that satisfy the
// InvertedExpr.
// The "internal columns" of an InvertedJoiner for INNER and LEFT_OUTER joins
// are a concatenation of the input columns and the fetched columns. For
// LEFT_SEMI and LEFT_ANTI, the "internal columns" are the columns of the input.
//
// The fetched columns must always include the inverted key column (so it can be
// referred to by InvertedExpr), as well as any prefix columns. Note that the
// processor does not produce output values for the inverted key column (a
// single output row can correspond to multiple inverted key values).
//
// In many cases, the inverted join will contain false positives wrt the
// original join condition. This is handled by pairing it with a lookup join.
// This pairing works naturally when the user query specified INNER, by
// running an INNER inverted join followed by INNER lookup join. For a user
// query with LEFT_OUTER/LEFT_ANTI, the inverted join is run as a LEFT_OUTER
// with a special mode that outputs an additional bool column that represents
// whether this row is a continuation of a group, where a group is defined as
// rows corresponding to the same original input row. This is paired with a
// lookup join that also knows about the semantics of this bool column. For a
// user query with LEFT_SEMI, the inverted join is run as an INNER join with
// the same special mode. See the JoinReaderSpec for an example.
//
// Example:
// TABLE (k, other, z, PRIMARY KEY (k), INVERTED INDEX (z))
// Input stream columns: | a | b |
// The InvertedExpr involves columns b, z.
// Fetched columns: | z' | k |
// where z' is derived from z. For instance, if z is an array, z' will
// correspond to elements of the array.
// The OnExpr can use columns a, b, k. The InvertedExpr can use columns
// a, b, z'.
//
// Internal columns for INNER and LEFT_OUTER: | a | b | z' | k |
// The values of z' are not populated in the output.
// Internal columns for LEFT_SEMI and LEFT_ANTI: | a | b |
//
// Multi-column inverted index example:
// TABLE (k, other, region, z, PRIMARY KEY (k), INVERTED INDEX (region, z))
// Input stream columns: | a | b | c |
// Fetched columns: | z' | region | k |
// where z' is derived from z and region is a non-inverted prefix column.
// The InvertedExpr involves columns b, z. The non-inverted prefix key columns
// equate c to region.
// The OnExpr can use columns a, b, c, k, region.
//
// Internal columns for INNER and LEFT_OUTER: | a | b | c | z' | region | k |
// where the values of z' are not populated (as in the previous example).
// Internal columns for LEFT_SEMI and LEFT_ANTI: | a | b | c |
//
// For INNER/LEFT_OUTER with OutputGroupContinuationForLeftRow = true, the
// internal columns include an additional bool column as the last column.
message InvertedJoinerSpec {
// Describes the inverted index to perform lookups in and the columns to
// fetch from it (which must include the inverted key column and any
// non-inverted prefix columns; see the message comment above).
optional sqlbase.IndexFetchSpec fetch_spec = 10 [(gogoproto.nullable) = false];
// InvertedColumnOriginalType is the type of the table column that is being
// indexed (e.g. JSON or Geometry).
optional sql.sem.types.T inverted_column_original_type = 11;
// Fields 1-3 were used in the past; do not reuse them.
reserved 1, 2, 3;
// The join expression is a conjunction of inverted_expr and on_expr.
// Expression involving the indexed column and columns from the input.
// Assuming that the input stream has N columns and the table that has been
// indexed has M columns, in this expression variables @1 to @N refer to
// columns of the input stream and variables @(N+1) to @(N+M) refer to fetched
// columns. Although the numbering includes all columns, only columns
// corresponding to the indexed column and the input columns may be present in
// this expression. Note that the column numbering matches the numbering used
// by OnExpr.
//
// The expression is passed to xform.NewDatumToInvertedExpr to construct an
// implementation of invertedexpr.DatumToInvertedExpr, which will be fed each
// input row and output an expression to evaluate over the inverted index.
optional Expression inverted_expr = 4 [(gogoproto.nullable) = false];
// Optional expression involving the columns in the index (other than the
// inverted column) and the columns in the input stream. Assuming that the
// input stream has N columns and the table that has been indexed has M
// columns, in this expression variables @1 to @N refer to columns of the
// input stream and variables @(N+1) to @(N+M) refer to fetched columns.
// The numbering does not omit the column in the table corresponding to the
// inverted column, or other table columns absent from the index, but they
// cannot be present in this expression. Note that the column numbering
// matches the numbering used by InvertedExpr.
optional Expression on_expr = 5 [(gogoproto.nullable) = false];
// Only INNER, LEFT_OUTER, LEFT_SEMI, LEFT_ANTI are supported. For indexes
// that produce false positives for user expressions, like geospatial
// indexes, only INNER and LEFT_OUTER are actually useful -- LEFT_SEMI will
// be mapped to INNER by the optimizer, and LEFT_ANTI to LEFT_OUTER, to
// allow the false positives to be eliminated by evaluating the exact
// expression on the rows output by this join.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// Indicates that the inverted joiner should maintain the ordering of the
// input stream.
optional bool maintain_ordering = 7 [(gogoproto.nullable) = false];
// Indicates that the join should output a continuation column that
// indicates whether a row is a continuation of a group corresponding to a
// left row.
optional bool output_group_continuation_for_left_row = 8 [(gogoproto.nullable) = false];
// Column indexes in the input stream specifying the columns which match with
// the non-inverted prefix columns of the index, if the index is multi-column.
// These are the equality columns of the join. The length of
// prefix_equality_columns should be equal to the number of non-inverted
// prefix columns in the index.
repeated uint32 prefix_equality_columns = 9 [packed = true];
// Indicates the row-level locking strength to be used by the scan. If set to
// FOR_NONE, no row-level locking should be performed.
optional sqlbase.ScanLockingStrength locking_strength = 12 [(gogoproto.nullable) = false];
// Indicates the policy to be used by the scan for handling conflicting locks
// held by other active transactions when attempting to lock rows. Always set
// to BLOCK when locking_strength is FOR_NONE.
optional sqlbase.ScanLockingWaitPolicy locking_wait_policy = 13 [(gogoproto.nullable) = false];
// Indicates the row-level locking durability to be used by the scan.
optional sqlbase.ScanLockingDurability locking_durability = 14 [(gogoproto.nullable) = false];
}
// InvertedFiltererSpec is the specification of a processor that does filtering
// on a table by evaluating an invertedexpr.SpanExpressionProto on an inverted
// index of the table. The input consists of the inverted index rows from
// InvertedExpr.SpansToRead. It is acceptable for a filter on the primary key
// to be pushed down between the scan and the inverted filterer.
//
// Example:
// Table columns: | a | b | c | d |
// where a, d are the primary key and b is the column with the inverted index.
// Inverted index columns: | a | b' | d |
// where b' is derived from b. For instance, if b is an array, b' will be
// elements of the array.
//
// Internal columns are | a | b | d |. The output sets b to NULL, since it does
// not have the value of the original column that was indexed in the inverted
// column.
//
// Optionally, there can be a pre-filtering spec that describes an expression
// (derived from the original expression that was converted to inverted_expr),
// that must evaluate to true on each inverted row. This is a performance
// optimization -- for more details see invertedidx.PreFilterer (geometry
// and geography inverted indexes are the only ones that currently use
// pre-filtering).
message InvertedFiltererSpec {
// The index in the input row of the inverted column.
optional uint32 inverted_col_idx = 1 [(gogoproto.nullable) = false];
// The expression to evaluate. The SpansToRead are ignored since they
// have already been used to set up the input.
optional inverted.SpanExpressionProto inverted_expr = 2 [(gogoproto.nullable) = false];
// Optional pre-filtering expression.
message PreFiltererSpec {
// Expression has only one variable, @1, which refers to the column with
// the inverted index.
optional Expression expression = 1 [(gogoproto.nullable) = false];
// The type of the original column that was indexed in the inverted index.
optional sql.sem.types.T type = 2;
}
// If set, the pre-filter to apply to each inverted row (see the message
// comment above for the motivation).
optional PreFiltererSpec pre_filterer_spec = 6;
}
// AggregatorSpec is the specification for an "aggregator" (processor core
// type, not the logical plan computation stage). An aggregator performs
// 'aggregation' in the SQL sense in that it groups rows and computes an aggregate
// for each group. The group is configured using the group key. The aggregator
// can be configured with one or more aggregation functions.
//
// The "internal columns" of an Aggregator map 1-1 to the aggregations.
message AggregatorSpec {
// These mirror the aggregate functions supported by sql/parser. See
// sql/parser/aggregate_builtins.go.
// NOTE(review): value 6 is absent but not marked reserved -- presumably a
// deleted function; consider adding it to a reserved statement.
enum Func {
ANY_NOT_NULL = 0;
AVG = 1;
BOOL_AND = 2;
BOOL_OR = 3;
CONCAT_AGG = 4;
COUNT = 5;
MAX = 7;
MIN = 8;
STDDEV = 9;
SUM = 10;
SUM_INT = 11;
VARIANCE = 12;
XOR_AGG = 13;
COUNT_ROWS = 14;
SQRDIFF = 15;
FINAL_VARIANCE = 16;
FINAL_STDDEV = 17;
ARRAY_AGG = 18;
JSON_AGG = 19;
// JSONB_AGG is an alias for JSON_AGG, they do the same thing.
JSONB_AGG = 20;
STRING_AGG = 21;
BIT_AND = 22;
BIT_OR = 23;
CORR = 24;
PERCENTILE_DISC_IMPL = 25;
PERCENTILE_CONT_IMPL = 26;
JSON_OBJECT_AGG = 27;
JSONB_OBJECT_AGG = 28;
VAR_POP = 29;
STDDEV_POP = 30;
ST_MAKELINE = 31;
ST_EXTENT = 32;
ST_UNION = 33;
ST_COLLECT = 34;
COVAR_POP = 35;
COVAR_SAMP = 36;
REGR_INTERCEPT = 37;
REGR_R2 = 38;
REGR_SLOPE = 39;
REGR_SXX = 40;
REGR_SYY = 41;
REGR_SXY = 42;
REGR_COUNT = 43;
REGR_AVGX = 44;
REGR_AVGY = 45;
FINAL_STDDEV_POP = 46;
FINAL_VAR_POP = 47;
TRANSITION_REGRESSION_AGGREGATE = 48;
FINAL_COVAR_POP = 49;
FINAL_REGR_SXX = 50;
FINAL_REGR_SXY = 51;
FINAL_REGR_SYY = 52;
FINAL_REGR_AVGX = 53;
FINAL_REGR_AVGY = 54;
FINAL_REGR_INTERCEPT = 55;
FINAL_REGR_R2 = 56;
FINAL_REGR_SLOPE = 57;
FINAL_COVAR_SAMP = 58;
FINAL_CORR = 59;
FINAL_SQRDIFF = 60;
ARRAY_CAT_AGG = 61;
MERGE_STATS_METADATA = 62;
MERGE_STATEMENT_STATS = 63;
MERGE_TRANSACTION_STATS = 64;
MERGE_AGGREGATED_STMT_METADATA = 65;
}
// Type describes the scalar-ness of the aggregation, which determines the
// output when there are no input rows.
enum Type {
// This setting exists just for backwards compatibility; it's equivalent to
// SCALAR when there are no grouping columns, and to NON_SCALAR when there
// are grouping columns.
AUTO = 0;
// A scalar aggregation has no grouping columns and always returns one
// result row.
SCALAR = 1;
// A non-scalar aggregation returns no rows if there are no input rows; it
// may or may not have grouping columns.
NON_SCALAR = 2;
}
// Aggregation describes a single aggregate function invocation.
message Aggregation {
// The aggregate function to compute.
optional Func func = 1 [(gogoproto.nullable) = false];
// If true, the aggregation function operates as '<FUNC> DISTINCT' would
// in SQL; the default (false) corresponds to the '<FUNC> ALL' behavior.
optional bool distinct = 2 [(gogoproto.nullable) = false];
// The column index specifies the argument(s) to the aggregator function.
//
// Most aggregations take one argument.
// COUNT_ROWS takes no arguments.
// FINAL_STDDEV and FINAL_VARIANCE take three arguments (SQRDIFF, SUM,
// COUNT).
repeated uint32 col_idx = 5;
// If set, this column index specifies a boolean argument; rows for which
// this value is not true don't contribute to this aggregation. This enables
// the filter clause, e.g.:
// SELECT SUM(x) FILTER (WHERE y > 1), SUM(x) FILTER (WHERE y < 1) FROM t
optional uint32 filter_col_idx = 4;
// Arguments are const expressions passed to aggregation functions.
repeated Expression arguments = 6 [(gogoproto.nullable) = false];
// Field 3 was used in the past; do not reuse it.
reserved 3;
}
// The group key is a subset of the columns in the input stream schema on the
// basis of which we define our groups.
repeated uint32 group_cols = 2 [packed = true];
// The aggregations to compute; each one produces one internal column.
repeated Aggregation aggregations = 3 [(gogoproto.nullable) = false];
// A subset of the GROUP BY columns which are ordered in the input.
repeated uint32 ordered_group_cols = 4 [packed = true];
// The scalar-ness of the aggregation; see the Type enum above.
optional Type type = 5 [(gogoproto.nullable) = false];
// OutputOrdering specifies the required ordering of the output produced by
// the aggregator. The input to the processor *must* already be ordered
// according to it.
optional Ordering output_ordering = 6 [(gogoproto.nullable) = false];
}
// ProjectSetSpec is the specification of a processor which applies a set of
// expressions, which may be set-returning functions, to its input.
message ProjectSetSpec {
// Expressions to be applied.
repeated Expression exprs = 1 [(gogoproto.nullable) = false];
// Column types for the generated values.
repeated sql.sem.types.T generated_columns = 2;
// The number of columns each expression returns. Same length as exprs.
repeated uint32 num_cols_per_gen = 3;
// Column labels for the generated values. Needed for some builtin functions
// (like record_to_json) that require the column labels to do their jobs.
repeated string generated_column_labels = 4;
}
// WindowerSpec is the specification of a processor that performs computations
// of window functions that have the same PARTITION BY clause. For a particular
// windowFn, the processor puts result at windowFn.ArgIdxStart and "consumes"
// all arguments to windowFn (windowFn.ArgCount of them). So if windowFn takes
// no arguments, an extra column is added; if windowFn takes more than one
// argument, (windowFn.ArgCount - 1) columns are removed.
message WindowerSpec {
enum WindowFunc {
// These mirror window functions from window_builtins.go.
ROW_NUMBER = 0;
RANK = 1;
DENSE_RANK = 2;
PERCENT_RANK = 3;
CUME_DIST = 4;
NTILE = 5;
LAG = 6;
LEAD = 7;
FIRST_VALUE = 8;
LAST_VALUE = 9;
NTH_VALUE = 10;
}
// Func specifies which function to compute. It can either be built-in
// aggregate or built-in window function.
message Func {
option (gogoproto.onlyone) = true;
optional AggregatorSpec.Func aggregateFunc = 1;
optional WindowFunc windowFunc = 2;
}
// Frame is the specification of a single window frame for a window function.
message Frame {
// Mode indicates which mode of framing is used.
enum Mode {
// RANGE specifies frame in terms of logical range (e.g. 1 unit cheaper).
RANGE = 0;
// ROWS specifies frame in terms of physical offsets (e.g. 1 row before).
ROWS = 1;
// GROUPS specifies frame in terms of peer groups (where "peers" mean
// rows not distinct in the ordering columns).
GROUPS = 2;
}
// BoundType indicates which type of boundary is used.
enum BoundType {
UNBOUNDED_PRECEDING = 0;
UNBOUNDED_FOLLOWING = 1;
// Offsets are stored within Bound.
OFFSET_PRECEDING = 2;
OFFSET_FOLLOWING = 3;
CURRENT_ROW = 4;
}
// Exclusion specifies the type of frame exclusion.
enum Exclusion {
NO_EXCLUSION = 0;
EXCLUDE_CURRENT_ROW = 1;
EXCLUDE_GROUP = 2;
EXCLUDE_TIES = 3;
}
// Bound specifies the type of boundary and the offset (if present).
message Bound {
optional BoundType boundType = 1 [(gogoproto.nullable) = false];
// For UNBOUNDED_PRECEDING, UNBOUNDED_FOLLOWING, and CURRENT_ROW offset
// is ignored. Integer offset for ROWS and GROUPS modes is stored in
// int_offset while an encoded datum and the type information are stored
// for RANGE mode.
optional uint64 int_offset = 2 [(gogoproto.nullable) = false];
optional bytes typed_offset = 3;
optional DatumInfo offset_type = 4 [(gogoproto.nullable) = false];
}
// Bounds specifies boundaries of the window frame.
message Bounds {
// Start bound must always be present whereas end bound might be omitted.