-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
processors_sql.proto
1091 lines (975 loc) · 49.9 KB
/
processors_sql.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.
//
// Processor definitions for distributed SQL APIs. See
// docs/RFCS/distributed_sql.md.
// All the concepts here are "physical plan" concepts.
syntax = "proto2";
// Beware! This package name must not be changed, even though it doesn't match
// the Go package name, because it defines the Protobuf message names which
// can't be changed without breaking backward compatibility.
package cockroach.sql.distsqlrun;
option go_package = "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb";
import "gogoproto/gogo.proto";
import "roachpb/data.proto";
import "sql/catalog/descpb/structured.proto";
import "sql/catalog/descpb/join_type.proto";
import "sql/catalog/descpb/locking.proto";
import "sql/catalog/fetchpb/index_fetch.proto";
import "sql/types/types.proto";
import "sql/execinfrapb/data.proto";
import "sql/execinfrapb/processors_base.proto";
import "sql/inverted/span_expression.proto";
import "util/hlc/timestamp.proto";
// ValuesCoreSpec is the core of a processor that has no inputs and generates
// "pre-canned" rows. This is not intended to be used for very large datasets.
message ValuesCoreSpec {
// There is one DatumInfo for each element in a row. Can be empty, in which
// case raw_bytes will be empty.
// Note: (gogoproto.nullable) = false makes the generated Go field a value
// rather than a pointer.
repeated DatumInfo columns = 1 [(gogoproto.nullable) = false];
// The number of rows is especially useful when we have zero columns.
optional uint64 num_rows = 3 [(gogoproto.nullable) = false];
// Each raw block encodes one row; each datum is encoded according to the
// corresponding DatumInfo. As an optimization, if columns is empty, this will
// be empty rather than containing empty byte strings.
repeated bytes raw_bytes = 2;
}
// TableReaderSpec is the specification for a "table reader". A table reader
// performs KV operations to retrieve rows for a table and outputs the desired
// columns of the rows that pass a filter expression.
//
// The "internal columns" of a TableReader (see ProcessorSpec) correspond to the
// FetchSpec.FetchedColumns.
message TableReaderSpec {
// Describes the index to scan and the columns to fetch. The processor's
// "internal columns" correspond to FetchSpec.FetchedColumns (see the
// message-level comment above).
optional sqlbase.IndexFetchSpec fetch_spec = 20 [(gogoproto.nullable) = false];
// TableModificationTime is the timestamp of the transaction which last
// modified the table descriptor.
// TODO(radu, otan): take this into account during planning and remove it from
// this spec.
optional util.hlc.Timestamp table_descriptor_modification_time = 21 [(gogoproto.nullable) = false];
// If set, the spans are scanned in reverse (descending key) order.
optional bool reverse = 3 [(gogoproto.nullable) = false];
// The key spans to read.
repeated roachpb.Span spans = 18 [(gogoproto.nullable) = false];
// A hint for how many rows the consumer of the table reader output might
// need. This is used to size the initial KV batches to try to avoid reading
// many more rows than needed by the processor receiving the output.
//
// Not used if there is a limit set in the PostProcessSpec of this processor
// (that value will be used for sizing batches instead).
optional int64 limit_hint = 5 [(gogoproto.nullable) = false];
// If set, the TableReader can read all the spans in parallel, without any
// batch limits. This should only be the case when there is a known upper
// bound on the number of rows we can read, and when there is no limit or
// limit hint.
optional bool parallelize = 12 [(gogoproto.nullable) = false];
// batch_bytes_limit, if non-zero, controls the TargetBytes limits that the
// TableReader will use for its scans. If zero, then the server-side default
// is used. If parallelize is set, this cannot be set.
optional int64 batch_bytes_limit = 17 [(gogoproto.nullable) = false];
// If non-zero, this enables inconsistent historical scanning where different
// batches can be read with different timestamps. This is used for
// long-running table statistics which may outlive the TTL. Using this setting
// will cause inconsistencies across rows and even within rows.
//
// The value is a duration (in nanoseconds), which is the maximum "age" of the
// timestamp. If the scan takes long enough for the timestamp to become older,
// the timestamp is advanced by however much time passed.
//
// Example:
//
//  current time:    10
//  initial timestamp: 0
//  max timestamp age: 30
//
//  time
//  10:    start scan, timestamp=0
//  10-29: continue scanning at timestamp=0
//  30:    bump timestamp to 20
//  30-49: continue scanning at timestamp=20
//  50:    bump timestamp to 40
//  ...
//
// Note: it is an error to perform a historical read at an initial timestamp
// older than this value.
//
optional uint64 max_timestamp_age_nanos = 9 [(gogoproto.nullable) = false];
// Indicates the row-level locking strength to be used by the scan. If set to
// FOR_NONE, no row-level locking should be performed.
optional sqlbase.ScanLockingStrength locking_strength = 10 [(gogoproto.nullable) = false];
// Indicates the policy to be used by the scan for handling conflicting locks
// held by other active transactions when attempting to lock rows. Always set
// to BLOCK when locking_strength is FOR_NONE.
optional sqlbase.ScanLockingWaitPolicy locking_wait_policy = 11 [(gogoproto.nullable) = false];
// Indicates the row-level locking durability to be used by the scan.
optional sqlbase.ScanLockingDurability locking_durability = 23 [(gogoproto.nullable) = false];
// Indicates that misplanned ranges metadata should not be sent back to the
// DistSQLReceiver. This will be set to true for the scan with a hard limit
// (in which case we create a single processor that is placed at the
// leaseholder of the beginning of the key spans to be scanned).
optional bool ignore_misplanned_ranges = 22 [(gogoproto.nullable) = false];
// Field numbers retired by previous versions of this spec; they must not be
// reused for new fields.
reserved 1, 2, 4, 6, 7, 8, 13, 14, 15, 16, 19;
}
// FiltererSpec is the specification for a processor that filters input rows
// according to a boolean expression.
message FiltererSpec {
// A filtering expression which references the internal columns of the
// processor via ordinal references (@1, @2, etc). Rows that do not pass the
// filter are discarded.
optional Expression filter = 1 [(gogoproto.nullable) = false];
}
// JoinReaderSpec is the specification for a "join reader". A join reader
// performs KV operations to retrieve specific rows that correspond to the
// values in the input stream (join by lookup). The output can optionally
// preserve the order of the input rows.
//
// The "internal columns" of a JoinReader (see ProcessorSpec) are either:
// - the fetched columns (see IndexFetchSpec), if we are performing an index
// join (no lookup columns) or if we are performing a semi or anti join, or
// - the concatenation of the columns of the input stream with the fetched
// columns.
//
// Internally, only the values for the columns needed by the post-processing
// stage are populated.
//
// Example:
// Input stream columns: | a | b | Fetched columns: | c | d | e |
//
// If performing a lookup join on a = c (lookup columns is [0]):
// Internal columns: | a | b | c | d | e |
//
// If performing an index join (where a = c and b = d) (lookup columns is []):
// Internal columns: | c | d | e |
//
// There is a special case when a "join reader" is used as the second join in
// a pair of joins to accomplish a LEFT_OUTER, LEFT_SEMI or LEFT_ANTI join.
// The first join in this pair of joins is unable to precisely evaluate the
// join condition and produces false positives. This is typical when the first
// join is an inverted join (see InvertedJoinerSpec), but can also be the case
// when the first join is being evaluated over an index that does not have all
// the columns needed to evaluate the join condition. The first join outputs
// rows in sorted order of the original left columns. The input stream columns
// for the second join are a combination of the original left columns and the
// lookup columns. The first join additionally adds a continuation column that
// demarcates a group of successive rows that correspond to an original left
// row. The first row in a group contains false (since it is not a
// continuation of the group) and successive rows contain true.
//
// The mapping from the original join to the pair of joins is:
// LEFT_OUTER => LEFT_OUTER, LEFT_OUTER
// LEFT_SEMI => INNER, LEFT_SEMI (better than doing INNER, INNER, SORT, DISTINCT)
// LEFT_ANTI => LEFT_OUTER, LEFT_ANTI.
// where the first join always preserves order.
//
// More specifically, consider a lookup join example where the input stream
// columns are: | a | b | c | d | cont |.
// The lookup column is | d |. And the fetched columns are | e | f | with
// d = e.
// This join reader can see input of the form
// a1, b1, c1, d1, false
// a1, b1, c1, d2, true
// a1, b2, c1, null, false // when the first join is LEFT_OUTER
// a2, b1, c1, d3, false
// a2, b1, c1, d4, true
//
// Say both the results for (a1, b1, c1) are false positives, and the first
// of the (a2, b1, c1) result is a false positive.
// The output for LEFT_OUTER:
// a1, b1, c1, d1, false, null, null
// a1, b2, c1, null, false, null, null
// a2, b1, c1, d4, true, d4, f1
// The d, cont columns are not part of the original left row, so will be
// projected away after the join.
//
// The output for LEFT_ANTI:
// a1, b1, c1, d1, false
// a1, b2, c1, null, false
// Again, the d, cont columns will be projected away after the join.
//
// The output for LEFT_SEMI:
// a2, b1, c1, d4, true
// Again, the d, cont columns will be projected away after the join.
//
// The example above is for a lookup join as the second join in the
// paired-joins. The lookup join can also be the first join in the
// paired-joins, which is indicated by both
// OutputGroupContinuationForLeftRow and MaintainOrdering set to true.
message JoinReaderSpec {
// Describes the index to perform lookups in and the columns to fetch.
optional sqlbase.IndexFetchSpec fetch_spec = 19 [(gogoproto.nullable) = false];
// SplitFamilyIDs indicates that spans which fully constrain the index should
// be split into single-family spans for the given families. Unset if
// splitting is not possible.
//
// See span.MakeSplitter for the conditions that must hold for splitting to be
// allowed. It is recommended to use span.MakeSplitter() followed by
// splitter.FamilyIDs() to populate this field.
repeated uint32 split_family_ids = 20 [(gogoproto.customname) = "SplitFamilyIDs",
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb.FamilyID"];
// Field numbers retired by previous versions of this spec; they must not be
// reused for new fields.
reserved 1, 2;
// LookupExpr represents the part of the join condition used to perform the
// lookup into the index. It should only be set when LookupColumns is empty.
// LookupExpr is used instead of LookupColumns when the lookup condition is
// more complicated than a simple equality between input columns and index
// columns. In this case, LookupExpr specifies the expression that will be
// used to construct the spans for each lookup. Currently, the only
// expressions supported are conjunctions (AND expressions) of equality, IN
// expressions, and simple inequalities, specifically:
// 1. equalities between two variables (one from the input and one from the
// index) representing the equi-join condition(s),
// 2. equalities between an index column and a constant, and
// 3. IN expressions between an index column and a tuple of constants.
// 4. LT,GT,GE,LE between an index var and a constant.
//
// Variables in this expression are assigned in the same way as the ON
// condition below. Assuming that the input stream has N columns and the fetch
// spec has M fetched columns, in this expression variables @1 to @N refer to
// columns of the input stream and variables @(N+1) to @(N+M) refer to fetched
// columns.
//
// For example, a valid LookupExpr for N=2 and M=2 might be:
// @3 IN (10, 20) AND @2 = @4.
optional Expression lookup_expr = 16 [(gogoproto.nullable) = false];
// If RemoteLookupExpr is set, this is a locality optimized lookup join. In
// this case, LookupExpr contains the lookup join conditions targeting ranges
// located on local nodes (relative to the gateway region), and
// RemoteLookupExpr contains the lookup join conditions targeting remote
// nodes. The optimizer will only plan a locality optimized lookup join if it
// is known that each lookup returns at most one row. This fact allows the
// joinReader to use the local conditions in LookupExpr first, and if a match
// is found locally for each input row, there is no need to search remote
// nodes. If a local match is not found for all input rows, the joinReader
// uses RemoteLookupExpr to search remote nodes.
//
// The same restrictions on supported expressions that apply to LookupExpr
// also apply to RemoteLookupExpr. See the comment above LookupExpr for more
// details.
optional Expression remote_lookup_expr = 17 [(gogoproto.nullable) = false];
// LookupColumns, like LookupExpr, represents the part of the join condition
// used to perform the lookup into the index. It is used as an optimization
// for the common case where the join conditions are all simple equalities
// between input and index columns (i.e., only the first of the supported
// expression types listed above for LookupExpr). LookupColumns should only
// be set when LookupExpr is empty.
//
// LookupColumns contains the column indexes in the input stream that match
// with the index columns. These are the equality columns of the join. For
// example, if there are 3 input columns and 2 fetched columns, LookupColumns
// {0, 2} is equivalent to the LookupExpr @1 = @4 AND @3 = @5.
//
// If LookupExpr is empty, LookupColumns can be interpreted as follows:
//
// If empty (index join), the start of the input stream schema is assumed to
// match the index columns. The joinReader will perform an index join and the
// "internal columns" will be the fetched columns.
//
// If populated (lookup join), the `joinReader` will perform a lookup join
// and the "internal columns" will be the concatenation of the input stream
// columns followed by the fetched columns (except for semi/anti join, which
// don't output any fetched columns).
// TODO(rytaft): remove this field and use LookupExpr for all cases. This
// requires ensuring that cases currently using LookupColumns do not regress.
repeated uint32 lookup_columns = 3 [packed = true];
// If set, the lookup columns form a key in the target table and thus each
// lookup has at most one result.
optional bool lookup_columns_are_key = 8 [(gogoproto.nullable) = false];
// "ON" expression (in addition to the conditions in LookupExpr and/or
// equality constraints captured by the LookupColumns). Assuming that the
// input stream has N columns and the fetch spec has M fetched columns, in
// this expression variables @1 to @N refer to columns of the input stream and
// variables @(N+1) to @(N+M) refer to fetched columns.
optional Expression on_expr = 4 [(gogoproto.nullable) = false];
// This used to be used for an extra index filter expression. It was removed
// in DistSQL version 24.
reserved 5;
// For lookup joins. Only JoinType_INNER and JoinType_LEFT_OUTER are
// supported.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// This field used to be a visibility level of the columns that should be
// produced. We now produce the columns in the FetchSpec.
reserved 7;
// Indicates the row-level locking strength to be used by the join. If set to
// FOR_NONE, no row-level locking should be performed.
optional sqlbase.ScanLockingStrength locking_strength = 9 [(gogoproto.nullable) = false];
// Indicates the policy to be used by the join for handling conflicting locks
// held by other active transactions when attempting to lock rows. Always set
// to BLOCK when locking_strength is FOR_NONE.
optional sqlbase.ScanLockingWaitPolicy locking_wait_policy = 10 [(gogoproto.nullable) = false];
// Indicates the row-level locking durability to be used by the join.
optional sqlbase.ScanLockingDurability locking_durability = 24 [(gogoproto.nullable) = false];
// Indicates that the join reader should maintain the ordering of the input
// stream. This is applicable to both lookup joins and index joins. For lookup
// joins, maintaining order is expensive because it requires buffering. For
// index joins buffering is not required, but still, if ordering is not
// required, we'll change the output order to allow for some Pebble
// optimizations.
optional bool maintain_ordering = 11 [(gogoproto.nullable) = false];
// Field numbers retired by previous versions of this spec; they must not be
// reused for new fields.
reserved 12, 13;
// LeftJoinWithPairedJoiner is used when a left {outer,anti,semi} join is
// being achieved by pairing two joins, and this is the second join. See
// the comment above.
optional bool left_join_with_paired_joiner = 14 [(gogoproto.nullable) = false];
// OutputGroupContinuationForLeftRow indicates that this join is the first
// join in the paired-joins. At most one of OutputGroupContinuationForLeftRow
// and LeftJoinWithPairedJoiner must be true. Additionally, if
// OutputGroupContinuationForLeftRow is true, MaintainOrdering must also
// be true.
optional bool output_group_continuation_for_left_row = 15 [(gogoproto.nullable) = false];
// lookup_batch_bytes_limit, if non-zero, controls the TargetBytes limits that
// the joiner will use for its lookups. If zero, then the server-side default
// is used. Note that, regardless of this setting, bytes limits are not always
// used for lookups - it depends on whether the joiner decides it wants
// DistSender-parallelism or not.
optional int64 lookup_batch_bytes_limit = 18 [(gogoproto.nullable) = false];
// A hint for how many rows the consumer of the join reader output might
// need. This is used to size the initial batches of input rows to try to
// avoid reading many more rows than needed by the processor receiving the
// output.
//
// Not used if there is a limit set in the PostProcessSpec of this processor
// (that value will be used for sizing batches instead).
optional int64 limit_hint = 21 [(gogoproto.nullable) = false];
// Indicates that for each input row, the join reader should return looked-up
// rows in sorted order. This is only applicable to lookup joins for which
// more than one lookup row may be associated with a given input row. It can
// only be set to true if maintain_ordering is also true.
// maintain_lookup_ordering can be used if the output needs to be ordered by
// a prefix of input columns followed by index (lookup) columns without
// requiring a (buffered) sort.
optional bool maintain_lookup_ordering = 22 [(gogoproto.nullable) = false];
// RemoteOnlyLookups is true when this join is defined with only lookups
// that read into remote regions, though the lookups are defined in
// LookupExpr, not RemoteLookupExpr.
optional bool remote_only_lookups = 23 [(gogoproto.nullable) = false];
}
// SorterSpec is the specification for a "sorting aggregator". A sorting
// processor sorts elements in the input stream providing a certain output
// order guarantee regardless of the input ordering. The output ordering is
// according to a configurable set of columns.
//
// The "internal columns" of a Sorter (see ProcessorSpec) are the same as the
// input columns.
message SorterSpec {
// The ordering that the output of the sorter must satisfy (see the
// message-level comment above).
optional Ordering output_ordering = 1 [(gogoproto.nullable) = false];
// Ordering match length, specifying that the input is already sorted by the
// first 'n' output ordering columns, can be optionally specified for
// possible speed-ups taking advantage of the partial orderings.
optional uint32 ordering_match_len = 2 [(gogoproto.nullable) = false];
// The limit of a top-k sort is the number of rows that the sorter should
// output. If limit is 0, then all rows should be output.
optional int64 limit = 3 [(gogoproto.nullable) = false];
}
// DistinctSpec is the specification for a distinct processor, which
// eliminates rows with duplicate values on the distinct columns from the
// input stream.
message DistinctSpec {
// The ordered columns in the input stream can be optionally specified for
// possible optimizations. The specific ordering (ascending/descending) of
// the column itself is not important nor is the order in which the columns
// are specified. The ordered columns must be a subset of the distinct
// columns.
repeated uint32 ordered_columns = 1;
// The distinct columns in the input stream are those columns on which we
// check for distinct rows. If A,B,C are in distinct_columns and there is a
// 4th column D which is not included in distinct_columns, its values are not
// considered, so rows A1,B1,C1,D1 and A1,B1,C1,D2 are considered equal and
// only one of them (the first) is output.
repeated uint32 distinct_columns = 2;
// If true, then NULL values are treated as not equal to one another. Each NULL
// value will cause a new row group to be created. For example:
//
// c
// ----
// NULL
// NULL
//
// A distinct operation on column "c" will result in one output row if
// NullsAreDistinct is false, or two output rows if true. This is set to true
// for UPSERT and INSERT..ON CONFLICT statements, since they must treat NULL
// values as distinct.
optional bool nulls_are_distinct = 3 [(gogoproto.nullable) = false];
// If not empty, then an error with this text will be raised if there are two
// rows with duplicate distinct column values. This is used to implement the
// UPSERT and INSERT..ON CONFLICT statements, both of which prohibit the same
// row from being changed twice.
optional string error_on_dup = 4 [(gogoproto.nullable) = false];
// OutputOrdering specifies the required ordering of the output produced by
// the distinct. The input to the processor *must* already be ordered
// according to it.
optional Ordering output_ordering = 5 [(gogoproto.nullable) = false];
}
// The specification for a WITH ORDINALITY processor. It adds a new column to
// each resulting row that contains the ordinal number of the row. Since there
// are no arguments for this operator, the spec is empty.
message OrdinalitySpec {
// Currently empty: the operator takes no arguments (see the message-level
// comment above), so there is nothing to configure.
}
// ZigzagJoinerSpec is the specification for a zigzag join processor. The
// processor's current implementation fetches the rows using internal
// rowFetchers.
//
// The "internal columns" of a ZigzagJoiner (see ProcessorSpec) are the
// concatenation of all the fetch columns of all the sides.
message ZigzagJoinerSpec {
// Side describes one input side of the zigzag join: the index to fetch
// from, its equality columns, and any fixed prefix values.
message Side {
// Describes the index and the columns to fetch for this side.
optional sqlbase.IndexFetchSpec fetch_spec = 1 [(gogoproto.nullable) = false];
// EqColumns contains the equality columns for this side (as fetched column
// ordinals). All sides have the same number of equality columns.
optional Columns eq_columns = 2 [(gogoproto.nullable) = false];
// Fixed values, corresponding to a prefix of the index key columns.
optional ValuesCoreSpec fixed_values = 3 [(gogoproto.nullable) = false];
// Indicates the row-level locking strength to be used by the scan. If set to
// FOR_NONE, no row-level locking should be performed.
optional sqlbase.ScanLockingStrength locking_strength = 4 [(gogoproto.nullable) = false];
// Indicates the policy to be used by the scan for handling conflicting locks
// held by other active transactions when attempting to lock rows. Always set
// to BLOCK when locking_strength is FOR_NONE.
optional sqlbase.ScanLockingWaitPolicy locking_wait_policy = 5 [(gogoproto.nullable) = false];
// Indicates the row-level locking durability to be used by the scan.
optional sqlbase.ScanLockingDurability locking_durability = 8 [(gogoproto.nullable) = false];
}
// The sides participating in the join; each side's rows are fetched with an
// internal rowFetcher (see the message-level comment above).
repeated Side sides = 7 [(gogoproto.nullable) = false];
// "ON" expression (in addition to the equality constraints captured by the
// equality columns). Assuming that the left side has N columns and the
// right side has M columns, in this expression ordinal references @1 to @N
// refer to columns of the left side and variables @(N+1) to @(N+M) refer to
// columns in the right side.
optional Expression on_expr = 4 [(gogoproto.nullable) = false];
// The type of join to perform.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// Field numbers retired by previous versions of this spec; they must not be
// reused for new fields.
reserved 1, 2, 3, 5;
}
// MergeJoinerSpec is the specification for a merge join processor. The processor
// has two inputs and one output. The inputs must have the same ordering on the
// columns that have equality constraints. For example:
// SELECT * FROM T1 INNER JOIN T2 ON T1.C1 = T2.C5 AND T1.C2 = T2.C4
//
// To perform a merge join, the streams corresponding to T1 and T2 must have the
// same ordering on columns C1, C2 and C5, C4 respectively. For example: C1+,C2-
// and C5+,C4-.
//
// The "internal columns" of a MergeJoiner (see ProcessorSpec) are:
// - for INNER, LEFT_OUTER, RIGHT_OUTER, FULL_OUTER - the concatenation of left
// input columns and right input columns.
// If the left input has N columns and the right input has M columns, the
// first N columns contain values from the left side and the following M
// columns contain values from the right side.
// - for LEFT_SEMI, LEFT_ANTI, INTERSECT_ALL, EXCEPT_ALL - the left input
// columns.
// - for RIGHT_SEMI, RIGHT_ANTI - the right input columns.
//
// Note that, regardless of the join type, an optional ON expression can refer
// to columns from both inputs.
message MergeJoinerSpec {
// The streams must be ordered according to the columns that have equality
// constraints. The first column of the left ordering is constrained to be
// equal to the first column in the right ordering and so on. The ordering
// lengths and directions must match.
// In the example above, left ordering describes C1+,C2- and right ordering
// describes C5+,C4-.
optional Ordering left_ordering = 1 [(gogoproto.nullable) = false];
optional Ordering right_ordering = 2 [(gogoproto.nullable) = false];
// "ON" expression (in addition to the equality constraints captured by the
// orderings). Assuming that the left stream has N columns and the right
// stream has M columns, in this expression ordinal references @1 to @N refer
// to columns of the left stream and variables @(N+1) to @(N+M) refer to
// columns in the right stream.
optional Expression on_expr = 5 [(gogoproto.nullable) = false];
// The type of join to perform; it determines the output ("internal")
// columns as described in the message-level comment above.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// NullEquality indicates that NULL = NULL should be considered true.
// This allows OUTER JOINs to consider NULL values meaningfully. An
// example of this is during SCRUB checks on secondary indexes.
optional bool null_equality = 7 [(gogoproto.nullable) = false];
// If true, it is guaranteed that the left equality columns form a key for
// the left input. In other words, no two rows from the left input have the
// same set of values on the left equality columns.
optional bool left_eq_columns_are_key = 8 [(gogoproto.nullable) = false];
// If true, it is guaranteed that the right equality columns form a key for
// the right input. In other words, no two rows from the right input have the
// same set of values on the right equality columns.
optional bool right_eq_columns_are_key = 9 [(gogoproto.nullable) = false];
}
// HashJoinerSpec is the specification for a hash join processor. The processor
// has two inputs and one output.
//
// The processor works by reading the entire right input and putting it in a hash
// table. Thus, there is no guarantee on the ordering of results that stem only
// from the right input (in the case of RIGHT_OUTER, FULL_OUTER). However, it is
// guaranteed that results that involve the left stream preserve the ordering;
// i.e. all results that stem from left row (i) precede results that stem from
// left row (i+1).
//
// The "internal columns" of a HashJoiner (see ProcessorSpec) are:
// - for INNER, LEFT_OUTER, RIGHT_OUTER, FULL_OUTER - the concatenation of left
// input columns and right input columns.
// If the left input has N columns and the right input has M columns, the
// first N columns contain values from the left side and the following M
// columns contain values from the right side.
// - for LEFT_SEMI, LEFT_ANTI, INTERSECT_ALL, EXCEPT_ALL - the left input
// columns.
// - for RIGHT_SEMI, RIGHT_ANTI - the right input columns.
//
// Note that, regardless of the join type, an optional ON expression can refer
// to columns from both inputs.
message HashJoinerSpec {
// The join constraints certain columns from the left stream to equal
// corresponding columns on the right stream. These must have the same length.
repeated uint32 left_eq_columns = 1 [packed = true];
repeated uint32 right_eq_columns = 2 [packed = true];
// "ON" expression (in addition to the equality constraints captured by the
// orderings). Assuming that the left stream has N columns and the right
// stream has M columns, in this expression variables @1 to @N refer to
// columns of the left stream and variables @(N+1) to @(N+M) refer to columns
// in the right stream.
optional Expression on_expr = 5 [(gogoproto.nullable) = false];
// The type of join to perform; it determines the output ("internal")
// columns as described in the message-level comment above.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// If true, it is guaranteed that the left equality columns form a key for
// the left input. In other words, no two rows from the left input have the
// same set of values on the left equality columns.
optional bool left_eq_columns_are_key = 8 [(gogoproto.nullable) = false];
// If true, it is guaranteed that the right equality columns form a key for
// the right input. In other words, no two rows from the right input have the
// same set of values on the right equality columns.
optional bool right_eq_columns_are_key = 9 [(gogoproto.nullable) = false];
// Retired field number; it must not be reused for new fields.
reserved 7;
}
// InvertedJoinerSpec is the specification for an inverted join. The processor
// has one input and one output and performs lookups in an inverted index.
//
// The processor uses the inverted index to join with one or more columns of the
// input, according to an inverted expression. In addition to the InvertedExpr,
// the processor also evaluates an OnExpr on the joined rows that satisfy the
// InvertedExpr.
// The "internal columns" of an InvertedJoiner for INNER and LEFT_OUTER joins
// are a concatenation of the input columns and the fetched columns. For
// LEFT_SEMI and LEFT_ANTI, the "internal columns" are the columns of the input.
//
// The fetched columns must always include the inverted key column (so it can be
// referred to by InvertedExpr), as well as any prefix columns. Note that the
// processor does not produce output values for the inverted key column (a
// single output row can correspond to multiple inverted key values).
//
// In many cases, the inverted join will contain false positives wrt the
// original join condition. This is handled by pairing it with a lookup join.
// This pairing works naturally when the user query specified INNER, by
// running an INNER inverted join followed by INNER lookup join. For a user
// query with LEFT_OUTER/LEFT_ANTI, the inverted join is run as a LEFT_OUTER
// with a special mode that outputs an additional bool column that represents
// whether this row is a continuation of a group, where a group is defined as
// rows corresponding to the same original input row. This is paired with a
// lookup join that also knows about the semantics of this bool column. For a
// user query with LEFT_SEMI, the inverted join is run as an INNER join with
// the same special mode. See the JoinReaderSpec for an example.
//
// Example:
// TABLE (k, other, z, PRIMARY KEY (k), INVERTED INDEX (z))
// Input stream columns: | a | b |
// The InvertedExpr involves columns b, z.
// Fetched columns: | z' | k |
// where z' is derived from z. For instance, if z is an array, z' will
// correspond to elements of the array.
// The OnExpr can use columns a, b, k. The InvertedExpr can use columns
// a, b, z'.
//
// Internal columns for INNER and LEFT_OUTER: | a | b | z' | k |
// The values of z' are not populated in the output.
// Internal columns for LEFT_SEMI and LEFT_ANTI: | a | b |
//
// Multi-column inverted index example:
// TABLE (k, other, region, z, PRIMARY KEY (k), INVERTED INDEX (region, z))
// Input stream columns: | a | b | c |
// Fetched columns: | z' | region | k |
// where z' is derived from z and region is a non-inverted prefix column.
// The InvertedExpr involves columns b, z. The non-inverted prefix key columns
// equate c to region.
// The OnExpr can use columns a, b, c, k, region.
//
// Internal columns for INNER and LEFT_OUTER: | a | b | c | z' | region | k |
// where the values of z' are not populated (as in the previous example).
// Internal columns for LEFT_SEMI and LEFT_ANTI: | a | b | c |
//
// For INNER/LEFT_OUTER with OutputGroupContinuationForLeftRow = true, the
// internal columns include an additional bool column as the last column.
message InvertedJoinerSpec {
// Describes the inverted index to perform lookups in and the columns to
// fetch from it (which must include the inverted key column and any
// non-inverted prefix columns; see the message comment above).
optional sqlbase.IndexFetchSpec fetch_spec = 10 [(gogoproto.nullable) = false];
// InvertedColumnOriginalType is the type of the table column that is being
// indexed (e.g. JSON or Geometry).
optional sql.sem.types.T inverted_column_original_type = 11;
// Fields 1-3 were used in the past; do not reuse them.
reserved 1, 2, 3;
// The join expression is a conjunction of inverted_expr and on_expr.
// Expression involving the indexed column and columns from the input.
// Assuming that the input stream has N columns and the table that has been
// indexed has M columns, in this expression variables @1 to @N refer to
// columns of the input stream and variables @(N+1) to @(N+M) refer to fetched
// columns. Although the numbering includes all columns, only columns
// corresponding to the indexed column and the input columns may be present in
// this expression. Note that the column numbering matches the numbering used
// by OnExpr.
//
// The expression is passed to xform.NewDatumToInvertedExpr to construct an
// implementation of invertedexpr.DatumToInvertedExpr, which will be fed each
// input row and output an expression to evaluate over the inverted index.
optional Expression inverted_expr = 4 [(gogoproto.nullable) = false];
// Optional expression involving the columns in the index (other than the
// inverted column) and the columns in the input stream. Assuming that the
// input stream has N columns and the table that has been indexed has M
// columns, in this expression variables @1 to @N refer to columns of the
// input stream and variables @(N+1) to @(N+M) refer to fetched columns.
// The numbering does not omit the column in the table corresponding to the
// inverted column, or other table columns absent from the index, but they
// cannot be present in this expression. Note that the column numbering
// matches the numbering used by InvertedExpr.
optional Expression on_expr = 5 [(gogoproto.nullable) = false];
// Only INNER, LEFT_OUTER, LEFT_SEMI, LEFT_ANTI are supported. For indexes
// that produce false positives for user expressions, like geospatial
// indexes, only INNER and LEFT_OUTER are actually useful -- LEFT_SEMI will
// be mapped to INNER by the optimizer, and LEFT_ANTI to LEFT_OUTER, to
// allow the false positives to be eliminated by evaluating the exact
// expression on the rows output by this join.
optional sqlbase.JoinType type = 6 [(gogoproto.nullable) = false];
// Indicates that the inverted joiner should maintain the ordering of the
// input stream.
optional bool maintain_ordering = 7 [(gogoproto.nullable) = false];
// Indicates that the join should output a continuation column that
// indicates whether a row is a continuation of a group corresponding to a
// left row.
optional bool output_group_continuation_for_left_row = 8 [(gogoproto.nullable) = false];
// Column indexes in the input stream specifying the columns which match with
// the non-inverted prefix columns of the index, if the index is multi-column.
// These are the equality columns of the join. The length of
// prefix_equality_columns should be equal to the number of non-inverted
// prefix columns in the index.
repeated uint32 prefix_equality_columns = 9 [packed = true];
// Indicates the row-level locking strength to be used by the scan. If set to
// FOR_NONE, no row-level locking should be performed.
optional sqlbase.ScanLockingStrength locking_strength = 12 [(gogoproto.nullable) = false];
// Indicates the policy to be used by the scan for handling conflicting locks
// held by other active transactions when attempting to lock rows. Always set
// to BLOCK when locking_strength is FOR_NONE.
optional sqlbase.ScanLockingWaitPolicy locking_wait_policy = 13 [(gogoproto.nullable) = false];
// Indicates the row-level locking durability to be used by the scan.
optional sqlbase.ScanLockingDurability locking_durability = 14 [(gogoproto.nullable) = false];
}
// InvertedFiltererSpec is the specification of a processor that does filtering
// on a table by evaluating an invertedexpr.SpanExpressionProto on an inverted
// index of the table. The input consists of the inverted index rows from
// InvertedExpr.SpansToRead. It is acceptable for a filter on the primary key
// to be pushed down between the scan and the inverted filterer.
//
// Example:
// Table columns: | a | b | c | d |
// where a, d are the primary key and b is the column with the inverted index.
// Inverted index columns: | a | b' | d |
// where b' is derived from b. For instance, if b is an array, b' will be
// elements of the array.
//
// Internal columns are | a | b | d |. The output sets b to NULL, since it does
// not have the value of the original column that was indexed in the inverted
// column.
//
// Optionally, there can be a pre-filtering spec that describes an expression
// (derived from the original expression that was converted to inverted_expr),
// that must evaluate to true on each inverted row. This is a performance
// optimization -- for more details see invertedidx.PreFilterer (geometry
// and geography inverted indexes are the only ones that currently use
// pre-filtering).
message InvertedFiltererSpec {
// The index in the input row of the inverted column.
optional uint32 inverted_col_idx = 1 [(gogoproto.nullable) = false];
// The expression to evaluate. The SpansToRead are ignored since they
// have already been used to set up the input.
optional inverted.SpanExpressionProto inverted_expr = 2 [(gogoproto.nullable) = false];
// Optional pre-filtering expression.
message PreFiltererSpec {
// Expression has only one variable, @1, which refers to the column with
// the inverted index.
optional Expression expression = 1 [(gogoproto.nullable) = false];
// The type of the original column that was indexed in the inverted index.
optional sql.sem.types.T type = 2;
}
// If set, the pre-filter to apply to each inverted row (see the message
// comment above for the motivation).
optional PreFiltererSpec pre_filterer_spec = 6;
}
// AggregatorSpec is the specification for an "aggregator" (processor core
// type, not the logical plan computation stage). An aggregator performs
// 'aggregation' in the SQL sense in that it groups rows and computes an aggregate
// for each group. The group is configured using the group key. The aggregator
// can be configured with one or more aggregation functions.
//
// The "internal columns" of an Aggregator map 1-1 to the aggregations.
message AggregatorSpec {
// These mirror the aggregate functions supported by sql/parser. See
// sql/parser/aggregate_builtins.go.
// NOTE(review): value 6 is absent but not marked reserved -- presumably a
// deleted function; consider adding it to a reserved statement.
enum Func {
ANY_NOT_NULL = 0;
AVG = 1;
BOOL_AND = 2;
BOOL_OR = 3;
CONCAT_AGG = 4;
COUNT = 5;
MAX = 7;
MIN = 8;
STDDEV = 9;
SUM = 10;
SUM_INT = 11;
VARIANCE = 12;
XOR_AGG = 13;
COUNT_ROWS = 14;
SQRDIFF = 15;
FINAL_VARIANCE = 16;
FINAL_STDDEV = 17;
ARRAY_AGG = 18;
JSON_AGG = 19;
// JSONB_AGG is an alias for JSON_AGG, they do the same thing.
JSONB_AGG = 20;
STRING_AGG = 21;
BIT_AND = 22;
BIT_OR = 23;
CORR = 24;
PERCENTILE_DISC_IMPL = 25;
PERCENTILE_CONT_IMPL = 26;
JSON_OBJECT_AGG = 27;
JSONB_OBJECT_AGG = 28;
VAR_POP = 29;
STDDEV_POP = 30;
ST_MAKELINE = 31;
ST_EXTENT = 32;
ST_UNION = 33;
ST_COLLECT = 34;
COVAR_POP = 35;
COVAR_SAMP = 36;
REGR_INTERCEPT = 37;
REGR_R2 = 38;
REGR_SLOPE = 39;
REGR_SXX = 40;
REGR_SYY = 41;
REGR_SXY = 42;
REGR_COUNT = 43;
REGR_AVGX = 44;
REGR_AVGY = 45;
FINAL_STDDEV_POP = 46;
FINAL_VAR_POP = 47;
TRANSITION_REGRESSION_AGGREGATE = 48;
FINAL_COVAR_POP = 49;
FINAL_REGR_SXX = 50;
FINAL_REGR_SXY = 51;
FINAL_REGR_SYY = 52;
FINAL_REGR_AVGX = 53;
FINAL_REGR_AVGY = 54;
FINAL_REGR_INTERCEPT = 55;
FINAL_REGR_R2 = 56;
FINAL_REGR_SLOPE = 57;
FINAL_COVAR_SAMP = 58;
FINAL_CORR = 59;
FINAL_SQRDIFF = 60;
ARRAY_CAT_AGG = 61;
MERGE_STATS_METADATA = 62;
MERGE_STATEMENT_STATS = 63;
MERGE_TRANSACTION_STATS = 64;
MERGE_AGGREGATED_STMT_METADATA = 65;
}
// Type describes the scalar-ness of the aggregation, which determines the
// output when there are no input rows.
enum Type {
// This setting exists just for backwards compatibility; it's equivalent to
// SCALAR when there are no grouping columns, and to NON_SCALAR when there
// are grouping columns.
AUTO = 0;
// A scalar aggregation has no grouping columns and always returns one
// result row.
SCALAR = 1;
// A non-scalar aggregation returns no rows if there are no input rows; it
// may or may not have grouping columns.
NON_SCALAR = 2;
}
// Aggregation describes a single aggregate function invocation.
message Aggregation {
// The aggregate function to compute.
optional Func func = 1 [(gogoproto.nullable) = false];
// If true, the aggregation function operates as '<FUNC> DISTINCT' would
// in SQL; the default (false) corresponds to the '<FUNC> ALL' behavior.
optional bool distinct = 2 [(gogoproto.nullable) = false];
// The column index specifies the argument(s) to the aggregator function.
//
// Most aggregations take one argument.
// COUNT_ROWS takes no arguments.
// FINAL_STDDEV and FINAL_VARIANCE take three arguments (SQRDIFF, SUM,
// COUNT).
repeated uint32 col_idx = 5;
// If set, this column index specifies a boolean argument; rows for which
// this value is not true don't contribute to this aggregation. This enables
// the filter clause, e.g.:
// SELECT SUM(x) FILTER (WHERE y > 1), SUM(x) FILTER (WHERE y < 1) FROM t
optional uint32 filter_col_idx = 4;
// Arguments are const expressions passed to aggregation functions.
repeated Expression arguments = 6 [(gogoproto.nullable) = false];
// Field 3 was used in the past; do not reuse it.
reserved 3;
}
// The group key is a subset of the columns in the input stream schema on the
// basis of which we define our groups.
repeated uint32 group_cols = 2 [packed = true];
// The aggregations to compute; each one produces one internal column.
repeated Aggregation aggregations = 3 [(gogoproto.nullable) = false];
// A subset of the GROUP BY columns which are ordered in the input.
repeated uint32 ordered_group_cols = 4 [packed = true];
// The scalar-ness of the aggregation; see the Type enum above.
optional Type type = 5 [(gogoproto.nullable) = false];
// OutputOrdering specifies the required ordering of the output produced by
// the aggregator. The input to the processor *must* already be ordered
// according to it.
optional Ordering output_ordering = 6 [(gogoproto.nullable) = false];
}
// ProjectSetSpec is the specification of a processor which applies a set of
// expressions, which may be set-returning functions, to its input.
message ProjectSetSpec {
// Expressions to be applied.
repeated Expression exprs = 1 [(gogoproto.nullable) = false];
// Column types for the generated values.
repeated sql.sem.types.T generated_columns = 2;
// The number of columns each expression returns. Same length as exprs.
repeated uint32 num_cols_per_gen = 3;
// Column labels for the generated values. Needed for some builtin functions
// (like record_to_json) that require the column labels to do their jobs.
repeated string generated_column_labels = 4;
}
// WindowerSpec is the specification of a processor that performs computations
// of window functions that have the same PARTITION BY clause. For a particular
// windowFn, the processor puts result at windowFn.ArgIdxStart and "consumes"
// all arguments to windowFn (windowFn.ArgCount of them). So if windowFn takes
// no arguments, an extra column is added; if windowFn takes more than one
// argument, (windowFn.ArgCount - 1) columns are removed.
message WindowerSpec {
enum WindowFunc {
// These mirror window functions from window_builtins.go.
ROW_NUMBER = 0;
RANK = 1;
DENSE_RANK = 2;
PERCENT_RANK = 3;
CUME_DIST = 4;
NTILE = 5;
LAG = 6;
LEAD = 7;
FIRST_VALUE = 8;
LAST_VALUE = 9;
NTH_VALUE = 10;
}
// Func specifies which function to compute. It can either be built-in
// aggregate or built-in window function.
message Func {
option (gogoproto.onlyone) = true;
optional AggregatorSpec.Func aggregateFunc = 1;
optional WindowFunc windowFunc = 2;
}
// Frame is the specification of a single window frame for a window function.
message Frame {
// Mode indicates which mode of framing is used.
enum Mode {
// RANGE specifies frame in terms of logical range (e.g. 1 unit cheaper).
RANGE = 0;
// ROWS specifies frame in terms of physical offsets (e.g. 1 row before).
ROWS = 1;
// GROUPS specifies frame in terms of peer groups (where "peers" mean
// rows not distinct in the ordering columns).
GROUPS = 2;
}
// BoundType indicates which type of boundary is used.
enum BoundType {
UNBOUNDED_PRECEDING = 0;
UNBOUNDED_FOLLOWING = 1;
// Offsets are stored within Bound.
OFFSET_PRECEDING = 2;
OFFSET_FOLLOWING = 3;
CURRENT_ROW = 4;
}
// Exclusion specifies the type of frame exclusion.
enum Exclusion {
NO_EXCLUSION = 0;
EXCLUDE_CURRENT_ROW = 1;
EXCLUDE_GROUP = 2;
EXCLUDE_TIES = 3;
}
// Bound specifies the type of boundary and the offset (if present).
message Bound {
optional BoundType boundType = 1 [(gogoproto.nullable) = false];
// For UNBOUNDED_PRECEDING, UNBOUNDED_FOLLOWING, and CURRENT_ROW offset
// is ignored. Integer offset for ROWS and GROUPS modes is stored in
// int_offset while an encoded datum and the type information are stored
// for RANGE mode.
optional uint64 int_offset = 2 [(gogoproto.nullable) = false];
optional bytes typed_offset = 3;
optional DatumInfo offset_type = 4 [(gogoproto.nullable) = false];
}
// Bounds specifies boundaries of the window frame.
message Bounds {
// Start bound must always be present whereas end bound might be omitted.