-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
relational.opt
1105 lines (981 loc) · 39.8 KB
/
relational.opt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# relational.opt contains Optgen language definitions for all of Cockroach's
# physical and logical operators that return a table-valued result having rows
# and columns (i.e. relational). Many of them correspond to operators in the
# relational algebra, but there are also variants that are useful for concisely
# and incrementally expressing transformations.
#
# Tags
#
# Relational - All operators in this file are marked with the Relational tag,
# so they can be easily distinguished from Scalar and Enforcer
# operators.
#
# Join - All join operators (inner, left, right, full, semi, anti), as well as
# their JoinApply variants, are marked with the Join tag, which allows
# any of them to fulfill a Join pattern match.
#
# JoinApply - All join apply operators are marked with the JoinApply tag.
# Unlike standard Join operators, JoinApply operators allow the
# right input to refer to columns projected by the left input.
# Allowing this is useful as an intermediate (or sometimes final)
# step in some important transformations (like eliminating
# subqueries).
# Scan returns a result set containing every row in a table by scanning one of
# the table's indexes according to its ordering. The ScanPrivate field
# identifies the table and index to scan, as well as the subset of columns to
# project from it.
#
# The scan can be constrained and/or have an internal row limit. A scan can be
# executed either as a forward or as a reverse scan (except when it has a limit,
# in which case the direction is fixed).
[Relational]
define Scan {
_ ScanPrivate
}
[Private]
define ScanPrivate {
# Table identifies the table to scan. It is an id that can be passed to
# the Metadata.Table method in order to fetch cat.Table metadata.
Table TableID
# Index identifies the index to scan (whether primary or secondary). It
# can be passed to the cat.Table.Index() method in order to fetch the
# cat.Index metadata.
Index IndexOrdinal
# Cols specifies the set of columns that the scan operator projects. This
# may be a subset of the columns that the table/index contains.
Cols ColSet
# If set, the scan is a constrained scan; the constraint contains the spans
# that need to be scanned. If InvertedConstraint is also set, Constraint
# contains non-ranging spans that constrain the non-inverted prefix columns.
Constraint Constraint
# If set, the scan is a constrained scan of an inverted index; the
# InvertedConstraint contains the spans that need to be scanned.
InvertedConstraint InvertedSpans
# HardLimit specifies the maximum number of rows that the scan can return
# (after applying any constraint), as well as the required scan direction.
# This is a "hard" limit, meaning that the scan operator must never return
# more than this number of rows, even if more are available. If its value is
# zero, then the limit is unknown, and the scan should return all available
# rows.
HardLimit ScanLimit
# Flags modify how the table is scanned, such as which index is used to scan.
Flags ScanFlags
# Locking represents the row-level locking mode of the Scan. Most scans
# leave this unset (Strength = ForNone), which indicates that no row-level
# locking will be performed while scanning the table. Stronger locking modes
# are used by SELECT .. FOR [KEY] UPDATE/SHARE statements and by the initial
# row retrieval of DELETE and UPDATE statements. The locking item's Targets
# list will always be empty when part of a ScanPrivate.
Locking LockingItem
# PartitionConstrainedScan records whether or not we were able to use partitions
# to constrain the lookup spans further. This flag is used to record telemetry
# about how often this optimization is getting applied.
PartitionConstrainedScan bool
}
# SequenceSelect represents a read from a sequence as a data source. It always returns
# three columns, last_value, log_cnt, and is_called, with a single row. last_value is
# the most recent value returned from the sequence and log_cnt and is_called are
# always 0 and true, respectively.
[Relational]
define SequenceSelect {
_ SequenceSelectPrivate
}
[Private]
define SequenceSelectPrivate {
# Sequence identifies the sequence to read from.
Sequence SequenceID
# Cols is the 3 element list of column IDs returned by the operator.
Cols ColList
}
# Values returns a manufactured result set containing a constant number of rows.
# specified by the Rows list field. Each row must contain the same set of
# columns in the same order.
#
# The Rows field contains a list of Tuples, one for each row. Each tuple has
# the same length (same with that of Cols).
#
# The Cols field contains the set of column indices returned by each row
# as an opt.ColList. It is legal for Cols to be empty.
[Relational]
define Values {
Rows ScalarListExpr
_ ValuesPrivate
}
[Private]
define ValuesPrivate {
Cols ColList
# ID is a memo-unique identifier which distinguishes between identical
# Values expressions which appear in different places in the query. In most
# cases the column set is sufficient to do this, but various rules make it
# possible to construct Values expressions with no columns.
ID UniqueID
}
# Select filters rows from its input result set, based on the boolean filter
# predicate expression. Rows which do not match the filter are discarded. While
# the Filter operand can be any boolean expression, normalization rules will
# typically convert it to a Filters operator in order to make conjunction list
# matching easier.
[Relational]
define Select {
Input RelExpr
Filters FiltersExpr
}
# Project modifies the set of columns returned by the input result set. Columns
# can be removed, reordered, or renamed. In addition, new columns can be
# synthesized.
#
# Projections describes the synthesized columns constructed by Project, and
# Passthrough describes the input columns that are passed through as Project
# output columns.
[Relational]
define Project {
Input RelExpr
Projections ProjectionsExpr
Passthrough ColSet
# notNullCols is the set of columns (input or synthesized) that are known to
# be not-null.
notNullCols ColSet
# internalFuncDeps are the functional dependencies between all columns
# (input or synthesized).
internalFuncDeps FuncDepSet
}
# InvertedFilter filters rows from its input result set, based on the
# InvertedExpression predicate (which is defined in InvertedFilterPrivate).
# Rows which do not match the filter are discarded. The input should be a
# constrained scan of an inverted index, possibly wrapped in other operators
# such as Select.
[Relational]
define InvertedFilter {
Input RelExpr
_ InvertedFilterPrivate
}
[Private]
define InvertedFilterPrivate {
# InvertedExpression represents the set operations (UNION or INTERSECTION)
# that should be executed on the inverted spans retrieved from the input.
# The input should already be filtered based on SpansToRead from the
# SpanExpression, but this InvertedExpression serves to filter the rows
# further by applying set operations on the primary key columns.
InvertedExpression SpanExpression
# PreFiltererState represents the optional pre-filtering state.
PreFiltererState PreFiltererState
# The InvertedColumn is the id of the inverted column in the input. It is
# used during execution to map rows from the input to their corresponding
# spans in the SpanExpression.
InvertedColumn ColumnID
}
# InnerJoin creates a result set that combines columns from its left and right
# inputs, based upon its "on" join predicate. Rows which do not match the
# predicate are filtered. While expressions in the predicate can refer to
# columns projected by either the left or right inputs, the inputs are not
# allowed to refer to the other's projected columns.
[Relational, Join, JoinNonApply]
define InnerJoin {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
multiplicity JoinMultiplicity
}
[Relational, Join, JoinNonApply]
define LeftJoin {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
multiplicity JoinMultiplicity
}
[Relational, Join, JoinNonApply]
define RightJoin {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
}
[Relational, Join, JoinNonApply]
define FullJoin {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
multiplicity JoinMultiplicity
}
[Relational, Join, JoinNonApply]
define SemiJoin {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
multiplicity JoinMultiplicity
}
[Relational, Join, JoinNonApply]
define AntiJoin {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
}
# JoinPrivate is shared between the various join operators including apply
# variants, but excluding IndexJoin, LookupJoin, MergeJoin.
[Private]
define JoinPrivate {
# Flags modify what type of join we choose.
Flags JoinFlags
# WasReordered indicates whether reorderings of the join tree rooted at this
# join have already been considered.
WasReordered bool
}
# IndexJoin represents an inner join between an input expression and a primary
# index. It is a special case of LookupJoin where the input columns are the PK
# columns of the table we are looking up into, and every input row results in
# exactly one output row.
#
# IndexJoin operators are created from Scan operators (unlike lookup joins which
# are created from Join operators).
[Relational]
define IndexJoin {
Input RelExpr
_ IndexJoinPrivate
}
[Private]
define IndexJoinPrivate {
# Table identifies the table to do lookups in. The primary index is
# currently the only index used.
Table TableID
# Cols specifies the set of columns that the index join operator projects.
# This may be a subset of the columns that the table contains.
Cols ColSet
}
# LookupJoin represents a join between an input expression and an index. The
# type of join is in the LookupJoinPrivate field.
[Relational]
define LookupJoin {
Input RelExpr
On FiltersExpr
_ LookupJoinPrivate
# lookupProps caches relational properties for the "table" side of the lookup
# join, treating it as if it were another relational input. This makes the
# lookup join appear more like other join operators.
lookupProps RelProps
}
[Private]
define LookupJoinPrivate {
# JoinType is InnerJoin, LeftJoin, SemiJoin, or AntiJoin.
JoinType Operator
# Table identifies the table do to lookups in.
Table TableID
# Index identifies the index to do lookups in (whether primary or secondary).
# It can be passed to the cat.Table.Index() method in order to fetch the
# cat.Index metadata.
Index IndexOrdinal
# KeyCols are the columns (produced by the input) used to create lookup keys.
# The key columns must be non-empty, and are listed in the same order as the
# index columns (or a prefix of them).
KeyCols ColList
# Cols is the union between the set of input columns that are returned by
# the lookup join and the set of lookup columns retrieved through lookup.
#
# For inner/left join, this is the set of columns produced by the LookupJoin
# operator.
# For semi/anti-join, this contains the semi-join output columns (from the
# input) plus the columns that we need to look up for the ON condition.
#
# Cols may not contain some or all of the KeyCols, if they are not output
# columns for the join.
#
# TODO(radu): this effectively allows an arbitrary projection; it should be
# just a LookupCols set indicating which columns we should add from the
# index. However, this requires extra Project operators in the lookup join
# exploration transforms which currently leads to problems related to lookup
# join statistics.
Cols ColSet
# LookupColsAreTableKey is true if the lookup columns form a key in the
# table (and thus each left row matches with at most one table row).
LookupColsAreTableKey bool
# At most one of Is{First,Second}JoinInPairedJoiner can be true.
#
# IsFirstJoinInPairedJoiner is true if this is the first join of a
# paired-joiner used for left joins.
IsFirstJoinInPairedJoiner bool
# IsSecondJoinInPairedJoiner is true if this is the second join of a
# paired-joiner used for left joins.
IsSecondJoinInPairedJoiner bool
# ContinuationCol is the column ID of the continuation column when
# IsFirstJoinInPairedJoiner is true.
ContinuationCol ColumnID
# ConstFilters contains the constant filters that are represented as equality
# conditions on the KeyCols. These filters are needed by the statistics code to
# correctly estimate selectivity.
ConstFilters FiltersExpr
_ JoinPrivate
}
# InvertedJoin represents a join between an input expression and an inverted
# index. The type of join is in the InvertedJoinPrivate field.
[Relational]
define InvertedJoin {
Input RelExpr
# On only contains filters on the input columns and primary key columns of
# the inverted index's base table.
On FiltersExpr
_ InvertedJoinPrivate
# lookupProps caches relational properties for the "table" side of the lookup
# join, treating it as if it were another relational input. This makes the
# lookup join appear more like other join operators.
lookupProps RelProps
}
[Private]
define InvertedJoinPrivate {
# JoinType is InnerJoin, LeftJoin, SemiJoin, or AntiJoin.
JoinType Operator
# InvertedExpr is the inverted join condition. It is used to get the keys
# to lookup in the inverted index based on the values of the input columns.
InvertedExpr ScalarExpr
# Table identifies the table do to lookups in.
Table TableID
# Index identifies the inverted index to do lookups in. It can be passed to
# the cat.Table.Index() method in order to fetch the cat.Index metadata.
Index IndexOrdinal
# InvertedCol is the inverted column in the index that is referenced by
# InvertedExpr.
InvertedCol ColumnID
# IsFirstJoinInPairedJoiner is true if this is the first join of a
# paired-joiner used for left joins.
IsFirstJoinInPairedJoiner bool
# ContinuationCol is the column ID of the continuation column when
# IsFirstJoinInPairedJoiner is true.
ContinuationCol ColumnID
# PrefixKeyCols are the columns (produced by the input) used to create
# lookup keys for the non-inverted prefix columns if the index is a
# multi-column inverted index. There must be a key column for each prefix
# column.
PrefixKeyCols ColList
# Cols is the set of columns produced by the inverted join. This set can
# contain columns from the input and columns from the index. Any columns
# not in the input are retrieved from the index.
Cols ColSet
# ConstFilters contains the constant filters that are represented as
# equality conditions on the PrefixKeyCols. These filters are needed by the
# statistics code to correctly estimate selectivity.
ConstFilters FiltersExpr
_ JoinPrivate
}
# MergeJoin represents a join that is executed using merge-join.
# MergeOn is a scalar which contains the ON condition and merge-join ordering
# information; see the MergeOn scalar operator.
# It can be any type of join (identified in the MergeJoinPrivate field).
[Relational]
define MergeJoin {
Left RelExpr
Right RelExpr
On FiltersExpr
_ MergeJoinPrivate
}
[Private]
define MergeJoinPrivate {
# JoinType is one of the basic join operators: InnerJoin, LeftJoin,
# RightJoin, FullJoin, SemiJoin, AntiJoin.
JoinType Operator
# LeftEq and RightEq are orderings on equality columns. They have the same
# length and LeftEq[i] is a column on the left side which is constrained to
# be equal to RightEq[i] on the right side. The directions also have to
# match.
#
# Examples of valid settings for abc JOIN def ON a=d,b=e:
# LeftEq: a+,b+ RightEq: d+,e+
# LeftEq: b-,a+ RightEq: e-,d+
LeftEq Ordering
RightEq Ordering
# LeftOrdering and RightOrdering are "simplified" versions of LeftEq/RightEq,
# taking into account the functional dependencies of each side. We need both
# versions because we need to configure execution with specific equality
# columns and orderings.
LeftOrdering OrderingChoice
RightOrdering OrderingChoice
_ JoinPrivate
}
# ZigzagJoin represents a join that is executed using the zigzag joiner.
# All fields except for the ON expression are stored in the private;
# since the zigzag joiner operates directly on indexes and doesn't
# support arbitrary inputs.
#
# TODO(itsbilal): Add support for representing multi-way zigzag joins.
[Relational, Telemetry]
define ZigzagJoin {
On FiltersExpr
_ ZigzagJoinPrivate
# leftProps and rightProps cache relational properties corresponding to an
# unconstrained scan on the respective indexes. By putting this in the
# expr, zigzag joins can reuse a lot of the logical property building code
# for joins.
leftProps RelProps
rightProps RelProps
}
[Private]
define ZigzagJoinPrivate {
# LeftTable and RightTable identifies the left and right tables for this
# join.
LeftTable TableID
RightTable TableID
# LeftIndex and RightIndex identifies the index to do lookups in (whether
# primary or secondary). It can be passed to the cat.Table.Index() method in
# order to fetch the cat.Index metadata.
LeftIndex IndexOrdinal
RightIndex IndexOrdinal
# LeftEqCols and RightEqCols contains lists of columns on the left and
# right sides that are being equated. Both lists must be of equal length.
LeftEqCols ColList
RightEqCols ColList
# FixedVals, LeftFixedCols and RightFixedCols reference fixed values.
# Fixed values are constants that constrain each index' prefix columns
# (the ones denoted in {Left,Right}FixedCols). These fixed columns must
# lie at the start of the index and must immediately precede EqCols.
#
# FixedVals is a list of 2 tuples, each representing one side's fixed
# values.
#
# Read the comment in pkg/sql/distsqlrun/zigzagjoiner.go for more on
# fixed and equality columns.
FixedVals ScalarListExpr
LeftFixedCols ColList
RightFixedCols ColList
# Cols is the set of columns produced by the zigzag join. This set can
# contain columns from either side's index.
Cols ColSet
}
# InnerJoinApply has the same join semantics as InnerJoin. However, unlike
# InnerJoin, it allows the right input to refer to columns projected by the
# left input.
[Relational, Join, JoinApply, Telemetry]
define InnerJoinApply {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
}
[Relational, Join, JoinApply, Telemetry]
define LeftJoinApply {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
}
[Relational, Join, JoinApply, Telemetry]
define SemiJoinApply {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
}
[Relational, Join, JoinApply, Telemetry]
define AntiJoinApply {
Left RelExpr
Right RelExpr
On FiltersExpr
_ JoinPrivate
}
# GroupBy computes aggregate functions over groups of input rows. Input rows
# that are equal on the grouping columns are grouped together. The set of
# computed aggregate functions is described by the Aggregations field (which is
# always an Aggregations operator).
#
# The arguments of the aggregate functions are columns from the input
# (i.e. Variables), possibly wrapped in aggregate modifiers like AggDistinct.
#
# If the set of input rows is empty, then the output of the GroupBy operator
# will also be empty. If the grouping columns are empty, then all input rows
# form a single group. GroupBy is used for queries with aggregate functions,
# HAVING clauses and/or GROUP BY expressions.
#
# The GroupingPrivate field contains an ordering; this ordering serves a
# dual-purpose:
# - if we ignore any grouping columns, the remaining columns indicate an
# intra-group ordering; this is useful if there is an order-dependent
# aggregation (like ARRAY_AGG).
# - any prefix containing only grouping columns is used to execute the
# aggregation in a streaming fashion.
#
# Currently, the initially built GroupBy has all grouping columns as "optional"
# in the ordering (we call this the "canonical" variant). Subsequently, the
# GenerateStreamingGroupBy exploration rule can add more variants, based on
# interesting orderings.
[Relational, Grouping, Telemetry]
define GroupBy {
Input RelExpr
Aggregations AggregationsExpr
_ GroupingPrivate
}
# GroupingPrivate is shared between the grouping-related operators: GroupBy
# ScalarGroupBy, DistinctOn, EnsureDistinctOn, UpsertDistinctOn, and
# EnsureUpsertDistinctOn. This allows the operators to be treated
# polymorphically.
[Private]
define GroupingPrivate {
# GroupingCols partitions the GroupBy input rows into aggregation groups.
# All rows sharing the same values for these columns are in the same group.
# GroupingCols is always empty in the ScalarGroupBy case.
GroupingCols ColSet
# Ordering specifies the order required of the input. This order can intermix
# grouping and non-grouping columns, serving a dual-purpose:
# - if we ignore grouping columns, it specifies an intra-group ordering (sort
# order of values within each group, useful for order-sensitive aggregation
# operators like ArrayAgg;
# - leading grouping columns specify an inter-group ordering, allowing for
# more efficient streaming execution.
#
# The canonical operation always contains an ordering that has no grouping
# columns. Exploration rules can create versions of the operator with
# orderings that contain grouping columns.
Ordering OrderingChoice
# NullsAreDistinct specifies the null behavior of the grouping operator. If
# true, the operator considers nulls to be distinct for grouping purposes.
# NullsAreDistinct should only be true for UpsertDistinctOn and
# EnsureUpsertDistinctOn.
NullsAreDistinct bool
# ErrorOnDup, if non-empty, triggers an error with the given text if any
# aggregation group contains more than one row. This can only take on a
# value for the EnsureDistinctOn and EnsureUpsertDistinctOn operators.
ErrorOnDup string
}
# ScalarGroupBy computes aggregate functions over the complete set of input
# rows. This is similar to GroupBy with empty grouping columns, where all input
# rows form a single group. However, there is an important difference. If the
# input set is empty, then the output of the ScalarGroupBy operator will have a
# single row containing default values for each aggregate function (typically
# null or zero, depending on the function). ScalarGroupBy always returns exactly
# one row - either the single-group aggregates or the default aggregate values.
#
# ScalarGroupBy uses the GroupingPrivate struct so that it's polymorphic with
# GroupBy and can be used in the same rules (when appropriate). In the
# ScalarGroupBy case, the grouping column field in GroupingPrivate is always
# empty.
[Relational, Grouping, Telemetry]
define ScalarGroupBy {
Input RelExpr
Aggregations AggregationsExpr
_ GroupingPrivate
}
# DistinctOn filters out rows that are identical on the set of grouping columns;
# only the first row (according to an ordering) is kept for each set of possible
# values. It is roughly equivalent with a GroupBy on the same grouping columns
# except that it uses FirstAgg functions that ensure the value on the first row
# is chosen (across all aggregations).
#
# In addition, the value on that first row must be chosen for all the grouping
# columns as well; this is relevant in the case of equal but non-identical
# values, like decimals. For example, if we have rows (1, 2.0) and (1.0, 2) and
# we are grouping on these two columns, the values output can be either (1, 2.0)
# or (1.0, 2), but not (1.0, 2.0).
#
# The execution of DistinctOn resembles that of Select more than that of
# GroupBy: each row is tested against a map of what groups we have seen already,
# and is either passed through or discarded. In particular, note that this
# preserves the input ordering.
#
# The ordering in the GroupingPrivate field will be required of the input; it
# determines which row can get "chosen" for each group of values on the grouping
# columns. There is no restriction on the ordering; but note that grouping
# columns are inconsequential - they can appear anywhere in the ordering and
# they won't change the results (other than the result ordering).
#
# Currently when we build DistinctOn, we set all grouping columns as optional
# cols in Ordering (but this is not required by the operator).
#
# TODO(radu): in the future we may want an exploration transform to try out more
# specific interesting orderings because execution is more efficient when we can
# rely on an ordering on the grouping columns (or a subset of them).
#
# DistinctOn uses an Aggregations child and the GroupingPrivate struct so that
# it's polymorphic with GroupBy and can be used in the same rules (when
# appropriate). In the DistinctOn case, the aggregations can be only FirstAgg or
# ConstAgg.
[Relational, Grouping, Telemetry]
define DistinctOn {
Input RelExpr
Aggregations AggregationsExpr
_ GroupingPrivate
}
# EnsureDistinctOn is a variation on DistinctOn that is only used to replace a
# Max1Row operator in a decorrelation attempt. It raises an error if any
# distinct grouping contains more than one row. Or in other words, it "ensures"
# that the input is distinct on the grouping columns.
#
# EnsureDistinctOn is used when nulls are not considered distinct for grouping
# purposes and an error should be raised when duplicates are detected.
#
# Rules should only "push through" or eliminate an EnsureDistinctOn if they
# preserve the expected error behavior. For example, it would be invalid to
# push a Select filter into an EnsureDistinctOn, as it might eliminate rows
# that would otherwise trigger the EnsureDistinctOn error.
[Relational, Grouping, Telemetry]
define EnsureDistinctOn {
Input RelExpr
Aggregations AggregationsExpr
_ GroupingPrivate
}
# UpsertDistinctOn is a variation on DistinctOn that is only used with UPSERT
# and INSERT..ON CONFLICT statements. Unlike DistinctOn, UpsertDistinctOn treats
# NULL values as not equal to one another for purposes of grouping. Two rows
# having a NULL-valued grouping column will be placed in different groups. This
# differs from DistinctOn behavior, where the two rows would be grouped
# together. This behavior difference reflects SQL semantics, in which a unique
# index key still allows multiple NULL values.
#
# UpsertDistinctOn is used when nulls are considered distinct for grouping
# purposes and duplicates should be filtered out without raising an error.
[Relational, Grouping, Telemetry]
define UpsertDistinctOn {
Input RelExpr
Aggregations AggregationsExpr
_ GroupingPrivate
}
# EnsureUpsertDistinctOn is a variation on UpsertDistinctOn that is only used
# with UPSERT and INSERT..ON CONFLICT statements. Like UpsertDistinctOn,
# EnsureUpsertDistinctOn treats NULL values as not equal to one another for
# purposes of grouping. Unlike UpsertDistinctOn, it raises an error if any
# distinct grouping contains more than one row. Or in other words, it "ensures"
# that the input is distinct on the grouping columns.
#
# EnsureUpsertDistinctOn is used when nulls are considered distinct for grouping
# purposes and an error should be raised when duplicates are detected.
#
# Rules should only "push through" or eliminate an EnsureUpsertDistinctOn if
# they preserve the expected error behavior. For example, it would be invalid to
# push a Select filter into an EnsureUpsertDistinctOn, as it might eliminate
# rows that would otherwise trigger the EnsureUpsertDistinctOn error.
[Relational, Grouping, Telemetry]
define EnsureUpsertDistinctOn {
Input RelExpr
Aggregations AggregationsExpr
_ GroupingPrivate
}
# Union is an operator used to combine the Left and Right input relations into
# a single set containing rows from both inputs. Duplicate rows are discarded.
# The SetPrivate field matches columns from the Left and Right inputs of the
# Union with the output columns. See the comment above SetPrivate for more
# details.
[Relational, Set]
define Union {
Left RelExpr
Right RelExpr
_ SetPrivate
}
# SetPrivate contains fields used by the relational set operators: Union,
# Intersect, Except, UnionAll, IntersectAll and ExceptAll. It matches columns
# from the left and right inputs of the operator with the output columns, since
# OutputCols are not ordered and may not correspond to each other.
#
# For example, consider the following query:
# SELECT y, x FROM xy UNION SELECT b, a FROM ab
#
# Given:
# col index
# x 1
# y 2
# a 3
# b 4
#
# SetPrivate will contain the following values:
# Left: [2, 1]
# Right: [4, 3]
# Out: [5, 6] <-- synthesized output columns
#
# To make normalization rules and execution simpler, both inputs to the set op
# must have matching types.
[Private]
define SetPrivate {
LeftCols ColList
RightCols ColList
OutCols ColList
}
# Intersect is an operator used to perform an intersection between the Left
# and Right input relations. The result consists only of rows in the Left
# relation that are also present in the Right relation. Duplicate rows are
# discarded.
# The SetPrivate field matches columns from the Left and Right inputs of the
# Intersect with the output columns. See the comment above SetPrivate for more
# details.
[Relational, Set]
define Intersect {
Left RelExpr
Right RelExpr
_ SetPrivate
}
# Except is an operator used to perform a set difference between the Left and
# Right input relations. The result consists only of rows in the Left relation
# that are not present in the Right relation. Duplicate rows are discarded.
# The SetPrivate field matches columns from the Left and Right inputs of the Except
# with the output columns. See the comment above SetPrivate for more details.
[Relational, Set]
define Except {
Left RelExpr
Right RelExpr
_ SetPrivate
}
# UnionAll is an operator used to combine the Left and Right input relations
# into a single set containing rows from both inputs. Duplicate rows are
# not discarded. For example:
#
# SELECT x FROM xx UNION ALL SELECT y FROM yy
# x y out
# ----- ----- -----
# 1 1 1
# 1 2 -> 1
# 2 3 1
# 2
# 2
# 3
#
# The SetPrivate field matches columns from the Left and Right inputs of the
# UnionAll with the output columns. See the comment above SetPrivate for more
# details.
[Relational, Set]
define UnionAll {
Left RelExpr
Right RelExpr
_ SetPrivate
}
# IntersectAll is an operator used to perform an intersection between the Left
# and Right input relations. The result consists only of rows in the Left
# relation that have a corresponding row in the Right relation. Duplicate rows
# are not discarded. This effectively creates a one-to-one mapping between the
# Left and Right rows. For example:
#
# SELECT x FROM xx INTERSECT ALL SELECT y FROM yy
# x y out
# ----- ----- -----
# 1 1 1
# 1 1 -> 1
# 1 2 2
# 2 2 2
# 2 3
# 4
#
# The SetPrivate field matches columns from the Left and Right inputs of the
# IntersectAll with the output columns. See the comment above SetPrivate for more
# details.
[Relational, Set]
define IntersectAll {
Left RelExpr
Right RelExpr
_ SetPrivate
}
# ExceptAll is an operator used to perform a set difference between the Left
# and Right input relations. The result consists only of rows in the Left
# relation that do not have a corresponding row in the Right relation.
# Duplicate rows are not discarded. This effectively creates a one-to-one
# mapping between the Left and Right rows. For example:
# SELECT x FROM xx EXCEPT ALL SELECT y FROM yy
# x y out
# ----- ----- -----
# 1 1 -> 1
# 1 1 4
# 1 2
# 2 2
# 2 3
# 4
#
# The SetPrivate field matches columns from the Left and Right inputs of the
# ExceptAll with the output columns. See the comment above SetPrivate for more
# details.
[Relational, Set]
define ExceptAll {
Left RelExpr
Right RelExpr
_ SetPrivate
}
# Limit returns a limited subset of the results in the input relation. The limit
# expression is a scalar value; the operator returns at most this many rows. The
# Orering field is a physical.OrderingChoice which indicates the row ordering
# required from the input (the first rows with respect to this ordering are
# returned).
[Relational]
define Limit {
Input RelExpr
Limit ScalarExpr
Ordering OrderingChoice
}
# Offset filters out the first Offset rows of the input relation; used in
# conjunction with Limit.
[Relational]
define Offset {
Input RelExpr
Offset ScalarExpr
Ordering OrderingChoice
}
# Max1Row enforces that its input must return at most one row. If the input
# has more than one row, Max1Row raises an error with the specified error text.
#
# Max1Row is most often used as input to the Subquery operator. See the comment
# above Subquery for more details.
[Relational]
define Max1Row {
Input RelExpr
ErrorText string
}
# Ordinality adds a column to each row in its input containing a unique,
# increasing number.
[Relational]
define Ordinality {
Input RelExpr
_ OrdinalityPrivate
}
[Private]
define OrdinalityPrivate {
# Ordering denotes the required ordering of the input.
Ordering OrderingChoice
# ColID holds the id of the column introduced by this operator.
ColID ColumnID
}
# ProjectSet represents a relational operator which zips through a list of
# generators for every row of the input.
#
# As a reminder, a functional zip over generators a,b,c returns tuples of
# values from a,b,c picked "simultaneously". NULLs are used when a generator is
# "shorter" than another. For example:
#
# zip([1,2,3], ['a','b']) = [(1,'a'), (2,'b'), (3, null)]
#
# ProjectSet corresponds to a relational operator project(R, a, b, c, ...)
# which, for each row in R, produces all the rows produced by zip(a, b, c, ...)
# with the values of R prefixed. Formally, this performs a lateral cross join
# of R with zip(a,b,c).
#
# See the Zip header for more details.
[Relational, Telemetry]
define ProjectSet {
Input RelExpr
Zip ZipExpr
}
# Window represents a window function. Window functions are operators which
# allow computations that take into consideration other rows in the same result
# set.
#
# More concretely, a window function is a relational operator that takes in a
# result set and appends a single new column whose value depends on the other
# rows within the result set, and that row's relative position in it.
#
# Depending on the exact window function being computed, the value of the new
# column could be the position of the row in the output (`row_number`), or a
# cumulative sum, or something else.
[Relational]
define Window {
Input RelExpr
# Windows is the set of window functions to be computed for this operator.
Windows WindowsExpr
_ WindowPrivate
}
[Private]
define WindowPrivate {
# Partition is the set of columns to partition on. Every set of rows
# sharing the values for this set of columns will be treated independently.
Partition ColSet
# Ordering is the ordering that the window function is computed relative to
# within each partition.
Ordering OrderingChoice
}
# With executes Binding, making its results available to Main. Within Main, the
# results of Binding may be referenced by a WithScan expression containing the
# ID of this With.
[Relational, WithBinding]
define With {
Binding RelExpr
Main RelExpr
_ WithPrivate
}
[Private]
define WithPrivate {
ID WithID
# OriginalExpr contains the original CTE expression (so that we can display
# it in the EXPLAIN plan).
OriginalExpr Statement
# Mtr is used to specify whether or not to override the optimizer's
# default decision for materializing or not materializing tables.
Mtr MaterializeClause