-
Notifications
You must be signed in to change notification settings - Fork 3.9k
/
Copy pathscalar.opt
404 lines (377 loc) · 13.4 KB
/
scalar.opt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# =============================================================================
# scalar.opt contains scalar normalization rules that aren't handled elsewhere.
# =============================================================================
# CommuteVar ensures that variable references are on the left side of
# commutative comparison and binary operators. Other patterns don't need to
# handle both combinations.
[CommuteVar, Normalize]
(Eq | Ne | Is | IsNot | Plus | Mult | Bitand | Bitor | Bitxor
$left:^(Variable)
$right:(Variable)
)
=>
((OpName) $right $left)
# CommuteConst ensures that "constant expression trees" are on the right side
# of commutative comparison and binary operators. A constant expression tree
# has no unbound variables that refer to outer columns. It therefore always
# evaluates to the same result. Note that this is possible even if the tree
# contains variable expressions, as long as they are bound, such as in
# uncorrelated subqueries:
#
# SELECT * FROM a WHERE a.x = (SELECT SUM(b.x) FROM b)
#
# The right side of the equality expression is a constant expression tree, even
# though it contains an entire subquery, because it always evaluates to the same
# result. The left side is not a constant expression tree, even though it
# contains just a single variable, because its value can be different for each
# row in the table "a".
#
# The goal of this and related patterns is to push constant expression trees to
# the right side until only a Variable remains on the left (if possible). Other
# patterns can rely on this normal form and only handle one combination.
[CommuteConst, Normalize]
(Eq | Ne | Is | IsNot | Plus | Mult | Bitand | Bitor | Bitxor
$left:(ConstValue)
$right:^(ConstValue)
)
=>
((OpName) $right $left)
# EliminateCoalesce discards the Coalesce operator if it has a single operand.
[EliminateCoalesce, Normalize]
(Coalesce [ $item:* ])
=>
$item
# SimplifyCoalesce discards any leading null operands, and then if the next
# operand is a constant, replaces with that constant. Note that ConstValue
# matches nulls as well as other constants.
[SimplifyCoalesce, Normalize]
(Coalesce
$args:[
$arg:* & (IsConstValueOrGroupOfConstValues $arg)
...
]
)
=>
(SimplifyCoalesce $args)
# EliminateCast discards the cast operator if its input already has a type
# that's identical to the desired static type.
#
# Note that CastExpr removes unnecessary casts during type-checking; this rule
# can still be helpful if some other rule creates an unnecessary CastExpr.
[EliminateCast, Normalize]
(Cast $input:* $targetTyp:* & (HasColType $input $targetTyp))
=>
$input
# EliminateAssignmentCast discards the assignment cast operator if the input
# type and the target type are identical and the assignment cast is guaranteed
# to be a no-op. See AssignmentCastIsNoop for an explanation of what makes an
# assignment cast a no-op.
[EliminateAssignmentCast, Normalize]
(AssignmentCast
$input:*
$targetTyp:* &
(HasColType $input $targetTyp) &
(AssignmentCastIsNoop $targetTyp)
)
=>
$input
# NormalizeInConst ensures that the In operator's tuple operand is sorted with
# duplicates removed (since duplicates do not change the result).
[NormalizeInConst, Normalize]
(In | NotIn
$left:*
$right:(Tuple $elems:*) & (NeedSortedUniqueList $elems)
)
=>
((OpName) $left (Tuple (ConstructSortedUniqueList $elems)))
# FoldInNull replaces the In/Not operator with Null when the tuple only
# contains null. The NormalizeInConst pattern will reduce multiple nulls to a
# single null when it removes duplicates, so this pattern will match that.
[FoldInNull, Normalize]
(In | NotIn $left:* (Tuple [ (Null) ]))
=>
(Null (BoolType))
[SimplifyInSingleElement, Normalize]
(In $left:* (Tuple [ $right:* ]))
=>
(Eq $left $right)
[SimplifyNotInSingleElement, Normalize]
(NotIn $left:* (Tuple [ $right:* ]))
=>
(Ne $left $right)
# UnifyComparisonTypes takes a mixed-type comparison between a non-constant and
# a constant and, if appropriate, converts the constant to the type of the
# non-constant to allow constraints to be generated.
[UnifyComparisonTypes, Normalize]
(Comparison
$left:(Variable)
$right:(Const) &
(Let ($result $ok):(UnifyComparison $left $right) $ok)
)
=>
((OpName) $left $result)
# EliminateExistsZeroRows converts an Exists subquery to False when it's known
# that the input produces zero rows.
[EliminateExistsZeroRows, Normalize]
(Exists $input:* & (HasZeroRows $input))
=>
(False)
# EliminateExistsProject discards a Project input to the Exists operator. The
# Project operator never changes the row cardinality of its input, and row
# cardinality is the only thing that Exists cares about, so Project is a no-op.
[EliminateExistsProject, Normalize]
(Exists (Project $input:*) $subqueryPrivate:*)
=>
(Exists $input $subqueryPrivate)
# EliminateExistsGroupBy discards a non-scalar GroupBy input to the Exists
# operator. While non-scalar GroupBy (or DistinctOn) can change row cardinality,
# it always returns a non-empty set if its input is non-empty. Similarly, if its
# input is empty, then it returns the empty set. Therefore, it's a no-op for
# Exists.
#
# NOTE: EnsureDistinctOn has the side effect of error'ing if the input has
# duplicates, so do not eliminate it.
[EliminateExistsGroupBy, Normalize]
(Exists (GroupBy | DistinctOn $input:*) $subqueryPrivate:*)
=>
(Exists $input $subqueryPrivate)
# InlineExistsSelectTuple splits a tuple equality filter into multiple
# (per-column) equalities, in the case where the tuple on one side is being
# projected.
#
# We are specifically handling the case when this is under Exists because we
# don't have to keep the same output columns for the Select. This case is
# important because it is produced for an IN subquery:
#
# SELECT * FROM ab WHERE (a, b) IN (SELECT c, d FROM cd)
#
# Without this rule, we would not be able to produce a lookup join plan for such
# a query.
#
[InlineExistsSelectTuple, Normalize]
(Exists
(Select
(Project
$input:*
[
...
(ProjectionsItem $tuple:(Tuple) $tupleCol:*)
...
]
)
$filters:[
...
$item:(FiltersItem
(Eq
# CommuteVar ensures that the variable is on the left.
(Variable
$varCol:* &
(EqualsColumn $varCol $tupleCol)
)
$rhs:(Tuple) &
(TuplesHaveSameLength $tuple $rhs)
)
)
...
]
)
$subqueryPrivate:*
)
=>
(Exists
(Select
$input
(ConcatFilters
(RemoveFiltersItem $filters $item)
(SplitTupleEq $tuple $rhs)
)
)
$subqueryPrivate
)
# IntroduceExistsLimit inserts a LIMIT 1 "under" Exists so as to save resources
# to make the EXISTS determination.
#
# This rule uses and sets a boolean "WasLimited" on the Exists
# node to ensure the rule is only applied once. This is because the
# rule expands to an Exists pattern that's also a valid input pattern
# and it would recurse otherwise.
#
# We avoid this rule if the query is correlated because the decorrelation rules
# get confused by the presence of a limit. (It will be worth re-considering this
# when a general-purpose apply operator is supported - in that case it can be
# definitely worthwhile pushing down a LIMIT 1 to limit the amount of work done
# on every row.)
[IntroduceExistsLimit, Normalize]
(Exists
$input:* & ^(HasOuterCols $input) & ^(HasZeroOrOneRow $input)
$subqueryPrivate:* & ^(IsLimited $subqueryPrivate)
)
=>
(Exists
(Limit $input (IntConst 1) (EmptyOrdering))
(MakeLimited $subqueryPrivate)
)
# EliminateExistsLimit discards a Limit operator with a positive limit inside an
# Exist operator.
#
# The Limit operator prevents decorrelation rules from being applied. By
# discarding the Limit, which is a no-op inside of Exist operators, the query
# can be decorrelated into a more efficient SemiJoin or AntiJoin.
#
# Note that this rule uses HasOuterCols to ensure that it only matches
# correlated Exists subqueries. There is no need to discard limits from
# non-correlated Exists subqueries. Limits are preferred for non-correlated
# Exists subqueries. See IntroduceExistsLimit above for details.
[EliminateExistsLimit, Normalize]
(Exists
(Limit
$input:* & (HasOuterCols $input)
(Const $limit:*) & (IsPositiveInt $limit)
)
$subqueryPrivate:*
)
=>
(Exists $input $subqueryPrivate)
# SimplifyCaseWhenConstValue removes branches known to not match. Any
# branch known to match is used as the ELSE and further WHEN conditions
# are skipped. If all WHEN conditions have been removed, the ELSE
# expression is used.
# This transforms
#
# CASE WHEN v THEN 1 WHEN false THEN a WHEN true THEN b ELSE c END
#
# to
#
# CASE WHEN v THEN 1 ELSE b END
#
[SimplifyCaseWhenConstValue, Normalize]
(Case
$condition:(ConstValue)
$whens:[ ... (When (ConstValue)) ... ]
$orElse:*
)
=>
(SimplifyWhens $condition $whens $orElse)
# InlineAnyValuesSingleCol converts Any with Values input to AnyScalar.
# This version handles the case where there is a single column.
[InlineAnyValuesSingleCol, Normalize]
(Any $values:(Values) $scalar:* $private:*)
=>
(AnyScalar $scalar (InlineValues $values) (SubqueryCmp $private))
# InlineAnyValuesMultiCol converts Any with Values input to AnyScalar.
# This version handles the case where there are multiple columns; in this case,
# the Values is wrapped into a Project that converts each row to a tuple.
[InlineAnyValuesMultiCol, Normalize]
(Any
(Project
$values:(Values * $valuesPrivate:*)
[ (ProjectionsItem $tuple:(Tuple)) ] &
(IsTupleOfVars $tuple (ValuesCols $valuesPrivate))
$passthrough:* & (ColsAreEmpty $passthrough)
)
$scalar:*
$private:*
)
=>
(AnyScalar $scalar (InlineValues $values) (SubqueryCmp $private))
# SimplifyEqualsAnyTuple converts a scalar ANY operation to an IN comparison.
# It transforms
#
# x = ANY (...)
#
# to
#
# x IN (...)
#
# Which allows scans to be constrained.
[SimplifyEqualsAnyTuple, Normalize]
(AnyScalar $input:* $tuple:(Tuple) $cmp:* & (OpsAreSame $cmp Eq))
=>
(In $input $tuple)
# SimplifyAnyScalarArray converts a scalar ANY operation on a constant ARRAY to a scalar
# ANY operation on a tuple. In particular, this allows SimplifyEqualsAnyTuple to be
# triggered, which allows constraints to be generated.
[SimplifyAnyScalarArray, Normalize]
(AnyScalar $input:* $ary:(Const) & (IsConstArray $ary) $cmp:*)
=>
(AnyScalar $input (ConvertConstArrayToTuple $ary) $cmp)
# FoldCollate converts a Collate expr over an uncollated string into a collated
# string constant.
[FoldCollate, Normalize]
(Collate $input:(Const) $locale:*)
=>
(CastToCollatedString $input $locale)
# ArrayFlattenToAgg converts a correlated ArrayFlatten to an aggregation.
# This rule exists because:
#
# 1. We cannot do the aggregation method if we don't have a scalar type
# (for instance, if we have a tuple type).
# 2. We cannot decorrelate an ArrayFlatten directly (but we can decorrelate
# an aggregation). So it's desirable to perform this conversion in the
# interest of decorrelation.
#
# So the outcome is that we can perform uncorrelated ARRAY(...)s over any datatype,
# and correlated ones only over the types that array_agg supports.
#
# Note that optbuilder should have already verified that if the input is
# correlated, then we can array_agg over the input type. Also note that the
# Max1Row operator we introduce is guaranteed to be eliminated as
# MakeArrayAggForFlatten will return a ScalarGroupBy.
[NormalizeArrayFlattenToAgg, Normalize]
(ArrayFlatten $input:(HasOuterCols $input) $subquery:*)
=>
(Coalesce
[
(Subquery
(ScalarGroupBy
$input
[
(AggregationsItem
(ArrayAgg
(Variable
$requestedCol:(SubqueryRequestedCol
$subquery
)
)
)
(MakeArrayAggCol
(ArrayType $requestedCol)
)
)
]
(MakeGrouping
(MakeEmptyColSet)
(SubqueryOrdering $subquery)
)
)
(MakeUnorderedSubquery)
)
(Array [] (ArrayType $requestedCol))
]
)
# SimplifySameVarEqualities converts `x = x` and other equality
# comparisons into `x IS NOT NULL OR NULL`. The `OR NULL` is necessary
# when x is NULL.
[SimplifySameVarEqualities, Normalize]
(Eq | Le | Ge
$left:(Variable)
$right:(Variable) & (VarsAreSame $left $right)
)
=>
(Or (IsNot $left (Null (TypeOf $left))) (Null (BoolType)))
# SimplifySameVarInequalities converts `x != x` and other inequality
# comparisons into `x IS NULL AND NULL`. The `AND NULL` is necessary
# when x is NULL.
[SimplifySameVarInequalities, Normalize]
(Ne | Lt | Gt
$left:(Variable)
$right:(Variable) & (VarsAreSame $left $right)
)
=>
(And (Is $left (Null (TypeOf $left))) (Null (BoolType)))
# SimplifyNotDisjoint converts !st_disjoint to st_intersects so that
# the query can be index-accelerated if a suitable inverted index exists.
[SimplifyNotDisjoint, Normalize]
(Not (Function $args:* $private:(FunctionPrivate "st_disjoint")))
=>
(MakeIntersectionFunction $args)