-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
kvaccessor.go
451 lines (410 loc) · 15 KB
/
kvaccessor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package spanconfigkvaccessor
import (
"context"
"fmt"
"sort"
"strings"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/spanconfig"
"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/errors"
)
// KVAccessor provides read/write access to all the span configurations for a
// CRDB cluster. It's a concrete implementation of the KVAccessor interface.
type KVAccessor struct {
db *kv.DB
ie sqlutil.InternalExecutor
// optionalTxn captures the transaction we're scoped to; it's allowed to be
// nil. If nil, it's unsafe to use multiple times as part of the same
// request with any expectation of transactionality -- we're responsible for
// opening a fresh txn.
optionalTxn *kv.Txn
settings *cluster.Settings
// configurationsTableFQN is typically 'system.public.span_configurations',
// but left configurable ease-of-testing.
configurationsTableFQN string
}
var _ spanconfig.KVAccessor = &KVAccessor{}
// New constructs a new KVAccessor.
func New(
db *kv.DB, ie sqlutil.InternalExecutor, settings *cluster.Settings, configurationsTableFQN string,
) *KVAccessor {
if _, err := parser.ParseQualifiedTableName(configurationsTableFQN); err != nil {
panic(fmt.Sprintf("unabled to parse configurations table FQN: %s", configurationsTableFQN))
}
return newKVAccessor(db, ie, settings, configurationsTableFQN, nil /* optionalTxn */)
}
// WithTxn is part of the KVAccessor interface.
func (k *KVAccessor) WithTxn(ctx context.Context, txn *kv.Txn) spanconfig.KVAccessor {
if k.optionalTxn != nil {
log.Fatalf(ctx, "KVAccessor already scoped to txn (was .WithTxn(...) chained multiple times?")
}
return newKVAccessor(k.db, k.ie, k.settings, k.configurationsTableFQN, txn)
}
// GetSpanConfigEntriesFor is part of the KVAccessor interface.
func (k *KVAccessor) GetSpanConfigEntriesFor(
ctx context.Context, spans []roachpb.Span,
) (resp []roachpb.SpanConfigEntry, retErr error) {
if len(spans) == 0 {
return resp, nil
}
if err := validateSpans(spans); err != nil {
return nil, err
}
getStmt, getQueryArgs := k.constructGetStmtAndArgs(spans)
it, err := k.ie.QueryIteratorEx(ctx, "get-span-cfgs", k.optionalTxn,
sessiondata.InternalExecutorOverride{User: security.RootUserName()},
getStmt, getQueryArgs...,
)
if err != nil {
return nil, err
}
defer func() {
if closeErr := it.Close(); closeErr != nil {
resp, retErr = nil, errors.CombineErrors(retErr, closeErr)
}
}()
var ok bool
for ok, err = it.Next(ctx); ok; ok, err = it.Next(ctx) {
row := it.Cur()
span := roachpb.Span{
Key: []byte(*row[0].(*tree.DBytes)),
EndKey: []byte(*row[1].(*tree.DBytes)),
}
var conf roachpb.SpanConfig
if err := protoutil.Unmarshal(([]byte)(*row[2].(*tree.DBytes)), &conf); err != nil {
return nil, err
}
resp = append(resp, roachpb.SpanConfigEntry{
Span: span,
Config: conf,
})
}
if err != nil {
return nil, err
}
return resp, nil
}
// UpdateSpanConfigEntries is part of the KVAccessor interface.
func (k *KVAccessor) UpdateSpanConfigEntries(
ctx context.Context, toDelete []roachpb.Span, toUpsert []roachpb.SpanConfigEntry,
) error {
if k.optionalTxn != nil {
return k.updateSpanConfigEntriesWithTxn(ctx, toDelete, toUpsert, k.optionalTxn)
}
return k.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
return k.updateSpanConfigEntriesWithTxn(ctx, toDelete, toUpsert, txn)
})
}
func newKVAccessor(
db *kv.DB,
ie sqlutil.InternalExecutor,
settings *cluster.Settings,
configurationsTableFQN string,
optionalTxn *kv.Txn,
) *KVAccessor {
return &KVAccessor{
db: db,
ie: ie,
optionalTxn: optionalTxn,
settings: settings,
configurationsTableFQN: configurationsTableFQN,
}
}
func (k *KVAccessor) updateSpanConfigEntriesWithTxn(
ctx context.Context, toDelete []roachpb.Span, toUpsert []roachpb.SpanConfigEntry, txn *kv.Txn,
) error {
if txn == nil {
log.Fatalf(ctx, "expected non-nil txn")
}
if err := validateUpdateArgs(toDelete, toUpsert); err != nil {
return err
}
var deleteStmt string
var deleteQueryArgs []interface{}
if len(toDelete) > 0 {
deleteStmt, deleteQueryArgs = k.constructDeleteStmtAndArgs(toDelete)
}
var upsertStmt, validationStmt string
var upsertQueryArgs, validationQueryArgs []interface{}
if len(toUpsert) > 0 {
var err error
upsertStmt, upsertQueryArgs, err = k.constructUpsertStmtAndArgs(toUpsert)
if err != nil {
return err
}
validationStmt, validationQueryArgs = k.constructValidationStmtAndArgs(toUpsert)
}
if len(toDelete) > 0 {
n, err := k.ie.ExecEx(ctx, "delete-span-cfgs", txn,
sessiondata.InternalExecutorOverride{User: security.RootUserName()},
deleteStmt, deleteQueryArgs...,
)
if err != nil {
return err
}
if n != len(toDelete) {
return errors.AssertionFailedf("expected to delete %d row(s), deleted %d", len(toDelete), n)
}
}
if len(toUpsert) == 0 {
// Nothing left to do
return nil
}
if n, err := k.ie.ExecEx(ctx, "upsert-span-cfgs", txn,
sessiondata.InternalExecutorOverride{User: security.RootUserName()},
upsertStmt, upsertQueryArgs...,
); err != nil {
return err
} else if n != len(toUpsert) {
return errors.AssertionFailedf("expected to upsert %d row(s), upserted %d", len(toUpsert), n)
}
if datums, err := k.ie.QueryRowEx(ctx, "validate-span-cfgs", txn,
sessiondata.InternalExecutorOverride{User: security.RootUserName()},
validationStmt, validationQueryArgs...,
); err != nil {
return err
} else if valid := bool(tree.MustBeDBool(datums[0])); !valid {
return errors.AssertionFailedf("expected to find single row containing upserted spans")
}
return nil
}
// constructGetStmtAndArgs constructs the statement and query arguments needed
// to fetch span configs for the given spans.
func (k *KVAccessor) constructGetStmtAndArgs(spans []roachpb.Span) (string, []interface{}) {
// We want to fetch the overlapping span configs for each requested span in
// a single round trip and using only constrained index scans. For a single
// requested span, we effectively want to query the following:
//
// -- to find spans overlapping with [$start, $end)
// SELECT * FROM system.span_configurations
// WHERE start_key < $end AND end_key > $start
//
// With the naive form above that translates to an unbounded index scan on
// followed by a filter. We can do better by observing that start_key <
// end_key, and that spans are non-overlapping.
//
// SELECT * FROM span_configurations
// WHERE start_key >= $start AND start_key < $end
// UNION ALL
// SELECT * FROM (
// SELECT * FROM span_configurations
// WHERE start_key < $start ORDER BY start_key DESC LIMIT 1
// ) WHERE end_key > $start;
//
// The idea is to first find all spans that start within the query span, and
// then to include the span with the start key immediately preceding the
// query start if it also overlaps with the query span (achieved by
// the outer filter). We're intentional about not pushing the filter down into
// the query -- we want to select using only the start_key index. Doing so
// results in an unbounded index scan [ - $start) with the filter and limit
// applied after.
//
// To batch multiple query spans into the same statement, we make use of
// UNION ALL.
//
// ( ... statement above for 1st query span ...)
// UNION ALL
// ( ... statement above for 2nd query span ...)
// UNION ALL
// ...
//
var getStmtBuilder strings.Builder
queryArgs := make([]interface{}, len(spans)*2)
for i, sp := range spans {
if i > 0 {
getStmtBuilder.WriteString(`UNION ALL`)
}
startKeyIdx, endKeyIdx := i*2, (i*2)+1
queryArgs[startKeyIdx] = sp.Key
queryArgs[endKeyIdx] = sp.EndKey
fmt.Fprintf(&getStmtBuilder, `
SELECT start_key, end_key, config FROM %[1]s
WHERE start_key >= $%[2]d AND start_key < $%[3]d
UNION ALL
SELECT start_key, end_key, config FROM (
SELECT start_key, end_key, config FROM %[1]s
WHERE start_key < $%[2]d ORDER BY start_key DESC LIMIT 1
) WHERE end_key > $%[2]d
`,
k.configurationsTableFQN, // [1]
startKeyIdx+1, // [2] -- prepared statement placeholder (1-indexed)
endKeyIdx+1, // [3] -- prepared statement placeholder (1-indexed)
)
}
return getStmtBuilder.String(), queryArgs
}
// constructDeleteStmtAndArgs constructs the statement and query arguments
// needed to delete span configs for the given spans.
func (k *KVAccessor) constructDeleteStmtAndArgs(toDelete []roachpb.Span) (string, []interface{}) {
// We're constructing a single delete statement to delete all requested
// spans. It's of the form:
//
// DELETE FROM span_configurations WHERE (start_key, end_key) IN
// (VALUES ( ... 1st span ... ), ( ... 2nd span ...), ... );
//
values := make([]string, len(toDelete))
deleteQueryArgs := make([]interface{}, len(toDelete)*2)
for i, sp := range toDelete {
startKeyIdx, endKeyIdx := i*2, (i*2)+1
deleteQueryArgs[startKeyIdx] = sp.Key
deleteQueryArgs[endKeyIdx] = sp.EndKey
values[i] = fmt.Sprintf("($%d::BYTES, $%d::BYTES)",
startKeyIdx+1, endKeyIdx+1) // prepared statement placeholders (1-indexed)
}
deleteStmt := fmt.Sprintf(`DELETE FROM %[1]s WHERE (start_key, end_key) IN (VALUES %[2]s)`,
k.configurationsTableFQN, strings.Join(values, ", "))
return deleteStmt, deleteQueryArgs
}
// constructUpsertStmtAndArgs constructs the statement and query arguments
// needed to upsert the given span config entries.
func (k *KVAccessor) constructUpsertStmtAndArgs(
toUpsert []roachpb.SpanConfigEntry,
) (string, []interface{}, error) {
// We're constructing a single upsert statement to upsert all requested
// spans. It's of the form:
//
// UPSERT INTO span_configurations (start_key, end_key, config)
// VALUES ( ... 1st span ... ), ( ... 2nd span ...), ... ;
//
upsertValues := make([]string, len(toUpsert))
upsertQueryArgs := make([]interface{}, len(toUpsert)*3)
for i, entry := range toUpsert {
marshaled, err := protoutil.Marshal(&entry.Config)
if err != nil {
return "", nil, err
}
startKeyIdx, endKeyIdx, configIdx := i*3, (i*3)+1, (i*3)+2
upsertQueryArgs[startKeyIdx] = entry.Span.Key
upsertQueryArgs[endKeyIdx] = entry.Span.EndKey
upsertQueryArgs[configIdx] = marshaled
upsertValues[i] = fmt.Sprintf("($%d::BYTES, $%d::BYTES, $%d::BYTES)",
startKeyIdx+1, endKeyIdx+1, configIdx+1) // prepared statement placeholders (1-indexed)
}
upsertStmt := fmt.Sprintf(`UPSERT INTO %[1]s (start_key, end_key, config) VALUES %[2]s`,
k.configurationsTableFQN, strings.Join(upsertValues, ", "))
return upsertStmt, upsertQueryArgs, nil
}
// constructValidationStmtAndArgs constructs the statement and query arguments
// needed to validate that the spans being upserted don't violate table
// invariants (spans are non overlapping).
func (k *KVAccessor) constructValidationStmtAndArgs(
toUpsert []roachpb.SpanConfigEntry,
) (string, []interface{}) {
// We want to validate that upserting spans does not break the invariant
// that spans in the table are non-overlapping. We only need to validate
// the spans that are being upserted, and can use a query similar to
// what we do in GetSpanConfigEntriesFor. For a single upserted span, we
// want effectively validate using:
//
// SELECT count(*) = 1 FROM system.span_configurations
// WHERE start_key < $end AND end_key > $start
//
// Applying the GetSpanConfigEntriesFor treatment, we can arrive at:
//
// SELECT count(*) = 1 FROM (
// SELECT * FROM span_configurations
// WHERE start_key >= 100 AND start_key < 105
// UNION ALL
// SELECT * FROM (
// SELECT * FROM span_configurations
// WHERE start_key < 100 ORDER BY start_key DESC LIMIT 1
// ) WHERE end_key > 100
// )
//
// To batch multiple query spans into the same statement, we make use of
// ALL and UNION ALL.
//
// SELECT true = ALL(
// ( ... validation statement for 1st query span ...),
// UNION ALL
// ( ... validation statement for 2nd query span ...),
// ...
// )
//
var validationInnerStmtBuilder strings.Builder
validationQueryArgs := make([]interface{}, len(toUpsert)*2)
for i, entry := range toUpsert {
if i > 0 {
validationInnerStmtBuilder.WriteString(`UNION ALL`)
}
startKeyIdx, endKeyIdx := i*2, (i*2)+1
validationQueryArgs[startKeyIdx] = entry.Span.Key
validationQueryArgs[endKeyIdx] = entry.Span.EndKey
fmt.Fprintf(&validationInnerStmtBuilder, `
SELECT count(*) = 1 FROM (
SELECT start_key, end_key, config FROM %[1]s
WHERE start_key >= $%[2]d AND start_key < $%[3]d
UNION ALL
SELECT start_key, end_key, config FROM (
SELECT start_key, end_key, config FROM %[1]s
WHERE start_key < $%[2]d ORDER BY start_key DESC LIMIT 1
) WHERE end_key > $%[2]d
)
`,
k.configurationsTableFQN, // [1]
startKeyIdx+1, // [2] -- prepared statement placeholder (1-indexed)
endKeyIdx+1, // [3] -- prepared statement placeholder (1-indexed)
)
}
validationStmt := fmt.Sprintf("SELECT true = ALL(%s)", validationInnerStmtBuilder.String())
return validationStmt, validationQueryArgs
}
// validateUpdateArgs returns an error the arguments to UpdateSpanConfigEntries
// are malformed. All spans included in the toDelete and toUpsert list are
// expected to be valid and to have non-empty end keys. Spans are also expected
// to be non-overlapping with other spans in the same list.
func validateUpdateArgs(toDelete []roachpb.Span, toUpsert []roachpb.SpanConfigEntry) error {
spansToUpdate := func(ents []roachpb.SpanConfigEntry) []roachpb.Span {
spans := make([]roachpb.Span, len(ents))
for i, ent := range ents {
spans[i] = ent.Span
}
return spans
}(toUpsert)
for _, list := range [][]roachpb.Span{toDelete, spansToUpdate} {
if err := validateSpans(list); err != nil {
return err
}
spans := make([]roachpb.Span, len(list))
copy(spans, list)
sort.Sort(roachpb.Spans(spans))
for i := range spans {
if i == 0 {
continue
}
if spans[i].Overlaps(spans[i-1]) {
return errors.AssertionFailedf("overlapping spans %s and %s in same list",
spans[i-1], spans[i])
}
}
}
return nil
}
// validateSpans returns an error if any of the spans are invalid or have an
// empty end key.
func validateSpans(spans []roachpb.Span) error {
for _, span := range spans {
if !span.Valid() || len(span.EndKey) == 0 {
return errors.AssertionFailedf("invalid span: %s", span)
}
}
return nil
}