-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
spanconfig.go
241 lines (226 loc) · 11.1 KB
/
spanconfig.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package spanconfig
import (
"context"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
)
// KVAccessor mediates access to KV span configurations pertaining to a given
// tenant.
type KVAccessor interface {
	// GetSpanConfigEntriesFor returns the span configurations that overlap
	// with the given spans.
	GetSpanConfigEntriesFor(
		ctx context.Context, spans []roachpb.Span,
	) ([]roachpb.SpanConfigEntry, error)

	// UpdateSpanConfigEntries updates configurations for the given spans.
	//
	// This is a "targeted" API. Spans listed in toDelete are expected to have
	// been present with the exact same bounds. Likewise, spans in toUpsert
	// that are being updated with new configs are expected to have been
	// present with the same bounds.
	//
	// To divvy up an existing span into multiple others with distinct
	// configs, callers are to issue a delete for the previous span and
	// upserts for the new ones.
	UpdateSpanConfigEntries(
		ctx context.Context, toDelete []roachpb.Span, toUpsert []roachpb.SpanConfigEntry,
	) error
}
// KVSubscriber presents a consistent[1] snapshot of a StoreReader that's
// incrementally maintained with changes made to the global span configurations
// state (system.span_configurations). The maintenance happens transparently;
// callers can subscribe to learn about what key spans may have seen a
// configuration change. After learning about a span update through a callback
// invocation, subscribers can consult the embedded StoreReader to retrieve an
// up-to-date[2] config for the updated span. The callback is called in a single
// goroutine; it should avoid doing any long-running or blocking work.
//
// When a callback is first installed, it's invoked with the [min,max) span --
// a shorthand to indicate that subscribers should consult the StoreReader for
// all spans of interest. Subsequent updates are of the more incremental kind.
// It's possible that the span updates received are no-ops, i.e. consulting the
// StoreReader for the given span would still retrieve the last config observed
// for the span[3].
//
// [1]: The contents of the StoreReader at t1 correspond exactly to the
// contents of the global span configuration state at t0 where t0 <= t1. If
// the StoreReader is read from at t2 where t2 > t1, it's guaranteed to
// observe a view of the global state at t >= t0.
//
// [2]: For the canonical KVSubscriber implementation, this is typically lagging
// by the closed timestamp target duration.
//
// [3]: The canonical KVSubscriber implementation is bounced whenever errors
// occur, which may result in the re-transmission of earlier updates
// (typically through a coarsely targeted [min,max) span).
type KVSubscriber interface {
	// StoreReader is the read-only view subscribers consult after being
	// notified of an update.
	StoreReader

	// Subscribe installs a callback that is invoked with key spans that may
	// have seen a configuration change. On installation the callback is
	// first invoked with the [min,max) span. The callback should not do
	// long-running or blocking work.
	Subscribe(func(updated roachpb.Span))
}
// SQLTranslator translates SQL descriptors and their corresponding zone
// configurations to constituent spans and span configurations.
//
// Concretely, for the following zone configuration hierarchy:
//
//	CREATE DATABASE db;
//	CREATE TABLE db.t1();
//	ALTER DATABASE db CONFIGURE ZONE USING num_replicas=7;
//	ALTER TABLE db.t1 CONFIGURE ZONE USING num_voters=5;
//
// The SQLTranslator produces the following translation (represented as a diff
// against RANGE DEFAULT for brevity):
//
//	Table/5{3-4}                  num_replicas=7 num_voters=5
type SQLTranslator interface {
	// Translate generates the span configuration state given a list of
	// {descriptor, named zone} IDs, along with the timestamp at which the
	// translation is valid. No entry is returned for an ID that doesn't
	// exist or that is dropped.
	//
	// Translation proceeds in two steps for every ID:
	//
	//  1. Descend the zone configuration hierarchy, with the ID as the
	//     root, to accumulate the IDs of all leaf objects. Leaf objects are
	//     tables and named zones (other than RANGE DEFAULT) which have
	//     actual span configurations associated with them, as opposed to
	//     non-leaf nodes that only serve to hold zone configurations for
	//     inheritance purposes.
	//  2. For each accumulated ID, generate <span, span config> tuples by
	//     following up the inheritance chain to fully hydrate the span
	//     configuration.
	//
	// Translate also accounts for and negotiates subzone spans.
	Translate(
		ctx context.Context, ids descpb.IDs,
	) ([]roachpb.SpanConfigEntry, hlc.Timestamp, error)
}
// FullTranslate translates the entire SQL zone configuration state to the
// span configuration state using the given SQLTranslator. It returns the
// translated entries together with the timestamp at which the translation is
// valid, or whatever error the translator reports.
func FullTranslate(
	ctx context.Context, s SQLTranslator,
) ([]roachpb.SpanConfigEntry, hlc.Timestamp, error) {
	// RANGE DEFAULT is the root of all zone configurations (including other
	// named zones for the system tenant), so translating from it alone
	// yields the entire span configuration state.
	rootIDs := descpb.IDs{keys.RootNamespaceID}
	return s.Translate(ctx, rootIDs)
}
// SQLWatcher watches for events on system.zones and system.descriptors.
type SQLWatcher interface {
	// WatchForSQLUpdates watches for changes to zones and descriptors
	// starting at the given timestamp (exclusive), informing callers through
	// the handler callback.
	//
	// The handler is invoked from time to time with a list of updates and a
	// checkpointTS. Invocations provide the following semantics:
	//
	//  1. Calls to the handler are serial.
	//  2. The timestamp supplied to the handler is monotonically increasing.
	//  3. The list of DescriptorUpdates supplied to the handler includes all
	//     events in the window (prevInvocationCheckpointTS, checkpointTS].
	//  4. No further calls to the handler are made if an invocation returns
	//     an error.
	//
	// Taken together, these guarantees let users of this interface persist
	// the checkpointTS and later use it to re-establish the SQLWatcher
	// without missing any updates.
	WatchForSQLUpdates(
		ctx context.Context,
		startTS hlc.Timestamp,
		handler func(
			ctx context.Context, updates []DescriptorUpdate, checkpointTS hlc.Timestamp,
		) error,
	) error
}
// ReconciliationDependencies captures what's needed by the span config
// reconciliation job to perform its task. The job is responsible for
// reconciling a tenant's zone configurations with the cluster's span
// configurations.
type ReconciliationDependencies interface {
	// KVAccessor mediates access to the KV span configurations pertaining
	// to a given tenant.
	KVAccessor

	// SQLTranslator translates SQL descriptors and their zone
	// configurations to spans and span configurations.
	SQLTranslator

	// SQLWatcher watches for events on system.zones and
	// system.descriptors.
	SQLWatcher
}
// Store is a data structure used to store spans and their corresponding
// configs. It is the union of the write-only (StoreWriter) and read-only
// (StoreReader) portions of the interface.
type Store interface {
	StoreWriter
	StoreReader
}
// StoreWriter is the write-only portion of the Store interface.
type StoreWriter interface {
	// Apply applies the given update[1]. It returns the existing spans that
	// were deleted, and the entries that were newly added, in order to make
	// room for the update. Provided the update is not a no-op, the deleted
	// list can double as a list of the overlapping spans in the Store[2].
	//
	// Span configs are stored in non-overlapping fashion. When an update
	// overlaps with existing configs, the existing configs are deleted. If
	// the overlap is only partial, the non-overlapping components of the
	// existing configs are re-added. If the update itself is adding an
	// entry, that too is added. This is best illustrated with the following
	// example, where [--- X --) is a span with config X:
	//
	//	Store    | [--- A ----)[------------- B -----------)[---------- C -----)
	//	Update   |             [------------------ D -------------)
	//	         |
	//	Deleted  |             [------------- B -----------)[---------- C -----)
	//	Added    |             [------------------ D -------------)[--- C -----)
	//	Store*   | [--- A ----)[------------------ D -------------)[--- C -----)
	//
	// TODO(irfansharif): We'll make use of the dryrun option in a future PR
	// when wiring up the reconciliation job to use the KVAccessor. Since the
	// KVAccessor is a "targeted" API (the spans being deleted/upserted have
	// to already be present with the exact same bounds), we'll dryrun an
	// update against a StoreWriter (pre-populated with the entries present
	// in KV) to generate the targeted deletes and upserts we'd need to
	// issue. After successfully installing them in KV, we can keep our
	// StoreWriter up-to-date by actually applying the update.
	//
	// There's also the question of a "full reconciliation pass". We'll be
	// generating updates reactively, listening in on changes to
	// system.{descriptor,zones} (see SQLWatcher). It's possible then for a
	// suspended tenant's table history to be GC-ed away and for its
	// SQLWatcher to never detect that a certain table/index/partition has
	// been deleted. Left as is, this results in us never issuing a
	// corresponding span config deletion request. We'd be leaving a bunch of
	// delete-able span configs lying around, and a bunch of empty ranges as
	// a result of those. A "full reconciliation pass" is our attempt to find
	// all these extraneous entries in KV and to delete them.
	//
	// We can use a StoreWriter here too (one that's pre-populated with the
	// contents of KVAccessor, as before). We'd iterate through all
	// descriptors, find all overlapping spans, issue KVAccessor deletes for
	// them, and upsert the descriptor's span config[3]. As for the
	// StoreWriter itself, we'd simply delete the overlapping entries. After
	// iterating through all the descriptors, we'd finally issue KVAccessor
	// deletes for all span configs still remaining in the Store.
	//
	// TODO(irfansharif): The descriptions above presume holding the entire
	// set of span configs in memory, but we could break away from that by
	// adding pagination + retrieval limit to the GetSpanConfigEntriesFor
	// API. We'd then paginate through chunks of the keyspace at a time, do a
	// "full reconciliation pass" over just that chunk, and continue.
	//
	// [1]: Unless dryrun is true. We'll still generate the same
	// {deleted,added} lists.
	//
	// [2]: We could alternatively expose a GetAllOverlapping() API to make
	// things clearer.
	//
	// [3]: We could skip the delete + upsert dance if the descriptor's exact
	// span config entry already exists in KV. Using Apply (dryrun=true)
	// against a StoreWriter (populated using KVAccessor contents) using the
	// descriptor's span config entry would return empty lists, indicating a
	// no-op.
	Apply(
		ctx context.Context, update Update, dryrun bool,
	) (deleted []roachpb.Span, added []roachpb.SpanConfigEntry)
}
// StoreReader is the read-only portion of the Store interface. It doubles as
// an adaptor interface for config.SystemConfig.
//
// NOTE(review): the per-method descriptions below are inferred from the
// method names and signatures only; confirm against the implementations.
type StoreReader interface {
	// NeedsSplit presumably reports whether the key range bounded by start
	// and end needs to be split.
	NeedsSplit(ctx context.Context, start roachpb.RKey, end roachpb.RKey) bool

	// ComputeSplitKey presumably returns the key at which the key range
	// bounded by start and end should be split.
	ComputeSplitKey(ctx context.Context, start roachpb.RKey, end roachpb.RKey) roachpb.RKey

	// GetSpanConfigForKey returns a span config for the given key, or an
	// error.
	GetSpanConfigForKey(
		ctx context.Context, key roachpb.RKey,
	) (roachpb.SpanConfig, error)
}