Merge #40464
40464: storage: kv.atomic_replication_changes=true r=nvanbenschoten a=tbg

I ran the experiments in #40370 (comment) on (essentially) this branch and
everything passed.

I'm going to run another five instances of mixed-headroom and headroom with
this change to shake out anything else that I might've missed.

Release note (general change): atomic replication changes are now enabled by
default.

Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
craig[bot] and tbg committed Sep 8, 2019
2 parents 47bb2a5 + 8c910ec commit 9dc62d4
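For operators who want to opt back out after picking up this new default, the setting can still be changed at runtime with SET CLUSTER SETTING. The snippet below is not part of the commit; it is a minimal sketch that assumes a locally reachable insecure node on port 26257 and the standard Postgres-wire Go driver.

package main

import (
	"database/sql"
	"log"

	_ "github.com/lib/pq" // CockroachDB speaks the Postgres wire protocol.
)

func main() {
	// Connection string is illustrative; adjust user, host, port, and TLS
	// options for your cluster.
	db, err := sql.Open("postgres", "postgresql://root@localhost:26257/?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Revert to the pre-19.2 behavior (this commit flips the default to true).
	if _, err := db.Exec(
		"SET CLUSTER SETTING kv.atomic_replication_changes.enabled = false",
	); err != nil {
		log.Fatal(err)
	}
}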
Showing 5 changed files with 36 additions and 9 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
@@ -27,7 +27,7 @@
<tr><td><code>kv.allocator.load_based_rebalancing</code></td><td>enumeration</td><td><code>leases and replicas</code></td><td>whether to rebalance based on the distribution of QPS across stores [off = 0, leases = 1, leases and replicas = 2]</td></tr>
<tr><td><code>kv.allocator.qps_rebalance_threshold</code></td><td>float</td><td><code>0.25</code></td><td>minimum fraction away from the mean a store's QPS (such as queries per second) can be before it is considered overfull or underfull</td></tr>
<tr><td><code>kv.allocator.range_rebalance_threshold</code></td><td>float</td><td><code>0.05</code></td><td>minimum fraction away from the mean a store's range count can be before it is considered overfull or underfull</td></tr>
-<tr><td><code>kv.atomic_replication_changes.enabled</code></td><td>boolean</td><td><code>false</code></td><td>use atomic replication changes</td></tr>
+<tr><td><code>kv.atomic_replication_changes.enabled</code></td><td>boolean</td><td><code>true</code></td><td>use atomic replication changes</td></tr>
<tr><td><code>kv.bulk_ingest.batch_size</code></td><td>byte size</td><td><code>16 MiB</code></td><td>the maximum size of the payload in an AddSSTable request</td></tr>
<tr><td><code>kv.bulk_ingest.buffer_increment</code></td><td>byte size</td><td><code>32 MiB</code></td><td>the size by which the BulkAdder attempts to grow its buffer before flushing</td></tr>
<tr><td><code>kv.bulk_ingest.index_buffer_size</code></td><td>byte size</td><td><code>32 MiB</code></td><td>the initial size of the BulkAdder buffer handling secondary index imports</td></tr>
6 changes: 5 additions & 1 deletion pkg/storage/batcheval/cmd_end_transaction.go
@@ -644,7 +644,11 @@ func RunCommitTrigger(
}
if sbt := ct.GetStickyBitTrigger(); sbt != nil {
newDesc := *rec.Desc()
-newDesc.StickyBit = &sbt.StickyBit
+if sbt.StickyBit != (hlc.Timestamp{}) {
+newDesc.StickyBit = &sbt.StickyBit
+} else {
+newDesc.StickyBit = nil
+}
var res result.Result
res.Replicated.State = &storagepb.ReplicaState{
Desc: &newDesc,
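The hunk above makes the commit trigger treat a zero sticky-bit timestamp as "no sticky bit" instead of storing a pointer to a zero value. Below is a minimal, self-contained sketch of the same normalization; Timestamp and RangeDescriptor here are simplified stand-ins for hlc.Timestamp and roachpb.RangeDescriptor, not the real types.

package main

import "fmt"

// Timestamp stands in for hlc.Timestamp; it only needs to be comparable.
type Timestamp struct {
	WallTime int64
	Logical  int32
}

// RangeDescriptor stands in for roachpb.RangeDescriptor. StickyBit is a
// pointer on purpose: nil means "no sticky bit", which keeps the encoding
// identical to what pre-19.2 nodes expect.
type RangeDescriptor struct {
	StickyBit *Timestamp
}

// applyStickyBitTrigger mirrors the pattern in the diff: a non-zero trigger
// timestamp is stored, while a zero one clears the field entirely.
func applyStickyBitTrigger(desc *RangeDescriptor, stickyBit Timestamp) {
	if stickyBit != (Timestamp{}) {
		desc.StickyBit = &stickyBit
	} else {
		desc.StickyBit = nil
	}
}

func main() {
	var desc RangeDescriptor
	applyStickyBitTrigger(&desc, Timestamp{WallTime: 1})
	fmt.Println(desc.StickyBit != nil) // true: sticky bit retained
	applyStickyBitTrigger(&desc, Timestamp{})
	fmt.Println(desc.StickyBit == nil) // true: zero timestamp cleared it
}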
6 changes: 6 additions & 0 deletions pkg/storage/bulk/sst_batcher.go
@@ -247,6 +247,9 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
if splitAt, err := keys.EnsureSafeSplitKey(start); err != nil {
log.Warning(ctx, err)
} else {
+// NB: Passing 'hour' here is technically illegal until 19.2 is
+// active, but the value will be ignored before that, and we don't
+// have access to the cluster version here.
if err := b.db.SplitAndScatter(ctx, splitAt, hour); err != nil {
log.Warning(ctx, err)
}
@@ -289,6 +292,9 @@ func (b *SSTBatcher) doFlush(ctx context.Context, reason int, nextKey roachpb.Ke
log.Warning(ctx, err)
} else {
log.VEventf(ctx, 2, "%s added since last split, splitting/scattering for next range at %v", sz(b.flushedToCurrentRange), end)
+// NB: Passing 'hour' here is technically illegal until 19.2 is
+// active, but the value will be ignored before that, and we don't
+// have access to the cluster version here.
if err := b.db.SplitAndScatter(ctx, splitAt, hour); err != nil {
log.Warningf(ctx, "failed to split and scatter during ingest: %+v", err)
}
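The new comments document a one-way contract: the batcher always passes the expiration, and the split code drops it while the 19.2 version gate is closed (see the adminSplitWithDescriptor hunk further down). A minimal sketch of that contract, using an illustrative versionActive flag and a stand-in Timestamp rather than the real cluster-version and hlc types:

package main

import "fmt"

// Timestamp stands in for hlc.Timestamp.
type Timestamp struct {
	WallTime int64
}

// maybeClearExpiration models the receiving side of the contract: if the
// sticky-bit feature isn't active yet, the expiration is ignored so that
// descriptors older nodes can't read are never written.
func maybeClearExpiration(versionActive bool, expiration Timestamp) Timestamp {
	if !versionActive {
		return Timestamp{}
	}
	return expiration
}

func main() {
	hour := Timestamp{WallTime: 3600} // illustrative stand-in for the batcher's 'hour'
	fmt.Println(maybeClearExpiration(false, hour)) // {0}: dropped before 19.2 is active
	fmt.Println(maybeClearExpiration(true, hour))  // {3600}: honored afterwards
}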
2 changes: 1 addition & 1 deletion pkg/storage/replica.go
@@ -91,7 +91,7 @@ var disableSyncRaftLog = settings.RegisterBoolSetting(
var UseAtomicReplicationChanges = settings.RegisterBoolSetting(
"kv.atomic_replication_changes.enabled",
"use atomic replication changes",
-false,
+true,
)

// MaxCommandSizeFloor is the minimum allowed value for the MaxCommandSize
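Code elsewhere in the package consults this setting before deciding whether to issue atomic (joint-consensus) replication changes. A rough sketch of how a caller or test might read and override it follows; it assumes the 19.2-era settings API (BoolSetting.Get and BoolSetting.Override), and the exact call sites are not shown in this diff.

package example

import (
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage"
)

// atomicChangesEnabled reports whether atomic replication changes should be
// used under the given settings. Get is the standard way to read a
// registered boolean cluster setting.
func atomicChangesEnabled(st *cluster.Settings) bool {
	return storage.UseAtomicReplicationChanges.Get(&st.SV)
}

// disableForTest shows the usual test-only escape hatch: Override flips the
// setting without going through SQL. The signature is assumed from the
// 19.2-era settings package.
func disableForTest(st *cluster.Settings) {
	storage.UseAtomicReplicationChanges.Override(&st.SV, false)
}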
29 changes: 23 additions & 6 deletions pkg/storage/replica_command.go
@@ -140,6 +140,11 @@ func prepareSplitDescs(
rightDesc.Generation = leftDesc.Generation
maybeMarkGenerationComparable(st, rightDesc)

+setStickyBit(rightDesc, expiration)
+return leftDesc, rightDesc
+}
+
+func setStickyBit(desc *roachpb.RangeDescriptor, expiration hlc.Timestamp) {
// TODO(jeffreyxiao): Remove this check in 20.1.
// Note that the client API for splitting has expiration time as
// non-nullable, but the internal representation of a sticky bit is nullable
@@ -149,10 +154,8 @@ func prepareSplitDescs(
// setting it to hlc.Timestamp{}. This check ensures that CPuts would not
// fail on older versions.
if (expiration != hlc.Timestamp{}) {
-rightDesc.StickyBit = &expiration
+desc.StickyBit = &expiration
}
-
-return leftDesc, rightDesc
}

func splitTxnAttempt(
@@ -248,7 +251,7 @@ func splitTxnStickyUpdateAttempt(
return err
}
newDesc := *desc
-newDesc.StickyBit = &expiration
+setStickyBit(&newDesc, expiration)

b := txn.NewBatch()
descKey := keys.RangeDescriptorKey(desc.StartKey)
@@ -264,7 +267,7 @@
Commit: true,
InternalCommitTrigger: &roachpb.InternalCommitTrigger{
StickyBitTrigger: &roachpb.StickyBitTrigger{
-StickyBit: expiration,
+StickyBit: newDesc.GetStickyBit(),
},
},
})
@@ -296,6 +299,16 @@ func (r *Replica) adminSplitWithDescriptor(
delayable bool,
reason string,
) (roachpb.AdminSplitResponse, error) {
+if !r.store.ClusterSettings().Version.IsActive(cluster.VersionStickyBit) {
+// If sticky bits aren't supported yet but we receive one anyway, ignore
+// it. The callers are supposed to only pass hlc.Timestamp{} in that
+// case, but this is violated in at least one case (and there are lots of
+// callers that aren't easy to audit and maintain audited). Take the
+// small risk that the cluster version is actually active (but we don't
+// know it yet) instead of risking broken descriptors.
+args.ExpirationTime = hlc.Timestamp{}
+}
+
var err error
// The split queue doesn't care about the set of replicas, so if we somehow
// are being handed one that's in a joint state, finalize that before
@@ -456,7 +469,11 @@ func (r *Replica) adminUnsplitWithDescriptor(
}

newDesc := *desc
-newDesc.StickyBit = &hlc.Timestamp{}
+// Use nil instead of &zero until 20.1; this field is new in 19.2. We
+// could use &zero here because the sticky bit will never be populated
+// before the cluster version reaches 19.2 and the early return above
+// already handles that case, but nothing is won in doing so.
+newDesc.StickyBit = nil
descKey := keys.RangeDescriptorKey(newDesc.StartKey)

b := txn.NewBatch()
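Two details in this file lean on a nil-safe getter: splitTxnStickyUpdateAttempt now feeds the trigger from newDesc.GetStickyBit(), and adminUnsplitWithDescriptor clears the field with nil rather than a pointer to a zero timestamp. The sketch below shows the getter behavior this relies on, again with simplified stand-in types rather than the generated roachpb code.

package main

import "fmt"

// Timestamp and RangeDescriptor are simplified stand-ins for the real
// hlc/roachpb types.
type Timestamp struct {
	WallTime int64
}

type RangeDescriptor struct {
	StickyBit *Timestamp
}

// GetStickyBit mirrors the nil-safe getter the diff depends on: callers
// always receive a value, and a missing sticky bit comes back as the zero
// Timestamp. This is what lets the trigger carry a non-nullable field even
// when the descriptor stores nil.
func (r *RangeDescriptor) GetStickyBit() Timestamp {
	if r == nil || r.StickyBit == nil {
		return Timestamp{}
	}
	return *r.StickyBit
}

func main() {
	unsplit := &RangeDescriptor{StickyBit: nil} // what adminUnsplitWithDescriptor writes
	fmt.Println(unsplit.GetStickyBit())         // {0}: zero value, no nil dereference

	split := &RangeDescriptor{StickyBit: &Timestamp{WallTime: 42}}
	fmt.Println(split.GetStickyBit()) // {42}
}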
