Skip to content

Commit

Permalink
Merge #39687
Browse files Browse the repository at this point in the history
39687: storage: increase the load-based splitting QPS threshold from 250 to 2500 r=ajwerner a=ajwerner

Recent improvements in write efficiency and batching led to a re-evaluation
of the reasons for load-based splitting. Intuitively, fewer splits ought to
offer increased batching opportunities while more splits ought to offer
increased concurrency. Above a given number it is not obvious that
increased concurrency will translate effectively to increased parallelism.

Experimental evidence shows that the right threshold for load-based splitting
is now closer to 2500 than 250. It also shows that over-splitting can have
negative effects on latency and throughput.

Load-based splitting additionally remains important for the opportunity it
provides to balance load. Load balancing, however, is not currently part of the
splitting heuristic.

The second commit in the PR adds roachtests which do not perform any manual splits. 

Release note (performance improvement): Adjust load-based splitting QPS
threshold to avoid over-splitting.

Co-authored-by: Andrew Werner <ajwerner@cockroachlabs.com>
  • Loading branch information
craig[bot] and ajwerner committed Aug 16, 2019
2 parents a00215b + 0099761 commit d6193de
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 3 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
<tr><td><code>kv.range_merge.queue_enabled</code></td><td>boolean</td><td><code>true</code></td><td>whether the automatic merge queue is enabled</td></tr>
<tr><td><code>kv.range_merge.queue_interval</code></td><td>duration</td><td><code>1s</code></td><td>how long the merge queue waits between processing replicas (WARNING: may compromise cluster stability or correctness; do not edit without supervision)</td></tr>
<tr><td><code>kv.range_split.by_load_enabled</code></td><td>boolean</td><td><code>true</code></td><td>allow automatic splits of ranges based on where load is concentrated</td></tr>
<tr><td><code>kv.range_split.load_qps_threshold</code></td><td>integer</td><td><code>250</code></td><td>the QPS over which, the range becomes a candidate for load based splitting</td></tr>
<tr><td><code>kv.range_split.load_qps_threshold</code></td><td>integer</td><td><code>2500</code></td><td>the QPS over which, the range becomes a candidate for load based splitting</td></tr>
<tr><td><code>kv.rangefeed.concurrent_catchup_iterators</code></td><td>integer</td><td><code>64</code></td><td>number of rangefeeds catchup iterators a store will allow concurrently before queueing</td></tr>
<tr><td><code>kv.rangefeed.enabled</code></td><td>boolean</td><td><code>false</code></td><td>if set, rangefeed registration is enabled</td></tr>
<tr><td><code>kv.snapshot_rebalance.max_rate</code></td><td>byte size</td><td><code>8.0 MiB</code></td><td>the rate limit (bytes/sec) to use for rebalance and upreplication snapshots</td></tr>
Expand Down
24 changes: 23 additions & 1 deletion pkg/cmd/roachtest/kv.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,25 @@ func registerKV(r *testRegistry) {
readPercent int
batchSize int
blockSize int
splits int // 0 implies default, negative implies 0
encryption bool
sequential bool
duration time.Duration
tags []string
}
computeNumSplits := func(opts kvOptions) int {
// TODO(ajwerner): set this default to a more sane value or remove it and
// rely on load-based splitting.
const defaultNumSplits = 1000
switch {
case opts.splits == 0:
return defaultNumSplits
case opts.splits < 0:
return 0
default:
return opts.splits
}
}
runKV := func(ctx context.Context, t *test, c *cluster, opts kvOptions) {
nodes := c.spec.NodeCount - 1
c.Put(ctx, cockroach, "./cockroach", c.Range(1, nodes))
Expand All @@ -49,7 +63,8 @@ func registerKV(r *testRegistry) {
m := newMonitor(ctx, c, c.Range(1, nodes))
m.Go(func(ctx context.Context) error {
concurrency := ifLocal("", " --concurrency="+fmt.Sprint(nodes*64))
splits := " --splits=1000"

splits := " --splits=" + strconv.Itoa(computeNumSplits(opts))
if opts.duration == 0 {
opts.duration = 10 * time.Minute
}
Expand Down Expand Up @@ -89,8 +104,12 @@ func registerKV(r *testRegistry) {
{nodes: 1, cpus: 32, readPercent: 95},
{nodes: 3, cpus: 8, readPercent: 0},
{nodes: 3, cpus: 8, readPercent: 95},
{nodes: 3, cpus: 8, readPercent: 0, splits: -1 /* no splits */},
{nodes: 3, cpus: 8, readPercent: 95, splits: -1 /* no splits */},
{nodes: 3, cpus: 32, readPercent: 0},
{nodes: 3, cpus: 32, readPercent: 95},
{nodes: 3, cpus: 32, readPercent: 0, splits: -1 /* no splits */},
{nodes: 3, cpus: 32, readPercent: 95, splits: -1 /* no splits */},

// Configs with large block sizes.
{nodes: 3, cpus: 8, readPercent: 0, blockSize: 1 << 12 /* 4 KB */},
Expand Down Expand Up @@ -143,6 +162,9 @@ func registerKV(r *testRegistry) {
if opts.blockSize != 0 { // support legacy test name which didn't include block size
nameParts = append(nameParts, fmt.Sprintf("size=%dkb", opts.blockSize>>10))
}
if opts.splits != 0 { // support legacy test name which didn't include splits
nameParts = append(nameParts, fmt.Sprintf("splt=%d", computeNumSplits(opts)))
}
if opts.sequential {
nameParts = append(nameParts, fmt.Sprintf("seq"))
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/storage/replica_split_load.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ var SplitByLoadEnabled = settings.RegisterBoolSetting(
// SplitByLoadQPSThreshold wraps "kv.range_split.load_qps_threshold", the QPS
// above which a range becomes a candidate for load-based splitting.
//
// NOTE(review): the diff scrape left both the removed (250) and added (2500)
// default lines inside this declaration, which is not valid Go; this is the
// committed, post-merge form with the single new default.
var SplitByLoadQPSThreshold = settings.RegisterIntSetting(
	"kv.range_split.load_qps_threshold",
	"the QPS over which, the range becomes a candidate for load based splitting",
	2500, // 2500 req/s; raised from 250 after experiments showed over-splitting hurts latency and throughput
)

// SplitByLoadQPSThreshold returns the QPS request rate for a given replica.
Expand Down

0 comments on commit d6193de

Please sign in to comment.