Skip to content

Commit

Permalink
admission,goschedstats: reduce scheduler sampling frequency when unde…
Browse files Browse the repository at this point in the history
…rloaded

The goschedstats package re-evaluates the tick interval
every 1s, and ticks at either 1ms or 250ms. 250ms is used when
the CPU is very underloaded.
The admission control code disables slot and token enforcement
if the tick interval is greater than 1ms. This is done since
the reduced frequency of CPULoad could cause us to not adjust
slots fast enough.

Fixes #66881

Release justification: Fix for high-priority issue in new
functionality.

Release note: None
  • Loading branch information
sumeerbhola committed Aug 30, 2021
1 parent d7d56e8 commit 6a8467d
Show file tree
Hide file tree
Showing 6 changed files with 324 additions and 70 deletions.
81 changes: 54 additions & 27 deletions pkg/util/admission/granter.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ type tokenGranter struct {
requester requester
availableBurstTokens int
maxBurstTokens int
skipTokenEnforcement bool
// Optional. Practically, both uses of tokenGranter, for SQLKVResponseWork
// and SQLSQLResponseWork have a non-nil value. We don't expect to use
// memory overload indicators here since memory accounting and disk spilling
Expand All @@ -396,8 +397,9 @@ func (tg *tokenGranter) getPairedRequester() requester {
return tg.requester
}

func (tg *tokenGranter) refillBurstTokens() {
func (tg *tokenGranter) refillBurstTokens(skipTokenEnforcement bool) {
tg.availableBurstTokens = tg.maxBurstTokens
tg.skipTokenEnforcement = skipTokenEnforcement
}

func (tg *tokenGranter) grantKind() grantKind {
Expand All @@ -412,7 +414,7 @@ func (tg *tokenGranter) tryGetLocked() grantResult {
if tg.cpuOverload != nil && tg.cpuOverload.isOverloaded() {
return grantFailDueToSharedResource
}
if tg.availableBurstTokens > 0 {
if tg.availableBurstTokens > 0 || tg.skipTokenEnforcement {
tg.availableBurstTokens--
return grantSuccess
}
Expand Down Expand Up @@ -446,10 +448,12 @@ func (tg *tokenGranter) continueGrantChain(grantChainID grantChainID) {
// KVWork, that are limited by slots (CPU bound work) and/or tokens (IO
// bound work).
type kvGranter struct {
coord *GrantCoordinator
requester requester
usedSlots int
totalSlots int
coord *GrantCoordinator
requester requester
usedSlots int
totalSlots int
skipSlotEnforcement bool

ioTokensEnabled bool
// There is no rate limiting in granting these tokens. That is, they are all
// burst tokens.
Expand Down Expand Up @@ -478,7 +482,7 @@ func (sg *kvGranter) tryGet() bool {
}

func (sg *kvGranter) tryGetLocked() grantResult {
if sg.usedSlots < sg.totalSlots {
if sg.usedSlots < sg.totalSlots || sg.skipSlotEnforcement {
if !sg.ioTokensEnabled || sg.availableIOTokens > 0 {
sg.usedSlots++
if sg.usedSlotsMetric != nil {
Expand Down Expand Up @@ -555,8 +559,11 @@ func (sg *kvGranter) setAvailableIOTokensLocked(tokens int64) {
// StoreGrantCoordinators) for KVWork that uses that store. See the
// NewGrantCoordinators and NewGrantCoordinatorSQL functions.
type GrantCoordinator struct {
settings *cluster.Settings
settings *cluster.Settings
lastCPULoadSamplePeriod time.Duration

// mu is ordered before any mutex acquired in a requester implementation.
// TODO(sumeer): move everything covered by mu into a nested struct.
mu syncutil.Mutex
// NB: Some granters can be nil.
granters [numWorkKinds]granterWithLockedCalls
Expand Down Expand Up @@ -842,20 +849,38 @@ func (coord *GrantCoordinator) GetWorkQueue(workKind WorkKind) *WorkQueue {
return coord.queues[workKind].(*WorkQueue)
}

// CPULoad implements CPULoadListener and is called every 1ms. The same
// frequency is used for refilling the burst tokens since synchronizing the
// two means that the refilled burst can take into account the latest
// schedulers stats (indirectly, via the implementation of
// cpuOverloadIndicator).
// TODO(sumeer): after experimentation, possibly generalize the 1ms ticks used
// for CPULoad.
func (coord *GrantCoordinator) CPULoad(runnable int, procs int) {
// CPULoad implements CPULoadListener and is called periodically (see
// CPULoadListener for details). The same frequency is used for refilling the
// burst tokens since synchronizing the two means that the refilled burst can
// take into account the latest schedulers stats (indirectly, via the
// implementation of cpuOverloadIndicator).
func (coord *GrantCoordinator) CPULoad(runnable int, procs int, samplePeriod time.Duration) {
if coord.lastCPULoadSamplePeriod != 0 && coord.lastCPULoadSamplePeriod != samplePeriod &&
KVAdmissionControlEnabled.Get(&coord.settings.SV) {
log.Infof(context.Background(), "CPULoad switching to period %s", samplePeriod.String())
}
coord.lastCPULoadSamplePeriod = samplePeriod

coord.mu.Lock()
defer coord.mu.Unlock()
coord.numProcs = procs
coord.cpuLoadListener.CPULoad(runnable, procs)
coord.granters[SQLKVResponseWork].(*tokenGranter).refillBurstTokens()
coord.granters[SQLSQLResponseWork].(*tokenGranter).refillBurstTokens()
coord.cpuLoadListener.CPULoad(runnable, procs, samplePeriod)

// Slot adjustment and token refilling require 1ms periods to work well. If
// the CPULoad ticks are less frequent, there is no guarantee that the
// tokens or slots will be sufficient to service requests. This is
// particularly the case for slots where we dynamically adjust them, and
// high contention can suddenly result in high slot utilization even while
// cpu utilization stays low. We don't want to artificially bottleneck
// request processing when we are in this slow CPULoad ticks regime since we
// can't adjust slots or refill tokens fast enough. So we explicitly tell
// the granters to not do token or slot enforcement.
skipEnforcement := samplePeriod > time.Millisecond
coord.granters[SQLKVResponseWork].(*tokenGranter).refillBurstTokens(skipEnforcement)
coord.granters[SQLSQLResponseWork].(*tokenGranter).refillBurstTokens(skipEnforcement)
if coord.granters[KVWork] != nil {
coord.granters[KVWork].(*kvGranter).skipSlotEnforcement = skipEnforcement
}
if coord.grantChainActive && !coord.tryTerminateGrantChain() {
return
}
Expand Down Expand Up @@ -1264,12 +1289,11 @@ type cpuOverloadIndicator interface {
}

// CPULoadListener listens to the latest CPU load information. Currently we
// expect this to be called every 1ms.
// TODO(sumeer): experiment with more smoothing. It is possible that rapid
// slot fluctuation may be resulting in under-utilization at a time scale that
// is not observable at our metrics frequency.
// expect this to be called every 1ms, unless the cpu is extremely
// underloaded. If the samplePeriod is > 1ms, admission control enforcement
// for CPU is disabled.
type CPULoadListener interface {
CPULoad(runnable int, procs int)
CPULoad(runnable int, procs int, samplePeriod time.Duration)
}

// kvSlotAdjuster is an implementer of CPULoadListener and
Expand All @@ -1293,8 +1317,9 @@ type kvSlotAdjuster struct {
var _ cpuOverloadIndicator = &kvSlotAdjuster{}
var _ CPULoadListener = &kvSlotAdjuster{}

func (kvsa *kvSlotAdjuster) CPULoad(runnable int, procs int) {
func (kvsa *kvSlotAdjuster) CPULoad(runnable int, procs int, _ time.Duration) {
threshold := int(KVSlotAdjusterOverloadThreshold.Get(&kvsa.settings.SV))

// Simple heuristic, which worked ok in experiments. More sophisticated ones
// could be devised.
if runnable >= threshold*procs {
Expand Down Expand Up @@ -1332,7 +1357,7 @@ func (kvsa *kvSlotAdjuster) CPULoad(runnable int, procs int) {
}

func (kvsa *kvSlotAdjuster) isOverloaded() bool {
return kvsa.granter.usedSlots >= kvsa.granter.totalSlots
return kvsa.granter.usedSlots >= kvsa.granter.totalSlots && !kvsa.granter.skipSlotEnforcement
}

// sqlNodeCPUOverloadIndicator is the implementation of cpuOverloadIndicator
Expand Down Expand Up @@ -1559,7 +1584,9 @@ func (io *ioLoadListener) adjustTokens(m pebble.Metrics) {
var _ cpuOverloadIndicator = &sqlNodeCPUOverloadIndicator{}
var _ CPULoadListener = &sqlNodeCPUOverloadIndicator{}

func (sn *sqlNodeCPUOverloadIndicator) CPULoad(runnable int, procs int) {
func (sn *sqlNodeCPUOverloadIndicator) CPULoad(
runnable int, procs int, samplePeriod time.Duration,
) {
}

func (sn *sqlNodeCPUOverloadIndicator) isOverloaded() bool {
Expand Down
13 changes: 11 additions & 2 deletions pkg/util/admission/granter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"sort"
"strings"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand Down Expand Up @@ -83,7 +84,7 @@ func (tr *testRequester) getAdmittedCount() uint64 {
// return-grant work=<kind>
// took-without-permission work=<kind>
// continue-grant-chain work=<kind>
// cpu-load runnable=<int> procs=<int>
// cpu-load runnable=<int> procs=<int> [infrequent=<bool>]
// set-io-tokens tokens=<int>
func TestGranterBasic(t *testing.T) {
defer leaktest.AfterTest(t)()
Expand Down Expand Up @@ -159,7 +160,15 @@ func TestGranterBasic(t *testing.T) {
var runnable, procs int
d.ScanArgs(t, "runnable", &runnable)
d.ScanArgs(t, "procs", &procs)
coord.CPULoad(runnable, procs)
infrequent := false
if d.HasArg("infrequent") {
d.ScanArgs(t, "infrequent", &infrequent)
}
samplePeriod := time.Millisecond
if infrequent {
samplePeriod = 250 * time.Millisecond
}
coord.CPULoad(runnable, procs, samplePeriod)
return flushAndReset()

case "set-io-tokens":
Expand Down
80 changes: 80 additions & 0 deletions pkg/util/admission/testdata/granter
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,83 @@ kv: granted in chain 0, and returning true
GrantCoordinator:
(chain: id: 6 active: false index: 0) kv: used: 3, total: 3 io-avail: 0 sql-kv-response: avail: 0
sql-sql-response: avail: 1 sql-leaf-start: used: 2, total: 2 sql-root-start: used: 1, total: 1

#####################################################################
# Test skipping of enforcements when CPULoad has high sampling period.
init-grant-coordinator min-cpu=1 max-cpu=3 sql-kv-tokens=1 sql-sql-tokens=1 sql-leaf=2 sql-root=2
----
GrantCoordinator:
(chain: id: 1 active: false index: 0) kv: used: 0, total: 1 sql-kv-response: avail: 1
sql-sql-response: avail: 1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# No more slots after this slot is granted.
try-get work=kv
----
kv: tryGet returned true
GrantCoordinator:
(chain: id: 1 active: false index: 0) kv: used: 1, total: 1 sql-kv-response: avail: 1
sql-sql-response: avail: 1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# Since no more KV slots, cannot grant token to sql-kv-response.
try-get work=sql-kv-response
----
sql-kv-response: tryGet returned false
GrantCoordinator:
(chain: id: 1 active: false index: 0) kv: used: 1, total: 1 sql-kv-response: avail: 1
sql-sql-response: avail: 1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# Since no more KV slots, cannot grant token to sql-sql-response.
try-get work=sql-sql-response
----
sql-sql-response: tryGet returned false
GrantCoordinator:
(chain: id: 1 active: false index: 0) kv: used: 1, total: 1 sql-kv-response: avail: 1
sql-sql-response: avail: 1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# CPULoad shows overload, so cannot increase KV slots, but since it is
# infrequent, slot and token enforcement is disabled.
cpu-load runnable=20 procs=1 infrequent=true
----
GrantCoordinator:
(chain: id: 1 active: false index: 5) kv: used: 1, total: 1 sql-kv-response: avail: 1
sql-sql-response: avail: 1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# sql-kv-response can get a token.
try-get work=sql-kv-response
----
sql-kv-response: tryGet returned true
GrantCoordinator:
(chain: id: 1 active: false index: 5) kv: used: 1, total: 1 sql-kv-response: avail: 0
sql-sql-response: avail: 1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# sql-kv-response can get another token, even though tokens are exhausted.
try-get work=sql-kv-response
----
sql-kv-response: tryGet returned true
GrantCoordinator:
(chain: id: 1 active: false index: 5) kv: used: 1, total: 1 sql-kv-response: avail: -1
sql-sql-response: avail: 1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# sql-sql-response can get a token.
try-get work=sql-sql-response
----
sql-sql-response: tryGet returned true
GrantCoordinator:
(chain: id: 1 active: false index: 5) kv: used: 1, total: 1 sql-kv-response: avail: -1
sql-sql-response: avail: 0 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# sql-sql-response can get another token, even though tokens are exhausted.
try-get work=sql-sql-response
----
sql-sql-response: tryGet returned true
GrantCoordinator:
(chain: id: 1 active: false index: 5) kv: used: 1, total: 1 sql-kv-response: avail: -1
sql-sql-response: avail: -1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2

# KV can get another slot even though slots are exhausted.
try-get work=kv
----
kv: tryGet returned true
GrantCoordinator:
(chain: id: 1 active: false index: 5) kv: used: 2, total: 1 sql-kv-response: avail: -1
sql-sql-response: avail: -1 sql-leaf-start: used: 0, total: 2 sql-root-start: used: 0, total: 2
6 changes: 5 additions & 1 deletion pkg/util/goschedstats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,9 @@ go_test(
name = "goschedstats_test",
srcs = ["runnable_test.go"],
embed = [":goschedstats"],
deps = ["//pkg/testutils"],
deps = [
"//pkg/testutils",
"//pkg/util/timeutil",
"@com_github_stretchr_testify//require",
],
)
Loading

0 comments on commit 6a8467d

Please sign in to comment.