Skip to content

Commit

Permalink
Prevent watch cache starvation, by moving its watch to separate RPC a…
Browse files Browse the repository at this point in the history
…nd add a SeparateCacheWatchRPC feature flag to disable this behavior
  • Loading branch information
serathius committed Mar 4, 2024
1 parent a4eaf6e commit 31d404b
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 16 deletions.
2 changes: 2 additions & 0 deletions pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -1213,6 +1213,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

genericfeatures.RemainingItemCount: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},

genericfeatures.SeparateCacheWatchRPC: {Default: true, PreRelease: featuregate.Beta},

genericfeatures.ServerSideApply: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.29

genericfeatures.ServerSideFieldValidation: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.29
Expand Down
9 changes: 9 additions & 0 deletions staging/src/k8s.io/apiserver/pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,13 @@ const (
// to a chunking list request.
RemainingItemCount featuregate.Feature = "RemainingItemCount"

// owner: @serathius
// beta: v1.30
//
// Allow watch cache to create a watch on a dedicated RPC.
// This prevents watch cache from being starved by other watches.
SeparateCacheWatchRPC featuregate.Feature = "SeparateCacheWatchRPC"

// owner: @apelisse, @lavalamp
// alpha: v1.14
// beta: v1.16
Expand Down Expand Up @@ -319,6 +326,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

RetryGenerateName: {Default: false, PreRelease: featuregate.Alpha},

SeparateCacheWatchRPC: {Default: true, PreRelease: featuregate.Beta},

ServerSideApply: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.29

ServerSideFieldValidation: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.29
Expand Down
13 changes: 11 additions & 2 deletions staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"time"

"go.opentelemetry.io/otel/attribute"
"google.golang.org/grpc/metadata"

"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
Expand Down Expand Up @@ -397,10 +398,18 @@ func NewCacherFromConfig(config Config) (*Cacher, error) {
// so that future reuse does not get a spurious timeout.
<-cacher.timer.C
}
progressRequester := newConditionalProgressRequester(config.Storage.RequestWatchProgress, config.Clock)
var contextMetadata metadata.MD
if utilfeature.DefaultFeatureGate.Enabled(features.SeparateCacheWatchRPC) {
// Add grpc context metadata to watch and progress notify requests done by cacher to:
// * Prevent starvation of watch opened by cacher, by moving it to separate Watch RPC than watch request that bypass cacher.
// * Ensure that progress notification requests are executed on the same Watch RPC as their watch, which is required for it to work.
contextMetadata = metadata.New(map[string]string{"source": "cache"})
}

progressRequester := newConditionalProgressRequester(config.Storage.RequestWatchProgress, config.Clock, contextMetadata)
watchCache := newWatchCache(
config.KeyFunc, cacher.processEvent, config.GetAttrsFunc, config.Versioner, config.Indexers, config.Clock, config.GroupResource, progressRequester)
listerWatcher := NewListerWatcher(config.Storage, config.ResourcePrefix, config.NewListFunc)
listerWatcher := NewListerWatcher(config.Storage, config.ResourcePrefix, config.NewListFunc, contextMetadata)
reflectorName := "storage/cacher.go:" + config.ResourcePrefix

reflector := cache.NewNamedReflector(reflectorName, listerWatcher, obj, watchCache, 0)
Expand Down
30 changes: 21 additions & 9 deletions staging/src/k8s.io/apiserver/pkg/storage/cacher/lister_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package cacher
import (
"context"

"google.golang.org/grpc/metadata"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
Expand All @@ -30,17 +32,19 @@ import (

// listerWatcher opaques storage.Interface to expose cache.ListerWatcher.
type listerWatcher struct {
storage storage.Interface
resourcePrefix string
newListFunc func() runtime.Object
storage storage.Interface
resourcePrefix string
newListFunc func() runtime.Object
contextMetadata metadata.MD
}

// NewListerWatcher returns a storage.Interface backed ListerWatcher.
func NewListerWatcher(storage storage.Interface, resourcePrefix string, newListFunc func() runtime.Object) cache.ListerWatcher {
func NewListerWatcher(storage storage.Interface, resourcePrefix string, newListFunc func() runtime.Object, contextMetadata metadata.MD) cache.ListerWatcher {
return &listerWatcher{
storage: storage,
resourcePrefix: resourcePrefix,
newListFunc: newListFunc,
storage: storage,
resourcePrefix: resourcePrefix,
newListFunc: newListFunc,
contextMetadata: contextMetadata,
}
}

Expand All @@ -59,7 +63,11 @@ func (lw *listerWatcher) List(options metav1.ListOptions) (runtime.Object, error
Predicate: pred,
Recursive: true,
}
if err := lw.storage.GetList(context.TODO(), lw.resourcePrefix, storageOpts, list); err != nil {
ctx := context.Background()
if lw.contextMetadata != nil {
ctx = metadata.NewOutgoingContext(ctx, lw.contextMetadata)
}
if err := lw.storage.GetList(ctx, lw.resourcePrefix, storageOpts, list); err != nil {
return nil, err
}
return list, nil
Expand All @@ -73,5 +81,9 @@ func (lw *listerWatcher) Watch(options metav1.ListOptions) (watch.Interface, err
Recursive: true,
ProgressNotify: true,
}
return lw.storage.Watch(context.TODO(), lw.resourcePrefix, opts)
ctx := context.Background()
if lw.contextMetadata != nil {
ctx = metadata.NewOutgoingContext(ctx, lw.contextMetadata)
}
return lw.storage.Watch(ctx, lw.resourcePrefix, opts)
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func TestCacherListerWatcher(t *testing.T) {
}
}

lw := NewListerWatcher(store, prefix, fn)
lw := NewListerWatcher(store, prefix, fn, nil)

obj, err := lw.List(metav1.ListOptions{})
if err != nil {
Expand Down Expand Up @@ -80,7 +80,7 @@ func TestCacherListerWatcherPagination(t *testing.T) {
}
}

lw := NewListerWatcher(store, prefix, fn)
lw := NewListerWatcher(store, prefix, fn, nil)

obj1, err := lw.List(metav1.ListOptions{Limit: 2})
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func newTestWatchCache(capacity int, indexers *cache.Indexers) *testWatchCache {
wc := &testWatchCache{}
wc.bookmarkRevision = make(chan int64, 1)
wc.stopCh = make(chan struct{})
pr := newConditionalProgressRequester(wc.RequestWatchProgress, &immediateTickerFactory{})
pr := newConditionalProgressRequester(wc.RequestWatchProgress, &immediateTickerFactory{}, nil)
go pr.Run(wc.stopCh)
wc.watchCache = newWatchCache(keyFunc, mockHandler, getAttrsFunc, versioner, indexers, testingclock.NewFakeClock(time.Now()), schema.GroupResource{Resource: "pods"}, pr)
// To preserve behavior of tests that assume a given capacity,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
"sync"
"time"

"google.golang.org/grpc/metadata"

utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"

Expand All @@ -34,10 +36,11 @@ const (
progressRequestPeriod = 100 * time.Millisecond
)

func newConditionalProgressRequester(requestWatchProgress WatchProgressRequester, clock TickerFactory) *conditionalProgressRequester {
func newConditionalProgressRequester(requestWatchProgress WatchProgressRequester, clock TickerFactory, contextMetadata metadata.MD) *conditionalProgressRequester {
pr := &conditionalProgressRequester{
clock: clock,
requestWatchProgress: requestWatchProgress,
contextMetadata: contextMetadata,
}
pr.cond = sync.NewCond(pr.mux.RLocker())
return pr
Expand All @@ -54,6 +57,7 @@ type TickerFactory interface {
type conditionalProgressRequester struct {
clock TickerFactory
requestWatchProgress WatchProgressRequester
contextMetadata metadata.MD

mux sync.RWMutex
cond *sync.Cond
Expand All @@ -63,6 +67,9 @@ type conditionalProgressRequester struct {

func (pr *conditionalProgressRequester) Run(stopCh <-chan struct{}) {
ctx := wait.ContextForChannel(stopCh)
if pr.contextMetadata != nil {
ctx = metadata.NewOutgoingContext(ctx, pr.contextMetadata)
}
go func() {
defer utilruntime.HandleCrash()
<-stopCh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func TestConditionalProgressRequester(t *testing.T) {

func newTestConditionalProgressRequester(clock clock.WithTicker) *testConditionalProgressRequester {
pr := &testConditionalProgressRequester{}
pr.conditionalProgressRequester = newConditionalProgressRequester(pr.RequestWatchProgress, clock)
pr.conditionalProgressRequester = newConditionalProgressRequester(pr.RequestWatchProgress, clock, nil)
return pr
}

Expand Down

0 comments on commit 31d404b

Please sign in to comment.