Skip to content

Commit

Permalink
Add feature flag treat-pod-as-always-schedulable
Browse files Browse the repository at this point in the history
The feature flag allows to declare that Pods in the system will eventually all get scheduled and Revisions should therefore not be marked unschedulable

Signed-off-by: Sascha Schwarze <schwarzs@de.ibm.com>
  • Loading branch information
SaschaSchwarze0 committed Jan 14, 2025
1 parent 19b9a09 commit 5303dc0
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 6 deletions.
11 changes: 10 additions & 1 deletion config/core/configmaps/features.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ metadata:
app.kubernetes.io/component: controller
app.kubernetes.io/version: devel
annotations:
knative.dev/example-checksum: "63a13754"
knative.dev/example-checksum: "0ac06704"
data:
_example: |-
################################
Expand Down Expand Up @@ -242,3 +242,12 @@ data:
# Default queue proxy resource requests and limits to good values for most cases if set.
queueproxy.resource-defaults: "disabled"
# treat-pod-as-always-schedulable can be used to define that Pods in the system will always be
# scheduled, and a Revision should not be marked unschedulable.
# Setting this to `enabled` makes sense if you have cluster-autoscaling set up for you cluster
# where unschedulable Pods trigger the addition of a new Node and are therefore a short and
# transient state.
#
# See https://github.com/knative/serving/issues/14862
treat-pod-as-always-schedulable: "disabled"
3 changes: 3 additions & 0 deletions pkg/apis/config/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ func defaultFeaturesConfig() *Features {
SecurePodDefaults: Disabled,
TagHeaderBasedRouting: Disabled,
AutoDetectHTTP2: Disabled,
TreatPodAsAlwaysSchedulable: Disabled,
}
}

Expand All @@ -126,6 +127,7 @@ func NewFeaturesConfigFromMap(data map[string]string) (*Features, error) {
asFlag("queueproxy.resource-defaults", &nc.QueueProxyResourceDefaults),
asFlag("secure-pod-defaults", &nc.SecurePodDefaults),
asFlag("tag-header-based-routing", &nc.TagHeaderBasedRouting),
asFlag("treat-pod-as-always-schedulable", &nc.TreatPodAsAlwaysSchedulable),
asFlag(FeatureContainerSpecAddCapabilities, &nc.ContainerSpecAddCapabilities),
asFlag(FeaturePodSpecAffinity, &nc.PodSpecAffinity),
asFlag(FeaturePodSpecDNSConfig, &nc.PodSpecDNSConfig),
Expand Down Expand Up @@ -191,6 +193,7 @@ type Features struct {
SecurePodDefaults Flag
TagHeaderBasedRouting Flag
AutoDetectHTTP2 Flag
TreatPodAsAlwaysSchedulable Flag
}

// asFlag parses the value at key as a Flag into the target, if it exists.
Expand Down
29 changes: 29 additions & 0 deletions pkg/apis/config/features_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func TestFeaturesConfiguration(t *testing.T) {
SecurePodDefaults: Enabled,
QueueProxyResourceDefaults: Enabled,
TagHeaderBasedRouting: Enabled,
TreatPodAsAlwaysSchedulable: Enabled,
}),
data: map[string]string{
"multi-container": "Enabled",
Expand All @@ -103,6 +104,7 @@ func TestFeaturesConfiguration(t *testing.T) {
"secure-pod-defaults": "Enabled",
"queueproxy.resource-defaults": "Enabled",
"tag-header-based-routing": "Enabled",
"treat-pod-as-always-schedulable": "Enabled",
},
}, {
name: "multi-container Allowed",
Expand Down Expand Up @@ -671,6 +673,33 @@ func TestFeaturesConfiguration(t *testing.T) {
data: map[string]string{
"kubernetes.podspec-hostnetwork": "Disabled",
},
}, {
name: "treat-pod-as-always-schedulable Allowed",
wantErr: false,
wantFeatures: defaultWith(&Features{
TreatPodAsAlwaysSchedulable: Allowed,
}),
data: map[string]string{
"treat-pod-as-always-schedulable": "Allowed",
},
}, {
name: "treat-pod-as-always-schedulable Enabled",
wantErr: false,
wantFeatures: defaultWith(&Features{
TreatPodAsAlwaysSchedulable: Enabled,
}),
data: map[string]string{
"treat-pod-as-always-schedulable": "Enabled",
},
}, {
name: "treat-pod-as-always-schedulable Disabled",
wantErr: false,
wantFeatures: defaultWith(&Features{
TreatPodAsAlwaysSchedulable: Disabled,
}),
data: map[string]string{
"treat-pod-as-always-schedulable": "Disabled",
},
}}

for _, tt := range configTests {
Expand Down
12 changes: 8 additions & 4 deletions pkg/reconciler/revision/reconcile_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"knative.dev/pkg/kmp"
"knative.dev/pkg/logging"
"knative.dev/pkg/logging/logkey"
apicfg "knative.dev/serving/pkg/apis/config"
v1 "knative.dev/serving/pkg/apis/serving/v1"
"knative.dev/serving/pkg/networking"
"knative.dev/serving/pkg/reconciler/revision/config"
Expand Down Expand Up @@ -87,10 +88,13 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision)

// Update the revision status if pod cannot be scheduled (possibly resource constraints)
// If pod cannot be scheduled then we expect the container status to be empty.
for _, cond := range pod.Status.Conditions {
if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse {
rev.Status.MarkResourcesAvailableFalse(cond.Reason, cond.Message)
break
treatPodAsAlwaysSchedulable := config.FromContext(ctx).Features.TreatPodAsAlwaysSchedulable
if treatPodAsAlwaysSchedulable != apicfg.Enabled {
for _, cond := range pod.Status.Conditions {
if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse {
rev.Status.MarkResourcesAvailableFalse(cond.Reason, cond.Message)
break
}
}
}

Expand Down
35 changes: 34 additions & 1 deletion pkg/reconciler/revision/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,32 @@ func TestReconcile(t *testing.T) {
Object: pa("foo", "pod-schedule-error", WithReachabilityUnreachable),
}},
Key: "foo/pod-schedule-error",
}, {
Name: "surface no pod schedule errors if treat-pod-as-always-schedulable is enabled",
// Test the propagation of the scheduling errors of Pod into the
// revision is not happening when treat-pod-as-always-schedulable
// is enabled.
Objects: []runtime.Object{
Revision("foo", "pod-no-schedule-error",
WithLogURL,
MarkActivating("Deploying", ""),
WithRoutingState(v1.RoutingStateActive, fc),
withDefaultContainerStatuses(),
MarkDeploying("Deploying"),
WithRevisionObservedGeneration(1),
MarkContainerHealthyUnknown("Deploying"),
),
pa("foo", "pod-no-schedule-error", WithReachabilityReachable), // PA can't be ready, since no traffic.
pod(t, "foo", "pod-no-schedule-error", WithUnschedulableContainer("Insufficient energy", "Unschedulable")),
deploy(t, "foo", "pod-no-schedule-error"),
image("foo", "pod-no-schedule-error"),
},
Ctx: defaultconfig.ToContext(context.Background(), &defaultconfig.Config{
Features: &defaultconfig.Features{
TreatPodAsAlwaysSchedulable: defaultconfig.Enabled,
},
}),
Key: "foo/pod-no-schedule-error",
}, {
Name: "ready steady state",
// Test the transition that Reconcile makes when Endpoints become ready on the
Expand Down Expand Up @@ -758,11 +784,18 @@ func TestReconcile(t *testing.T) {
resolver: &nopResolver{},
}

cfg := reconcilerTestConfig()

apiCfgOverride := defaultconfig.FromContext(ctx)
if apiCfgOverride != nil {
cfg.Config.Features = apiCfgOverride.Features
}

return revisionreconciler.NewReconciler(ctx, logging.FromContext(ctx), servingclient.Get(ctx),
listers.GetRevisionLister(), controller.GetEventRecorder(ctx), r,
controller.Options{
ConfigStore: &testConfigStore{
config: reconcilerTestConfig(),
config: cfg,
},
})
}))
Expand Down

0 comments on commit 5303dc0

Please sign in to comment.