Skip to content

Commit

Permalink
Merge pull request kubernetes#118772 from kannon92/terminating-pod-gc
Browse files Browse the repository at this point in the history
KEP-3939: pod gc changes for pod replacement policy kep
  • Loading branch information
k8s-ci-robot committed Jul 18, 2023
2 parents 3be3b86 + e38ab6d commit d1d86da
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 2 deletions.
8 changes: 6 additions & 2 deletions pkg/controller/podgc/gc_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,11 @@ func (gcc *PodGCController) markFailedAndDeletePod(ctx context.Context, pod *v1.
func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Context, pod *v1.Pod, condition *corev1apply.PodConditionApplyConfiguration) error {
logger := klog.FromContext(ctx)
logger.Info("PodGC is force deleting Pod", "pod", klog.KObj(pod))
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
// Patch the pod to make sure it is transitioned to the Failed phase before deletion.
// This is needed for the JobPodReplacementPolicy feature to make sure Job replacement pods are created.
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-apps/3939-allow-replacement-when-fully-terminated#risks-and-mitigations
// for more details.
if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) || utilfeature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {

// Mark the pod as failed - this is especially important in case the pod
// is orphaned, in which case the pod would remain in the Running phase
Expand All @@ -357,7 +361,7 @@ func (gcc *PodGCController) markFailedAndDeletePodWithCondition(ctx context.Cont
// PodGC it means that it is in the Failed phase, so sending the
// condition will not be re-attempted.
podApply.Status.WithPhase(v1.PodFailed)
if condition != nil {
if condition != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
podApply.Status.WithConditions(condition)
}
if _, err := gcc.kubeClient.CoreV1().Pods(pod.Namespace).ApplyStatus(ctx, podApply, metav1.ApplyOptions{FieldManager: fieldManager, Force: true}); err != nil {
Expand Down
10 changes: 10 additions & 0 deletions pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,14 @@ const (
// that have never been unsuspended before.
JobMutableNodeSchedulingDirectives featuregate.Feature = "JobMutableNodeSchedulingDirectives"

// owner: @kannon92
// kep : https://kep.k8s.io/3939
// alpha: v1.28
//
// Allow users to specify recreating pods of a job only when
// pods have fully terminated.
JobPodReplacementPolicy featuregate.Feature = "JobPodReplacementPolicy"

// owner: @mimowo
// kep: https://kep.k8s.io/3329
// alpha: v1.25
Expand Down Expand Up @@ -1033,6 +1041,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

JobPodFailurePolicy: {Default: true, PreRelease: featuregate.Beta},

JobPodReplacementPolicy: {Default: false, PreRelease: featuregate.Alpha},

JobReadyPods: {Default: true, PreRelease: featuregate.Beta},

JobTrackingWithFinalizers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.28
Expand Down
29 changes: 29 additions & 0 deletions test/integration/podgc/podgc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
tests := map[string]struct {
enablePodDisruptionConditions bool
enableJobPodReplacementPolicy bool
phase v1.PodPhase
wantPhase v1.PodPhase
wantDisruptionTarget *v1.PodCondition
Expand All @@ -56,6 +57,24 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
Message: "PodGC: node no longer exists",
},
},
"PodDisruptionConditions and PodReplacementPolicy enabled": {
enablePodDisruptionConditions: true,
enableJobPodReplacementPolicy: true,
phase: v1.PodPending,
wantPhase: v1.PodFailed,
wantDisruptionTarget: &v1.PodCondition{
Type: v1.DisruptionTarget,
Status: v1.ConditionTrue,
Reason: "DeletionByPodGC",
Message: "PodGC: node no longer exists",
},
},
"Only PodReplacementPolicy enabled; no PodDisruptionCondition": {
enablePodDisruptionConditions: false,
enableJobPodReplacementPolicy: true,
phase: v1.PodPending,
wantPhase: v1.PodFailed,
},
"PodDisruptionConditions disabled": {
enablePodDisruptionConditions: false,
phase: v1.PodPending,
Expand All @@ -76,6 +95,7 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
for name, test := range tests {
t.Run(name, func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)()
testCtx := setup(t, "podgc-orphaned")
cs := testCtx.ClientSet

Expand Down Expand Up @@ -151,6 +171,7 @@ func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) {
func TestTerminatingOnOutOfServiceNode(t *testing.T) {
tests := map[string]struct {
enablePodDisruptionConditions bool
enableJobPodReplacementPolicy bool
withFinalizer bool
wantPhase v1.PodPhase
}{
Expand All @@ -172,11 +193,19 @@ func TestTerminatingOnOutOfServiceNode(t *testing.T) {
enablePodDisruptionConditions: false,
withFinalizer: false,
},
"pod has phase changed when PodDisruptionConditions disabled, but JobPodReplacementPolicy enabled": {
enablePodDisruptionConditions: false,
enableJobPodReplacementPolicy: true,
withFinalizer: true,
wantPhase: v1.PodFailed,
},
}

for name, test := range tests {
t.Run(name, func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)()
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)()
testCtx := setup(t, "podgc-out-of-service")
cs := testCtx.ClientSet

Expand Down

0 comments on commit d1d86da

Please sign in to comment.