Skip to content

Commit

Permalink
Force restartPolicy "Never" to prevent runner pods from stucking in T…
Browse files Browse the repository at this point in the history
…erminating when the container disappeared

Ref #1369
  • Loading branch information
mumoshu committed May 12, 2022
1 parent 4053ab3 commit 988389a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 12 deletions.
12 changes: 6 additions & 6 deletions controllers/new_runner_pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func TestNewRunnerPod(t *testing.T) {
},
},
},
RestartPolicy: corev1.RestartPolicyOnFailure,
RestartPolicy: corev1.RestartPolicyNever,
},
}

Expand Down Expand Up @@ -245,7 +245,7 @@ func TestNewRunnerPod(t *testing.T) {
},
},
},
RestartPolicy: corev1.RestartPolicyOnFailure,
RestartPolicy: corev1.RestartPolicyNever,
},
}

Expand Down Expand Up @@ -327,7 +327,7 @@ func TestNewRunnerPod(t *testing.T) {
},
},
},
RestartPolicy: corev1.RestartPolicyOnFailure,
RestartPolicy: corev1.RestartPolicyNever,
},
}

Expand Down Expand Up @@ -593,7 +593,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
},
},
},
RestartPolicy: corev1.RestartPolicyOnFailure,
RestartPolicy: corev1.RestartPolicyNever,
},
}

Expand Down Expand Up @@ -694,7 +694,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
},
},
},
RestartPolicy: corev1.RestartPolicyOnFailure,
RestartPolicy: corev1.RestartPolicyNever,
},
}

Expand Down Expand Up @@ -795,7 +795,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
},
},
},
RestartPolicy: corev1.RestartPolicyOnFailure,
RestartPolicy: corev1.RestartPolicyNever,
},
}

Expand Down
4 changes: 1 addition & 3 deletions controllers/pod_runner_token_injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,7 @@ func (t *PodRunnerTokenInjector) Handle(ctx context.Context, req admission.Reque

updated.Annotations[AnnotationKeyTokenExpirationDate] = ts

if pod.Spec.RestartPolicy != corev1.RestartPolicyOnFailure {
updated.Spec.RestartPolicy = corev1.RestartPolicyOnFailure
}
forceRunnerPodRestartPolicyNever(updated)

buf, err := json.Marshal(updated)
if err != nil {
Expand Down
4 changes: 1 addition & 3 deletions controllers/runner_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -629,9 +629,7 @@ func newRunnerPod(runnerName string, template corev1.Pod, runnerSpec v1alpha1.Ru

pod := template.DeepCopy()

if pod.Spec.RestartPolicy == "" {
pod.Spec.RestartPolicy = "OnFailure"
}
forceRunnerPodRestartPolicyNever(pod)

if mtu := runnerSpec.DockerMTU; mtu != nil && dockerdInRunner {
runnerContainer.Env = append(runnerContainer.Env, []corev1.EnvVar{
Expand Down
22 changes: 22 additions & 0 deletions controllers/runner_pod.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package controllers

import corev1 "k8s.io/api/core/v1"

// Force the runner pod managed by either RunnerDeployment and RunnerSet to have restartPolicy=Never.
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/1369 for more context.
//
// This is to prevent runner pods from stucking in Terminating when a K8s node disappeared along with the runnr pod and the runner container within it.
//
// Previously, we used restartPolicy of OnFailure, it turned wrong later, and therefore we now set Never.
//
// When the restartPolicy is OnFailure and the node disappeared, runner pods on the node seem to stuck in state.terminated==nil, state.waiting!=nil, and state.lastTerminationState!=nil,
// and will ever become Running.
// It's probably due to that the node onto which the pods have been scheduled will ever come back, hence the container restart attempt swill ever succeed,
// the pods stuck waiting for successful restarts forever.
//
// By forcing runner pods to never restart, we hope there will be no chances of pods being stuck waiting.
func forceRunnerPodRestartPolicyNever(pod *corev1.Pod) {
if pod.Spec.RestartPolicy != corev1.RestartPolicyNever {
pod.Spec.RestartPolicy = corev1.RestartPolicyNever
}
}

0 comments on commit 988389a

Please sign in to comment.