Skip to content

Commit

Permalink
Fix PercentageRunnersBusy scaling delay (#1579)
Browse files Browse the repository at this point in the history
* Use a dedicated pod label to say it is a runner pod

Follow-up for #1546

* Fix PercentageRunnersBusy scaling delay

Ref #1374
  • Loading branch information
mumoshu authored Jun 29, 2022
1 parent a9ac5a1 commit 8161136
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 17 deletions.
35 changes: 34 additions & 1 deletion controllers/autoscaling.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (

"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
"github.com/google/go-github/v45/github"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
Expand Down Expand Up @@ -314,22 +316,52 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunner
numRunners int
numRunnersRegistered int
numRunnersBusy int
numTerminatingBusy int
)

numRunners = len(runnerMap)

busyTerminatingRunnerPods := map[string]struct{}{}

kindLabel := LabelKeyRunnerDeploymentName
if hra.Spec.ScaleTargetRef.Kind == "RunnerSet" {
kindLabel = LabelKeyRunnerSetName
}

var runnerPodList corev1.PodList
if err := r.Client.List(ctx, &runnerPodList, client.InNamespace(hra.Namespace), client.MatchingLabels(map[string]string{
kindLabel: hra.Spec.ScaleTargetRef.Name,
})); err != nil {
return nil, err
}

for _, p := range runnerPodList.Items {
if p.Annotations[AnnotationKeyUnregistrationFailureMessage] != "" {
busyTerminatingRunnerPods[p.Name] = struct{}{}
}
}

for _, runner := range runners {
if _, ok := runnerMap[*runner.Name]; ok {
numRunnersRegistered++

if runner.GetBusy() {
numRunnersBusy++
} else if _, ok := busyTerminatingRunnerPods[*runner.Name]; ok {
numTerminatingBusy++
}

delete(busyTerminatingRunnerPods, *runner.Name)
}
}

// Remaining busyTerminatingRunnerPods are runners that were not on the ListRunners API response yet
for range busyTerminatingRunnerPods {
numTerminatingBusy++
}

var desiredReplicas int
fractionBusy := float64(numRunnersBusy) / float64(desiredReplicasBefore)
fractionBusy := float64(numRunnersBusy+numTerminatingBusy) / float64(desiredReplicasBefore)
if fractionBusy >= scaleUpThreshold {
if scaleUpAdjustment > 0 {
desiredReplicas = desiredReplicasBefore + scaleUpAdjustment
Expand Down Expand Up @@ -358,6 +390,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunner
"num_runners", numRunners,
"num_runners_registered", numRunnersRegistered,
"num_runners_busy", numRunnersBusy,
"num_terminating_busy", numTerminatingBusy,
"namespace", hra.Namespace,
"kind", st.kind,
"name", st.st,
Expand Down
4 changes: 4 additions & 0 deletions controllers/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import "time"

const (
LabelKeyRunnerSetName = "runnerset-name"
LabelKeyRunner = "actions-runner"
)

const (
Expand All @@ -16,6 +17,9 @@ const (

AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"

// AnnotationKeyUnregistrationFailureMessage is the annotation that is added onto the pod once it failed to be unregistered from GitHub due to e.g. 422 error
AnnotationKeyUnregistrationFailureMessage = annotationKeyPrefix + "unregistration-failure-message"

// AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed.
AnnotationKeyUnregistrationCompleteTimestamp = annotationKeyPrefix + "unregistration-complete-timestamp"

Expand Down
14 changes: 7 additions & 7 deletions controllers/new_runner_pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func TestNewRunnerPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true",
"runnerset-name": "runner",
"actions-runner": "",
},
},
Spec: corev1.PodSpec{
Expand Down Expand Up @@ -198,7 +198,7 @@ func TestNewRunnerPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true",
"runnerset-name": "runner",
"actions-runner": "",
},
},
Spec: corev1.PodSpec{
Expand Down Expand Up @@ -276,7 +276,7 @@ func TestNewRunnerPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true",
"runnerset-name": "runner",
"actions-runner": "",
},
},
Spec: corev1.PodSpec{
Expand Down Expand Up @@ -515,7 +515,7 @@ func TestNewRunnerPod(t *testing.T) {
for i := range testcases {
tc := testcases[i]
t.Run(tc.description, func(t *testing.T) {
got, err := newRunnerPod("runner", tc.template, tc.config, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
got, err := newRunnerPod(tc.template, tc.config, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
require.NoError(t, err)
require.Equal(t, tc.want, got)
})
Expand Down Expand Up @@ -546,7 +546,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true",
"pod-template-hash": "8857b86c7",
"runnerset-name": "runner",
"actions-runner": "",
},
OwnerReferences: []metav1.OwnerReference{
{
Expand Down Expand Up @@ -703,7 +703,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true",
"pod-template-hash": "8857b86c7",
"runnerset-name": "runner",
"actions-runner": "",
},
OwnerReferences: []metav1.OwnerReference{
{
Expand Down Expand Up @@ -800,7 +800,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true",
"pod-template-hash": "8857b86c7",
"runnerset-name": "runner",
"actions-runner": "",
},
OwnerReferences: []metav1.OwnerReference{
{
Expand Down
10 changes: 5 additions & 5 deletions controllers/runner_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
}
}

pod, err := newRunnerPodWithContainerMode(runner.Spec.ContainerMode, runner.Name, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL)
pod, err := newRunnerPodWithContainerMode(runner.Spec.ContainerMode, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL)
if err != nil {
return pod, err
}
Expand Down Expand Up @@ -589,7 +589,7 @@ func runnerHookEnvs(pod *corev1.Pod) ([]corev1.EnvVar, error) {
}, nil
}

func newRunnerPodWithContainerMode(containerMode string, runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
func newRunnerPodWithContainerMode(containerMode string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
var (
privileged bool = true
dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer
Expand All @@ -607,7 +607,7 @@ func newRunnerPodWithContainerMode(containerMode string, runnerName string, temp
template = *template.DeepCopy()

// This label selector is used by default when rd.Spec.Selector is empty.
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerName)
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunner, "")
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)

workDir := runnerSpec.WorkDir
Expand Down Expand Up @@ -962,8 +962,8 @@ func newRunnerPodWithContainerMode(containerMode string, runnerName string, temp
return *pod, nil
}

func newRunnerPod(runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
return newRunnerPodWithContainerMode("", runnerName, template, runnerSpec, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
return newRunnerPodWithContainerMode("", template, runnerSpec, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
}

func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {
Expand Down
30 changes: 29 additions & 1 deletion controllers/runner_graceful_stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,25 @@ func annotatePodOnce(ctx context.Context, c client.Client, log logr.Logger, pod

return updated, nil
}
func labelPod(ctx context.Context, c client.Client, log logr.Logger, pod *corev1.Pod, k, v string) (*corev1.Pod, error) {
if pod == nil {
return nil, nil
}

updated := pod.DeepCopy()
if updated.Labels == nil {
updated.Labels = map[string]string{}
}
updated.Labels[k] = v
if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil {
log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", k))
return nil, err
}

log.V(2).Info("Labeled pod", "key", k, "value", v)

return updated, nil
}

// If the first return value is nil, it's safe to delete the runner pod.
func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
Expand Down Expand Up @@ -151,7 +170,10 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l

log.V(1).Info("Failed to unregister runner before deleting the pod.", "error", err)

var runnerBusy bool
var (
runnerBusy bool
runnerUnregistrationFailureMessage string
)

errRes := &gogithub.ErrorResponse{}
if errors.As(err, &errRes) {
Expand All @@ -173,6 +195,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
}

runnerBusy = errRes.Response.StatusCode == 422
runnerUnregistrationFailureMessage = errRes.Message

if runnerBusy && code != nil {
log.V(2).Info("Runner container has already stopped but the unregistration attempt failed. "+
Expand All @@ -187,6 +210,11 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
}

if runnerBusy {
_, err := labelPod(ctx, c, log, pod, AnnotationKeyUnregistrationFailureMessage, runnerUnregistrationFailureMessage)
if err != nil {
return &ctrl.Result{}, err
}

// We want to prevent spamming the deletion attemps but returning ctrl.Result with RequeueAfter doesn't
// work as the reconcilation can happen earlier due to pod status update.
// For ephemeral runners, we can expect it to stop and unregister itself on completion.
Expand Down
3 changes: 1 addition & 2 deletions controllers/runner_pod_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, client.IgnoreNotFound(err)
}

_, isRunnerPod := runnerPod.Labels[LabelKeyRunnerSetName]
if !isRunnerPod {
if _, isRunnerPod := runnerPod.Labels[LabelKeyRunner]; !isRunnerPod {
return ctrl.Result{}, nil
}

Expand Down
4 changes: 3 additions & 1 deletion controllers/runnerset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,9 @@ func (r *RunnerSetReconciler) newStatefulSet(runnerSet *v1alpha1.RunnerSet) (*ap
template.Spec.ServiceAccountName = runnerSet.Spec.ServiceAccountName
}

pod, err := newRunnerPodWithContainerMode(runnerSet.Spec.RunnerConfig.ContainerMode, runnerSet.Name, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL)
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerSet.Name)

pod, err := newRunnerPodWithContainerMode(runnerSet.Spec.RunnerConfig.ContainerMode, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL)
if err != nil {
return nil, err
}
Expand Down

0 comments on commit 8161136

Please sign in to comment.