Enhance RunnerSet to optionally retain PVs across restarts #1340

Merged 2 commits on May 16, 2022
149 changes: 147 additions & 2 deletions acceptance/testdata/runnerset.envsubst.yaml
@@ -1,3 +1,48 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: ${NAME}
# In kind environments, the provider writes:
# /var/lib/docker/volumes/KIND_NODE_CONTAINER_VOL_ID/_data/local-path-provisioner/PV_NAME
# It can be hundreds of gigabytes depending on what you cache in the test workflow. Beware of `no space left on device` errors!
# If you do encounter no-space errors, try:
# docker system prune
# docker buildx prune #=> frees up /var/lib/docker/volumes/buildx_buildkit_container-builder0_state
# sudo rm -rf /var/lib/docker/volumes/KIND_NODE_CONTAINER_VOL_ID/_data/local-path-provisioner #=> frees up local-path-provisioner's data
provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
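# Note on `reclaimPolicy: Retain` above: when the bound PVC is deleted, the PV
# and its data are kept instead of being removed, but the PV is left in the
# Released phase and cannot be bound by a new PVC as-is. Making such volumes
# reusable across runner restarts is what the PV/PVC controllers added in this
# PR are for (presumably by unbinding the Released PV from its old claim).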
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: ${NAME}-var-lib-docker
labels:
content: ${NAME}-var-lib-docker
provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: ${NAME}-cache
labels:
content: ${NAME}-cache
provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: ${NAME}-runner-tool-cache
labels:
content: ${NAME}-runner-tool-cache
provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerSet
metadata:
@@ -59,8 +104,108 @@ spec:
containers:
- name: runner
imagePullPolicy: IfNotPresent
#- name: docker
# #image: mumoshu/actions-runner-dind:dev
env:
- name: RUNNER_FEATURE_FLAG_EPHEMERAL
value: "${RUNNER_FEATURE_FLAG_EPHEMERAL}"
- name: GOMODCACHE
value: "/home/runner/.cache/go-mod"
volumeMounts:
# Cache docker image layers, in case dockerdWithinRunnerContainer=true
- name: var-lib-docker
mountPath: /var/lib/docker
# Cache go modules and builds
# - name: gocache
# # Run `goenv | grep GOCACHE` to verify the path is correct for your env
# mountPath: /home/runner/.cache/go-build
# - name: gomodcache
# # Run `goenv | grep GOMODCACHE` to verify the path is correct for your env
# # mountPath: /home/runner/go/pkg/mod
- name: cache
# go: could not create module cache: stat /home/runner/.cache/go-mod: permission denied
mountPath: "/home/runner/.cache"
- name: runner-tool-cache
# This corresponds to our runner image's default setting of RUNNER_TOOL_CACHE=/opt/hostedtoolcache.
#
# In case you customize the envvar in both the runner and docker containers of the runner pod spec,
# you'd need to change this mountPath accordingly.
#
# The tool cache directory is defined in actions/toolkit's tool-cache module:
# https://github.com/actions/toolkit/blob/2f164000dcd42fb08287824a3bc3030dbed33687/packages/tool-cache/src/tool-cache.ts#L621-L638
#
# Many setup-* actions like setup-go utilize the tool-cache module to download and cache installed binaries:
# https://github.com/actions/setup-go/blob/56a61c9834b4a4950dbbf4740af0b8a98c73b768/src/installer.ts#L144
mountPath: "/opt/hostedtoolcache"
# Valid only when dockerdWithinRunnerContainer=false
- name: docker
volumeMounts:
# Cache docker image layers, in case dockerdWithinRunnerContainer=false
- name: var-lib-docker
mountPath: /var/lib/docker
# image: mumoshu/actions-runner-dind:dev

# For buildx cache
- name: cache
mountPath: "/home/runner/.cache"
volumeClaimTemplates:
- metadata:
name: vol1
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Mi
storageClassName: ${NAME}
## It's unclear which provider supports auto-provisioning with a selector.
## At least the rancher local path provider stopped with:
## waiting for a volume to be created, either by external provisioner "rancher.io/local-path" or manually created by system administrator
# selector:
# matchLabels:
# runnerset-volume-id: ${NAME}-vol1
- metadata:
name: vol2
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Mi
storageClassName: ${NAME}
# selector:
# matchLabels:
# runnerset-volume-id: ${NAME}-vol2
- metadata:
name: var-lib-docker
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Mi
storageClassName: ${NAME}-var-lib-docker
- metadata:
name: cache
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Mi
storageClassName: ${NAME}-cache
- metadata:
name: runner-tool-cache
# It turns out labels don't distinguish PVs across PVCs, and the
# end result is that PVs get reused by the wrong PVCs.
# The correct way seems to be to use a distinct storage class per PVC template.
# labels:
# id: runner-tool-cache
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Mi
storageClassName: ${NAME}-runner-tool-cache
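# Since each volumeClaimTemplate above gets its own StorageClass, a retained
# PV can only be re-bound by a PVC from the same template. As noted above,
# distinguishing PVs via labels/selectors did not work reliably.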
---
apiVersion: actions.summerwind.dev/v1alpha1
kind: HorizontalRunnerAutoscaler
22 changes: 22 additions & 0 deletions charts/actions-runner-controller/templates/manager_role.yaml
@@ -195,6 +195,28 @@ rules:
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- persistentvolumeclaims
verbs:
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- persistentvolumes
verbs:
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- coordination.k8s.io
resources:
23 changes: 23 additions & 0 deletions config/rbac/role.yaml
@@ -202,6 +202,29 @@ rules:
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- persistentvolumeclaims
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- persistentvolumes
verbs:
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
76 changes: 76 additions & 0 deletions controllers/persistent_volume_claim_controller.go
@@ -0,0 +1,76 @@
/*
Copyright 2022 The actions-runner-controller authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
"context"

"github.com/go-logr/logr"

"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

corev1 "k8s.io/api/core/v1"
)

// RunnerPersistentVolumeClaimReconciler reconciles a PersistentVolumeClaim object
type RunnerPersistentVolumeClaimReconciler struct {
client.Client
Log logr.Logger
Recorder record.EventRecorder
Scheme *runtime.Scheme
Name string
}

// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch

func (r *RunnerPersistentVolumeClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := r.Log.WithValues("pvc", req.NamespacedName)

var pvc corev1.PersistentVolumeClaim
if err := r.Get(ctx, req.NamespacedName, &pvc); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}

log.Info("Reconciling runner pvc")

Review comment: This confusingly logs even for a PVC that syncPVC ends up skipping.

res, err := syncPVC(ctx, r.Client, log, req.Namespace, &pvc)

if res == nil {
res = &ctrl.Result{}
}

return *res, err
}

func (r *RunnerPersistentVolumeClaimReconciler) SetupWithManager(mgr ctrl.Manager) error {
name := "runnerpersistentvolumeclaim-controller"
if r.Name != "" {
name = r.Name
}

r.Recorder = mgr.GetEventRecorderFor(name)

return ctrl.NewControllerManagedBy(mgr).
For(&corev1.PersistentVolumeClaim{}).
Named(name).
Complete(r)
}
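
syncPVC itself is defined elsewhere in this PR and is not shown in this diff. As a rough sketch of the kind of logic the RBAC rules and controller wiring above imply — all names and conditions below are assumptions, not the PR's actual implementation — it would delete runner PVCs whose owning StatefulSet is gone, so that the Retain'd PV behind them becomes Released:

// Hypothetical sketch only; syncPVC's real body is not in this diff.
// Assumed imports beyond those in the file above: appsv1 "k8s.io/api/apps/v1",
// kerrors "k8s.io/apimachinery/pkg/api/errors", "k8s.io/apimachinery/pkg/types".
func syncPVCSketch(ctx context.Context, c client.Client, log logr.Logger, ns string, pvc *corev1.PersistentVolumeClaim) error {
	ssName, ok := pvc.Labels["runnerset-statefulset"] // assumed label, invented here
	if !ok {
		log.V(1).Info("PVC skipped: not managed by a RunnerSet")
		return nil
	}

	var ss appsv1.StatefulSet
	err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: ssName}, &ss)
	if err == nil {
		return nil // the owning StatefulSet still exists; keep the PVC
	}
	if !kerrors.IsNotFound(err) {
		return err
	}

	// The owner is gone: deleting the PVC leaves the PV in the Released phase
	// (reclaimPolicy=Retain), ready to be unbound and reused.
	return client.IgnoreNotFound(c.Delete(ctx, pvc))
}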
72 changes: 72 additions & 0 deletions controllers/persistent_volume_controller.go
@@ -0,0 +1,72 @@
/*
Copyright 2022 The actions-runner-controller authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
"context"

"github.com/go-logr/logr"

"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

corev1 "k8s.io/api/core/v1"
)

// RunnerPersistentVolumeReconciler reconciles a PersistentVolume object
type RunnerPersistentVolumeReconciler struct {

Review comment: Is watching PersistentVolumes necessary? Should it be up to the PV controller to decide how to reclaim/recreate PVs when the PVC is deleted?

client.Client
Log logr.Logger
Recorder record.EventRecorder
Scheme *runtime.Scheme
Name string
}

// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch

func (r *RunnerPersistentVolumeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := r.Log.WithValues("pv", req.NamespacedName)

var pv corev1.PersistentVolume
if err := r.Get(ctx, req.NamespacedName, &pv); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}

res, err := syncPV(ctx, r.Client, log, req.Namespace, &pv)
if res == nil {
res = &ctrl.Result{}
}

return *res, err
}

func (r *RunnerPersistentVolumeReconciler) SetupWithManager(mgr ctrl.Manager) error {
name := "runnerpersistentvolume-controller"
if r.Name != "" {
name = r.Name
}

r.Recorder = mgr.GetEventRecorderFor(name)

return ctrl.NewControllerManagedBy(mgr).
For(&corev1.PersistentVolume{}).
Named(name).
Complete(r)
}
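
syncPV is likewise defined outside this diff. Regarding the review question above: with reclaimPolicy: Retain, a PV whose PVC was deleted stays in the Released phase and cannot be re-bound until something clears its stale claimRef, which is a plausible job for this controller. A minimal sketch under that assumption (not the PR's actual code):

// Hypothetical sketch only: make a Released, Retain'd PV bindable again by
// dropping the stale reference to the deleted PVC.
func syncPVSketch(ctx context.Context, c client.Client, log logr.Logger, pv *corev1.PersistentVolume) error {
	if pv.Status.Phase != corev1.VolumeReleased || pv.Spec.ClaimRef == nil {
		return nil // nothing to do
	}

	pv.Spec.ClaimRef = nil // transitions the PV from Released back to Available
	log.Info("Unbound PV from deleted PVC so it can be reused")

	return c.Update(ctx, pv)
}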
7 changes: 7 additions & 0 deletions controllers/runnerset_controller.go
@@ -58,6 +58,7 @@ type RunnerSetReconciler struct {
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnersets/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update

@@ -129,6 +130,12 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
owners = append(owners, &ss)
}

if res, err := syncVolumes(ctx, r.Client, log, req.Namespace, runnerSet, statefulsets); err != nil {
return ctrl.Result{}, err
} else if res != nil {
return *res, nil
}

res, err := syncRunnerPodsOwners(ctx, r.Client, log, effectiveTime, newDesiredReplicas, func() client.Object { return create.DeepCopy() }, ephemeral, owners)
if err != nil || res == nil {
return ctrl.Result{}, err
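
Not shown in this diff is where the two new reconcilers get registered with the controller manager; that presumably happens in main.go. A sketch of the idiomatic controller-runtime wiring, with everything outside the SetupWithManager methods above assumed:

// Hypothetical wiring in main.go (not part of this diff); setupLog, mgr and
// os.Exit follow the usual kubebuilder scaffold.
if err := (&controllers.RunnerPersistentVolumeReconciler{
	Client: mgr.GetClient(),
	Log:    ctrl.Log.WithName("controllers").WithName("RunnerPersistentVolume"),
	Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
	setupLog.Error(err, "unable to create controller", "controller", "RunnerPersistentVolume")
	os.Exit(1)
}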