Add scale out experiment scenario (#128)
* Allow overriding website workers

* Allow disabling tailing

This helps in automating multiple experiment runs.

* Fix website phase panel

* Increase recording rule resolution

* Fix node affinity for control plane components

Multiple `nodeSelectorTerms` are ORed, so appending a term does not restrict scheduling. Replace the terms to guarantee scheduling to the high-cpu worker pool.

* Increase kube-controller-manager rate limits

* Increase kube-controller-manager resources

Schedule kube-controller-manager on the high-cpu worker pool as well.

* Disable HTTP2 in kube-controller-manager

* Set `GOMAXPROCS` according to CPU quota everywhere

* Simplify handler of `ForEach`

* Prefactor: scenario implementation can choose duration

* Add `scale-out` scenario

* Drop `reconcile` scenario

* Update scenario descriptions

* Align histogram buckets with SLO
timebertt authored Feb 5, 2024
1 parent 9c0cb8e commit fd39c7d
Showing 24 changed files with 314 additions and 166 deletions.
3 changes: 2 additions & 1 deletion Makefile
@@ -154,14 +154,15 @@ export SKAFFOLD_FILENAME = hack/config/skaffold.yaml
deploy up dev down: export SKAFFOLD_LABEL = skaffold.dev/run-id=sharding
# use dedicated ghcr repo for dev images to prevent spamming the "production" image repo
up dev: export SKAFFOLD_DEFAULT_REPO ?= ghcr.io/timebertt/dev-images
up dev: export SKAFFOLD_TAIL ?= true

.PHONY: deploy
deploy: $(SKAFFOLD) $(KUBECTL) $(YQ) ## Build all images and deploy everything to K8s cluster specified in $KUBECONFIG.
$(SKAFFOLD) deploy -i $(SHARDER_IMG) -i $(SHARD_IMG) -i $(JANITOR_IMG)

.PHONY: up
up: $(SKAFFOLD) $(KUBECTL) $(YQ) ## Build all images, deploy everything to K8s cluster specified in $KUBECONFIG, start port-forward and tail logs.
$(SKAFFOLD) run --port-forward=user --tail
$(SKAFFOLD) run

.PHONY: dev
dev: $(SKAFFOLD) $(KUBECTL) $(YQ) ## Start continuous dev loop with skaffold.
6 changes: 3 additions & 3 deletions docs/evaluation.md
@@ -72,8 +72,8 @@ Usage:
experiment [command]
Available Scenarios
basic Basic load test scenario (15m) that creates roughly 8k websites over 10m
reconcile High frequency reconciliation load test scenario (15m) with a static number of websites (10k)
basic Basic load test scenario (15m) that creates roughly 9k websites
scale-out Scenario for testing scale-out with high churn rate
...
```

@@ -133,7 +133,7 @@ This is done to make load test experiments more stable and and their results mor
> These are preliminary results from a first set of test runs.
> TODO: update these once the full evaluation is completed.
The following graphs show the generated load and compare the resulting CPU, memory, and network usage of the components in three different setups when running the `basic` experiment scenario (~10k websites created over 15m):
The following graphs show the generated load and compare the resulting CPU, memory, and network usage of the components in three different setups when running the `basic` experiment scenario (~9k websites created over 15m):

- external sharder: 3 webhosting-operator pods (shards) + 2 sharder pods (the new approach implemented in this repository, second iteration for the Master's thesis)
- internal sharder: 3 webhosting-operator pods (3 shards, 1 acts as the sharder) (the old approach, first iteration for the study project)
2 changes: 1 addition & 1 deletion hack/config/monitoring/default/kustomization.yaml
@@ -34,7 +34,7 @@ secretGenerator:

patches:
- path: patch_grafana_admin.yaml
- path: patch_prometheus_resources.yaml
- path: patch_prometheus.yaml
- path: patch_grafana_networkpolicy.yaml
target:
group: networking.k8s.io
@@ -5,6 +5,7 @@ metadata:
namespace: monitoring
spec:
replicas: 1 # don't need HA for our purposes
evaluationInterval: 15s
alerting: null
resources:
requests:
10 changes: 7 additions & 3 deletions hack/config/policy/controlplane/etcd-main.yaml
@@ -29,6 +29,10 @@ spec:
limits:
cpu: 12000m
memory: 12Gi
# set GOMAXPROCS to CPU quota to minimize goroutine scheduling contention (CPU throttling)
env:
- name: GOMAXPROCS
value: "12"
# schedule etcd-main on high-cpu worker pool for stable performance
- name: add-scheduling-constraints
match:
@@ -45,6 +49,6 @@ spec:
- op: add
path: "/spec/tolerations/-"
value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"}
- op: add
path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms/-"
value: {"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}
- op: replace
path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms"
value: [{"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}]
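The `replace` above (instead of the earlier `add`) follows from how node affinity is evaluated: entries in `nodeSelectorTerms` are ORed, while the `matchExpressions` inside a single term are ANDed, so appending a `high-cpu` term would still admit nodes that only match the pre-existing term. A minimal Go sketch of these semantics, using the upstream `k8s.io/api/core/v1` types; the `example.com/pool` label key is made up for illustration:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// requiredAffinity wraps the given terms in a required node affinity.
func requiredAffinity(terms ...corev1.NodeSelectorTerm) *corev1.NodeAffinity {
	return &corev1.NodeAffinity{
		RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
			NodeSelectorTerms: terms,
		},
	}
}

func main() {
	// Hypothetical pre-existing term selecting the default worker pool.
	defaultPool := corev1.NodeSelectorTerm{MatchExpressions: []corev1.NodeSelectorRequirement{
		{Key: "example.com/pool", Operator: corev1.NodeSelectorOpIn, Values: []string{"default"}},
	}}
	highCPU := corev1.NodeSelectorTerm{MatchExpressions: []corev1.NodeSelectorRequirement{
		{Key: "high-cpu", Operator: corev1.NodeSelectorOpIn, Values: []string{"true"}},
	}}

	// Appending a second term ORs it with the existing one: a node matching
	// EITHER term is eligible, so pods may still land outside the high-cpu pool.
	appended := requiredAffinity(defaultPool, highCPU)

	// Replacing the terms leaves a single term that every candidate node must
	// satisfy, which is what guarantees placement on the high-cpu worker pool.
	replaced := requiredAffinity(highCPU)

	fmt.Println(len(appended.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms)) // 2 terms, ORed
	fmt.Println(len(replaced.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms)) // 1 term, strictly required
}
```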
8 changes: 6 additions & 2 deletions hack/config/policy/controlplane/kube-apiserver.yaml
@@ -53,6 +53,10 @@ spec:
limits:
cpu: 12000m
memory: 12Gi
# set GOMAXPROCS to CPU quota to minimize goroutine scheduling contention (CPU throttling)
env:
- name: GOMAXPROCS
value: "12"
# schedule kube-apiserver on high-cpu worker pool for stable performance
- name: add-scheduling-constraints
match:
@@ -70,5 +74,5 @@ spec:
path: "/spec/tolerations/-"
value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"}
- op: add
path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms/-"
value: {"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}
path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms"
value: [{"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}]
82 changes: 80 additions & 2 deletions hack/config/policy/controlplane/kube-controller-manager.yaml
@@ -6,6 +6,65 @@ metadata:
spec:
failurePolicy: Ignore
rules:
# set static requests/limits on kube-controller-manager to ensure similar evaluation environment between load test runs
- name: resources
match:
any:
- resources:
kinds:
- Pod
selector:
matchLabels:
app: kubernetes
role: controller-manager
mutate:
patchStrategicMerge:
spec:
containers:
- name: kube-controller-manager
resources:
requests:
cpu: 6000m
memory: 6Gi
limits:
cpu: 6000m
memory: 6Gi
# set GOMAXPROCS to CPU quota to minimize goroutine scheduling contention (CPU throttling)
env:
- name: GOMAXPROCS
value: "6"
- name: disable-vpa
match:
any:
- resources:
kinds:
- VerticalPodAutoscaler
names:
- kube-controller-manager-vpa
mutate:
patchStrategicMerge:
spec:
updatePolicy:
updateMode: Off
# schedule kube-controller-manager on high-cpu worker pool for stable performance
- name: add-scheduling-constraints
match:
any:
- resources:
kinds:
- Pod
selector:
matchLabels:
app: kubernetes
role: controller-manager
mutate:
patchesJson6902: |-
- op: add
path: "/spec/tolerations/-"
value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"}
- op: add
path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms"
value: [{"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}]
# increases kube-controller-manager's client-side rate limits to speed up garbage collection after executing load tests
- name: increase-rate-limits
match:
@@ -19,7 +78,26 @@ spec:
patchesJson6902: |-
- op: add
path: /spec/template/spec/containers/0/command/-
value: "--kube-api-qps=800"
value: "--kube-api-qps=2000"
- op: add
path: /spec/template/spec/containers/0/command/-
value: "--kube-api-burst=1000"
value: "--kube-api-burst=2200"
# disable HTTP2 in kube-controller-manager so that API requests are distributed across API server instances
- name: disable-http2
match:
any:
- resources:
kinds:
- Deployment
names:
- kube-controller-manager
mutate:
patchStrategicMerge:
spec:
template:
spec:
containers:
- name: kube-controller-manager
env:
- name: DISABLE_HTTP2
value: "true"
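For context on the `disable-http2` rule: an HTTP/2 client multiplexes all of its requests onto a single long-lived connection, which effectively pins kube-controller-manager to one kube-apiserver replica behind the load balancer. The sketch below shows the usual shape of such an env-var gate on a Go HTTP transport; it is an illustration of the pattern under that assumption, not the component's actual wiring:

```go
package main

import (
	"crypto/tls"
	"fmt"
	"net/http"
	"os"

	"golang.org/x/net/http2"
)

// newTransport speaks HTTP/2 unless the DISABLE_HTTP2 environment variable is
// set. Without HTTP/2, requests go out over multiple TCP connections instead
// of being multiplexed onto one, so an L4 load balancer in front of several
// kube-apiserver instances can actually spread the load.
func newTransport() (*http.Transport, error) {
	t := &http.Transport{
		TLSClientConfig: &tls.Config{MinVersion: tls.VersionTLS12},
	}
	if os.Getenv("DISABLE_HTTP2") != "" {
		// A non-nil, empty TLSNextProto map tells net/http not to negotiate h2.
		t.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{}
		return t, nil
	}
	// Default path: explicitly enable HTTP/2 on the transport.
	if err := http2.ConfigureTransport(t); err != nil {
		return nil, err
	}
	return t, nil
}

func main() {
	t, err := newTransport()
	if err != nil {
		panic(err)
	}
	fmt.Println("HTTP/2 enabled:", len(t.TLSNextProto) > 0)
}
```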
5 changes: 3 additions & 2 deletions webhosting-operator/Makefile
@@ -124,14 +124,15 @@ export SKAFFOLD_FILENAME = skaffold.yaml
deploy up dev down: export SKAFFOLD_LABEL = skaffold.dev/run-id=webhosting-operator
# use dedicated ghcr repo for dev images to prevent spamming the "production" image repo
up dev: export SKAFFOLD_DEFAULT_REPO ?= ghcr.io/timebertt/dev-images
up dev: export SKAFFOLD_TAIL ?= true

.PHONY: deploy
deploy: $(SKAFFOLD) $(KUBECTL) $(YQ) ## Build all images and deploy everything to K8s cluster specified in $KUBECONFIG.
$(SKAFFOLD) deploy --port-forward=user --tail -i $(OPERATOR_IMG) -i $(EXPERIMENT_IMG)
$(SKAFFOLD) deploy -i $(OPERATOR_IMG) -i $(EXPERIMENT_IMG)

.PHONY: up
up: $(SKAFFOLD) $(KUBECTL) $(YQ) ## Build all images, deploy everything to K8s cluster specified in $KUBECONFIG, start port-forward and tail logs.
$(SKAFFOLD) run --port-forward=user --tail
$(SKAFFOLD) run

.PHONY: dev
dev: $(SKAFFOLD) $(KUBECTL) $(YQ) ## Start continuous dev loop with skaffold.
10 changes: 10 additions & 0 deletions webhosting-operator/cmd/experiment/main.go
@@ -26,6 +26,7 @@ import (

"github.com/go-logr/logr"
"github.com/spf13/cobra"
"go.uber.org/automaxprocs/maxprocs"
"go.uber.org/zap/zapcore"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -85,6 +86,15 @@ func main() {
log = ctrl.Log
klog.SetLogger(ctrl.Log)

// This is like importing the automaxprocs package for its init func (it will in turn call maxprocs.Set).
// Here we pass a custom logger, so that the result of the library gets logged to the same logger we use for the
// component itself.
if _, err := maxprocs.Set(maxprocs.Logger(func(s string, i ...interface{}) {
log.Info(fmt.Sprintf(s, i...))
})); err != nil {
log.Error(err, "Failed to set GOMAXPROCS")
}

restConfig := ctrl.GetConfigOrDie()
restConfig.QPS = 1000
restConfig.Burst = 1200
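The `QPS` and `Burst` fields just above (like the raised `--kube-api-qps`/`--kube-api-burst` flags earlier in this commit) configure client-go's client-side rate limiting. A small sketch of what these numbers translate to, assuming the default token-bucket limiter that client-go builds from a `rest.Config`:

```go
package main

import (
	"fmt"

	"k8s.io/client-go/rest"
	"k8s.io/client-go/util/flowcontrol"
)

func main() {
	// Mirrors the values set on the experiment's rest.Config in the diff above.
	cfg := &rest.Config{QPS: 1000, Burst: 1200}

	// client-go wraps such a config in a token-bucket rate limiter: a sustained
	// rate of 1000 requests/s with bursts of up to 1200 before callers block.
	limiter := flowcontrol.NewTokenBucketRateLimiter(cfg.QPS, cfg.Burst)

	fmt.Println(limiter.TryAccept()) // true while burst tokens remain
}
```

Raising these client-side limits is what lets the experiment tool and, per the policy comment above, the garbage collector issue requests quickly enough during and after a load test run.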
10 changes: 10 additions & 0 deletions webhosting-operator/cmd/webhosting-operator/main.go
@@ -24,6 +24,7 @@ import (
"strconv"

"github.com/prometheus/client_golang/prometheus/collectors"
"go.uber.org/automaxprocs/maxprocs"
"go.uber.org/zap/zapcore"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
@@ -86,6 +87,15 @@ func main() {
os.Exit(1)
}

// This is like importing the automaxprocs package for its init func (it will in turn call maxprocs.Set).
// Here we pass a custom logger, so that the result of the library gets logged to the same logger we use for the
// component itself.
if _, err := maxprocs.Set(maxprocs.Logger(func(s string, i ...interface{}) {
setupLog.Info(fmt.Sprintf(s, i...))
})); err != nil {
setupLog.Error(err, "Failed to set GOMAXPROCS")
}

// replace deprecated legacy go collector
metrics.Registry.Unregister(collectors.NewGoCollector())
metrics.Registry.MustRegister(collectors.NewGoCollector(collectors.WithGoCollections(collectors.GoRuntimeMetricsCollection)))
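The comment around `maxprocs.Set` above notes that the call is equivalent to importing automaxprocs for its init func. A minimal sketch of that blank-import variant, for comparison (it logs through the standard library logger rather than the component's structured logger):

```go
package main

import (
	// The init side effect sets GOMAXPROCS from the container's CPU quota
	// (cgroup limit) at startup; the explicit maxprocs.Set call in the diff
	// above does the same but routes the library's output through the
	// component's own logger.
	_ "go.uber.org/automaxprocs"
)

func main() {}
```

Either way, capping `GOMAXPROCS` at the CPU quota (e.g., `12` for a `12000m` limit, as in the control-plane policies above) keeps the Go runtime from scheduling more threads than the CFS quota allows, which the commit identifies as a source of CPU throttling.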
@@ -11,4 +11,4 @@ patches:
patch: |
- op: add
path: /spec/template/spec/containers/0/args/-
value: reconcile
value: scale-out
2 changes: 2 additions & 0 deletions webhosting-operator/config/manager/base/manager.yaml
@@ -21,6 +21,8 @@ spec:
env:
- name: DISABLE_HTTP2
value: "true"
- name: WEBSITE_CONCURRENT_SYNCS
value: "15"
ports:
- name: metrics
containerPort: 8080
@@ -12,3 +12,7 @@ spec:
env:
- name: SHARDING_ENABLED
value: "false"
# When comparing singleton vs sharded setups, the singleton will fail to verify the SLOs because it has too few
# website workers. Increase the worker count to allow comparing the setups.
- name: WEBSITE_CONCURRENT_SYNCS
value: "50"
@@ -687,7 +687,7 @@
},
"editorMode": "code",
"exemplar": true,
"expr": "sum(namespace_phase:kube_website_info:sum{namespace=~\"$project\"}) by (phase)",
"expr": "sum(namespace_phase:kube_website_status_phase:sum{namespace=~\"$project\"}) by (phase)",
"interval": "",
"legendFormat": "{{phase}}",
"range": true,
1 change: 1 addition & 0 deletions webhosting-operator/go.mod
@@ -10,6 +10,7 @@ require (
github.com/prometheus/client_golang v1.18.0
github.com/prometheus/common v0.46.0
github.com/spf13/cobra v1.8.0
go.uber.org/automaxprocs v1.5.3
go.uber.org/zap v1.26.0
golang.org/x/time v0.5.0
gopkg.in/yaml.v3 v3.0.1
4 changes: 4 additions & 0 deletions webhosting-operator/go.sum
@@ -121,6 +121,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@@ -158,6 +160,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8=
go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0=
go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4=
@@ -21,6 +21,8 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"strconv"

"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
@@ -486,10 +488,8 @@ func (r *WebsiteReconciler) SetupWithManager(mgr ctrl.Manager) error {
}

workers := 15
if !mgr.IsSharded() {
// When comparing singleton vs sharded setups, the singleton will fail to verify the SLOs because it has too few
// website workers. Increase the worker count to allow comparing the setups.
workers = 50
if override, err := strconv.ParseInt(os.Getenv("WEBSITE_CONCURRENT_SYNCS"), 10, 32); err == nil {
workers = int(override)
}

c, err := ctrl.NewControllerManagedBy(mgr).