Skip to content

Commit

Permalink
controller: made QPS, burst QPS and number of workers externally conf…
Browse files Browse the repository at this point in the history
…igurable
  • Loading branch information
jkowalski authored and markmandel committed Jan 28, 2019
1 parent 9b31d92 commit cf37064
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 35 deletions.
72 changes: 46 additions & 26 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@ const (
maxPortFlag = "max-port"
certFileFlag = "cert-file"
keyFileFlag = "key-file"
numWorkersFlag = "num-workers"
apiServerSustainedQPSFlag = "api-server-qps"
apiServerBurstQPSFlag = "api-server-qps-burst"
kubeconfigFlag = "kubeconfig"
workers = 2
defaultResync = 30 * time.Second
)

Expand All @@ -85,6 +87,9 @@ func main() {
logger.WithError(err).Fatal("Could not create in cluster config")
}

clientConf.QPS = float32(ctlConf.APIServerSustainedQPS)
clientConf.Burst = ctlConf.APIServerBurstQPS

kubeClient, err := kubernetes.NewForConfig(clientConf)
if err != nil {
logger.WithError(err).Fatal("Could not create the kubernetes clientset")
Expand Down Expand Up @@ -169,7 +174,7 @@ func main() {

for _, r := range rs {
go func(rr runner) {
if runErr := rr.Run(workers, stop); runErr != nil {
if runErr := rr.Run(ctlConf.NumWorkers, stop); runErr != nil {
logger.WithError(runErr).Fatalf("could not start runner: %T", rr)
}
}(r)
Expand All @@ -195,6 +200,9 @@ func parseEnvFlags() config {
viper.SetDefault(enablePrometheusMetricsFlag, true)
viper.SetDefault(enableStackdriverMetricsFlag, false)
viper.SetDefault(projectIDFlag, "")
viper.SetDefault(numWorkersFlag, 64)
viper.SetDefault(apiServerSustainedQPSFlag, 100)
viper.SetDefault(apiServerBurstQPSFlag, 200)

pflag.String(sidecarImageFlag, viper.GetString(sidecarImageFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable")
pflag.String(sidecarCPULimitFlag, viper.GetString(sidecarCPULimitFlag), "Flag to overwrite the GameServer sidecar container's cpu limit. Can also use SIDECAR_CPU_LIMIT env variable")
Expand All @@ -208,6 +216,9 @@ func parseEnvFlags() config {
pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.")
pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. Can also use STACKDRIVER_EXPORTER env variable.")
pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. Can also use GCP_PROJECT_ID env variable.")
pflag.Int32(numWorkersFlag, 64, "Number of controller workers per resource type")
pflag.Int32(apiServerSustainedQPSFlag, 100, "Maximum sustained queries per second to send to the API server")
pflag.Int32(apiServerBurstQPSFlag, 200, "Maximum burst queries per second to send to the API server")
pflag.Parse()

viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
Expand All @@ -224,6 +235,9 @@ func parseEnvFlags() config {
runtime.Must(viper.BindEnv(enableStackdriverMetricsFlag))
runtime.Must(viper.BindEnv(projectIDFlag))
runtime.Must(viper.BindPFlags(pflag.CommandLine))
runtime.Must(viper.BindEnv(numWorkersFlag))
runtime.Must(viper.BindEnv(apiServerSustainedQPSFlag))
runtime.Must(viper.BindEnv(apiServerBurstQPSFlag))

request, err := resource.ParseQuantity(viper.GetString(sidecarCPURequestFlag))
if err != nil {
Expand All @@ -236,35 +250,41 @@ func parseEnvFlags() config {
}

return config{
MinPort: int32(viper.GetInt64(minPortFlag)),
MaxPort: int32(viper.GetInt64(maxPortFlag)),
SidecarImage: viper.GetString(sidecarImageFlag),
SidecarCPURequest: request,
SidecarCPULimit: limit,
AlwaysPullSidecar: viper.GetBool(pullSidecarFlag),
KeyFile: viper.GetString(keyFileFlag),
CertFile: viper.GetString(certFileFlag),
KubeConfig: viper.GetString(kubeconfigFlag),
PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag),
Stackdriver: viper.GetBool(enableStackdriverMetricsFlag),
GCPProjectID: viper.GetString(projectIDFlag),
MinPort: int32(viper.GetInt64(minPortFlag)),
MaxPort: int32(viper.GetInt64(maxPortFlag)),
SidecarImage: viper.GetString(sidecarImageFlag),
SidecarCPURequest: request,
SidecarCPULimit: limit,
AlwaysPullSidecar: viper.GetBool(pullSidecarFlag),
KeyFile: viper.GetString(keyFileFlag),
CertFile: viper.GetString(certFileFlag),
KubeConfig: viper.GetString(kubeconfigFlag),
PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag),
Stackdriver: viper.GetBool(enableStackdriverMetricsFlag),
GCPProjectID: viper.GetString(projectIDFlag),
NumWorkers: int(viper.GetInt32(numWorkersFlag)),
APIServerSustainedQPS: int(viper.GetInt32(apiServerSustainedQPSFlag)),
APIServerBurstQPS: int(viper.GetInt32(apiServerBurstQPSFlag)),
}
}

// config stores all required configuration to create a game server controller.
// It is the value returned by parseEnvFlags, which fills each field from the
// correspondingly named flag / environment setting read through viper.
type config struct {
MinPort int32
MaxPort int32
SidecarImage string
SidecarCPURequest resource.Quantity
SidecarCPULimit resource.Quantity
AlwaysPullSidecar bool
PrometheusMetrics bool
Stackdriver bool
KeyFile string
CertFile string
KubeConfig string
GCPProjectID string
MinPort int32
MaxPort int32
SidecarImage string
SidecarCPURequest resource.Quantity
SidecarCPULimit resource.Quantity
AlwaysPullSidecar bool
PrometheusMetrics bool
Stackdriver bool
KeyFile string
CertFile string
KubeConfig string
GCPProjectID string
// NumWorkers is the worker count handed to every runner's Run call in main
// (read from the "num-workers" setting).
NumWorkers int
// APIServerSustainedQPS is copied into the Kubernetes client config's QPS
// field (read from the "api-server-qps" setting).
APIServerSustainedQPS int
// APIServerBurstQPS is copied into the Kubernetes client config's Burst
// field (read from the "api-server-qps-burst" setting).
APIServerBurstQPS int
}

// validate ensures the ctlConfig data is valid.
Expand Down
6 changes: 6 additions & 0 deletions install/helm/agones/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ spec:
value: {{ .Values.agones.metrics.stackdriverProjectID | quote }}
- name: SIDECAR_CPU_LIMIT
value: {{ .Values.agones.image.sdk.cpuLimit | quote }}
- name: NUM_WORKERS
value: {{ .Values.agones.controller.numWorkers | quote }}
- name: API_SERVER_QPS
value: {{ .Values.agones.controller.apiServerQPS | quote }}
- name: API_SERVER_QPS_BURST
value: {{ .Values.agones.controller.apiServerQPSBurst | quote }}
livenessProbe:
httpGet:
path: /live
Expand Down
3 changes: 3 additions & 0 deletions install/helm/agones/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ agones:
operator: Exists
generateTLS: true
safeToEvict: false
numWorkers: 64
apiServerQPS: 100
apiServerQPSBurst: 200
http:
port: 8080
healthCheck:
Expand Down
6 changes: 6 additions & 0 deletions install/yaml/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,12 @@ spec:
value: ""
- name: SIDECAR_CPU_LIMIT
value: "0"
- name: NUM_WORKERS
value: "64"
- name: API_SERVER_QPS
value: "100"
- name: API_SERVER_QPS_BURST
value: "200"
livenessProbe:
httpGet:
path: /live
Expand Down
14 changes: 9 additions & 5 deletions site/content/en/docs/Installation/helm.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,13 @@ The following tables lists the configurable parameters of the Agones chart and t
| `agones.controller.resources` | Controller resource requests/limit | `{}` |
| `agones.controller.generateTLS` | Set to true to generate TLS certificates or false to provide your own certificates in `certs/*` | `true` |
| `agones.ping.install` | Whether to install the [ping service][ping] | `true` |
| `agones.ping.replicas` | The number of replicas to run in the deployment | `2` |
| `agones.ping.http.expose` | Expose the http ping service via a Service | `true` |
| `agones.ping.http.response` | The string response returned from the http service | `ok` |
| `agones.ping.replicas` | The number of replicas to run in the deployment | `2` |
| `agones.ping.http.expose` | Expose the http ping service via a Service | `true` |
| `agones.ping.http.response` | The string response returned from the http service | `ok` |
| `agones.ping.http.port` | The port to expose on the service | `80` |
| `agones.ping.http.serviceType` | The [Service Type][service] of the HTTP Service | `LoadBalancer` |
| `agones.ping.udp.expose` | Expose the udp ping service via a Service | `true` |
| `agones.ping.udp.rateLimit` | Number of UDP packets the ping service handles per instance, per second, per sender | `20` |
| `agones.ping.udp.expose` | Expose the udp ping service via a Service | `true` |
| `agones.ping.udp.rateLimit` | Number of UDP packets the ping service handles per instance, per second, per sender | `20` |
| `agones.ping.udp.port` | The port to expose on the service | `80` |
| `agones.ping.udp.serviceType` | The [Service Type][service] of the UDP Service | `LoadBalancer` |
| `agones.ping.healthCheck.initialDelaySeconds` | Initial delay before performing the first probe (in seconds) | `3` |
Expand All @@ -162,6 +162,10 @@ The following tables lists the configurable parameters of the Agones chart and t
| `agones.ping.nodeSelector` | Ping [node labels](nodeSelector) for pod assignment | `{}` |
| `agones.ping.tolerations` | Ping [toleration][toleration] labels for pod assignment | `[]` |
| `agones.ping.affinity` | Ping [affinity](affinity) settings for pod assignment | `{}` |
| `agones.controller.numWorkers` | Number of workers to spin up per resource type | `64` |
| `agones.controller.apiServerQPS` | Maximum sustained queries per second that the controller should make against the API server | `100` |
| `agones.controller.apiServerQPSBurst` | Maximum burst queries per second that the controller should make against the API server | `200` |

[toleration]: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
{{% /feature %}}

Expand Down
16 changes: 12 additions & 4 deletions test/e2e/fleet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/stretchr/testify/assert"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -445,7 +446,7 @@ func TestFleetAllocationDuringGameServerDeletion(t *testing.T) {
fltCopy := flt.DeepCopy()
fltCopy.Spec.Template.ObjectMeta.Annotations[key] = green
_, err = framework.AgonesClient.StableV1alpha1().Fleets(defaultNs).Update(fltCopy)
assert.Nil(t, err)
assertSuccessOrUpdateConflict(t, err)
})
})

Expand All @@ -467,11 +468,18 @@ func TestFleetAllocationDuringGameServerDeletion(t *testing.T) {
fltCopy := flt.DeepCopy()
fltCopy.Spec.Template.ObjectMeta.Annotations[key] = green
_, err = framework.AgonesClient.StableV1alpha1().Fleets(defaultNs).Update(fltCopy)
assert.Nil(t, err)
assertSuccessOrUpdateConflict(t, err)
})
})
}

// assertSuccessOrUpdateConflict asserts that err is nil, except when err is a
// Kubernetes update conflict (as reported by k8serrors.IsConflict), which is
// tolerated and does not fail the test.
func assertSuccessOrUpdateConflict(t *testing.T, err error) {
	// Mark this function as a test helper so that a failure is attributed to
	// the calling test line rather than to this function.
	t.Helper()

	if k8serrors.IsConflict(err) {
		// Update conflicts are sometimes ok: we simply lost the race.
		return
	}
	assert.Nil(t, err)
}

// TestGameServerAllocationDuringGameServerDeletion is built to specifically
// test for race conditions of allocations when doing scale up/down,
// rolling updates, etc. Failures may not happen ALL the time -- as that is the
Expand Down Expand Up @@ -567,7 +575,7 @@ func TestGameServerAllocationDuringGameServerDeletion(t *testing.T) {
fltCopy := flt.DeepCopy()
fltCopy.Spec.Template.ObjectMeta.Annotations[key] = green
_, err = framework.AgonesClient.StableV1alpha1().Fleets(defaultNs).Update(fltCopy)
assert.Nil(t, err)
assertSuccessOrUpdateConflict(t, err)
})
})

Expand All @@ -589,7 +597,7 @@ func TestGameServerAllocationDuringGameServerDeletion(t *testing.T) {
fltCopy := flt.DeepCopy()
fltCopy.Spec.Template.ObjectMeta.Annotations[key] = green
_, err = framework.AgonesClient.StableV1alpha1().Fleets(defaultNs).Update(fltCopy)
assert.Nil(t, err)
assertSuccessOrUpdateConflict(t, err)
})
})
}
Expand Down

0 comments on commit cf37064

Please sign in to comment.