Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

controller: made QPS, burst QPS and number of workers externally configurable #497

Merged
merged 2 commits into from
Jan 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 46 additions & 26 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@ const (
maxPortFlag = "max-port"
certFileFlag = "cert-file"
keyFileFlag = "key-file"
numWorkersFlag = "num-workers"
apiServerSustainedQPSFlag = "api-server-qps"
apiServerBurstQPSFlag = "api-server-qps-burst"
kubeconfigFlag = "kubeconfig"
workers = 2
defaultResync = 30 * time.Second
)

Expand All @@ -85,6 +87,9 @@ func main() {
logger.WithError(err).Fatal("Could not create in cluster config")
}

clientConf.QPS = float32(ctlConf.APIServerSustainedQPS)
clientConf.Burst = ctlConf.APIServerBurstQPS

kubeClient, err := kubernetes.NewForConfig(clientConf)
if err != nil {
logger.WithError(err).Fatal("Could not create the kubernetes clientset")
Expand Down Expand Up @@ -169,7 +174,7 @@ func main() {

for _, r := range rs {
go func(rr runner) {
if runErr := rr.Run(workers, stop); runErr != nil {
if runErr := rr.Run(ctlConf.NumWorkers, stop); runErr != nil {
logger.WithError(runErr).Fatalf("could not start runner: %T", rr)
}
}(r)
Expand All @@ -195,6 +200,9 @@ func parseEnvFlags() config {
viper.SetDefault(enablePrometheusMetricsFlag, true)
viper.SetDefault(enableStackdriverMetricsFlag, false)
viper.SetDefault(projectIDFlag, "")
viper.SetDefault(numWorkersFlag, 64)
viper.SetDefault(apiServerSustainedQPSFlag, 100)
viper.SetDefault(apiServerBurstQPSFlag, 200)

pflag.String(sidecarImageFlag, viper.GetString(sidecarImageFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable")
pflag.String(sidecarCPULimitFlag, viper.GetString(sidecarCPULimitFlag), "Flag to overwrite the GameServer sidecar container's cpu limit. Can also use SIDECAR_CPU_LIMIT env variable")
Expand All @@ -208,6 +216,9 @@ func parseEnvFlags() config {
pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.")
pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. Can also use STACKDRIVER_EXPORTER env variable.")
pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. Can also use GCP_PROJECT_ID env variable.")
pflag.Int32(numWorkersFlag, 64, "Number of controller workers per resource type")
pflag.Int32(apiServerSustainedQPSFlag, 100, "Maximum sustained queries per second to send to the API server")
pflag.Int32(apiServerBurstQPSFlag, 200, "Maximum burst queries per second to send to the API server")
pflag.Parse()

viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
Expand All @@ -224,6 +235,9 @@ func parseEnvFlags() config {
runtime.Must(viper.BindEnv(enableStackdriverMetricsFlag))
runtime.Must(viper.BindEnv(projectIDFlag))
runtime.Must(viper.BindPFlags(pflag.CommandLine))
runtime.Must(viper.BindEnv(numWorkersFlag))
runtime.Must(viper.BindEnv(apiServerSustainedQPSFlag))
runtime.Must(viper.BindEnv(apiServerBurstQPSFlag))

request, err := resource.ParseQuantity(viper.GetString(sidecarCPURequestFlag))
if err != nil {
Expand All @@ -236,35 +250,41 @@ func parseEnvFlags() config {
}

return config{
MinPort: int32(viper.GetInt64(minPortFlag)),
MaxPort: int32(viper.GetInt64(maxPortFlag)),
SidecarImage: viper.GetString(sidecarImageFlag),
SidecarCPURequest: request,
SidecarCPULimit: limit,
AlwaysPullSidecar: viper.GetBool(pullSidecarFlag),
KeyFile: viper.GetString(keyFileFlag),
CertFile: viper.GetString(certFileFlag),
KubeConfig: viper.GetString(kubeconfigFlag),
PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag),
Stackdriver: viper.GetBool(enableStackdriverMetricsFlag),
GCPProjectID: viper.GetString(projectIDFlag),
MinPort: int32(viper.GetInt64(minPortFlag)),
MaxPort: int32(viper.GetInt64(maxPortFlag)),
SidecarImage: viper.GetString(sidecarImageFlag),
SidecarCPURequest: request,
SidecarCPULimit: limit,
AlwaysPullSidecar: viper.GetBool(pullSidecarFlag),
KeyFile: viper.GetString(keyFileFlag),
CertFile: viper.GetString(certFileFlag),
KubeConfig: viper.GetString(kubeconfigFlag),
PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag),
Stackdriver: viper.GetBool(enableStackdriverMetricsFlag),
GCPProjectID: viper.GetString(projectIDFlag),
NumWorkers: int(viper.GetInt32(numWorkersFlag)),
APIServerSustainedQPS: int(viper.GetInt32(apiServerSustainedQPSFlag)),
APIServerBurstQPS: int(viper.GetInt32(apiServerBurstQPSFlag)),
}
}

// config stores all required configuration to create a game server controller.
type config struct {
MinPort int32
MaxPort int32
SidecarImage string
SidecarCPURequest resource.Quantity
SidecarCPULimit resource.Quantity
AlwaysPullSidecar bool
PrometheusMetrics bool
Stackdriver bool
KeyFile string
CertFile string
KubeConfig string
GCPProjectID string
MinPort int32
MaxPort int32
SidecarImage string
SidecarCPURequest resource.Quantity
SidecarCPULimit resource.Quantity
AlwaysPullSidecar bool
PrometheusMetrics bool
Stackdriver bool
KeyFile string
CertFile string
KubeConfig string
GCPProjectID string
NumWorkers int
APIServerSustainedQPS int
APIServerBurstQPS int
}

// validate ensures the ctlConfig data is valid.
Expand Down
6 changes: 6 additions & 0 deletions install/helm/agones/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ spec:
value: {{ .Values.agones.metrics.stackdriverProjectID | quote }}
- name: SIDECAR_CPU_LIMIT
value: {{ .Values.agones.image.sdk.cpuLimit | quote }}
- name: NUM_WORKERS
value: {{ .Values.agones.controller.numWorkers | quote }}
- name: API_SERVER_QPS
value: {{ .Values.agones.controller.apiServerQPS | quote }}
- name: API_SERVER_QPS_BURST
value: {{ .Values.agones.controller.apiServerQPSBurst | quote }}
livenessProbe:
httpGet:
path: /live
Expand Down
3 changes: 3 additions & 0 deletions install/helm/agones/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ agones:
operator: Exists
generateTLS: true
safeToEvict: false
numWorkers: 64
apiServerQPS: 100
apiServerQPSBurst: 200
http:
port: 8080
healthCheck:
Expand Down
6 changes: 6 additions & 0 deletions install/yaml/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,12 @@ spec:
value: ""
- name: SIDECAR_CPU_LIMIT
value: "0"
- name: NUM_WORKERS
value: "64"
- name: API_SERVER_QPS
value: "100"
- name: API_SERVER_QPS_BURST
value: "200"
livenessProbe:
httpGet:
path: /live
Expand Down
14 changes: 9 additions & 5 deletions site/content/en/docs/Installation/helm.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,13 @@ The following tables lists the configurable parameters of the Agones chart and t
| `agones.controller.resources` | Controller resource requests/limit | `{}` |
| `agones.controller.generateTLS` | Set to true to generate TLS certificates or false to provide your own certificates in `certs/*` | `true` |
| `agones.ping.install` | Whether to install the [ping service][ping] | `true` |
| `agones.ping.replicas` | The number of replicas to run in the deployment | `2` |
| `agones.ping.http.expose` | Expose the http ping service via a Service | `true` |
| `agones.ping.http.response` | The string response returned from the http service | `ok` |
| `agones.ping.replicas` | The number of replicas to run in the deployment | `2` |
markmandel marked this conversation as resolved.
Show resolved Hide resolved
| `agones.ping.http.expose` | Expose the http ping service via a Service | `true` |
| `agones.ping.http.response` | The string response returned from the http service | `ok` |
| `agones.ping.http.port` | The port to expose on the service | `80` |
| `agones.ping.http.serviceType` | The [Service Type][service] of the HTTP Service | `LoadBalancer` |
| `agones.ping.udp.expose` | Expose the udp ping service via a Service | `true` |
| `agones.ping.udp.rateLimit` | Number of UDP packets the ping service handles per instance, per second, per sender | `20` |
| `agones.ping.udp.expose` | Expose the udp ping service via a Service | `true` |
| `agones.ping.udp.rateLimit` | Number of UDP packets the ping service handles per instance, per second, per sender | `20` |
| `agones.ping.udp.port` | The port to expose on the service | `80` |
| `agones.ping.udp.serviceType` | The [Service Type][service] of the UDP Service | `LoadBalancer` |
| `agones.ping.healthCheck.initialDelaySeconds` | Initial delay before performing the first probe (in seconds) | `3` |
Expand All @@ -162,6 +162,10 @@ The following tables lists the configurable parameters of the Agones chart and t
| `agones.ping.nodeSelector` | Ping [node labels](nodeSelector) for pod assignment | `{}` |
| `agones.ping.tolerations` | Ping [toleration][toleration] labels for pod assignment | `[]` |
| `agones.ping.affinity` | Ping [affinity](affinity) settings for pod assignment | `{}` |
| `agones.controller.numWorkers` | Number of workers to spin up per resource type | `64` |
| `agones.controller.apiServerQPS` | Maximum sustained queries per second that the controller should make against the API server | `100` |
| `agones.controller.apiServerQPSBurst` | Maximum burst queries per second that the controller should make against the API server | `200` |

[toleration]: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
{{% /feature %}}

Expand Down
16 changes: 12 additions & 4 deletions test/e2e/fleet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/stretchr/testify/assert"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -445,7 +446,7 @@ func TestFleetAllocationDuringGameServerDeletion(t *testing.T) {
fltCopy := flt.DeepCopy()
fltCopy.Spec.Template.ObjectMeta.Annotations[key] = green
_, err = framework.AgonesClient.StableV1alpha1().Fleets(defaultNs).Update(fltCopy)
assert.Nil(t, err)
assertSuccessOrUpdateConflict(t, err)
})
})

Expand All @@ -467,11 +468,18 @@ func TestFleetAllocationDuringGameServerDeletion(t *testing.T) {
fltCopy := flt.DeepCopy()
fltCopy.Spec.Template.ObjectMeta.Annotations[key] = green
_, err = framework.AgonesClient.StableV1alpha1().Fleets(defaultNs).Update(fltCopy)
assert.Nil(t, err)
assertSuccessOrUpdateConflict(t, err)
})
})
}

// assertSuccessOrUpdateConflict asserts that err is nil, unless err is a
// Kubernetes conflict error (as reported by k8serrors.IsConflict), in which
// case nothing is asserted and the test continues.
func assertSuccessOrUpdateConflict(t *testing.T, err error) {
	// Mark this function as a test helper so that a failure is attributed to
	// the calling test's line instead of to this function.
	t.Helper()

	if k8serrors.IsConflict(err) {
		// update conflicts are sometimes ok, we simply lost the race.
		return
	}
	assert.Nil(t, err)
}

// TestGameServerAllocationDuringGameServerDeletion is built to specifically
// test for race conditions of allocations when doing scale up/down,
// rolling updates, etc. Failures may not happen ALL the time -- as that is the
Expand Down Expand Up @@ -567,7 +575,7 @@ func TestGameServerAllocationDuringGameServerDeletion(t *testing.T) {
fltCopy := flt.DeepCopy()
fltCopy.Spec.Template.ObjectMeta.Annotations[key] = green
_, err = framework.AgonesClient.StableV1alpha1().Fleets(defaultNs).Update(fltCopy)
assert.Nil(t, err)
assertSuccessOrUpdateConflict(t, err)
})
})

Expand All @@ -589,7 +597,7 @@ func TestGameServerAllocationDuringGameServerDeletion(t *testing.T) {
fltCopy := flt.DeepCopy()
fltCopy.Spec.Template.ObjectMeta.Annotations[key] = green
_, err = framework.AgonesClient.StableV1alpha1().Fleets(defaultNs).Update(fltCopy)
assert.Nil(t, err)
assertSuccessOrUpdateConflict(t, err)
})
})
}
Expand Down