From ceb6161d7c4d51191c6e36138339f521a1031f47 Mon Sep 17 00:00:00 2001 From: Nikhil Athreya Date: Mon, 31 Aug 2020 22:38:45 +0000 Subject: [PATCH 1/5] Added a new 'disableTLS' flag and changed 'disableMTLS' to only disable client auth. --- cmd/allocator/main.go | 20 +++++++++++-------- cmd/allocator/metrics.go | 6 ++++++ .../agones/templates/service/allocation.yaml | 2 ++ install/helm/agones/values.yaml | 1 + install/yaml/install.yaml | 2 ++ 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/cmd/allocator/main.go b/cmd/allocator/main.go index 18d466ad0e..3db8eb47e9 100644 --- a/cmd/allocator/main.go +++ b/cmd/allocator/main.go @@ -88,14 +88,14 @@ func main() { return err }) - h := newServiceHandler(kubeClient, agonesClient, health, conf.MTLSDisabled) + h := newServiceHandler(kubeClient, agonesClient, health, conf.MTLSDisabled, conf.TLSDisabled) listener, err := net.Listen("tcp", fmt.Sprintf(":%s", sslPort)) if err != nil { logger.WithError(err).Fatalf("failed to listen on TCP port %s", sslPort) } - if !h.mTLSDisabled { + if !h.tlsDisabled { // creates a new file watcher for client certificate folder watcher, err := fsnotify.NewWatcher() if err != nil { @@ -167,7 +167,7 @@ func main() { logger.WithError(err).Fatal("allocation service crashed") } -func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.Interface, health healthcheck.Handler, mTLSDisabled bool) *serviceHandler { +func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.Interface, health healthcheck.Handler, mTLSDisabled bool, tlsDisabled bool) *serviceHandler { defaultResync := 30 * time.Second agonesInformerFactory := externalversions.NewSharedInformerFactory(agonesClient, defaultResync) kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, defaultResync) @@ -185,6 +185,7 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I return allocator.Allocate(gsa, stop) }, mTLSDisabled: mTLSDisabled, + tlsDisabled: tlsDisabled, } kubeInformerFactory.Start(stop) @@ -193,7 +194,7 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I logger.WithError(err).Fatal("starting allocator failed.") } - if !h.mTLSDisabled { + if !h.tlsDisabled { caCertPool, err := getCACertPool(certDir) if err != nil { logger.WithError(err).Fatal("could not load CA certs.") @@ -225,14 +226,16 @@ func readTLSCert() (*tls.Certificate, error) { // getServerOptions returns a list of GRPC server options. // Current options are TLS certs and opencensus stats handler. func (h *serviceHandler) getServerOptions() []grpc.ServerOption { - if h.mTLSDisabled { + if h.tlsDisabled { return []grpc.ServerOption{grpc.StatsHandler(&ocgrpc.ServerHandler{})} } cfg := &tls.Config{ - GetCertificate: h.getTLSCert, - ClientAuth: tls.RequireAnyClientCert, - VerifyPeerCertificate: h.verifyClientCertificate, + GetCertificate: h.getTLSCert, + } + if !h.mTLSDisabled { + cfg.ClientAuth = tls.RequireAnyClientCert + cfg.VerifyPeerCertificate = h.verifyClientCertificate } // Add options for creds and OpenCensus stats handler to enable stats and tracing. return []grpc.ServerOption{grpc.Creds(credentials.NewTLS(cfg)), grpc.StatsHandler(&ocgrpc.ServerHandler{})} @@ -333,6 +336,7 @@ type serviceHandler struct { tlsCert *tls.Certificate mTLSDisabled bool + tlsDisabled bool } // Allocate implements the Allocate gRPC method definition diff --git a/cmd/allocator/metrics.go b/cmd/allocator/metrics.go index 7580efa307..9437b1b548 100644 --- a/cmd/allocator/metrics.go +++ b/cmd/allocator/metrics.go @@ -33,6 +33,7 @@ const ( projectIDFlag = "gcp-project-id" stackdriverLabels = "stackdriver-labels" mTLSDisabledFlag = "disable-mtls" + tlsDisabledFlag = "disable-tls" ) func init() { @@ -40,6 +41,7 @@ func init() { } type config struct { + TLSDisabled bool MTLSDisabled bool PrometheusMetrics bool Stackdriver bool @@ -54,12 +56,14 @@ func parseEnvFlags() config { viper.SetDefault(projectIDFlag, "") viper.SetDefault(stackdriverLabels, "") viper.SetDefault(mTLSDisabledFlag, false) + viper.SetDefault(tlsDisabledFlag, false) pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.") pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. Can also use STACKDRIVER_EXPORTER env variable.") pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. Can also use GCP_PROJECT_ID env variable.") pflag.String(stackdriverLabels, viper.GetString(stackdriverLabels), "A set of default labels to add to all stackdriver metrics generated. By default metadata are automatically added using Kubernetes API and GCP metadata enpoint.") pflag.Bool(mTLSDisabledFlag, viper.GetBool(mTLSDisabledFlag), "Flag to enable/disable mTLS in the allocator.") + pflag.Bool(tlsDisabledFlag, viper.GetBool(tlsDisabledFlag), "Flag to enable/disable TLS in the allocator.") runtime.FeaturesBindFlags() pflag.Parse() @@ -69,6 +73,7 @@ func parseEnvFlags() config { runtime.Must(viper.BindEnv(projectIDFlag)) runtime.Must(viper.BindEnv(stackdriverLabels)) runtime.Must(viper.BindEnv(mTLSDisabledFlag)) + runtime.Must(viper.BindEnv(tlsDisabledFlag)) runtime.Must(viper.BindPFlags(pflag.CommandLine)) runtime.Must(runtime.FeaturesBindEnv()) @@ -80,6 +85,7 @@ func parseEnvFlags() config { GCPProjectID: viper.GetString(projectIDFlag), StackdriverLabels: viper.GetString(stackdriverLabels), MTLSDisabled: viper.GetBool(mTLSDisabledFlag), + TLSDisabled: viper.GetBool(tlsDisabledFlag), } } diff --git a/install/helm/agones/templates/service/allocation.yaml b/install/helm/agones/templates/service/allocation.yaml index fe5ef422c8..7b4785cd7f 100644 --- a/install/helm/agones/templates/service/allocation.yaml +++ b/install/helm/agones/templates/service/allocation.yaml @@ -124,6 +124,8 @@ spec: value: {{ .Values.agones.metrics.stackdriverLabels | quote }} - name: DISABLE_MTLS value: {{ .Values.agones.allocator.disableMTLS | quote }} + - name: DISABLE_TLS + value: {{ .Values.agones.allocator.disableTLS | quote }} - name: POD_NAME valueFrom: fieldRef: diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml index f6e25d30ed..510bef22dc 100644 --- a/install/helm/agones/values.yaml +++ b/install/helm/agones/values.yaml @@ -129,6 +129,7 @@ agones: generateTLS: true generateClientTLS: true disableMTLS: false + disableTLS: false image: registry: gcr.io/agones-images tag: 1.9.0-dev diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index 42a9ceaba3..f116b94d62 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -1576,6 +1576,8 @@ spec: value: "" - name: DISABLE_MTLS value: "false" + - name: DISABLE_TLS + value: "false" - name: POD_NAME valueFrom: fieldRef: From ca52b0c8f6c77a09f67d82fd85abbe191c0c14b7 Mon Sep 17 00:00:00 2001 From: Nikhil Athreya Date: Mon, 31 Aug 2020 23:17:51 +0000 Subject: [PATCH 2/5] Conditionally disabled cert dir watching/reading on disableMTLS. --- cmd/allocator/main.go | 46 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/cmd/allocator/main.go b/cmd/allocator/main.go index 3db8eb47e9..27767e36ca 100644 --- a/cmd/allocator/main.go +++ b/cmd/allocator/main.go @@ -95,7 +95,7 @@ func main() { logger.WithError(err).Fatalf("failed to listen on TCP port %s", sslPort) } - if !h.tlsDisabled { + if !h.mTLSDisabled { // creates a new file watcher for client certificate folder watcher, err := fsnotify.NewWatcher() if err != nil { @@ -106,6 +106,30 @@ func main() { logger.WithError(err).Fatalf("cannot watch folder %s for secret changes", certDir) } + go func() { + for { + select { + // watch for events + case event := <-watcher.Events: + h.certMutex.Lock() + caCertPool, err := getCACertPool(certDir) + if err != nil { + logger.WithError(err).Error("could not load CA certs; keeping old ones") + } else { + h.caCertPool = caCertPool + } + logger.Infof("Certificate directory change event %v", event) + h.certMutex.Unlock() + + // watch for errors + case err := <-watcher.Errors: + logger.WithError(err).Error("error watching for certificate directory") + } + } + }() + } + + if !h.tlsDisabled { watcherTLS, err := fsnotify.NewWatcher() if err != nil { logger.WithError(err).Fatal("could not create watcher for tls certs") @@ -130,20 +154,10 @@ func main() { h.tlsMutex.Unlock() } logger.Infof("Tls directory change event %v", event) - case event := <-watcher.Events: - h.certMutex.Lock() - caCertPool, err := getCACertPool(certDir) - if err != nil { - logger.WithError(err).Error("could not load CA certs; keeping old ones") - } else { - h.caCertPool = caCertPool - } - logger.Infof("Certificate directory change event %v", event) - h.certMutex.Unlock() - // watch for errors - case err := <-watcher.Errors: - logger.WithError(err).Error("error watching for certificate directory") + // watch for errors + case err := <-watcherTLS.Errors: + logger.WithError(err).Error("error watching for TLS directory") } } }() @@ -194,7 +208,7 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I logger.WithError(err).Fatal("starting allocator failed.") } - if !h.tlsDisabled { + if !h.mTLSDisabled { caCertPool, err := getCACertPool(certDir) if err != nil { logger.WithError(err).Fatal("could not load CA certs.") @@ -202,7 +216,9 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I h.certMutex.Lock() h.caCertPool = caCertPool h.certMutex.Unlock() + } + if !h.tlsDisabled { tlsCert, err := readTLSCert() if err != nil { logger.WithError(err).Fatal("could not load TLS certs.") From c0b2bf9e32bbb1bf36dc0a814170fff3759bfed1 Mon Sep 17 00:00:00 2001 From: Nikhil Athreya Date: Mon, 31 Aug 2020 23:51:47 +0000 Subject: [PATCH 3/5] Added some keepalive parameters for health checks. --- cmd/allocator/main.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/cmd/allocator/main.go b/cmd/allocator/main.go index 27767e36ca..c5e67af2e8 100644 --- a/cmd/allocator/main.go +++ b/cmd/allocator/main.go @@ -43,6 +43,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/keepalive" "google.golang.org/grpc/status" "gopkg.in/fsnotify.v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -249,12 +250,25 @@ func (h *serviceHandler) getServerOptions() []grpc.ServerOption { cfg := &tls.Config{ GetCertificate: h.getTLSCert, } + if !h.mTLSDisabled { cfg.ClientAuth = tls.RequireAnyClientCert cfg.VerifyPeerCertificate = h.verifyClientCertificate } - // Add options for creds and OpenCensus stats handler to enable stats and tracing. - return []grpc.ServerOption{grpc.Creds(credentials.NewTLS(cfg)), grpc.StatsHandler(&ocgrpc.ServerHandler{})} + + // Add options for creds, OpenCensus stats handler to enable stats and tracing, and keepalive for health checks. + return []grpc.ServerOption{ + grpc.Creds(credentials.NewTLS(cfg)), + grpc.StatsHandler(&ocgrpc.ServerHandler{}), + grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ + MinTime: 1 * time.Minute, + PermitWithoutStream: true, + }), + grpc.KeepaliveParams(keepalive.ServerParameters{ + MaxConnectionIdle: 5 * time.Minute, + Timeout: 10 * time.Minute, + }), + } } func (h *serviceHandler) getTLSCert(ch *tls.ClientHelloInfo) (*tls.Certificate, error) { From 4be4715fba443aba626746e1e2c4fd3678d42152 Mon Sep 17 00:00:00 2001 From: Nikhil Athreya Date: Tue, 1 Sep 2020 23:16:35 +0000 Subject: [PATCH 4/5] Changed logic to only have mTLS in the case of TLS. --- cmd/allocator/main.go | 88 +++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/cmd/allocator/main.go b/cmd/allocator/main.go index c5e67af2e8..aca6347b5b 100644 --- a/cmd/allocator/main.go +++ b/cmd/allocator/main.go @@ -96,40 +96,6 @@ func main() { logger.WithError(err).Fatalf("failed to listen on TCP port %s", sslPort) } - if !h.mTLSDisabled { - // creates a new file watcher for client certificate folder - watcher, err := fsnotify.NewWatcher() - if err != nil { - logger.WithError(err).Fatal("could not create watcher for client certs") - } - defer watcher.Close() // nolint: errcheck - if err := watcher.Add(certDir); err != nil { - logger.WithError(err).Fatalf("cannot watch folder %s for secret changes", certDir) - } - - go func() { - for { - select { - // watch for events - case event := <-watcher.Events: - h.certMutex.Lock() - caCertPool, err := getCACertPool(certDir) - if err != nil { - logger.WithError(err).Error("could not load CA certs; keeping old ones") - } else { - h.caCertPool = caCertPool - } - logger.Infof("Certificate directory change event %v", event) - h.certMutex.Unlock() - - // watch for errors - case err := <-watcher.Errors: - logger.WithError(err).Error("error watching for certificate directory") - } - } - }() - } - if !h.tlsDisabled { watcherTLS, err := fsnotify.NewWatcher() if err != nil { @@ -162,6 +128,40 @@ func main() { } } }() + + if !h.mTLSDisabled { + // creates a new file watcher for client certificate folder + watcher, err := fsnotify.NewWatcher() + if err != nil { + logger.WithError(err).Fatal("could not create watcher for client certs") + } + defer watcher.Close() // nolint: errcheck + if err := watcher.Add(certDir); err != nil { + logger.WithError(err).Fatalf("cannot watch folder %s for secret changes", certDir) + } + + go func() { + for { + select { + // watch for events + case event := <-watcher.Events: + h.certMutex.Lock() + caCertPool, err := getCACertPool(certDir) + if err != nil { + logger.WithError(err).Error("could not load CA certs; keeping old ones") + } else { + h.caCertPool = caCertPool + } + logger.Infof("Certificate directory change event %v", event) + h.certMutex.Unlock() + + // watch for errors + case err := <-watcher.Errors: + logger.WithError(err).Error("error watching for certificate directory") + } + } + }() + } } opts := h.getServerOptions() @@ -209,16 +209,6 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I logger.WithError(err).Fatal("starting allocator failed.") } - if !h.mTLSDisabled { - caCertPool, err := getCACertPool(certDir) - if err != nil { - logger.WithError(err).Fatal("could not load CA certs.") - } - h.certMutex.Lock() - h.caCertPool = caCertPool - h.certMutex.Unlock() - } - if !h.tlsDisabled { tlsCert, err := readTLSCert() if err != nil { @@ -227,6 +217,16 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I h.tlsMutex.Lock() h.tlsCert = tlsCert h.tlsMutex.Unlock() + + if !h.mTLSDisabled { + caCertPool, err := getCACertPool(certDir) + if err != nil { + logger.WithError(err).Fatal("could not load CA certs.") + } + h.certMutex.Lock() + h.caCertPool = caCertPool + h.certMutex.Unlock() + } } return &h From 628772fe73c32b54952cd3d103478626cc2be39f Mon Sep 17 00:00:00 2001 From: Nikhil Athreya Date: Wed, 2 Sep 2020 20:45:32 +0000 Subject: [PATCH 5/5] Added a comment for why the keepalive options are necessary. --- cmd/allocator/main.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/allocator/main.go b/cmd/allocator/main.go index aca6347b5b..35475c8486 100644 --- a/cmd/allocator/main.go +++ b/cmd/allocator/main.go @@ -256,7 +256,9 @@ func (h *serviceHandler) getServerOptions() []grpc.ServerOption { cfg.VerifyPeerCertificate = h.verifyClientCertificate } - // Add options for creds, OpenCensus stats handler to enable stats and tracing, and keepalive for health checks. + // Add options for creds and OpenCensus stats handler to enable stats and tracing. + // The keepalive options are useful for efficiency purposes (keeping a single connection alive + // instead of constantly recreating connections), when placing the Agones allocator behind load balancers. return []grpc.ServerOption{ grpc.Creds(credentials.NewTLS(cfg)), grpc.StatsHandler(&ocgrpc.ServerHandler{}),