Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conditionally enable mtls for the allocator. #1645

Merged
merged 26 commits into from
Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f53dd07
Working on conditionally enabling mtls.
nathreya-google Jun 24, 2020
fe9fb5a
Removed the need for having certificates with mTLS disabled.
nathreya-google Jun 25, 2020
ef22f25
Fixed bug with mTLS enabling in the controller test.
nathreya-google Jun 25, 2020
7e037d5
Checked error value from runtime feature parsing in controller_test.go
nathreya-google Jun 25, 2020
997ad33
Changed flag name and added it to the documentation.
nathreya-google Jun 25, 2020
f33d47f
Removed feature parsing in controller_test.go
nathreya-google Jun 25, 2020
935b073
Transitioned to using Helm for configuration.
nathreya-google Jun 26, 2020
4d3b31e
Reordered struct members.
nathreya-google Jun 26, 2020
571ed75
Removed non-existent feature gate documentation.
nathreya-google Jun 26, 2020
d90ac89
Ran gen-install to add new configuration parameters.
nathreya-google Jun 26, 2020
889fa3f
Working on conditionally enabling mtls.
nathreya-google Jun 24, 2020
452c1e4
Removed the need for having certificates with mTLS disabled.
nathreya-google Jun 25, 2020
1a0e0b2
Fixed bug with mTLS enabling in the controller test.
nathreya-google Jun 25, 2020
4e15cd9
Checked error value from runtime feature parsing in controller_test.go
nathreya-google Jun 25, 2020
b20305c
Changed flag name and added it to the documentation.
nathreya-google Jun 25, 2020
72b3db1
Removed feature parsing in controller_test.go
nathreya-google Jun 25, 2020
5f98c64
Transitioned to using Helm for configuration.
nathreya-google Jun 26, 2020
25929ef
Reordered struct members.
nathreya-google Jun 26, 2020
1ce6010
Removed non-existent feature gate documentation.
nathreya-google Jun 26, 2020
b61078e
Reverted changes to {cloudbuild,install}.yaml
nathreya-google Jun 26, 2020
19b1f16
Readded disableMTLS configuration to install.yaml.
nathreya-google Jun 26, 2020
737a50e
Removed duplicate disableMTLS flag in values.yaml
nathreya-google Jun 26, 2020
f865947
Still want to keep MTLS for multi-cluster.
nathreya-google Jun 26, 2020
fce7043
Clarified the comment a bit more.
nathreya-google Jun 26, 2020
326750c
Removed mTLS disabling from the game server allocator.
nathreya-google Jun 27, 2020
7d48bb0
Merge branch 'master' into mtls-flag
devloop0 Jun 27, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 75 additions & 65 deletions cmd/allocator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,64 +88,66 @@ func main() {
return err
})

h := newServiceHandler(kubeClient, agonesClient, health)

// creates a new file watcher for client certificate folder
watcher, err := fsnotify.NewWatcher()
if err != nil {
logger.WithError(err).Fatal("could not create watcher for client certs")
}
defer watcher.Close() // nolint: errcheck
if err := watcher.Add(certDir); err != nil {
logger.WithError(err).Fatalf("cannot watch folder %s for secret changes", certDir)
}

watcherTLS, err := fsnotify.NewWatcher()
if err != nil {
logger.WithError(err).Fatal("could not create watcher for tls certs")
}
defer watcherTLS.Close() // nolint: errcheck
if err := watcherTLS.Add(tlsDir); err != nil {
logger.WithError(err).Fatalf("cannot watch folder %s for secret changes", tlsDir)
}
h := newServiceHandler(kubeClient, agonesClient, health, conf.MTLSDisabled)

listener, err := net.Listen("tcp", fmt.Sprintf(":%s", sslPort))
if err != nil {
logger.WithError(err).Fatalf("failed to listen on TCP port %s", sslPort)
}

// Watching for the events in certificate directory for updating certificates, when there is a change
go func() {
for {
select {
// watch for events
case event := <-watcherTLS.Events:
tlsCert, err := readTLSCert()
if err != nil {
logger.WithError(err).Error("could not load TLS cert; keeping old one")
} else {
h.tlsMutex.Lock()
h.tlsCert = tlsCert
h.tlsMutex.Unlock()
}
logger.Infof("Tls directory change event %v", event)
case event := <-watcher.Events:
h.certMutex.Lock()
caCertPool, err := getCACertPool(certDir)
if err != nil {
logger.WithError(err).Error("could not load CA certs; keeping old ones")
} else {
h.caCertPool = caCertPool
}
logger.Infof("Certificate directory change event %v", event)
h.certMutex.Unlock()
if !h.mTLSDisabled {
// creates a new file watcher for client certificate folder
watcher, err := fsnotify.NewWatcher()
if err != nil {
logger.WithError(err).Fatal("could not create watcher for client certs")
}
defer watcher.Close() // nolint: errcheck
if err := watcher.Add(certDir); err != nil {
logger.WithError(err).Fatalf("cannot watch folder %s for secret changes", certDir)
}

// watch for errors
case err := <-watcher.Errors:
logger.WithError(err).Error("error watching for certificate directory")
}
watcherTLS, err := fsnotify.NewWatcher()
if err != nil {
logger.WithError(err).Fatal("could not create watcher for tls certs")
}
}()
defer watcherTLS.Close() // nolint: errcheck
if err := watcherTLS.Add(tlsDir); err != nil {
logger.WithError(err).Fatalf("cannot watch folder %s for secret changes", tlsDir)
}

// Watching for the events in certificate directory for updating certificates, when there is a change
go func() {
for {
select {
// watch for events
case event := <-watcherTLS.Events:
tlsCert, err := readTLSCert()
if err != nil {
logger.WithError(err).Error("could not load TLS cert; keeping old one")
} else {
h.tlsMutex.Lock()
h.tlsCert = tlsCert
h.tlsMutex.Unlock()
}
logger.Infof("Tls directory change event %v", event)
case event := <-watcher.Events:
h.certMutex.Lock()
caCertPool, err := getCACertPool(certDir)
if err != nil {
logger.WithError(err).Error("could not load CA certs; keeping old ones")
} else {
h.caCertPool = caCertPool
}
logger.Infof("Certificate directory change event %v", event)
h.certMutex.Unlock()

// watch for errors
case err := <-watcher.Errors:
logger.WithError(err).Error("error watching for certificate directory")
}
}
}()
}

opts := h.getServerOptions()

Expand All @@ -165,7 +167,7 @@ func main() {
logger.WithError(err).Fatal("allocation service crashed")
}

func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.Interface, health healthcheck.Handler) *serviceHandler {
func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.Interface, health healthcheck.Handler, mTLSDisabled bool) *serviceHandler {
defaultResync := 30 * time.Second
agonesInformerFactory := externalversions.NewSharedInformerFactory(agonesClient, defaultResync)
kubeInformerFactory := informers.NewSharedInformerFactory(kubeClient, defaultResync)
Expand All @@ -175,13 +177,14 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I
agonesInformerFactory.Multicluster().V1().GameServerAllocationPolicies(),
kubeInformerFactory.Core().V1().Secrets(),
kubeClient,
gameserverallocations.NewReadyGameServerCache(agonesInformerFactory.Agones().V1().GameServers(), agonesClient.AgonesV1(), gsCounter, health))
gameserverallocations.NewReadyGameServerCache(agonesInformerFactory.Agones().V1().GameServers(), agonesClient.AgonesV1(), gsCounter, health), mTLSDisabled)

stop := signals.NewStopChannel()
h := serviceHandler{
allocationCallback: func(gsa *allocationv1.GameServerAllocation) (k8sruntime.Object, error) {
return allocator.Allocate(gsa, stop)
},
mTLSDisabled: mTLSDisabled,
}

kubeInformerFactory.Start(stop)
Expand All @@ -190,21 +193,23 @@ func newServiceHandler(kubeClient kubernetes.Interface, agonesClient versioned.I
logger.WithError(err).Fatal("starting allocator failed.")
}

caCertPool, err := getCACertPool(certDir)
if err != nil {
logger.WithError(err).Fatal("could not load CA certs.")
}
h.certMutex.Lock()
h.caCertPool = caCertPool
h.certMutex.Unlock()
if !h.mTLSDisabled {
caCertPool, err := getCACertPool(certDir)
if err != nil {
logger.WithError(err).Fatal("could not load CA certs.")
}
h.certMutex.Lock()
h.caCertPool = caCertPool
h.certMutex.Unlock()

tlsCert, err := readTLSCert()
if err != nil {
logger.WithError(err).Fatal("could not load TLS certs.")
tlsCert, err := readTLSCert()
if err != nil {
logger.WithError(err).Fatal("could not load TLS certs.")
}
h.tlsMutex.Lock()
h.tlsCert = tlsCert
h.tlsMutex.Unlock()
}
h.tlsMutex.Lock()
h.tlsCert = tlsCert
h.tlsMutex.Unlock()

return &h
}
Expand All @@ -220,6 +225,9 @@ func readTLSCert() (*tls.Certificate, error) {
// getServerOptions returns a list of GRPC server options.
// Current options are TLS certs and opencensus stats handler.
func (h *serviceHandler) getServerOptions() []grpc.ServerOption {
if h.mTLSDisabled {
return []grpc.ServerOption{grpc.StatsHandler(&ocgrpc.ServerHandler{})}
}

cfg := &tls.Config{
GetCertificate: h.getTLSCert,
Expand Down Expand Up @@ -323,6 +331,8 @@ type serviceHandler struct {

tlsMutex sync.RWMutex
tlsCert *tls.Certificate

mTLSDisabled bool
}

// Allocate implements the Allocate gRPC method definition
Expand Down
6 changes: 6 additions & 0 deletions cmd/allocator/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ const (
enablePrometheusMetricsFlag = "prometheus-exporter"
projectIDFlag = "gcp-project-id"
stackdriverLabels = "stackdriver-labels"
mTLSDisabledFlag = "disable-mtls"
)

func init() {
registerMetricViews()
}

type config struct {
MTLSDisabled bool
PrometheusMetrics bool
Stackdriver bool
GCPProjectID string
Expand All @@ -51,11 +53,13 @@ func parseEnvFlags() config {
viper.SetDefault(enableStackdriverMetricsFlag, false)
viper.SetDefault(projectIDFlag, "")
viper.SetDefault(stackdriverLabels, "")
viper.SetDefault(mTLSDisabledFlag, false)

pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.")
pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. Can also use STACKDRIVER_EXPORTER env variable.")
pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. Can also use GCP_PROJECT_ID env variable.")
pflag.String(stackdriverLabels, viper.GetString(stackdriverLabels), "A set of default labels to add to all stackdriver metrics generated. By default metadata are automatically added using Kubernetes API and GCP metadata enpoint.")
pflag.Bool(mTLSDisabledFlag, viper.GetBool(mTLSDisabledFlag), "Flag to enable/disable mTLS in the allocator.")
runtime.FeaturesBindFlags()
pflag.Parse()

Expand All @@ -64,6 +68,7 @@ func parseEnvFlags() config {
runtime.Must(viper.BindEnv(enableStackdriverMetricsFlag))
runtime.Must(viper.BindEnv(projectIDFlag))
runtime.Must(viper.BindEnv(stackdriverLabels))
runtime.Must(viper.BindEnv(mTLSDisabledFlag))
runtime.Must(viper.BindPFlags(pflag.CommandLine))
runtime.Must(runtime.FeaturesBindEnv())

Expand All @@ -74,6 +79,7 @@ func parseEnvFlags() config {
Stackdriver: viper.GetBool(enableStackdriverMetricsFlag),
GCPProjectID: viper.GetString(projectIDFlag),
StackdriverLabels: viper.GetString(stackdriverLabels),
MTLSDisabled: viper.GetBool(mTLSDisabledFlag),
}
}

Expand Down
8 changes: 7 additions & 1 deletion cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ const (
logLevelFlag = "log-level"
logSizeLimitMBFlag = "log-size-limit-mb"
kubeconfigFlag = "kubeconfig"
mTLSDisabledFlag = "disable-mtls"
defaultResync = 30 * time.Second
)

Expand Down Expand Up @@ -209,7 +210,7 @@ func main() {
gsSetController := gameserversets.NewController(wh, health, gsCounter,
kubeClient, extClient, agonesClient, agonesInformerFactory)
fleetController := fleets.NewController(wh, health, kubeClient, extClient, agonesClient, agonesInformerFactory)
gasController := gameserverallocations.NewController(api, health, gsCounter, kubeClient, kubeInformerFactory, agonesClient, agonesInformerFactory)
gasController := gameserverallocations.NewController(api, health, gsCounter, kubeClient, kubeInformerFactory, agonesClient, agonesInformerFactory, ctlConf.MTLSDisabled)
fasController := fleetautoscalers.NewController(wh, health,
kubeClient, extClient, agonesClient, agonesInformerFactory)

Expand Down Expand Up @@ -240,6 +241,7 @@ func parseEnvFlags() config {
}

base := filepath.Dir(exec)
viper.SetDefault(mTLSDisabledFlag, false)
viper.SetDefault(sidecarImageFlag, "gcr.io/agones-images/agones-sdk:"+pkg.Version)
viper.SetDefault(sidecarCPURequestFlag, "0")
viper.SetDefault(sidecarCPULimitFlag, "0")
Expand All @@ -261,6 +263,7 @@ func parseEnvFlags() config {
viper.SetDefault(logLevelFlag, "Info")
viper.SetDefault(logSizeLimitMBFlag, 10000) // 10 GB, will be split into 100 MB chunks

pflag.Bool(mTLSDisabledFlag, viper.GetBool(mTLSDisabledFlag), "Flag to enable/disable mTLS for the allocator.")
pflag.String(sidecarImageFlag, viper.GetString(sidecarImageFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable")
pflag.String(sidecarCPULimitFlag, viper.GetString(sidecarCPULimitFlag), "Flag to overwrite the GameServer sidecar container's cpu limit. Can also use SIDECAR_CPU_LIMIT env variable")
pflag.String(sidecarCPURequestFlag, viper.GetString(sidecarCPURequestFlag), "Flag to overwrite the GameServer sidecar container's cpu request. Can also use SIDECAR_CPU_REQUEST env variable")
Expand All @@ -287,6 +290,7 @@ func parseEnvFlags() config {
pflag.Parse()

viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
runtime.Must(viper.BindEnv(mTLSDisabledFlag))
runtime.Must(viper.BindEnv(sidecarImageFlag))
runtime.Must(viper.BindEnv(sidecarCPULimitFlag))
runtime.Must(viper.BindEnv(sidecarCPURequestFlag))
Expand Down Expand Up @@ -357,6 +361,7 @@ func parseEnvFlags() config {
LogLevel: viper.GetString(logLevelFlag),
LogSizeLimitMB: int(viper.GetInt32(logSizeLimitMBFlag)),
StackdriverLabels: viper.GetString(stackdriverLabels),
MTLSDisabled: viper.GetBool(mTLSDisabledFlag),
}
}

Expand All @@ -373,6 +378,7 @@ type config struct {
AlwaysPullSidecar bool
PrometheusMetrics bool
Stackdriver bool
MTLSDisabled bool
StackdriverLabels string
KeyFile string
CertFile string
Expand Down
2 changes: 2 additions & 0 deletions install/helm/agones/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: DISABLE_MTLS
devloop0 marked this conversation as resolved.
Show resolved Hide resolved
value: {{ .Values.agones.disableMTLS | quote }}
- name: POD_NAMESPACE
valueFrom:
fieldRef:
Expand Down
2 changes: 2 additions & 0 deletions install/helm/agones/templates/service/allocation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ spec:
value: {{ .Values.agones.metrics.stackdriverProjectID | quote }}
- name: STACKDRIVER_LABELS
value: {{ .Values.agones.metrics.stackdriverLabels | quote }}
- name: DISABLE_MTLS
value: {{ .Values.agones.disableMTLS | quote }}
- name: POD_NAME
valueFrom:
fieldRef:
Expand Down
1 change: 1 addition & 0 deletions install/helm/agones/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ agones:
sdk: agones-sdk
createPriorityClass: true
priorityClassName: agones-system
disableMTLS: false
controller:
resources: {}
nodeSelector: {}
Expand Down
4 changes: 4 additions & 0 deletions install/yaml/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1392,6 +1392,8 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: DISABLE_MTLS
value: "false"
- name: POD_NAMESPACE
valueFrom:
fieldRef:
Expand Down Expand Up @@ -1574,6 +1576,8 @@ spec:
value: ""
- name: STACKDRIVER_LABELS
value: ""
- name: DISABLE_MTLS
value: "false"
- name: POD_NAME
valueFrom:
fieldRef:
Expand Down
8 changes: 7 additions & 1 deletion pkg/gameserverallocations/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ type Allocator struct {
readyGameServerCache *ReadyGameServerCache
topNGameServerCount int
remoteAllocationCallback func(string, grpc.DialOption, *pb.AllocationRequest) (*pb.AllocationResponse, error)
mTLSDisabled bool
}

// request is an async request for allocation
Expand All @@ -124,7 +125,7 @@ type response struct {

// NewAllocator creates an instance off Allocator
func NewAllocator(policyInformer multiclusterinformerv1.GameServerAllocationPolicyInformer, secretInformer informercorev1.SecretInformer,
kubeClient kubernetes.Interface, readyGameServerCache *ReadyGameServerCache) *Allocator {
kubeClient kubernetes.Interface, readyGameServerCache *ReadyGameServerCache, mTLSDisabled bool) *Allocator {
ah := &Allocator{
pendingRequests: make(chan request, maxBatchQueue),
allocationPolicyLister: policyInformer.Lister(),
Expand All @@ -143,6 +144,7 @@ func NewAllocator(policyInformer multiclusterinformerv1.GameServerAllocationPoli
grpcClient := pb.NewAllocationServiceClient(conn)
return grpcClient.Allocate(context.Background(), request)
},
mTLSDisabled: mTLSDisabled,
}

ah.baseLogger = runtime.NewLoggerWithType(ah)
Expand Down Expand Up @@ -364,6 +366,10 @@ func (c *Allocator) allocateFromRemoteCluster(gsa *allocationv1.GameServerAlloca

// createRemoteClusterDialOption creates a grpc client dial option with proper certs to make a remote call.
func (c *Allocator) createRemoteClusterDialOption(namespace string, connectionInfo *multiclusterv1.ClusterConnectionInfo) (grpc.DialOption, error) {
if c.mTLSDisabled {
devloop0 marked this conversation as resolved.
Show resolved Hide resolved
return grpc.WithInsecure(), nil
}

clientCert, clientKey, caCert, err := c.getClientCertificates(namespace, connectionInfo.SecretName)
if err != nil {
return nil, err
Expand Down
4 changes: 3 additions & 1 deletion pkg/gameserverallocations/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,16 @@ func NewController(apiServer *apiserver.APIServer,
kubeInformerFactory informers.SharedInformerFactory,
agonesClient versioned.Interface,
agonesInformerFactory externalversions.SharedInformerFactory,
mTLSDisabled bool,
) *Controller {
c := &Controller{
api: apiServer,
allocator: NewAllocator(
agonesInformerFactory.Multicluster().V1().GameServerAllocationPolicies(),
kubeInformerFactory.Core().V1().Secrets(),
kubeClient,
NewReadyGameServerCache(agonesInformerFactory.Agones().V1().GameServers(), agonesClient.AgonesV1(), counter, health)),
NewReadyGameServerCache(agonesInformerFactory.Agones().V1().GameServers(), agonesClient.AgonesV1(), counter, health),
mTLSDisabled),
}
c.baseLogger = runtime.NewLoggerWithType(c)

Expand Down
Loading