Do not use the admin kubeconfig for the supervisor and core controllers #7616

Merged: 4 commits, May 31, 2023
25 changes: 25 additions & 0 deletions docs/adrs/core-controller-user.md
@@ -0,0 +1,25 @@
# Use a dedicated user for K3s core controllers

Date: 2023-05-26

## Status

Accepted

## Context

Users who collect audit logs from K3s currently have a hard time determining whether an action was performed by an administrator or by the K3s supervisor.
This is because the K3s supervisor uses the same `system:admin` user for both the admin kubeconfig and the kubeconfig used by the core Wrangler controllers that drive core functionality and the deploy/helm controllers.

Users may have policies in place that prohibit use of the `system:admin` account, or that require service accounts to be distinct from user accounts.

## Decision

* We will add a new kubeconfig for the K3s supervisor controllers: core functionality, deploy (AddOns; aka the manifests directory), and helm (HelmChart/HelmChartConfig).
* Each of the three controllers will use a dedicated user-agent to further assist in discriminating between events, via both audit logs and resource ManagedFields tracking (see the client sketch below).
* The new user account will use existing core Kubernetes group RBAC.

## Consequences

* K3s servers will create and manage an additional kubeconfig, client cert, and key that is intended only for use by the supervisor controllers.
* K3s supervisor controllers will use distinct user-agents to further discriminate between which component initiated the request.
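To illustrate the intended separation, here is a minimal, hypothetical client sketch. The kubeconfig path comes from this PR's diff (under the default `/var/lib/rancher/k3s` data dir); the node name, version, and commit in the user-agent are placeholder values, and the user-agent format mirrors the `util.GetUserAgent` helper added below.

```go
package main

import (
	"fmt"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// New credential introduced by this PR, separate from admin.kubeconfig.
	kubeconfig := "/var/lib/rancher/k3s/server/cred/supervisor.kubeconfig"

	restConfig, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
	if err != nil {
		panic(err)
	}
	// A per-controller user-agent lets audit logs and managedFields tracking
	// attribute each request to a specific controller on a specific node.
	restConfig.UserAgent = "helm-controller@node1/v1.27.1+k3s1 (linux/amd64) k3s/0000000"

	k8s, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		panic(err)
	}
	v, err := k8s.Discovery().ServerVersion()
	if err != nil {
		panic(err)
	}
	fmt.Printf("connected as the supervisor user; server version %s\n", v.GitVersion)
}
```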
8 changes: 4 additions & 4 deletions pkg/cli/cmds/server.go
@@ -15,10 +15,10 @@ const (
)

type StartupHookArgs struct {
-APIServerReady <-chan struct{}
-KubeConfigAdmin string
-Skips map[string]bool
-Disables map[string]bool
+APIServerReady       <-chan struct{}
+KubeConfigSupervisor string
+Skips                map[string]bool
+Disables             map[string]bool
}

type StartupHook func(context.Context, *sync.WaitGroup, StartupHookArgs) error
8 changes: 4 additions & 4 deletions pkg/cli/etcdsnapshot/etcd_snapshot.go
@@ -64,7 +64,7 @@ func commandSetup(app *cli.Context, cfg *cmds.Server, sc *server.Config) error {
sc.ControlConfig.Runtime.ETCDServerCA = filepath.Join(dataDir, "tls", "etcd", "server-ca.crt")
sc.ControlConfig.Runtime.ClientETCDCert = filepath.Join(dataDir, "tls", "etcd", "client.crt")
sc.ControlConfig.Runtime.ClientETCDKey = filepath.Join(dataDir, "tls", "etcd", "client.key")
-sc.ControlConfig.Runtime.KubeConfigAdmin = filepath.Join(dataDir, "cred", "admin.kubeconfig")
+sc.ControlConfig.Runtime.KubeConfigSupervisor = filepath.Join(dataDir, "cred", "supervisor.kubeconfig")

return nil
}
@@ -110,7 +110,7 @@ func save(app *cli.Context, cfg *cmds.Server) error {
return err
}

-sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigAdmin)
+sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigSupervisor)
if err != nil {
return err
}
@@ -144,7 +144,7 @@ func delete(app *cli.Context, cfg *cmds.Server) error {
return err
}

-sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigAdmin)
+sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigSupervisor)
if err != nil {
return err
}
@@ -250,7 +250,7 @@ func prune(app *cli.Context, cfg *cmds.Server) error {
return err
}

-sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigAdmin)
+sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigSupervisor)
if err != nil {
return err
}
3 changes: 3 additions & 0 deletions pkg/daemons/config/types.go
@@ -294,6 +294,7 @@ type ControlRuntime struct {
ServiceCurrentKey string

KubeConfigAdmin string
+KubeConfigSupervisor string
KubeConfigController string
KubeConfigScheduler string
KubeConfigAPIServer string
@@ -317,6 +318,8 @@ type ControlRuntime struct {

ClientAdminCert string
ClientAdminKey string
+ClientSupervisorCert string
+ClientSupervisorKey string
ClientControllerCert string
ClientControllerKey string
ClientSchedulerCert string
13 changes: 13 additions & 0 deletions pkg/daemons/control/deps/deps.go
@@ -119,13 +119,16 @@ func CreateRuntimeCertFiles(config *config.Control) {
runtime.ServiceCurrentKey = filepath.Join(config.DataDir, "tls", "service.current.key")

runtime.KubeConfigAdmin = filepath.Join(config.DataDir, "cred", "admin.kubeconfig")
+runtime.KubeConfigSupervisor = filepath.Join(config.DataDir, "cred", "supervisor.kubeconfig")
runtime.KubeConfigController = filepath.Join(config.DataDir, "cred", "controller.kubeconfig")
runtime.KubeConfigScheduler = filepath.Join(config.DataDir, "cred", "scheduler.kubeconfig")
runtime.KubeConfigAPIServer = filepath.Join(config.DataDir, "cred", "api-server.kubeconfig")
runtime.KubeConfigCloudController = filepath.Join(config.DataDir, "cred", "cloud-controller.kubeconfig")

runtime.ClientAdminCert = filepath.Join(config.DataDir, "tls", "client-admin.crt")
runtime.ClientAdminKey = filepath.Join(config.DataDir, "tls", "client-admin.key")
+runtime.ClientSupervisorCert = filepath.Join(config.DataDir, "tls", "client-supervisor.crt")
+runtime.ClientSupervisorKey = filepath.Join(config.DataDir, "tls", "client-supervisor.key")
runtime.ClientControllerCert = filepath.Join(config.DataDir, "tls", "client-controller.crt")
runtime.ClientControllerKey = filepath.Join(config.DataDir, "tls", "client-controller.key")
runtime.ClientCloudControllerCert = filepath.Join(config.DataDir, "tls", "client-"+version.Program+"-cloud-controller.crt")
@@ -351,6 +354,16 @@ func genClientCerts(config *config.Control) error {
}
}

+certGen, err = factory("system:"+version.Program+"-supervisor", []string{user.SystemPrivilegedGroup}, runtime.ClientSupervisorCert, runtime.ClientSupervisorKey)
+if err != nil {
+return err
+}
+if certGen {
+if err := KubeConfig(runtime.KubeConfigSupervisor, apiEndpoint, runtime.ServerCA, runtime.ClientSupervisorCert, runtime.ClientSupervisorKey); err != nil {
+return err
+}
+}

certGen, err = factory(user.KubeControllerManager, nil, runtime.ClientControllerCert, runtime.ClientControllerKey)
if err != nil {
return err
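As a quick check of the identity generated above, the supervisor client certificate's subject can be inspected directly. A minimal sketch, assuming the default data dir and that `version.Program` is `k3s`, so the `factory` call above yields CN `system:k3s-supervisor` with organization `system:masters` (from `user.SystemPrivilegedGroup`):

```go
package main

import (
	"crypto/x509"
	"encoding/pem"
	"fmt"
	"os"
)

func main() {
	// Filename comes from CreateRuntimeCertFiles above; the data dir is the k3s default.
	data, err := os.ReadFile("/var/lib/rancher/k3s/server/tls/client-supervisor.crt")
	if err != nil {
		panic(err)
	}
	block, _ := pem.Decode(data)
	if block == nil {
		panic("no PEM block found")
	}
	cert, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		panic(err)
	}
	// Expected, per genClientCerts: CN=system:k3s-supervisor, O=system:masters
	fmt.Println("subject:", cert.Subject.String())
}
```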
4 changes: 2 additions & 2 deletions pkg/daemons/control/server.go
@@ -383,7 +383,7 @@ func cloudControllerManager(ctx context.Context, cfg *config.Control) error {
// If the CCM RBAC changes, the ResourceAttributes checked for by this function should
// be modified to check for the most recently added privilege.
func checkForCloudControllerPrivileges(ctx context.Context, runtime *config.ControlRuntime, timeout time.Duration) error {
-return util.WaitForRBACReady(ctx, runtime.KubeConfigAdmin, timeout, authorizationv1.ResourceAttributes{
+return util.WaitForRBACReady(ctx, runtime.KubeConfigSupervisor, timeout, authorizationv1.ResourceAttributes{
Namespace: metav1.NamespaceSystem,
Verb: "watch",
Resource: "endpointslices",
@@ -424,7 +424,7 @@ func waitForAPIServerInBackground(ctx context.Context, runtime *config.ControlRuntime
select {
case <-ctx.Done():
return
-case err := <-promise(func() error { return util.WaitForAPIServerReady(ctx, runtime.KubeConfigAdmin, 30*time.Second) }):
+case err := <-promise(func() error { return util.WaitForAPIServerReady(ctx, runtime.KubeConfigSupervisor, 30*time.Second) }):
if err != nil {
logrus.Infof("Waiting for API server to become available")
continue
21 changes: 2 additions & 19 deletions pkg/server/context.go
@@ -2,26 +2,20 @@ package server

import (
"context"
"fmt"
"os"
"runtime"

helmcrd "github.com/k3s-io/helm-controller/pkg/crd"
"github.com/k3s-io/helm-controller/pkg/generated/controllers/helm.cattle.io"
addoncrd "github.com/k3s-io/k3s/pkg/crd"
"github.com/k3s-io/k3s/pkg/deploy"
"github.com/k3s-io/k3s/pkg/generated/controllers/k3s.cattle.io"
"github.com/k3s-io/k3s/pkg/util"
"github.com/k3s-io/k3s/pkg/version"
"github.com/pkg/errors"
"github.com/rancher/wrangler/pkg/apply"
"github.com/rancher/wrangler/pkg/crd"
"github.com/rancher/wrangler/pkg/generated/controllers/apps"
"github.com/rancher/wrangler/pkg/generated/controllers/batch"
"github.com/rancher/wrangler/pkg/generated/controllers/core"
"github.com/rancher/wrangler/pkg/generated/controllers/rbac"
"github.com/rancher/wrangler/pkg/start"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
@@ -35,7 +29,6 @@ type Context struct {
Auth *rbac.Factory
Core *core.Factory
K8s kubernetes.Interface
-Apply apply.Apply
}

func (c *Context) Start(ctx context.Context) error {
@@ -47,16 +40,7 @@ func NewContext(ctx context.Context, cfg string) (*Context, error) {
if err != nil {
return nil, err
}

-// Construct a custom user-agent string for the apply client used by the deploy controller
-// so that we can track which node's deploy controller most recently modified a resource.
-nodeName := os.Getenv("NODE_NAME")
-managerName := deploy.ControllerName + "@" + nodeName
-if nodeName == "" || len(managerName) > validation.FieldManagerMaxLength {
-logrus.Warn("Deploy controller node name is empty or too long, and will not be tracked via server side apply field management")
-managerName = deploy.ControllerName
-}
-restConfig.UserAgent = fmt.Sprintf("%s/%s (%s/%s) %s/%s", managerName, version.Version, runtime.GOOS, runtime.GOARCH, version.Program, version.GitCommit)
+restConfig.UserAgent = util.GetUserAgent(version.Program + "-supervisor")

if err := crds(ctx, restConfig); err != nil {
return nil, errors.Wrap(err, "failed to register CRDs")
@@ -74,7 +58,6 @@ func NewContext(ctx context.Context, cfg string) (*Context, error) {
Apps: apps.NewFactoryFromConfigOrDie(restConfig),
Batch: batch.NewFactoryFromConfigOrDie(restConfig),
Core: core.NewFactoryFromConfigOrDie(restConfig),
-Apply: apply.New(k8s, apply.NewClientFactory(restConfig)).WithDynamicLookup(),
}, nil
}

81 changes: 57 additions & 24 deletions pkg/server/server.go
@@ -12,7 +12,7 @@ import (
"sync"
"time"

helm "github.com/k3s-io/helm-controller/pkg/controllers/chart"
helmchart "github.com/k3s-io/helm-controller/pkg/controllers/chart"
helmcommon "github.com/k3s-io/helm-controller/pkg/controllers/common"
"github.com/k3s-io/k3s/pkg/cli/cmds"
"github.com/k3s-io/k3s/pkg/clientaccess"
@@ -28,12 +28,15 @@ import (
"github.com/k3s-io/k3s/pkg/util"
"github.com/k3s-io/k3s/pkg/version"
"github.com/pkg/errors"
"github.com/rancher/wrangler/pkg/apply"
v1 "github.com/rancher/wrangler/pkg/generated/controllers/core/v1"
"github.com/rancher/wrangler/pkg/leader"
"github.com/rancher/wrangler/pkg/resolvehome"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)

const (
Expand Down Expand Up @@ -67,10 +70,10 @@ func StartServer(ctx context.Context, config *Config, cfg *cmds.Server) error {
config.ControlConfig.Runtime.StartupHooksWg = wg

shArgs := cmds.StartupHookArgs{
-APIServerReady: config.ControlConfig.Runtime.APIServerReady,
-KubeConfigAdmin: config.ControlConfig.Runtime.KubeConfigAdmin,
-Skips: config.ControlConfig.Skips,
-Disables: config.ControlConfig.Disables,
+APIServerReady:       config.ControlConfig.Runtime.APIServerReady,
+KubeConfigSupervisor: config.ControlConfig.Runtime.KubeConfigSupervisor,
+Skips:                config.ControlConfig.Skips,
+Disables:             config.ControlConfig.Disables,
}
for _, hook := range config.StartupHooks {
if err := hook(ctx, wg, shArgs); err != nil {
@@ -101,7 +104,7 @@ func startOnAPIServerReady(ctx context.Context, config *Config) {
func runControllers(ctx context.Context, config *Config) error {
controlConfig := &config.ControlConfig

-sc, err := NewContext(ctx, controlConfig.Runtime.KubeConfigAdmin)
+sc, err := NewContext(ctx, controlConfig.Runtime.KubeConfigSupervisor)
if err != nil {
return errors.Wrap(err, "failed to create new server context")
}
@@ -205,26 +208,42 @@ func coreControllers(ctx context.Context, sc *Context, config *Config) error {

// apply SystemDefaultRegistry setting to Helm before starting controllers
if config.ControlConfig.SystemDefaultRegistry != "" {
-helm.DefaultJobImage = config.ControlConfig.SystemDefaultRegistry + "/" + helm.DefaultJobImage
+helmchart.DefaultJobImage = config.ControlConfig.SystemDefaultRegistry + "/" + helmchart.DefaultJobImage
}

if !config.ControlConfig.DisableHelmController {
-helm.Register(ctx,
+restConfig, err := clientcmd.BuildConfigFromFlags("", config.ControlConfig.Runtime.KubeConfigSupervisor)
+if err != nil {
+return err
+}
+restConfig.UserAgent = util.GetUserAgent(helmcommon.Name)
+
+k8s, err := clientset.NewForConfig(restConfig)
+if err != nil {
+return err
+}
+
+apply := apply.New(k8s, apply.NewClientFactory(restConfig)).WithDynamicLookup()
+helm := sc.Helm.WithAgent(restConfig.UserAgent)
+batch := sc.Batch.WithAgent(restConfig.UserAgent)
+auth := sc.Auth.WithAgent(restConfig.UserAgent)
+core := sc.Core.WithAgent(restConfig.UserAgent)
+helmchart.Register(ctx,
metav1.NamespaceAll,
helmcommon.Name,
-sc.K8s,
-sc.Apply,
-util.BuildControllerEventRecorder(sc.K8s, helmcommon.Name, metav1.NamespaceAll),
-sc.Helm.Helm().V1().HelmChart(),
-sc.Helm.Helm().V1().HelmChart().Cache(),
-sc.Helm.Helm().V1().HelmChartConfig(),
-sc.Helm.Helm().V1().HelmChartConfig().Cache(),
-sc.Batch.Batch().V1().Job(),
-sc.Batch.Batch().V1().Job().Cache(),
-sc.Auth.Rbac().V1().ClusterRoleBinding(),
-sc.Core.Core().V1().ServiceAccount(),
-sc.Core.Core().V1().ConfigMap(),
-sc.Core.Core().V1().Secret())
+k8s,
+apply,
+util.BuildControllerEventRecorder(k8s, helmcommon.Name, metav1.NamespaceAll),
+helm.V1().HelmChart(),
+helm.V1().HelmChart().Cache(),
+helm.V1().HelmChartConfig(),
+helm.V1().HelmChartConfig().Cache(),
+batch.V1().Job(),
+batch.V1().Job().Cache(),
+auth.V1().ClusterRoleBinding(),
+core.V1().ServiceAccount(),
+core.V1().ConfigMap(),
+core.V1().Secret())
}

if config.ControlConfig.EncryptSecrets {
@@ -274,10 +293,24 @@ func stageFiles(ctx context.Context, sc *Context, controlConfig *config.Control)
return err
}

+restConfig, err := clientcmd.BuildConfigFromFlags("", controlConfig.Runtime.KubeConfigSupervisor)
+if err != nil {
+return err
+}
+restConfig.UserAgent = util.GetUserAgent("deploy")
+
+k8s, err := clientset.NewForConfig(restConfig)
+if err != nil {
+return err
+}
+
+apply := apply.New(k8s, apply.NewClientFactory(restConfig)).WithDynamicLookup()
+k3s := sc.K3s.WithAgent(restConfig.UserAgent)

return deploy.WatchFiles(ctx,
-sc.K8s,
-sc.Apply,
-sc.K3s.K3s().V1().Addon(),
+k8s,
+apply,
+k3s.V1().Addon(),
controlConfig.Disables,
dataDir)
}
17 changes: 17 additions & 0 deletions pkg/util/client.go
@@ -1,9 +1,15 @@
package util

import (
"fmt"
"os"
"runtime"
"strings"

"github.com/k3s-io/k3s/pkg/datadir"
"github.com/k3s-io/k3s/pkg/version"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/apis/meta/v1/validation"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)
@@ -30,6 +36,17 @@ func GetClientSet(file string) (clientset.Interface, error) {
return clientset.NewForConfig(restConfig)
}

+// GetUserAgent builds a complete UserAgent string for a given controller, including the node name if possible.
+func GetUserAgent(controllerName string) string {
+nodeName := os.Getenv("NODE_NAME")
+managerName := controllerName + "@" + nodeName
+if nodeName == "" || len(managerName) > validation.FieldManagerMaxLength {
+logrus.Warnf("%s controller node name is empty or too long, and will not be tracked via server side apply field management", controllerName)
+managerName = controllerName
+}
+return fmt.Sprintf("%s/%s (%s/%s) %s/%s", managerName, version.Version, runtime.GOOS, runtime.GOARCH, version.Program, version.GitCommit)
+}

// SplitStringSlice is a helper function to handle StringSliceFlag containing multiple values
// By default, StringSliceFlag only supports repeated values, not multiple values
// e.g. --foo="bar,car" --foo=baz will result in []string{"bar", "car". "baz"}
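Usage sketch for the new helper; the version and commit shown in the comment are illustrative, since they are stamped in at build time:

```go
package main

import (
	"fmt"
	"os"

	"github.com/k3s-io/k3s/pkg/util"
)

func main() {
	// NODE_NAME is normally set in the supervisor's environment; set it here
	// only for illustration.
	os.Setenv("NODE_NAME", "server-0")

	// Prints something like:
	//   deploy@server-0/v1.27.1+k3s1 (linux/amd64) k3s/8d074ec
	// If NODE_NAME were empty, or the manager name exceeded the server-side
	// apply FieldManagerMaxLength, the "@node" suffix would be dropped.
	fmt.Println(util.GetUserAgent("deploy"))
}
```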
10 changes: 5 additions & 5 deletions tests/e2e/validatecluster/validatecluster_test.go
@@ -361,17 +361,17 @@ var _ = Describe("Verify Create", Ordered, func() {
})
It("Validates certificates", func() {
const grepCert = "sudo ls -lt /var/lib/rancher/k3s/server/ | grep tls"
-var expectResult = []string{"client-ca.crt",
-"client-ca.key",
-"client-ca.nochain.crt",
+var expectResult = []string{
+"client-ca.crt", "client-ca.key", "client-ca.nochain.crt",
+"client-supervisor.crt", "client-supervisor.key",
"dynamic-cert.json", "peer-ca.crt",
"peer-ca.key", "server-ca.crt",
"server-ca.key", "request-header-ca.crt",
"request-header-ca.key", "server-ca.crt",
"server-ca.key", "server-ca.nochain.crt",
"service.current.key", "service.key",
"apiserver-loopback-client__.crt",
"apiserver-loopback-client__.key", "",
"apiserver-loopback-client__.crt", "apiserver-loopback-client__.key",
"",
}

var finalResult string