diff --git a/docs/adrs/core-controller-user.md b/docs/adrs/core-controller-user.md new file mode 100644 index 000000000000..91a693c19421 --- /dev/null +++ b/docs/adrs/core-controller-user.md @@ -0,0 +1,25 @@ +# Use a dedicated user for K3s core controllers + +Date: 2023-05-26 + +## Status + +Accepted + +## Context + +Users who collect audit logs from K3s currently have a hard time determining whether an action was performed by an administrator or by the K3s supervisor. +This is due to the K3s supervisor using the same `system:admin` user for both the admin kubeconfig and the kubeconfig used by core Wrangler controllers that drive core functionality and the deploy/helm controllers. + +Users may have policies in place that prohibit use of the `system:admin` account, or that require service accounts to be distinct from user accounts. + +## Decision + +* We will add a new kubeconfig for the K3s supervisor controllers: core functionality, deploy (AddOns; aka the manifests directory), and helm (HelmChart/HelmChartConfig). +* Each of the three controllers will use a dedicated user-agent to further assist in discriminating between events, via both audit logs and resource ManagedFields tracking. +* The new user account will use existing core Kubernetes group RBAC. + +## Consequences + +* K3s servers will create and manage an additional kubeconfig, client cert, and key that is intended only for use by the supervisor controllers. +* K3s supervisor controllers will use distinct user-agents to further distinguish which component initiated a request. diff --git a/pkg/cli/cmds/server.go b/pkg/cli/cmds/server.go index d88c264e94e0..4c83f2916434 100644 --- a/pkg/cli/cmds/server.go +++ b/pkg/cli/cmds/server.go @@ -15,10 +15,10 @@ const ( ) type StartupHookArgs struct { - APIServerReady  <-chan struct{} - KubeConfigAdmin string - Skips           map[string]bool - Disables        map[string]bool + APIServerReady       <-chan struct{} + KubeConfigSupervisor string + Skips                map[string]bool + Disables             map[string]bool } type StartupHook func(context.Context, *sync.WaitGroup, StartupHookArgs) error diff --git a/pkg/cli/etcdsnapshot/etcd_snapshot.go b/pkg/cli/etcdsnapshot/etcd_snapshot.go index 6c3f51028419..fb230d2cc725 100644 --- a/pkg/cli/etcdsnapshot/etcd_snapshot.go +++ b/pkg/cli/etcdsnapshot/etcd_snapshot.go @@ -64,7 +64,7 @@ func commandSetup(app *cli.Context, cfg *cmds.Server, sc *server.Config) error { sc.ControlConfig.Runtime.ETCDServerCA = filepath.Join(dataDir, "tls", "etcd", "server-ca.crt") sc.ControlConfig.Runtime.ClientETCDCert = filepath.Join(dataDir, "tls", "etcd", "client.crt") sc.ControlConfig.Runtime.ClientETCDKey = filepath.Join(dataDir, "tls", "etcd", "client.key") - sc.ControlConfig.Runtime.KubeConfigAdmin = filepath.Join(dataDir, "cred", "admin.kubeconfig") + sc.ControlConfig.Runtime.KubeConfigSupervisor = filepath.Join(dataDir, "cred", "supervisor.kubeconfig") return nil } @@ -110,7 +110,7 @@ func save(app *cli.Context, cfg *cmds.Server) error { return err } - sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigAdmin) + sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigSupervisor) if err != nil { return err } @@ -144,7 +144,7 @@ func delete(app *cli.Context, cfg *cmds.Server) error { return err } - sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigAdmin) + sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigSupervisor) if err != nil { return err } @@ -250,7 +250,7 @@ func prune(app *cli.Context,
cfg *cmds.Server) error { return err } - sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigAdmin) + sc, err := server.NewContext(ctx, serverConfig.ControlConfig.Runtime.KubeConfigSupervisor) if err != nil { return err } diff --git a/pkg/daemons/config/types.go b/pkg/daemons/config/types.go index 33b1ab4ab274..ae9d79b47f04 100644 --- a/pkg/daemons/config/types.go +++ b/pkg/daemons/config/types.go @@ -294,6 +294,7 @@ type ControlRuntime struct { ServiceCurrentKey string KubeConfigAdmin string + KubeConfigSupervisor string KubeConfigController string KubeConfigScheduler string KubeConfigAPIServer string @@ -317,6 +318,8 @@ type ControlRuntime struct { ClientAdminCert string ClientAdminKey string + ClientSupervisorCert string + ClientSupervisorKey string ClientControllerCert string ClientControllerKey string ClientSchedulerCert string diff --git a/pkg/daemons/control/deps/deps.go b/pkg/daemons/control/deps/deps.go index 6753e6c792ed..bfb69f6f278b 100644 --- a/pkg/daemons/control/deps/deps.go +++ b/pkg/daemons/control/deps/deps.go @@ -119,6 +119,7 @@ func CreateRuntimeCertFiles(config *config.Control) { runtime.ServiceCurrentKey = filepath.Join(config.DataDir, "tls", "service.current.key") runtime.KubeConfigAdmin = filepath.Join(config.DataDir, "cred", "admin.kubeconfig") + runtime.KubeConfigSupervisor = filepath.Join(config.DataDir, "cred", "supervisor.kubeconfig") runtime.KubeConfigController = filepath.Join(config.DataDir, "cred", "controller.kubeconfig") runtime.KubeConfigScheduler = filepath.Join(config.DataDir, "cred", "scheduler.kubeconfig") runtime.KubeConfigAPIServer = filepath.Join(config.DataDir, "cred", "api-server.kubeconfig") @@ -126,6 +127,8 @@ func CreateRuntimeCertFiles(config *config.Control) { runtime.ClientAdminCert = filepath.Join(config.DataDir, "tls", "client-admin.crt") runtime.ClientAdminKey = filepath.Join(config.DataDir, "tls", "client-admin.key") + runtime.ClientSupervisorCert = filepath.Join(config.DataDir, "tls", "client-supervisor.crt") + runtime.ClientSupervisorKey = filepath.Join(config.DataDir, "tls", "client-supervisor.key") runtime.ClientControllerCert = filepath.Join(config.DataDir, "tls", "client-controller.crt") runtime.ClientControllerKey = filepath.Join(config.DataDir, "tls", "client-controller.key") runtime.ClientCloudControllerCert = filepath.Join(config.DataDir, "tls", "client-"+version.Program+"-cloud-controller.crt") @@ -351,6 +354,16 @@ func genClientCerts(config *config.Control) error { } } + certGen, err = factory("system:"+version.Program+"-supervisor", []string{user.SystemPrivilegedGroup}, runtime.ClientSupervisorCert, runtime.ClientSupervisorKey) + if err != nil { + return err + } + if certGen { + if err := KubeConfig(runtime.KubeConfigSupervisor, apiEndpoint, runtime.ServerCA, runtime.ClientSupervisorCert, runtime.ClientSupervisorKey); err != nil { + return err + } + } + certGen, err = factory(user.KubeControllerManager, nil, runtime.ClientControllerCert, runtime.ClientControllerKey) if err != nil { return err diff --git a/pkg/daemons/control/server.go b/pkg/daemons/control/server.go index 95b7960623b2..afed71228a5e 100644 --- a/pkg/daemons/control/server.go +++ b/pkg/daemons/control/server.go @@ -383,7 +383,7 @@ func cloudControllerManager(ctx context.Context, cfg *config.Control) error { // If the CCM RBAC changes, the ResourceAttributes checked for by this function should // be modified to check for the most recently added privilege. 
func checkForCloudControllerPrivileges(ctx context.Context, runtime *config.ControlRuntime, timeout time.Duration) error { - return util.WaitForRBACReady(ctx, runtime.KubeConfigAdmin, timeout, authorizationv1.ResourceAttributes{ + return util.WaitForRBACReady(ctx, runtime.KubeConfigSupervisor, timeout, authorizationv1.ResourceAttributes{ Namespace: metav1.NamespaceSystem, Verb: "watch", Resource: "endpointslices", @@ -424,7 +424,7 @@ func waitForAPIServerInBackground(ctx context.Context, runtime *config.ControlRu select { case <-ctx.Done(): return - case err := <-promise(func() error { return util.WaitForAPIServerReady(ctx, runtime.KubeConfigAdmin, 30*time.Second) }): + case err := <-promise(func() error { return util.WaitForAPIServerReady(ctx, runtime.KubeConfigSupervisor, 30*time.Second) }): if err != nil { logrus.Infof("Waiting for API server to become available") continue diff --git a/pkg/server/context.go b/pkg/server/context.go index 0ad06230dc54..c8ecc2f6a911 100644 --- a/pkg/server/context.go +++ b/pkg/server/context.go @@ -2,26 +2,20 @@ package server import ( "context" - "fmt" - "os" - "runtime" helmcrd "github.com/k3s-io/helm-controller/pkg/crd" "github.com/k3s-io/helm-controller/pkg/generated/controllers/helm.cattle.io" addoncrd "github.com/k3s-io/k3s/pkg/crd" - "github.com/k3s-io/k3s/pkg/deploy" "github.com/k3s-io/k3s/pkg/generated/controllers/k3s.cattle.io" + "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" "github.com/pkg/errors" - "github.com/rancher/wrangler/pkg/apply" "github.com/rancher/wrangler/pkg/crd" "github.com/rancher/wrangler/pkg/generated/controllers/apps" "github.com/rancher/wrangler/pkg/generated/controllers/batch" "github.com/rancher/wrangler/pkg/generated/controllers/core" "github.com/rancher/wrangler/pkg/generated/controllers/rbac" "github.com/rancher/wrangler/pkg/start" - "github.com/sirupsen/logrus" - "k8s.io/apimachinery/pkg/apis/meta/v1/validation" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -35,7 +29,6 @@ type Context struct { Auth *rbac.Factory Core *core.Factory K8s kubernetes.Interface - Apply apply.Apply } func (c *Context) Start(ctx context.Context) error { @@ -47,16 +40,7 @@ func NewContext(ctx context.Context, cfg string) (*Context, error) { if err != nil { return nil, err } - - // Construct a custom user-agent string for the apply client used by the deploy controller - // so that we can track which node's deploy controller most recently modified a resource. 
- nodeName := os.Getenv("NODE_NAME") - managerName := deploy.ControllerName + "@" + nodeName - if nodeName == "" || len(managerName) > validation.FieldManagerMaxLength { - logrus.Warn("Deploy controller node name is empty or too long, and will not be tracked via server side apply field management") - managerName = deploy.ControllerName - } - restConfig.UserAgent = fmt.Sprintf("%s/%s (%s/%s) %s/%s", managerName, version.Version, runtime.GOOS, runtime.GOARCH, version.Program, version.GitCommit) + restConfig.UserAgent = util.GetUserAgent(version.Program + "-supervisor") if err := crds(ctx, restConfig); err != nil { return nil, errors.Wrap(err, "failed to register CRDs") @@ -74,7 +58,6 @@ func NewContext(ctx context.Context, cfg string) (*Context, error) { Apps: apps.NewFactoryFromConfigOrDie(restConfig), Batch: batch.NewFactoryFromConfigOrDie(restConfig), Core: core.NewFactoryFromConfigOrDie(restConfig), - Apply: apply.New(k8s, apply.NewClientFactory(restConfig)).WithDynamicLookup(), }, nil } diff --git a/pkg/server/server.go b/pkg/server/server.go index 018edc425b44..435333454d15 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -12,7 +12,7 @@ import ( "sync" "time" - helm "github.com/k3s-io/helm-controller/pkg/controllers/chart" + helmchart "github.com/k3s-io/helm-controller/pkg/controllers/chart" helmcommon "github.com/k3s-io/helm-controller/pkg/controllers/common" "github.com/k3s-io/k3s/pkg/cli/cmds" "github.com/k3s-io/k3s/pkg/clientaccess" @@ -28,12 +28,15 @@ import ( "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" "github.com/pkg/errors" + "github.com/rancher/wrangler/pkg/apply" v1 "github.com/rancher/wrangler/pkg/generated/controllers/core/v1" "github.com/rancher/wrangler/pkg/leader" "github.com/rancher/wrangler/pkg/resolvehome" "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" ) const ( @@ -67,10 +70,10 @@ func StartServer(ctx context.Context, config *Config, cfg *cmds.Server) error { config.ControlConfig.Runtime.StartupHooksWg = wg shArgs := cmds.StartupHookArgs{ - APIServerReady: config.ControlConfig.Runtime.APIServerReady, - KubeConfigAdmin: config.ControlConfig.Runtime.KubeConfigAdmin, - Skips: config.ControlConfig.Skips, - Disables: config.ControlConfig.Disables, + APIServerReady: config.ControlConfig.Runtime.APIServerReady, + KubeConfigSupervisor: config.ControlConfig.Runtime.KubeConfigSupervisor, + Skips: config.ControlConfig.Skips, + Disables: config.ControlConfig.Disables, } for _, hook := range config.StartupHooks { if err := hook(ctx, wg, shArgs); err != nil { @@ -101,7 +104,7 @@ func startOnAPIServerReady(ctx context.Context, config *Config) { func runControllers(ctx context.Context, config *Config) error { controlConfig := &config.ControlConfig - sc, err := NewContext(ctx, controlConfig.Runtime.KubeConfigAdmin) + sc, err := NewContext(ctx, controlConfig.Runtime.KubeConfigSupervisor) if err != nil { return errors.Wrap(err, "failed to create new server context") } @@ -205,26 +208,42 @@ func coreControllers(ctx context.Context, sc *Context, config *Config) error { // apply SystemDefaultRegistry setting to Helm before starting controllers if config.ControlConfig.SystemDefaultRegistry != "" { - helm.DefaultJobImage = config.ControlConfig.SystemDefaultRegistry + "/" + helm.DefaultJobImage + helmchart.DefaultJobImage = config.ControlConfig.SystemDefaultRegistry + "/" + helmchart.DefaultJobImage } if 
!config.ControlConfig.DisableHelmController { - helm.Register(ctx, + restConfig, err := clientcmd.BuildConfigFromFlags("", config.ControlConfig.Runtime.KubeConfigSupervisor) + if err != nil { + return err + } + restConfig.UserAgent = util.GetUserAgent(helmcommon.Name) + + k8s, err := clientset.NewForConfig(restConfig) + if err != nil { + return err + } + + apply := apply.New(k8s, apply.NewClientFactory(restConfig)).WithDynamicLookup() + helm := sc.Helm.WithAgent(restConfig.UserAgent) + batch := sc.Batch.WithAgent(restConfig.UserAgent) + auth := sc.Auth.WithAgent(restConfig.UserAgent) + core := sc.Core.WithAgent(restConfig.UserAgent) + helmchart.Register(ctx, metav1.NamespaceAll, helmcommon.Name, - sc.K8s, - sc.Apply, - util.BuildControllerEventRecorder(sc.K8s, helmcommon.Name, metav1.NamespaceAll), - sc.Helm.Helm().V1().HelmChart(), - sc.Helm.Helm().V1().HelmChart().Cache(), - sc.Helm.Helm().V1().HelmChartConfig(), - sc.Helm.Helm().V1().HelmChartConfig().Cache(), - sc.Batch.Batch().V1().Job(), - sc.Batch.Batch().V1().Job().Cache(), - sc.Auth.Rbac().V1().ClusterRoleBinding(), - sc.Core.Core().V1().ServiceAccount(), - sc.Core.Core().V1().ConfigMap(), - sc.Core.Core().V1().Secret()) + k8s, + apply, + util.BuildControllerEventRecorder(k8s, helmcommon.Name, metav1.NamespaceAll), + helm.V1().HelmChart(), + helm.V1().HelmChart().Cache(), + helm.V1().HelmChartConfig(), + helm.V1().HelmChartConfig().Cache(), + batch.V1().Job(), + batch.V1().Job().Cache(), + auth.V1().ClusterRoleBinding(), + core.V1().ServiceAccount(), + core.V1().ConfigMap(), + core.V1().Secret()) } if config.ControlConfig.EncryptSecrets { @@ -274,10 +293,24 @@ func stageFiles(ctx context.Context, sc *Context, controlConfig *config.Control) return err } + restConfig, err := clientcmd.BuildConfigFromFlags("", controlConfig.Runtime.KubeConfigSupervisor) + if err != nil { + return err + } + restConfig.UserAgent = util.GetUserAgent("deploy") + + k8s, err := clientset.NewForConfig(restConfig) + if err != nil { + return err + } + + apply := apply.New(k8s, apply.NewClientFactory(restConfig)).WithDynamicLookup() + k3s := sc.K3s.WithAgent(restConfig.UserAgent) + return deploy.WatchFiles(ctx, - sc.K8s, - sc.Apply, - sc.K3s.K3s().V1().Addon(), + k8s, + apply, + k3s.V1().Addon(), controlConfig.Disables, dataDir) } diff --git a/pkg/util/client.go b/pkg/util/client.go index 6c6fefd00060..561a5cbc0817 100644 --- a/pkg/util/client.go +++ b/pkg/util/client.go @@ -1,9 +1,15 @@ package util import ( + "fmt" + "os" + "runtime" "strings" "github.com/k3s-io/k3s/pkg/datadir" + "github.com/k3s-io/k3s/pkg/version" + "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/apis/meta/v1/validation" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/clientcmd" ) @@ -30,6 +36,17 @@ func GetClientSet(file string) (clientset.Interface, error) { return clientset.NewForConfig(restConfig) } +// GetUserAgent builds a complete UserAgent string for a given controller, including the node name if possible. 
+func GetUserAgent(controllerName string) string { + nodeName := os.Getenv("NODE_NAME") + managerName := controllerName + "@" + nodeName + if nodeName == "" || len(managerName) > validation.FieldManagerMaxLength { + logrus.Warnf("%s controller node name is empty or too long, and will not be tracked via server side apply field management", controllerName) + managerName = controllerName + } + return fmt.Sprintf("%s/%s (%s/%s) %s/%s", managerName, version.Version, runtime.GOOS, runtime.GOARCH, version.Program, version.GitCommit) +} + // SplitStringSlice is a helper function to handle StringSliceFlag containing multiple values // By default, StringSliceFlag only supports repeated values, not multiple values // e.g. --foo="bar,car" --foo=baz will result in []string{"bar", "car". "baz"} diff --git a/tests/e2e/validatecluster/validatecluster_test.go b/tests/e2e/validatecluster/validatecluster_test.go index 955f6c700b70..786f187e6289 100644 --- a/tests/e2e/validatecluster/validatecluster_test.go +++ b/tests/e2e/validatecluster/validatecluster_test.go @@ -361,17 +361,17 @@ var _ = Describe("Verify Create", Ordered, func() { }) It("Validates certificates", func() { const grepCert = "sudo ls -lt /var/lib/rancher/k3s/server/ | grep tls" - var expectResult = []string{"client-ca.crt", - "client-ca.key", - "client-ca.nochain.crt", + var expectResult = []string{ + "client-ca.crt", "client-ca.key", "client-ca.nochain.crt", + "client-supervisor.crt", "client-supervisor.key", "dynamic-cert.json", "peer-ca.crt", "peer-ca.key", "server-ca.crt", "server-ca.key", "request-header-ca.crt", "request-header-ca.key", "server-ca.crt", "server-ca.key", "server-ca.nochain.crt", "service.current.key", "service.key", - "apiserver-loopback-client__.crt", - "apiserver-loopback-client__.key", "", + "apiserver-loopback-client__.crt", "apiserver-loopback-client__.key", + "", } var finalResult string
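
For readers tracing how the per-controller identity shows up in practice, here is a minimal, self-contained sketch of calling the new `util.GetUserAgent` helper added in `pkg/util/client.go`. The node name, version, and commit shown in the comment are purely illustrative values, not output from a real cluster.

```go
package main

import (
	"fmt"
	"os"

	"github.com/k3s-io/k3s/pkg/util"
)

func main() {
	// NODE_NAME is normally provided by the supervisor's environment;
	// it is set here only so the example produces a manager name with
	// a node suffix.
	_ = os.Setenv("NODE_NAME", "server-0")

	// Prints something along the lines of:
	//   k3s-supervisor@server-0/v1.27.x+k3s1 (linux/amd64) k3s/<commit>
	// (version and commit values depend on the build).
	fmt.Println(util.GetUserAgent("k3s-supervisor"))
}
```

If `NODE_NAME` is unset, or the combined name exceeds the field-manager length limit, the helper falls back to the bare controller name, so server-side apply field management still works; only the per-node attribution is lost.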
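
The audit-log benefit described in the ADR can then be exercised with nothing more than the standard library: once the supervisor controllers authenticate as the dedicated `system:k3s-supervisor` user, their requests can be filtered out of (or into) an audit stream by user name, with the user-agent narrowing a hit down to the core, deploy, or helm controller. The log path and the exact set of event fields read below are assumptions for illustration; adjust them to however audit logging is configured on the cluster.

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"log"
	"os"
)

// auditEvent models only the fields this sketch needs from a Kubernetes
// audit log written in JSON lines format (one event per line).
type auditEvent struct {
	Verb string `json:"verb"`
	User struct {
		Username string `json:"username"`
	} `json:"user"`
	RequestURI string `json:"requestURI"`
	UserAgent  string `json:"userAgent"`
}

func main() {
	// Assumed path; use wherever --audit-log-path points on the server.
	f, err := os.Open("/var/lib/rancher/k3s/server/logs/audit.log")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	// Audit events can be long; allow lines up to 1 MiB.
	scanner.Buffer(make([]byte, 0, 1024*1024), 1024*1024)
	for scanner.Scan() {
		var ev auditEvent
		if err := json.Unmarshal(scanner.Bytes(), &ev); err != nil {
			continue // skip anything that is not a JSON audit event
		}
		// Supervisor-initiated requests now carry the dedicated user
		// rather than system:admin; the user-agent identifies the
		// individual controller (core, deploy, or helm).
		if ev.User.Username == "system:k3s-supervisor" {
			fmt.Printf("%s %s (%s)\n", ev.Verb, ev.RequestURI, ev.UserAgent)
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
```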