diff --git a/README.md b/README.md index ded0fcad..80a0f33b 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,71 @@ you will be able to see the Results objects of the analysis after some minutes ( "details": "The error message means that the service in Kubernetes doesn't have any associated endpoints, which should have been labeled with \"control-plane=controller-manager\". \n\nTo solve this issue, you need to add the \"control-plane=controller-manager\" label to the endpoint that matches the service. Once the endpoint is labeled correctly, Kubernetes can associate it with the service, and the error should be resolved.", ``` +## Monitor multiple clusters + +The `k8sgpt.ai` Operator allows monitoring multiple clusters by providing a `kubeconfig` value. + +This feature could be fascinating if you want to embrace Platform Engineering such as running a fleet of Kubernetes clusters for multiple stakeholders. +Especially designed for the Cluster API-based infrastructures, `k8sgpt.ai` Operator is going to be installed in the same Cluster API management cluster: +this one is responsible for creating the required clusters according to the infrastructure provider for the seed clusters. + +Once a Cluster API-based cluster has been provisioned, a `kubeconfig` according to the naming convention `${CLUSTERNAME}-kubeconfig` will be available in the same namespace: +the conventional Secret data key is `value`; this can be used to instruct the `k8sgpt.ai` Operator to monitor a remote cluster without installing any resource in the seed cluster. + +``` +$: kubectl get clusters +NAME PHASE AGE VERSION +capi-quickstart Provisioned 8s v1.28.0 + +$: kubectl get secrets +NAME TYPE DATA AGE +capi-quickstart-kubeconfig Opaque 1 8s +``` + +> **A security concern** +> +> If your setup requires the least privilege approach, +> a different `kubeconfig` must be provided since the Cluster API generated one is bound to the `admin` user which has `cluster-admin` permissions. 
+ + +Once you have a valid `kubeconfig`, a `k8sgpt` instance can be created as follows. + +```yaml +apiVersion: core.k8sgpt.ai/v1alpha1 +kind: K8sGPT +metadata: + name: capi-quickstart + namespace: default +spec: + ai: + anonymized: true + backend: openai + language: english + model: gpt-3.5-turbo + secret: + key: api_key + name: my-openai-secret + kubeconfig: + key: value + name: capi-quickstart-kubeconfig +``` + +Once applied, the `k8sgpt.ai` Operator will create the `k8sgpt.ai` Deployment by using the seed cluster `kubeconfig` defined in the field `/spec/kubeconfig`. + +The resulting `Result` objects will be available in the same Namespace where the `k8sgpt.ai` instance has been deployed, +accordingly labelled with the following keys: + +- `k8sgpts.k8sgpt.ai/name`: the `k8sgpt.ai` instance Name +- `k8sgpts.k8sgpt.ai/namespace`: the `k8sgpt.ai` instance Namespace +- `k8sgpts.k8sgpt.ai/backend`: the AI backend (if specified) + +Thanks to these labels, the results can be filtered according to the specified monitored cluster, +without polluting the underlying cluster with the `k8sgpt.ai` CRDs and consuming seed compute workloads, +as well as keeping confidentiality about the AI backend driver credentials. + +> If the `/spec/kubeconfig` field is missing, the `k8sgpt.ai` Operator will track the cluster on which it has been deployed: +> this is possible by mounting the provided `ServiceAccount`. + ## Remote Cache
diff --git a/api/v1alpha1/k8sgpt_types.go b/api/v1alpha1/k8sgpt_types.go index 55e31ff3..2bbb6525 100644 --- a/api/v1alpha1/k8sgpt_types.go +++ b/api/v1alpha1/k8sgpt_types.go @@ -124,6 +124,9 @@ type K8sGPTSpec struct { RemoteCache *RemoteCacheRef `json:"remoteCache,omitempty"` Integrations *Integrations `json:"integrations,omitempty"` NodeSelector map[string]string `json:"nodeSelector,omitempty"` + // Define the kubeconfig the Deployment must use. + // If empty, the Deployment will use the ServiceAccount provided by Kubernetes itself. + Kubeconfig *SecretRef `json:"kubeconfig,omitempty"` } const ( diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 1cbff8c1..972b8e30 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -304,6 +304,11 @@ func (in *K8sGPTSpec) DeepCopyInto(out *K8sGPTSpec) { (*out)[key] = val } } + if in.Kubeconfig != nil { + in, out := &in.Kubeconfig, &out.Kubeconfig + *out = new(SecretRef) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new K8sGPTSpec. diff --git a/config/crd/bases/core.k8sgpt.ai_k8sgpts.yaml b/config/crd/bases/core.k8sgpt.ai_k8sgpts.yaml index 5793b0cb..b7e5fc21 100644 --- a/config/crd/bases/core.k8sgpt.ai_k8sgpts.yaml +++ b/config/crd/bases/core.k8sgpt.ai_k8sgpts.yaml @@ -115,6 +115,16 @@ spec: type: boolean type: object type: object + kubeconfig: + description: Define the kubeconfig the Deployment must use. If empty, + the Deployment will use the ServiceAccount provided by Kubernetes + itself. 
+ properties: + key: + type: string + name: + type: string + type: object noCache: type: boolean nodeSelector: diff --git a/controllers/k8sgpt_controller.go b/controllers/k8sgpt_controller.go index 66e2e41c..0622f3f5 100644 --- a/controllers/k8sgpt_controller.go +++ b/controllers/k8sgpt_controller.go @@ -22,11 +22,6 @@ import ( corev1alpha1 "github.com/k8sgpt-ai/k8sgpt-operator/api/v1alpha1" - kclient "github.com/k8sgpt-ai/k8sgpt-operator/pkg/client" - "github.com/k8sgpt-ai/k8sgpt-operator/pkg/integrations" - "github.com/k8sgpt-ai/k8sgpt-operator/pkg/resources" - "github.com/k8sgpt-ai/k8sgpt-operator/pkg/sinks" - "github.com/k8sgpt-ai/k8sgpt-operator/pkg/utils" "github.com/prometheus/client_golang/prometheus" v1 "k8s.io/api/apps/v1" kcorev1 "k8s.io/api/core/v1" @@ -37,6 +32,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/metrics" + + kclient "github.com/k8sgpt-ai/k8sgpt-operator/pkg/client" + "github.com/k8sgpt-ai/k8sgpt-operator/pkg/integrations" + "github.com/k8sgpt-ai/k8sgpt-operator/pkg/resources" + "github.com/k8sgpt-ai/k8sgpt-operator/pkg/sinks" + "github.com/k8sgpt-ai/k8sgpt-operator/pkg/utils" ) const ( @@ -151,7 +152,7 @@ func (r *K8sGPTReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr // Check and see if the instance is new or has a K8sGPT deployment in flight deployment := v1.Deployment{} err = r.Get(ctx, client.ObjectKey{Namespace: k8sgptConfig.Namespace, - Name: "k8sgpt-deployment"}, &deployment) + Name: k8sgptConfig.Name}, &deployment) if client.IgnoreNotFound(err) != nil { k8sgptReconcileErrorCount.Inc() return r.finishReconcile(err, false) @@ -260,7 +261,10 @@ func (r *K8sGPTReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr // no longer are relevent, we can do this by using the resultSpec composed name against // the custom resource name resultList := &corev1alpha1.ResultList{} - err = r.List(ctx, resultList) + 
err = r.List(ctx, resultList, client.MatchingLabels(map[string]string{ + "k8sgpts.k8sgpt.ai/name": k8sgptConfig.Name, + "k8sgpts.k8sgpt.ai/namespace": k8sgptConfig.Namespace, + })) if err != nil { k8sgptReconcileErrorCount.Inc() return r.finishReconcile(err, false) diff --git a/pkg/client/client.go b/pkg/client/client.go index 8bf2ffd9..bd490907 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -54,7 +54,7 @@ func GenerateAddress(ctx context.Context, cli client.Client, k8sgptConfig *v1alp // Get service IP and port for k8sgpt-deployment svc := &corev1.Service{} err := cli.Get(ctx, client.ObjectKey{Namespace: k8sgptConfig.Namespace, - Name: "k8sgpt"}, svc) + Name: k8sgptConfig.Name}, svc) if err != nil { return "", nil } diff --git a/pkg/resources/k8sgpt.go b/pkg/resources/k8sgpt.go index f08c5124..327a3814 100644 --- a/pkg/resources/k8sgpt.go +++ b/pkg/resources/k8sgpt.go @@ -17,6 +17,7 @@ package resources import ( "context" err "errors" + "fmt" "github.com/k8sgpt-ai/k8sgpt-operator/api/v1alpha1" "github.com/k8sgpt-ai/k8sgpt-operator/pkg/utils" @@ -29,6 +30,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) @@ -39,7 +41,6 @@ type SyncOrDestroy int const ( SyncOp SyncOrDestroy = iota DestroyOp - DeploymentName = "k8sgpt-deployment" ) // GetService Create service for K8sGPT @@ -47,7 +48,7 @@ func GetService(config v1alpha1.K8sGPT) (*corev1.Service, error) { // Create service service := corev1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: "k8sgpt", + Name: config.Name, Namespace: config.Namespace, OwnerReferences: []metav1.OwnerReference{ { @@ -62,7 +63,7 @@ func GetService(config v1alpha1.K8sGPT) (*corev1.Service, error) { }, Spec: corev1.ServiceSpec{ Selector: map[string]string{ - "app": DeploymentName, + "app": config.Name, }, Ports: 
[]corev1.ServicePort{ { @@ -178,14 +179,14 @@ func GetClusterRole(config v1alpha1.K8sGPT) (*r1.ClusterRole, error) { } // GetDeployment Create deployment with the latest K8sGPT image -func GetDeployment(config v1alpha1.K8sGPT) (*appsv1.Deployment, error) { +func GetDeployment(config v1alpha1.K8sGPT, outOfClusterMode bool) (*appsv1.Deployment, error) { // Create deployment image := config.Spec.Repository + ":" + config.Spec.Version replicas := int32(1) deployment := appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: DeploymentName, + Name: config.Name, Namespace: config.Namespace, OwnerReferences: []metav1.OwnerReference{ { @@ -202,13 +203,13 @@ func GetDeployment(config v1alpha1.K8sGPT) (*appsv1.Deployment, error) { Replicas: &replicas, Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - "app": DeploymentName, + "app": config.Name, }, }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ - "app": DeploymentName, + "app": config.Name, }, }, Spec: corev1.PodSpec{ @@ -273,6 +274,35 @@ func GetDeployment(config v1alpha1.K8sGPT) (*appsv1.Deployment, error) { }, }, } + if outOfClusterMode { + // No need of ServiceAccount since the Deployment will use + // a kubeconfig pointing to an external cluster. 
+ deployment.Spec.Template.Spec.ServiceAccountName = "" + deployment.Spec.Template.Spec.AutomountServiceAccountToken = ptr.To(false) + + kubeconfigPath := fmt.Sprintf("/tmp/%s", config.Name) + + deployment.Spec.Template.Spec.Containers[0].Args = append(deployment.Spec.Template.Spec.Containers[0].Args, fmt.Sprintf("--kubeconfig=%s/kubeconfig", kubeconfigPath)) + deployment.Spec.Template.Spec.Containers[0].VolumeMounts = append(deployment.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ + Name: "kubeconfig", + ReadOnly: true, + MountPath: kubeconfigPath, + }) + deployment.Spec.Template.Spec.Volumes = append(deployment.Spec.Template.Spec.Volumes, corev1.Volume{ + Name: "kubeconfig", + VolumeSource: v1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: config.Spec.Kubeconfig.Name, + Items: []corev1.KeyToPath{ + { + Key: config.Spec.Kubeconfig.Key, + Path: "kubeconfig", + }, + }, + }, + }, + }) + } if config.Spec.AI.Secret != nil { password := corev1.EnvVar{ Name: "K8SGPT_PASSWORD", @@ -347,35 +377,39 @@ func Sync(ctx context.Context, c client.Client, var objs []client.Object - svc, er := GetService(config) - if er != nil { - return er - } + outOfClusterMode := config.Spec.Kubeconfig != nil - objs = append(objs, svc) + if !outOfClusterMode { + svcAcc, er := GetServiceAccount(config) + if er != nil { + return er + } - svcAcc, er := GetServiceAccount(config) - if er != nil { - return er - } + objs = append(objs, svcAcc) - objs = append(objs, svcAcc) + clusterRole, er := GetClusterRole(config) + if er != nil { + return er + } - clusterRole, er := GetClusterRole(config) - if er != nil { - return er - } + objs = append(objs, clusterRole) + + clusterRoleBinding, er := GetClusterRoleBinding(config) + if er != nil { + return er + } - objs = append(objs, clusterRole) + objs = append(objs, clusterRoleBinding) + } - clusterRoleBinding, er := GetClusterRoleBinding(config) + svc, er := GetService(config) if er != nil { return er } - objs = 
append(objs, clusterRoleBinding) + objs = append(objs, svc) - deployment, er := GetDeployment(config) + deployment, er := GetDeployment(config, outOfClusterMode) if er != nil { return er }