Skip to content

Commit

Permalink
metrics-exporter: configure Prometheus operator
Browse files Browse the repository at this point in the history
Deploy the configuration needed for the Prometheus
operator to find and scrape the sriov-network-metrics-exporter
endpoints, including the ServiceMonitor, Role and RoleBinding.

Resources are installed only if the Prometheus operator is installed.

Signed-off-by: Andrea Panattoni <apanatto@redhat.com>
  • Loading branch information
zeeke committed Jul 3, 2024
1 parent 11d6708 commit 19e5be7
Show file tree
Hide file tree
Showing 13 changed files with 1,156 additions and 59 deletions.
58 changes: 57 additions & 1 deletion bindata/manifests/metrics-exporter/metrics-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
namespace: {{.Namespace}}
annotations:
prometheus.io/target: "true"
{{- if eq .ClusterType "openshift" }}
{{ if .IsOpenshift }}
service.beta.openshift.io/serving-cert-secret-name: {{ .MetricsExporterSecretName }}
{{- end }}
labels:
Expand All @@ -18,3 +18,59 @@ spec:
name: sriov-network-metrics
port: {{ .MetricsExporterPort }}
targetPort: {{ .MetricsExporterPort }}
{{ if .IsPrometheusOperatorInstalled }}
---
# ServiceMonitor for the metrics exporter service.
# Selects services labelled `name: sriov-network-metrics-exporter-service` in the
# templated namespace and scrapes their `sriov-network-metrics` port every 30s over HTTPS.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: sriov-network-metrics-exporter
namespace: {{.Namespace}}
spec:
endpoints:
- interval: 30s
port: sriov-network-metrics
# The scraper authenticates with the service account token mounted at this path.
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token"
scheme: "https"
honorLabels: true
tlsConfig:
# The server certificate is verified (insecureSkipVerify is false) against caFile,
# using the service DNS name below as the expected server name.
serverName: sriov-network-metrics-exporter-service.{{.Namespace}}.svc
# NOTE(review): this CA bundle path is hard-coded; it looks specific to one
# Prometheus deployment layout - confirm it exists on every supported cluster type.
caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt
insecureSkipVerify: false
namespaceSelector:
matchNames:
- {{.Namespace}}
selector:
matchLabels:
name: sriov-network-metrics-exporter-service
---
# RoleBinding that grants the `prometheus-k8s` Role (in the templated namespace)
# to the `prometheus-k8s` ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: {{.Namespace}}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
# NOTE(review): the subject namespace is hard-coded to `openshift-monitoring`, while
# the enclosing template condition only checks .IsPrometheusOperatorInstalled -
# confirm this is intended for non-OpenShift clusters.
namespace: openshift-monitoring
---
# Role `prometheus-k8s` in the templated namespace.
# Grants read-only access (get/list/watch) to services, endpoints and pods
# in the core ("") API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: {{.Namespace}}
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
{{ end }}
15 changes: 15 additions & 0 deletions controllers/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
errs "github.com/pkg/errors"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"

"k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
Expand Down Expand Up @@ -397,3 +399,16 @@ func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string]
}
return nil
}

// isPrometheusOperatorInstalled reports whether the
// "servicemonitors.monitoring.coreos.com" CustomResourceDefinition can be
// fetched through the given reader.
//
// It returns true only when the lookup succeeds. A NotFound error yields
// false silently; any other error is logged and also yields false.
func isPrometheusOperatorInstalled(ctx context.Context, client k8sclient.Reader) bool {
	crdKey := k8sclient.ObjectKey{Name: "servicemonitors.monitoring.coreos.com"}
	crd := &apiextv1.CustomResourceDefinition{}

	switch err := client.Get(ctx, crdKey, crd); {
	case err == nil:
		return true
	case errors.IsNotFound(err):
		// The CRD is simply absent: not an error condition worth logging.
		return false
	default:
		log.Log.WithName("isPrometheusOperatorInstalled").Error(err, "Error while looking for prometheus operator")
		return false
	}
}
1 change: 1 addition & 0 deletions controllers/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
. "github.com/onsi/gomega"

"github.com/google/go-cmp/cmp"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
Expand Down
25 changes: 17 additions & 8 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context,
data.Data["MetricsExporterSecretName"] = os.Getenv("METRICS_EXPORTER_SECRET_NAME")
data.Data["MetricsExporterPort"] = os.Getenv("METRICS_EXPORTER_PORT")
data.Data["MetricsExporterKubeRbacProxyImage"] = os.Getenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE")
data.Data["ClusterType"] = vars.ClusterType
data.Data["IsOpenshift"] = r.PlatformHelper.IsOpenshiftCluster()
data.Data["IsPrometheusOperatorInstalled"] = isPrometheusOperatorInstalled(ctx, r.GlobalClient)
data.Data["NodeSelectorField"] = GetDefaultNodeSelector()
if dc.Spec.ConfigDaemonNodeSelector != nil {
data.Data["NodeSelectorField"] = dc.Spec.ConfigDaemonNodeSelector
Expand All @@ -249,23 +250,21 @@ func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context,
return err
}

deployMetricsExporter, ok := dc.Spec.FeatureGates[consts.MetricsExporterFeatureGate]
if ok && deployMetricsExporter {
if r.FeatureGate.IsEnabled(consts.MetricsExporterFeatureGate) {
for _, obj := range objs {
err = r.syncK8sResource(ctx, dc, obj)
if err != nil {
logger.Error(err, "Couldn't sync metrics exporter objects")
return err
}
}

return nil
}

for _, obj := range objs {
err = r.deleteK8sResource(ctx, obj)
if err != nil {
return err
}
err = r.deleteK8sResources(ctx, objs)
if err != nil {
return err
}

return nil
Expand Down Expand Up @@ -361,6 +360,16 @@ func (r *SriovOperatorConfigReconciler) deleteK8sResource(ctx context.Context, i
return nil
}

// deleteK8sResources deletes every object in objs, in order, by delegating to
// deleteK8sResource. It stops at the first failure and returns that error;
// it returns nil when all deletions succeed (or objs is empty).
func (r *SriovOperatorConfigReconciler) deleteK8sResources(ctx context.Context, objs []*uns.Unstructured) error {
	for i := range objs {
		if err := r.deleteK8sResource(ctx, objs[i]); err != nil {
			return err
		}
	}
	return nil
}

func (r *SriovOperatorConfigReconciler) syncK8sResource(ctx context.Context, cr *sriovnetworkv1.SriovOperatorConfig, in *uns.Unstructured) error {
switch in.GetKind() {
case clusterRoleResourceName, clusterRoleBindingResourceName, mutatingWebhookConfigurationCRDName, validatingWebhookConfigurationCRDName, machineConfigCRDName:
Expand Down
132 changes: 83 additions & 49 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/golang/mock/gomock"
. "github.com/onsi/ginkgo/v2"
Expand All @@ -32,6 +35,7 @@ import (
var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
var cancel context.CancelFunc
var ctx context.Context
var reconciler *SriovOperatorConfigReconciler

BeforeAll(func() {
By("Create SriovOperatorConfig controller k8s objs")
Expand Down Expand Up @@ -77,13 +81,14 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
platformHelper.EXPECT().IsOpenshiftCluster().Return(false).AnyTimes()
platformHelper.EXPECT().IsHypershift().Return(false).AnyTimes()

err = (&SriovOperatorConfigReconciler{
reconciler = &SriovOperatorConfigReconciler{
Client: k8sManager.GetClient(),
GlobalClient: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
PlatformHelper: platformHelper,
FeatureGate: featuregate.New(),
}).SetupWithManager(k8sManager)
}
err = reconciler.SetupWithManager(k8sManager)
Expect(err).ToNot(HaveOccurred())

ctx, cancel = context.WithCancel(context.Background())
Expand All @@ -105,21 +110,21 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
})
})

Context("When is up", func() {
JustBeforeEach(func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
err := util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())
config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{
EnableInjector: true,
EnableOperatorWebhook: true,
LogLevel: 2,
FeatureGates: map[string]bool{},
}
err = k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())
})
BeforeEach(func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
err := util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())
config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{
EnableInjector: true,
EnableOperatorWebhook: true,
LogLevel: 2,
FeatureGates: map[string]bool{},
}
err = k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())
})

Context("When is up", func() {
It("should have webhook enable", func() {
mutateCfg := &admv1.MutatingWebhookConfiguration{}
err := util.WaitForNamespacedObject(mutateCfg, k8sClient, testNamespace, "sriov-operator-webhook-config", util.RetryInterval, util.APITimeout*3)
Expand Down Expand Up @@ -334,41 +339,62 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
Expect(err).ToNot(HaveOccurred())
})

It("should deploy the metrics-exporter when the feature gate is enabled", func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

daemonSet := &appsv1.DaemonSet{}
err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-metrics-exporter", Namespace: testNamespace}, daemonSet)
Expect(err).To(HaveOccurred())
Expect(errors.IsNotFound(err)).To(BeTrue())

By("Turn `metricsExporter` feature gate on")
config.Spec.FeatureGates = map[string]bool{constants.MetricsExporterFeatureGate: true}
err = k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())

DeferCleanup(func() {
config.Spec.FeatureGates = map[string]bool{}
err = k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())
Context("metricsExporter feature gate", func() {
When("is disabled", func() {
It("should not deploy the daemonset", func() {
daemonSet := &appsv1.DaemonSet{}
err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-metrics-exporter", Namespace: testNamespace}, daemonSet)
Expect(err).To(HaveOccurred())
Expect(errors.IsNotFound(err)).To(BeTrue())
})
})

err = util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObject(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())

By("Turn `metricsExporter` feature gate off")
config.Spec.FeatureGates = map[string]bool{}
err = k8sClient.Update(ctx, config)

err = util.WaitForNamespacedObjectDeleted(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObjectDeleted(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())
When("is enabled", func() {
BeforeEach(func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

By("Turn `metricsExporter` feature gate on")
config.Spec.FeatureGates = map[string]bool{constants.MetricsExporterFeatureGate: true}
err := k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())
})

It("should deploy the sriov-network-metrics-exporter DaemonSet", func() {
err := util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObject(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())
})

It("should deploy extra configuration when the Prometheus operator is installed", func() {
assertResourceExists(
schema.GroupVersionKind{
Group: "monitoring.coreos.com",
Kind: "ServiceMonitor",
Version: "v1",
},
client.ObjectKey{Namespace: testNamespace, Name: "sriov-network-metrics-exporter"})

assertResourceExists(
schema.GroupVersionKind{
Group: "rbac.authorization.k8s.io",
Kind: "Role",
Version: "v1",
},
client.ObjectKey{Namespace: testNamespace, Name: "prometheus-k8s"})

assertResourceExists(
schema.GroupVersionKind{
Group: "rbac.authorization.k8s.io",
Kind: "RoleBinding",
Version: "v1",
},
client.ObjectKey{Namespace: testNamespace, Name: "prometheus-k8s"})

})
})
})

// This test verifies that the CABundle field in the webhook configuration added by third party components is not
Expand Down Expand Up @@ -430,6 +456,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n")))
}, "1s").Should(Succeed())
})

It("should reconcile to a converging state when multiple node policies are set", func() {
By("Creating a consistent number of node policies")
for i := 0; i < 30; i++ {
Expand Down Expand Up @@ -478,3 +505,10 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
})
})
})

// assertResourceExists fails the current spec unless an object of the given
// GroupVersionKind can be fetched under key with the suite's k8sClient.
//
// NOTE(review): this performs a single Get with no retry; callers that depend
// on asynchronous reconciliation may need to wait before calling it - confirm.
func assertResourceExists(gvk schema.GroupVersionKind, key client.ObjectKey) {
	obj := &unstructured.Unstructured{}
	obj.SetGroupVersionKind(gvk)
	Expect(k8sClient.Get(context.Background(), key, obj)).NotTo(HaveOccurred())
}
3 changes: 3 additions & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
openshiftconfigv1 "github.com/openshift/api/config/v1"
mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"

//+kubebuilder:scaffold:imports
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
Expand Down Expand Up @@ -157,6 +158,8 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = openshiftconfigv1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())
err = monitoringv1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())

vars.Config = cfg
vars.Scheme = scheme.Scheme
Expand Down
3 changes: 3 additions & 0 deletions deploy/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ rules:
- apiGroups: ["config.openshift.io"]
resources: ["infrastructures"]
verbs: ["get", "list", "watch"]
- apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ rules:
- apiGroups: ["config.openshift.io"]
resources: ["infrastructures"]
verbs: ["get", "list", "watch"]
- apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
openshiftconfigv1 "github.com/openshift/api/config/v1"
mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
Expand Down Expand Up @@ -70,6 +71,7 @@ func init() {
utilruntime.Must(netattdefv1.AddToScheme(scheme))
utilruntime.Must(mcfgv1.AddToScheme(scheme))
utilruntime.Must(openshiftconfigv1.AddToScheme(scheme))
utilruntime.Must(apiextv1.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}

Expand Down
Loading

0 comments on commit 19e5be7

Please sign in to comment.