Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[metrics 2/x] Configure Prometheus Operator #687

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{{ if .IsPrometheusOperatorInstalled }}
---
# ServiceMonitor: scrapes the `sriov-network-metrics` port of the services
# labelled `name: sriov-network-metrics-exporter-service` in this namespace
# every 30s over HTTPS, authenticating with the mounted service account token.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: sriov-network-metrics-exporter
namespace: {{.Namespace}}
spec:
endpoints:
- interval: 30s
port: sriov-network-metrics
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token"
scheme: "https"
honorLabels: true
tlsConfig:
serverName: sriov-network-metrics-exporter-service.{{.Namespace}}.svc
# NOTE(review): this CA bundle path looks specific to the Prometheus pod's
# mounted config maps - confirm it exists on every supported platform.
caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt
insecureSkipVerify: false
namespaceSelector:
matchNames:
- {{.Namespace}}
selector:
matchLabels:
name: sriov-network-metrics-exporter-service
---
# RoleBinding: grants the `prometheus-k8s` Role defined in this file to the
# Prometheus service account given by the template values.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: {{.Namespace}}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: {{.PrometheusOperatorServiceAccount}}
namespace: {{.PrometheusOperatorNamespace}}
---
# Role: read-only (get/list/watch) access to services, endpoints and pods in
# this namespace (presumably what Prometheus needs for target discovery -
# TODO confirm).
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: {{.Namespace}}
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
{{ end }}
2 changes: 1 addition & 1 deletion bindata/manifests/metrics-exporter/metrics-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
namespace: {{.Namespace}}
annotations:
prometheus.io/target: "true"
{{- if eq .ClusterType "openshift" }}
{{ if .IsOpenshift }}
service.beta.openshift.io/serving-cert-secret-name: {{ .MetricsExporterSecretName }}
{{- end }}
labels:
Expand Down
29 changes: 21 additions & 8 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,12 @@ func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context,
data.Data["MetricsExporterSecretName"] = os.Getenv("METRICS_EXPORTER_SECRET_NAME")
data.Data["MetricsExporterPort"] = os.Getenv("METRICS_EXPORTER_PORT")
data.Data["MetricsExporterKubeRbacProxyImage"] = os.Getenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE")
data.Data["ClusterType"] = vars.ClusterType
data.Data["IsOpenshift"] = r.PlatformHelper.IsOpenshiftCluster()

data.Data["IsPrometheusOperatorInstalled"] = strings.ToLower(os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED")) == trueString
data.Data["PrometheusOperatorServiceAccount"] = os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT")
data.Data["PrometheusOperatorNamespace"] = os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE")

data.Data["NodeSelectorField"] = GetDefaultNodeSelector()
if dc.Spec.ConfigDaemonNodeSelector != nil {
data.Data["NodeSelectorField"] = dc.Spec.ConfigDaemonNodeSelector
Expand All @@ -250,23 +255,21 @@ func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context,
return err
}

deployMetricsExporter, ok := dc.Spec.FeatureGates[consts.MetricsExporterFeatureGate]
if ok && deployMetricsExporter {
if r.FeatureGate.IsEnabled(consts.MetricsExporterFeatureGate) {
for _, obj := range objs {
err = r.syncK8sResource(ctx, dc, obj)
if err != nil {
logger.Error(err, "Couldn't sync metrics exporter objects")
return err
}
}

return nil
}

for _, obj := range objs {
err = r.deleteK8sResource(ctx, obj)
if err != nil {
return err
}
err = r.deleteK8sResources(ctx, objs)
if err != nil {
return err
}

return nil
Expand Down Expand Up @@ -362,6 +365,16 @@ func (r *SriovOperatorConfigReconciler) deleteK8sResource(ctx context.Context, i
return nil
}

// deleteK8sResources deletes each object in objs, in order, by delegating to
// deleteK8sResource. It stops at the first failure and returns that error;
// it returns nil when every deletion succeeds (or objs is empty).
func (r *SriovOperatorConfigReconciler) deleteK8sResources(ctx context.Context, objs []*uns.Unstructured) error {
	for i := range objs {
		if delErr := r.deleteK8sResource(ctx, objs[i]); delErr != nil {
			return delErr
		}
	}

	return nil
}

func (r *SriovOperatorConfigReconciler) syncK8sResource(ctx context.Context, cr *sriovnetworkv1.SriovOperatorConfig, in *uns.Unstructured) error {
switch in.GetKind() {
case clusterRoleResourceName, clusterRoleBindingResourceName, mutatingWebhookConfigurationCRDName, validatingWebhookConfigurationCRDName, machineConfigCRDName:
Expand Down
96 changes: 60 additions & 36 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,21 @@ import (
admv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/golang/mock/gomock"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift"
Expand All @@ -37,7 +40,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
By("Create SriovOperatorConfig controller k8s objs")
config := &sriovnetworkv1.SriovOperatorConfig{}
config.SetNamespace(testNamespace)
config.SetName(constants.DefaultConfigName)
config.SetName(consts.DefaultConfigName)
config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{
EnableInjector: true,
EnableOperatorWebhook: true,
Expand Down Expand Up @@ -105,7 +108,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
})

Context("When is up", func() {
JustBeforeEach(func() {
BeforeEach(func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
err := util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())
Expand Down Expand Up @@ -333,41 +336,54 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
Expect(err).ToNot(HaveOccurred())
})

It("should deploy the metrics-exporter when the feature gate is enabled", func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

daemonSet := &appsv1.DaemonSet{}
err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-metrics-exporter", Namespace: testNamespace}, daemonSet)
Expect(err).To(HaveOccurred())
Expect(errors.IsNotFound(err)).To(BeTrue())

By("Turn `metricsExporter` feature gate on")
config.Spec.FeatureGates = map[string]bool{constants.MetricsExporterFeatureGate: true}
err = k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())

DeferCleanup(func() {
config.Spec.FeatureGates = map[string]bool{}
err = k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())
Context("metricsExporter feature gate", func() {
When("is disabled", func() {
It("should not deploy the daemonset", func() {
daemonSet := &appsv1.DaemonSet{}
err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-metrics-exporter", Namespace: testNamespace}, daemonSet)
Expect(err).To(HaveOccurred())
Expect(errors.IsNotFound(err)).To(BeTrue())
})
})

err = util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObject(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())

By("Turn `metricsExporter` feature gate off")
config.Spec.FeatureGates = map[string]bool{}
err = k8sClient.Update(ctx, config)

err = util.WaitForNamespacedObjectDeleted(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObjectDeleted(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())
When("is enabled", func() {
BeforeEach(func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

By("Turn `metricsExporter` feature gate on")
config.Spec.FeatureGates = map[string]bool{consts.MetricsExporterFeatureGate: true}
err := k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())
})

It("should deploy the sriov-network-metrics-exporter DaemonSet", func() {
err := util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObject(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())
})

It("should deploy extra configuration when the Prometheus operator is installed", func() {
DeferCleanup(os.Setenv, "METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED", os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED"))
os.Setenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED", "true")

err := util.WaitForNamespacedObject(&rbacv1.Role{}, k8sClient, testNamespace, "prometheus-k8s", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())

err = util.WaitForNamespacedObject(&rbacv1.RoleBinding{}, k8sClient, testNamespace, "prometheus-k8s", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())

assertResourceExists(
schema.GroupVersionKind{
Group: "monitoring.coreos.com",
Kind: "ServiceMonitor",
Version: "v1",
},
client.ObjectKey{Namespace: testNamespace, Name: "sriov-network-metrics-exporter"})
})
})
})

// This test verifies that the CABundle field in the webhook configuration added by third party components is not
Expand Down Expand Up @@ -429,6 +445,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n")))
}, "1s").Should(Succeed())
})

It("should reconcile to a converging state when multiple node policies are set", func() {
By("Creating a consistent number of node policies")
for i := 0; i < 30; i++ {
Expand Down Expand Up @@ -477,3 +494,10 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
})
})
})

// assertResourceExists fails the running spec unless a single Get through
// k8sClient finds an object of kind gvk at key. It does not retry.
func assertResourceExists(gvk schema.GroupVersionKind, key client.ObjectKey) {
	obj := &unstructured.Unstructured{}
	obj.SetGroupVersionKind(gvk)

	getErr := k8sClient.Get(context.Background(), key, obj)
	Expect(getErr).NotTo(HaveOccurred())
}
7 changes: 7 additions & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
openshiftconfigv1 "github.com/openshift/api/config/v1"
mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"

//+kubebuilder:scaffold:imports
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
Expand Down Expand Up @@ -137,6 +138,10 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT", "k8s-prometheus")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you need the new variable here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE", "default")
Expect(err).NotTo(HaveOccurred())

By("bootstrapping test environment")
testEnv = &envtest.Environment{
Expand All @@ -159,6 +164,8 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = openshiftconfigv1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())
err = monitoringv1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())

vars.Config = cfg
vars.Scheme = scheme.Scheme
Expand Down
6 changes: 6 additions & 0 deletions deploy/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ spec:
value: $METRICS_EXPORTER_IMAGE
- name: METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE
value: $METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED
value: "$METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED"
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT
value: $METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE
value: $METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE
- name: RESOURCE_PREFIX
value: $RESOURCE_PREFIX
- name: DEV_MODE
Expand Down
3 changes: 3 additions & 0 deletions deployment/sriov-network-operator-chart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ We have introduced the following Chart parameters.
| `operator.clustertype` | string | `kubernetes` | Cluster environment type |
| `operator.metricsExporter.port` | string | `9110` | Port where the Network Metrics Exporter listen |
| `operator.metricsExporter.certificates.secretName` | string | `metrics-exporter-cert` | Secret name to serve metrics via TLS. The secret must have the same fields as `operator.admissionControllers.certificates.secretNames` |
| `operator.metricsExporter.prometheusOperator.enabled` | bool | `false` | Whether the operator should configure Prometheus resources or not (e.g. `ServiceMonitors`). |
| `operator.metricsExporter.prometheusOperator.serviceAccount` | string | `prometheus-k8s` | The service account used by the Prometheus Operator. This is used to give Prometheus the permission to list resources in the SR-IOV operator namespace |
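| `operator.metricsExporter.prometheusOperator.namespace` | string | `monitoring` | The namespace where the Prometheus Operator is installed. It is used as the namespace of the Prometheus service account in the `prometheus-k8s` RoleBinding. The `monitoring.coreos.com` resources are only deployed when `operator.metricsExporter.prometheusOperator.enabled` is `true`. |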

#### Admission Controllers parameters

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ spec:
value: {{ .Values.operator.metricsExporter.certificates.secretName }}
- name: METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE
value: {{ .Values.images.metricsExporterKubeRbacProxy }}
{{- if .Values.operator.metricsExporter.prometheusOperator.enabled }}
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED
value: {{ .Values.operator.metricsExporter.prometheusOperator.enabled | quote}}
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT
value: {{ .Values.operator.metricsExporter.prometheusOperator.serviceAccount }}
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE
value: {{ .Values.operator.metricsExporter.prometheusOperator.namespace }}
{{- end }}
- name: RESOURCE_PREFIX
value: {{ .Values.operator.resourcePrefix }}
- name: IMAGE_PULL_SECRETS
Expand Down
4 changes: 4 additions & 0 deletions deployment/sriov-network-operator-chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ operator:
port: "9110"
certificates:
secretName: "metrics-exporter-cert"
prometheusOperator:
zeeke marked this conversation as resolved.
Show resolved Hide resolved
enabled: false
serviceAccount: "prometheus-k8s"
namespace: "monitoring"
admissionControllers:
enabled: false
certificates:
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ require (
github.com/openshift/machine-config-operator v0.0.1-0.20231024085435-7e1fb719c1ba
github.com/ovn-org/libovsdb v0.6.1-0.20240125124854-03f787b1a892
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0
github.com/prometheus-operator/prometheus-operator/pkg/client v0.68.0
github.com/safchain/ethtool v0.3.0
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.8.4
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,10 @@ github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZ
github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0 h1:yl9ceUSUBo9woQIO+8eoWpcxZkdZgm89g+rVvu37TUw=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0/go.mod h1:9Uuu3pEU2jB8PwuqkHvegQ0HV/BlZRJUyfTYAqfdVF8=
github.com/prometheus-operator/prometheus-operator/pkg/client v0.68.0 h1:8FS0sXpFkFPxp2gfkxyEMnhZV9yhf7xPbpsIeUZHlzM=
github.com/prometheus-operator/prometheus-operator/pkg/client v0.68.0/go.mod h1:ul4ND0BMCcOX1OSZvbJA1/lh7yQ2ILHNKuZIojGISe4=
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
Expand Down
3 changes: 3 additions & 0 deletions hack/run-e2e-conformance-virtual-ocp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ export CLUSTER_TYPE=openshift
export DEV_MODE=TRUE
export CLUSTER_HAS_EMULATED_PF=TRUE
export OPERATOR_LEADER_ELECTION_ENABLE=true
# Enable the Prometheus Operator integration for the metrics exporter.
# The service account and namespace can be overridden from the caller's
# environment; the defaults below are used otherwise.
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED=true
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT:-"prometheus-k8s"}
# Fixed typo in the default: "openshfit-monitoring" -> "openshift-monitoring".
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshift-monitoring"}
adrianchiris marked this conversation as resolved.
Show resolved Hide resolved

export SRIOV_NETWORK_OPERATOR_IMAGE="$registry/$NAMESPACE/sriov-network-operator:latest"
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$registry/$NAMESPACE/sriov-network-config-daemon:latest"
Expand Down
Loading
Loading