diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index b026ed55..c8fdb419 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -44,3 +44,20 @@ jobs: - run: make -C test/e2e setup - run: make -C test/e2e init-app-without-cert-manager - run: make -C test/e2e test + + e2e-k8s-with-metrics-api: + name: "e2e-k8s-with-metrics-api" + runs-on: "ubuntu-20.04" + strategy: + matrix: + kubernetes_versions: ["1.28.0", "1.27.3", "1.26.6"] + env: + KUBERNETES_VERSION: ${{ matrix.kubernetes_versions }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version-file: "go.mod" + - run: make -C test/e2e setup + - run: make -C test/e2e init-app-with-metrics-api + - run: make -C test/e2e test diff --git a/charts/pvc-autoresizer/README.md b/charts/pvc-autoresizer/README.md index 204646fb..b1ab61e6 100644 --- a/charts/pvc-autoresizer/README.md +++ b/charts/pvc-autoresizer/README.md @@ -36,6 +36,7 @@ helm upgrade --create-namespace --namespace pvc-autoresizer -i pvc-autoresizer - | controller.args.interval | string | `"10s"` | Specify interval to monitor pvc capacity. Used as "--interval" option | | controller.args.namespaces | list | `[]` | Specify namespaces to control the pvcs of. Empty for all namespaces. Used as "--namespaces" option | | controller.args.prometheusURL | string | `"http://prometheus-prometheus-oper-prometheus.prometheus.svc:9090"` | Specify Prometheus URL to query volume stats. Used as "--prometheus-url" option | +| controller.args.useK8sMetricsApi | bool | `false` | Use Kubernetes metrics API instead of Prometheus. Used as "--use-k8s-metrics-api" option | | controller.nodeSelector | object | `{}` | Map of key-value pairs for scheduling pods on specific nodes. | | controller.podAnnotations | object | `{}` | Annotations to be added to controller pods. | | controller.podLabels | object | `{}` | Pod labels to be added to controller pods. | diff --git a/charts/pvc-autoresizer/templates/controller/clusterrole.yaml b/charts/pvc-autoresizer/templates/controller/clusterrole.yaml index f4a3b0ab..e09ef92b 100644 --- a/charts/pvc-autoresizer/templates/controller/clusterrole.yaml +++ b/charts/pvc-autoresizer/templates/controller/clusterrole.yaml @@ -40,3 +40,21 @@ rules: - watch - patch - update +{{- if .Values.controller.args.useK8sMetricsApi }} +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - "nodes/proxy" + verbs: + - get + - list + - watch +{{- end }} diff --git a/charts/pvc-autoresizer/templates/controller/deployment.yaml b/charts/pvc-autoresizer/templates/controller/deployment.yaml index 8b252679..ecd618b2 100644 --- a/charts/pvc-autoresizer/templates/controller/deployment.yaml +++ b/charts/pvc-autoresizer/templates/controller/deployment.yaml @@ -33,6 +33,9 @@ spec: args: - --prometheus-url={{ .Values.controller.args.prometheusURL }} - --interval={{ .Values.controller.args.interval }} + {{- if .Values.controller.args.useK8sMetricsApi }} + - --use-k8s-metrics-api={{ .Values.controller.args.useK8sMetricsApi }} + {{- end }} {{- if .Values.controller.args.namespaces }} - --namespaces={{ join "," .Values.controller.args.namespaces }} {{- end }} diff --git a/charts/pvc-autoresizer/values.yaml b/charts/pvc-autoresizer/values.yaml index 22606d02..05c7b8f8 100644 --- a/charts/pvc-autoresizer/values.yaml +++ b/charts/pvc-autoresizer/values.yaml @@ -14,6 +14,10 @@ controller: replicas: 1 args: + # controller.args.useK8sMetricsApi -- Use Kubernetes metrics API instead of Prometheus. + # Used as "--use-k8s-metrics-api" option + useK8sMetricsApi: false + # controller.args.prometheusURL -- Specify Prometheus URL to query volume stats. # Used as "--prometheus-url" option prometheusURL: http://prometheus-prometheus-oper-prometheus.prometheus.svc:9090 diff --git a/cmd/main.go b/cmd/main.go index 47fae5a6..4db7db4d 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -11,15 +11,16 @@ import ( ) var config struct { - certDir string - webhookAddr string - metricsAddr string - healthAddr string - namespaces []string - watchInterval time.Duration - prometheusURL string - skipAnnotation bool - development bool + certDir string + webhookAddr string + metricsAddr string + healthAddr string + namespaces []string + watchInterval time.Duration + prometheusURL string + useK8sMetricsApi bool + skipAnnotation bool + development bool } // rootCmd represents the base command when called without any subcommands @@ -51,7 +52,7 @@ func init() { "Namespaces to resize PersistentVolumeClaims within. Empty for all namespaces.") fs.DurationVar(&config.watchInterval, "interval", 1*time.Minute, "Interval to monitor pvc capacity.") fs.StringVar(&config.prometheusURL, "prometheus-url", "", "Prometheus URL to query volume stats.") + fs.BoolVar(&config.useK8sMetricsApi, "use-k8s-metrics-api", false, "Use Kubernetes metrics API instead of Prometheus") fs.BoolVar(&config.skipAnnotation, "no-annotation-check", false, "Skip annotation check for StorageClass") fs.BoolVar(&config.development, "development", false, "Use development logger config") - _ = rootCmd.MarkFlagRequired("prometheus-url") } diff --git a/cmd/run.go b/cmd/run.go index 3fbdbe60..936cdc74 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -102,9 +102,18 @@ func subMain() error { return err } - promClient, err := runners.NewPrometheusClient(config.prometheusURL) + var metricsClient runners.MetricsClient + if config.useK8sMetricsApi { + metricsClient, err = runners.NewK8sMetricsApiClient() + } else if config.prometheusURL != "" { + metricsClient, err = runners.NewPrometheusClient(config.prometheusURL) + } else { + setupLog.Error(err, "enable use-k8s-metrics-api or provide prometheus-url") + return err + } + if err != nil { - setupLog.Error(err, "unable to initialize prometheus client") + setupLog.Error(err, "unable to initialize metrics client") return err } @@ -113,7 +122,7 @@ func subMain() error { return err } - pvcAutoresizer := runners.NewPVCAutoresizer(promClient, mgr.GetClient(), + pvcAutoresizer := runners.NewPVCAutoresizer(metricsClient, mgr.GetClient(), ctrl.Log.WithName("pvc-autoresizer"), config.watchInterval, mgr.GetEventRecorderFor("pvc-autoresizer")) if err := mgr.Add(pvcAutoresizer); err != nil { diff --git a/go.mod b/go.mod index 70671a5d..5f659fc1 100644 --- a/go.mod +++ b/go.mod @@ -6,10 +6,12 @@ require ( github.com/go-logr/logr v1.3.0 github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 + github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.16.0 github.com/prometheus/client_model v0.4.0 github.com/prometheus/common v0.44.0 github.com/spf13/cobra v1.7.0 + golang.org/x/sync v0.5.0 k8s.io/api v0.28.6 k8s.io/apimachinery v0.28.6 k8s.io/client-go v0.28.6 @@ -46,7 +48,6 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/spf13/pflag v1.0.5 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/go.sum b/go.sum index ee1cbd99..c9196b8d 100644 --- a/go.sum +++ b/go.sum @@ -130,6 +130,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= +golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/runners/k8s_metrics_api_client.go b/internal/runners/k8s_metrics_api_client.go new file mode 100644 index 00000000..8f53d53c --- /dev/null +++ b/internal/runners/k8s_metrics_api_client.go @@ -0,0 +1,127 @@ +package runners + +import ( + "bytes" + "context" + + "github.com/pkg/errors" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + "golang.org/x/sync/errgroup" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// NewK8sMetricsApiClient returns a new k8sMetricsApiClient client +func NewK8sMetricsApiClient() (MetricsClient, error) { + return &k8sMetricsApiClient{}, nil +} + +type k8sMetricsApiClient struct { +} + +func (c *k8sMetricsApiClient) GetMetrics(ctx context.Context) (map[types.NamespacedName]*VolumeStats, error) { + // create a Kubernetes client using in-cluster configuration + config, err := rest.InClusterConfig() + if err != nil { + return nil, err + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, err + } + + // get a list of nodes and IP addresses + nodes, err := clientset.CoreV1().Nodes().List(context.Background(), v1.ListOptions{}) + if err != nil { + return nil, err + } + + // create a map to hold PVC usage data + pvcUsage := make(map[types.NamespacedName]*VolumeStats) + + // use an errgroup to query kubelet for PVC usage on each node + eg, ctx := errgroup.WithContext(ctx) + for _, node := range nodes.Items { + nodeName := node.Name + eg.Go(func() error { + return getPVCUsage(clientset, nodeName, pvcUsage, ctx) + }) + } + + // wait for all queries to complete and handle any errors + if err := eg.Wait(); err != nil { + return nil, err + } + + return pvcUsage, nil +} + +func getPVCUsage(clientset *kubernetes.Clientset, nodeName string, pvcUsage map[types.NamespacedName]*VolumeStats, ctx context.Context) error { + // make the request to the api /metrics endpoint and handle the response + req := clientset. + CoreV1(). + RESTClient(). + Get(). + Resource("nodes"). + Name(nodeName). + SubResource("proxy"). + Suffix("metrics") + respBody, err := req.DoRaw(ctx) + if err != nil { + return errors.Errorf("failed to get stats from kubelet on node %s: with error %s", nodeName, err) + } + parser := expfmt.TextParser{} + metricFamilies, err := parser.TextToMetricFamilies(bytes.NewReader(respBody)) + if err != nil { + return errors.Wrapf(err, "failed to read response body from kubelet on node %s", nodeName) + } + + // volumeAvailableQuery + if gauge, ok := metricFamilies[volumeAvailableQuery]; ok { + for _, m := range gauge.Metric { + pvcName, value := parseMetric(m) + pvcUsage[pvcName] = new(VolumeStats) + pvcUsage[pvcName].AvailableBytes = int64(value) + } + } + // volumeCapacityQuery + if gauge, ok := metricFamilies[volumeCapacityQuery]; ok { + for _, m := range gauge.Metric { + pvcName, value := parseMetric(m) + pvcUsage[pvcName].CapacityBytes = int64(value) + } + } + + // inodesAvailableQuery + if gauge, ok := metricFamilies[inodesAvailableQuery]; ok { + for _, m := range gauge.Metric { + pvcName, value := parseMetric(m) + pvcUsage[pvcName].AvailableInodeSize = int64(value) + } + } + + // inodesCapacityQuery + if gauge, ok := metricFamilies[inodesCapacityQuery]; ok { + for _, m := range gauge.Metric { + pvcName, value := parseMetric(m) + pvcUsage[pvcName].CapacityInodeSize = int64(value) + } + } + return nil +} + +func parseMetric(m *dto.Metric) (pvcName types.NamespacedName, value uint64) { + for _, label := range m.GetLabel() { + if label.GetName() == "namespace" { + pvcName.Namespace = label.GetValue() + } else if label.GetName() == "persistentvolumeclaim" { + pvcName.Name = label.GetValue() + } + } + value = uint64(m.GetGauge().GetValue()) + return pvcName, value +} diff --git a/internal/runners/metrics_client.go b/internal/runners/metrics_client.go index e464b941..12a8ac43 100644 --- a/internal/runners/metrics_client.go +++ b/internal/runners/metrics_client.go @@ -2,13 +2,7 @@ package runners import ( "context" - "fmt" - "time" - "github.com/prometheus/client_golang/api" - prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1" - "github.com/prometheus/common/model" - "github.com/topolvm/pvc-autoresizer/internal/metrics" "k8s.io/apimachinery/pkg/types" ) @@ -19,21 +13,6 @@ const ( inodesCapacityQuery = "kubelet_volume_stats_inodes" ) -// NewPrometheusClient returns a new prometheusClient -func NewPrometheusClient(url string) (MetricsClient, error) { - client, err := api.NewClient(api.Config{ - Address: url, - }) - if err != nil { - return nil, err - } - v1api := prometheusv1.NewAPI(client) - - return &prometheusClient{ - prometheusAPI: v1api, - }, nil -} - // MetricsClient is an interface for getting metrics type MetricsClient interface { // GetMetrics returns volume stats metrics of PVCs @@ -51,76 +30,3 @@ type VolumeStats struct { AvailableInodeSize int64 CapacityInodeSize int64 } - -type prometheusClient struct { - prometheusAPI prometheusv1.API -} - -// GetMetrics implements MetricsClient.GetMetrics -func (c *prometheusClient) GetMetrics(ctx context.Context) (map[types.NamespacedName]*VolumeStats, error) { - volumeStatsMap := make(map[types.NamespacedName]*VolumeStats) - - availableBytes, err := c.getMetricValues(ctx, volumeAvailableQuery) - if err != nil { - return nil, err - } - - capacityBytes, err := c.getMetricValues(ctx, volumeCapacityQuery) - if err != nil { - return nil, err - } - - availableInodeSize, err := c.getMetricValues(ctx, inodesAvailableQuery) - if err != nil { - return nil, err - } - - capacityInodeSize, err := c.getMetricValues(ctx, inodesCapacityQuery) - if err != nil { - return nil, err - } - - for key, val := range availableBytes { - vs := &VolumeStats{AvailableBytes: val} - if cb, ok := capacityBytes[key]; ok { - vs.CapacityBytes = cb - } else { - continue - } - if ais, ok := availableInodeSize[key]; ok { - vs.AvailableInodeSize = ais - } else { - continue - } - if cis, ok := capacityInodeSize[key]; ok { - vs.CapacityInodeSize = cis - } else { - continue - } - volumeStatsMap[key] = vs - } - - return volumeStatsMap, nil -} - -func (c *prometheusClient) getMetricValues(ctx context.Context, query string) (map[types.NamespacedName]int64, error) { - res, _, err := c.prometheusAPI.Query(ctx, query, time.Now()) - if err != nil { - metrics.MetricsClientFailTotal.Increment() - return nil, err - } - - if res.Type() != model.ValVector { - return nil, fmt.Errorf("unknown response type: %s", res.Type().String()) - } - resultMap := make(map[types.NamespacedName]int64) - vec := res.(model.Vector) - for _, val := range vec { - nn := types.NamespacedName{ - Namespace: string(val.Metric["namespace"]), - Name: string(val.Metric["persistentvolumeclaim"]), - } - resultMap[nn] = int64(val.Value) - } - return resultMap, nil -} diff --git a/internal/runners/prometheus_client.go b/internal/runners/prometheus_client.go new file mode 100644 index 00000000..6a3bd966 --- /dev/null +++ b/internal/runners/prometheus_client.go @@ -0,0 +1,101 @@ +package runners + +import ( + "context" + "fmt" + "time" + + "github.com/prometheus/client_golang/api" + prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/model" + "github.com/topolvm/pvc-autoresizer/internal/metrics" + "k8s.io/apimachinery/pkg/types" +) + +// NewPrometheusClient returns a new prometheusClient +func NewPrometheusClient(url string) (MetricsClient, error) { + client, err := api.NewClient(api.Config{ + Address: url, + }) + if err != nil { + return nil, err + } + v1api := prometheusv1.NewAPI(client) + + return &prometheusClient{ + prometheusAPI: v1api, + }, nil +} + +type prometheusClient struct { + prometheusAPI prometheusv1.API +} + +// GetMetrics implements MetricsClient.GetMetrics +func (c *prometheusClient) GetMetrics(ctx context.Context) (map[types.NamespacedName]*VolumeStats, error) { + volumeStatsMap := make(map[types.NamespacedName]*VolumeStats) + + availableBytes, err := c.getMetricValues(ctx, volumeAvailableQuery) + if err != nil { + return nil, err + } + + capacityBytes, err := c.getMetricValues(ctx, volumeCapacityQuery) + if err != nil { + return nil, err + } + + availableInodeSize, err := c.getMetricValues(ctx, inodesAvailableQuery) + if err != nil { + return nil, err + } + + capacityInodeSize, err := c.getMetricValues(ctx, inodesCapacityQuery) + if err != nil { + return nil, err + } + + for key, val := range availableBytes { + vs := &VolumeStats{AvailableBytes: val} + if cb, ok := capacityBytes[key]; ok { + vs.CapacityBytes = cb + } else { + continue + } + if ais, ok := availableInodeSize[key]; ok { + vs.AvailableInodeSize = ais + } else { + continue + } + if cis, ok := capacityInodeSize[key]; ok { + vs.CapacityInodeSize = cis + } else { + continue + } + volumeStatsMap[key] = vs + } + + return volumeStatsMap, nil +} + +func (c *prometheusClient) getMetricValues(ctx context.Context, query string) (map[types.NamespacedName]int64, error) { + res, _, err := c.prometheusAPI.Query(ctx, query, time.Now()) + if err != nil { + metrics.MetricsClientFailTotal.Increment() + return nil, err + } + + if res.Type() != model.ValVector { + return nil, fmt.Errorf("unknown response type: %s", res.Type().String()) + } + resultMap := make(map[types.NamespacedName]int64) + vec := res.(model.Vector) + for _, val := range vec { + nn := types.NamespacedName{ + Namespace: string(val.Metric["namespace"]), + Name: string(val.Metric["persistentvolumeclaim"]), + } + resultMap[nn] = int64(val.Value) + } + return resultMap, nil +} diff --git a/test/e2e/Makefile b/test/e2e/Makefile index 56567e93..15e3b884 100644 --- a/test/e2e/Makefile +++ b/test/e2e/Makefile @@ -44,6 +44,10 @@ init-app-with-cert-manager: init-cluster init-app-without-cert-manager: init-cluster $(HELM) install --create-namespace --namespace=pvc-autoresizer pvc-autoresizer ../../charts/pvc-autoresizer/ -f manifests/values/values-without-cert-manager.yaml +.PHONY: init-app-with-metrics-api +init-app-with-metrics-api: init-cluster + $(HELM) install --create-namespace --namespace=pvc-autoresizer pvc-autoresizer ../../charts/pvc-autoresizer/ -f manifests/values/values-with-metrics-api.yaml + .PHONY: test test: E2ETEST=1 BINDIR=$(BINDIR) $(GINKGO) --fail-fast -v . diff --git a/test/e2e/manifests/values/values-with-metrics-api.yaml b/test/e2e/manifests/values/values-with-metrics-api.yaml new file mode 100644 index 00000000..2cf9fa96 --- /dev/null +++ b/test/e2e/manifests/values/values-with-metrics-api.yaml @@ -0,0 +1,9 @@ +image: + repository: pvc-autoresizer + tag: devel + pullPolicy: Never + +controller: + args: + useK8sMetricsApi: true + interval: 1s