From 7cff83c36cd1a5b6c023ef071b04302e4aa3bfb8 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Thu, 14 Jan 2021 10:10:39 +0100 Subject: [PATCH 1/2] Provide more interesting metrics. --- cmd/internal/determine-sync-images/lister.go | 13 +++- cmd/internal/metrics/metrics.go | 66 ++++++++++++++++++-- cmd/main.go | 6 +- 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/cmd/internal/determine-sync-images/lister.go b/cmd/internal/determine-sync-images/lister.go index e58a6c7..e2a4175 100644 --- a/cmd/internal/determine-sync-images/lister.go +++ b/cmd/internal/determine-sync-images/lister.go @@ -9,6 +9,7 @@ import ( "github.com/aws/aws-sdk-go/service/s3" metalgo "github.com/metal-stack/metal-go" + "github.com/metal-stack/metal-image-cache-sync/cmd/internal/metrics" "github.com/metal-stack/metal-image-cache-sync/pkg/api" "github.com/metal-stack/metal-image-cache-sync/pkg/utils" "github.com/pkg/errors" @@ -21,15 +22,17 @@ type SyncLister struct { config *api.Config s3 *s3.S3 excludePaths []string + collector *metrics.Collector } -func NewSyncLister(logger *zap.SugaredLogger, driver *metalgo.Driver, s3 *s3.S3, config *api.Config) *SyncLister { +func NewSyncLister(logger *zap.SugaredLogger, driver *metalgo.Driver, s3 *s3.S3, collector *metrics.Collector, config *api.Config) *SyncLister { return &SyncLister{ logger: logger, driver: driver, config: config, s3: s3, excludePaths: config.ExcludePaths, + collector: collector, } } @@ -39,13 +42,15 @@ func (s *SyncLister) DetermineSyncList() ([]api.OS, error) { return nil, errors.Wrap(err, "error listing images in s3") } - apiImages, err := s.driver.ImageList() + resp, err := s.driver.ImageList() if err != nil { return nil, errors.Wrap(err, "error listing images") } + s.collector.SetMetalAPIImageCount(len(resp.Image)) + images := api.OSImagesByOS{} - for _, img := range apiImages.Image { + for _, img := range resp.Image { skip := false for _, exclude := range s.excludePaths { if strings.Contains(img.URL, exclude) { @@ -146,6 +151,8 @@ func (s *SyncLister) DetermineSyncList() ([]api.OS, error) { } } + s.collector.SetUnsyncedImageCount(len(resp.Image) - len(syncImages)) + return syncImages, nil } diff --git a/cmd/internal/metrics/metrics.go b/cmd/internal/metrics/metrics.go index dc0aa4e..16c6093 100644 --- a/cmd/internal/metrics/metrics.go +++ b/cmd/internal/metrics/metrics.go @@ -3,7 +3,9 @@ package metrics import ( "os" "path/filepath" + "strings" + metalgo "github.com/metal-stack/metal-go" "github.com/metal-stack/metal-image-cache-sync/pkg/api" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" @@ -12,16 +14,20 @@ import ( type Collector struct { logger *zap.SugaredLogger config *api.Config + driver *metalgo.Driver cacheMissInc func() cacheSyncDownloadBytesAdd func(float64) cacheSyncDownloadInc func() cacheImageDownloadsInc func() + cacheUnsyncedImageCount func(float64) + metalAPIImageCount func(float64) } -func MustMetrics(logger *zap.SugaredLogger, config *api.Config) *Collector { +func MustMetrics(logger *zap.SugaredLogger, driver *metalgo.Driver, config *api.Config) *Collector { c := &Collector{ logger: logger, config: config, + driver: driver, } cacheSize := prometheus.NewGaugeFunc(prometheus.GaugeOpts{ @@ -29,36 +35,57 @@ func MustMetrics(logger *zap.SugaredLogger, config *api.Config) *Collector { Help: "Current size of the cache directory in bytes", }, c.cacheDirSize) + cacheImageCount := prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "cache_image_count", + Help: "Current amount of images in the cache (amount of files in cache directory excluding checksums)", + }, c.cacheImageCount) + + cacheUnsyncedImageCount := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "cache_unsynced_image_count", + Help: "Amount of images from the metal-api not synced into the cache (due to expiration, cache size constraints, ...)", + }) + c.cacheUnsyncedImageCount = cacheUnsyncedImageCount.Set + + metalImageCount := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "metal_api_image_count", + Help: "Amount of images configured in the metal-api", + }) + c.metalAPIImageCount = metalImageCount.Set + cacheMisses := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "cache_misses", - Help: "Amount of cache misses", + Help: "Amount of cache misses during instance lifetime", }) c.cacheMissInc = cacheMisses.Inc cacheSyncDownloadBytes := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "cache_sync_downloaded_image_bytes", - Help: "Amount of bytes downloaded by the image cache", + Help: "Amount of bytes downloaded by the image cache during instance lifetime", }) c.cacheSyncDownloadBytesAdd = cacheSyncDownloadBytes.Add cacheSyncDownloadCount := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "cache_sync_downloaded_image_count", - Help: "Amount of images downloaded by the image cache", + Help: "Amount of images downloaded by the image cache during instance lifetime", }) c.cacheSyncDownloadInc = cacheSyncDownloadCount.Inc cacheImageDownloads := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "cache_image_downloads", - Help: "Amount of images downloaded from the image cache", + Help: "Amount of images downloaded from the image cache during instance lifetime", }) c.cacheImageDownloadsInc = cacheImageDownloads.Inc prometheus.MustRegister(cacheSize) + prometheus.MustRegister(cacheImageCount) + prometheus.MustRegister(cacheUnsyncedImageCount) prometheus.MustRegister(cacheMisses) prometheus.MustRegister(cacheSyncDownloadBytes) prometheus.MustRegister(cacheSyncDownloadCount) prometheus.MustRegister(cacheImageDownloads) + prometheus.MustRegister(metalImageCount) + return c } @@ -71,7 +98,7 @@ func (c *Collector) cacheDirSize() float64 { if !info.IsDir() { size += info.Size() } - return err + return nil }) if err != nil { @@ -81,6 +108,25 @@ func (c *Collector) cacheDirSize() float64 { return float64(size) } +func (c *Collector) cacheImageCount() float64 { + var count int64 + err := filepath.Walk(c.config.ImageCacheRootPath, func(_ string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() && !strings.HasSuffix(info.Name(), ".md5") { + count += 1 + } + return nil + }) + + if err != nil { + c.logger.Errorw("error collecting image cache count metric", "error", err) + } + + return float64(count) +} + func (c *Collector) IncrementCacheMiss() { c.cacheMissInc() } @@ -93,6 +139,14 @@ func (c *Collector) IncrementSyncDownloadImageCount() { c.cacheSyncDownloadInc() } +func (c *Collector) SetUnsyncedImageCount(b int) { + c.cacheUnsyncedImageCount(float64(b)) +} + func (c *Collector) IncrementDownloadedImages() { c.cacheImageDownloadsInc() } + +func (c *Collector) SetMetalAPIImageCount(b int) { + c.metalAPIImageCount(float64(b)) +} diff --git a/cmd/main.go b/cmd/main.go index edef728..662e4df 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -168,14 +168,14 @@ func run() error { return err } - collector := metrics.MustMetrics(logger.Named("metrics"), c) - driver, err := metalgo.NewDriver(c.MetalAPIEndpoint, "", c.MetalAPIHMAC, metalgo.AuthType("Metal-View")) if err != nil { logger.Errorw("cannot create metal-api client", "error", err) return err } + collector := metrics.MustMetrics(logger.Named("metrics"), driver, c) + dummyRegion := "dummy" // we don't use AWS S3, we don't need a proper region ss, err := session.NewSession(&aws.Config{ Endpoint: &c.ImageStore, @@ -194,7 +194,7 @@ func run() error { s3Client := s3.New(ss) s3Downloader := s3manager.NewDownloader(ss) - lister = synclister.NewSyncLister(logger.Named("sync-lister"), driver, s3Client, c) + lister = synclister.NewSyncLister(logger.Named("sync-lister"), driver, s3Client, collector, c) syncer, err = sync.NewSyncer(logger.Named("syncer"), fs, s3Downloader, c, collector, stop) if err != nil { From 9f3d60a36af4eb2728715d2ea4e1485cccec42f7 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Thu, 14 Jan 2021 10:35:41 +0100 Subject: [PATCH 2/2] Cleanup. --- cmd/internal/metrics/metrics.go | 5 +---- cmd/main.go | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cmd/internal/metrics/metrics.go b/cmd/internal/metrics/metrics.go index 16c6093..642e0b0 100644 --- a/cmd/internal/metrics/metrics.go +++ b/cmd/internal/metrics/metrics.go @@ -5,7 +5,6 @@ import ( "path/filepath" "strings" - metalgo "github.com/metal-stack/metal-go" "github.com/metal-stack/metal-image-cache-sync/pkg/api" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" @@ -14,7 +13,6 @@ import ( type Collector struct { logger *zap.SugaredLogger config *api.Config - driver *metalgo.Driver cacheMissInc func() cacheSyncDownloadBytesAdd func(float64) cacheSyncDownloadInc func() @@ -23,11 +21,10 @@ type Collector struct { metalAPIImageCount func(float64) } -func MustMetrics(logger *zap.SugaredLogger, driver *metalgo.Driver, config *api.Config) *Collector { +func MustMetrics(logger *zap.SugaredLogger, config *api.Config) *Collector { c := &Collector{ logger: logger, config: config, - driver: driver, } cacheSize := prometheus.NewGaugeFunc(prometheus.GaugeOpts{ diff --git a/cmd/main.go b/cmd/main.go index 662e4df..b01cfb9 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -174,7 +174,7 @@ func run() error { return err } - collector := metrics.MustMetrics(logger.Named("metrics"), driver, c) + collector := metrics.MustMetrics(logger.Named("metrics"), c) dummyRegion := "dummy" // we don't use AWS S3, we don't need a proper region ss, err := session.NewSession(&aws.Config{