diff --git a/Gopkg.lock b/Gopkg.lock index a346563b0..a81072371 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -396,7 +396,10 @@ [[projects]] digest = "1:a22debd993d18b9203c985810f33f52e7583724800ee8d15e7d362e7175639d1" name = "github.com/prometheus/client_golang" - packages = ["prometheus"] + packages = [ + "prometheus", + "prometheus/promhttp", + ] pruneopts = "NT" revision = "c5b7fccd204277076155f10851dad72b76a49317" version = "v0.8.0" @@ -994,6 +997,7 @@ "github.com/pborman/uuid", "github.com/pkg/errors", "github.com/prometheus/client_golang/prometheus", + "github.com/prometheus/client_golang/prometheus/promhttp", "github.com/sirupsen/logrus", "golang.org/x/oauth2", "golang.org/x/time/rate", @@ -1004,6 +1008,7 @@ "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1", "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset", "k8s.io/apimachinery/pkg/api/errors", + "k8s.io/apimachinery/pkg/api/meta", "k8s.io/apimachinery/pkg/api/resource", "k8s.io/apimachinery/pkg/apis/meta/v1", "k8s.io/apimachinery/pkg/fields", diff --git a/cmd/backup-operator/main.go b/cmd/backup-operator/main.go index fe2ebed0c..18a3827d8 100644 --- a/cmd/backup-operator/main.go +++ b/cmd/backup-operator/main.go @@ -17,15 +17,12 @@ package main import ( "context" "flag" - "os" - "runtime" - "time" - + "fmt" controller "github.com/coreos/etcd-operator/pkg/controller/backup-operator" "github.com/coreos/etcd-operator/pkg/util/constants" "github.com/coreos/etcd-operator/pkg/util/k8sutil" version "github.com/coreos/etcd-operator/version" - + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/sirupsen/logrus" v1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes" @@ -34,6 +31,10 @@ import ( "k8s.io/client-go/tools/leaderelection" "k8s.io/client-go/tools/leaderelection/resourcelock" "k8s.io/client-go/tools/record" + "net/http" + "os" + "runtime" + "time" ) var ( @@ -81,7 +82,8 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - + 
http.Handle("/metrics", promhttp.Handler()) + go func() { logrus.Errorf("metrics server stopped: %v", http.ListenAndServe(fmt.Sprintf(":%d", 9091), nil)) }() leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ Lock: rl, LeaseDuration: 15 * time.Second, diff --git a/pkg/backup/metrics/metrics.go b/pkg/backup/metrics/metrics.go new file mode 100644 index 000000000..874666fca --- /dev/null +++ b/pkg/backup/metrics/metrics.go @@ -0,0 +1,40 @@ +// Package metrics declares the Prometheus collectors that track backup attempts and successes. +package metrics + +import "github.com/prometheus/client_golang/prometheus" + +var ( + // BackupsAttemptedTotal counts backup attempts, labelled by name and namespace. + BackupsAttemptedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "etcd_operator", + Name: "backups_attempt_total", + Help: "Backups attempt by name and namespace", + }, + []string{"name", "namespace"}, + ) + + // BackupsSuccessTotal counts successful backups, labelled by name and namespace. + BackupsSuccessTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "etcd_operator", + Name: "backups_success_total", + Help: "Backups success by name and namespace", + }, + []string{"name", "namespace"}, + ) + + // BackupsLastSuccess holds the timestamp of the last successful backup, labelled by name and namespace. + BackupsLastSuccess = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "etcd_operator", + Name: "backup_last_success", + Help: "Timestamp of last successful backup, by name and namespace", + }, + []string{"name", "namespace"}, + ) +) + +// init registers the backup collectors with the default Prometheus registry. +func init() { + prometheus.MustRegister(BackupsAttemptedTotal) + prometheus.MustRegister(BackupsSuccessTotal) + prometheus.MustRegister(BackupsLastSuccess) +} diff --git a/pkg/controller/backup-operator/sync.go b/pkg/controller/backup-operator/sync.go index e5ae56567..c7e9183c2 100644 --- a/pkg/controller/backup-operator/sync.go +++ b/pkg/controller/backup-operator/sync.go @@ -21,8 +21,9 @@ import ( "time" api "github.com/coreos/etcd-operator/pkg/apis/etcd/v1beta2" + "github.com/coreos/etcd-operator/pkg/backup/metrics" "github.com/coreos/etcd-operator/pkg/util/constants" - + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -101,14 +102,17 @@ func (b *Backup) processItem(key string)
error { ctx := context.Background() ctx, cancel := context.WithCancel(ctx) go b.periodicRunnerFunc(ctx, ticker, eb) - // Store cancel function for periodic b.backupRunnerStore.Store(eb.ObjectMeta.UID, BackupRunner{eb.Spec, cancel}) } else if !isPeriodic { // Perform backup + metrics.BackupsAttemptedTotal.With(prometheus.Labels{ + "namespace": eb.ObjectMeta.Namespace, + "name": eb.ObjectMeta.Name, + }).Inc() bs, err := b.handleBackup(nil, &eb.Spec, false) // Report backup status b.reportBackupStatus(bs, err, eb) } return err @@ -195,9 +199,13 @@ func (b *Backup) periodicRunnerFunc(ctx context.Context, t *time.Ticker, eb *api } if err == nil { // Perform backup + metrics.BackupsAttemptedTotal.With(prometheus.Labels{ + "namespace": eb.ObjectMeta.Namespace, + "name": eb.ObjectMeta.Name, + }).Inc() bs, err = b.handleBackup(&ctx, &latestEb.Spec, true) } // Report backup status b.reportBackupStatus(bs, err, latestEb) } } @@ -213,6 +221,14 @@ func (b *Backup) reportBackupStatus(bs *api.BackupStatus, berr error, eb *api.Et eb.Status.EtcdRevision = bs.EtcdRevision eb.Status.EtcdVersion = bs.EtcdVersion eb.Status.LastSuccessDate = bs.LastSuccessDate + metrics.BackupsSuccessTotal.With(prometheus.Labels{ + "namespace": eb.ObjectMeta.Namespace, + "name": eb.ObjectMeta.Name, + }).Inc() + metrics.BackupsLastSuccess.With(prometheus.Labels{ + "namespace": eb.ObjectMeta.Namespace, + "name": eb.ObjectMeta.Name, + }).Set(float64(time.Now().Unix())) } _, err := b.backupCRCli.EtcdV1beta2().EtcdBackups(b.namespace).Update(eb) if err != nil { @@ -249,7 +265,6 @@ func (b *Backup) handleBackup(parentContext *context.Context, spec *api.BackupSp if err != nil { return nil, err } - // When BackupPolicy.TimeoutInSecond <= 0, use default DefaultBackupTimeout.
backupTimeout := time.Duration(constants.DefaultBackupTimeout) if spec.BackupPolicy != nil && spec.BackupPolicy.TimeoutInSecond > 0 {