Skip to content

Commit

Permalink
temp commit for compute metrics work
Browse files Browse the repository at this point in the history
  • Loading branch information
alexandreLamarre committed Oct 14, 2022
1 parent 3d8983f commit 44cc2fa
Show file tree
Hide file tree
Showing 18 changed files with 178 additions and 11 deletions.
51 changes: 51 additions & 0 deletions pkg/alerting/metrics/compute_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package metrics_test

import (
"bytes"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/rancher/opni/pkg/alerting/metrics"
)

// Specs covering the "cpu" entries of the lookup tables exported by the
// alerting/metrics package: both tables must contain the entry, the template
// must have parsed, and executing it must succeed and produce output.
var _ = Describe("compute alerting options pipeline & compute alerts construction", func() {
	When("users want to create cpu compute alerts", func() {
		// Key shared by every spec below.
		const alertName = "cpu"

		Specify("alerting/metrics package should export cpu alerts", func() {
			_, hasTemplate := metrics.ComputeNameToTemplate[alertName]
			_, hasOptions := metrics.ComputeNameToOpts[alertName]
			Expect((hasTemplate && hasOptions)).To(BeTrue())
		})

		Specify("cpu alerts should have parsed an available template", func() {
			cpuTemplate := metrics.ComputeNameToTemplate[alertName]
			Expect(cpuTemplate).NotTo(BeNil())
			Expect(cpuTemplate.Templates()).NotTo(HaveLen(0))
		})

		Specify("The template should be executed from options", func() {
			cpuTemplate := metrics.ComputeNameToTemplate[alertName]
			registeredOpts := metrics.ComputeNameToOpts[alertName]
			rendered := new(bytes.Buffer)
			Expect(cpuTemplate.Execute(rendered, registeredOpts)).NotTo(HaveOccurred())
		})

		Specify("valid inputs should construct valid promQL", func() {
			cases := []metrics.CpuRuleOptions{
				{Node: []string{""}},
			}

			for i := range cases {
				cpuTemplate := metrics.ComputeNameToTemplate[alertName]
				rendered := new(bytes.Buffer)
				// Each case is passed by value, as a plain CpuRuleOptions.
				execErr := cpuTemplate.Execute(rendered, cases[i])
				Expect(execErr).NotTo(HaveOccurred())
				Expect(rendered.String()).NotTo(Equal(""))
			}
		})

	})
})
18 changes: 11 additions & 7 deletions pkg/alerting/metrics/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@ func NewCpuRule() (*AlertingRule, error) {
}

// CpuRuleOptions groups the options for a CPU alerting rule. Fields carry
// `metric` and `label` struct tags naming node_cpu_seconds_total and its
// labels.
//
// NOTE(review): every field appears twice here, once lower-case and once
// upper-case, which looks like the removed and added sides of a diff rendered
// without +/- markers — confirm which set is current.
// NOTE(review): templates/cpu_X_rate.tmpl references .Interval, but the only
// Interval declaration below is commented out — confirm how it is supplied.
type CpuRuleOptions struct {
cluster corev1.Cluster `metric:"node_cpu_seconds_total"`
Cluster *corev1.Cluster `metric:"node_cpu_seconds_total"`

// NOTE(review): the three tags below separate pairs with a comma, and the
// `mode` value is missing its closing quote; neither follows the
// space-separated key:"value" struct tag convention.
node string `label:"instance", metric:"node_cpu_seconds_total"`
usageModes string `label:"mode, metric:"node_cpu_seconds_total"`
cpus string `label:"cpu", metric:"node_cpu_seconds_total"`
Node []string `label:"instance" metric:"node_cpu_seconds_total"`
UsageMode string `label:"mode" metric:"node_cpu_seconds_total"`
Cpu []string `label:"cpu" metric:"node_cpu_seconds_total"`

compOperator ComparisonOperator
// NOTE(review): this `range` value is unquoted, unlike the one on Target.
target float64 `range:[0,100]`
forDuration time.Duration
CompOperator ComparisonOperator
Target float64 `range:"[0,100]"`
ForDuration time.Duration
// Interval prometheus.Duration
}

// MetricOptions is a no-op method that implements the MetricOpts interface.
func (c *CpuRuleOptions) MetricOptions() {}
9 changes: 9 additions & 0 deletions pkg/alerting/metrics/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ type DiskBytesOptions struct {
forDuration time.Duration
}

// MetricOptions is a no-op method that implements the MetricOpts interface.
func (d *DiskBytesOptions) MetricOptions() {}

type DiskTimeOptions struct {
cluster corev1.Cluster `metric:"node_disk_.*_time_seconds_total"`
ioType string `jobExtractor:"node_disk_.*_time_seconds_total"`
Expand All @@ -23,10 +26,16 @@ type DiskTimeOptions struct {
forDuration time.Duration
}

// MetricOptions is a no-op method that implements the MetricOpts interface.
func (d *DiskTimeOptions) MetricOptions() {}

// DiskOperationsOptions groups the options for an alert on completed disk
// operations. Its `metric` and `jobExtractor` tags reference the
// node_disk_.*_completed_total metric pattern.
//
// The `range` tag value is quoted because reflect.StructTag only recognises
// key:"value" pairs; an unquoted value cannot be read with Tag.Get or
// Tag.Lookup.
type DiskOperationsOptions struct {
	cluster      corev1.Cluster `metric:"node_disk_.*_completed_total"`
	ioType       string         `jobExtractor:"node_disk_.*_completed_total"`
	compOperator ComparisonOperator
	target       int64 `range:"[0,inf]"`
	forDuration  time.Duration
}

// MetricOptions is a no-op method that implements the MetricOpts interface.
func (d *DiskOperationsOptions) MetricOptions() {}
6 changes: 6 additions & 0 deletions pkg/alerting/metrics/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ type FilesystemUsageOptions struct {
forDuration time.Duration
}

// MetricOptions is a no-op method that implements the MetricOpts interface.
func (f *FilesystemUsageOptions) MetricOptions() {}

// `node_filefd_allocated` is the number of file descriptors allocated.
type FilesystemOpenFiledescriptorRuleOptions struct {
cluster corev1.Cluster `metric:"node_filefd_allocated"`
Expand All @@ -25,3 +28,6 @@ type FilesystemOpenFiledescriptorRuleOptions struct {
target int64 `range:[0,inf]`
forDuration time.Duration
}

// MetricOptions is a no-op method that implements the MetricOpts interface.
func (f *FilesystemOpenFiledescriptorRuleOptions) MetricOptions() {}
2 changes: 2 additions & 0 deletions pkg/alerting/metrics/mem.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ type MemoryRuleOptions struct {
percentValue float64 `range:[0,100]`
forDuration time.Duration
}

// MetricOptions is a no-op method. Presumably it implements the MetricOpts
// interface, like the same-named methods on the other option types — confirm.
func (m *MemoryRuleOptions) MetricOptions() {}
14 changes: 10 additions & 4 deletions pkg/alerting/metrics/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ import (
corev1 "github.com/rancher/opni/pkg/apis/core/v1"
)

// NewNetworkBytesRule is not implemented yet: it currently returns a nil rule
// together with a nil error.
func NewNetworkBytesRule() (*AlertingRule, error) {
	// TODO
	var rule *AlertingRule
	return rule, nil
}

type NetworkBytesOptions struct {
cluster corev1.Cluster `metric:"node_network_.*_bytes_total"`

Expand All @@ -22,9 +27,10 @@ type NetworkBytesOptions struct {
// node
}

// NewNetworkBytesRule is not implemented yet: it currently returns a nil rule
// together with a nil error.
func NewNetworkBytesRule() (*AlertingRule, error) {
	// TODO
	var rule *AlertingRule
	return rule, nil
}
// MetricOptions is a no-op method that implements the MetricOpts interface.
func (n *NetworkBytesOptions) MetricOptions() {}

// NetworkErrorsOptions is an options type that declares no fields yet.
type NetworkErrorsOptions struct{}

// MetricOptions is a no-op method that implements the MetricOpts interface.
func (n *NetworkErrorsOptions) MetricOptions() {}
73 changes: 73 additions & 0 deletions pkg/alerting/metrics/opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,88 @@
package metrics

import (
_ "embed"
"fmt"
"reflect"
"text/template"
"time"

corev1 "github.com/rancher/opni/pkg/apis/core/v1"
"github.com/rancher/opni/pkg/validation"
"github.com/rancher/opni/plugins/metrics/pkg/apis/cortexadmin"
)

// Alert template sources embedded at build time, one file per compute alert.
// init parses each of them into ComputeNameToTemplate.

//go:embed templates/cpu_X_rate.tmpl
var cpuAlertTemplate []byte

//go:embed templates/disk_bytes.tmpl
var diskBytesAlertTemplate []byte

//go:embed templates/disk_time.tmpl
var diskIOTimeAlertTemplate []byte

//go:embed templates/disk_ops.tmpl
var diskOperationsAlertTemplate []byte

//go:embed templates/fs_fd.tmpl
var fsFdAlertTemplate []byte

//go:embed templates/fs_usage.tmpl
var fsUsageAlertTemplate []byte

//go:embed templates/mem.tmpl
var memoryAlertTemplate []byte

//go:embed templates/network_bytes.tmpl
var networkAlertTemplate []byte

//go:embed templates/network_error.tmpl
var networkErrorAlertTemplate []byte

//go:embed templates/proc.tmpl
var procAlertTemplate []byte

// ComputeNameToTemplate maps a compute alert name (for example "cpu") to its
// parsed template. It is populated by init.
var ComputeNameToTemplate map[string]*template.Template

// ComputeNameToOpts maps a compute alert name to an options value for it. It
// is populated by init, which currently registers only "cpu".
var ComputeNameToOpts map[string]MetricOpts

// init fills the package-level lookup tables: ComputeNameToTemplate with one
// parsed template per embedded template file, and ComputeNameToOpts with the
// options value registered for "cpu".
//
// Parsing goes through template.Must, so a template that fails to parse
// panics during package initialisation.
func init() {
	// mustParse builds a template called name from the embedded source, with
	// the "missingkey=error" option set.
	mustParse := func(name string, source []byte) *template.Template {
		strict := template.New(name).Option("missingkey=error")
		return template.Must(strict.Parse(string(source)))
	}

	// NOTE(review): the three disk templates are all named "disk" and the two
	// filesystem templates "fs"; only the map keys tell them apart.
	ComputeNameToTemplate = map[string]*template.Template{
		"cpu":            mustParse("cpu", cpuAlertTemplate),
		"diskBytes":      mustParse("disk", diskBytesAlertTemplate),
		"diskIOTime":     mustParse("disk", diskIOTimeAlertTemplate),
		"diskOperations": mustParse("disk", diskOperationsAlertTemplate),
		"fsFD":           mustParse("fs", fsFdAlertTemplate),
		"fsUsage":        mustParse("fs", fsUsageAlertTemplate),
		"mem":            mustParse("mem", memoryAlertTemplate),
		"network_bytes":  mustParse("network_bytes", networkAlertTemplate),
		"network_error":  mustParse("network_error", networkErrorAlertTemplate),
		"proc":           mustParse("proc", procAlertTemplate),
	}

	ComputeNameToOpts = map[string]MetricOpts{
		"cpu": new(CpuRuleOptions),
	}
}

// Names of the struct tag keys that appear on the option structs in this
// package.
const (
	// labelTag: a field tagged with label also requires a metric tag.
	labelTag        = "label"
	jobExtractorTag = "jobExtractor"
	metricTag       = "metric"
)
Expand Down
3 changes: 3 additions & 0 deletions pkg/alerting/metrics/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ type ProcessesRuleOptions struct {
target int64 `range:[0,inf]`
forDuration time.Duration
}

// MetricOptions is a no-op method that implements the MetricOpts interface.
func (p *ProcessesRuleOptions) MetricOptions() {}
13 changes: 13 additions & 0 deletions pkg/alerting/metrics/templates/cpu_X_rate.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{{- /*
  Per-tenant CPU usage ratio: the rate of CPU seconds spent in .UsageMode
  divided by the rate of all CPU seconds, both taken over .Interval.
  .UsageMode is rendered with printf "%q" so the label matcher receives a
  quoted, escaped string literal; a bare value is not valid PromQL.
*/ -}}
sum(
  rate(
    node_cpu_seconds_total{mode={{ printf "%q" .UsageMode }}}[{{ .Interval }}]
  )
)
BY (__tenant_id__) / ON(__tenant_id__) GROUP_LEFT()
sum(
  rate(
    node_cpu_seconds_total[{{ .Interval }}]
  )
)
BY (__tenant_id__)

Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.

0 comments on commit 44cc2fa

Please sign in to comment.