Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cortex Rule CRUD through CLI #952

Merged
merged 2 commits into from
Jan 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/alerting/shared/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (
"google.golang.org/grpc/status"
)

const OpniAlertingCortexNamespace = "opni-alerting"

// Condition constants
var ComparisonOperators = []string{"<", ">", "<=", ">=", "=", "!="}
var KubeStates = []string{"Pending", "Running", "Succeeded", "Failed", "Unknown"}
Expand Down
106 changes: 106 additions & 0 deletions pkg/opni/commands/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@ package commands
import (
"encoding/json"
"fmt"
"os"
"sync"
"time"

"github.com/araddon/dateparse"
"github.com/samber/lo"
"golang.org/x/exp/slices"
"gopkg.in/yaml.v3"

"github.com/olebedev/when"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/rulefmt"
corev1 "github.com/rancher/opni/pkg/apis/core/v1"
managementv1 "github.com/rancher/opni/pkg/apis/management/v1"
"github.com/rancher/opni/pkg/metrics/unmarshal"
Expand Down Expand Up @@ -62,6 +67,104 @@ func BuildCortexAdminRulesCmd() *cobra.Command {
Short: "Cortex admin rules",
}
cmd.AddCommand(BuildListRulesCmd())
cmd.AddCommand(BuildDeleteRuleGroupsCmd())
cmd.AddCommand(BuildLoadRuleGroupsCmd())
return cmd
}

// BuildDeleteRuleGroupsCmd builds the "delete" subcommand. It takes exactly one
// positional argument, the rule group name, and sends a DeleteRule request to
// the Cortex admin client for the cluster and namespace given by the --cluster
// and --namespace flags.
func BuildDeleteRuleGroupsCmd() *cobra.Command {
	var clusterID, ruleNamespace string

	// runDelete issues the delete request; on error it logs and returns
	// without printing the success message.
	runDelete := func(cmd *cobra.Command, args []string) {
		req := &cortexadmin.DeleteRuleRequest{
			ClusterId: clusterID,
			Namespace: ruleNamespace,
			GroupName: args[0],
		}
		if _, err := adminClient.DeleteRule(cmd.Context(), req); err != nil {
			lg.Error(err)
			return
		}
		fmt.Println("Rule Group deleted successfully")
	}

	deleteCmd := &cobra.Command{
		Use:   "delete <groupname>",
		Short: "Delete prometheus rule groups of the given name from Cortex",
		Args:  cobra.ExactArgs(1),
		Run:   runDelete,
	}

	flags := deleteCmd.Flags()
	flags.StringVarP(&clusterID, "cluster", "c", "", "The clusters to delete the rule from")
	flags.StringVarP(&ruleNamespace, "namespace", "n", "", "The namespace to delete the rule from")
	return deleteCmd
}

// BuildLoadRuleGroupsCmd builds the "load" subcommand. It takes exactly one
// positional argument, the path to a prometheus rule group file, and loads
// every group in that file into Cortex for each target cluster.
//
// Target clusters come from the --cluster flag; when the flag is empty, every
// cluster returned by the management API is targeted. Explicitly requested
// cluster IDs are checked against that list and the command aborts on the
// first unknown ID. Groups are loaded concurrently per cluster (one goroutine
// per cluster, groups sequentially within it). A failure to load an individual
// group is logged, including the underlying error, and does not stop the
// remaining groups or clusters.
func BuildLoadRuleGroupsCmd() *cobra.Command {
	var clusters []string
	var namespace string
	cmd := &cobra.Command{
		Use:   "load <rulegroupfile>",
		Short: "Creates/Updates prometheus rule groups into Cortex from a valid prometheus rule group file",
		Long:  "See https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules for more information about the expected input format",
		Args:  cobra.ExactArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			// Resolve the IDs of all clusters known to the management API.
			clMeta, err := mgmtClient.ListClusters(cmd.Context(), &managementv1.ListClustersRequest{})
			if err != nil {
				lg.Fatal(err)
			}
			knownIDs := lo.Map(clMeta.Items, func(c *corev1.Cluster, _ int) string {
				return c.Id
			})
			if len(clusters) == 0 {
				clusters = knownIDs
			} else {
				// Important to validate here !! Since cortex has no knowledge of available opni clusters,
				// it will accept any valid yaml content and therefore could silently fail / destroy itself
				// by writing unsupported names to its (remote) store
				for _, id := range clusters {
					if !slices.Contains(knownIDs, id) {
						lg.Fatalf("invalid cluster id %s", id)
					}
				}
			}

			// Read the rule group file and validate its contents.
			yamlContent, err := os.ReadFile(args[0])
			if err != nil {
				lg.Fatal(err)
			}
			rgs, parseErrs := rulefmt.Parse(yamlContent)
			if len(parseErrs) > 0 {
				for _, parseErr := range parseErrs {
					lg.Error(parseErr)
				}
				lg.Fatal("Failed to parse rule group file")
			}

			var wg sync.WaitGroup
			for _, clusterID := range clusters {
				clusterID := clusterID // per-iteration copy for the goroutine below
				wg.Add(1)
				go func() {
					defer wg.Done()
					for _, group := range rgs.Groups {
						// Marshal once and reuse for both the request and the error log.
						groupYaml := util.Must(yaml.Marshal(group))
						_, err := adminClient.LoadRules(cmd.Context(), &cortexadmin.LoadRuleRequest{
							Namespace:   namespace,
							ClusterId:   clusterID,
							YamlContent: groupYaml,
						})
						if err != nil {
							// Include the underlying error so the user can see why the load failed.
							lg.Errorf("Failed to load rule group :\n `%s`\n\n for cluster `%s`: %v", string(groupYaml), clusterID, err)
							continue
						}
						fmt.Printf("Successfully loaded rule group `%s` for clusterId `%s`\n", group.Name, clusterID)
					}
				}()
			}
			wg.Wait()
		},
	}
	cmd.Flags().StringSliceVar(&clusters, "cluster", []string{}, "The clusters to apply the rule to (default=all)")
	// NOTE(review): the help text advertises default="default" but the flag default is the
	// empty string; presumably the server substitutes "default" — confirm against the LoadRules handler.
	cmd.Flags().StringVar(&namespace, "namespace", "", "namespace is a cortex identifier to help organize rules (default=\"default\")")
	return cmd
}

Expand All @@ -72,6 +175,7 @@ func BuildListRulesCmd() *cobra.Command {
var stateFilter []string
var ruleNameFilter string
var groupNameFilter string
var namespaceFilter string
var outputFormat string
var invalidDiagnosticRequested bool
var all bool
Expand Down Expand Up @@ -105,6 +209,7 @@ func BuildListRulesCmd() *cobra.Command {
StateFilter: stateFilter,
RuleNameRegexp: ruleNameFilter,
GroupNameRegexp: groupNameFilter,
NamespaceRegexp: namespaceFilter,
ListInvalid: &invalidDiagnosticRequested,
RequestAll: &all,
})
Expand All @@ -125,6 +230,7 @@ func BuildListRulesCmd() *cobra.Command {
cmd.Flags().StringSliceVar(&stateFilter, "state", []string{}, "Rule state to list (default=all)")
cmd.Flags().StringVar(&groupNameFilter, "group-name", "", "Group names to list (supports go regex) (default=all)")
cmd.Flags().StringVar(&ruleNameFilter, "rule-name", "", "Rule names to list (supports go regex) (default=all)")
cmd.Flags().StringVar(&namespaceFilter, "namespace", "", "Namespaces to match against (supports go regex) (default=all)")
cmd.Flags().BoolVar(&invalidDiagnosticRequested, "invalid", false, "List invalid rules (default=false)")
cmd.Flags().BoolVar(&all, "all", false, "List all rules present in cortex, regardless of cluster (default=false)")
cmd.Flags().StringVar(&outputFormat, "output", "table", "Output format : table,json (default=table)")
Expand Down
3 changes: 2 additions & 1 deletion pkg/test/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ func ExpectRuleGroupToExist(
maxTimeout time.Duration,
) {
Eventually(func() error {
_, err := client.GetRule(ctx, &cortexadmin.RuleRequest{
_, err := client.GetRule(ctx, &cortexadmin.GetRuleRequest{
ClusterId: tenant,
Namespace: "test",
GroupName: groupName,
})
if err != nil {
Expand Down
28 changes: 17 additions & 11 deletions plugins/alerting/pkg/alerting/condition_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,9 @@ func deleteCondition(p *Plugin, lg *zap.SugaredLogger, ctx context.Context, req
return nil
}
if r, _ := handleSwitchCortexRules(req.AlertType); r != nil {
_, err := p.adminClient.Get().DeleteRule(ctx, &cortexadmin.RuleRequest{
_, err := p.adminClient.Get().DeleteRule(ctx, &cortexadmin.DeleteRuleRequest{
ClusterId: r.Id,
Namespace: shared.OpniAlertingCortexNamespace,
GroupName: CortexRuleIdFromUuid(id),
})
return err
Expand Down Expand Up @@ -197,9 +198,10 @@ func (p *Plugin) handleKubeAlertCreation(ctx context.Context, k *alertingv1.Aler
return err
}
p.Logger.With("Expr", "kube-state").Debugf("%s", string(out))
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.PostRuleRequest{
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.LoadRuleRequest{
ClusterId: k.GetClusterId(),
YamlContent: string(out),
Namespace: shared.OpniAlertingCortexNamespace,
YamlContent: out,
})
if err != nil {
return err
Expand Down Expand Up @@ -234,9 +236,10 @@ func (p *Plugin) handleCpuSaturationAlertCreation(
}
p.Logger.With("Expr", "cpu").Debugf("%s", string(out))

_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.PostRuleRequest{
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.LoadRuleRequest{
ClusterId: c.ClusterId.GetId(),
YamlContent: string(out),
Namespace: shared.OpniAlertingCortexNamespace,
YamlContent: out,
})
return err
}
Expand All @@ -263,9 +266,10 @@ func (p *Plugin) handleMemorySaturationAlertCreation(ctx context.Context, m *ale
return err
}
p.Logger.With("Expr", "mem").Debugf("%s", string(out))
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.PostRuleRequest{
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.LoadRuleRequest{
ClusterId: m.ClusterId.GetId(),
YamlContent: string(out),
Namespace: shared.OpniAlertingCortexNamespace,
YamlContent: out,
})
return err
}
Expand All @@ -291,9 +295,10 @@ func (p *Plugin) handleFsSaturationAlertCreation(ctx context.Context, fs *alerti
return err
}
p.Logger.With("Expr", "fs").Debugf("%s", string(out))
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.PostRuleRequest{
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.LoadRuleRequest{
ClusterId: fs.ClusterId.GetId(),
YamlContent: string(out),
Namespace: shared.OpniAlertingCortexNamespace,
YamlContent: out,
})
return err
}
Expand All @@ -317,9 +322,10 @@ func (p *Plugin) handlePrometheusQueryAlertCreation(ctx context.Context, q *aler
return err
}
p.Logger.With("Expr", "user-query").Debugf("%s", string(out))
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.PostRuleRequest{
_, err = p.adminClient.Get().LoadRules(ctx, &cortexadmin.LoadRuleRequest{
ClusterId: q.ClusterId.GetId(),
YamlContent: string(out),
Namespace: shared.OpniAlertingCortexNamespace,
YamlContent: out,
})

return err
Expand Down
Loading