Skip to content

Commit

Permalink
feat: set rule-evaluator options using config
Browse files Browse the repository at this point in the history
  • Loading branch information
TheSpiritXIII committed Aug 23, 2024
1 parent 3db475a commit 142d164
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 77 deletions.
9 changes: 0 additions & 9 deletions e2e/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -614,12 +614,3 @@ func isPodMonitoringScrapeEndpointSuccess(status *monitoringv1.ScrapeEndpointSta
}
return nil
}

// getEnvVar scans evs in order and returns the Value of the first entry whose
// Name equals key. It returns the empty string when no entry matches.
func getEnvVar(evs []corev1.EnvVar, key string) string {
	for i := range evs {
		if evs[i].Name != key {
			continue
		}
		return evs[i].Value
	}
	return ""
}
43 changes: 4 additions & 39 deletions e2e/ruler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,45 +150,6 @@ func testRuleEvaluatorOperatorConfig(ctx context.Context, kubeClient client.Clie
if err := kubeClient.Update(ctx, &config); err != nil {
t.Fatalf("update operatorconfig: %s", err)
}
// Keep checking the rule-evaluator Deployment until its container has the expected args.
err := wait.PollUntilContextCancel(ctx, pollDuration, false, func(ctx context.Context) (bool, error) {
deploy := appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: operator.NameRuleEvaluator,
Namespace: operator.DefaultOperatorNamespace,
},
}
if err := kubeClient.Get(ctx, client.ObjectKeyFromObject(&deploy), &deploy); err != nil {
if apierrors.IsNotFound(err) {
return false, nil
}
return false, fmt.Errorf("getting collector DaemonSet failed: %w", err)
}

// Ensure evaluator container has expected args.
for _, c := range deploy.Spec.Template.Spec.Containers {
if c.Name != operator.RuleEvaluatorContainerName {
continue
}
// We're mainly interested in the dynamic flags but checking the entire set including
// the static ones is ultimately simpler.
wantArgs := []string{
fmt.Sprintf("--query.project-id=%q", projectID),
fmt.Sprintf("--query.generator-url=%q", "http://example.com/"),
}
gotArgs := getEnvVar(c.Env, "EXTRA_ARGS")
for _, arg := range wantArgs {
if !strings.Contains(gotArgs, arg) {
return false, fmt.Errorf("expected arg %q not found in EXTRA_ARGS: %q", arg, gotArgs)
}
}
return true, nil
}
return false, errors.New("no rule-evaluator container found")
})
if err != nil {
t.Fatalf("waiting for collector configuration failed: %s", err)
}
}
}

Expand Down Expand Up @@ -331,6 +292,10 @@ alerting:
- monitoring
rule_files:
- /etc/rules/*.yaml
google_cloud:
query:
project_id: {projectID}
generator_url: http://example.com/
`),
}

Expand Down
71 changes: 42 additions & 29 deletions pkg/operator/operator_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -231,7 +232,7 @@ func (r *operatorConfigReconciler) Reconcile(ctx context.Context, req reconcile.
}

// Ensure the rule-evaluator deployment and volume mounts.
if err := r.ensureRuleEvaluatorDeployment(ctx, &config.Rules); err != nil {
if err := r.ensureRuleEvaluatorDeployment(ctx); err != nil {
return reconcile.Result{}, fmt.Errorf("ensure rule-evaluator deploy: %w", err)
}

Expand Down Expand Up @@ -335,15 +336,42 @@ func (r *operatorConfigReconciler) makeRuleEvaluatorConfig(ctx context.Context,
secretData[p] = b
}

cfg := &promconfig.Config{
GlobalConfig: promconfig.GlobalConfig{
ExternalLabels: labels.FromMap(spec.ExternalLabels),
// If no explicit project ID is set, use the one provided to the operator.
// On GKE the rule-evaluator can also auto-detect the cluster's project
// but this won't work in other Kubernetes environments.
queryProjectID, _, _ := resolveLabels(r.opts.ProjectID, r.opts.Location, r.opts.Cluster, spec.ExternalLabels)
if spec.QueryProjectID != "" {
queryProjectID = spec.QueryProjectID
}

var credentialsFile string
if spec.Credentials != nil {
credentialsFile = path.Join(secretsDir, pathForSelector(r.opts.PublicNamespace, &monitoringv1.SecretOrConfigMap{Secret: spec.Credentials}))
}

cfg := RuleEvaluatorConfig{
Config: promconfig.Config{
GlobalConfig: promconfig.GlobalConfig{
ExternalLabels: labels.FromMap(spec.ExternalLabels),
},
AlertingConfig: promconfig.AlertingConfig{
AlertmanagerConfigs: amConfigs,
},
RuleFiles: []string{path.Join(rulesDir, "*.yaml")},
},
AlertingConfig: promconfig.AlertingConfig{
AlertmanagerConfigs: amConfigs,
GoogleCloud: GoogleCloudConfig{
Export: &GoogleCloudExportConfig{},
Query: &GoogleCloudQueryConfig{
ProjectID: queryProjectID,
GeneratorURL: spec.GeneratorURL,
CredentialsFile: credentialsFile,
},
},
RuleFiles: []string{path.Join(rulesDir, "*.yaml")},
}
if credentialsFile != "" {
cfg.GoogleCloud.Export.CredentialsFile = ptr.To(credentialsFile)
}

cfgEncoded, err := yaml.Marshal(cfg)
if err != nil {
return nil, nil, fmt.Errorf("marshal Prometheus config: %w", err)
Expand Down Expand Up @@ -543,7 +571,7 @@ func (r *operatorConfigReconciler) ensureAlertmanagerStatefulSet(ctx context.Con
}

// ensureRuleEvaluatorDeployment reconciles the Deployment for rule-evaluator.
func (r *operatorConfigReconciler) ensureRuleEvaluatorDeployment(ctx context.Context, spec *monitoringv1.RuleEvaluatorSpec) error {
func (r *operatorConfigReconciler) ensureRuleEvaluatorDeployment(ctx context.Context) error {
logger, _ := logr.FromContext(ctx)

var deploy appsv1.Deployment
Expand All @@ -558,29 +586,14 @@ func (r *operatorConfigReconciler) ensureRuleEvaluatorDeployment(ctx context.Con
return err
}

var projectID, _, _ = resolveLabels(r.opts.ProjectID, r.opts.Location, r.opts.Cluster, spec.ExternalLabels)

// If no explicit project ID is set, use the one provided to the operator.
// On GKE the rule-evaluator can also auto-detect the cluster's project
// but this won't work in other Kubernetes environments.
queryProjectID := projectID
if spec.QueryProjectID != "" {
queryProjectID = spec.QueryProjectID
}
flags := []string{fmt.Sprintf("--query.project-id=%q", queryProjectID)}
setContainerExtraArgs(deploy.Spec.Template.Spec.Containers, RuleEvaluatorContainerName, "")

if spec.Credentials != nil {
p := path.Join(secretsDir, pathForSelector(r.opts.PublicNamespace, &monitoringv1.SecretOrConfigMap{Secret: spec.Credentials}))
flags = append(flags, fmt.Sprintf("--export.credentials-file=%q", p))
flags = append(flags, fmt.Sprintf("--query.credentials-file=%q", p))
}
if spec.GeneratorURL != "" {
flags = append(flags, fmt.Sprintf("--query.generator-url=%q", spec.GeneratorURL))
// Support not having UPDATE permission. We will remove it in the future.
// See: https://github.com/GoogleCloudPlatform/prometheus-engine/pull/1078
if err := r.client.Update(ctx, &deploy); !apierrors.IsForbidden(err) {
return err
}
setContainerExtraArgs(deploy.Spec.Template.Spec.Containers, RuleEvaluatorContainerName, strings.Join(flags, " "))

// Upsert rule-evaluator Deployment.
return r.client.Update(ctx, &deploy)
return nil
}

// makeAlertmanagerConfigs creates the alertmanager_config entries as described in
Expand Down

0 comments on commit 142d164

Please sign in to comment.