Skip to content

Commit

Permalink
fix: expressionUsesOnlyAllowedLabelsForMetricRegexp logic (#98)
Browse files Browse the repository at this point in the history
Signed-off-by: Martin Chodur <m.chodur@seznam.cz>
  • Loading branch information
FUSAKLA authored Dec 1, 2024
1 parent 6401cfa commit 2168dc2
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 36 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [3.6.1] - 2024-12-01
- Fixed: FIxed the `expressionUsesOnlyAllowedLabelsForMetricRegexp` validator that might return false positives when used on more complex expressions with vector matching and functions using labels.

## [3.6.0] - 2024-11-29
- Added: Configuration file can now be in Jsonnet format if the config file name ends with `.jsonnet` and it will get automatically evaluated.

Expand Down
8 changes: 3 additions & 5 deletions docs/validations.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,9 @@ params:

#### `expressionUsesOnlyAllowedLabelsForMetricRegexp`

Fails if the rule uses any labels beside those listed in `allowedLabels`, in combination with given metric regexp in its `expr` label matchers, aggregations or joins.
Different metric name matchers are handled as follows:
* `{__name__="foo",..}, foo{...}` - `foo` is matched literally against given `metricNameRegexp`, if matches, expr is validated against `allowedLabels`
* `{__name__=~"foo",..}` - skipped
* `{__name__!="foo",}, {__name__!~"foo"}` - skipped
Fails if the rule uses any labels beside those listed in `allowedLabels`, in combination with given metric regexp in its `expr` label matchers, aggregations or joins. If the metric name is omitted in the query, or matched using regexp or any negative matcher on the `__name__` label, the rule will be skipped.

The check rather ignores validation of labels, where it cannot be sure if they are targeting only the metric in question, like aggregations by labels on top of vector matching expression where the labels might come from the other part of the expr.

> If using kube-state-metrics for exposing labels information about K8S objects (kube_*_labels) only those labels whitelisted by kube-state-metrics admin will be available.
> Might be useful to check that users does not use any other in their expressions.
Expand Down
2 changes: 1 addition & 1 deletion pkg/validator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ var registeredUniversalRuleValidators = map[string]validatorCreator{
"validFunctionsOnCounters": newValidFunctionsOnCounters,
"rateBeforeAggregation": newRateBeforeAggregation,
"expressionDoesNotUseLabels": newExpressionDoesNotUseLabels,
"expressionUsesOnlyAllowedLabelsForMetricRegexp": newExpressionUseOnlyWhitelistedLabelsForMetric,
"expressionUsesOnlyAllowedLabelsForMetricRegexp": newExpressionUsesOnlyAllowedLabelsForMetricRegexp,
"expressionDoesNotUseOlderDataThan": newExpressionDoesNotUseOlderDataThan,
"expressionDoesNotUseRangeShorterThan": newExpressionDoesNotUseRangeShorterThan,
"expressionDoesNotUseMetrics": newExpressionDoesNotUseMetrics,
Expand Down
2 changes: 1 addition & 1 deletion pkg/validator/promql_expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ type expressionUsesOnlyAllowedLabelsForMetricRegexp struct {
metricNameRegexp *regexp.Regexp
}

func newExpressionUseOnlyWhitelistedLabelsForMetric(paramsConfig yaml.Node) (Validator, error) {
func newExpressionUsesOnlyAllowedLabelsForMetricRegexp(paramsConfig yaml.Node) (Validator, error) {
params := struct {
AllowedLabels []string `yaml:"allowedLabels"`
MetricNameRegexp string `yaml:"metricNameRegexp"`
Expand Down
87 changes: 70 additions & 17 deletions pkg/validator/promql_expression_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package validator

import (
"fmt"
"maps"
"regexp"
"slices"

"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql/parser"
"golang.org/x/exp/slices"
)

func init() {
Expand Down Expand Up @@ -37,9 +38,18 @@ func labelsUsedInSelectorForMetric(selector *parser.VectorSelector, metricRegexp
return usedLabels, metricUsed
}

func parserCallStringArgValue(e parser.Expr) string {
val, ok := e.(*parser.StringLiteral)
if !ok {
return "" // Ignore the error, this shouldn't happen anyway, parser should already catch this.
}
return val.Val
}

// Returns a list of labels which are used in given expr in relation to given metric.
// Beside labels within vector selector itself, it adds labels used in Aggregate expressions and labels used in Binary expression.
// For Binary expressions it may report false positives as the current implementation does not consider on which side of group_left/group_right is the given metric.
// It traverses the whole expression tree top to bottom and collects all labels used in selectors, operators, functions etc.
// In case of vector matching, it also collects labels used in vector matching only relevant to the part of the expression where the metric is used.
// If the vector matching uses grouping, any labels used on top of the expression are not validated, since they might come from the other side of the expression.
func getExpressionUsedLabelsForMetric(expr string, metricRegexp *regexp.Regexp) ([]string, error) {
promQl, err := parser.ParseExpr(expr)
if err != nil {
Expand All @@ -49,29 +59,72 @@ func getExpressionUsedLabelsForMetric(expr string, metricRegexp *regexp.Regexp)
var usedLabels []string

labelsUpInExpr := func(path []parser.Node) []string {
usedLabels := []string{}
for _, n := range path {
usedLabels := map[string]struct{}{}
for i, n := range path {
switch v := n.(type) {
case *parser.AggregateExpr:
usedLabels = append(usedLabels, v.Grouping...)
for _, l := range v.Grouping {
usedLabels[l] = struct{}{}
}
case *parser.BinaryExpr:
if v.VectorMatching != nil {
usedLabels = append(usedLabels, v.VectorMatching.Include...)
usedLabels = append(usedLabels, v.VectorMatching.MatchingLabels...)
if v.VectorMatching == nil {
continue
}
// If any group_left/group_right is used, we need to reset the used labels, since any labels used on top of this expression might come from the other side of the expression.
if v.VectorMatching.Include != nil {
usedLabels = map[string]struct{}{}
}
// Validate only the on(...) labels. The ignoring(...) might target the other side of the binary expression.
if v.VectorMatching.On {
for _, l := range v.VectorMatching.MatchingLabels {
usedLabels[l] = struct{}{}
}
}
// We want to validate the group_left/group_right labels only if the validated metric is on the "one" of the many-one/one-to.many side.
nextExpr := path[i+1].String()
if (v.VectorMatching.Card == parser.CardManyToOne && v.RHS.String() == nextExpr) || (v.VectorMatching.Card == parser.CardOneToMany && v.LHS.String() == nextExpr) {
for _, l := range v.VectorMatching.Include {
usedLabels[l] = struct{}{}
}
}
case *parser.Call:
switch v.Func.Name {
case "label_replace":
// Any PromQL "above" this label_replace can use the destination synthetic label, so drop it from the list of already used labels.
delete(usedLabels, parserCallStringArgValue(v.Args[1]))
usedLabels[parserCallStringArgValue(v.Args[3])] = struct{}{} // The source_label is interesting for us
case "label_join":
delete(usedLabels, parserCallStringArgValue(v.Args[1]))
// label_join is variadic, so we need to iterate over all labels that are used in the expression
for _, l := range v.Args[3:] {
usedLabels[parserCallStringArgValue(l)] = struct{}{}
}
case "sort_by_label":
for _, l := range v.Args[1:] {
usedLabels[parserCallStringArgValue(l)] = struct{}{}
}
case "sort_by_label_desc":
for _, l := range v.Args[1:] {
usedLabels[parserCallStringArgValue(l)] = struct{}{}
}
}
}
}
return usedLabels
delete(usedLabels, "") // Used in case of errors so just drop it
return slices.Collect(maps.Keys(usedLabels))
}

parser.Inspect(promQl, func(n parser.Node, path []parser.Node) error {
if v, isVectorSelector := n.(*parser.VectorSelector); isVectorSelector {
selectorUsedLabels, ok := labelsUsedInSelectorForMetric(v, metricRegexp)
if ok {
metricInExpr = true
usedLabels = append(usedLabels, selectorUsedLabels...)
usedLabels = append(usedLabels, labelsUpInExpr(path)...)
}
v, isVectorSelector := n.(*parser.VectorSelector)
if !isVectorSelector {
return nil
}
selectorUsedLabels, ok := labelsUsedInSelectorForMetric(v, metricRegexp)
if ok {
metricInExpr = true
usedLabels = append(usedLabels, selectorUsedLabels...)
// The path does not contain the current node, so we need to append it since some cases need also the last node.
usedLabels = append(usedLabels, labelsUpInExpr(append(path, n))...)
}
return nil
})
Expand Down
29 changes: 21 additions & 8 deletions pkg/validator/promql_expression_helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package validator

import (
"errors"
"fmt"
"reflect"
"regexp"
"testing"
Expand Down Expand Up @@ -48,16 +49,28 @@ func TestGetExpressionUsedLabelsForMetric(t *testing.T) {
{expr: "kube_pod_labels{label_app!='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app"}},
{expr: "kube_pod_labels{label_app='foo'} * on(pod) kube_pod_info{}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}},
{expr: "kube_pod_info{} * on(pod) group_left(label_workload) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "label_workload", "pod"}},
{expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod", "pod_ip"}},
{expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'} offset 1h", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod", "pod_ip"}},
{expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}},
{expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'} offset 1h", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}},
{expr: "sum(kube_pod_info * kube_pod_labels) by (foo)", metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo"}},
{expr: "label_replace(kube_pod_labels, 'bar', '$1', 'foo', '.*')", metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo"}},
{expr: `sum(label_join(kube_pod_labels, "foo", ",", "l1", "l2", "l3")) by (foo)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "l1", "l2", "l3"}},
{expr: `sum(label_join(kube_pod_labels, "foo", ",", "l1", "l2", "l3")) by (bar)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "l1", "l2", "l3", "bar"}},
{expr: `sum(kube_pod_labels * on (foo, bar) group_right(baz) kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}},
{expr: `sum(kube_pod_labels * on (foo, bar) group_left(baz) kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar"}},
{expr: `sum(kube_pod_labels * ignoring (foo, bar) group_left() kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel}},
{expr: `sum(kube_pod_labels * on (foo, bar) kube_pod_info) by (baz)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}},
{expr: `sort_by_label(kube_pod_labels, "foo", "bar", "baz")`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}},
{expr: `sort_by_label_desc(kube_pod_labels, "foo", "bar", "baz")`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}},
}

for _, test := range tests {
labels, err := getExpressionUsedLabelsForMetric(test.expr, regexp.MustCompile(test.metric))
assert.ElementsMatch(t, labels, test.expected, "Expected labels %v, but got %v", test.expected, labels)
if !errors.Is(err, test.expectedErr) {
t.Errorf("Expected error %v, but got %v", test.expectedErr, err)
}
for i, test := range tests {
t.Run(fmt.Sprintf("test_case_%d", i), func(t *testing.T) {
labels, err := getExpressionUsedLabelsForMetric(test.expr, regexp.MustCompile(test.metric))
assert.ElementsMatch(t, labels, test.expected, "Expected labels %v, but got %v", test.expected, labels)
if !errors.Is(err, test.expectedErr) {
t.Errorf("Expected error %v, but got %v", test.expectedErr, err)
}
})
}
}

Expand Down
6 changes: 2 additions & 4 deletions pkg/validator/validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,8 @@ var testCases = []struct {
{name: "ruleExprDoesUseForbiddenLabelInBy", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "sum(kube_pod_labels) by (app)"}, expectedErrors: 1},
{name: "ruleExprDoesUseForbiddenLabelInOn", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) up"}, expectedErrors: 1},
{name: "ruleExprDoesUseForbiddenLabelInGroup", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "group(kube_pod_labels) by (label_app)"}, expectedErrors: 1},
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 2},
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 2},
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{"app"}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 1},
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{"app"}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 1},
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer1", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 1},
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer2", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 2},

// expressionDoesNotUseOlderDataThan
{name: "ruleExprDoesNotUseOlderData", validator: expressionDoesNotUseOlderDataThan{limit: time.Hour}, rule: rulefmt.Rule{Expr: "up{xxx='yyy'}"}, expectedErrors: 0},
Expand Down

0 comments on commit 2168dc2

Please sign in to comment.