Skip to content

Commit

Permalink
Add utf8 support to the prometheus exporter (#5755)
Browse files Browse the repository at this point in the history
### Changes

Disable sanitization when the UTF-8 support is enabled in the Prometheus
library.

### Usage

To enable UTF-8 support for the Prometheus exporter after this change,
set the following in your application:

```golang
import "github.com/prometheus/common/model"

func init() {
    model.NameValidationScheme = model.UTF8Validation
}
```

See `exporters/prometheus/testdata/counter_utf8.txt` for an example of
the text exposition format including names/labels with dots.
  • Loading branch information
dashpole authored Sep 9, 2024
1 parent 506a9ba commit 71b341f
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 129 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- Support `OTEL_EXPORTER_OTLP_LOGS_INSECURE` and `OTEL_EXPORTER_OTLP_INSECURE` environments in `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc`. (#5739)
- The `WithResource` option for `NewMeterProvider` now merges the provided resources with the ones from environment variables. (#5773)
- The `WithResource` option for `NewLoggerProvider` now merges the provided resources with the ones from environment variables. (#5773)
- Add UTF-8 support to `go.opentelemetry.io/otel/exporters/prometheus`. (#5755)

### Fixed

Expand Down
6 changes: 5 additions & 1 deletion exporters/prometheus/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/sdk/metric"
Expand Down Expand Up @@ -131,7 +132,10 @@ func WithoutScopeInfo() Option {
// have special behavior based on their name.
func WithNamespace(ns string) Option {
return optionFunc(func(cfg config) config {
ns = sanitizeName(ns)
if model.NameValidationScheme != model.UTF8Validation {
// Only sanitize if prometheus does not support UTF-8.
ns = model.EscapeName(ns, model.NameEscapingScheme)
}
if !strings.HasSuffix(ns, "_") {
// namespace and metric names should be separated with an underscore,
// adds a trailing underscore if there is not one already.
Expand Down
117 changes: 35 additions & 82 deletions exporters/prometheus/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@ import (
"slices"
"strings"
"sync"
"unicode"
"unicode/utf8"

"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/model"
"google.golang.org/protobuf/proto"

"go.opentelemetry.io/otel"
Expand Down Expand Up @@ -298,28 +297,38 @@ func addGaugeMetric[N int64 | float64](ch chan<- prometheus.Metric, gauge metric
}

// getAttrs parses the attribute.Set to two lists of matching Prometheus-style
// keys and values. It sanitizes invalid characters and handles duplicate keys
// (due to sanitization) by sorting and concatenating the values following the spec.
// keys and values.
func getAttrs(attrs attribute.Set, ks, vs [2]string, resourceKV keyVals) ([]string, []string) {
keysMap := make(map[string][]string)
itr := attrs.Iter()
for itr.Next() {
kv := itr.Attribute()
key := strings.Map(sanitizeRune, string(kv.Key))
if _, ok := keysMap[key]; !ok {
keysMap[key] = []string{kv.Value.Emit()}
} else {
// if the sanitized key is a duplicate, append to the list of keys
keysMap[key] = append(keysMap[key], kv.Value.Emit())
}
}

keys := make([]string, 0, attrs.Len())
values := make([]string, 0, attrs.Len())
for key, vals := range keysMap {
keys = append(keys, key)
slices.Sort(vals)
values = append(values, strings.Join(vals, ";"))
itr := attrs.Iter()

if model.NameValidationScheme == model.UTF8Validation {
// Do not perform sanitization if prometheus supports UTF-8.
for itr.Next() {
kv := itr.Attribute()
keys = append(keys, string(kv.Key))
values = append(values, kv.Value.Emit())
}
} else {
// It sanitizes invalid characters and handles duplicate keys
// (due to sanitization) by sorting and concatenating the values following the spec.
keysMap := make(map[string][]string)
for itr.Next() {
kv := itr.Attribute()
key := model.EscapeName(string(kv.Key), model.NameEscapingScheme)
if _, ok := keysMap[key]; !ok {
keysMap[key] = []string{kv.Value.Emit()}
} else {
// if the sanitized key is a duplicate, append to the list of keys
keysMap[key] = append(keysMap[key], kv.Value.Emit())
}
}
for key, vals := range keysMap {
keys = append(keys, key)
slices.Sort(vals)
values = append(values, strings.Join(vals, ";"))
}
}

if ks[0] != "" {
Expand Down Expand Up @@ -347,13 +356,6 @@ func createScopeInfoMetric(scope instrumentation.Scope) (prometheus.Metric, erro
return prometheus.NewConstMetric(desc, prometheus.GaugeValue, float64(1), scope.Name, scope.Version)
}

func sanitizeRune(r rune) rune {
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == ':' || r == '_' {
return r
}
return '_'
}

var unitSuffixes = map[string]string{
// Time
"d": "_days",
Expand Down Expand Up @@ -392,7 +394,11 @@ var unitSuffixes = map[string]string{

// getName returns the sanitized name, prefixed with the namespace and suffixed with unit.
func (c *collector) getName(m metricdata.Metrics, typ *dto.MetricType) string {
name := sanitizeName(m.Name)
name := m.Name
if model.NameValidationScheme != model.UTF8Validation {
// Only sanitize if prometheus does not support UTF-8.
name = model.EscapeName(name, model.NameEscapingScheme)
}
addCounterSuffix := !c.withoutCounterSuffixes && *typ == dto.MetricType_COUNTER
if addCounterSuffix {
// Remove the _total suffix here, as we will re-add the total suffix
Expand All @@ -411,59 +417,6 @@ func (c *collector) getName(m metricdata.Metrics, typ *dto.MetricType) string {
return name
}

func sanitizeName(n string) string {
// This algorithm is based on strings.Map from Go 1.19.
const replacement = '_'

valid := func(i int, r rune) bool {
// Taken from
// https://github.com/prometheus/common/blob/dfbc25bd00225c70aca0d94c3c4bb7744f28ace0/model/metric.go#L92-L102
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || r == '_' || r == ':' || (r >= '0' && r <= '9' && i > 0) {
return true
}
return false
}

// This output buffer b is initialized on demand, the first time a
// character needs to be replaced.
var b strings.Builder
for i, c := range n {
if valid(i, c) {
continue
}

if i == 0 && c >= '0' && c <= '9' {
// Prefix leading number with replacement character.
b.Grow(len(n) + 1)
_ = b.WriteByte(byte(replacement))
break
}
b.Grow(len(n))
_, _ = b.WriteString(n[:i])
_ = b.WriteByte(byte(replacement))
width := utf8.RuneLen(c)
n = n[i+width:]
break
}

// Fast path for unchanged input.
if b.Cap() == 0 { // b.Grow was not called above.
return n
}

for _, c := range n {
// Due to inlining, it is more performant to invoke WriteByte rather then
// WriteRune.
if valid(1, c) { // We are guaranteed to not be at the start.
_ = b.WriteByte(byte(c))
} else {
_ = b.WriteByte(byte(replacement))
}
}

return b.String()
}

func (c *collector) metricType(m metricdata.Metrics) *dto.MetricType {
switch v := m.Data.(type) {
case metricdata.Histogram[int64], metricdata.Histogram[float64]:
Expand Down
69 changes: 39 additions & 30 deletions exporters/prometheus/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

Expand All @@ -34,6 +35,7 @@ func TestPrometheusExporter(t *testing.T) {
recordMetrics func(ctx context.Context, meter otelmetric.Meter)
options []Option
expectedFile string
enableUTF8 bool
}{
{
name: "counter",
Expand Down Expand Up @@ -399,10 +401,47 @@ func TestPrometheusExporter(t *testing.T) {
counter.Add(ctx, 5.3, opt)
},
},
{
name: "counter utf-8",
expectedFile: "testdata/counter_utf8.txt",
enableUTF8: true,
recordMetrics: func(ctx context.Context, meter otelmetric.Meter) {
opt := otelmetric.WithAttributes(
attribute.Key("A.G").String("B"),
attribute.Key("C.H").String("D"),
attribute.Key("E.I").Bool(true),
attribute.Key("F.J").Int(42),
)
counter, err := meter.Float64Counter(
"foo.things",
otelmetric.WithDescription("a simple counter"),
otelmetric.WithUnit("s"),
)
require.NoError(t, err)
counter.Add(ctx, 5, opt)
counter.Add(ctx, 10.3, opt)
counter.Add(ctx, 9, opt)

attrs2 := attribute.NewSet(
attribute.Key("A.G").String("D"),
attribute.Key("C.H").String("B"),
attribute.Key("E.I").Bool(true),
attribute.Key("F.J").Int(42),
)
counter.Add(ctx, 5, otelmetric.WithAttributeSet(attrs2))
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
if tc.enableUTF8 {
model.NameValidationScheme = model.UTF8Validation
defer func() {
// Reset to defaults
model.NameValidationScheme = model.LegacyValidation
}()
}
ctx := context.Background()
registry := prometheus.NewRegistry()
exporter, err := New(append(tc.options, WithRegisterer(registry))...)
Expand Down Expand Up @@ -452,36 +491,6 @@ func TestPrometheusExporter(t *testing.T) {
}
}

func TestSantitizeName(t *testing.T) {
tests := []struct {
input string
want string
}{
{"name€_with_4_width_rune", "name__with_4_width_rune"},
{"`", "_"},
{
`! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWKYZ[]\^_abcdefghijklmnopqrstuvwkyz{|}~`,
`________________0123456789:______ABCDEFGHIJKLMNOPQRSTUVWKYZ_____abcdefghijklmnopqrstuvwkyz____`,
},

// Test cases taken from
// https://github.com/prometheus/common/blob/dfbc25bd00225c70aca0d94c3c4bb7744f28ace0/model/metric_test.go#L85-L136
{"Avalid_23name", "Avalid_23name"},
{"_Avalid_23name", "_Avalid_23name"},
{"1valid_23name", "_1valid_23name"},
{"avalid_23name", "avalid_23name"},
{"Ava:lid_23name", "Ava:lid_23name"},
{"a lid_23name", "a_lid_23name"},
{":leading_colon", ":leading_colon"},
{"colon:in:the:middle", "colon:in:the:middle"},
{"", ""},
}

for _, test := range tests {
require.Equalf(t, test.want, sanitizeName(test.input), "input: %q", test.input)
}
}

func TestMultiScopes(t *testing.T) {
ctx := context.Background()
registry := prometheus.NewRegistry()
Expand Down
2 changes: 1 addition & 1 deletion exporters/prometheus/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.22
require (
github.com/prometheus/client_golang v1.20.3
github.com/prometheus/client_model v0.6.1
github.com/prometheus/common v0.59.1
github.com/stretchr/testify v1.9.0
go.opentelemetry.io/otel v1.29.0
go.opentelemetry.io/otel/metric v1.29.0
Expand All @@ -25,7 +26,6 @@ require (
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/common v0.59.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
golang.org/x/sys v0.25.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
10 changes: 10 additions & 0 deletions exporters/prometheus/testdata/counter_utf8.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# HELP "foo.things_seconds_total" a simple counter
# TYPE "foo.things_seconds_total" counter
{"foo.things_seconds_total","A.G"="B","C.H"="D","E.I"="true","F.J"="42",otel_scope_name="testmeter",otel_scope_version="v0.1.0"} 24.3
{"foo.things_seconds_total","A.G"="D","C.H"="B","E.I"="true","F.J"="42",otel_scope_name="testmeter",otel_scope_version="v0.1.0"} 5
# HELP otel_scope_info Instrumentation Scope metadata
# TYPE otel_scope_info gauge
otel_scope_info{otel_scope_name="testmeter",otel_scope_version="v0.1.0"} 1
# HELP target_info Target metadata
# TYPE target_info gauge
target_info{"service.name"="prometheus_test","telemetry.sdk.language"="go","telemetry.sdk.name"="opentelemetry","telemetry.sdk.version"="latest"} 1
6 changes: 3 additions & 3 deletions exporters/prometheus/testdata/sanitized_names.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# HELP bar a fun little gauge
# TYPE bar gauge
bar{A="B",C="D",otel_scope_name="testmeter",otel_scope_version="v0.1.0"} 75
# HELP _0invalid_counter_name_total a counter with an invalid name
# TYPE _0invalid_counter_name_total counter
_0invalid_counter_name_total{A="B",C="D",otel_scope_name="testmeter",otel_scope_version="v0.1.0"} 100
# HELP _invalid_counter_name_total a counter with an invalid name
# TYPE _invalid_counter_name_total counter
_invalid_counter_name_total{A="B",C="D",otel_scope_name="testmeter",otel_scope_version="v0.1.0"} 100
# HELP invalid_gauge_name a gauge with an invalid name
# TYPE invalid_gauge_name gauge
invalid_gauge_name{A="B",C="D",otel_scope_name="testmeter",otel_scope_version="v0.1.0"} 100
Expand Down
9 changes: 5 additions & 4 deletions internal/tools/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ require (
github.com/butuzov/mirror v1.2.0 // indirect
github.com/catenacyber/perfsprint v0.7.1 // indirect
github.com/ccojocar/zxcvbn-go v1.0.2 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/charithe/durationcheck v0.0.10 // indirect
github.com/chavacava/garif v0.1.0 // indirect
github.com/ckaznocha/intrange v0.1.2 // indirect
Expand Down Expand Up @@ -133,6 +133,7 @@ require (
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/moricho/tparallel v0.3.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nakabonne/nestif v0.3.1 // indirect
github.com/nishanths/exhaustive v0.12.0 // indirect
github.com/nishanths/predeclared v0.2.2 // indirect
Expand All @@ -142,10 +143,10 @@ require (
github.com/pjbgf/sha1cd v0.3.0 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/polyfloyd/go-errorlint v1.6.0 // indirect
github.com/prometheus/client_golang v1.19.0 // indirect
github.com/prometheus/client_golang v1.20.2 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.48.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/prometheus/common v0.57.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/quasilyte/go-ruleguard v0.4.2 // indirect
github.com/quasilyte/go-ruleguard/dsl v0.3.22 // indirect
github.com/quasilyte/gogrep v0.5.0 // indirect
Expand Down
Loading

0 comments on commit 71b341f

Please sign in to comment.