diff --git a/model/metric.go b/model/metric.go index 12593c7c..81ef78b9 100644 --- a/model/metric.go +++ b/model/metric.go @@ -18,6 +18,7 @@ import ( "fmt" "regexp" "sort" + "strconv" "strings" "unicode/utf8" @@ -270,10 +271,6 @@ func metricNeedsEscaping(m *dto.Metric) bool { return false } -const ( - lowerhex = "0123456789abcdef" -) - // EscapeName escapes the incoming name according to the provided escaping // scheme. Depending on the rules of escaping, this may cause no change in the // string that is returned. (Especially NoEscaping, which by definition is a @@ -308,7 +305,7 @@ func EscapeName(name string, scheme EscapingScheme) string { } else if isValidLegacyRune(b, i) { escaped.WriteRune(b) } else { - escaped.WriteRune('_') + escaped.WriteString("__") } } return escaped.String() @@ -318,21 +315,15 @@ func EscapeName(name string, scheme EscapingScheme) string { } escaped.WriteString("U__") for i, b := range name { - if isValidLegacyRune(b, i) { + if b == '_' { + escaped.WriteString("__") + } else if isValidLegacyRune(b, i) { escaped.WriteRune(b) } else if !utf8.ValidRune(b) { escaped.WriteString("_FFFD_") - } else if b < 0x100 { - escaped.WriteRune('_') - for s := 4; s >= 0; s -= 4 { - escaped.WriteByte(lowerhex[b>>uint(s)&0xF]) - } - escaped.WriteRune('_') - } else if b < 0x10000 { + } else { escaped.WriteRune('_') - for s := 12; s >= 0; s -= 4 { - escaped.WriteByte(lowerhex[b>>uint(s)&0xF]) - } + escaped.WriteString(strconv.FormatInt(int64(b), 16)) escaped.WriteRune('_') } } @@ -390,8 +381,9 @@ func UnescapeName(name string, scheme EscapingScheme) string { // We think we are in a UTF-8 code, process it. var utf8Val uint for j := 0; i < len(escapedName); j++ { - // This is too many characters for a utf8 value. - if j > 4 { + // This is too many characters for a utf8 value based on the MaxRune + // value of '\U0010FFFF'. + if j > 6 { return name } // Found a closing underscore, convert to a rune, check validity, and append. diff --git a/model/metric_test.go b/model/metric_test.go index ef08a053..8c62f1b5 100644 --- a/model/metric_test.go +++ b/model/metric_test.go @@ -261,6 +261,14 @@ func TestEscapeName(t *testing.T) { expectedUnescapedDots: "mysystem.prod.west.cpu.load", expectedValue: "U__mysystem_2e_prod_2e_west_2e_cpu_2e_load", }, + { + name: "name with dots and underscore", + input: "mysystem.prod.west.cpu.load_total", + expectedUnderscores: "mysystem_prod_west_cpu_load_total", + expectedDots: "mysystem_dot_prod_dot_west_dot_cpu_dot_load__total", + expectedUnescapedDots: "mysystem.prod.west.cpu.load_total", + expectedValue: "U__mysystem_2e_prod_2e_west_2e_cpu_2e_load__total", + }, { name: "name with dots and colon", input: "http.status:sum", @@ -269,16 +277,32 @@ func TestEscapeName(t *testing.T) { expectedUnescapedDots: "http.status:sum", expectedValue: "U__http_2e_status:sum", }, + { + name: "name with spaces and emoji", + input: "label with 😱", + expectedUnderscores: "label_with__", + expectedDots: "label__with____", + expectedUnescapedDots: "label_with__", + expectedValue: "U__label_20_with_20__1f631_", + }, { name: "name with unicode characters > 0x100", input: "花火", expectedUnderscores: "__", - expectedDots: "__", + expectedDots: "____", // Dots-replacement does not know the difference between two replaced // characters and a single underscore. - expectedUnescapedDots: "_", + expectedUnescapedDots: "__", expectedValue: "U___82b1__706b_", }, + { + name: "name with spaces and edge-case value", + input: "label with \u0100", + expectedUnderscores: "label_with__", + expectedDots: "label__with____", + expectedUnescapedDots: "label_with__", + expectedValue: "U__label_20_with_20__100_", + }, } for _, scenario := range scenarios {