Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weโ€™ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update normalization for Metrics #3332

Merged
merged 9 commits into from
Apr 8, 2024
Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- Add support for Spring Rest Client ([#3199](https://github.com/getsentry/sentry-java/pull/3199))
- Extend Proxy options with proxy type ([#3326](https://github.com/getsentry/sentry-java/pull/3326))
- Update normalization of metrics keys, tags and values ([#3332](https://github.com/getsentry/sentry-java/pull/3332))

### Fixes

Expand Down
6 changes: 3 additions & 3 deletions sentry/api/sentry.api
Original file line number Diff line number Diff line change
Expand Up @@ -3534,14 +3534,14 @@ public final class io/sentry/metrics/MetricsHelper {
public static fun convertNanosTo (Lio/sentry/MeasurementUnit$Duration;J)D
public static fun encodeMetrics (JLjava/util/Collection;Ljava/lang/StringBuilder;)V
public static fun getCutoffTimestampMs (J)J
public static fun getDayBucketKey (Ljava/util/Calendar;)J
public static fun getExportKey (Lio/sentry/metrics/MetricType;Ljava/lang/String;Lio/sentry/MeasurementUnit;)Ljava/lang/String;
public static fun getMetricBucketKey (Lio/sentry/metrics/MetricType;Ljava/lang/String;Lio/sentry/MeasurementUnit;Ljava/util/Map;)Ljava/lang/String;
public static fun getTimeBucketKey (J)J
public static fun mergeTags (Ljava/util/Map;Ljava/util/Map;)Ljava/util/Map;
public static fun sanitizeKey (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeName (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeTagKey (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeTagValue (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeUnit (Ljava/lang/String;)Ljava/lang/String;
public static fun sanitizeValue (Ljava/lang/String;)Ljava/lang/String;
public static fun setFlushShiftMs (J)V
public static fun toStatsdType (Lio/sentry/metrics/MetricType;)Ljava/lang/String;
}
Expand Down
84 changes: 51 additions & 33 deletions sentry/src/main/java/io/sentry/metrics/MetricsHelper.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
package io.sentry.metrics;

import io.sentry.MeasurementUnit;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.TimeZone;
import java.util.regex.Pattern;
import org.jetbrains.annotations.ApiStatus;
import org.jetbrains.annotations.NotNull;
Expand All @@ -20,14 +18,9 @@ public final class MetricsHelper {
public static final int MAX_TOTAL_WEIGHT = 100000;
private static final int ROLLUP_IN_SECONDS = 10;

private static final Pattern INVALID_KEY_CHARACTERS_PATTERN =
Pattern.compile("[^a-zA-Z0-9_/.-]+");
private static final Pattern INVALID_VALUE_CHARACTERS_PATTERN =
Pattern.compile("[^\\w\\d\\s_:/@\\.\\{\\}\\[\\]$-]+");
// See
// https://docs.sysdig.com/en/docs/sysdig-monitor/integrations/working-with-integrations/custom-integrations/integrate-statsd-metrics/#characters-allowed-for-statsd-metric-names
private static final Pattern INVALID_METRIC_UNIT_CHARACTERS_PATTERN =
Pattern.compile("[^a-zA-Z0-9_/.]+");
private static final Pattern UNIT_PATTERN = Pattern.compile("\\W+");
private static final Pattern NAME_PATTERN = Pattern.compile("[^\\w\\-.]+");
private static final Pattern TAG_KEY_PATTERN = Pattern.compile("[^\\w\\-./]+");

private static final char TAGS_PAIR_DELIMITER = ','; // Delimiter between key-value pairs
private static final char TAGS_KEY_VALUE_DELIMITER = '='; // Delimiter between key and value
Expand All @@ -36,15 +29,6 @@ public final class MetricsHelper {
private static long FLUSH_SHIFT_MS =
(long) (new Random().nextFloat() * (ROLLUP_IN_SECONDS * 1000f));

public static long getDayBucketKey(final @NotNull Calendar timestamp) {
final Calendar utc = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
utc.set(Calendar.YEAR, timestamp.get(Calendar.YEAR));
utc.set(Calendar.MONTH, timestamp.get(Calendar.MONTH));
utc.set(Calendar.DAY_OF_MONTH, timestamp.get(Calendar.DAY_OF_MONTH));

return utc.getTimeInMillis() / 1000;
}

public static long getTimeBucketKey(final long timestampMs) {
final long seconds = timestampMs / 1000;
final long bucketKey = (seconds / ROLLUP_IN_SECONDS) * ROLLUP_IN_SECONDS;
Expand All @@ -59,12 +43,50 @@ public static long getCutoffTimestampMs(final long nowMs) {
return nowMs - (ROLLUP_IN_SECONDS * 1000) - FLUSH_SHIFT_MS;
}

public static @NotNull String sanitizeKey(final @NotNull String input) {
return INVALID_KEY_CHARACTERS_PATTERN.matcher(input).replaceAll("_");
@NotNull
public static String sanitizeUnit(final @NotNull String unit) {
return UNIT_PATTERN.matcher(unit).replaceAll("");
}

@NotNull
public static String sanitizeName(final @NotNull String input) {
return NAME_PATTERN.matcher(input).replaceAll("_");
}

public static String sanitizeValue(final @NotNull String input) {
return INVALID_VALUE_CHARACTERS_PATTERN.matcher(input).replaceAll("");
@NotNull
public static String sanitizeTagKey(final @NotNull String input) {
return TAG_KEY_PATTERN.matcher(input).replaceAll("");
}

@NotNull
public static String sanitizeTagValue(final @NotNull String input) {
// see https://develop.sentry.dev/sdk/metrics/#tag-values-replacement-map
// Line feed -> \n
// Carriage return -> \r
// Tab -> \t
// Backslash -> \\
// Pipe -> \\u{7c}
// Comma -> \\u{2c}
final StringBuilder output = new StringBuilder(input.length());
for (int idx = 0; idx < input.length(); idx++) {
final char ch = input.charAt(idx);
if (ch == '\n') {
output.append("\\n");
} else if (ch == '\r') {
output.append("\\r");
} else if (ch == '\t') {
output.append("\\t");
} else if (ch == '\\') {
output.append("\\\\");
} else if (ch == '|') {
output.append("\\u{7c}");
} else if (ch == ',') {
output.append("\\u{2c}");
} else {
output.append(ch);
}
}
return output.toString();
}

public static @NotNull String toStatsdType(final @NotNull MetricType type) {
Expand All @@ -89,7 +111,7 @@ public static String getMetricBucketKey(
final @Nullable MeasurementUnit unit,
final @Nullable Map<String, String> tags) {
final @NotNull String typePrefix = toStatsdType(type);
final @NotNull String serializedTags = GetTagsKey(tags);
final @NotNull String serializedTags = getTagsKey(tags);

final @NotNull String unitName = getUnitName(unit);
return String.format("%s_%s_%s_%s", typePrefix, metricKey, unitName, serializedTags);
Expand All @@ -100,7 +122,8 @@ private static String getUnitName(final @Nullable MeasurementUnit unit) {
return (unit != null) ? unit.apiName() : MeasurementUnit.NONE;
}

private static String GetTagsKey(final @Nullable Map<String, String> tags) {
@NotNull
private static String getTagsKey(final @Nullable Map<String, String> tags) {
if (tags == null || tags.isEmpty()) {
return "";
}
Expand Down Expand Up @@ -197,7 +220,7 @@ public static void encodeMetrics(
final @NotNull Collection<Metric> metrics,
final @NotNull StringBuilder writer) {
for (Metric metric : metrics) {
writer.append(sanitizeKey(metric.getKey()));
writer.append(sanitizeName(metric.getKey()));
writer.append("@");

final @Nullable MeasurementUnit unit = metric.getUnit();
Expand All @@ -218,15 +241,15 @@ public static void encodeMetrics(
writer.append("|#");
boolean first = true;
for (final @NotNull Map.Entry<String, String> tag : tags.entrySet()) {
final @NotNull String tagKey = sanitizeKey(tag.getKey());
final @NotNull String tagKey = sanitizeTagKey(tag.getKey());
if (first) {
first = false;
} else {
writer.append(",");
}
writer.append(tagKey);
writer.append(":");
writer.append(sanitizeValue(tag.getValue()));
writer.append(sanitizeTagValue(tag.getValue()));
}
}

Expand All @@ -236,11 +259,6 @@ public static void encodeMetrics(
}
}

@NotNull
public static String sanitizeUnit(@NotNull String unit) {
return INVALID_METRIC_UNIT_CHARACTERS_PATTERN.matcher(unit).replaceAll("_");
}

@NotNull
public static Map<String, String> mergeTags(
final @Nullable Map<String, String> tags, final @NotNull Map<String, String> defaultTags) {
Expand Down
51 changes: 33 additions & 18 deletions sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -59,31 +59,46 @@ class MetricsHelperTest {
}

@Test
fun sanitizeKey() {
assertEquals("foo-bar", MetricsHelper.sanitizeKey("foo-bar"))
assertEquals("foo_bar", MetricsHelper.sanitizeKey("foo\$\$\$bar"))
assertEquals("fo_-bar", MetricsHelper.sanitizeKey("foรถ-bar"))
fun sanitizeName() {
assertEquals("foo-bar", MetricsHelper.sanitizeName("foo-bar"))
assertEquals("foo_bar", MetricsHelper.sanitizeName("foo\$\$\$bar"))
assertEquals("fo_-bar", MetricsHelper.sanitizeName("foรถ-bar"))
}

@Test
fun sanitizeValue() {
assertEquals("\$foo", MetricsHelper.sanitizeValue("%\$foo"))
assertEquals("blah{}", MetricsHelper.sanitizeValue("blah{}"))
assertEquals("snwmn", MetricsHelper.sanitizeValue("snรถwmรคn"))
assertEquals("j e n g a", MetricsHelper.sanitizeValue("j e n g a!"))
fun sanitizeTagKey() {
// no replacement characters for tag keys
// - and / should be allowed
assertEquals("a/weird/tag-key/", MetricsHelper.sanitizeTagKey("a/weird/tag-key/:รค"))
}

@Test
fun sanitizeUnit() {
val items = listOf(
"Test123_." to "Test123_.",
"test{value}" to "test_value_",
"test-value" to "test_value"
)
fun sanitizeTagValue() {
// https://github.com/getsentry/relay/blob/3208e3ce5b1fe4d147aa44e0e966807c256993de/relay-metrics/src/protocol.rs#L142
assertEquals("plain", MetricsHelper.sanitizeTagValue("plain"))
assertEquals("plain text", MetricsHelper.sanitizeTagValue("plain text"))
assertEquals("plain%text", MetricsHelper.sanitizeTagValue("plain%text"))

// Escape sequences
assertEquals("plain \\\\ text", MetricsHelper.sanitizeTagValue("plain \\ text"))
assertEquals("plain\\u{2c}text", MetricsHelper.sanitizeTagValue("plain,text"))
assertEquals("plain\\u{7c}text", MetricsHelper.sanitizeTagValue("plain|text"))
assertEquals("plain ๐Ÿ˜…", MetricsHelper.sanitizeTagValue("plain ๐Ÿ˜…"))

// Escapable control characters (may be stripped by the parser)
assertEquals("plain\\ntext", MetricsHelper.sanitizeTagValue("plain\ntext"))
assertEquals("plain\\rtext", MetricsHelper.sanitizeTagValue("plain\rtext"))
assertEquals("plain\\ttext", MetricsHelper.sanitizeTagValue("plain\ttext"))

// Unescapable control characters remain, as they'll be stripped by relay
assertEquals("plain\u0007text", MetricsHelper.sanitizeTagValue("plain\u0007text"))
assertEquals("plain\u009Ctext", MetricsHelper.sanitizeTagValue("plain\u009ctext"))
}

for (item in items) {
assertEquals(item.second, MetricsHelper.sanitizeUnit(item.first))
}
@Test
fun sanitizeUnit() {
// no replacement characters for units
assertEquals("abcABC123_abcABC123", MetricsHelper.sanitizeUnit("abcABC123_-./รครถรผ\$%&abcABC123"))
}

@Test
Expand Down
Loading