From c080baeb46142f122e5496e6323c7a058567a75c Mon Sep 17 00:00:00 2001 From: Michael Hoffmann Date: Thu, 4 Apr 2024 01:43:16 +0200 Subject: [PATCH 1/5] Update metric normalization --- src/Serializer/EnvelopItems/MetricsItem.php | 34 ++++++++++-- .../EnvelopeItems/MetricsItemTest.php | 52 +++++++++++++++++++ tests/Serializer/PayloadSerializerTest.php | 14 ++--- 3 files changed, 88 insertions(+), 12 deletions(-) create mode 100644 tests/Serializer/EnvelopeItems/MetricsItemTest.php diff --git a/src/Serializer/EnvelopItems/MetricsItem.php b/src/Serializer/EnvelopItems/MetricsItem.php index 77ae05757..23f342b97 100644 --- a/src/Serializer/EnvelopItems/MetricsItem.php +++ b/src/Serializer/EnvelopItems/MetricsItem.php @@ -19,12 +19,17 @@ class MetricsItem implements EnvelopeItemInterface /** * @var string */ - private const KEY_PATTERN = '/[^a-zA-Z0-9_\/.-]+/i'; + private const KEY_PATTERN = '/[^\w\-.]+/i'; /** * @var string */ - private const VALUE_PATTERN = '/[^\w\d\s_:\/@\.{}\[\]$-]+/i'; + private const UNIT_PATTERN = '/[^\w]+/i'; + + /** + * @var string + */ + private const TAG_KEY_PATTERN = '/[^\w\-.\/]+/i'; public static function toEnvelopeItem(Event $event): string { @@ -42,7 +47,7 @@ public static function toEnvelopeItem(Event $event): string if ($metric->getUnit() !== MetricsUnit::none()) { // unit - @second - $line .= '@' . $metric->getunit(); + $line .= '@' . preg_replace(self::UNIT_PATTERN, '', (string) $metric->getUnit()); } foreach ($metric->serialize() as $value) { @@ -55,8 +60,8 @@ public static function toEnvelopeItem(Event $event): string $tags = []; foreach ($metric->getTags() as $key => $value) { - $tags[] = preg_replace(self::KEY_PATTERN, '_', $key) . - ':' . preg_replace(self::VALUE_PATTERN, '', $value); + $tags[] = preg_replace(self::TAG_KEY_PATTERN, '', $key) . + ':' . self::replaceTagValueCharacters($value); } if (!empty($tags)) { @@ -110,4 +115,23 @@ public static function toEnvelopeItem(Event $event): string $statsdPayload ); } + + private static function replaceTagValueCharacters(string $tagValue): string + { + $tagValue = str_replace( + [ + '\\', + '|', + ',', + ], + [ + '\\\\', + '\u{7c}', + '\u{2c}', + ], + $tagValue + ); + + return $tagValue; + } } diff --git a/tests/Serializer/EnvelopeItems/MetricsItemTest.php b/tests/Serializer/EnvelopeItems/MetricsItemTest.php new file mode 100644 index 000000000..0358f4d17 --- /dev/null +++ b/tests/Serializer/EnvelopeItems/MetricsItemTest.php @@ -0,0 +1,52 @@ +assertSame($expectedResult, $result); + } + + public static function toEnvelopeItemDataProvider(): iterable + { + $metric = new CounterType('abcABC123_-./äöü$%&abcABC123', 1.0, MetricsUnit::custom('abcABC123_-./äöü$%&abcABC123'), [ + 'abcABC123_-./äöü$%&abcABC123' => 'abc|,\\123', + ], 1597790835); + + $event = Event::createMetrics(new EventId('fc9442f5aef34234bb22b9a615e30ccd')); + $event->setMetrics([ + $metric, + ]); + + yield [ + $event, + << 'bar', 'route' => 'GET /foo'], 1597790835); - $distribution = new DistributionType('distribution', 1.0, MetricsUnit::second(), ['$foo$' => '%bar%'], 1597790835); - $gauge = new GaugeType('gauge', 1.0, MetricsUnit::second(), ['föö' => 'bär'], 1597790835); - $set = new SetType('set', 1.0, MetricsUnit::second(), ['%{key}' => '$value$'], 1597790835); + $distribution = new DistributionType('distribution', 1.0, MetricsUnit::second(), ['foo' => 'bar'], 1597790835); + $gauge = new GaugeType('gauge', 1.0, MetricsUnit::second(), ['foo' => 'bar', 'bar' => 'baz'], 1597790835); + $set = new SetType('set', 1.0, MetricsUnit::second(), ['key' => 'value'], 1597790835); $noTags = new CounterType('no_tags', 1.0, MetricsUnit::second(), [], 1597790835); $event = Event::createMetrics(new EventId('fc9442f5aef34234bb22b9a615e30ccd')); @@ -433,11 +433,11 @@ public static function serializeAsEnvelopeDataProvider(): iterable $event, << Date: Thu, 4 Apr 2024 14:56:00 +0200 Subject: [PATCH 2/5] Escape control characters --- src/Serializer/EnvelopItems/MetricsItem.php | 6 ++++++ tests/Serializer/EnvelopeItems/MetricsItemTest.php | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Serializer/EnvelopItems/MetricsItem.php b/src/Serializer/EnvelopItems/MetricsItem.php index 23f342b97..6808189a5 100644 --- a/src/Serializer/EnvelopItems/MetricsItem.php +++ b/src/Serializer/EnvelopItems/MetricsItem.php @@ -120,11 +120,17 @@ private static function replaceTagValueCharacters(string $tagValue): string { $tagValue = str_replace( [ + "\n", + "\r", + "\t", '\\', '|', ',', ], [ + '\n', + '\r', + '\t', '\\\\', '\u{7c}', '\u{2c}', diff --git a/tests/Serializer/EnvelopeItems/MetricsItemTest.php b/tests/Serializer/EnvelopeItems/MetricsItemTest.php index 0358f4d17..138aee590 100644 --- a/tests/Serializer/EnvelopeItems/MetricsItemTest.php +++ b/tests/Serializer/EnvelopeItems/MetricsItemTest.php @@ -32,7 +32,7 @@ public function testToEnvelopeItem(Event $event, string $expectedResult): void public static function toEnvelopeItemDataProvider(): iterable { $metric = new CounterType('abcABC123_-./äöü$%&abcABC123', 1.0, MetricsUnit::custom('abcABC123_-./äöü$%&abcABC123'), [ - 'abcABC123_-./äöü$%&abcABC123' => 'abc|,\\123', + 'abcABC123_-./äöü$%&abcABC123' => "abc\n\r\t|,\\123", ], 1597790835); $event = Event::createMetrics(new EventId('fc9442f5aef34234bb22b9a615e30ccd')); @@ -43,8 +43,8 @@ public static function toEnvelopeItemDataProvider(): iterable yield [ $event, << Date: Thu, 4 Apr 2024 15:07:32 +0200 Subject: [PATCH 3/5] Escape `\` before adding `\` to string --- src/Serializer/EnvelopItems/MetricsItem.php | 8 +++----- tests/Serializer/EnvelopeItems/MetricsItemTest.php | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Serializer/EnvelopItems/MetricsItem.php b/src/Serializer/EnvelopItems/MetricsItem.php index 6808189a5..5367a9d32 100644 --- a/src/Serializer/EnvelopItems/MetricsItem.php +++ b/src/Serializer/EnvelopItems/MetricsItem.php @@ -118,26 +118,24 @@ public static function toEnvelopeItem(Event $event): string private static function replaceTagValueCharacters(string $tagValue): string { - $tagValue = str_replace( + return str_replace( [ + '\\', "\n", "\r", "\t", - '\\', '|', ',', ], [ + '\\\\', '\n', '\r', '\t', - '\\\\', '\u{7c}', '\u{2c}', ], $tagValue ); - - return $tagValue; } } diff --git a/tests/Serializer/EnvelopeItems/MetricsItemTest.php b/tests/Serializer/EnvelopeItems/MetricsItemTest.php index 138aee590..fff88c074 100644 --- a/tests/Serializer/EnvelopeItems/MetricsItemTest.php +++ b/tests/Serializer/EnvelopeItems/MetricsItemTest.php @@ -43,7 +43,7 @@ public static function toEnvelopeItemDataProvider(): iterable yield [ $event, << Date: Tue, 9 Apr 2024 12:39:25 +0200 Subject: [PATCH 4/5] Refactor tag value escaping --- src/Serializer/EnvelopItems/MetricsItem.php | 115 ++++++++++-------- .../EnvelopeItems/MetricsItemTest.php | 52 +++----- 2 files changed, 81 insertions(+), 86 deletions(-) diff --git a/src/Serializer/EnvelopItems/MetricsItem.php b/src/Serializer/EnvelopItems/MetricsItem.php index 5367a9d32..0d6f4ed45 100644 --- a/src/Serializer/EnvelopItems/MetricsItem.php +++ b/src/Serializer/EnvelopItems/MetricsItem.php @@ -6,6 +6,7 @@ use Sentry\Event; use Sentry\Metrics\MetricsUnit; +use Sentry\Metrics\Types\AbstractType; use Sentry\Serializer\Traits\StacktraceFrameSeralizerTrait; use Sentry\Util\JSON; @@ -42,37 +43,7 @@ public static function toEnvelopeItem(Event $event): string $metricMetaPayload = []; foreach ($metrics as $metric) { - // key - my.metric - $line = preg_replace(self::KEY_PATTERN, '_', $metric->getKey()); - - if ($metric->getUnit() !== MetricsUnit::none()) { - // unit - @second - $line .= '@' . preg_replace(self::UNIT_PATTERN, '', (string) $metric->getUnit()); - } - - foreach ($metric->serialize() as $value) { - // value - 2:3:4... - $line .= ':' . $value; - } - - // type - |c|, |d|, ... - $line .= '|' . $metric->getType() . '|'; - - $tags = []; - foreach ($metric->getTags() as $key => $value) { - $tags[] = preg_replace(self::TAG_KEY_PATTERN, '', $key) . - ':' . self::replaceTagValueCharacters($value); - } - - if (!empty($tags)) { - // tags - #key:value,key:value... - $line .= '#' . implode(',', $tags) . '|'; - } - - // timestamp - T123456789 - $line .= 'T' . $metric->getTimestamp(); - - $statsdPayload[] = $line; + $statsdPayload[] = self::seralizeMetric($metric); if ($metric->hasCodeLocation()) { $metricMetaPayload[$metric->getMri()][] = array_merge( @@ -116,26 +87,68 @@ public static function toEnvelopeItem(Event $event): string ); } - private static function replaceTagValueCharacters(string $tagValue): string + public static function seralizeMetric(AbstractType $metric): string { - return str_replace( - [ - '\\', - "\n", - "\r", - "\t", - '|', - ',', - ], - [ - '\\\\', - '\n', - '\r', - '\t', - '\u{7c}', - '\u{2c}', - ], - $tagValue - ); + // key - my.metric + $line = preg_replace(self::KEY_PATTERN, '_', $metric->getKey()); + + if ($metric->getUnit() !== MetricsUnit::none()) { + // unit - @second + $line .= '@' . preg_replace(self::UNIT_PATTERN, '', (string) $metric->getUnit()); + } + + foreach ($metric->serialize() as $value) { + // value - 2:3:4... + $line .= ':' . $value; + } + + // type - |c|, |d|, ... + $line .= '|' . $metric->getType() . '|'; + + $tags = []; + foreach ($metric->getTags() as $key => $value) { + $tags[] = preg_replace(self::TAG_KEY_PATTERN, '', $key) . + ':' . self::escapeTagValues($value); + } + + if (!empty($tags)) { + // tags - #key:value,key:value... + $line .= '#' . implode(',', $tags) . '|'; + } + + // timestamp - T123456789 + $line .= 'T' . $metric->getTimestamp(); + + return $line; + } + + public static function escapeTagValues(string $tagValue): string + { + $result = ''; + + for ($i = 0; $i < mb_strlen($tagValue); $i++) { + $character = mb_substr($tagValue, $i, 1); + $result .= str_replace( + [ + "\n", + "\r", + "\t", + '\\', + '|', + ',', + ], + [ + '\n', + '\r', + '\t', + '\\\\', + '\u{7c}', + '\u{2c}', + ], + $character + ); + } + + return $result; } } diff --git a/tests/Serializer/EnvelopeItems/MetricsItemTest.php b/tests/Serializer/EnvelopeItems/MetricsItemTest.php index fff88c074..5bb774b3c 100644 --- a/tests/Serializer/EnvelopeItems/MetricsItemTest.php +++ b/tests/Serializer/EnvelopeItems/MetricsItemTest.php @@ -5,48 +5,30 @@ namespace Sentry\Tests\Serializer; use PHPUnit\Framework\TestCase; -use Sentry\Event; -use Sentry\EventId; -use Sentry\Metrics\MetricsUnit; -use Sentry\Metrics\Types\CounterType; use Sentry\Serializer\EnvelopItems\MetricsItem; -use Symfony\Bridge\PhpUnit\ClockMock; -/** - * @group time-sensitive - */ final class MetricsItemTest extends TestCase { - /** - * @dataProvider toEnvelopeItemDataProvider - */ - public function testToEnvelopeItem(Event $event, string $expectedResult): void + public function testSeralizeMetric(): void { - ClockMock::withClockMock(1597790835); - - $result = MetricsItem::toEnvelopeItem($event); - - $this->assertSame($expectedResult, $result); } - public static function toEnvelopeItemDataProvider(): iterable + public function testEscapeTagValues(): void { - $metric = new CounterType('abcABC123_-./äöü$%&abcABC123', 1.0, MetricsUnit::custom('abcABC123_-./äöü$%&abcABC123'), [ - 'abcABC123_-./äöü$%&abcABC123' => "abc\n\r\t|,\\123", - ], 1597790835); - - $event = Event::createMetrics(new EventId('fc9442f5aef34234bb22b9a615e30ccd')); - $event->setMetrics([ - $metric, - ]); - - yield [ - $event, - <<assertSame('plain', MetricsItem::escapeTagValues('plain')); + $this->assertSame('plain text', MetricsItem::escapeTagValues('plain text')); + $this->assertSame('plain%text', MetricsItem::escapeTagValues('plain%text')); + + // Escape sequences + $this->assertSame('plain \\\\\\\\ text', MetricsItem::escapeTagValues('plain \\\\ text')); + $this->assertSame('plain\\u{2c}text', MetricsItem::escapeTagValues('plain,text')); + $this->assertSame('plain\\u{7c}text', MetricsItem::escapeTagValues('plain|text')); + $this->assertSame('plain 😅', MetricsItem::escapeTagValues('plain 😅')); + + // Escapable control characters + $this->assertSame('plain\\\\ntext', MetricsItem::escapeTagValues("plain\ntext")); + $this->assertSame('plain\\\\rtext', MetricsItem::escapeTagValues("plain\rtext")); + $this->assertSame('plain\\\\ttext', MetricsItem::escapeTagValues("plain\ttext")); } } From cc985439e2e817e7fc8b2d7a443599d305dc34ff Mon Sep 17 00:00:00 2001 From: Michael Hoffmann Date: Wed, 10 Apr 2024 13:53:18 +0200 Subject: [PATCH 5/5] Cleanup --- src/Serializer/EnvelopItems/MetricsItem.php | 2 +- tests/Serializer/EnvelopeItems/MetricsItemTest.php | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Serializer/EnvelopItems/MetricsItem.php b/src/Serializer/EnvelopItems/MetricsItem.php index 0d6f4ed45..9941ec8be 100644 --- a/src/Serializer/EnvelopItems/MetricsItem.php +++ b/src/Serializer/EnvelopItems/MetricsItem.php @@ -126,7 +126,7 @@ public static function escapeTagValues(string $tagValue): string { $result = ''; - for ($i = 0; $i < mb_strlen($tagValue); $i++) { + for ($i = 0; $i < mb_strlen($tagValue); ++$i) { $character = mb_substr($tagValue, $i, 1); $result .= str_replace( [ diff --git a/tests/Serializer/EnvelopeItems/MetricsItemTest.php b/tests/Serializer/EnvelopeItems/MetricsItemTest.php index 5bb774b3c..399fcf40b 100644 --- a/tests/Serializer/EnvelopeItems/MetricsItemTest.php +++ b/tests/Serializer/EnvelopeItems/MetricsItemTest.php @@ -9,10 +9,6 @@ final class MetricsItemTest extends TestCase { - public function testSeralizeMetric(): void - { - } - public function testEscapeTagValues(): void { // No escaping