Skip to content

Commit

Permalink
Support writing base64 JSON segments (dotnet#111041)
Browse files Browse the repository at this point in the history
  • Loading branch information
PranavSenthilnathan authored Jan 14, 2025
1 parent 7132268 commit 2bfe21b
Show file tree
Hide file tree
Showing 7 changed files with 912 additions and 312 deletions.
129 changes: 88 additions & 41 deletions src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System;
using System.Buffers;
using System.Buffers.Text;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
Expand Down Expand Up @@ -33,8 +34,11 @@ internal sealed class Utf8JsonWriterFuzzer : IFuzzer
private const byte NewLineFlag = 1 << 3;
private const byte SkipValidationFlag = 1 << 4;

// Options for choosing between UTF-8 and UTF-16 encoding
private const byte EncodingFlag = 1 << 5;
// Options for choosing between base64, UTF-8 and UTF-16 encoding
private const byte EncodingMask = 0b11 << 5;
private const byte Utf8EncodingFlag = 0b00 << 5;
private const byte Utf16EncodingFlag = 0b01 << 5;
private const byte Base64EncodingFlag = 0b10 << 5;

public void FuzzTarget(ReadOnlySpan<byte> bytes)
{
Expand All @@ -53,8 +57,13 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
ReadOnlySpan<char> chars = MemoryMarshal.Cast<byte, char>(bytes);

// Validate that the indices are within bounds of the input
bool utf8 = (optionsByte & EncodingFlag) == 0;
if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (utf8 ? bytes.Length : chars.Length)))
int encoding = optionsByte & EncodingMask;
if (encoding is not Utf8EncodingFlag and not Utf16EncodingFlag and not Base64EncodingFlag)
{
return;
}

if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (encoding is Utf16EncodingFlag ? chars.Length : bytes.Length)))
{
return;
}
Expand All @@ -63,7 +72,7 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
bool indented = (optionsByte & IndentFlag) == 0;
JsonWriterOptions options = new()
{
Encoder = (optionsByte & EncodingFlag) == 0 ? JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
Encoder = (optionsByte & EncoderFlag) == 0 ? JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
Indented = indented,
MaxDepth = (optionsByte & MaxDepthFlag) == 0 ? 1 : 0,
NewLine = (optionsByte & NewLineFlag) == 0 ? "\n" : "\r\n",
Expand All @@ -74,9 +83,9 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
int maxExpandedSizeBytes = 6 * bytes.Length + 2;
byte[] expectedBuffer = ArrayPool<byte>.Shared.Rent(maxExpandedSizeBytes);
Span<byte> expected =
expectedBuffer.AsSpan(0, utf8
? EncodeToUtf8(bytes, expectedBuffer, options.Encoder)
: EncodeToUtf8(chars, expectedBuffer, options.Encoder));
expectedBuffer.AsSpan(0, encoding == Utf16EncodingFlag
? EncodeToUtf8(chars, expectedBuffer, options.Encoder)
: EncodeToUtf8(bytes, expectedBuffer, options.Encoder, encoding == Base64EncodingFlag));

// Compute the actual result by using Utf8JsonWriter. Each iteration is a different slice of the input, but the result should be the same.
byte[] actualBuffer = new byte[expected.Length];
Expand All @@ -89,14 +98,14 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
{
using MemoryStream stream = new(actualBuffer);
using Utf8JsonWriter writer = new(stream, options);
if (utf8)

if (encoding == Utf16EncodingFlag)
{
WriteStringValueSegments(writer, bytes, ranges);
WriteStringValueSegments(writer, chars, ranges);
}
else
{
WriteStringValueSegments(writer, chars, ranges);
WriteStringValueSegments(writer, bytes, ranges, encoding == Base64EncodingFlag);
}

writer.Flush();
Expand All @@ -110,7 +119,7 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
}

// Additional tests for mixing base64, UTF-8 and UTF-16 segments in one string value. The alignment math is easier in UTF-16 mode so just run them for that.
if (!utf8)
if (encoding == Utf16EncodingFlag)
{
Array.Clear(expectedBuffer);

Expand All @@ -124,9 +133,16 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
using MemoryStream stream = new(actualBuffer);
using Utf8JsonWriter writer = new(stream, options);

// UTF-16 + UTF-8
writer.WriteStringValueSegment(firstSegment, false);

Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteStringValueSegment(state, true), secondSegment);

stream.Position = 0;
writer.Reset();

// UTF-16 + Base64
writer.WriteStringValueSegment(firstSegment, false);
Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteBase64StringSegment(state, true), secondSegment);
}

Array.Clear(expectedBuffer);
Expand All @@ -135,25 +151,67 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
ReadOnlySpan<byte> firstSegment = bytes[0..(2 * slice1)];
ReadOnlySpan<char> secondSegment = chars[slice1..];

expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, secondSegment, expectedBuffer, options.Encoder));
expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, expectedBuffer, options.Encoder, base64Encode: false));

actualBuffer = new byte[expected.Length];
using MemoryStream stream = new(actualBuffer);
using Utf8JsonWriter writer = new(stream, options);

// UTF-8 + UTF-16
writer.WriteStringValueSegment(firstSegment, false);
Assert.Throws<InvalidOperationException, ReadOnlySpan<char>>(state => writer.WriteStringValueSegment(state, true), secondSegment);

stream.Position = 0;
writer.Reset();

// UTF-8 + Base64
writer.WriteStringValueSegment(firstSegment, false);
Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteBase64StringSegment(state, true), MemoryMarshal.AsBytes(secondSegment));
}

Array.Clear(expectedBuffer);

{
ReadOnlySpan<byte> firstSegment = bytes[0..(2 * slice1)];
ReadOnlySpan<char> secondSegment = chars[slice1..];

expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, expectedBuffer, options.Encoder, base64Encode: true));

actualBuffer = new byte[expected.Length];
using MemoryStream stream = new(actualBuffer);
using Utf8JsonWriter writer = new(stream, options);

// Base64 + UTF-16
writer.WriteBase64StringSegment(firstSegment, false);
Assert.Throws<InvalidOperationException, ReadOnlySpan<char>>(state => writer.WriteStringValueSegment(state, true), secondSegment);

stream.Position = 0;
writer.Reset();

// Base64 + UTF-8
writer.WriteBase64StringSegment(firstSegment, false);
Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteStringValueSegment(state, true), MemoryMarshal.AsBytes(secondSegment));
}
}

ArrayPool<byte>.Shared.Return(expectedBuffer);
}

private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<byte> bytes, ReadOnlySpan<Range> ranges)
private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<byte> bytes, ReadOnlySpan<Range> ranges, bool base64Encode)
{
for (int i = 0; i < ranges.Length; i++)
if (base64Encode)
{
for (int i = 0; i < ranges.Length; i++)
{
writer.WriteBase64StringSegment(bytes[ranges[i]], i == ranges.Length - 1);
}
}
else
{
writer.WriteStringValueSegment(bytes[ranges[i]], i == ranges.Length - 1);
for (int i = 0; i < ranges.Length; i++)
{
writer.WriteStringValueSegment(bytes[ranges[i]], i == ranges.Length - 1);
}
}
}

Expand All @@ -165,10 +223,20 @@ private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan
}
}

private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder)
private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder, bool base64Encode)
{
destBuffer[0] = (byte)'"';
encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int written, isFinalBlock: true);

int written;
if (base64Encode)
{
Base64.EncodeToUtf8(bytes, destBuffer[1..], out _, out written, isFinalBlock: true);
}
else
{
encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out written, isFinalBlock: true);
}

destBuffer[++written] = (byte)'"';
return written + 1;
}
Expand All @@ -181,27 +249,6 @@ private static int EncodeToUtf8(ReadOnlySpan<char> chars, Span<byte> destBuffer,
return written + 1;
}

private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder)
{
int written = 1;
destBuffer[0] = (byte)'"';
encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int writtenTemp, isFinalBlock: true);
written += writtenTemp;
destBuffer[written += EncodeTranscode(chars, destBuffer[written..], encoder, isFinalBlock: true)] = (byte)'"';
return written + 1;
}

private static int EncodeToUtf8(ReadOnlySpan<char> chars, ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder)
{
int written = 1;
destBuffer[0] = (byte)'"';
written += EncodeTranscode(chars, destBuffer[1..], encoder, isFinalBlock: true);
encoder.EncodeUtf8(bytes, destBuffer[written..], out _, out int writtenTemp, isFinalBlock: true);
written += writtenTemp;
destBuffer[written] = (byte)'"';
return written + 1;
}

private static int EncodeTranscode(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder, bool isFinalBlock = true)
{
var utf16buffer = ArrayPool<char>.Shared.Rent(6 * chars.Length);
Expand Down
1 change: 1 addition & 0 deletions src/libraries/System.Text.Json/ref/System.Text.Json.cs
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,7 @@ public void WriteStringValue(string? value) { }
public void WriteStringValue(System.Text.Json.JsonEncodedText value) { }
public void WriteStringValueSegment(System.ReadOnlySpan<byte> value, bool isFinalSegment) { }
public void WriteStringValueSegment(System.ReadOnlySpan<char> value, bool isFinalSegment) { }
// Stub for writing one base64 segment of a string value; isFinalSegment marks the last segment.
// The span type is fully qualified to match the neighbouring WriteStringValueSegment declarations.
public void WriteBase64StringSegment(System.ReadOnlySpan<byte> value, bool isFinalSegment) { }
}
}
namespace System.Text.Json.Nodes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ namespace System.Text.Json
{
public sealed partial class Utf8JsonWriter
{
private bool HasPartialCodePoint => PartialCodePointLength != 0;
private bool HasPartialStringData => PartialStringDataLength != 0;

private void ClearPartialCodePoint() => PartialCodePointLength = 0;
private void ClearPartialStringData() => PartialStringDataLength = 0;

private void ValidateEncodingDidNotChange(SegmentEncoding currentSegmentEncoding)
{
Expand All @@ -32,7 +32,7 @@ private void ValidateNotWithinUnfinalizedString()
}

Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None);
Debug.Assert(!HasPartialCodePoint);
Debug.Assert(!HasPartialStringData);
}

private void ValidateWritingValue()
Expand Down
Loading

0 comments on commit 2bfe21b

Please sign in to comment.