diff --git a/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs b/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs index 767821bcf875b1..5600ce7c357a00 100644 --- a/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs +++ b/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs @@ -3,6 +3,7 @@ using System; using System.Buffers; +using System.Buffers.Text; using System.Collections; using System.Collections.Generic; using System.Diagnostics; @@ -33,8 +34,11 @@ internal sealed class Utf8JsonWriterFuzzer : IFuzzer private const byte NewLineFlag = 1 << 3; private const byte SkipValidationFlag = 1 << 4; - // Options for choosing between UTF-8 and UTF-16 encoding - private const byte EncodingFlag = 1 << 5; + // Options for choosing between base64, UTF-8 and UTF-16 encoding + private const byte EncodingMask = 0b11 << 5; + private const byte Utf8EncodingFlag = 0b00 << 5; + private const byte Utf16EncodingFlag = 0b01 << 5; + private const byte Base64EncodingFlag = 0b10 << 5; public void FuzzTarget(ReadOnlySpan bytes) { @@ -53,8 +57,13 @@ public void FuzzTarget(ReadOnlySpan bytes) ReadOnlySpan chars = MemoryMarshal.Cast(bytes); // Validate that the indices are within bounds of the input - bool utf8 = (optionsByte & EncodingFlag) == 0; - if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (utf8 ? bytes.Length : chars.Length))) + int encoding = optionsByte & EncodingMask; + if (encoding is not Utf8EncodingFlag and not Utf16EncodingFlag and not Base64EncodingFlag) + { + return; + } + + if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (encoding is Utf16EncodingFlag ? chars.Length : bytes.Length))) { return; } @@ -63,7 +72,7 @@ public void FuzzTarget(ReadOnlySpan bytes) bool indented = (optionsByte & IndentFlag) == 0; JsonWriterOptions options = new() { - Encoder = (optionsByte & EncodingFlag) == 0 ? 
JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping, + Encoder = (optionsByte & EncoderFlag) == 0 ? JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping, Indented = indented, MaxDepth = (optionsByte & MaxDepthFlag) == 0 ? 1 : 0, NewLine = (optionsByte & NewLineFlag) == 0 ? "\n" : "\r\n", @@ -74,9 +83,9 @@ public void FuzzTarget(ReadOnlySpan bytes) int maxExpandedSizeBytes = 6 * bytes.Length + 2; byte[] expectedBuffer = ArrayPool.Shared.Rent(maxExpandedSizeBytes); Span expected = - expectedBuffer.AsSpan(0, utf8 - ? EncodeToUtf8(bytes, expectedBuffer, options.Encoder) - : EncodeToUtf8(chars, expectedBuffer, options.Encoder)); + expectedBuffer.AsSpan(0, encoding == Utf16EncodingFlag + ? EncodeToUtf8(chars, expectedBuffer, options.Encoder) + : EncodeToUtf8(bytes, expectedBuffer, options.Encoder, encoding == Base64EncodingFlag)); // Compute the actual result by using Utf8JsonWriter. Each iteration is a different slice of the input, but the result should be the same. byte[] actualBuffer = new byte[expected.Length]; @@ -89,14 +98,14 @@ public void FuzzTarget(ReadOnlySpan bytes) { using MemoryStream stream = new(actualBuffer); using Utf8JsonWriter writer = new(stream, options); - - if (utf8) + + if (encoding == Utf16EncodingFlag) { - WriteStringValueSegments(writer, bytes, ranges); + WriteStringValueSegments(writer, chars, ranges); } else { - WriteStringValueSegments(writer, chars, ranges); + WriteStringValueSegments(writer, bytes, ranges, encoding == Base64EncodingFlag); } writer.Flush(); @@ -110,7 +119,7 @@ public void FuzzTarget(ReadOnlySpan bytes) } // Additional test for mixing UTF-8 and UTF-16 encoding. The alignment math is easier in UTF-16 mode so just run it for that. 
- if (!utf8) + if (encoding == Utf16EncodingFlag) { Array.Clear(expectedBuffer); @@ -124,9 +133,16 @@ public void FuzzTarget(ReadOnlySpan bytes) using MemoryStream stream = new(actualBuffer); using Utf8JsonWriter writer = new(stream, options); + // UTF-16 + UTF-8 writer.WriteStringValueSegment(firstSegment, false); - Assert.Throws>(state => writer.WriteStringValueSegment(state, true), secondSegment); + + stream.Position = 0; + writer.Reset(); + + // UTF-16 + Base64 + writer.WriteStringValueSegment(firstSegment, false); + Assert.Throws>(state => writer.WriteBase64StringSegment(state, true), secondSegment); } Array.Clear(expectedBuffer); @@ -135,25 +151,67 @@ public void FuzzTarget(ReadOnlySpan bytes) ReadOnlySpan firstSegment = bytes[0..(2 * slice1)]; ReadOnlySpan secondSegment = chars[slice1..]; - expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, secondSegment, expectedBuffer, options.Encoder)); + expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, expectedBuffer, options.Encoder, base64Encode: false)); actualBuffer = new byte[expected.Length]; using MemoryStream stream = new(actualBuffer); using Utf8JsonWriter writer = new(stream, options); + // UTF-8 + UTF-16 writer.WriteStringValueSegment(firstSegment, false); Assert.Throws>(state => writer.WriteStringValueSegment(state, true), secondSegment); + + stream.Position = 0; + writer.Reset(); + + // UTF-8 + Base64 + writer.WriteStringValueSegment(firstSegment, false); + Assert.Throws>(state => writer.WriteBase64StringSegment(state, true), MemoryMarshal.AsBytes(secondSegment)); + } + + Array.Clear(expectedBuffer); + + { + ReadOnlySpan firstSegment = bytes[0..(2 * slice1)]; + ReadOnlySpan secondSegment = chars[slice1..]; + + expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, expectedBuffer, options.Encoder, base64Encode: true)); + + actualBuffer = new byte[expected.Length]; + using MemoryStream stream = new(actualBuffer); + using Utf8JsonWriter writer = new(stream, options); + + // 
Base64 + UTF-16 + writer.WriteBase64StringSegment(firstSegment, false); + Assert.Throws>(state => writer.WriteStringValueSegment(state, true), secondSegment); + + stream.Position = 0; + writer.Reset(); + + // Base64 + UTF-8 + writer.WriteBase64StringSegment(firstSegment, false); + Assert.Throws>(state => writer.WriteStringValueSegment(state, true), MemoryMarshal.AsBytes(secondSegment)); } } ArrayPool.Shared.Return(expectedBuffer); } - private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan bytes, ReadOnlySpan ranges) + private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan bytes, ReadOnlySpan ranges, bool base64Encode) { - for (int i = 0; i < ranges.Length; i++) + if (base64Encode) + { + for (int i = 0; i < ranges.Length; i++) + { + writer.WriteBase64StringSegment(bytes[ranges[i]], i == ranges.Length - 1); + } + } + else { - writer.WriteStringValueSegment(bytes[ranges[i]], i == ranges.Length - 1); + for (int i = 0; i < ranges.Length; i++) + { + writer.WriteStringValueSegment(bytes[ranges[i]], i == ranges.Length - 1); + } } } @@ -165,10 +223,20 @@ private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan } } - private static int EncodeToUtf8(ReadOnlySpan bytes, Span destBuffer, JavaScriptEncoder encoder) + private static int EncodeToUtf8(ReadOnlySpan bytes, Span destBuffer, JavaScriptEncoder encoder, bool base64Encode) { destBuffer[0] = (byte)'"'; - encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int written, isFinalBlock: true); + + int written; + if (base64Encode) + { + Base64.EncodeToUtf8(bytes, destBuffer[1..], out _, out written, isFinalBlock: true); + } + else + { + encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out written, isFinalBlock: true); + } + destBuffer[++written] = (byte)'"'; return written + 1; } @@ -181,27 +249,6 @@ private static int EncodeToUtf8(ReadOnlySpan chars, Span destBuffer, return written + 1; } - private static int EncodeToUtf8(ReadOnlySpan bytes, 
ReadOnlySpan chars, Span destBuffer, JavaScriptEncoder encoder) - { - int written = 1; - destBuffer[0] = (byte)'"'; - encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int writtenTemp, isFinalBlock: true); - written += writtenTemp; - destBuffer[written += EncodeTranscode(chars, destBuffer[written..], encoder, isFinalBlock: true)] = (byte)'"'; - return written + 1; - } - - private static int EncodeToUtf8(ReadOnlySpan chars, ReadOnlySpan bytes, Span destBuffer, JavaScriptEncoder encoder) - { - int written = 1; - destBuffer[0] = (byte)'"'; - written += EncodeTranscode(chars, destBuffer[1..], encoder, isFinalBlock: true); - encoder.EncodeUtf8(bytes, destBuffer[written..], out _, out int writtenTemp, isFinalBlock: true); - written += writtenTemp; - destBuffer[written] = (byte)'"'; - return written + 1; - } - private static int EncodeTranscode(ReadOnlySpan chars, Span destBuffer, JavaScriptEncoder encoder, bool isFinalBlock = true) { var utf16buffer = ArrayPool.Shared.Rent(6 * chars.Length); diff --git a/src/libraries/System.Text.Json/ref/System.Text.Json.cs b/src/libraries/System.Text.Json/ref/System.Text.Json.cs index 469ea5e4f07c89..0b1d60e5c8b4c1 100644 --- a/src/libraries/System.Text.Json/ref/System.Text.Json.cs +++ b/src/libraries/System.Text.Json/ref/System.Text.Json.cs @@ -681,6 +681,7 @@ public void WriteStringValue(string? 
value) { } public void WriteStringValue(System.Text.Json.JsonEncodedText value) { } public void WriteStringValueSegment(System.ReadOnlySpan value, bool isFinalSegment) { } public void WriteStringValueSegment(System.ReadOnlySpan value, bool isFinalSegment) { } + public void WriteBase64StringSegment(ReadOnlySpan value, bool isFinalSegment) { } } } namespace System.Text.Json.Nodes diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs index a8440144d4cf88..00f927aae9c791 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs @@ -12,9 +12,9 @@ namespace System.Text.Json { public sealed partial class Utf8JsonWriter { - private bool HasPartialCodePoint => PartialCodePointLength != 0; + private bool HasPartialStringData => PartialStringDataLength != 0; - private void ClearPartialCodePoint() => PartialCodePointLength = 0; + private void ClearPartialStringData() => PartialStringDataLength = 0; private void ValidateEncodingDidNotChange(SegmentEncoding currentSegmentEncoding) { @@ -32,7 +32,7 @@ private void ValidateNotWithinUnfinalizedString() } Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None); - Debug.Assert(!HasPartialCodePoint); + Debug.Assert(!HasPartialStringData); } private void ValidateWritingValue() diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.StringSegment.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.StringSegment.cs index 08b517cce9648d..97efe65aba655c 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.StringSegment.cs +++ 
b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.StringSegment.cs @@ -3,10 +3,7 @@ using System.Buffers; using System.Buffers.Text; -using System.ComponentModel; using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; namespace System.Text.Json { @@ -34,7 +31,7 @@ public void WriteStringValueSegment(ReadOnlySpan value, bool isFinalSegmen if (_tokenType != Utf8JsonWriter.StringSegmentSentinel) { Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None); - Debug.Assert(!HasPartialCodePoint); + Debug.Assert(!HasPartialStringData); if (!_options.SkipValidation) { @@ -53,7 +50,7 @@ public void WriteStringValueSegment(ReadOnlySpan value, bool isFinalSegmen // The steps to write a string segment are to complete the previous partial code point // and escape either of which might not be required so there is a fast path for each of these steps. - if (HasPartialCodePoint) + if (HasPartialStringData) { WriteStringSegmentWithLeftover(value, isFinalSegment); } @@ -74,35 +71,35 @@ public void WriteStringValueSegment(ReadOnlySpan value, bool isFinalSegmen private void WriteStringSegmentWithLeftover(scoped ReadOnlySpan value, bool isFinalSegment) { - Debug.Assert(HasPartialCodePoint); + Debug.Assert(HasPartialStringData); Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Utf16); - scoped ReadOnlySpan partialCodePointBuffer = PartialUtf16CodePoint; + scoped ReadOnlySpan partialStringDataBuffer = PartialUtf16StringData; Span combinedBuffer = stackalloc char[2]; - combinedBuffer = combinedBuffer.Slice(0, ConcatInto(partialCodePointBuffer, value, combinedBuffer)); + combinedBuffer = combinedBuffer.Slice(0, ConcatInto(partialStringDataBuffer, value, combinedBuffer)); switch (Rune.DecodeFromUtf16(combinedBuffer, out _, out int charsConsumed)) { case OperationStatus.NeedMoreData: - Debug.Assert(value.Length + partialCodePointBuffer.Length < 2); - Debug.Assert(charsConsumed == value.Length + 
partialCodePointBuffer.Length); + Debug.Assert(value.Length + partialStringDataBuffer.Length < 2); + Debug.Assert(charsConsumed == value.Length + partialStringDataBuffer.Length); // Let the encoder deal with the error if this is a final buffer. value = combinedBuffer.Slice(0, charsConsumed); - partialCodePointBuffer = ReadOnlySpan.Empty; + partialStringDataBuffer = []; break; case OperationStatus.Done: - Debug.Assert(charsConsumed > partialCodePointBuffer.Length); + Debug.Assert(charsConsumed > partialStringDataBuffer.Length); Debug.Assert(charsConsumed <= 2); // Divide up the code point chars into its own buffer and the remainder of the input buffer. - value = value.Slice(charsConsumed - partialCodePointBuffer.Length); - partialCodePointBuffer = combinedBuffer.Slice(0, charsConsumed); + value = value.Slice(charsConsumed - partialStringDataBuffer.Length); + partialStringDataBuffer = combinedBuffer.Slice(0, charsConsumed); break; case OperationStatus.InvalidData: - Debug.Assert(charsConsumed >= partialCodePointBuffer.Length); + Debug.Assert(charsConsumed >= partialStringDataBuffer.Length); Debug.Assert(charsConsumed <= 2); - value = value.Slice(charsConsumed - partialCodePointBuffer.Length); - partialCodePointBuffer = combinedBuffer.Slice(0, charsConsumed); + value = value.Slice(charsConsumed - partialStringDataBuffer.Length); + partialStringDataBuffer = combinedBuffer.Slice(0, charsConsumed); break; case OperationStatus.DestinationTooSmall: default: @@ -111,7 +108,7 @@ private void WriteStringSegmentWithLeftover(scoped ReadOnlySpan value, boo } // The "isFinalSegment" argument indicates whether input that NeedsMoreData should be consumed as an error or not. 
- // Because we have validated above that partialCodePointBuffer will be the next consumed chars during Rune decoding + // Because we have validated above that partialStringDataBuffer will be the next consumed chars during Rune decoding // (even if this is because it is invalid), we should pass isFinalSegment = true to indicate to the decoder to // parse the code units without extra data. // @@ -119,9 +116,9 @@ private void WriteStringSegmentWithLeftover(scoped ReadOnlySpan value, boo // to determine that only the first unit should be consumed (as invalid). So this method will get only ['\uD800']. // Because we know more data will not be able to complete this code point, we need to pass isFinalSegment = true // to ensure that the encoder consumes this data eagerly instead of leaving it and returning NeedsMoreData. - WriteStringSegmentEscape(partialCodePointBuffer, true); + WriteStringSegmentEscape(partialStringDataBuffer, true); - ClearPartialCodePoint(); + ClearPartialStringData(); WriteStringSegmentEscape(value, isFinalSegment); } @@ -163,7 +160,7 @@ private void WriteStringSegmentEscapeValue(ReadOnlySpan value, int firstEs { Debug.Assert(!isFinalSegment); Debug.Assert(value.Length - consumed < 2); - PartialUtf16CodePoint = value.Slice(consumed); + PartialUtf16StringData = value.Slice(consumed); } if (valueArray != null) @@ -210,7 +207,7 @@ public void WriteStringValueSegment(ReadOnlySpan value, bool isFinalSegmen if (_tokenType != Utf8JsonWriter.StringSegmentSentinel) { Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None); - Debug.Assert(!HasPartialCodePoint); + Debug.Assert(!HasPartialStringData); if (!_options.SkipValidation) { @@ -229,7 +226,7 @@ public void WriteStringValueSegment(ReadOnlySpan value, bool isFinalSegmen // The steps to write a string segment are to complete the previous partial code point // and escape either of which might not be required so there is a fast path for each of these steps. 
- if (HasPartialCodePoint) + if (HasPartialStringData) { WriteStringSegmentWithLeftover(value, isFinalSegment); } @@ -250,35 +247,35 @@ public void WriteStringValueSegment(ReadOnlySpan value, bool isFinalSegmen private void WriteStringSegmentWithLeftover(scoped ReadOnlySpan utf8Value, bool isFinalSegment) { - Debug.Assert(HasPartialCodePoint); + Debug.Assert(HasPartialStringData); Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Utf8); - scoped ReadOnlySpan partialCodePointBuffer = PartialUtf8CodePoint; + scoped ReadOnlySpan partialStringDataBuffer = PartialUtf8StringData; Span combinedBuffer = stackalloc byte[4]; - combinedBuffer = combinedBuffer.Slice(0, ConcatInto(partialCodePointBuffer, utf8Value, combinedBuffer)); + combinedBuffer = combinedBuffer.Slice(0, ConcatInto(partialStringDataBuffer, utf8Value, combinedBuffer)); switch (Rune.DecodeFromUtf8(combinedBuffer, out _, out int bytesConsumed)) { case OperationStatus.NeedMoreData: - Debug.Assert(utf8Value.Length + partialCodePointBuffer.Length < 4); - Debug.Assert(bytesConsumed == utf8Value.Length + partialCodePointBuffer.Length); + Debug.Assert(utf8Value.Length + partialStringDataBuffer.Length < 4); + Debug.Assert(bytesConsumed == utf8Value.Length + partialStringDataBuffer.Length); // Let the encoder deal with the error if this is a final buffer. utf8Value = combinedBuffer.Slice(0, bytesConsumed); - partialCodePointBuffer = ReadOnlySpan.Empty; + partialStringDataBuffer = []; break; case OperationStatus.Done: - Debug.Assert(bytesConsumed > partialCodePointBuffer.Length); + Debug.Assert(bytesConsumed > partialStringDataBuffer.Length); Debug.Assert(bytesConsumed <= 4); // Divide up the code point bytes into its own buffer and the remainder of the input buffer. 
- utf8Value = utf8Value.Slice(bytesConsumed - partialCodePointBuffer.Length); - partialCodePointBuffer = combinedBuffer.Slice(0, bytesConsumed); + utf8Value = utf8Value.Slice(bytesConsumed - partialStringDataBuffer.Length); + partialStringDataBuffer = combinedBuffer.Slice(0, bytesConsumed); break; case OperationStatus.InvalidData: - Debug.Assert(bytesConsumed >= partialCodePointBuffer.Length); + Debug.Assert(bytesConsumed >= partialStringDataBuffer.Length); Debug.Assert(bytesConsumed <= 4); - utf8Value = utf8Value.Slice(bytesConsumed - partialCodePointBuffer.Length); - partialCodePointBuffer = combinedBuffer.Slice(0, bytesConsumed); + utf8Value = utf8Value.Slice(bytesConsumed - partialStringDataBuffer.Length); + partialStringDataBuffer = combinedBuffer.Slice(0, bytesConsumed); break; case OperationStatus.DestinationTooSmall: default: @@ -287,7 +284,7 @@ private void WriteStringSegmentWithLeftover(scoped ReadOnlySpan utf8Value, } // The "isFinalSegment" argument indicates whether input that NeedsMoreData should be consumed as an error or not. - // Because we have validated above that partialCodePointBuffer will be the next consumed bytes during Rune decoding + // Because we have validated above that partialStringDataBuffer will be the next consumed bytes during Rune decoding // (even if this is because it is invalid), we should pass isFinalSegment = true to indicate to the decoder to // parse the code units without extra data. // @@ -296,9 +293,9 @@ private void WriteStringSegmentWithLeftover(scoped ReadOnlySpan utf8Value, // So this method will get only <3-size prefix code unit>. Because we know more data will not be able // to complete this code point, we need to pass isFinalSegment = true to ensure that the encoder consumes this data eagerly // instead of leaving it and returning NeedsMoreData. 
- WriteStringSegmentEscape(partialCodePointBuffer, true); + WriteStringSegmentEscape(partialStringDataBuffer, true); - ClearPartialCodePoint(); + ClearPartialStringData(); WriteStringSegmentEscape(utf8Value, isFinalSegment); } @@ -337,7 +334,7 @@ private void WriteStringSegmentEscapeValue(ReadOnlySpan utf8Value, int fir { Debug.Assert(!isFinalSegment); Debug.Assert(utf8Value.Length - consumed < 4); - PartialUtf8CodePoint = utf8Value.Slice(consumed); + PartialUtf8StringData = utf8Value.Slice(consumed); } if (valueArray != null) @@ -363,6 +360,128 @@ private void WriteStringSegmentData(ReadOnlySpan escapedValue) BytesPending += escapedValue.Length; } + /// + /// Writes the input bytes as a partial JSON string. + /// + /// The bytes to be written as a JSON string element of a JSON array. + /// Indicates that this is the final segment of the string. + /// + /// Thrown when the specified value is too large. + /// + /// + /// Thrown if this would result in invalid JSON being written (while validation is enabled) or + /// if the previously written segment (if any) was not written with this same overload. + /// + public void WriteBase64StringSegment(ReadOnlySpan value, bool isFinalSegment) + { + if (value.Length > Base64.GetMaxDecodedFromUtf8Length(int.MaxValue)) + { + ThrowHelper.ThrowArgumentException_ValueTooLarge(value.Length); + } + + if (_tokenType != Utf8JsonWriter.StringSegmentSentinel) + { + Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None); + Debug.Assert(!HasPartialStringData); + + if (!_options.SkipValidation) + { + ValidateWritingValue(); + } + + WriteStringSegmentPrologue(); + + PreviousSegmentEncoding = SegmentEncoding.Base64; + _tokenType = Utf8JsonWriter.StringSegmentSentinel; + } + else + { + ValidateEncodingDidNotChange(SegmentEncoding.Base64); + } + + // The steps to write a string segment are to complete the previous partial string data + // and escape either of which might not be required so there is a fast path for each of these steps. 
+ if (HasPartialStringData) + { + WriteBase64StringSegmentWithLeftover(value, isFinalSegment); + } + else + { + WriteBase64StringSegmentData(value, isFinalSegment); + } + + if (isFinalSegment) + { + WriteStringSegmentEpilogue(); + + SetFlagToAddListSeparatorBeforeNextItem(); + PreviousSegmentEncoding = SegmentEncoding.None; + _tokenType = JsonTokenType.String; + } + } + + private void WriteBase64StringSegmentWithLeftover(scoped ReadOnlySpan bytes, bool isFinalSegment) + { + Debug.Assert(HasPartialStringData); + Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Base64); + + scoped ReadOnlySpan partialStringDataBuffer = PartialBase64StringData; + + Span combinedBuffer = stackalloc byte[3]; + combinedBuffer = combinedBuffer.Slice(0, ConcatInto(partialStringDataBuffer, bytes, combinedBuffer)); + if (combinedBuffer.Length is 3) + { + // Divide up the partial bytes into its own buffer and the remainder of the input buffer. + bytes = bytes.Slice(3 - partialStringDataBuffer.Length); + partialStringDataBuffer = combinedBuffer.Slice(0, 3); + } + else + { + Debug.Assert(combinedBuffer.Length is 1 or 2); + // Need more data. If this is a final segment, then the encoder will append '=' as needed. + Debug.Assert(bytes.Length + partialStringDataBuffer.Length < 3); + bytes = combinedBuffer; + partialStringDataBuffer = []; + } + + // It doesn't matter if we pass true or false for isFinalSegment since we are guaranteed to not have partial data + // here (it is either empty or completed using the combined buffer above). + WriteBase64StringSegmentData(partialStringDataBuffer, false); + + ClearPartialStringData(); + + WriteBase64StringSegmentData(bytes, isFinalSegment); + } + + private void WriteBase64StringSegmentData(ReadOnlySpan bytes, bool isFinalSegment) + { + int leftoverSize; + if (!isFinalSegment && (leftoverSize = bytes.Length % 3) != 0) + { + // If this is not the final segment, we need to wait for more data to come in. 
+ PartialBase64StringData = bytes.Slice(bytes.Length - leftoverSize); + bytes = bytes.Slice(0, bytes.Length - leftoverSize); + } + + if (bytes.Length == 0) + { + return; + } + + int requiredBytes = Base64.GetMaxEncodedToUtf8Length(bytes.Length); + + if (_memory.Length - BytesPending < requiredBytes) + { + Grow(requiredBytes); + } + + Span output = _memory.Span; + + // For non-final segments, the input is sliced to be a multiple of 3 bytes above which guarantees + // that the base64 encoding will never end with padding since 3x input bytes turn into exactly 4x base64 bytes. + Base64EncodeAndWrite(bytes, output); + } + private void WriteStringSegmentPrologue() { if (_options.Indented) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.cs index da9dc4b6bac503..50c102a0b14eac 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.cs @@ -8,7 +8,6 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; -using System.ComponentModel; namespace System.Text.Json { @@ -38,12 +37,13 @@ public sealed partial class Utf8JsonWriter : IDisposable, IAsyncDisposable // A special value for JsonTokenType that lets the writer keep track of string segments. private const JsonTokenType StringSegmentSentinel = (JsonTokenType)255; - // Masks and flags for the length and encoding of the partial code point - private const byte PartialCodePointLengthMask = 0b000_000_11; - private const byte PartialCodePointEncodingMask = 0b000_111_00; + // Masks and flags for the length and encoding of the partial string data. 
+ private const byte PartialStringDataLengthMask = 0b000_000_11; + private const byte PartialStringDataEncodingMask = 0b000_111_00; - private const byte PartialCodePointUtf8EncodingFlag = 0b000_001_00; - private const byte PartialCodePointUtf16EncodingFlag = 0b000_010_00; + private const byte PartialStringDataUtf8EncodingFlag = 0b000_001_00; + private const byte PartialStringDataUtf16EncodingFlag = 0b000_010_00; + private const byte PartialStringDataBase64EncodingFlag = 0b000_100_00; private IBufferWriter? _output; private Stream? _stream; @@ -57,15 +57,15 @@ public sealed partial class Utf8JsonWriter : IDisposable, IAsyncDisposable private BitStack _bitStack; /// - /// This 3-byte array stores the partial code point leftover when writing a string value + /// This 3-byte array stores the partial string data leftover when writing a string value /// segment that is split across multiple segment write calls. /// #if !NET - private byte[]? _partialCodePoint; - private Span PartialCodePointRaw => _partialCodePoint ??= new byte[3]; + private byte[]? _partialStringData; + private Span PartialStringDataRaw => _partialStringData ??= new byte[3]; #else - private Inline3ByteArray _partialCodePoint; - private Span PartialCodePointRaw => _partialCodePoint; + private Inline3ByteArray _partialStringData; + private Span PartialStringDataRaw => _partialStringData; [InlineArray(3)] private struct Inline3ByteArray @@ -75,11 +75,11 @@ private struct Inline3ByteArray #endif /// - /// Stores the length and encoding of the partial code point. Outside of segment writes, this value is 0. + /// Stores the length and encoding of the partial string data. Outside of segment writes, this value is 0. /// Across segment writes, this value is always non-zero even if the length is 0, to indicate the encoding of the segment. /// This allows detection of encoding changes across segment writes. 
/// - private byte _partialCodePointFlags; + private byte _partialStringDataFlags; // The highest order bit of _currentDepth is used to discern whether we are writing the first item in a list or not. // if (_currentDepth >> 31) == 1, add a list separator before writing the item @@ -128,68 +128,96 @@ private struct Inline3ByteArray public int CurrentDepth => _currentDepth & JsonConstants.RemoveFlagsBitMask; /// - /// Length of the partial code point. + /// Length of the partial string data. /// - private byte PartialCodePointLength + private byte PartialStringDataLength { - get => (byte)(_partialCodePointFlags & PartialCodePointLengthMask); - set => _partialCodePointFlags = (byte)((_partialCodePointFlags & ~PartialCodePointLengthMask) | (byte)value); + get => (byte)(_partialStringDataFlags & PartialStringDataLengthMask); + set => _partialStringDataFlags = (byte)((_partialStringDataFlags & ~PartialStringDataLengthMask) | value); } /// /// The partial UTF-8 code point. /// - private ReadOnlySpan PartialUtf8CodePoint + private ReadOnlySpan PartialUtf8StringData { get { Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Utf8); - ReadOnlySpan partialCodePointBytes = PartialCodePointRaw; - Debug.Assert(partialCodePointBytes.Length == 3); + ReadOnlySpan partialStringDataBytes = PartialStringDataRaw; + Debug.Assert(partialStringDataBytes.Length == 3); - byte length = PartialCodePointLength; + byte length = PartialStringDataLength; Debug.Assert(length < 4); - return partialCodePointBytes.Slice(0, length); + return partialStringDataBytes.Slice(0, length); } set { Debug.Assert(value.Length <= 3); - Span partialCodePointBytes = PartialCodePointRaw; + Span partialStringDataBytes = PartialStringDataRaw; - value.CopyTo(partialCodePointBytes); - PartialCodePointLength = (byte)value.Length; + value.CopyTo(partialStringDataBytes); + PartialStringDataLength = (byte)value.Length; } } /// /// The partial UTF-16 code point. 
/// - private ReadOnlySpan PartialUtf16CodePoint + private ReadOnlySpan PartialUtf16StringData { get { Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Utf16); - ReadOnlySpan partialCodePointBytes = PartialCodePointRaw; - Debug.Assert(partialCodePointBytes.Length == 3); + ReadOnlySpan partialStringDataBytes = PartialStringDataRaw; + Debug.Assert(partialStringDataBytes.Length == 3); - byte length = PartialCodePointLength; + byte length = PartialStringDataLength; Debug.Assert(length is 2 or 0); - return MemoryMarshal.Cast(partialCodePointBytes.Slice(0, length)); + return MemoryMarshal.Cast(partialStringDataBytes.Slice(0, length)); } set { Debug.Assert(value.Length <= 1); - Span partialCodePointBytes = PartialCodePointRaw; + Span partialStringDataBytes = PartialStringDataRaw; - value.CopyTo(MemoryMarshal.Cast(partialCodePointBytes)); - PartialCodePointLength = (byte)(2 * value.Length); + value.CopyTo(MemoryMarshal.Cast(partialStringDataBytes)); + PartialStringDataLength = (byte)(2 * value.Length); + } + } + + /// + /// The partial base64 data. 
+ /// + private ReadOnlySpan PartialBase64StringData + { + get + { + Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Base64); + + ReadOnlySpan partialStringDataBytes = PartialStringDataRaw; + Debug.Assert(partialStringDataBytes.Length == 3); + + byte length = PartialStringDataLength; + Debug.Assert(length < 3); + + return partialStringDataBytes.Slice(0, length); + } + set + { + Debug.Assert(value.Length < 3); + + Span partialStringDataBytes = PartialStringDataRaw; + + value.CopyTo(partialStringDataBytes); + PartialStringDataLength = (byte)value.Length; } } @@ -198,18 +226,19 @@ private ReadOnlySpan PartialUtf16CodePoint /// private SegmentEncoding PreviousSegmentEncoding { - get => (SegmentEncoding)(_partialCodePointFlags & PartialCodePointEncodingMask); - set => _partialCodePointFlags = (byte)((_partialCodePointFlags & ~PartialCodePointEncodingMask) | (byte)value); + get => (SegmentEncoding)(_partialStringDataFlags & PartialStringDataEncodingMask); + set => _partialStringDataFlags = (byte)((_partialStringDataFlags & ~PartialStringDataEncodingMask) | (byte)value); } /// - /// Convenience enumeration to track the encoding of the partial code point. This must be kept in sync with the PartialCodePoint*Encoding flags. + /// Convenience enumeration to track the encoding of the partial string data. This must be kept in sync with the PartialStringData*Encoding flags. 
/// internal enum SegmentEncoding : byte { None = 0, - Utf8 = PartialCodePointUtf8EncodingFlag, - Utf16 = PartialCodePointUtf16EncodingFlag, + Utf8 = PartialStringDataUtf8EncodingFlag, + Utf16 = PartialStringDataUtf16EncodingFlag, + Base64 = PartialStringDataBase64EncodingFlag, } private Utf8JsonWriter() @@ -390,8 +419,8 @@ private void ResetHelper() _bitStack = default; - _partialCodePoint = default; - _partialCodePointFlags = default; + _partialStringData = default; + _partialStringDataFlags = default; } private void CheckNotDisposed() diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.Values.StringSegment.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.Values.StringSegment.cs index 9d95eeb0f9a26a..4a274ff926c5c6 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.Values.StringSegment.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.Values.StringSegment.cs @@ -4,10 +4,13 @@ using System.Buffers; +using System.Buffers.Text; using System.Collections.Generic; using System.Linq; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text.Encodings.Web; +using System.Text.Unicode; using Xunit; namespace System.Text.Json.Tests @@ -62,7 +65,7 @@ public static void WriteStringValueSegment_Utf16_SplitCodePointsReplacement(char options.Encoder.Encode(inputArr, expectedChars, out int charsConsumed, out int charsWritten); Assert.Equal(inputArr.Length, charsConsumed); - SplitCodePointsHelper(inputArr, $@"""{new string(expectedChars, 0, charsWritten)}""", options); + SplitStringDataHelper(inputArr, options, $@"""{new string(expectedChars, 0, charsWritten)}""", StringValueEncodingType.Utf16); } public static IEnumerable InvalidUtf8Data() @@ -120,31 +123,34 @@ public static void WriteStringValueSegment_Utf8_SplitCodePointsReplacement(byte[ string expectedString = 
$@"""{Encoding.UTF8.GetString(expectedBytes, 0, bytesWritten)}"""; - SplitCodePointsHelper(inputArr, expectedString, options); + SplitStringDataHelper(inputArr, options, expectedString, StringValueEncodingType.Utf8); } - private static void SplitCodePointsHelper( + private static void SplitStringDataHelper( T[] inputArr, + JsonWriterOptions options, string expected, - JsonWriterOptions options) + StringValueEncodingType encoding) where T : struct { - SplitCodePointsHelper(inputArr, options, output => JsonTestHelper.AssertContents(expected, output)); + SplitStringDataHelper(inputArr, options, output => JsonTestHelper.AssertContents(expected, output), encoding); } - private static void SplitCodePointsHelper( + private static void SplitStringDataHelper( T[] inputArr, JsonWriterOptions options, - Action> assert) + Action> assert, + StringValueEncodingType encoding) where T : struct { - SplitCodePointsHelper(inputArr.AsSpan(), options, assert); + SplitStringDataHelper(inputArr.AsSpan(), options, assert, encoding); } - private static void SplitCodePointsHelper( + private static void SplitStringDataHelper( ReadOnlySpan inputArr, JsonWriterOptions options, - Action> assert) + Action> assert, + StringValueEncodingType encoding) where T : struct { ReadOnlySpan input = inputArr; @@ -155,7 +161,7 @@ private static void SplitCodePointsHelper( using (var writer = new Utf8JsonWriter(output, options)) { - WriteStringValueHelper(writer, input); + WriteStringValueHelper(writer, input, encoding); writer.Flush(); } @@ -168,7 +174,7 @@ private static void SplitCodePointsHelper( using (var writer = new Utf8JsonWriter(output, options)) { - WriteStringValueSegmentsHelper(writer, input.Slice(0, splitIndex), input.Slice(splitIndex)); + WriteStringValueSegmentsHelper(writer, input.Slice(0, splitIndex), input.Slice(splitIndex), encoding); writer.Flush(); } @@ -183,7 +189,7 @@ private static void SplitCodePointsHelper( using (var writer = new Utf8JsonWriter(output, options)) { - 
WriteStringValueSegmentsHelper(writer, input.Slice(0, splitIndex), input.Slice(splitIndex, splitIndex2 - splitIndex), input.Slice(splitIndex2)); + WriteStringValueSegmentsHelper(writer, input.Slice(0, splitIndex), input.Slice(splitIndex, splitIndex2 - splitIndex), input.Slice(splitIndex2), encoding); writer.Flush(); } @@ -201,9 +207,25 @@ public static void WriteStringValueSegment_Utf16_Basic(JsonWriterOptions options " Wor".AsSpan(), "ld!".AsSpan(), options.Encoder.Encode("Hello"), - options.Encoder.Encode(" Wor"), - options.Encoder.Encode("ld!"), - options); + options.Encoder.Encode("Hello Wor"), + options.Encoder.Encode("Hello World!"), + options, + StringValueEncodingType.Utf16); + } + + [Theory] + [MemberData(nameof(BasicStringJsonOptions_TestData))] + public static void WriteStringValueSegment_Utf16_BasicSplit(JsonWriterOptions options) + { + WriteStringValueSegment_BasicHelper( + "\uD800 <- Invalid Partial -> \uD800".AsSpan(), + "\uDC00 <- Partial".AsSpan(), + " Invalid -> \uD800".AsSpan(), + options.Encoder.Encode("\uD800 <- Invalid Partial -> \uD800"), + options.Encoder.Encode("\uD800 <- Invalid Partial -> \uD800\uDC00 <- Partial"), + options.Encoder.Encode("\uD800 <- Invalid Partial -> \uD800\uDC00 <- Partial Invalid -> \uD800"), + options, + StringValueEncodingType.Utf16); } [Theory] @@ -215,9 +237,57 @@ public static void WriteStringValueSegment_Utf8_Basic(JsonWriterOptions options) " Wor"u8, "ld!"u8, options.Encoder.Encode("Hello"), - options.Encoder.Encode(" Wor"), - options.Encoder.Encode("ld!"), - options); + options.Encoder.Encode("Hello Wor"), + options.Encoder.Encode("Hello World!"), + options, + StringValueEncodingType.Utf8); + } + + [Theory] + [MemberData(nameof(BasicStringJsonOptions_TestData))] + public static void WriteStringValueSegment_Utf8_BasicSplit(JsonWriterOptions options) + { + byte[] segment1 = [0b10_000000, .. " <- Invalid Partial -> "u8, 0b110_11111]; + byte[] segment2 = [0b10_111111, .. " <- Partial"u8]; + byte[] segment3 = [.. 
" Invalid -> "u8, 0b110_11111]; + WriteStringValueSegment_BasicHelper( + segment1, + segment2, + segment3, + // Since we're using string (base-16) encode for convenience, we just use an invalid utf-16 character + options.Encoder.Encode("\udc00 <- Invalid Partial -> \udc00"), + options.Encoder.Encode("\udc00 <- Invalid Partial -> \u07ff <- Partial"), + options.Encoder.Encode("\udc00 <- Invalid Partial -> \u07ff <- Partial Invalid -> \udc00"), + options, + StringValueEncodingType.Utf8); + } + + [Fact] + public static void WriteStringValueSegment_Base64_Basic() + { + { + WriteStringValueSegment_BasicHelper( + "Hello"u8, + " Worl"u8, + "d!"u8, + "SGVsbG8=", + "SGVsbG8gV29ybA==", + "SGVsbG8gV29ybGQh", + new JsonWriterOptions { Indented = false }, + StringValueEncodingType.Base64); + } + + { + WriteStringValueSegment_BasicHelper( + "Hello"u8, + " Worl"u8, + "d!"u8, + "SGVsbG8=", + "SGVsbG8gV29ybA==", + "SGVsbG8gV29ybGQh", + new JsonWriterOptions { Indented = true }, + StringValueEncodingType.Base64); + } } private static void WriteStringValueSegment_BasicHelper( @@ -227,27 +297,29 @@ private static void WriteStringValueSegment_BasicHelper( string expected1, string expected2, string expected3, - JsonWriterOptions options) + JsonWriterOptions options, + StringValueEncodingType encoding) where T : struct { + JavaScriptEncoder encoder = options.Encoder ?? JavaScriptEncoder.Default; string indent = options.Indented ? new string(options.IndentCharacter, options.IndentSize) : ""; string n = options.Indented ? options.NewLine : ""; string ni = n + indent; string nii = ni + indent; string s = options.Indented ? 
" " : ""; string e1 = '"' + expected1 + '"'; - string e2 = '"' + expected1 + expected2 + '"'; - string e3 = '"' + expected1 + expected2 + expected3 + '"'; - string foo = '"' + options.Encoder.Encode("foo") + '"'; - string bar = '"' + options.Encoder.Encode("bar") + '"'; - string baz = '"' + options.Encoder.Encode("baz") + '"'; - string inner = '"' + options.Encoder.Encode("inner") + '"'; + string e2 = '"' + expected2 + '"'; + string e3 = '"' + expected3 + '"'; + string foo = '"' + encoder.Encode("foo") + '"'; + string bar = '"' + encoder.Encode("bar") + '"'; + string baz = '"' + encoder.Encode("baz") + '"'; + string inner = '"' + encoder.Encode("inner") + '"'; // JSON string { var output = new ArrayBufferWriter(); using var jsonUtf8 = new Utf8JsonWriter(output, options); - WriteStringValueSegmentsHelper(jsonUtf8, segment1); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, encoding); jsonUtf8.Flush(); JsonTestHelper.AssertContents(e1, output); @@ -256,7 +328,7 @@ private static void WriteStringValueSegment_BasicHelper( { var output = new ArrayBufferWriter(); using var jsonUtf8 = new Utf8JsonWriter(output, options); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, encoding); jsonUtf8.Flush(); JsonTestHelper.AssertContents(e2, output); @@ -265,7 +337,7 @@ private static void WriteStringValueSegment_BasicHelper( { var output = new ArrayBufferWriter(); using var jsonUtf8 = new Utf8JsonWriter(output, options); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3, encoding); jsonUtf8.Flush(); JsonTestHelper.AssertContents(e3, output); @@ -276,7 +348,7 @@ private static void WriteStringValueSegment_BasicHelper( var output = new ArrayBufferWriter(); using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartArray(); - WriteStringValueSegmentsHelper(jsonUtf8, segment1); + 
WriteStringValueSegmentsHelper(jsonUtf8, segment1, encoding); jsonUtf8.WriteEndArray(); jsonUtf8.Flush(); @@ -289,7 +361,7 @@ private static void WriteStringValueSegment_BasicHelper( var output = new ArrayBufferWriter(); using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartArray(); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, encoding); jsonUtf8.WriteEndArray(); jsonUtf8.Flush(); @@ -302,7 +374,7 @@ private static void WriteStringValueSegment_BasicHelper( var output = new ArrayBufferWriter(); using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartArray(); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3, encoding); jsonUtf8.WriteEndArray(); jsonUtf8.Flush(); @@ -317,7 +389,7 @@ private static void WriteStringValueSegment_BasicHelper( using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartArray(); jsonUtf8.WriteBooleanValue(true); - WriteStringValueSegmentsHelper(jsonUtf8, segment1); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, encoding); jsonUtf8.WriteBooleanValue(false); jsonUtf8.WriteEndArray(); jsonUtf8.Flush(); @@ -332,7 +404,7 @@ private static void WriteStringValueSegment_BasicHelper( using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartArray(); jsonUtf8.WriteBooleanValue(true); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, encoding); jsonUtf8.WriteBooleanValue(false); jsonUtf8.WriteEndArray(); jsonUtf8.Flush(); @@ -347,7 +419,7 @@ private static void WriteStringValueSegment_BasicHelper( using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartArray(); jsonUtf8.WriteBooleanValue(true); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + 
WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3, encoding); jsonUtf8.WriteBooleanValue(false); jsonUtf8.WriteEndArray(); jsonUtf8.Flush(); @@ -364,7 +436,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartArray(); jsonUtf8.WriteStartArray(); jsonUtf8.WriteBooleanValue(true); - WriteStringValueSegmentsHelper(jsonUtf8, segment1); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, encoding); jsonUtf8.WriteBooleanValue(false); jsonUtf8.WriteEndArray(); jsonUtf8.WriteEndArray(); @@ -381,7 +453,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartArray(); jsonUtf8.WriteStartArray(); jsonUtf8.WriteBooleanValue(true); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, encoding); jsonUtf8.WriteBooleanValue(false); jsonUtf8.WriteEndArray(); jsonUtf8.WriteEndArray(); @@ -398,7 +470,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartArray(); jsonUtf8.WriteStartArray(); jsonUtf8.WriteBooleanValue(true); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3, encoding); jsonUtf8.WriteBooleanValue(false); jsonUtf8.WriteEndArray(); jsonUtf8.WriteEndArray(); @@ -415,7 +487,7 @@ private static void WriteStringValueSegment_BasicHelper( using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartObject(); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, encoding); jsonUtf8.WriteEndObject(); jsonUtf8.Flush(); @@ -429,7 +501,7 @@ private static void WriteStringValueSegment_BasicHelper( using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartObject(); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + 
WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, encoding); jsonUtf8.WriteEndObject(); jsonUtf8.Flush(); @@ -443,7 +515,7 @@ private static void WriteStringValueSegment_BasicHelper( using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartObject(); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3, encoding); jsonUtf8.WriteEndObject(); jsonUtf8.Flush(); @@ -459,7 +531,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartObject(); jsonUtf8.WriteBoolean("bar", true); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, encoding); jsonUtf8.WriteBoolean("baz", false); jsonUtf8.WriteEndObject(); jsonUtf8.Flush(); @@ -475,7 +547,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartObject(); jsonUtf8.WriteBoolean("bar", true); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, encoding); jsonUtf8.WriteBoolean("baz", false); jsonUtf8.WriteEndObject(); jsonUtf8.Flush(); @@ -491,7 +563,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartObject(); jsonUtf8.WriteBoolean("bar", true); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3, encoding); jsonUtf8.WriteBoolean("baz", false); jsonUtf8.WriteEndObject(); jsonUtf8.Flush(); @@ -509,7 +581,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartObject("inner"); jsonUtf8.WriteBoolean("bar", true); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1); + WriteStringValueSegmentsHelper(jsonUtf8, 
segment1, encoding); jsonUtf8.WriteBoolean("baz", false); jsonUtf8.WriteEndObject(); jsonUtf8.WriteEndObject(); @@ -527,7 +599,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartObject("inner"); jsonUtf8.WriteBoolean("bar", true); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, encoding); jsonUtf8.WriteBoolean("baz", false); jsonUtf8.WriteEndObject(); jsonUtf8.WriteEndObject(); @@ -545,7 +617,7 @@ private static void WriteStringValueSegment_BasicHelper( jsonUtf8.WriteStartObject("inner"); jsonUtf8.WriteBoolean("bar", true); jsonUtf8.WritePropertyName("foo"); - WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3, encoding); jsonUtf8.WriteBoolean("baz", false); jsonUtf8.WriteEndObject(); jsonUtf8.WriteEndObject(); @@ -639,6 +711,17 @@ public static void WriteStringValueSegment_Utf8_Split8CodePointsBasic() JsonTestHelper.AssertContents($"{{\"full\":\"{result}\",\"segmented\":\"{result}\"}}", output); } + [Theory] + [InlineData("", "")] + [InlineData("0 padding", "MCBwYWRkaW5n")] + [InlineData("_1 padding", "XzEgcGFkZGluZw==")] + [InlineData("__2 padding", "X18yIHBhZGRpbmc=")] + public static void WriteStringValueSegment_Base64_SplitDataBasic(string input, string expected) + { + byte[] bytes = input.Select(c => (byte)c).ToArray(); + SplitStringDataHelper(bytes, new JsonWriterOptions { Indented = true }, "\"" + expected + "\"", StringValueEncodingType.Base64); + } + [Fact] public static void WriteStringValueSegment_Utf8_ClearedPartial() { @@ -699,9 +782,9 @@ public static void WriteStringValueSegment_Utf16_ClearedPartial() jsonUtf8.WriteStartArray(); - WriteStringValueSegmentsHelper(jsonUtf8, ['\uD800'], ['\uDC00']); - WriteStringValueSegmentsHelper(jsonUtf8, ['\uDC00']); - WriteStringValueSegmentsHelper(jsonUtf8, ['\uD800'], ['\uDC00'], 
['\uDC00']); + WriteStringValueSegmentsHelper(jsonUtf8, ['\uD800'], ['\uDC00'], StringValueEncodingType.Utf16); + WriteStringValueSegmentsHelper(jsonUtf8, ['\uDC00'], StringValueEncodingType.Utf16); + WriteStringValueSegmentsHelper(jsonUtf8, ['\uD800'], ['\uDC00'], ['\uDC00'], StringValueEncodingType.Utf16); jsonUtf8.WriteEndArray(); @@ -712,22 +795,108 @@ public static void WriteStringValueSegment_Utf16_ClearedPartial() } } + [Fact] + public static void WriteStringValueSegment_Base64_ClearedPartial() + { + var output = new ArrayBufferWriter(); + + { + var expected = new StringBuilder(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + jsonUtf8.WriteStartArray(); + expected.Append('['); + + // Helpers to build up the expected string + var buffer = new List(); + ReadOnlySpan AddPartial(ReadOnlySpan arr) + { + foreach (byte b in arr) buffer.Add(b); + + return arr; + } + + ReadOnlySpan AddFinal(ReadOnlySpan arr) + { + foreach (byte b in arr) buffer.Add(b); + + expected.Append('"'); + expected.Append(Convert.ToBase64String(buffer.ToArray())); + expected.Append('"'); + expected.Append(','); + buffer.Clear(); + + return arr; + } + + // 1 segment + WriteStringValueSegmentsHelper(jsonUtf8, AddFinal([]), StringValueEncodingType.Base64); + WriteStringValueSegmentsHelper(jsonUtf8, AddFinal([0]), StringValueEncodingType.Base64); + WriteStringValueSegmentsHelper(jsonUtf8, AddFinal([0, 1]), StringValueEncodingType.Base64); + WriteStringValueSegmentsHelper(jsonUtf8, AddFinal([0, 1, 2]), StringValueEncodingType.Base64); + + // 2 segments + for (int i = 0; i <= 3; i++) + { + for (int j = 0; j <= 3; j++) + { + WriteStringValueSegmentsHelper( + jsonUtf8, + AddPartial([..Enumerable.Range(0, i).Select(x => (byte)x)]), + AddFinal([..Enumerable.Range(i, j).Select(x => (byte)x)]), + StringValueEncodingType.Base64); + } + } + + // 3 segments + for (int i = 0; i <= 3; i++) + { + for (int j = 0; j <= 3; j++) + { + for (int k = 0; k <= 3; k++) + { + WriteStringValueSegmentsHelper( + 
jsonUtf8, + AddPartial([..Enumerable.Range(0, i).Select(x => (byte)x)]), + AddPartial([..Enumerable.Range(i, j).Select(x => (byte)x)]), + AddFinal([.. Enumerable.Range(i + j, k).Select(x => (byte)x)]), + StringValueEncodingType.Base64); + } + } + } + + // Remove trailing comma + expected.Remove(expected.Length - 1, 1); + + jsonUtf8.WriteEndArray(); + expected.Append(']'); + + jsonUtf8.Flush(); + + // Every array element must equal the Convert.ToBase64String output for the same concatenated bytes. + JsonTestHelper.AssertContents(expected.ToString(), output); + } + } + [Fact] public static void WriteStringValueSegment_Flush() { var noEscape = JavaScriptEncoder.UnsafeRelaxedJsonEscaping; - TestFlushImpl('\uD800', '\uDC00', new(), @"""\uD800\uDC00"""); - TestFlushImpl(0b110_11111, 0b10_111111, new(), @"""\u07FF"""); - TestFlushImpl(0b110_11111, 0b10_111111, new() { Encoder = noEscape }, "\"\u07FF\""); - - void TestFlushImpl(T unit1, T unit2, JsonWriterOptions options, string expected) + TestFlushImpl(['\uD800'], ['\uDC00'], new(), @"""\uD800\uDC00""", StringValueEncodingType.Utf16); + TestFlushImpl([0b110_11111], [0b10_111111], new(), @"""\u07FF""", StringValueEncodingType.Utf8); + TestFlushImpl([0b110_11111], [0b10_111111], new() { Encoder = noEscape }, "\"\u07FF\"", StringValueEncodingType.Utf8); + TestFlushImpl([], [0, 0, 0], new(), "\"AAAA\"", StringValueEncodingType.Base64); + TestFlushImpl([0], [0, 0], new(), "\"AAAA\"", StringValueEncodingType.Base64); + TestFlushImpl([0, 0], [0], new(), "\"AAAA\"", StringValueEncodingType.Base64); + + void TestFlushImpl(ReadOnlySpan unit1, ReadOnlySpan unit2, JsonWriterOptions options, string expected, StringValueEncodingType encoding) where T : struct { byte[] expectedBytes = Encoding.UTF8.GetBytes(expected); var output = new ArrayBufferWriter(); using Utf8JsonWriter jsonUtf8 = new(output, options); - WriteStringValueSegmentHelper(jsonUtf8, [unit1], false); + WriteStringValueSegmentHelper(jsonUtf8, unit1, false, encoding); Assert.Equal(0, output.WrittenCount); 
Assert.Equal(0, jsonUtf8.BytesCommitted); @@ -738,7 +907,7 @@ void TestFlushImpl(T unit1, T unit2, JsonWriterOptions options, string expect Assert.Equal(1, jsonUtf8.BytesCommitted); Assert.Equal(0, jsonUtf8.BytesPending); - WriteStringValueSegmentHelper(jsonUtf8, [unit2], true); + WriteStringValueSegmentHelper(jsonUtf8, unit2, true, encoding); Assert.Equal(1, output.WrittenCount); Assert.Equal(1, jsonUtf8.BytesCommitted); @@ -813,6 +982,36 @@ public static void WriteStringValueSegment_Utf8_Reset() JsonTestHelper.AssertContents('"' + expected, output); } + [Fact] + public static void WriteStringValueSegment_Base64_Reset() + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + jsonUtf8.WriteBase64StringSegment([0], false); + jsonUtf8.Flush(); + + Assert.Equal(0, jsonUtf8.BytesPending); + Assert.Equal(1, jsonUtf8.BytesCommitted); + + jsonUtf8.Reset(); + + Assert.Equal(0, jsonUtf8.BytesPending); + Assert.Equal(0, jsonUtf8.BytesCommitted); + + jsonUtf8.WriteBase64StringSegment([0, 0, 0], true); + + string expected = @"""AAAA"""; + Assert.Equal(expected.Length, jsonUtf8.BytesPending); + Assert.Equal(0, jsonUtf8.BytesCommitted); + + jsonUtf8.Flush(); + + Assert.Equal(0, jsonUtf8.BytesPending); + Assert.Equal(expected.Length, jsonUtf8.BytesCommitted); + JsonTestHelper.AssertContents('"' + expected, output); + } + [Fact] public static void WriteStringValueSegment_MixEncoding() { @@ -826,6 +1025,26 @@ public static void WriteStringValueSegment_MixEncoding() Assert.Throws(() => jsonUtf8.WriteStringValueSegment([0b10_111111], true)); } + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + // High surrogate + jsonUtf8.WriteStringValueSegment("\uD8D8".AsSpan(), false); + + Assert.Throws(() => jsonUtf8.WriteBase64StringSegment([0], true)); + } + + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + // Start of a 3-byte sequence + 
jsonUtf8.WriteStringValueSegment([0b1110_1111], false); + + Assert.Throws(() => jsonUtf8.WriteStringValueSegment("\u8080".AsSpan(), true)); + } + { var output = new ArrayBufferWriter(); using var jsonUtf8 = new Utf8JsonWriter(output); @@ -833,6 +1052,26 @@ public static void WriteStringValueSegment_MixEncoding() // Start of a 3-byte sequence jsonUtf8.WriteStringValueSegment([0b1110_1111], false); + Assert.Throws(() => jsonUtf8.WriteBase64StringSegment([0], true)); + } + + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + // Partial Base64 + jsonUtf8.WriteBase64StringSegment([0], false); + + Assert.Throws(() => jsonUtf8.WriteStringValueSegment([0b10_111111], true)); + } + + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + // Partial Base64 + jsonUtf8.WriteBase64StringSegment([0], false); + Assert.Throws(() => jsonUtf8.WriteStringValueSegment("\u8080".AsSpan(), true)); } @@ -852,6 +1091,38 @@ public static void WriteStringValueSegment_MixEncoding() Assert.Throws(() => jsonUtf8.WriteStringValueSegment(ReadOnlySpan.Empty, false)); } + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment([0b110_11111], false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-8 sequence will still keep the partial code point + jsonUtf8.WriteStringValueSegment(ReadOnlySpan.Empty, false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty base64 sequence will throw + Assert.Throws(() => jsonUtf8.WriteBase64StringSegment(ReadOnlySpan.Empty, false)); + } + + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(['\uD800'], false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-16 sequence will still keep the partial code point + 
jsonUtf8.WriteStringValueSegment(ReadOnlySpan.Empty, false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-8 sequence will throw + Assert.Throws(() => jsonUtf8.WriteStringValueSegment(ReadOnlySpan.Empty, false)); + } + { var output = new ArrayBufferWriter(); using var jsonUtf8 = new Utf8JsonWriter(output); @@ -864,9 +1135,41 @@ public static void WriteStringValueSegment_MixEncoding() jsonUtf8.Flush(); JsonTestHelper.AssertContents("\"", output); - // Writing empty UTF-8 sequence will dump the partial UTF-16 code point + // Writing empty base64 sequence will throw + Assert.Throws(() => jsonUtf8.WriteBase64StringSegment(ReadOnlySpan.Empty, false)); + } + + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteBase64StringSegment([0], false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty base64 sequence will still keep the partial code point + jsonUtf8.WriteBase64StringSegment(ReadOnlySpan.Empty, false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-8 sequence will throw Assert.Throws(() => jsonUtf8.WriteStringValueSegment(ReadOnlySpan.Empty, false)); } + + { + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteBase64StringSegment([0], false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty base64 sequence will still keep the partial code point + jsonUtf8.WriteBase64StringSegment(ReadOnlySpan.Empty, false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-16 sequence will throw + Assert.Throws(() => jsonUtf8.WriteStringValueSegment(ReadOnlySpan.Empty, false)); + } } [Fact] @@ -941,146 +1244,191 @@ public static void WriteStringValueSegment_Empty() jsonUtf8.Flush(); JsonTestHelper.AssertContents("\"\"", output); } - } - // Switch this to use an enum 
discriminator input when base64 is supported - private static void WriteStringValueHelper(Utf8JsonWriter writer, ReadOnlySpan value) - where T : struct - { - if (typeof(T) == typeof(char)) - { - writer.WriteStringValue(MemoryMarshal.Cast(value)); - } - else if (typeof(T) == typeof(byte)) { - writer.WriteStringValue(MemoryMarshal.Cast(value)); + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteBase64StringSegment([], true); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"\"", output); } - else + { - if (typeof(T) == typeof(int)) - { - Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {nameof(WriteStringValueHelper)}."); - } - else - { - Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueHelper)}."); - } + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteBase64StringSegment([], false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); } - } - // Switch this to use an enum discriminator input when base64 is supported - private static void WriteStringValueSegmentHelper(Utf8JsonWriter writer, ReadOnlySpan value, bool isFinal) - where T : struct - { - if (typeof(T) == typeof(char)) { - writer.WriteStringValueSegment(MemoryMarshal.Cast(value), isFinal); + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteBase64StringSegment([], false); + jsonUtf8.WriteBase64StringSegment([], true); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"\"", output); } - else if (typeof(T) == typeof(byte)) + { - writer.WriteStringValueSegment(MemoryMarshal.Cast(value), isFinal); + var output = new ArrayBufferWriter(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteBase64StringSegment([], false); + jsonUtf8.WriteBase64StringSegment([], false); + jsonUtf8.WriteBase64StringSegment([], true); + 
jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"\"", output); } - else + } + + enum StringValueEncodingType + { + Utf8, + Utf16, + Base64, + } + + private static void EnsureByteOrChar([CallerMemberName]string caller = "") + { + if (typeof(T) != typeof(byte) && typeof(T) != typeof(char)) { if (typeof(T) == typeof(int)) { - Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); - } - else - { - Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {caller}."); } + + Assert.Fail($"Type {typeof(T)} is not supported by {caller}."); } } - // Switch this to use an enum discriminator input when base64 is supported - private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, ReadOnlySpan value) + private static void WriteStringValueHelper(Utf8JsonWriter writer, ReadOnlySpan value, StringValueEncodingType encoding) where T : struct { - if (typeof(T) == typeof(char)) + EnsureByteOrChar(); + + switch (encoding) { - writer.WriteStringValueSegment(MemoryMarshal.Cast(value), true); + case StringValueEncodingType.Utf16: + writer.WriteStringValue(MemoryMarshal.Cast(value)); + break; + case StringValueEncodingType.Utf8: + writer.WriteStringValue(MemoryMarshal.Cast(value)); + break; + case StringValueEncodingType.Base64: + writer.WriteBase64StringValue(MemoryMarshal.Cast(value)); + break; + default: + Assert.Fail($"Encoding {encoding} not valid."); + break; } - else if (typeof(T) == typeof(byte)) + } + + private static void WriteStringValueSegmentHelper(Utf8JsonWriter writer, ReadOnlySpan value, bool isFinal, StringValueEncodingType encoding) + where T : struct + { + EnsureByteOrChar(); + + switch (encoding) { - writer.WriteStringValueSegment(MemoryMarshal.Cast(value), true); + case StringValueEncodingType.Utf16: + 
writer.WriteStringValueSegment(MemoryMarshal.Cast(value), isFinal); + break; + case StringValueEncodingType.Utf8: + writer.WriteStringValueSegment(MemoryMarshal.Cast(value), isFinal); + break; + case StringValueEncodingType.Base64: + writer.WriteBase64StringSegment(MemoryMarshal.Cast(value), isFinal); + break; + default: + Assert.Fail($"Encoding {encoding} not valid."); + break; } - else + } + + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, ReadOnlySpan value, StringValueEncodingType encoding) + where T : struct + { + EnsureByteOrChar(); + + switch (encoding) { - if (typeof(T) == typeof(int)) - { - Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); - } - else - { - Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); - } + case StringValueEncodingType.Utf16: + writer.WriteStringValueSegment(MemoryMarshal.Cast(value), true); + break; + case StringValueEncodingType.Utf8: + writer.WriteStringValueSegment(MemoryMarshal.Cast(value), true); + break; + case StringValueEncodingType.Base64: + writer.WriteBase64StringSegment(MemoryMarshal.Cast(value), true); + break; + default: + Assert.Fail($"Encoding {encoding} not valid."); + break; } } // Switch this to use an enum discriminator input when base64 is supported - private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, ReadOnlySpan value1, ReadOnlySpan value2) + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, ReadOnlySpan value1, ReadOnlySpan value2, StringValueEncodingType encoding) where T : struct { - if (typeof(T) == typeof(char)) - { - writer.WriteStringValueSegment(MemoryMarshal.Cast(value1), false); - writer.WriteStringValueSegment(MemoryMarshal.Cast(value2), true); - } - else if (typeof(T) == typeof(byte)) - { - writer.WriteStringValueSegment(MemoryMarshal.Cast(value1), false); - 
writer.WriteStringValueSegment(MemoryMarshal.Cast(value2), true); - } - else + EnsureByteOrChar(); + + switch (encoding) { - if (typeof(T) == typeof(int)) - { - Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); - } - else - { - Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); - } + case StringValueEncodingType.Utf16: + writer.WriteStringValueSegment(MemoryMarshal.Cast(value1), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast(value2), true); + break; + case StringValueEncodingType.Utf8: + writer.WriteStringValueSegment(MemoryMarshal.Cast(value1), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast(value2), true); + break; + case StringValueEncodingType.Base64: + writer.WriteBase64StringSegment(MemoryMarshal.Cast(value1), false); + writer.WriteBase64StringSegment(MemoryMarshal.Cast(value2), true); + break; + default: + Assert.Fail($"Encoding {encoding} not valid."); + break; } } // Switch this to use an enum discriminator input when base64 is supported - private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, ReadOnlySpan value1, ReadOnlySpan value2, ReadOnlySpan value3) + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, ReadOnlySpan value1, ReadOnlySpan value2, ReadOnlySpan value3, StringValueEncodingType encoding) where T : struct { - if (typeof(T) == typeof(char)) - { - writer.WriteStringValueSegment(MemoryMarshal.Cast(value1), false); - writer.WriteStringValueSegment(MemoryMarshal.Cast(value2), false); - writer.WriteStringValueSegment(MemoryMarshal.Cast(value3), true); - } - else if (typeof(T) == typeof(byte)) - { - writer.WriteStringValueSegment(MemoryMarshal.Cast(value1), false); - writer.WriteStringValueSegment(MemoryMarshal.Cast(value2), false); - writer.WriteStringValueSegment(MemoryMarshal.Cast(value3), true); - } - else + EnsureByteOrChar(); + + 
switch (encoding) { - if (typeof(T) == typeof(int)) - { - Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); - } - else - { - Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); - } + case StringValueEncodingType.Utf16: + writer.WriteStringValueSegment(MemoryMarshal.Cast(value1), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast(value2), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast(value3), true); + break; + case StringValueEncodingType.Utf8: + writer.WriteStringValueSegment(MemoryMarshal.Cast(value1), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast(value2), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast(value3), true); + break; + case StringValueEncodingType.Base64: + writer.WriteBase64StringSegment(MemoryMarshal.Cast(value1), false); + writer.WriteBase64StringSegment(MemoryMarshal.Cast(value2), false); + writer.WriteBase64StringSegment(MemoryMarshal.Cast(value3), true); + break; + default: + Assert.Fail($"Encoding {encoding} not valid."); + break; } } - private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, string value) - => WriteStringValueSegmentsHelper(writer, value.AsSpan()); + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, string value, StringValueEncodingType encoding) + => WriteStringValueSegmentsHelper(writer, value.AsSpan(), encoding); - private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, string value1, string value2) - => WriteStringValueSegmentsHelper(writer, value1.AsSpan(), value2.AsSpan()); + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, string value1, string value2, StringValueEncodingType encoding) + => WriteStringValueSegmentsHelper(writer, value1.AsSpan(), value2.AsSpan(), encoding); - private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, 
string value1, string value2, string value3) - => WriteStringValueSegmentsHelper(writer, value1.AsSpan(), value2.AsSpan(), value3.AsSpan()); + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, string value1, string value2, string value3, StringValueEncodingType encoding) + => WriteStringValueSegmentsHelper(writer, value1.AsSpan(), value2.AsSpan(), value3.AsSpan(), encoding); } } diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs index 382349214beb1b..b9564958092f9a 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs @@ -272,17 +272,17 @@ public void EscapingTestWhileWriting(char replacementChar, JavaScriptEncoder enc if (dataLength < 10) { - SplitCodePointsHelper(changed, writerOptions, output => + SplitStringDataHelper(newStr.AsSpan(), writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf16); - SplitCodePointsHelper(changed, writerOptions, output => + SplitStringDataHelper(sourceUtf8, writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf8); } } @@ -303,17 +303,17 @@ public void EscapingTestWhileWriting(char replacementChar, JavaScriptEncoder enc if (dataLength < 10) { - SplitCodePointsHelper(changed, writerOptions, output => + SplitStringDataHelper(changed, writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); Assert.Equal(requiresEscaping ? 
1 : -1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf16); - SplitCodePointsHelper(sourceUtf8, writerOptions, output => + SplitStringDataHelper(sourceUtf8, writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); Assert.Equal(requiresEscaping ? 1 : -1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf8); } } } @@ -427,15 +427,15 @@ public unsafe void WriteString_NonAscii(char replacementChar, JavaScriptEncoder if (dataLength < 10) { - SplitCodePointsHelper(str, writerOptions, output => + SplitStringDataHelper(str, writerOptions, output => { Assert.Equal(-1, output.WrittenSpan.IndexOf((byte)'\\')); - }); + }, StringValueEncodingType.Utf16); - SplitCodePointsHelper(sourceUtf8, writerOptions, output => + SplitStringDataHelper(sourceUtf8, writerOptions, output => { Assert.Equal(-1, output.WrittenSpan.IndexOf((byte)'\\')); - }); + }, StringValueEncodingType.Utf8); } for (int i = 0; i < dataLength; i++) @@ -455,19 +455,19 @@ public unsafe void WriteString_NonAscii(char replacementChar, JavaScriptEncoder if (dataLength < 10) { - SplitCodePointsHelper(source.ToCharArray(), writerOptions, output => + SplitStringDataHelper(source.AsSpan(), writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); // Each CJK character expands to 3 utf-8 bytes. Assert.Equal(requiresEscaping ? ((i * 3) + 1) : -1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf16); - SplitCodePointsHelper(sourceUtf8, writerOptions, output => + SplitStringDataHelper(sourceUtf8, writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); // Each CJK character expands to 3 utf-8 bytes. Assert.Equal(requiresEscaping ? 
((i * 3) + 1) : -1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf8); } } } @@ -538,15 +538,15 @@ public void EscapingTestWhileWritingSurrogate(JavaScriptEncoder encoder) if (dataLength < 10) { - SplitCodePointsHelper(str, writerOptions, output => + SplitStringDataHelper(str, writerOptions, output => { Assert.Equal(-1, output.WrittenSpan.IndexOf((byte)'\\')); - }); + }, StringValueEncodingType.Utf16); - SplitCodePointsHelper(sourceUtf8, writerOptions, output => + SplitStringDataHelper(sourceUtf8, writerOptions, output => { Assert.Equal(-1, output.WrittenSpan.IndexOf((byte)'\\')); - }); + }, StringValueEncodingType.Utf8); } for (int i = 0; i < dataLength - 1; i++) @@ -567,17 +567,17 @@ public void EscapingTestWhileWritingSurrogate(JavaScriptEncoder encoder) if (dataLength < 10) { - SplitCodePointsHelper(changed, writerOptions, output => + SplitStringDataHelper(changed, writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); Assert.Equal(i + 1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf16); - SplitCodePointsHelper(sourceUtf8, writerOptions, output => + SplitStringDataHelper(sourceUtf8, writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); Assert.Equal(i + 1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf8); } } @@ -603,17 +603,17 @@ public void EscapingTestWhileWritingSurrogate(JavaScriptEncoder encoder) if (dataLength < 10) { - SplitCodePointsHelper(changed, writerOptions, output => + SplitStringDataHelper(changed, writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); Assert.Equal(1, escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf16); - SplitCodePointsHelper(sourceUtf8, writerOptions, output => + SplitStringDataHelper(sourceUtf8, writerOptions, output => { escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); Assert.Equal(1, 
escapedIndex); // Account for the start quote - }); + }, StringValueEncodingType.Utf8); } } } @@ -667,15 +667,15 @@ public unsafe void WriteStringInvalidCharacter(char replacementChar, JavaScriptE if (dataLength < 10) { - SplitCodePointsHelper(changed, writerOptions, output => + SplitStringDataHelper(changed, writerOptions, output => { Assert.True(BeginsWithReplacementCharacter(output.WrittenSpan.Slice(i + 1))); // +1 to account for starting quote - }); + }, StringValueEncodingType.Utf16); - SplitCodePointsHelper(sourceUtf8, writerOptions, output => + SplitStringDataHelper(sourceUtf8, writerOptions, output => { Assert.True(BeginsWithReplacementCharacter(output.WrittenSpan.Slice(i + 1))); // +1 to account for starting quote - }); + }, StringValueEncodingType.Utf8); } } } @@ -1886,6 +1886,18 @@ public void InvalidJsonDueToWritingMultipleValues(JsonWriterOptions options, Jso ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo"u8, false), options.SkipValidation); } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteBase64StringSegment("foo"u8, true), options.SkipValidation); + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteBase64StringSegment("foo"u8, false), options.SkipValidation); + } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) { WritePreamble(jsonUtf8, kind); @@ -2020,6 +2032,18 @@ public void InvalidJsonDueToWritingMultipleValuesWithComments(JsonWriterOptions ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo"u8, false), options.SkipValidation); } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind, addComments: true); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteBase64StringSegment("foo"u8, true), options.SkipValidation); + } + + using (var jsonUtf8 = new 
Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind, addComments: true); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteBase64StringSegment("foo"u8, false), options.SkipValidation); + } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) { WritePreamble(jsonUtf8, kind, addComments: true); @@ -2258,6 +2282,10 @@ public void InvalidJsonMismatch(JsonWriterOptions options) jsonUtf8.WriteStringValueSegment([65, 66], true); jsonUtf8.WriteStringValueSegment([65, 66], false); jsonUtf8.WriteStringValueSegment([65, 66], true); + + jsonUtf8.WriteBase64StringSegment([65, 66], true); + jsonUtf8.WriteBase64StringSegment([65, 66], false); + jsonUtf8.WriteBase64StringSegment([65, 66], true); } else { @@ -2265,6 +2293,8 @@ public void InvalidJsonMismatch(JsonWriterOptions options) Assert.Throws(() => jsonUtf8.WriteStringValueSegment(['a', 'b'], false)); Assert.Throws(() => jsonUtf8.WriteStringValueSegment([65, 66], true)); Assert.Throws(() => jsonUtf8.WriteStringValueSegment([65, 66], false)); + Assert.Throws(() => jsonUtf8.WriteBase64StringSegment([65, 66], true)); + Assert.Throws(() => jsonUtf8.WriteBase64StringSegment([65, 66], false)); } } @@ -2830,6 +2860,19 @@ public void InvalidJsonPrimitive(JsonWriterOptions options) Assert.Throws(() => jsonUtf8.WritePropertyName("test name")); } } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + jsonUtf8.WriteBase64StringSegment("a"u8, true); + if (options.SkipValidation) + { + jsonUtf8.WritePropertyName("test name"); + } + else + { + Assert.Throws(() => jsonUtf8.WritePropertyName("test name")); + } + } } // Name is present in the test data to make it easier to identify the test case @@ -2867,6 +2910,19 @@ public void InvalidJsonStringValueSegment(string _, Action write } } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + jsonUtf8.WriteBase64StringSegment("foo"u8, isFinalSegment: false); + if (options.SkipValidation) + { + write(jsonUtf8); + } + else + { + Assert.Throws(() => 
write(jsonUtf8)); + } + } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) { jsonUtf8.WriteStringValueSegment("foo".ToCharArray(), isFinalSegment: false); @@ -3236,7 +3292,7 @@ public void WritingTooLargeBase64Bytes(JsonWriterOptions options) byte[] value = new byte[200_000_000]; value.AsSpan().Fill((byte)'a'); - var output = new ArrayBufferWriter(value.Length); + var output = new ArrayBufferWriter(value.Length); using (var jsonUtf8 = new Utf8JsonWriter(output, options)) {