Skip to content

Commit

Permalink
Add System.Text fuzzing for encoders (dotnet#103968)
Browse files Browse the repository at this point in the history
  • Loading branch information
steveharter committed Jun 27, 2024
1 parent 51176f0 commit 1bce1f7
Show file tree
Hide file tree
Showing 2 changed files with 197 additions and 0 deletions.
8 changes: 8 additions & 0 deletions eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,14 @@ extends:
SYSTEM_ACCESSTOKEN: $(System.AccessToken)
displayName: Send SearchValuesStringFuzzer to OneFuzz

- task: onefuzz-task@0
inputs:
onefuzzOSes: 'Windows'
env:
onefuzzDropDirectory: $(fuzzerProject)/deployment/TextEncodingFuzzer
SYSTEM_ACCESSTOKEN: $(System.AccessToken)
displayName: Send TextEncodingFuzzer to OneFuzz

- task: onefuzz-task@0
inputs:
onefuzzOSes: 'Windows'
Expand Down
189 changes: 189 additions & 0 deletions src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/TextEncodingFuzzer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Text;

namespace DotnetFuzzing.Fuzzers;

// The fuzzing infrastructure currently does not support fuzzing .NET Framework.
// However, this test class, while running under .NET Core, was used to forward the fuzzing
// input to a .NET Framework console app. That app had the same test semantics as the tests
// here, although it used slightly different supporting APIs since not all supporting library
// and language features are present in .NET Framework.
// This forwarding approach and the .NET Framework test code are preserved in the original Pull
// Request for this file. The approach used Base64 encoding to convert the incoming
// ReadOnlySpan<byte> to a string, which was passed to the Main() method of the .NET
// Framework app and converted back to bytes there before being passed to the .NET
// Framework fuzzing tests.
internal sealed class TextEncodingFuzzer : IFuzzer
{
// Explicit IFuzzer implementation: this fuzzer lists no target assemblies (always an empty array).
string[] IFuzzer.TargetAssemblies => [];
// Explicit IFuzzer implementation: a single prefix, "System.Text", created once by the property initializer.
// NOTE(review): presumably this selects which CoreLib namespaces the harness instruments - confirm against IFuzzer.
string[] IFuzzer.TargetCoreLibPrefixes { get; } = ["System.Text"];

// Fuzzing entry point. Copies the fuzzer-supplied bytes into a rented bounded buffer and
// feeds that buffer, as encoded text, to the test method of every encoding covered here.
void IFuzzer.FuzzTarget(ReadOnlySpan<byte> bytes)
{
    // The buffer is rented with PoisonPagePlacement.After.
    // NOTE(review): presumably this makes reads past the end of the input fault - confirm
    // against PooledBoundedMemory.
    using PooledBoundedMemory<byte> boundedInput = PooledBoundedMemory<byte>.Rent(bytes, PoisonPagePlacement.After);
    ReadOnlySpan<byte> encodedText = boundedInput.Span;

    TestLatin1(encodedText);
    TestASCII(encodedText);
    TestUnicode(encodedText);
    TestUtf32(encodedText);
    TestUtf7(encodedText);
    TestUtf8(encodedText);
}

// We use individual methods for each encoding, so if there's an exception then
// it's clear which encoding failed based on the call stack.

// Runs the substitution round-trip test and the chunked Convert() test against the
// encoding registered under the name "ISO-8859-1".
private static void TestLatin1(ReadOnlySpan<byte> input)
{
    Encoding latin1 = Encoding.GetEncoding("ISO-8859-1");

    TestWithSubstitution(input, latin1);
    TestWithConvert(input, latin1);
}

// Runs the substitution round-trip test and the chunked Convert() test against a
// default-constructed ASCIIEncoding.
private static void TestASCII(ReadOnlySpan<byte> input)
{
    ASCIIEncoding ascii = new();

    TestWithSubstitution(input, ascii);
    TestWithConvert(input, ascii);
}

// Exercises UnicodeEncoding (UTF-16) three ways: round-trip with the default-constructed
// encoding, round-trip with an instance constructed with throwOnInvalidBytes: true, and
// chunked Convert() with the default-constructed encoding.
private static void TestUnicode(ReadOnlySpan<byte> input)
{
    UnicodeEncoding replacing = new();
    UnicodeEncoding throwing = new(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true);

    TestWithSubstitution(input, replacing);
    TestWithExceptions(input, throwing);
    TestWithConvert(input, replacing);
}

// Exercises UTF32Encoding three ways: round-trip with the default-constructed encoding,
// round-trip with an instance constructed with throwOnInvalidCharacters: true, and
// chunked Convert() with the default-constructed encoding.
private static void TestUtf32(ReadOnlySpan<byte> input)
{
    UTF32Encoding replacing = new();
    UTF32Encoding throwing = new(bigEndian: false, byteOrderMark: false, throwOnInvalidCharacters: true);

    TestWithSubstitution(input, replacing);
    TestWithExceptions(input, throwing);
    TestWithConvert(input, replacing);
}

// Runs only the substitution round-trip test against UTF7Encoding. The type is marked
// obsolete (SYSLIB0001), so that warning is suppressed just around its construction.
private static void TestUtf7(ReadOnlySpan<byte> input)
{
#pragma warning disable SYSLIB0001 // Type or member is obsolete
    Encoding utf7 = new UTF7Encoding();
#pragma warning restore SYSLIB0001

    TestWithSubstitution(input, utf7);
}

// Exercises UTF8Encoding three ways: round-trip with the default-constructed encoding,
// round-trip with an instance constructed with throwOnInvalidBytes: true, and chunked
// Convert() with the default-constructed encoding.
private static void TestUtf8(ReadOnlySpan<byte> input)
{
    UTF8Encoding replacing = new();
    UTF8Encoding throwing = new(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

    TestWithSubstitution(input, replacing);
    TestWithExceptions(input, throwing);
    TestWithConvert(input, replacing);
}

// Round-trip check: decode the input to chars, encode those chars back to bytes, decode
// the re-encoded bytes again, and assert that both decoded char sequences are identical
// and have the length reported up front by GetCharCount().
private static void TestWithSubstitution(ReadOnlySpan<byte> input, Encoding encoding)
{
    Decoder decoder = encoding.GetDecoder();
    int expectedCharCount = decoder.GetCharCount(input, flush: true);

    using PooledBoundedMemory<char> firstPass = PooledBoundedMemory<char>.Rent(expectedCharCount, PoisonPagePlacement.After);
    using PooledBoundedMemory<char> secondPass = PooledBoundedMemory<char>.Rent(expectedCharCount, PoisonPagePlacement.After);

    // *4 for worst case scenario (*2 for char->byte + *2 for encoding)
    // +2 is for possible Base64 padding with UTF7Encoding.
    int encodedCapacity = expectedCharCount * 4 + 2;
    using PooledBoundedMemory<byte> encoded = PooledBoundedMemory<byte>.Rent(encodedCapacity, PoisonPagePlacement.After);

    // First pass: decode the raw input.
    decoder.Reset();
    int firstPassCount = decoder.GetChars(input, firstPass.Span, flush: true);
    Assert.Equal(expectedCharCount, firstPassCount);

    // Encode what was decoded. flush: true is used for UTF7Encoding, which may do Base64
    // padding at the end.
    Encoder encoder = encoding.GetEncoder();
    int encodedLength = encoder.GetBytes(firstPass.Span, encoded.Span, flush: true);

    // Second pass: decode the re-encoded bytes. Any substitutions made during the first
    // pass are ordinary characters by now, so the two passes are directly comparable.
    decoder.Reset();
    int secondPassCount = decoder.GetChars(encoded.Span[..encodedLength], secondPass.Span, flush: true);
    Assert.Equal(expectedCharCount, secondPassCount);

    // The values must have round-tripped unchanged.
    Assert.SequenceEqual<char>(firstPass.Span, secondPass.Span);
}

// If there are substitutions, these cases will fail with DecoderFallbackException early on,
// otherwise there should be no DecoderFallbackExceptions.
// Expects an encoding whose decoder and encoder fallbacks are both the exception fallbacks
// (asserted below). The input is first probed with GetCharCount(); if that throws
// DecoderFallbackException the input is invalid for this encoding and the method returns.
// Otherwise the regular round-trip test runs against the same encoding.
private static void TestWithExceptions(ReadOnlySpan<byte> input, Encoding encoding)
{
    Assert.Equal(typeof(DecoderExceptionFallback), encoding.DecoderFallback.GetType());
    Assert.Equal(typeof(EncoderExceptionFallback), encoding.EncoderFallback.GetType());

    Decoder decoder = encoding.GetDecoder();

    try
    {
        // Only whether this call throws matters; TestWithSubstitution computes the char
        // count itself, so the result is discarded.
        _ = decoder.GetCharCount(input, flush: true);
    }
    catch (DecoderFallbackException)
    {
        // The input is not valid without fallbacks.
        return;
    }

    TestWithSubstitution(input, encoding);
}

// Runs the chunked Convert() test with a set of boundary block sizes: 1 through 4, the
// whole input, and - for inputs that are long enough - one byte short of the whole input
// and half of the input.
private static void TestWithConvert(ReadOnlySpan<byte> input, Encoding encoding)
{
    // Small fixed block sizes.
    for (int blockSize = 1; blockSize <= 4; blockSize++)
    {
        TestWithConvert(input, encoding, blockSize);
    }

    // Everything in a single block.
    int length = input.Length;
    TestWithConvert(input, encoding, length);

    if (length < 6)
    {
        return;
    }

    TestWithConvert(input, encoding, length - 1);

    if (length < 12)
    {
        return;
    }

    TestWithConvert(input, encoding, length / 2);
}

// Verify that obtaining data using several Convert() calls matches the result from a single GetChars() call.
// Decodes the input in slices of at most blockSize bytes through repeated
// Decoder.Convert() calls, then decodes the whole input with one GetChars() call, and
// asserts that both approaches produce the same number of chars and the same chars.
//   input     - encoded bytes to decode.
//   encoding  - encoding whose decoder is exercised.
//   blockSize - maximum number of bytes offered to each Convert() call. Callers in this
//               file pass at least 1 whenever the input is non-empty.
private static void TestWithConvert(ReadOnlySpan<byte> input, Encoding encoding, int blockSize)
{
    Decoder decoder = encoding.GetDecoder();

    int charCount = decoder.GetCharCount(input, flush: true);

    using PooledBoundedMemory<char> chars = PooledBoundedMemory<char>.Rent(charCount, PoisonPagePlacement.After);
    using PooledBoundedMemory<char> chars2 = PooledBoundedMemory<char>.Rent(charCount, PoisonPagePlacement.After);

    decoder.Reset();
    int charsUsedTotal = 0;
    int i = 0;

    while (i < input.Length)
    {
        // The final slice takes whatever is left and is the only one decoded with flush: true.
        bool lastIteration = i + blockSize >= input.Length;
        int bytesToRead = lastIteration ? input.Length - i : blockSize;

        decoder.Convert(
            input.Slice(i, bytesToRead),
            chars.Span.Slice(charsUsedTotal, charCount - charsUsedTotal),
            flush: lastIteration,
            out int bytesUsed,
            out int charsUsed,
            out bool _);

        // Advance by what the decoder reports it consumed and produced.
        i += bytesUsed;
        charsUsedTotal += charsUsed;
    }

    // Computed count first, produced count second, matching the other assertions in this file.
    Assert.Equal(charCount, charsUsedTotal);

    // Reference result: decode everything in one call and compare.
    decoder.Reset();
    decoder.GetChars(input, chars2.Span, flush: true);
    Assert.SequenceEqual<char>(chars.Span, chars2.Span);
}
}

0 comments on commit 1bce1f7

Please sign in to comment.