Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Base64Decoder to ignore space chars, add IsValid methods and tests #79334

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
6d72060
Allow Base64Decoder to ignore space chars, add IsValid methods and tests
heathbm Dec 7, 2022
35302b4
Address PR feedback regarding Base64.IsValid
heathbm Dec 8, 2022
837b4bd
Address PR feedback: General optimizations
heathbm Dec 13, 2022
9b3b581
Address PR feedback: Use vectorized decoding while enough src
heathbm Dec 14, 2022
9983889
Address PR feedback: General optimization
heathbm Dec 15, 2022
f389289
Address PR feedback: Optimize for whitespace (\r\n) every 76 bytes
heathbm Dec 20, 2022
8482bbd
Address PR feedback: Implement driver/worker pattern with original code
heathbm Dec 23, 2022
dca7ee6
Address PR feedback: Reuse existing decoding method with whitespace
heathbm Dec 24, 2022
fe32cd3
Merge branch 'main' into add-base64-is-valid
heathbm Jan 6, 2023
261885c
Address PR feedback: Remove redundant empty buffer check
heathbm Jan 8, 2023
fb9e8de
Address PR Feedback: Add missing magic constant comment
heathbm Jan 8, 2023
b68d36c
Address PR Feedback: Avoid validation logic duplication
heathbm Mar 2, 2023
689de25
Merge branch 'main' into add-base64-is-valid
heathbm Mar 2, 2023
745fd41
Throw Base64FormatException when whitespace should not be ignored
heathbm Mar 3, 2023
b35ce12
Adress PR feedback: Improve naming of Base64Validator.cs internals
heathbm Mar 10, 2023
5848058
Adress PR feedback: Add test to demonstrate extra whitespace is counted
heathbm Mar 10, 2023
7c022b0
Address PR feedback: avoid bound-check
heathbm Mar 14, 2023
04989b5
Address PR feedback: Base64.IsValid: Return when no more invalid chars
heathbm Mar 14, 2023
5007df7
Address PR feedback: Refactor Bas64.IsValid method
heathbm Mar 14, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 101 additions & 5 deletions src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Linq;
using System.Text;
using Xunit;

namespace System.Buffers.Text.Tests
{
public class Base64DecoderUnitTests
public class Base64DecoderUnitTests : Base64TestBase
{
[Fact]
public void BasicDecoding()
Expand Down Expand Up @@ -157,7 +158,7 @@ public void DecodingOutputTooSmall()

Span<byte> decodedBytes = new byte[3];
int consumed, written;
if (numBytes % 4 == 0)
if (numBytes >= 8)
{
Assert.True(OperationStatus.DestinationTooSmall ==
Base64.DecodeFromUtf8(source, decodedBytes, out consumed, out written), "Number of Input Bytes: " + numBytes);
Expand Down Expand Up @@ -373,7 +374,9 @@ public void DecodingInvalidBytes(bool isFinalBlock)
for (int i = 0; i < invalidBytes.Length; i++)
{
// Don't test padding (byte 61 i.e. '='), which is tested in DecodingInvalidBytesPadding
if (invalidBytes[i] == Base64TestHelper.EncodingPad)
// Don't test chars to be ignored (spaces: 9, 10, 13, 32 i.e. '\n', '\t', '\r', ' ')
if (invalidBytes[i] == Base64TestHelper.EncodingPad
|| Base64TestHelper.IsByteToBeIgnored(invalidBytes[i]))
continue;

// replace one byte with an invalid input
Expand Down Expand Up @@ -568,7 +571,9 @@ public void DecodeInPlaceInvalidBytes()
Span<byte> buffer = "2222PPPP"u8.ToArray(); // valid input

// Don't test padding (byte 61 i.e. '='), which is tested in DecodeInPlaceInvalidBytesPadding
if (invalidBytes[i] == Base64TestHelper.EncodingPad)
// Don't test chars to be ignored (spaces: 9, 10, 13, 32 i.e. '\n', '\t', '\r', ' ')
if (invalidBytes[i] == Base64TestHelper.EncodingPad
|| Base64TestHelper.IsByteToBeIgnored(invalidBytes[i]))
continue;

// replace one byte with an invalid input
Expand All @@ -594,7 +599,7 @@ public void DecodeInPlaceInvalidBytes()
{
Span<byte> buffer = "2222PPP"u8.ToArray(); // incomplete input
Assert.Equal(OperationStatus.InvalidData, Base64.DecodeFromUtf8InPlace(buffer, out int bytesWritten));
Assert.Equal(0, bytesWritten);
Assert.Equal(3, bytesWritten);
}
}

Expand Down Expand Up @@ -667,5 +672,96 @@ public void DecodeInPlaceInvalidBytesPadding()
}
}

[Theory]
[MemberData(nameof(ValidBase64Strings_WithCharsThatMustBeIgnored))]
public void BasicDecodingIgnoresCharsToBeIgnoredAsConvertToBase64Does(string utf8WithCharsToBeIgnored, byte[] expectedBytes)
{
byte[] utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithCharsToBeIgnored);
byte[] resultBytes = new byte[5];
OperationStatus result = Base64.DecodeFromUtf8(utf8BytesWithByteToBeIgnored, resultBytes, out int bytesConsumed, out int bytesWritten);

// Control value from Convert.FromBase64String
byte[] stringBytes = Convert.FromBase64String(utf8WithCharsToBeIgnored);

Assert.Equal(OperationStatus.Done, result);
Assert.Equal(utf8WithCharsToBeIgnored.Length, bytesConsumed);
Assert.Equal(expectedBytes.Length, bytesWritten);
Assert.True(expectedBytes.SequenceEqual(resultBytes));
Assert.True(stringBytes.SequenceEqual(resultBytes));
}

[Theory]
[MemberData(nameof(ValidBase64Strings_WithCharsThatMustBeIgnored))]
public void DecodeInPlaceIgnoresCharsToBeIgnoredAsConvertToBase64Does(string utf8WithCharsToBeIgnored, byte[] expectedBytes)
{
Span<byte> utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithCharsToBeIgnored);
OperationStatus result = Base64.DecodeFromUtf8InPlace(utf8BytesWithByteToBeIgnored, out int bytesWritten);
Span<byte> bytesOverwritten = utf8BytesWithByteToBeIgnored.Slice(0, bytesWritten);
byte[] resultBytesArray = bytesOverwritten.ToArray();

// Control value from Convert.FromBase64String
byte[] stringBytes = Convert.FromBase64String(utf8WithCharsToBeIgnored);

Assert.Equal(OperationStatus.Done, result);
Assert.Equal(expectedBytes.Length, bytesWritten);
Assert.True(expectedBytes.SequenceEqual(resultBytesArray));
Assert.True(stringBytes.SequenceEqual(resultBytesArray));
}

[Theory]
[MemberData(nameof(StringsOnlyWithCharsToBeIgnored))]
public void BasicDecodingWithOnlyCharsToBeIgnored(string utf8WithCharsToBeIgnored)
{
byte[] utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithCharsToBeIgnored);
byte[] resultBytes = new byte[5];
OperationStatus result = Base64.DecodeFromUtf8(utf8BytesWithByteToBeIgnored, resultBytes, out int bytesConsumed, out int bytesWritten);

Assert.Equal(OperationStatus.Done, result);
Assert.Equal(0, bytesWritten);
}

[Theory]
[MemberData(nameof(StringsOnlyWithCharsToBeIgnored))]
public void DecodingInPlaceWithOnlyCharsToBeIgnored(string utf8WithCharsToBeIgnored)
{
Span<byte> utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithCharsToBeIgnored);
OperationStatus result = Base64.DecodeFromUtf8InPlace(utf8BytesWithByteToBeIgnored, out int bytesWritten);

Assert.Equal(OperationStatus.Done, result);
Assert.Equal(0, bytesWritten);
}

[Theory]
[InlineData("AQ==", 4, 1)]
[InlineData("AQ== ", 5, 1)]
Comment on lines +735 to +736
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you mix in some variants like

Suggested change
[InlineData("AQ==", 4, 1)]
[InlineData("AQ== ", 5, 1)]
[InlineData("AQ==", 4, 1)]
[InlineData("AQ== ", 5, 1)]
[InlineData("AQ ==", 5, 1)]
[InlineData("AQ= =", 5, 1)]

or something like that covered by the test ValidBase64Strings_WithCharsThatMustBeIgnored below?
(Sorry, that I don't remember the generated testcases, it's too long ago 😉)

Also in https://gist.github.com/heathbm/f59662bd2334761d28288755a34e29ec you had some cool tests that generated inputs with whitespace at various places. In my lib that gist got into unit tests. Is everything covered here or should these tests be added too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I covered some of these cases here: https://github.com/dotnet/runtime/pull/79334/files#diff-6b4fec85572fdbcc9b03c81fe66cf8bf31cbb5620235abce7dab3c6dc034d862R11
It's a lighter version of the gist, as I was not sure if a test with that many loops/asserts would be acceptable in the pipeline.

[InlineData("AQ== ", 6, 1)]
[InlineData("AQ== ", 7, 1)]
[InlineData("AQ== ", 8, 1)]
[InlineData("AQ== ", 9, 1)]
[InlineData("AQ==\n", 5, 1)]
[InlineData("AQ==\n\n", 6, 1)]
[InlineData("AQ==\n\n\n", 7, 1)]
[InlineData("AQ==\n\n\n\n", 8, 1)]
[InlineData("AQ==\n\n\n\n\n", 9, 1)]
[InlineData("AQ==\t", 5, 1)]
[InlineData("AQ==\t\t", 6, 1)]
[InlineData("AQ==\t\t\t", 7, 1)]
[InlineData("AQ==\t\t\t\t", 8, 1)]
[InlineData("AQ==\t\t\t\t\t", 9, 1)]
[InlineData("AQ==\r", 5, 1)]
[InlineData("AQ==\r\r", 6, 1)]
[InlineData("AQ==\r\r\r", 7, 1)]
[InlineData("AQ==\r\r\r\r", 8, 1)]
[InlineData("AQ==\r\r\r\r\r", 9, 1)]
public void BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes(string inputString, int expectedConsumed, int expectedWritten)
{
Span<byte> source = Encoding.ASCII.GetBytes(inputString);
Span<byte> decodedBytes = new byte[Base64.GetMaxDecodedFromUtf8Length(source.Length)];

Assert.Equal(OperationStatus.Done, Base64.DecodeFromUtf8(source, decodedBytes, out int consumed, out int decodedByteCount));
Assert.Equal(expectedConsumed, consumed);
Assert.Equal(expectedWritten, decodedByteCount);
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, expectedWritten, source, decodedBytes));
}
}
}
111 changes: 111 additions & 0 deletions src/libraries/System.Memory/tests/Base64/Base64TestBase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.utf8Bytes, utf8Bytes.Length

using System.Collections.Generic;
using System.Text;

namespace System.Buffers.Text.Tests
{
public class Base64TestBase
{
public static IEnumerable<object[]> ValidBase64Strings_WithCharsThatMustBeIgnored()
{
// Create a Base64 string
string text = "a b c";
byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);
string base64Utf8String = Convert.ToBase64String(utf8Bytes);

// Split the base64 string in half
int stringLength = base64Utf8String.Length / 2;
string firstSegment = base64Utf8String.Substring(0, stringLength);
string secondSegment = base64Utf8String.Substring(stringLength, stringLength);

// Insert ignored chars between the base 64 string
// One will have 1 char, another will have 3

// Line feed
yield return new object[] { GetBase64StringWithPassedCharInsertedInTheMiddle(Convert.ToChar(9), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedInTheMiddle(Convert.ToChar(9), 3), utf8Bytes };

// Horizontal tab
yield return new object[] { GetBase64StringWithPassedCharInsertedInTheMiddle(Convert.ToChar(10), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedInTheMiddle(Convert.ToChar(10), 3), utf8Bytes };

// Carriage return
yield return new object[] { GetBase64StringWithPassedCharInsertedInTheMiddle(Convert.ToChar(13), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedInTheMiddle(Convert.ToChar(13), 3), utf8Bytes };

// Space
yield return new object[] { GetBase64StringWithPassedCharInsertedInTheMiddle(Convert.ToChar(32), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedInTheMiddle(Convert.ToChar(32), 3), utf8Bytes };

string GetBase64StringWithPassedCharInsertedInTheMiddle(char charToInsert, int numberOfTimesToInsert) => $"{firstSegment}{new string(charToInsert, numberOfTimesToInsert)}{secondSegment}";

// Insert ignored chars at the start of the base 64 string
// One will have 1 char, another will have 3

// Line feed
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheStart(Convert.ToChar(9), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheStart(Convert.ToChar(9), 3), utf8Bytes };

// Horizontal tab
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheStart(Convert.ToChar(10), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheStart(Convert.ToChar(10), 3), utf8Bytes };

// Carriage return
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheStart(Convert.ToChar(13), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheStart(Convert.ToChar(13), 3), utf8Bytes };

// Space
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheStart(Convert.ToChar(32), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheStart(Convert.ToChar(32), 3), utf8Bytes };

string GetBase64StringWithPassedCharInsertedAtTheStart(char charToInsert, int numberOfTimesToInsert) => $"{new string(charToInsert, numberOfTimesToInsert)}{firstSegment}{secondSegment}";

// Insert ignored chars at the end of the base 64 string
// One will have 1 char, another will have 3
// Whitespace after end/padding is not included in consumed bytes

// Line feed
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheEnd(Convert.ToChar(9), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheEnd(Convert.ToChar(9), 3), utf8Bytes };

// Horizontal tab
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheEnd(Convert.ToChar(10), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheEnd(Convert.ToChar(10), 3), utf8Bytes };

// Carriage return
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheEnd(Convert.ToChar(13), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheEnd(Convert.ToChar(13), 3), utf8Bytes };

// Space
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheEnd(Convert.ToChar(32), 1), utf8Bytes };
yield return new object[] { GetBase64StringWithPassedCharInsertedAtTheEnd(Convert.ToChar(32), 3), utf8Bytes };

string GetBase64StringWithPassedCharInsertedAtTheEnd(char charToInsert, int numberOfTimesToInsert) => $"{firstSegment}{secondSegment}{new string(charToInsert, numberOfTimesToInsert)}";
}

public static IEnumerable<object[]> StringsOnlyWithCharsToBeIgnored()
{
// One will have 1 char, another will have 3

// Line feed
yield return new object[] { GetRepeatedChar(Convert.ToChar(9), 1) };
yield return new object[] { GetRepeatedChar(Convert.ToChar(9), 3) };

// Horizontal tab
yield return new object[] { GetRepeatedChar(Convert.ToChar(10), 1) };
yield return new object[] { GetRepeatedChar(Convert.ToChar(10), 3) };

// Carriage return
yield return new object[] { GetRepeatedChar(Convert.ToChar(13), 1) };
yield return new object[] { GetRepeatedChar(Convert.ToChar(13), 3) };

// Space
yield return new object[] { GetRepeatedChar(Convert.ToChar(32), 1) };
yield return new object[] { GetRepeatedChar(Convert.ToChar(32), 3) };

string GetRepeatedChar(char charToInsert, int numberOfTimesToInsert) => new string(charToInsert, numberOfTimesToInsert);
}
}
}
14 changes: 14 additions & 0 deletions src/libraries/System.Memory/tests/Base64/Base64TestHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,20 @@ public static class Base64TestHelper
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

public static bool IsByteToBeIgnored(byte charByte)
{
switch (charByte)
{
case 9: // Line feed
case 10: // Horizontal tab
case 13: // Carriage return
case 32: // Space
return true;
default:
return false;
}
}

public const byte EncodingPad = (byte)'='; // '=', for padding
public const sbyte InvalidByte = -1; // Designating -1 for invalid bytes in the decoding map

Expand Down
Loading