Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Base64url encoding/decoding #102364

Merged
merged 40 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
763212a
Base64Url encoding, validation implementation
buyaa-n Apr 24, 2024
ea764bb
Validation related updates
buyaa-n Apr 26, 2024
4489fcb
Try fix perf regression in vectorized methods
buyaa-n Apr 29, 2024
238b763
Add decoder implementation and unit tests
buyaa-n May 6, 2024
f378eb5
Share code in place decoding code
buyaa-n May 7, 2024
3f6ea88
Add span<char> overloads with vectorization
buyaa-n May 13, 2024
2fe5d49
Generalize Span char/byte implementations
buyaa-n May 16, 2024
febb4d9
Move ref update to runtime and other cleanup
buyaa-n May 17, 2024
d40034f
Merge branch 'main' of github.com:dotnet/runtime into base64url
buyaa-n May 17, 2024
0994bc8
Generalize the AdvSimd.Arm64 vectorization added recently
buyaa-n May 17, 2024
492694b
Apply suggestions from code review
buyaa-n May 17, 2024
c45f58c
Apply some feedback
buyaa-n May 17, 2024
4bcd58a
Try fix ARM failure
buyaa-n May 17, 2024
b0115c5
Use array pool whenever applicable
buyaa-n May 18, 2024
d1faf6a
Handle '%' as url padding, add more tests and fix findings
buyaa-n May 20, 2024
c2ff2cb
Fix assertion failure, apply some feedback, try fix ARM failure
buyaa-n May 20, 2024
b1d4f76
Update docs, small clean ups
buyaa-n May 21, 2024
0f6fe6a
Try fix ARM failure
buyaa-n May 21, 2024
344bbb6
Update src/libraries/System.Private.CoreLib/src/System/Buffers/Text/B…
buyaa-n May 22, 2024
b657a99
Rename StoreToDestination overloads, reuse some duplicate code
buyaa-n May 22, 2024
140663d
Improve perf for Base.IsValid() overloads, exclude ARM vectorization…
buyaa-n May 22, 2024
c4c605d
Apply feedbacks
buyaa-n May 24, 2024
61095dd
Revert Assert
buyaa-n May 24, 2024
b338a94
Fix ARM vectorization failure for char overload
buyaa-n May 24, 2024
7d4242d
Apply suggestions from code review
buyaa-n May 24, 2024
f6c4d93
Apply suggestions from code review
buyaa-n May 28, 2024
6c1035d
Apply more feedback
buyaa-n May 28, 2024
2243060
Apply suggestions from code review
buyaa-n May 28, 2024
42543e8
Apply review comment left overs
buyaa-n May 30, 2024
bdbdbab
Apply suggestions from code review
buyaa-n May 30, 2024
b871d6d
Apply remaining feedback
buyaa-n May 30, 2024
73bd7df
Apply suggestions from code review
buyaa-n Jun 7, 2024
070f685
Apply the doc feedback for other API docs
buyaa-n Jun 7, 2024
aed94cb
Merge branch 'main' of github.com:dotnet/runtime into base64url
buyaa-n Jun 7, 2024
00139b2
Fix Base64Url fuzzer findings
buyaa-n Jun 11, 2024
dea9006
Apply suggestions from code review
buyaa-n Jun 11, 2024
e617b8f
Rename utf8 -> source/destination
buyaa-n Jun 11, 2024
c3ff207
Apply feedbacks
buyaa-n Jun 12, 2024
154b400
Apply feedback
buyaa-n Jun 12, 2024
4ccdde8
Apply left out feedbacks
buyaa-n Jun 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 8 additions & 18 deletions src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
Expand Down Expand Up @@ -273,6 +272,9 @@ public void BasicDecodingWithFinalBlockTrueKnownInputDone(string inputString, in

[Theory]
[InlineData("A", 0, 0)]
[InlineData("A===", 0, 0)]
[InlineData("A==", 0, 0)]
[InlineData("A=", 0, 0)]
[InlineData("AQ", 0, 0)]
[InlineData("AQI", 0, 0)]
[InlineData("AQIDBA", 4, 3)]
Expand All @@ -285,16 +287,18 @@ public void BasicDecodingWithFinalBlockTrueKnownInputInvalid(string inputString,
Assert.Equal(OperationStatus.InvalidData, Base64.DecodeFromUtf8(source, decodedBytes, out int consumed, out int decodedByteCount));
Assert.Equal(expectedConsumed, consumed);
Assert.Equal(expectedWritten, decodedByteCount); // expectedWritten == decodedBytes.Length
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, decodedBytes.Length, source, decodedBytes));
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, expectedWritten, source, decodedBytes));
}

[Theory]
[InlineData("\u00ecz/T", 0, 0)] // scalar code-path
[InlineData("z/Ta123\u00ec", 4, 3)]
[InlineData("\u00ecz/TpH7sqEkerqMweH1uSw==", 0, 0)] // Vector128 code-path
[InlineData("z/TpH7sqEkerqMweH1uSw\u00ec==", 20, 15)]
[InlineData("\u00ecz/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo==", 0, 0)] // Vector256 / AVX code-path
[InlineData("z/TpH7sqEkerqMweH1uSw\u5948==", 20, 15)]
[InlineData("\u5948/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo==", 0, 0)] // Vector256 / AVX code-path
[InlineData("z/TpH7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo\u00ec==", 44, 33)]
[InlineData("\u5948z+T/H7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo01234567890123456789012345678901234567890123456789==", 0, 0)] // Vector512 / Avx512Vbmi code-path
[InlineData("z/T+H7sqEkerqMweH1uSw1a5ebaAF9xa8B0ze1wet4epo01234567890123456789012345678901234567890123456789\u5948==", 92, 69)]
public void BasicDecodingNonAsciiInputInvalid(string inputString, int expectedConsumed, int expectedWritten)
{
Span<byte> source = Encoding.UTF8.GetBytes(inputString);
Expand Down Expand Up @@ -749,19 +753,5 @@ public void BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes(strin
Assert.Equal(expectedWritten, decodedByteCount);
Assert.True(Base64TestHelper.VerifyDecodingCorrectness(expectedConsumed, expectedWritten, source, decodedBytes));
}

/// <summary>
/// Produces theory rows of the form { input text, second value, third value } for the
/// whitespace-handling decoding test (presumably expected consumed / expected written counts,
/// judging by the consuming test's assertions).
/// </summary>
/// <returns>
/// Five rows of "AQ==" followed by 0..4 pseudo-random whitespace characters, then seven rows
/// built by repeating a whitespace-sprinkled four-character group four times.
/// </returns>
public static IEnumerable<object[]> BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes_MemberData()
{
    // A fixed seed keeps the generated whitespace suffixes identical from run to run.
    var random = new Random(42);
    for (int suffixLength = 0; suffixLength < 5; suffixLength++)
    {
        char[] suffix = random.GetItems<char>(" \n\t\r", suffixLength);
        string input = "AQ==" + new string(suffix);
        yield return new object[] { input, 4 + suffixLength, 1 };
    }

    string[] groups = { "MTIz", "M TIz", "MT Iz", "MTI z", "MTIz ", "M TI z", "M T I Z " };
    foreach (string group in groups)
    {
        // Every group is repeated four times, so the second value is four times its length.
        string repeated = group + group + group + group;
        yield return new object[] { repeated, group.Length * 4, 12 };
    }
}
}
}
14 changes: 14 additions & 0 deletions src/libraries/System.Memory/tests/Base64/Base64TestBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,5 +107,19 @@ public static IEnumerable<object[]> StringsOnlyWithCharsToBeIgnored()

string GetRepeatedChar(char charToInsert, int numberOfTimesToInsert) => new string(charToInsert, numberOfTimesToInsert);
}

/// <summary>
/// Produces theory rows of the form { input text, second value, third value } for the
/// whitespace-handling decoding tests (presumably expected consumed / expected written counts,
/// judging by the consuming test's assertions).
/// </summary>
/// <returns>
/// Five rows of "AQ==" followed by 0..4 pseudo-random whitespace characters, then seven rows
/// built by repeating a whitespace-sprinkled four-character group four times.
/// </returns>
public static IEnumerable<object[]> BasicDecodingWithExtraWhitespaceShouldBeCountedInConsumedBytes_MemberData()
{
    // A fixed seed keeps the generated whitespace suffixes identical from run to run.
    var random = new Random(42);
    for (int suffixLength = 0; suffixLength < 5; suffixLength++)
    {
        char[] suffix = random.GetItems<char>(" \n\t\r", suffixLength);
        string input = "AQ==" + new string(suffix);
        yield return new object[] { input, 4 + suffixLength, 1 };
    }

    string[] groups = { "MTIz", "M TIz", "MT Iz", "MTI z", "MTIz ", "M TI z", "M T I Z " };
    foreach (string group in groups)
    {
        // Every group is repeated four times, so the second value is four times its length.
        string repeated = group + group + group + group;
        yield return new object[] { repeated, group.Length * 4, 12 };
    }
}
}
}
88 changes: 84 additions & 4 deletions src/libraries/System.Memory/tests/Base64/Base64TestHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,23 @@ public static class Base64TestHelper
52, 53, 54, 55, 56, 57, 43, 47 //4..9, +, /
};

// Base64Url alphabet lookup: the entry at index i is the ASCII code of the character that
// encodes the 6-bit value i (64 entries in total).
// The final two entries are '-' (45) and '_' (95), where the standard map above ends with '+' (43) and '/' (47).
public static readonly byte[] s_urlEncodingMap = {
65, 66, 67, 68, 69, 70, 71, 72, //A..H
73, 74, 75, 76, 77, 78, 79, 80, //I..P
81, 82, 83, 84, 85, 86, 87, 88, //Q..X
89, 90, 97, 98, 99, 100, 101, 102, //Y..Z, a..f
103, 104, 105, 106, 107, 108, 109, 110, //g..n
111, 112, 113, 114, 115, 116, 117, 118, //o..v
119, 120, 121, 122, 48, 49, 50, 51, //w..z, 0..3
52, 53, 54, 55, 56, 57, 45, 95 //4..9, -, _
};

// Pre-computing this table using a custom string(s_characters) and GenerateDecodingMapAndVerify (found in tests)
public static readonly sbyte[] s_decodingMap = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, //62 is placed at index 43 (for +), 63 at index 47 (for /)
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9), 64 at index 61 (for =)
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9)
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, //0-25 are placed at index 65-90 (for A-Z)
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
Expand All @@ -44,9 +55,29 @@ public static class Base64TestHelper
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

// Base64Url decoding lookup: indexed by the input byte value (16 rows x 16 = 256 entries).
// Each entry is the 6-bit value that byte decodes to, or -1 when the byte is not part of the url alphabet.
public static readonly sbyte[] s_urlDecodingMap = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, //62 is placed at index 45 (for -), 63 at index 95 (for _)
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9)
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, //0-25 are placed at index 65-90 (for A-Z)
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, //26-51 are placed at index 97-122 (for a-z)
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bytes over 122 ('z') are invalid and cannot be decoded
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Hence, padding the rest of the map with -1 (InvalidByte), which indicates invalid input
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

/// <summary>
/// Reports whether <paramref name="charByte"/> is one of the four whitespace bytes
/// (space, tab, carriage return, line feed) that the tests treat as ignorable.
/// </summary>
public static bool IsByteToBeIgnored(byte charByte)
{
    switch (charByte)
    {
        case (byte)' ':
        case (byte)'\t':
        case (byte)'\r':
        case (byte)'\n':
            return true;
        default:
            return false;
    }
}

// Marker values shared by the Base64 / Base64Url test helpers.
public const byte EncodingPad = (byte)'='; // '=', for padding
public const byte UrlEncodingPad = (byte)'%'; // '%', for url padding (VerifyUrlDecodingCorrectness maps it back to '=' before decoding)
public const sbyte InvalidByte = -1; // Designating -1 for invalid bytes in the decoding map

public static byte[] InvalidBytes
Expand All @@ -60,6 +91,17 @@ public static byte[] InvalidBytes
}
}

/// <summary>
/// Gets every byte value whose entry in <c>s_urlDecodingMap</c> equals <c>InvalidByte</c>,
/// i.e. the bytes the url decoding table marks as undecodable.
/// </summary>
public static byte[] UrlInvalidBytes
{
    get
    {
        int[] invalidPositions = s_urlDecodingMap.FindAllIndexOf(InvalidByte);

        // Each index is narrowed with an explicit cast; a bulk Cast<byte>() would throw
        // InvalidCastException: Unable to cast object of type 'System.Int32' to type 'System.Byte'
        byte[] invalidBytes = new byte[invalidPositions.Length];
        for (int k = 0; k < invalidPositions.Length; k++)
        {
            invalidBytes[k] = (byte)invalidPositions[k];
        }

        return invalidBytes;
    }
}

internal static void InitializeBytes(Span<byte> bytes, int seed = 100)
{
var rnd = new Random(seed);
Expand All @@ -79,6 +121,26 @@ internal static void InitializeDecodableBytes(Span<byte> bytes, int seed = 100)
}
}

/// <summary>
/// Fills <paramref name="bytes"/> with pseudo-random characters drawn from <c>s_urlEncodingMap</c>,
/// so every element is a character of the url alphabet table.
/// </summary>
/// <param name="bytes">Destination span of chars to overwrite (every element is assigned).</param>
/// <param name="seed">Seed for the random generator; the same seed yields the same content.</param>
internal static void InitializeUrlDecodableChars(Span<char> bytes, int seed = 100)
{
    var generator = new Random(seed);
    foreach (ref char slot in bytes)
    {
        // One random draw per element, in element order, picks the alphabet entry.
        int alphabetIndex = (byte)generator.Next(0, s_urlEncodingMap.Length);
        byte encoded = s_urlEncodingMap[alphabetIndex];
        slot = (char)encoded;
    }
}

/// <summary>
/// Fills <paramref name="bytes"/> with pseudo-random entries drawn from <c>s_urlEncodingMap</c>,
/// so every element is a byte of the url alphabet table.
/// </summary>
/// <param name="bytes">Destination span to overwrite (every element is assigned).</param>
/// <param name="seed">Seed for the random generator; the same seed yields the same content.</param>
internal static void InitializeUrlDecodableBytes(Span<byte> bytes, int seed = 100)
{
    var generator = new Random(seed);
    foreach (ref byte slot in bytes)
    {
        // One random draw per element, in element order, picks the alphabet entry.
        int alphabetIndex = (byte)generator.Next(0, s_urlEncodingMap.Length);
        slot = s_urlEncodingMap[alphabetIndex];
    }
}

[Fact]
public static void GenerateEncodingMapAndVerify()
{
Expand Down Expand Up @@ -112,16 +174,34 @@ public static int[] FindAllIndexOf<T>(this IEnumerable<T> values, T valueToFind)

public static bool VerifyEncodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> encodedBytes)
{
string expectedText = Convert.ToBase64String(source.Slice(0, expectedConsumed).ToArray());
string encodedText = Encoding.ASCII.GetString(encodedBytes.Slice(0, expectedWritten).ToArray());
string expectedText = Convert.ToBase64String(source.Slice(0, expectedConsumed));
string encodedText = Encoding.ASCII.GetString(encodedBytes.Slice(0, expectedWritten));
return expectedText.Equals(encodedText);
}

/// <summary>
/// Checks an url-encoded result against a reference built from <see cref="Convert.ToBase64String(ReadOnlySpan{byte}, Base64FormattingOptions)"/>:
/// the standard encoding of the consumed input with '+' replaced by '-', '/' replaced by '_'
/// and trailing '=' removed.
/// </summary>
/// <param name="expectedConsumed">Number of leading bytes of <paramref name="source"/> that were encoded.</param>
/// <param name="expectedWritten">Number of leading bytes of <paramref name="encodedBytes"/> that hold the result.</param>
/// <param name="source">Raw input bytes.</param>
/// <param name="encodedBytes">Encoded output, read as ASCII text.</param>
/// <returns><c>true</c> when the encoded text equals the reference text; otherwise <c>false</c>.</returns>
public static bool VerifyUrlEncodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> encodedBytes)
{
    Span<byte> consumed = source.Slice(0, expectedConsumed);
    string standardText = Convert.ToBase64String(consumed);
    string urlSafeText = standardText.Replace('+', '-');
    urlSafeText = urlSafeText.Replace('/', '_');
    urlSafeText = urlSafeText.TrimEnd('=');

    Span<byte> written = encodedBytes.Slice(0, expectedWritten);
    string actualText = Encoding.ASCII.GetString(written);

    return string.Equals(urlSafeText, actualText);
}

public static bool VerifyDecodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> decodedBytes)
{
string sourceString = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed).ToArray());
string sourceString = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed));
byte[] expectedBytes = Convert.FromBase64String(sourceString);
return expectedBytes.AsSpan().SequenceEqual(decodedBytes.Slice(0, expectedWritten));
}

/// <summary>
/// Checks an url-decoded result against a reference produced by <see cref="Convert.FromBase64String(string)"/>.
/// The consumed input is translated back to standard Base64 ('_' to '/', '-' to '+', '%' to '=')
/// and any missing '=' padding is appended first.
/// </summary>
/// <param name="expectedConsumed">Number of leading bytes of <paramref name="source"/> that were decoded.</param>
/// <param name="expectedWritten">Number of leading bytes of <paramref name="decodedBytes"/> that hold the result.</param>
/// <param name="source">Encoded input, read as ASCII text.</param>
/// <param name="decodedBytes">Decoded output to verify.</param>
/// <returns><c>true</c> when the decoded bytes equal the reference bytes; otherwise <c>false</c>.</returns>
/// <exception cref="FormatException">The consumed input is not decodable even after padding is restored.</exception>
public static bool VerifyUrlDecodingCorrectness(int expectedConsumed, int expectedWritten, Span<byte> source, Span<byte> decodedBytes)
{
    string sourceString = Encoding.ASCII.GetString(source.Slice(0, expectedConsumed));

    // Convert.FromBase64String skips space, tab, CR and LF, so only the remaining characters
    // count towards the four-character grouping. Using the raw string length here would
    // compute the wrong amount of padding for inputs that contain ignorable whitespace.
    int significantLength = 0;
    foreach (char c in sourceString)
    {
        if (c is not (' ' or '\t' or '\r' or '\n'))
        {
            significantLength++;
        }
    }

    int missingPadding = (4 - significantLength % 4) % 4;
    string padded = missingPadding == 0 ? sourceString : sourceString + new string('=', missingPadding);

    string base64 = padded.Replace('_', '/').Replace('-', '+').Replace('%', '=');
    byte[] expectedBytes = Convert.FromBase64String(base64);
    return expectedBytes.AsSpan().SequenceEqual(decodedBytes.Slice(0, expectedWritten));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public void BasicValidationInvalidInputLengthBytes()
} while (numBytes % 4 == 0); // ensure we have a invalid length

Span<byte> source = new byte[numBytes];
Base64TestHelper.InitializeDecodableBytes(source, numBytes);

Assert.False(Base64.IsValid(source));
Assert.False(Base64.IsValid(source, out int decodedLength));
Expand All @@ -88,10 +89,16 @@ public void BasicValidationInvalidInputLengthChars()
numBytes = rnd.Next(100, 1000 * 1000);
} while (numBytes % 4 == 0); // ensure we have a invalid length

Span<char> source = new char[numBytes];
Span<byte> source = new byte[numBytes];
Base64TestHelper.InitializeDecodableBytes(source, numBytes);
Span<char> chars = source
.ToArray()
.Select(Convert.ToChar)
.ToArray()
.AsSpan();

Assert.False(Base64.IsValid(source));
Assert.False(Base64.IsValid(source, out int decodedLength));
Assert.False(Base64.IsValid(chars));
Assert.False(Base64.IsValid(chars, out int decodedLength));
Assert.Equal(0, decodedLength);
}
}
Expand Down Expand Up @@ -267,7 +274,7 @@ public void InvalidSizeBytes(string utf8WithByteToBeIgnored)
[InlineData("Y")]
public void InvalidSizeChars(string utf8WithByteToBeIgnored)
{
byte[] utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithByteToBeIgnored);
ReadOnlySpan<char> utf8BytesWithByteToBeIgnored = utf8WithByteToBeIgnored;

Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored));
Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored, out int decodedLength));
Expand Down Expand Up @@ -329,10 +336,10 @@ public void InvalidBase64Bytes(string utf8WithByteToBeIgnored)
[InlineData(" a ")]
public void InvalidBase64Chars(string utf8WithByteToBeIgnored)
{
byte[] utf8BytesWithByteToBeIgnored = UTF8Encoding.UTF8.GetBytes(utf8WithByteToBeIgnored);
ReadOnlySpan<char> utf8CharsWithCharToBeIgnored = utf8WithByteToBeIgnored;

Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored));
Assert.False(Base64.IsValid(utf8BytesWithByteToBeIgnored, out int decodedLength));
Assert.False(Base64.IsValid(utf8CharsWithCharToBeIgnored));
Assert.False(Base64.IsValid(utf8CharsWithCharToBeIgnored, out int decodedLength));
Assert.Equal(0, decodedLength);
}
}
Expand Down
Loading
Loading