Skip to content

Commit

Permalink
Revert "Improve Span.Reverse fast path performance (#70944)" (#78605)
Browse the repository at this point in the history
This reverts commit 6ddd06c.
  • Loading branch information
stephentoub authored Nov 20, 2022
1 parent 3ebeb75 commit de005e5
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 206 deletions.
109 changes: 32 additions & 77 deletions src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers.Binary;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Numerics;
Expand Down Expand Up @@ -1130,23 +1129,21 @@ private static unsafe nuint UnalignedCountVector128(ref byte searchSpace)

public static void Reverse(ref byte buf, nuint length)
{
Debug.Assert(length > 1);

nint remainder = (nint)length;
nint offset = 0;

if (Avx2.IsSupported && remainder >= Vector256<byte>.Count)
if (Avx2.IsSupported && (nuint)Vector256<byte>.Count * 2 <= length)
{
Vector256<byte> reverseMask = Vector256.Create(
(byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, // first 128-bit lane
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); // second 128-bit lane

nint lastOffset = remainder - Vector256<byte>.Count;
do
nuint numElements = (nuint)Vector256<byte>.Count;
nuint numIters = (length / numElements) / 2;
for (nuint i = 0; i < numIters; i++)
{
// Load the values into vectors
Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref buf, (nuint)offset);
Vector256<byte> tempLast = Vector256.LoadUnsafe(ref buf, (nuint)lastOffset);
nuint firstOffset = i * numElements;
nuint lastOffset = length - ((1 + i) * numElements);

// Load in values from beginning and end of the array.
Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref buf, firstOffset);
Vector256<byte> tempLast = Vector256.LoadUnsafe(ref buf, lastOffset);

// Avx2 operates on two 128-bit lanes rather than the full 256-bit vector.
// Perform a shuffle to reverse each 128-bit lane, then permute to finish reversing the vector:
Expand All @@ -1173,23 +1170,24 @@ public static void Reverse(ref byte buf, nuint length)
tempLast = Avx2.Permute2x128(tempLast, tempLast, 0b00_01);

// Store the reversed vectors
tempLast.StoreUnsafe(ref buf, (nuint)offset);
tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset);

offset += Vector256<byte>.Count;
lastOffset -= Vector256<byte>.Count;
} while (lastOffset >= offset);

remainder = lastOffset + Vector256<byte>.Count - offset;
tempLast.StoreUnsafe(ref buf, firstOffset);
tempFirst.StoreUnsafe(ref buf, lastOffset);
}
buf = ref Unsafe.Add(ref buf, numIters * numElements);
length -= numIters * numElements * 2;
}
else if (Vector128.IsHardwareAccelerated && remainder >= Vector128<byte>.Count)
else if (Vector128.IsHardwareAccelerated && (nuint)Vector128<byte>.Count * 2 <= length)
{
nint lastOffset = remainder - Vector128<byte>.Count;
do
nuint numElements = (nuint)Vector128<byte>.Count;
nuint numIters = (length / numElements) / 2;
for (nuint i = 0; i < numIters; i++)
{
// Load the values into vectors
Vector128<byte> tempFirst = Vector128.LoadUnsafe(ref buf, (nuint)offset);
Vector128<byte> tempLast = Vector128.LoadUnsafe(ref buf, (nuint)lastOffset);
nuint firstOffset = i * numElements;
nuint lastOffset = length - ((1 + i) * numElements);

// Load in values from beginning and end of the array.
Vector128<byte> tempFirst = Vector128.LoadUnsafe(ref buf, firstOffset);
Vector128<byte> tempLast = Vector128.LoadUnsafe(ref buf, lastOffset);

// Shuffle to reverse each vector:
// +---------------------------------------------------------------+
Expand All @@ -1205,58 +1203,15 @@ public static void Reverse(ref byte buf, nuint length)
(byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));

// Store the reversed vectors
tempLast.StoreUnsafe(ref buf, (nuint)offset);
tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset);

offset += Vector128<byte>.Count;
lastOffset -= Vector128<byte>.Count;
} while (lastOffset >= offset);

remainder = lastOffset + Vector128<byte>.Count - offset;
}

if (remainder >= sizeof(long))
{
nint lastOffset = (nint)length - offset - sizeof(long);
do
{
long tempFirst = Unsafe.ReadUnaligned<long>(ref Unsafe.Add(ref buf, offset));
long tempLast = Unsafe.ReadUnaligned<long>(ref Unsafe.Add(ref buf, lastOffset));

// swap and store in reversed position
Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, offset), BinaryPrimitives.ReverseEndianness(tempLast));
Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, lastOffset), BinaryPrimitives.ReverseEndianness(tempFirst));

offset += sizeof(long);
lastOffset -= sizeof(long);
} while (lastOffset >= offset);

remainder = lastOffset + sizeof(long) - offset;
}

if (remainder >= sizeof(int))
{
nint lastOffset = (nint)length - offset - sizeof(int);
do
{
int tempFirst = Unsafe.ReadUnaligned<int>(ref Unsafe.Add(ref buf, offset));
int tempLast = Unsafe.ReadUnaligned<int>(ref Unsafe.Add(ref buf, lastOffset));

// swap and store in reversed position
Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, offset), BinaryPrimitives.ReverseEndianness(tempLast));
Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, lastOffset), BinaryPrimitives.ReverseEndianness(tempFirst));

offset += sizeof(int);
lastOffset -= sizeof(int);
} while (lastOffset >= offset);

remainder = lastOffset + sizeof(int) - offset;
tempLast.StoreUnsafe(ref buf, firstOffset);
tempFirst.StoreUnsafe(ref buf, lastOffset);
}
buf = ref Unsafe.Add(ref buf, numIters * numElements);
length -= numIters * numElements * 2;
}

if (remainder > 1)
{
ReverseInner(ref Unsafe.Add(ref buf, offset), (nuint)remainder);
}
// Store any remaining values one-by-one
ReverseInner(ref buf, length);
}
}
}
82 changes: 38 additions & 44 deletions src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Original file line number Diff line number Diff line change
Expand Up @@ -733,25 +733,23 @@ private static unsafe nint UnalignedCountVector128(ref char searchSpace)

public static void Reverse(ref char buf, nuint length)
{
Debug.Assert(length > 1);

nint remainder = (nint)length;
nint offset = 0;

if (Avx2.IsSupported && remainder >= Vector256<ushort>.Count)
if (Avx2.IsSupported && (nuint)Vector256<short>.Count * 2 <= length)
{
ref byte bufByte = ref Unsafe.As<char, byte>(ref buf);
nuint byteLength = length * sizeof(char);
Vector256<byte> reverseMask = Vector256.Create(
(byte)14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, // first 128-bit lane
14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); // second 128-bit lane

nint lastOffset = remainder - Vector256<ushort>.Count;
do
nuint numElements = (nuint)Vector256<byte>.Count;
nuint numIters = (byteLength / numElements) / 2;
for (nuint i = 0; i < numIters; i++)
{
ref byte first = ref Unsafe.As<char, byte>(ref Unsafe.Add(ref buf, offset));
ref byte last = ref Unsafe.As<char, byte>(ref Unsafe.Add(ref buf, lastOffset));
nuint firstOffset = i * numElements;
nuint lastOffset = byteLength - ((1 + i) * numElements);

Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref first);
Vector256<byte> tempLast = Vector256.LoadUnsafe(ref last);
// Load in values from beginning and end of the array.
Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref bufByte, firstOffset);
Vector256<byte> tempLast = Vector256.LoadUnsafe(ref bufByte, lastOffset);

// Avx2 operates on two 128-bit lanes rather than the full 256-bit vector.
// Perform a shuffle to reverse each 128-bit lane, then permute to finish reversing the vector:
Expand All @@ -772,25 +770,27 @@ public static void Reverse(ref char buf, nuint length)
tempLast = Avx2.Permute2x128(tempLast, tempLast, 0b00_01);

// Store the reversed vectors
tempLast.StoreUnsafe(ref first);
tempFirst.StoreUnsafe(ref last);

offset += Vector256<ushort>.Count;
lastOffset -= Vector256<ushort>.Count;
} while (lastOffset >= offset);

remainder = (lastOffset + Vector256<ushort>.Count - offset);
tempLast.StoreUnsafe(ref bufByte, firstOffset);
tempFirst.StoreUnsafe(ref bufByte, lastOffset);
}
bufByte = ref Unsafe.Add(ref bufByte, numIters * numElements);
length -= numIters * (nuint)Vector256<short>.Count * 2;
// Store any remaining values one-by-one
buf = ref Unsafe.As<byte, char>(ref bufByte);
}
else if (Vector128.IsHardwareAccelerated && remainder >= Vector128<ushort>.Count)
else if (Vector128.IsHardwareAccelerated && (nuint)Vector128<short>.Count * 2 <= length)
{
nint lastOffset = remainder - Vector128<ushort>.Count;
do
ref short bufShort = ref Unsafe.As<char, short>(ref buf);
nuint numElements = (nuint)Vector128<short>.Count;
nuint numIters = (length / numElements) / 2;
for (nuint i = 0; i < numIters; i++)
{
ref ushort first = ref Unsafe.As<char, ushort>(ref Unsafe.Add(ref buf, offset));
ref ushort last = ref Unsafe.As<char, ushort>(ref Unsafe.Add(ref buf, lastOffset));
nuint firstOffset = i * numElements;
nuint lastOffset = length - ((1 + i) * numElements);

Vector128<ushort> tempFirst = Vector128.LoadUnsafe(ref first);
Vector128<ushort> tempLast = Vector128.LoadUnsafe(ref last);
// Load in values from beginning and end of the array.
Vector128<short> tempFirst = Vector128.LoadUnsafe(ref bufShort, firstOffset);
Vector128<short> tempLast = Vector128.LoadUnsafe(ref bufShort, lastOffset);

// Shuffle to reverse each vector:
// +-------------------------------+
Expand All @@ -800,25 +800,19 @@ public static void Reverse(ref char buf, nuint length)
// +-------------------------------+
// | H | G | F | E | D | C | B | A |
// +-------------------------------+
tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0));
tempLast = Vector128.Shuffle(tempLast, Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0));
tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(7, 6, 5, 4, 3, 2, 1, 0));
tempLast = Vector128.Shuffle(tempLast, Vector128.Create(7, 6, 5, 4, 3, 2, 1, 0));

// Store the reversed vectors
tempLast.StoreUnsafe(ref first);
tempFirst.StoreUnsafe(ref last);

offset += Vector128<ushort>.Count;
lastOffset -= Vector128<ushort>.Count;
} while (lastOffset >= offset);

remainder = (lastOffset + Vector128<ushort>.Count - offset);
}

// Store any remaining values one-by-one
if (remainder > 1)
{
ReverseInner(ref Unsafe.Add(ref buf, offset), (nuint)remainder);
tempLast.StoreUnsafe(ref bufShort, firstOffset);
tempFirst.StoreUnsafe(ref bufShort, lastOffset);
}
bufShort = ref Unsafe.Add(ref bufShort, numIters * numElements);
length -= numIters * (nuint)Vector128<short>.Count * 2;
// Store any remaining values one-by-one
buf = ref Unsafe.As<short, char>(ref bufShort);
}
ReverseInner(ref buf, length);
}
}
}
Loading

0 comments on commit de005e5

Please sign in to comment.