diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 3a316d8f740891..9de591aaaa9618 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Binary; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Numerics; @@ -1129,21 +1130,23 @@ private static unsafe nuint UnalignedCountVector128(ref byte searchSpace) public static void Reverse(ref byte buf, nuint length) { - if (Avx2.IsSupported && (nuint)Vector256.Count * 2 <= length) + Debug.Assert(length > 1); + + nint remainder = (nint)length; + nint offset = 0; + + if (Avx2.IsSupported && remainder >= Vector256.Count) { Vector256 reverseMask = Vector256.Create( (byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, // first 128-bit lane 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); // second 128-bit lane - nuint numElements = (nuint)Vector256.Count; - nuint numIters = (length / numElements) / 2; - for (nuint i = 0; i < numIters; i++) - { - nuint firstOffset = i * numElements; - nuint lastOffset = length - ((1 + i) * numElements); - // Load in values from beginning and end of the array. - Vector256 tempFirst = Vector256.LoadUnsafe(ref buf, firstOffset); - Vector256 tempLast = Vector256.LoadUnsafe(ref buf, lastOffset); + nint lastOffset = remainder - Vector256.Count; + do + { + // Load the values into vectors + Vector256 tempFirst = Vector256.LoadUnsafe(ref buf, (nuint)offset); + Vector256 tempLast = Vector256.LoadUnsafe(ref buf, (nuint)lastOffset); // Avx2 operates on two 128-bit lanes rather than the full 256-bit vector. // Perform a shuffle to reverse each 128-bit lane, then permute to finish reversing the vector: @@ -1170,24 +1173,23 @@ public static void Reverse(ref byte buf, nuint length) tempLast = Avx2.Permute2x128(tempLast, tempLast, 0b00_01); // Store the reversed vectors - tempLast.StoreUnsafe(ref buf, firstOffset); - tempFirst.StoreUnsafe(ref buf, lastOffset); - } - buf = ref Unsafe.Add(ref buf, numIters * numElements); - length -= numIters * numElements * 2; + tempLast.StoreUnsafe(ref buf, (nuint)offset); + tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset); + + offset += Vector256.Count; + lastOffset -= Vector256.Count; + } while (lastOffset >= offset); + + remainder = lastOffset + Vector256.Count - offset; } - else if (Vector128.IsHardwareAccelerated && (nuint)Vector128.Count * 2 <= length) + else if (Vector128.IsHardwareAccelerated && remainder >= Vector128.Count) { - nuint numElements = (nuint)Vector128.Count; - nuint numIters = (length / numElements) / 2; - for (nuint i = 0; i < numIters; i++) + nint lastOffset = remainder - Vector128.Count; + do { - nuint firstOffset = i * numElements; - nuint lastOffset = length - ((1 + i) * numElements); - - // Load in values from beginning and end of the array. - Vector128 tempFirst = Vector128.LoadUnsafe(ref buf, firstOffset); - Vector128 tempLast = Vector128.LoadUnsafe(ref buf, lastOffset); + // Load the values into vectors + Vector128 tempFirst = Vector128.LoadUnsafe(ref buf, (nuint)offset); + Vector128 tempLast = Vector128.LoadUnsafe(ref buf, (nuint)lastOffset); // Shuffle to reverse each vector: // +---------------------------------------------------------------+ @@ -1203,15 +1205,58 @@ public static void Reverse(ref byte buf, nuint length) (byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); // Store the reversed vectors - tempLast.StoreUnsafe(ref buf, firstOffset); - tempFirst.StoreUnsafe(ref buf, lastOffset); - } - buf = ref Unsafe.Add(ref buf, numIters * numElements); - length -= numIters * numElements * 2; + tempLast.StoreUnsafe(ref buf, (nuint)offset); + tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset); + + offset += Vector128.Count; + lastOffset -= Vector128.Count; + } while (lastOffset >= offset); + + remainder = lastOffset + Vector128.Count - offset; + } + + if (remainder >= sizeof(long)) + { + nint lastOffset = (nint)length - offset - sizeof(long); + do + { + long tempFirst = Unsafe.ReadUnaligned(ref Unsafe.Add(ref buf, offset)); + long tempLast = Unsafe.ReadUnaligned(ref Unsafe.Add(ref buf, lastOffset)); + + // swap and store in reversed position + Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, offset), BinaryPrimitives.ReverseEndianness(tempLast)); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, lastOffset), BinaryPrimitives.ReverseEndianness(tempFirst)); + + offset += sizeof(long); + lastOffset -= sizeof(long); + } while (lastOffset >= offset); + + remainder = lastOffset + sizeof(long) - offset; } - // Store any remaining values one-by-one - ReverseInner(ref buf, length); + if (remainder >= sizeof(int)) + { + nint lastOffset = (nint)length - offset - sizeof(int); + do + { + int tempFirst = Unsafe.ReadUnaligned(ref Unsafe.Add(ref buf, offset)); + int tempLast = Unsafe.ReadUnaligned(ref Unsafe.Add(ref buf, lastOffset)); + + // swap and store in reversed position + Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, offset), BinaryPrimitives.ReverseEndianness(tempLast)); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, lastOffset), BinaryPrimitives.ReverseEndianness(tempFirst)); + + offset += sizeof(int); + lastOffset -= sizeof(int); + } while (lastOffset >= offset); + + remainder = lastOffset + sizeof(int) - offset; + } + + if (remainder > 1) + { + ReverseInner(ref Unsafe.Add(ref buf, offset), (nuint)remainder); + } } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 8a44db8cc3aed8..ca1af16e719c66 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -733,23 +733,25 @@ private static unsafe nint UnalignedCountVector128(ref char searchSpace) public static void Reverse(ref char buf, nuint length) { - if (Avx2.IsSupported && (nuint)Vector256.Count * 2 <= length) + Debug.Assert(length > 1); + + nint remainder = (nint)length; + nint offset = 0; + + if (Avx2.IsSupported && remainder >= Vector256.Count) { - ref byte bufByte = ref Unsafe.As(ref buf); - nuint byteLength = length * sizeof(char); Vector256 reverseMask = Vector256.Create( (byte)14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, // first 128-bit lane 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); // second 128-bit lane - nuint numElements = (nuint)Vector256.Count; - nuint numIters = (byteLength / numElements) / 2; - for (nuint i = 0; i < numIters; i++) + + nint lastOffset = remainder - Vector256.Count; + do { - nuint firstOffset = i * numElements; - nuint lastOffset = byteLength - ((1 + i) * numElements); + ref byte first = ref Unsafe.As(ref Unsafe.Add(ref buf, offset)); + ref byte last = ref Unsafe.As(ref Unsafe.Add(ref buf, lastOffset)); - // Load in values from beginning and end of the array. - Vector256 tempFirst = Vector256.LoadUnsafe(ref bufByte, firstOffset); - Vector256 tempLast = Vector256.LoadUnsafe(ref bufByte, lastOffset); + Vector256 tempFirst = Vector256.LoadUnsafe(ref first); + Vector256 tempLast = Vector256.LoadUnsafe(ref last); // Avx2 operates on two 128-bit lanes rather than the full 256-bit vector. // Perform a shuffle to reverse each 128-bit lane, then permute to finish reversing the vector: @@ -770,27 +772,25 @@ public static void Reverse(ref char buf, nuint length) tempLast = Avx2.Permute2x128(tempLast, tempLast, 0b00_01); // Store the reversed vectors - tempLast.StoreUnsafe(ref bufByte, firstOffset); - tempFirst.StoreUnsafe(ref bufByte, lastOffset); - } - bufByte = ref Unsafe.Add(ref bufByte, numIters * numElements); - length -= numIters * (nuint)Vector256.Count * 2; - // Store any remaining values one-by-one - buf = ref Unsafe.As(ref bufByte); + tempLast.StoreUnsafe(ref first); + tempFirst.StoreUnsafe(ref last); + + offset += Vector256.Count; + lastOffset -= Vector256.Count; + } while (lastOffset >= offset); + + remainder = (lastOffset + Vector256.Count - offset); } - else if (Vector128.IsHardwareAccelerated && (nuint)Vector128.Count * 2 <= length) + else if (Vector128.IsHardwareAccelerated && remainder >= Vector128.Count) { - ref short bufShort = ref Unsafe.As(ref buf); - nuint numElements = (nuint)Vector128.Count; - nuint numIters = (length / numElements) / 2; - for (nuint i = 0; i < numIters; i++) + nint lastOffset = remainder - Vector128.Count; + do { - nuint firstOffset = i * numElements; - nuint lastOffset = length - ((1 + i) * numElements); + ref ushort first = ref Unsafe.As(ref Unsafe.Add(ref buf, offset)); + ref ushort last = ref Unsafe.As(ref Unsafe.Add(ref buf, lastOffset)); - // Load in values from beginning and end of the array. - Vector128 tempFirst = Vector128.LoadUnsafe(ref bufShort, firstOffset); - Vector128 tempLast = Vector128.LoadUnsafe(ref bufShort, lastOffset); + Vector128 tempFirst = Vector128.LoadUnsafe(ref first); + Vector128 tempLast = Vector128.LoadUnsafe(ref last); // Shuffle to reverse each vector: // +-------------------------------+ @@ -800,19 +800,25 @@ public static void Reverse(ref char buf, nuint length) // +-------------------------------+ // | H | G | F | E | D | C | B | A | // +-------------------------------+ - tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(7, 6, 5, 4, 3, 2, 1, 0)); - tempLast = Vector128.Shuffle(tempLast, Vector128.Create(7, 6, 5, 4, 3, 2, 1, 0)); + tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0)); + tempLast = Vector128.Shuffle(tempLast, Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0)); // Store the reversed vectors - tempLast.StoreUnsafe(ref bufShort, firstOffset); - tempFirst.StoreUnsafe(ref bufShort, lastOffset); - } - bufShort = ref Unsafe.Add(ref bufShort, numIters * numElements); - length -= numIters * (nuint)Vector128.Count * 2; - // Store any remaining values one-by-one - buf = ref Unsafe.As(ref bufShort); + tempLast.StoreUnsafe(ref first); + tempFirst.StoreUnsafe(ref last); + + offset += Vector128.Count; + lastOffset -= Vector128.Count; + } while (lastOffset >= offset); + + remainder = (lastOffset + Vector128.Count - offset); + } + + // Store any remaining values one-by-one + if (remainder > 1) + { + ReverseInner(ref Unsafe.Add(ref buf, offset), (nuint)remainder); } - ReverseInner(ref buf, length); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index fa40a769f92e36..dbc9b6e79b7351 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -408,19 +408,19 @@ public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLe public static void Reverse(ref int buf, nuint length) { - if (Avx2.IsSupported && (nuint)Vector256.Count * 2 <= length) + Debug.Assert(length > 1); + + nint remainder = (nint)length; + nint offset = 0; + + if (Avx2.IsSupported && remainder >= Vector256.Count) { - nuint numElements = (nuint)Vector256.Count; - nuint numIters = (length / numElements) / 2; - Vector256 reverseMask = Vector256.Create(7, 6, 5, 4, 3, 2, 1, 0); - for (nuint i = 0; i < numIters; i++) + nint lastOffset = remainder - Vector256.Count; + do { - nuint firstOffset = i * numElements; - nuint lastOffset = length - ((1 + i) * numElements); - // Load the values into vectors - Vector256 tempFirst = Vector256.LoadUnsafe(ref buf, firstOffset); - Vector256 tempLast = Vector256.LoadUnsafe(ref buf, lastOffset); + Vector256 tempFirst = Vector256.LoadUnsafe(ref buf, (nuint)offset); + Vector256 tempLast = Vector256.LoadUnsafe(ref buf, (nuint)lastOffset); // Permute to reverse each vector: // +-------------------------------+ @@ -430,28 +430,27 @@ public static void Reverse(ref int buf, nuint length) // +-------------------------------+ // | H | G | F | E | D | C | B | A | // +-------------------------------+ - tempFirst = Avx2.PermuteVar8x32(tempFirst, reverseMask); - tempLast = Avx2.PermuteVar8x32(tempLast, reverseMask); + tempFirst = Avx2.PermuteVar8x32(tempFirst, Vector256.Create(7, 6, 5, 4, 3, 2, 1, 0)); + tempLast = Avx2.PermuteVar8x32(tempLast, Vector256.Create(7, 6, 5, 4, 3, 2, 1, 0)); - // Store the values into final location - tempLast.StoreUnsafe(ref buf, firstOffset); - tempFirst.StoreUnsafe(ref buf, lastOffset); - } - buf = ref Unsafe.Add(ref buf, numIters * numElements); - length -= numIters * numElements * 2; + // Store the reversed vectors + tempLast.StoreUnsafe(ref buf, (nuint)offset); + tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset); + + offset += Vector256.Count; + lastOffset -= Vector256.Count; + } while (lastOffset >= offset); + + remainder = lastOffset + Vector256.Count - offset; } - else if (Vector128.IsHardwareAccelerated && (nuint)Vector128.Count * 2 <= length) + else if (Vector128.IsHardwareAccelerated && remainder >= Vector128.Count) { - nuint numElements = (nuint)Vector128.Count; - nuint numIters = (length / numElements) / 2; - for (nuint i = 0; i < numIters; i++) + nint lastOffset = remainder - Vector128.Count; + do { - nuint firstOffset = i * numElements; - nuint lastOffset = length - ((1 + i) * numElements); - - // Load the values into vectors - Vector128 tempFirst = Vector128.LoadUnsafe(ref buf, firstOffset); - Vector128 tempLast = Vector128.LoadUnsafe(ref buf, lastOffset); + // Load in values from beginning and end of the array. + Vector128 tempFirst = Vector128.LoadUnsafe(ref buf, (nuint)offset); + Vector128 tempLast = Vector128.LoadUnsafe(ref buf, (nuint)lastOffset); // Shuffle to reverse each vector: // +---------------+ @@ -464,30 +463,39 @@ public static void Reverse(ref int buf, nuint length) tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(3, 2, 1, 0)); tempLast = Vector128.Shuffle(tempLast, Vector128.Create(3, 2, 1, 0)); - // Store the values into final location - tempLast.StoreUnsafe(ref buf, firstOffset); - tempFirst.StoreUnsafe(ref buf, lastOffset); - } - buf = ref Unsafe.Add(ref buf, numIters * numElements); - length -= numIters * numElements * 2; + // Store the reversed vectors + tempLast.StoreUnsafe(ref buf, (nuint)offset); + tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset); + + offset += Vector128.Count; + lastOffset -= Vector128.Count; + } while (lastOffset >= offset); + + remainder = lastOffset + Vector128.Count - offset; } - ReverseInner(ref buf, length); + // Store any remaining values one-by-one + if (remainder > 1) + { + ReverseInner(ref Unsafe.Add(ref buf, offset), (nuint)remainder); + } } public static void Reverse(ref long buf, nuint length) { - if (Avx2.IsSupported && (nuint)Vector256.Count * 2 <= length) + Debug.Assert(length > 1); + + nint remainder = (nint)length; + nint offset = 0; + + if (Avx2.IsSupported && remainder >= Vector256.Count) { - nuint numElements = (nuint)Vector256.Count; - nuint numIters = (length / numElements) / 2; - for (nuint i = 0; i < numIters; i++) + nint lastOffset = remainder - Vector256.Count; + do { - nuint firstOffset = i * numElements; - nuint lastOffset = length - ((1 + i) * numElements); // Load the values into vectors - Vector256 tempFirst = Vector256.LoadUnsafe(ref buf, firstOffset); - Vector256 tempLast = Vector256.LoadUnsafe(ref buf, lastOffset); + Vector256 tempFirst = Vector256.LoadUnsafe(ref buf, (nuint)offset); + Vector256 tempLast = Vector256.LoadUnsafe(ref buf, (nuint)lastOffset); // Permute to reverse each vector: // +---------------+ @@ -500,24 +508,24 @@ public static void Reverse(ref long buf, nuint length) tempFirst = Avx2.Permute4x64(tempFirst, 0b00_01_10_11); tempLast = Avx2.Permute4x64(tempLast, 0b00_01_10_11); - // Store the values into final location - tempLast.StoreUnsafe(ref buf, firstOffset); - tempFirst.StoreUnsafe(ref buf, lastOffset); - } - buf = ref Unsafe.Add(ref buf, numIters * numElements); - length -= numIters * numElements * 2; + // Store the reversed vectors + tempLast.StoreUnsafe(ref buf, (nuint)offset); + tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset); + + offset += Vector256.Count; + lastOffset -= Vector256.Count; + } while (lastOffset >= offset); + + remainder = lastOffset + Vector256.Count - offset; } - else if (Vector128.IsHardwareAccelerated && (nuint)Vector128.Count * 2 <= length) + else if (Vector128.IsHardwareAccelerated && remainder >= Vector128.Count) { - nuint numElements = (nuint)Vector128.Count; - nuint numIters = (length / numElements) / 2; - for (nuint i = 0; i < numIters; i++) + nint lastOffset = remainder - Vector128.Count; + do { - nuint firstOffset = i * numElements; - nuint lastOffset = length - ((1 + i) * numElements); - // Load the values into vectors - Vector128 tempFirst = Vector128.LoadUnsafe(ref buf, firstOffset); - Vector128 tempLast = Vector128.LoadUnsafe(ref buf, lastOffset); + // Load in values from beginning and end of the array. + Vector128 tempFirst = Vector128.LoadUnsafe(ref buf, (nuint)offset); + Vector128 tempLast = Vector128.LoadUnsafe(ref buf, (nuint)lastOffset); // Shuffle to reverse each vector: // +-------+ @@ -530,22 +538,29 @@ public static void Reverse(ref long buf, nuint length) tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(1, 0)); tempLast = Vector128.Shuffle(tempLast, Vector128.Create(1, 0)); - // Store the values into final location - tempLast.StoreUnsafe(ref buf, firstOffset); - tempFirst.StoreUnsafe(ref buf, lastOffset); - } - buf = ref Unsafe.Add(ref buf, numIters * numElements); - length -= numIters * (nuint)Vector128.Count * 2; + // Store the reversed vectors + tempLast.StoreUnsafe(ref buf, (nuint)offset); + tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset); + + offset += Vector128.Count; + lastOffset -= Vector128.Count; + } while (lastOffset >= offset); + + remainder = lastOffset + Vector128.Count - offset; } // Store any remaining values one-by-one - ReverseInner(ref buf, length); + if (remainder > 1) + { + ReverseInner(ref Unsafe.Add(ref buf, offset), (nuint)remainder); + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Reverse(ref T elements, nuint length) { - Debug.Assert(length > 0); + Debug.Assert(length > 1); + if (!RuntimeHelpers.IsReferenceOrContainsReferences()) { if (Unsafe.SizeOf() == sizeof(byte)) @@ -569,6 +584,7 @@ public static void Reverse(ref T elements, nuint length) return; } } + ReverseInner(ref elements, length); } @@ -576,10 +592,10 @@ public static void Reverse(ref T elements, nuint length) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void ReverseInner(ref T elements, nuint length) { - if (length <= 1) - return; + Debug.Assert(length > 1); + ref T first = ref elements; - ref T last = ref Unsafe.Subtract(ref Unsafe.Add(ref first, (int)length), 1); + ref T last = ref Unsafe.Subtract(ref Unsafe.Add(ref first, length), 1); do { T temp = first;