From a44adae7e85876ba1d9aafbfd40536e723919da3 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Thu, 15 Apr 2021 19:40:08 -0700 Subject: [PATCH 1/5] Span.Fill perf improvements --- .../System.Memory/tests/Span/Fill.cs | 51 ++++++ .../System.Private.CoreLib/src/System/Span.cs | 49 +----- .../src/System/SpanHelpers.T.cs | 149 +++++++++++++++++- 3 files changed, 207 insertions(+), 42 deletions(-) diff --git a/src/libraries/System.Memory/tests/Span/Fill.cs b/src/libraries/System.Memory/tests/Span/Fill.cs index 558291da9b4e4..5bbba5ea5e2af 100644 --- a/src/libraries/System.Memory/tests/Span/Fill.cs +++ b/src/libraries/System.Memory/tests/Span/Fill.cs @@ -1,6 +1,9 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Collections.Generic; +using System.Linq; +using System.Reflection; using System.Runtime.InteropServices; using Xunit; using static System.TestHelpers; @@ -147,5 +150,53 @@ public static unsafe void FillNativeBytes() Marshal.FreeHGlobal(new IntPtr(ptr)); } } + + public static IEnumerable FillData + { + get + { + yield return new object[] { typeof(sbyte), (sbyte)0x20 }; + yield return new object[] { typeof(byte), (byte)0x20 }; + yield return new object[] { typeof(bool), true }; + yield return new object[] { typeof(short), (short)0x1234 }; + yield return new object[] { typeof(ushort), (ushort)0x1234 }; + yield return new object[] { typeof(char), 'x' }; + yield return new object[] { typeof(int), (int)0x12345678 }; + yield return new object[] { typeof(uint), (uint)0x12345678 }; + yield return new object[] { typeof(long), (long)0x0123456789abcdef }; + yield return new object[] { typeof(ulong), (ulong)0x0123456789abcdef }; + yield return new object[] { typeof(nint), unchecked((nint)0x0123456789abcdef) }; + yield return new object[] { typeof(nuint), unchecked((nuint)0x0123456789abcdef) }; + yield return new object[] { typeof(float), 1.0f }; + yield return new object[] { typeof(double), 1.0d }; + yield return new object[] { typeof(string), "Hello world!" }; // shouldn't go down SIMD path + yield return new object[] { typeof(decimal), 1.0m }; // shouldn't go down SIMD path + } + } + + [Theory] + [MemberData(nameof(FillData))] + public static void FillWithRecognizedType(Type expectedType, object value) + { + Assert.IsType(expectedType, value); + typeof(SpanTests).GetMethod(nameof(FillWithRecognizedType_RunTest), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static) + .MakeGenericMethod(expectedType) + .Invoke(null, BindingFlags.DoNotWrapExceptions, null, new object[] { value }, null); + } + + private static void FillWithRecognizedType_RunTest(T value) + { + T[] arr = new T[128]; + + // Run tests for lengths := 0 to 64, ensuring we don't overrun our buffer + + for (int i = 0; i <= 64; i++) + { + arr.AsSpan(0, i).Fill(value); + Assert.Equal(Enumerable.Repeat(value, i), arr.Take(i)); // first i entries should've been populated with 'value' + Assert.Equal(Enumerable.Repeat(default(T), arr.Length - i), arr.Skip(i)); // remaining entries should contain default(T) + Array.Clear(arr, 0, arr.Length); + } + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Span.cs b/src/libraries/System.Private.CoreLib/src/System/Span.cs index 60de1c0328665..6042331d30529 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Span.cs @@ -5,7 +5,6 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Versioning; -using System.Text; using EditorBrowsableAttribute = System.ComponentModel.EditorBrowsableAttribute; using EditorBrowsableState = System.ComponentModel.EditorBrowsableState; using Internal.Runtime.CompilerServices; @@ -280,53 +279,21 @@ public unsafe void Clear() /// /// Fills the contents of this span with the given value. /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Fill(T value) { if (Unsafe.SizeOf() == 1) { - uint length = (uint)_length; - if (length == 0) - return; - - T tmp = value; // Avoid taking address of the "value" argument. It would regress performance of the loop below. - Unsafe.InitBlockUnaligned(ref Unsafe.As(ref _pointer.Value), Unsafe.As(ref tmp), length); + // Special-case single-byte types like byte / sbyte / bool. + // The runtime eventually calls memset, which can efficiently support large buffers. + // We don't need to check IsReferenceOrContainsReferences because no references + // can ever be stored in types this small. + Unsafe.InitBlockUnaligned(ref Unsafe.As(ref _pointer.Value), Unsafe.As(ref value), (uint)_length); } else { - // Do all math as nuint to avoid unnecessary 64->32->64 bit integer truncations - nuint length = (uint)_length; - if (length == 0) - return; - - ref T r = ref _pointer.Value; - - // TODO: Create block fill for value types of power of two sizes e.g. 2,4,8,16 - - nuint elementSize = (uint)Unsafe.SizeOf(); - nuint i = 0; - for (; i < (length & ~(nuint)7); i += 8) - { - Unsafe.AddByteOffset(ref r, (i + 0) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 1) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 2) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 3) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 4) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 5) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 6) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 7) * elementSize) = value; - } - if (i < (length & ~(nuint)3)) - { - Unsafe.AddByteOffset(ref r, (i + 0) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 1) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 2) * elementSize) = value; - Unsafe.AddByteOffset(ref r, (i + 3) * elementSize) = value; - i += 4; - } - for (; i < length; i++) - { - Unsafe.AddByteOffset(ref r, i * elementSize) = value; - } + // Call our optimized workhorse method for all other types. + SpanHelpers.Fill(ref _pointer.Value, (uint)_length, value); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 2f2763f24a1a1..8572e0a25e304 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -2,13 +2,160 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; - +using System.Numerics; +using System.Runtime.CompilerServices; using Internal.Runtime.CompilerServices; namespace System { internal static partial class SpanHelpers // .T { + public static void Fill(ref T refData, uint numElements, T value) + { + // n.b. If Fill is ever adapted to work for lengths beyond uint.MaxValue, double-check + // where it's used in the arithmetic operations below to ensure no integer overflow is possible. + + if (numElements == 0) + { + return; // nothing to do + } + + if (typeof(T) != typeof(float) && typeof(T) != typeof(double) && !RuntimeHelpers.IsBitwiseEquatable()) + { + goto CannotVectorize; // it might not be valid to SIMD-spray this value into the backing span + } + + if (Vector.IsHardwareAccelerated) + { + if (numElements < (uint)(Vector.Count / Unsafe.SizeOf())) + { + goto CannotVectorize; // not enough data for even a single run through the vectorized loop + } + + Vector vector; + + if (typeof(T) == typeof(float)) + { + vector = (Vector)(new Vector((float)(object)value!)); + } + else if (typeof(T) == typeof(double)) + { + vector = (Vector)(new Vector((double)(object)value!)); + } + else + { + T tmp = value; // Avoid taking address of the "value" argument. It would regress performance of the loops below. + if (Unsafe.SizeOf() == 1) + { + vector = new Vector(Unsafe.As(ref tmp)); + } + else if (Unsafe.SizeOf() == 2) + { + vector = (Vector)(new Vector(Unsafe.As(ref tmp))); + } + else if (Unsafe.SizeOf() == 4) + { + vector = (Vector)(new Vector(Unsafe.As(ref tmp))); + } + else if (Unsafe.SizeOf() == 8) + { + vector = (Vector)(new Vector(Unsafe.As(ref tmp))); + } + else + { + Debug.Fail("This should never happen. Did we miss special-casing a bitwise equatable type?"); + goto CannotVectorize; + } + } + + nuint totalByteLength = numElements * (nuint)Unsafe.SizeOf(); // get this calculation ready ahead of time + nuint stopLoopAtOffset = totalByteLength & (nuint)(-Vector.Count * 2); + nuint offset = 0; + + // Loop, writing 2 vectors at a time. + + if (numElements >= 2 * (uint)(Vector.Count / Unsafe.SizeOf())) + { + do + { + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref Unsafe.As(ref refData), offset), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref Unsafe.As(ref refData), offset + (uint)Vector.Count), vector); + offset += 2 * (uint)Vector.Count; + } while (offset < stopLoopAtOffset); + } + + // There are [ 0, 2 * sizeof(Vector) ) bytes remaining. + // If there are >= sizeof(Vector) bytes remaining, write one vector now. + + if ((totalByteLength & (nuint)Vector.Count) != 0) + { + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref Unsafe.As(ref refData), offset), vector); + } + + // If there's any remaining space that won't fill a full vector, write a vector at + // the very end of the destination buffer. This will result in overwriting a previous + // entry, but that's ok since we're splatting the same value for all entries, so this + // won't result in data corruption. + + if ((totalByteLength & (nuint)(Vector.Count - 1)) != 0) + { + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref Unsafe.As(ref refData), totalByteLength - (uint)Vector.Count), vector); + } + + // And we're done! + + return; + } + + CannotVectorize: + + { + nuint i = 0; + nuint stopLoopAtOffset = numElements & ~(uint)7; + + // Write 8 elements at a time + + for (; i < stopLoopAtOffset; i += 8) + { + Unsafe.Add(ref refData, (nint)i + 0) = value; + Unsafe.Add(ref refData, (nint)i + 1) = value; + Unsafe.Add(ref refData, (nint)i + 2) = value; + Unsafe.Add(ref refData, (nint)i + 3) = value; + Unsafe.Add(ref refData, (nint)i + 4) = value; + Unsafe.Add(ref refData, (nint)i + 5) = value; + Unsafe.Add(ref refData, (nint)i + 6) = value; + Unsafe.Add(ref refData, (nint)i + 7) = value; + } + + // Write next 4 elements if needed + + if ((numElements & 4) != 0) + { + Unsafe.Add(ref refData, (nint)i + 0) = value; + Unsafe.Add(ref refData, (nint)i + 1) = value; + Unsafe.Add(ref refData, (nint)i + 2) = value; + Unsafe.Add(ref refData, (nint)i + 3) = value; + i += 4; + } + + // Write next 2 elements if needed + + if ((numElements & 2) != 0) + { + Unsafe.Add(ref refData, (nint)i + 0) = value; + Unsafe.Add(ref refData, (nint)i + 1) = value; + i += 2; + } + + // Write final element if needed + + if ((numElements & 1) != 0) + { + Unsafe.Add(ref refData, (nint)i) = value; + } + } + } + public static int IndexOf(ref T searchSpace, int searchSpaceLength, ref T value, int valueLength) where T : IEquatable { Debug.Assert(searchSpaceLength >= 0); From 4e7b47d5a59305b38ab2268ab7dd5d262478f447 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 16 Apr 2021 12:40:52 -0700 Subject: [PATCH 2/5] PR feedback, slight perf tweaks --- .../System.Memory/tests/Span/Fill.cs | 129 +++++++++---- .../src/System/SpanHelpers.T.cs | 178 ++++++++++-------- 2 files changed, 193 insertions(+), 114 deletions(-) diff --git a/src/libraries/System.Memory/tests/Span/Fill.cs b/src/libraries/System.Memory/tests/Span/Fill.cs index 5bbba5ea5e2af..c6741219aad93 100644 --- a/src/libraries/System.Memory/tests/Span/Fill.cs +++ b/src/libraries/System.Memory/tests/Span/Fill.cs @@ -1,9 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Collections.Generic; using System.Linq; -using System.Reflection; using System.Runtime.InteropServices; using Xunit; using static System.TestHelpers; @@ -151,52 +149,113 @@ public static unsafe void FillNativeBytes() } } - public static IEnumerable FillData + [Fact] + public static void FillWithRecognizedType() + { + RunTest(0x20); + RunTest(0x20); + RunTest(true); + RunTest(0x1234); + RunTest(0x1234); + RunTest('x'); + RunTest(0x12345678); + RunTest(0x12345678); + RunTest(0x0123456789abcdef); + RunTest(0x0123456789abcdef); + RunTest(unchecked((nint)0x0123456789abcdef)); + RunTest(unchecked((nuint)0x0123456789abcdef)); + RunTest((Half)1.0); + RunTest(1.0f); + RunTest(1.0); + RunTest(StringComparison.CurrentCultureIgnoreCase); // should be treated as underlying primitive + RunTest("Hello world!"); // ref type, no SIMD + RunTest(1.0m); // 128-bit struct + RunTest(new Guid("29e07627-2481-4f43-8fbf-09cf21180239")); // 128-bit struct + RunTest(new(0x11111111, 0x22222222, 0x33333333)); // 96-bit struct, no SIMD + RunTest(new(0x1111111111111111, 0x2222222222222222, 0x3333333333333333, 0x4444444444444444)); + RunTest(new( + 0x1111111111111111, 0x2222222222222222, 0x3333333333333333, 0x4444444444444444, + 0x5555555555555555, 0x6666666666666666, 0x7777777777777777, 0x8888888888888888)); // 512-bit struct, no SIMD + RunTest(new("Hello world!")); // struct contains refs, no SIMD + + static void RunTest(T value) + { + T[] arr = new T[128]; + + // Run tests for lengths := 0 to 64, ensuring we don't overrun our buffer + + for (int i = 0; i <= 64; i++) + { + arr.AsSpan(0, i).Fill(value); + Assert.Equal(Enumerable.Repeat(value, i), arr.Take(i)); // first i entries should've been populated with 'value' + Assert.Equal(Enumerable.Repeat(default(T), arr.Length - i), arr.Skip(i)); // remaining entries should contain default(T) + Array.Clear(arr, 0, arr.Length); + } + } + } + + private readonly struct My96BitStruct { - get + public My96BitStruct(int data0, int data1, int data2) { - yield return new object[] { typeof(sbyte), (sbyte)0x20 }; - yield return new object[] { typeof(byte), (byte)0x20 }; - yield return new object[] { typeof(bool), true }; - yield return new object[] { typeof(short), (short)0x1234 }; - yield return new object[] { typeof(ushort), (ushort)0x1234 }; - yield return new object[] { typeof(char), 'x' }; - yield return new object[] { typeof(int), (int)0x12345678 }; - yield return new object[] { typeof(uint), (uint)0x12345678 }; - yield return new object[] { typeof(long), (long)0x0123456789abcdef }; - yield return new object[] { typeof(ulong), (ulong)0x0123456789abcdef }; - yield return new object[] { typeof(nint), unchecked((nint)0x0123456789abcdef) }; - yield return new object[] { typeof(nuint), unchecked((nuint)0x0123456789abcdef) }; - yield return new object[] { typeof(float), 1.0f }; - yield return new object[] { typeof(double), 1.0d }; - yield return new object[] { typeof(string), "Hello world!" }; // shouldn't go down SIMD path - yield return new object[] { typeof(decimal), 1.0m }; // shouldn't go down SIMD path + Data0 = data0; + Data1 = data1; + Data2 = data2; } + + public readonly int Data0; + public readonly int Data1; + public readonly int Data2; } - [Theory] - [MemberData(nameof(FillData))] - public static void FillWithRecognizedType(Type expectedType, object value) + private readonly struct My256BitStruct { - Assert.IsType(expectedType, value); - typeof(SpanTests).GetMethod(nameof(FillWithRecognizedType_RunTest), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static) - .MakeGenericMethod(expectedType) - .Invoke(null, BindingFlags.DoNotWrapExceptions, null, new object[] { value }, null); + public My256BitStruct(ulong data0, ulong data1, ulong data2, ulong data3) + { + Data0 = data0; + Data1 = data1; + Data2 = data2; + Data3 = data3; + } + + public readonly ulong Data0; + public readonly ulong Data1; + public readonly ulong Data2; + public readonly ulong Data3; } - private static void FillWithRecognizedType_RunTest(T value) + private readonly struct My512BitStruct { - T[] arr = new T[128]; + public My512BitStruct(ulong data0, ulong data1, ulong data2, ulong data3, ulong data4, ulong data5, ulong data6, ulong data7) + { + Data0 = data0; + Data1 = data1; + Data2 = data2; + Data3 = data3; + Data4 = data4; + Data5 = data5; + Data6 = data6; + Data7 = data7; + } - // Run tests for lengths := 0 to 64, ensuring we don't overrun our buffer + public readonly ulong Data0; + public readonly ulong Data1; + public readonly ulong Data2; + public readonly ulong Data3; + public readonly ulong Data4; + public readonly ulong Data5; + public readonly ulong Data6; + public readonly ulong Data7; + } - for (int i = 0; i <= 64; i++) + private readonly struct MyRefContainingStruct + { + public MyRefContainingStruct(object data) { - arr.AsSpan(0, i).Fill(value); - Assert.Equal(Enumerable.Repeat(value, i), arr.Take(i)); // first i entries should've been populated with 'value' - Assert.Equal(Enumerable.Repeat(default(T), arr.Length - i), arr.Skip(i)); // remaining entries should contain default(T) - Array.Clear(arr, 0, arr.Length); + Data = data; } + + public readonly object Data; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 8572e0a25e304..fd994b0be47ee 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -4,103 +4,119 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; using Internal.Runtime.CompilerServices; namespace System { internal static partial class SpanHelpers // .T { - public static void Fill(ref T refData, uint numElements, T value) + public static void Fill(ref T refData, nuint numElements, T value) { - // n.b. If Fill is ever adapted to work for lengths beyond uint.MaxValue, double-check - // where it's used in the arithmetic operations below to ensure no integer overflow is possible. + // Early checks to see if it's even possible to vectorize - JIT will turn these checks into consts. + // - T cannot contain references (GC can't track references in vectors) + // - Vectorization must be hardware-accelerated + // - T's size must not exceed the vector's size and must be a whole power of 2 - if (numElements == 0) - { - return; // nothing to do - } - - if (typeof(T) != typeof(float) && typeof(T) != typeof(double) && !RuntimeHelpers.IsBitwiseEquatable()) - { - goto CannotVectorize; // it might not be valid to SIMD-spray this value into the backing span - } + if (RuntimeHelpers.IsReferenceOrContainsReferences()) { goto CannotVectorize; } + if (!Vector.IsHardwareAccelerated) { goto CannotVectorize; } + if (Unsafe.SizeOf() > Vector.Count) { goto CannotVectorize; } + if ((Unsafe.SizeOf() & (Unsafe.SizeOf() - 1)) != 0) { goto CannotVectorize; } // power of 2 check - if (Vector.IsHardwareAccelerated) + if (numElements > (uint)(Vector.Count / Unsafe.SizeOf())) { - if (numElements < (uint)(Vector.Count / Unsafe.SizeOf())) - { - goto CannotVectorize; // not enough data for even a single run through the vectorized loop - } + // We have enough data for at least one vectorized write. + T tmp = value; // Avoid taking address of the "value" argument. It would regress performance of the loops below. Vector vector; - if (typeof(T) == typeof(float)) + if (Unsafe.SizeOf() == 1) { - vector = (Vector)(new Vector((float)(object)value!)); + vector = new Vector(Unsafe.As(ref tmp)); } - else if (typeof(T) == typeof(double)) + else if (Unsafe.SizeOf() == 2) { - vector = (Vector)(new Vector((double)(object)value!)); + vector = (Vector)(new Vector(Unsafe.As(ref tmp))); } - else + else if (Unsafe.SizeOf() == 4) { - T tmp = value; // Avoid taking address of the "value" argument. It would regress performance of the loops below. - if (Unsafe.SizeOf() == 1) - { - vector = new Vector(Unsafe.As(ref tmp)); - } - else if (Unsafe.SizeOf() == 2) - { - vector = (Vector)(new Vector(Unsafe.As(ref tmp))); - } - else if (Unsafe.SizeOf() == 4) + // special-case float since it's already passed in a SIMD reg + vector = (typeof(T) == typeof(float)) + ? (Vector)(new Vector((float)(object)tmp!)) + : (Vector)(new Vector(Unsafe.As(ref tmp))); + } + else if (Unsafe.SizeOf() == 8) + { + // special-case double since it's already passed in a SIMD reg + vector = (typeof(T) == typeof(double)) + ? (Vector)(new Vector((double)(object)tmp!)) + : (Vector)(new Vector(Unsafe.As(ref tmp))); + } + else if (Unsafe.SizeOf() == 16) + { + Vector128 vec128 = Unsafe.As>(ref tmp); + if (Vector.Count == 16) { - vector = (Vector)(new Vector(Unsafe.As(ref tmp))); + vector = vec128.AsVector(); } - else if (Unsafe.SizeOf() == 8) + else if (Vector.Count == 32) { - vector = (Vector)(new Vector(Unsafe.As(ref tmp))); + vector = Vector256.Create(vec128, vec128).AsVector(); } else { - Debug.Fail("This should never happen. Did we miss special-casing a bitwise equatable type?"); + Debug.Fail("Vector isn't 128 or 256 bits in size?"); goto CannotVectorize; } } + else if (Unsafe.SizeOf() == 32) + { + vector = Unsafe.As>(ref tmp).AsVector(); + } + else + { + Debug.Fail("Vector is greater than 256 bits in size?"); + goto CannotVectorize; + } + ref byte refDataAsBytes = ref Unsafe.As(ref refData); nuint totalByteLength = numElements * (nuint)Unsafe.SizeOf(); // get this calculation ready ahead of time - nuint stopLoopAtOffset = totalByteLength & (nuint)(-Vector.Count * 2); + nuint stopLoopAtOffset = totalByteLength & (nuint)(nint)(2 * (int)-Vector.Count); // intentional sign extension carries the negative bit nuint offset = 0; // Loop, writing 2 vectors at a time. + // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency + // on the very recently calculated 'stopLoopAtOffset' value. - if (numElements >= 2 * (uint)(Vector.Count / Unsafe.SizeOf())) + if (numElements >= (uint)(2 * Vector.Count / Unsafe.SizeOf())) { do { - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref Unsafe.As(ref refData), offset), vector); - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref Unsafe.As(ref refData), offset + (uint)Vector.Count), vector); - offset += 2 * (uint)Vector.Count; + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refDataAsBytes, offset), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refDataAsBytes, offset + (nuint)Vector.Count), vector); + offset += (uint)(2 * Vector.Count); } while (offset < stopLoopAtOffset); } - // There are [ 0, 2 * sizeof(Vector) ) bytes remaining. - // If there are >= sizeof(Vector) bytes remaining, write one vector now. + // At this point, if any data remains to be written, it's strictly less than + // 2 * sizeof(Vector) bytes. The loop above had us write an even number of vectors. + // If the total byte length instead involves us writing an odd number of vectors, write + // one additional vector now. The bit check below tells us if we're in an "odd vector + // count" situation. if ((totalByteLength & (nuint)Vector.Count) != 0) { - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref Unsafe.As(ref refData), offset), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refDataAsBytes, offset), vector); } - // If there's any remaining space that won't fill a full vector, write a vector at - // the very end of the destination buffer. This will result in overwriting a previous - // entry, but that's ok since we're splatting the same value for all entries, so this - // won't result in data corruption. + // It's possible that some small buffer remains to be populated - something that won't + // fit an entire vector's worth of data. Instead of falling back to a loop, we'll write + // a vector at the very end of the buffer. This may involve overwriting previously + // populated data, which is fine since we're splatting the same value for all entries. + // There's no need to perform a length check here because we already performed this + // check before entering the vectorized code path. - if ((totalByteLength & (nuint)(Vector.Count - 1)) != 0) - { - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref Unsafe.As(ref refData), totalByteLength - (uint)Vector.Count), vector); - } + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refDataAsBytes, totalByteLength - (nuint)Vector.Count), vector); // And we're done! @@ -109,13 +125,17 @@ public static void Fill(ref T refData, uint numElements, T value) CannotVectorize: - { - nuint i = 0; - nuint stopLoopAtOffset = numElements & ~(uint)7; + // If we reached this point, we cannot vectorize this T, or there are too few + // elements for us to vectorize. Fall back to an unrolled loop. + + nuint i = 0; - // Write 8 elements at a time + // Write 8 elements at a time - for (; i < stopLoopAtOffset; i += 8) + if (numElements >= 8) + { + nuint stopLoopAtOffset = numElements & ~(nuint)7; + do { Unsafe.Add(ref refData, (nint)i + 0) = value; Unsafe.Add(ref refData, (nint)i + 1) = value; @@ -125,34 +145,34 @@ public static void Fill(ref T refData, uint numElements, T value) Unsafe.Add(ref refData, (nint)i + 5) = value; Unsafe.Add(ref refData, (nint)i + 6) = value; Unsafe.Add(ref refData, (nint)i + 7) = value; - } + } while ((i += 8) < stopLoopAtOffset); + } - // Write next 4 elements if needed + // Write next 4 elements if needed - if ((numElements & 4) != 0) - { - Unsafe.Add(ref refData, (nint)i + 0) = value; - Unsafe.Add(ref refData, (nint)i + 1) = value; - Unsafe.Add(ref refData, (nint)i + 2) = value; - Unsafe.Add(ref refData, (nint)i + 3) = value; - i += 4; - } + if ((numElements & 4) != 0) + { + Unsafe.Add(ref refData, (nint)i + 0) = value; + Unsafe.Add(ref refData, (nint)i + 1) = value; + Unsafe.Add(ref refData, (nint)i + 2) = value; + Unsafe.Add(ref refData, (nint)i + 3) = value; + i += 4; + } - // Write next 2 elements if needed + // Write next 2 elements if needed - if ((numElements & 2) != 0) - { - Unsafe.Add(ref refData, (nint)i + 0) = value; - Unsafe.Add(ref refData, (nint)i + 1) = value; - i += 2; - } + if ((numElements & 2) != 0) + { + Unsafe.Add(ref refData, (nint)i + 0) = value; + Unsafe.Add(ref refData, (nint)i + 1) = value; + i += 2; + } - // Write final element if needed + // Write final element if needed - if ((numElements & 1) != 0) - { - Unsafe.Add(ref refData, (nint)i) = value; - } + if ((numElements & 1) != 0) + { + Unsafe.Add(ref refData, (nint)i) = value; } } From ba5ce88e1c7cc18cf799b30ff51eb6fffab25656 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 16 Apr 2021 13:54:31 -0700 Subject: [PATCH 3/5] Fix > to >= --- .../System.Private.CoreLib/src/System/SpanHelpers.T.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index fd994b0be47ee..f58d388451662 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -23,7 +23,7 @@ public static void Fill(ref T refData, nuint numElements, T value) if (Unsafe.SizeOf() > Vector.Count) { goto CannotVectorize; } if ((Unsafe.SizeOf() & (Unsafe.SizeOf() - 1)) != 0) { goto CannotVectorize; } // power of 2 check - if (numElements > (uint)(Vector.Count / Unsafe.SizeOf())) + if (numElements >= (uint)(Vector.Count / Unsafe.SizeOf())) { // We have enough data for at least one vectorized write. From 0bf26c300f82ef95136319af1024af09d90fb90d Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 16 Apr 2021 17:15:27 -0700 Subject: [PATCH 4/5] PR feedback --- .../src/System/SpanHelpers.T.cs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index f58d388451662..339aa157f8b7e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -16,12 +16,13 @@ public static void Fill(ref T refData, nuint numElements, T value) // Early checks to see if it's even possible to vectorize - JIT will turn these checks into consts. // - T cannot contain references (GC can't track references in vectors) // - Vectorization must be hardware-accelerated - // - T's size must not exceed the vector's size and must be a whole power of 2 + // - T's size must not exceed the vector's size + // - T's size must be a whole power of 2 if (RuntimeHelpers.IsReferenceOrContainsReferences()) { goto CannotVectorize; } if (!Vector.IsHardwareAccelerated) { goto CannotVectorize; } if (Unsafe.SizeOf() > Vector.Count) { goto CannotVectorize; } - if ((Unsafe.SizeOf() & (Unsafe.SizeOf() - 1)) != 0) { goto CannotVectorize; } // power of 2 check + if (!BitOperations.IsPow2(Unsafe.SizeOf())) { goto CannotVectorize; } if (numElements >= (uint)(Vector.Count / Unsafe.SizeOf())) { @@ -71,7 +72,15 @@ public static void Fill(ref T refData, nuint numElements, T value) } else if (Unsafe.SizeOf() == 32) { - vector = Unsafe.As>(ref tmp).AsVector(); + if (Vector.Count == 32) + { + vector = Unsafe.As>(ref tmp).AsVector(); + } + else + { + Debug.Fail("Vector isn't 256 bits in size?"); + goto CannotVectorize; + } } else { From 22ad8fa775945659689240836171aad01da87e23 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 16 Apr 2021 21:43:40 -0700 Subject: [PATCH 5/5] Work around initblk null check on mono --- .../System.Private.CoreLib/src/System/Span.cs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Span.cs b/src/libraries/System.Private.CoreLib/src/System/Span.cs index 6042331d30529..753e9f279f374 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Span.cs @@ -284,11 +284,18 @@ public void Fill(T value) { if (Unsafe.SizeOf() == 1) { - // Special-case single-byte types like byte / sbyte / bool. - // The runtime eventually calls memset, which can efficiently support large buffers. - // We don't need to check IsReferenceOrContainsReferences because no references - // can ever be stored in types this small. - Unsafe.InitBlockUnaligned(ref Unsafe.As(ref _pointer.Value), Unsafe.As(ref value), (uint)_length); +#if MONO + // Mono runtime's implementation of initblk performs a null check on the address. + // We'll perform a length check here to avoid passing a null address in the empty span case. + if (_length != 0) +#endif + { + // Special-case single-byte types like byte / sbyte / bool. + // The runtime eventually calls memset, which can efficiently support large buffers. + // We don't need to check IsReferenceOrContainsReferences because no references + // can ever be stored in types this small. + Unsafe.InitBlockUnaligned(ref Unsafe.As(ref _pointer.Value), Unsafe.As(ref value), (uint)_length); + } } else {