From 8cd3cd59c4d889ce6bc7777c37420f26fc6dfa66 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Wed, 16 Mar 2022 21:06:29 +0000 Subject: [PATCH 1/3] add flags checks to BMI1 intrinsic lowering to prevent decomposed longs in x84 from being altered --- src/coreclr/jit/lowerxarch.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index b4cdcab88332f..b58aaf063a22a 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3789,6 +3789,13 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode) return nullptr; } + // prevent decomposed 64 integer node parts in x86 from being recognised + if (((addOp1->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || ((addOp2->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || + ((op2->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || ((andNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS)) + { + return nullptr; + } + NamedIntrinsic intrinsic; if (op1->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) { @@ -3868,6 +3875,12 @@ GenTree* Lowering::TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode) return nullptr; } + // prevent decomposed 64 integer node parts in x86 from being recognised + if (((opNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || ((negNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS)) + { + return nullptr; + } + NamedIntrinsic intrinsic; if (andNode->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) { @@ -3947,6 +3960,12 @@ GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode) return nullptr; } + // prevent decomposed 64 integer node parts in x86 from being recognised + if (((andNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || ((notNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS)) + { + return nullptr; + } + NamedIntrinsic intrinsic; if (andNode->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64)) { From b5bcd873cd70521e498c5e08c7691037b6512aa3 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Wed, 16 Mar 2022 23:15:01 +0000 Subject: [PATCH 2/3] update bmji1intrinsics tests --- src/tests/JIT/Intrinsics/BMI1Intrinsics.cs | 65 +++++++++++++++++++--- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/src/tests/JIT/Intrinsics/BMI1Intrinsics.cs b/src/tests/JIT/Intrinsics/BMI1Intrinsics.cs index 783d769be88b5..67bfa1553d436 100644 --- a/src/tests/JIT/Intrinsics/BMI1Intrinsics.cs +++ b/src/tests/JIT/Intrinsics/BMI1Intrinsics.cs @@ -25,29 +25,76 @@ static int Main(string[] args) foreach (var value in values) { - Test(value.input1, AndNot(value.input1, value.input2), value.andnExpected, nameof(AndNot)); - Test(value.input1, ExtractLowestSetIsolatedBit(value.input1), value.blsiExpected, nameof(ExtractLowestSetIsolatedBit)); - Test(value.input1, ResetLowestSetBit(value.input1), value.blsrExpected, nameof(ResetLowestSetBit)); - Test(value.input1, GetMaskUpToLowestSetBit(value.input1), value.blmskExpected, nameof(GetMaskUpToLowestSetBit)); + Test(value.input1, AndNot_32bit(value.input1, value.input2), value.andnExpected, nameof(AndNot_32bit)); + Test(value.input1, ExtractLowestSetIsolatedBit_32bit(value.input1), value.blsiExpected, nameof(ExtractLowestSetIsolatedBit_32bit)); + Test(value.input1, ResetLowestSetBit_32bit(value.input1), value.blsrExpected, nameof(ResetLowestSetBit_32bit)); + Test(value.input1, GetMaskUpToLowestSetBit_32bit(value.input1), value.blmskExpected, nameof(GetMaskUpToLowestSetBit_32bit)); + } + + + var values2 = new (ulong input1, ulong input2, ulong andnExpected, ulong blsiExpected, ulong blsrExpected, ulong blmskExpected)[] { + (0, 0, 0, 0, 0, 0), + (1, 0, 1, 1, 0,0xFFFFFFFF_FFFFFFFE), + (ulong.MaxValue / 2, 0,0x7FFFFFFF_FFFFFFFF, 1,0x7FFFFFFF_FFFFFFFE,0xFFFFFFFF_FFFFFFFE), + ((ulong.MaxValue / 2) - 1, 0,0x7FFFFFFF_FFFFFFFE, 2,0x7FFFFFFF_FFFFFFFC,0xFFFFFFFF_FFFFFFFC), + ((ulong.MaxValue / 2) + 1, 0,0x80000000_00000000,0x80000000_00000000, 0, 0), + (ulong.MaxValue - 1, 0,0xFFFFFFFF_FFFFFFFE, 2,0xFFFFFFFF_FFFFFFFC,0xFFFFFFFF_FFFFFFFC), + (ulong.MaxValue, 0,0xFFFFFFFF_FFFFFFFF, 1,0xFFFFFFFF_FFFFFFFE,0xFFFFFFFF_FFFFFFFE), + (0xAAAAAAAA_AAAAAAAA,0xAAAAAAAA_AAAAAAAA, 0, 2,0xAAAAAAAA_AAAAAAA8,0xFFFFFFFF_FFFFFFFC), + (0xAAAAAAAA_AAAAAAAA,0x55555555_55555555,0xAAAAAAAA_AAAAAAAA, 2,0xAAAAAAAA_AAAAAAA8,0xFFFFFFFF_FFFFFFFC), + }; + + foreach (var value in values2) + { + Test(value.input1, AndNot_64bit(value.input1, value.input2), value.andnExpected, nameof(AndNot_64bit)); + Test(value.input1, ExtractLowestSetIsolatedBit_64bit(value.input1), value.blsiExpected, nameof(ExtractLowestSetIsolatedBit_64bit)); + Test(value.input1, ResetLowestSetBit_64bit(value.input1), value.blsrExpected, nameof(ResetLowestSetBit_64bit)); + Test(value.input1, GetMaskUpToLowestSetBit_64bit(value.input1), value.blmskExpected, nameof(GetMaskUpToLowestSetBit_64bit)); } return _errorCode; } [MethodImpl(MethodImplOptions.NoInlining)] - private static uint AndNot(uint x, uint y) => x & (~y); // bmi1 andn + private static uint AndNot_32bit(uint x, uint y) => x & (~y); // bmi1 andn + + [MethodImpl(MethodImplOptions.NoInlining)] + private static ulong AndNot_64bit(ulong x, ulong y) => x & (~y); // bmi1 andn + + [MethodImpl(MethodImplOptions.NoInlining)] + private static uint ExtractLowestSetIsolatedBit_32bit(uint x) => (uint)(x & (-x)); // bmi1 blsi + + [MethodImpl(MethodImplOptions.NoInlining)] + private static ulong ExtractLowestSetIsolatedBit_64bit(ulong x) => x & (ulong)(-(long)x); // bmi1 blsi + + [MethodImpl(MethodImplOptions.NoInlining)] + private static uint ResetLowestSetBit_32bit(uint x) => x & (x - 1); // bmi1 blsr [MethodImpl(MethodImplOptions.NoInlining)] - private static uint ExtractLowestSetIsolatedBit(uint x) => (uint)(x & (-x)); // bmi1 blsi + private static ulong ResetLowestSetBit_64bit(ulong x) => x & (x - 1); // bmi1 blsr [MethodImpl(MethodImplOptions.NoInlining)] - private static uint ResetLowestSetBit(uint x) => x & (x - 1); // bmi1 blsr + private static uint GetMaskUpToLowestSetBit_32bit(uint x) => (uint)(x ^ (-x)); // bmi1 blmsk [MethodImpl(MethodImplOptions.NoInlining)] - private static uint GetMaskUpToLowestSetBit(uint x) => (uint)(x ^ (-x)); // bmi1 blmsk + private static ulong GetMaskUpToLowestSetBit_64bit(ulong x) => x ^ (ulong)(-(long)x); // bmi1 blmsk + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Test(uint input, uint output, uint expected, string callerName) + { + if (output != expected) + { + Console.WriteLine($"{callerName} failed."); + Console.WriteLine($"Input: {input:X}"); + Console.WriteLine($"Output: {output:X}"); + Console.WriteLine($"Expected: {expected:X}"); + + _errorCode++; + } + } [MethodImpl(MethodImplOptions.NoInlining)] - private static void Test(uint input, uint output, uint expected,string callerName) + private static void Test(ulong input, ulong output, ulong expected, string callerName) { if (output != expected) { From ddf90e59a8c6c421dec908010682026be3821a15 Mon Sep 17 00:00:00 2001 From: Wraith2 Date: Thu, 17 Mar 2022 19:43:30 +0000 Subject: [PATCH 3/3] change flags comparison and update comment based on feedback. --- src/coreclr/jit/lowerxarch.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index b58aaf063a22a..f20af690fa7fe 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3789,9 +3789,9 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode) return nullptr; } - // prevent decomposed 64 integer node parts in x86 from being recognised - if (((addOp1->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || ((addOp2->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || - ((op2->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || ((andNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS)) + // Subsequent nodes may rely on CPU flags set by these nodes in which case we cannot remove them + if (((addOp2->gtFlags & GTF_SET_FLAGS) != 0) || ((op2->gtFlags & GTF_SET_FLAGS) != 0) || + ((andNode->gtFlags & GTF_SET_FLAGS) != 0)) { return nullptr; } @@ -3875,8 +3875,8 @@ GenTree* Lowering::TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode) return nullptr; } - // prevent decomposed 64 integer node parts in x86 from being recognised - if (((opNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || ((negNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS)) + // Subsequent nodes may rely on CPU flags set by these nodes in which case we cannot remove them + if (((opNode->gtFlags & GTF_SET_FLAGS) != 0) || ((negNode->gtFlags & GTF_SET_FLAGS) != 0)) { return nullptr; } @@ -3960,8 +3960,8 @@ GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode) return nullptr; } - // prevent decomposed 64 integer node parts in x86 from being recognised - if (((andNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS) || ((notNode->gtFlags & GTF_SET_FLAGS) == GTF_SET_FLAGS)) + // Subsequent nodes may rely on CPU flags set by these nodes in which case we cannot remove them + if (((andNode->gtFlags & GTF_SET_FLAGS) != 0) || ((notNode->gtFlags & GTF_SET_FLAGS) != 0)) { return nullptr; }