From d3637349fbadf1644486ef7e569299a5a5b0b20f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 12 Apr 2022 14:16:02 +0200 Subject: [PATCH 1/3] Make BSWAP16 nodes normalize upper 16 bits Currently the JIT's constant folding (gtFoldExprConst and VNs EvalOpSpecialized) assumes that BSWAP16 zero extends into the upper 16 bits. This was not the case, and in fact the behavior of BSWAP16 depended on platform. Normally this would not be a problem since we always insert normalizing casts when creating BSWAP16 nodes, however VN was smart enough to remove this cast in some cases (see the test). Change the semantics of BSWAP16 nodes to zero extend into the upper 16 bits to match constant folding, and add a small peephole to avoid inserting this normalization in the common case where it is not necessary. Fixes #67723 Subsumes #67726 --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenarm64.cpp | 11 +++++++--- src/coreclr/jit/codegencommon.cpp | 34 +++++++++++++++++++++++++++++++ src/coreclr/jit/codegenxarch.cpp | 5 +++++ src/coreclr/jit/gtlist.h | 2 +- 5 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index f1c1b49b2578b..c6ec999846cba 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1265,6 +1265,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForIndir(GenTreeIndir* tree); void genCodeForNegNot(GenTree* tree); void genCodeForBswap(GenTree* tree); + bool genCanOmitNormalizationForBswap16(GenTree* tree); void genCodeForLclVar(GenTreeLclVar* tree); void genCodeForLclFld(GenTreeLclFld* tree); void genCodeForStoreLclFld(GenTreeLclFld* tree); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index d24d0f9db1522..1ee6ee1974499 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3071,13 +3071,18 @@ void CodeGen::genCodeForBswap(GenTree* tree) 
assert(operand->isUsedFromReg()); regNumber operandReg = genConsumeReg(operand); - if (tree->OperIs(GT_BSWAP16)) + if (tree->OperIs(GT_BSWAP)) { - inst_RV_RV(INS_rev16, targetReg, operandReg, targetType); + inst_RV_RV(INS_rev, targetReg, operandReg, targetType); } else { - inst_RV_RV(INS_rev, targetReg, operandReg, targetType); + inst_RV_RV(INS_rev16, targetReg, operandReg, targetType); + + if (!genCanOmitNormalizationForBswap16(tree)) + { + GetEmitter()->emitIns_Mov(INS_uxth, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } } genProduceReg(tree); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 0eccb2abfc8e5..d7ab445fda026 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -9591,3 +9591,37 @@ void CodeGen::genCodeForBitCast(GenTreeOp* treeNode) } genProduceReg(treeNode); } + +//---------------------------------------------------------------------- +// genCanOmitNormalizationForBswap16: +// Small peephole to check if a bswap16 node can omit normalization. +// +// Arguments: +// tree - The BSWAP16 node +// +// Remarks: +// BSWAP16 nodes are required to zero extend the upper 16 bits, but since the +// importer always inserts a normalizing cast (either sign or zero extending) +// we almost never need to actually do this. 
+// +bool CodeGen::genCanOmitNormalizationForBswap16(GenTree* tree) +{ + if (compiler->opts.OptimizationDisabled()) + { + return false; + } + + assert(tree->OperIs(GT_BSWAP16)); + if ((tree->gtNext == nullptr) || !tree->gtNext->OperIs(GT_CAST)) + { + return false; + } + + GenTreeCast* cast = tree->gtNext->AsCast(); + if (cast->gtOverflow()) + { + return false; + } + + return (cast->gtCastType == TYP_USHORT) || (cast->gtCastType == TYP_SHORT); +} diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 80ed7fadd4b69..076795ec6974e 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -577,6 +577,11 @@ void CodeGen::genCodeForBswap(GenTree* tree) { // 16-bit byte swaps use "ror reg.16, 8" inst_RV_IV(INS_ror_N, targetReg, 8 /* val */, emitAttr::EA_2BYTE); + + if (!genCanOmitNormalizationForBswap16(tree)) + { + GetEmitter()->emitIns_Mov(INS_movzx, EA_2BYTE, targetReg, targetReg, /* canSkip */ false); + } } genProduceReg(tree); diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index c902b21ec72f4..3f3f50f730c72 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -104,7 +104,7 @@ GTNODE(RUNTIMELOOKUP , GenTreeRuntimeLookup, 0,GTK_UNOP|GTK_EXOP|DBK_NOTLIR) GTNODE(ARR_ADDR , GenTreeArrAddr ,0,GTK_UNOP|GTK_EXOP|DBK_NOTLIR) // Wraps an array address expression GTNODE(BSWAP , GenTreeOp ,0,GTK_UNOP) // Byte swap (32-bit or 64-bit) -GTNODE(BSWAP16 , GenTreeOp ,0,GTK_UNOP) // Byte swap (16-bit) +GTNODE(BSWAP16 , GenTreeOp ,0,GTK_UNOP) // Byte swap lower 16-bits and zero upper 16 bits //----------------------------------------------------------------------------- // Binary operators (2 operands): From 92a65906cc88973bcca484cdc3d3987796142b92 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 12 Apr 2022 16:40:26 +0200 Subject: [PATCH 2/3] Fix interference check --- src/coreclr/jit/codegencommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index d7ab445fda026..a1a022931e4ea 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -9618,7 +9618,7 @@ bool CodeGen::genCanOmitNormalizationForBswap16(GenTree* tree) } GenTreeCast* cast = tree->gtNext->AsCast(); - if (cast->gtOverflow()) + if (cast->gtOverflow() || (cast->CastOp() != tree)) { return false; } From a311859b369a9c4ecf7c97f9ce9da042f84ba42b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 18 Apr 2022 11:51:50 +0200 Subject: [PATCH 3/3] Add test --- .../JitBlue/Runtime_67723/Runtime_67723.cs | 14 ++++++++++++++ .../JitBlue/Runtime_67723/Runtime_67723.csproj | 9 +++++++++ 2 files changed, 23 insertions(+) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_67723/Runtime_67723.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_67723/Runtime_67723.csproj diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_67723/Runtime_67723.cs b/src/tests/JIT/Regression/JitBlue/Runtime_67723/Runtime_67723.cs new file mode 100644 index 0000000000000..8cff92ca69dd0 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_67723/Runtime_67723.cs @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Buffers.Binary; + +public class Runtime_67723 +{ + public static int Main() + { + short[] foo = { short.MinValue }; + int test = BinaryPrimitives.ReverseEndianness(foo[0]); + return test == 0x80 ? 100 : -1; + } +} \ No newline at end of file diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_67723/Runtime_67723.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_67723/Runtime_67723.csproj new file mode 100644 index 0000000000000..f492aeac9d056 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_67723/Runtime_67723.csproj @@ -0,0 +1,9 @@ + + + Exe + True + + + + + \ No newline at end of file