[RISC-V] Fix errors in crosgen2 for risc-v (#97368)

* [RISC-V] Fix crossgen2 * [RISC-V] Fix typo in codegen * [RISC-V] Update assert to support all rel insts * [RISC-V] Fix helper function call * [RISC-V] Update GetRISCV64PassStructInRegisterFlags * [RISC-V] Fix int arg reg passing for float type A failure during crossgen2 SPC.dll `System.Diagnostics.Tracing.NativeRuntimeEventSource:LogThreadPoolWorkerThreadAdjustmentStats(double,double,double,double,double,double,double,double,double,ushort,ushort)` * [RISC-V] Update EPILOG_WITH_TRANSITION_BLOCK_RETURN * [RISC-V] Update indentation in dump * [RISC-V] Fix stubs * [RISC-V] Fix virtualcallstubcpu * [RISC-V] Update a comment * [RISC-V] Update RO data block * [RISC-V] Fix data overwrites * [RISC-V] Fix typo * [RISC-V] Fix register number of A3 * [RISC-V] Code Formatting * [RISC-V] Fix format and typos * Revert "[RISC-V] Fix int arg reg passing for float type" This reverts commit 381858c. * [RISC-V] Fix a typo in a comment * [RISC-V] Fix error when arg type and arg reg mismatch * [RISC-V] Rename according to review * [LoongArch64] Remove LA64 specific handling for unspilling
dotnet · Jan 30, 2024 · fe51bd7 · fe51bd7
1 parent 5e80e3e
commit fe51bd7
Show file tree

Hide file tree

Showing 16 changed files with 141 additions and 58 deletions.
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
@@ -1261,13 +1261,6 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
                 unspillType = lcl->TypeGet();
             }
 
-#if defined(TARGET_LOONGARCH64)
-            if (varTypeIsFloating(unspillType) && emitter::isGeneralRegister(tree->GetRegNum()))
-            {
-                unspillType = unspillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
-            }
-#endif
-
             bool reSpill   = ((unspillTree->gtFlags & GTF_SPILL) != 0);
             bool isLastUse = lcl->IsLastUse(0);
             genUnspillLocal(lcl->GetLclNum(), unspillType, lcl->AsLclVar(), tree->GetRegNum(), reSpill, isLastUse);

diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp
@@ -5172,7 +5172,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
     {
         if (compiler->opts.compReloc)
         {
-            emit->emitIns_R_AI(INS_jalr, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+            emit->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
         }
         else
         {

diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp
@@ -1133,9 +1133,10 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
 
 #endif // DEBUG
 
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 
-    // For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does.
+    // For arm64/LoongArch64/RISCV64, we want to allocate JIT data always adjacent to code similar to what native
+    // compiler does.
     // This way allows us to use a single `ldr` to access such data like float constant/jmp table.
     // For LoongArch64 using `pcaddi + ld` to access such data.
 
@@ -1149,7 +1150,7 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
     args->hotCodeSize                 = roDataOffset + args->roDataSize;
     args->roDataSize                  = 0;
 
-#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 
     info.compCompHnd->allocMem(args);
 
@@ -1166,15 +1167,15 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
 
 #endif // DEBUG
 
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 
     // Fix up data section pointers.
     assert(args->roDataBlock == nullptr);
     assert(args->roDataBlockRW == nullptr);
     args->roDataBlock   = ((BYTE*)args->hotCodeBlock) + roDataOffset;
     args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset;
 
-#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 }
 
 void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize)

diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp
@@ -1423,7 +1423,7 @@ void emitter::emitIns_Call(EmitCallType          callType,
         assert(callType == EC_FUNC_TOKEN);
         assert(addr != NULL);
 
-        addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0
+        addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jalr ra/r0,rd,0
         id->idAddr()->iiaAddr = (BYTE*)addr;
 
         if (emitComp->opts.compReloc)
@@ -1546,7 +1546,7 @@ unsigned emitter::emitOutputCall(const insGroup* ig, BYTE* dst, instrDesc* id, c
 #endif
         emitOutput_Instr(dst, 0x00000067 | (REG_DEFAULT_HELPER_CALL_TARGET << 15) | reg2 << 7);
 
-        emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_JALR);
+        emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_PC);
     }
     else
     {
@@ -4382,6 +4382,12 @@ void emitter::emitDispIns(
         instrSize = sizeof(code_t);
         code_t instruction;
         memcpy(&instruction, instr, instrSize);
+#ifdef DEBUG
+        if (emitComp->verbose && i != 0)
+        {
+            printf("        ");
+        }
+#endif
         emitDispInsName(instruction, instr, doffs, offset, id, ig);
     }
 }

diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
@@ -4541,6 +4541,15 @@ struct CallArgABIInformation
 #endif
     }
 
+    bool IsMismatchedArgType() const
+    {
+#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+        return isValidIntArgReg(GetRegNum()) && varTypeUsesFloatReg(ArgType);
+#else
+        return false;
+#endif // TARGET_LOONGARCH64 || TARGET_RISCV64
+    }
+
     void SetByteSize(unsigned byteSize, unsigned byteAlignment, bool isStruct, bool isFloatHfa);
 
     // Get the number of bytes that this argument is occupying on the stack,
@@ -5528,7 +5537,7 @@ struct GenTreeCall final : public GenTree
             return WellKnownArg::VirtualStubCell;
         }
 
-#if defined(TARGET_ARMARCH)
+#if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64)
         // For ARM architectures, we always use an indirection cell for R2R calls.
         if (IsR2RRelativeIndir() && !IsDelegateInvoke())
         {

diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
@@ -1677,9 +1677,10 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late)
     {
 
 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-        if (call->IsVarargs() || comp->opts.compUseSoftFP)
+        if (call->IsVarargs() || comp->opts.compUseSoftFP || callArg->AbiInfo.IsMismatchedArgType())
         {
             // For vararg call or on armel, reg args should be all integer.
+            // For arg type and arg reg mismatch, reg arg should be integer on riscv64
             // Insert copies as needed to move float value to integer register.
             GenTree* newNode = LowerFloatArg(ppArg, callArg);
             if (newNode != nullptr)
@@ -1710,7 +1711,7 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late)
 
 #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 //------------------------------------------------------------------------
-// LowerFloatArg: Lower float call arguments on the arm/LoongArch64 platform.
+// LowerFloatArg: Lower float call arguments on the arm/LoongArch64/RiscV64 platform.
 //
 // Arguments:
 //    arg  - The arg node

diff --git a/src/coreclr/pal/inc/rt/ntimage.h b/src/coreclr/pal/inc/rt/ntimage.h
@@ -1025,7 +1025,6 @@ typedef IMAGE_RELOCATION UNALIGNED *PIMAGE_RELOCATION;
 // RISCV64 relocation types
 //
 #define IMAGE_REL_RISCV64_PC            0x0003
-#define IMAGE_REL_RISCV64_JALR          0x0004
 
 //
 // CEF relocation types.

diff --git a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
@@ -293,8 +293,14 @@ C_FUNC(\Name\()_End):
 .endm
 
 .macro EPILOG_WITH_TRANSITION_BLOCK_RETURN
-// TODO RISCV NYI
-    sw  ra, 0(zero)
+
+    RESTORE_CALLEESAVED_REGISTERS sp, __PWTB_CalleeSavedRegisters
+
+    EPILOG_RESTORE_REG_PAIR       fp, ra, __PWTB_CalleeSavedRegisters
+
+    EPILOG_STACK_FREE             __PWTB_StackAlloc
+
+    ret
 .endm
 
 

diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/ObjectDataBuilder.cs
@@ -321,7 +321,6 @@ public void EmitReloc(ISymbolNode symbol, RelocType relocType, int delta = 0)
                 case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC:
                 case RelocType.IMAGE_REL_BASED_LOONGARCH64_JIR:
 
-                //TODO: consider removal of IMAGE_REL_RISCV64_JALR from runtime too
                 case RelocType.IMAGE_REL_BASED_RISCV64_PC:
                     Debug.Assert(delta == 0);
                     // Do not vacate space for this kind of relocation, because

diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs
@@ -423,9 +423,11 @@ private static unsafe int GetRiscV64PC(uint* pCode)
             // first get the high 20 bits,
             int imm = (int)((auipcInstr & 0xfffff000));
             // then get the low 12 bits,
-            uint addiInstr = *(pCode + 1);
-            Debug.Assert((addiInstr & 0x707f) == 0x00000013);
-            imm += ((int)(addiInstr)) >> 20;
+            uint nextInstr = *(pCode + 1);
+            Debug.Assert((nextInstr & 0x707f) == 0x00000013 ||
+                         (nextInstr & 0x707f) == 0x00000067 ||
+                         (nextInstr & 0x707f) == 0x00003003);
+            imm += ((int)(nextInstr)) >> 20;
 
             return imm;
         }
@@ -437,6 +439,10 @@ private static unsafe int GetRiscV64PC(uint* pCode)
         //  case:EA_PTR_DSP_RELOC
         //   auipc  reg, off-hi-20bits
         //   ld     reg, reg, off-lo-12bits
+        //  case:
+        // INS_OPTS_C
+        //   auipc  reg, off-hi-20bits
+        //   jalr   reg, reg, off-lo-12bits
         private static unsafe void PutRiscV64PC(uint* pCode, long imm32)
         {
             // Verify that we got a valid offset
@@ -449,10 +455,12 @@ private static unsafe void PutRiscV64PC(uint* pCode, long imm32)
             auipcInstr |= (uint)((imm32 + 0x800) & 0xfffff000);
             *pCode = auipcInstr;
 
-            uint addiInstr = *(pCode + 1);
-            Debug.Assert((addiInstr & 0x707f) == 0x00000013);
-            addiInstr |= (uint)((doff & 0xfff) << 20);
-            *(pCode + 1) = addiInstr;
+            uint nextInstr = *(pCode + 1);
+            Debug.Assert((nextInstr & 0x707f) == 0x00000013 ||
+                         (nextInstr & 0x707f) == 0x00000067 ||
+                         (nextInstr & 0x707f) == 0x00003003);
+            nextInstr |= (uint)((doff & 0xfff) << 20);
+            *(pCode + 1) = nextInstr;
 
             Debug.Assert(GetRiscV64PC(pCode) == imm32);
         }

diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
@@ -89,6 +89,10 @@ public static string GetHardwareIntrinsicId(TargetArchitecture architecture, Typ
                 if (potentialType.Namespace != "System.Runtime.Intrinsics.Arm")
                     return "";
             }
+            else if (architecture == TargetArchitecture.RiscV64)
+            {
+                return "";
+            }
             else
             {
                 throw new InternalCompilerErrorException("Unknown architecture");

diff --git a/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs
@@ -52,7 +52,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp
                     // to the same alignment as __m128, which is supported by the ABI.
                     alignment = new LayoutInt(8);
                 }
-                else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64)
+                else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64 || defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
                 {
                     // The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
                     // 16-byte alignment for __m256.
@@ -73,7 +73,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp
                     // to the same alignment as __m128, which is supported by the ABI.
                     alignment = new LayoutInt(8);
                 }
-                else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64)
+                else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64 || defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
                 {
                     // The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
                     // 16-byte alignment for __m256.

diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
@@ -412,9 +412,10 @@ private CompilationResult CompileMethodInternal(IMethodNode methodCodeNodeNeedin
 
             if (codeSize < _code.Length)
             {
-                if (_compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.ARM64)
+                if (_compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.ARM64
+                    && _compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.RiscV64)
                 {
-                    // For xarch/arm32, the generated code is sometimes smaller than the memory allocated.
+                    // For xarch/arm32/RiscV64, the generated code is sometimes smaller than the memory allocated.
                     // In that case, trim the codeBlock to the actual value.
                     //
                     // For arm64, the allocation request of `hotCodeSize` also includes the roData size
@@ -4105,6 +4106,9 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes)
             if (targetArchitecture == TargetArchitecture.ARM && !_compilation.TypeSystemContext.Target.IsWindows)
                 flags.Set(CorJitFlag.CORJIT_FLAG_RELATIVE_CODE_RELOCS);
 
+            if (targetArchitecture == TargetArchitecture.RiscV64)
+                flags.Set(CorJitFlag.CORJIT_FLAG_FRAMED);
+
             if (this.MethodBeingCompiled.IsUnmanagedCallersOnly)
             {
                 // Validate UnmanagedCallersOnlyAttribute usage

diff --git a/src/coreclr/tools/Common/JitInterface/RISCV64PassStructInRegister.cs b/src/coreclr/tools/Common/JitInterface/RISCV64PassStructInRegister.cs
@@ -29,12 +29,6 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
                 return (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
             }
 
-            //// The SIMD Intrinsic types are meant to be handled specially and should not be passed as struct registers
-            if (typeDesc.IsIntrinsic)
-            {
-                throw new NotImplementedException("For RISCV64, SIMD would be implemented later");
-            }
-
             MetadataType mdType = typeDesc as MetadataType;
             Debug.Assert(mdType != null);
 
@@ -85,6 +79,16 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
                         {
                             floatFieldFlags |= (uint)StructFloatFieldInfoFlags.STRUCT_SECOND_FIELD_DOUBLE;
                         }
+
+                        // Pass with two integer registers in `struct {int a, int b, float/double c}` cases
+                        if (fieldIndex == 1 &&
+                                (floatFieldFlags |
+                                 (uint)StructFloatFieldInfoFlags.STRUCT_FIRST_FIELD_SIZE_IS8 |
+                                 (uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND) ==
+                                floatFieldFlags)
+                        {
+                            floatFieldFlags = (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
+                        }
                     }
                     break;
 
@@ -106,6 +110,16 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
                         {
                             floatFieldFlags |= (uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND;
                         }
+
+                        // Pass with two integer registers in `struct {int a, int b, float/double c}` cases
+                        if (fieldIndex == 1 &&
+                                (floatFieldFlags |
+                                 (uint)StructFloatFieldInfoFlags.STRUCT_FIRST_FIELD_SIZE_IS8 |
+                                 (uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND) ==
+                                floatFieldFlags)
+                        {
+                            floatFieldFlags = (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
+                        }
                     }
                     break;
 

diff --git a/src/coreclr/vm/riscv64/stubs.cpp b/src/coreclr/vm/riscv64/stubs.cpp
@@ -1560,7 +1560,24 @@ PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCOD
 
     BEGIN_DYNAMIC_HELPER_EMIT(32);
 
-    EmitHelperWithArg(p, rxOffset, pAllocator, arg, target);
+    const IntReg RegR0 = 0, RegT0 = 5, RegA0 = 10;
+
+    *(DWORD*)p = UTypeInstr(0x17, RegT0, 0);// auipc t0, 0
+    p += 4;
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA0, RegT0, 16);// ld a0, 16(t0)
+    p += 4;
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 24);// ld t0, 24(t0)
+    p += 4;
+    *(DWORD*)p = ITypeInstr(0x67, 0, RegR0, RegT0, 0);// jalr zero, 0(t0)
+    p += 4;
+
+    // label:
+    // arg
+    *(TADDR*)p = arg;
+    p += 8;
+    // target
+    *(PCODE*)p = target;
+    p += 8;
 
     END_DYNAMIC_HELPER_EMIT();
 }
@@ -1570,13 +1587,13 @@ void DynamicHelpers::EmitHelperWithArg(BYTE*& p, size_t rxOffset, LoaderAllocato
 {
     STANDARD_VM_CONTRACT;
 
-    const IntReg RegR0 = 0, RegT0 = 5, RegA0 = 10;
+    const IntReg RegR0 = 0, RegT0 = 5, RegA1 = 11;
 
     *(DWORD*)p = UTypeInstr(0x17, RegT0, 0);// auipc t0, 0
     p += 4;
-    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA0, RegT0, 16);// ld a0, 16(t0)
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA1, RegT0, 16);// ld a1, 16(t0)
     p += 4;
-    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 24);;// ld t0, 24(t0)
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 24);// ld t0, 24(t0)
     p += 4;
     *(DWORD*)p = ITypeInstr(0x67, 0, RegR0, RegT0, 0);// jalr zero, 0(t0)
     p += 4;
@@ -1772,15 +1789,15 @@ PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADD
 
     BEGIN_DYNAMIC_HELPER_EMIT(48);
 
-    const IntReg RegR0 = 0, RegT0 = 5, RegA2 = 12, RegA3 = 1;
+    const IntReg RegR0 = 0, RegT0 = 5, RegA2 = 12, RegA3 = 13;
 
     *(DWORD*)p = UTypeInstr(0x17, RegT0, 0);// auipc t0, 0
     p += 4;
     *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA2, RegT0, 24);// ld a2,24(t0)
     p += 4;
     *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA3, RegT0, 32);// ld a3,32(t0)
     p += 4;
-    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 40);;// ld t0,40(t0)
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 40);// ld t0,40(t0)
     p += 4;
     *(DWORD*)p = ITypeInstr(0x67, 0, RegR0, RegT0, 0);// jalr x0, 0(t0)
     p += 4;