From 3fa994628b133fc73f1c1d0d9f9d3cd8f8ea4add Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sat, 10 Aug 2024 11:54:12 +0300 Subject: [PATCH 01/19] part1 --- eng/Subsets.props | 2 +- src/coreclr/CMakeLists.txt | 2 +- .../src/Internal/Runtime/TransitionBlock.cs | 70 ++++- src/coreclr/nativeaot/Directory.Build.props | 3 + src/coreclr/nativeaot/Runtime/CommonMacros.h | 2 +- src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 25 +- src/coreclr/nativeaot/Runtime/ICodeManager.h | 21 ++ src/coreclr/nativeaot/Runtime/MiscHelpers.cpp | 36 +++ src/coreclr/nativeaot/Runtime/PalRedhawk.h | 90 ++++++ .../nativeaot/Runtime/PalRedhawkCommon.h | 52 ++++ .../nativeaot/Runtime/StackFrameIterator.cpp | 192 +++++++++++- .../nativeaot/Runtime/StackFrameIterator.h | 12 + .../nativeaot/Runtime/ThunksMapping.cpp | 23 +- .../nativeaot/Runtime/inc/TargetPtrs.h | 2 +- src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 68 ++++- src/coreclr/nativeaot/Runtime/regdisplay.h | 58 +++- .../nativeaot/Runtime/unix/UnixContext.cpp | 170 +++++++++++ .../nativeaot/Runtime/unix/UnixContext.h | 58 ++++ .../Runtime/unix/UnixNativeCodeManager.cpp | 155 +++++++++- .../nativeaot/Runtime/unix/UnwindHelpers.cpp | 288 ++++++++++++++++++ .../nativeaot/Runtime/unix/unixasmmacros.inc | 2 + .../TypeLoader/TypeSystemContextFactory.cs | 2 + .../ILCompiler.Compiler/Compiler/JitHelper.cs | 1 + .../include/__libunwind_config.h | 4 +- .../external/llvm-libunwind/src/Registers.hpp | 37 ++- 25 files changed, 1349 insertions(+), 26 deletions(-) diff --git a/eng/Subsets.props b/eng/Subsets.props index 488b8c43847c0c..300e7a6d3696a6 100644 --- a/eng/Subsets.props +++ b/eng/Subsets.props @@ -120,7 +120,7 @@ <_NativeAotSupportedOS Condition="'$(TargetOS)' == 'windows' or '$(TargetOS)' == 'linux' or '$(TargetOS)' == 'osx' or '$(TargetOS)' == 'maccatalyst' or '$(TargetOS)' == 'iossimulator' or '$(TargetOS)' == 'ios' or '$(TargetOS)' == 'tvossimulator' or '$(TargetOS)' == 'tvos' or '$(TargetOS)' == 'freebsd'">true - <_NativeAotSupportedArch Condition="'$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm' or '$(TargetArchitecture)' == 'loongarch64' or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'x86')">true + <_NativeAotSupportedArch Condition="'$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm' or '$(TargetArchitecture)' == 'loongarch64' or '$(TargetArchitecture)' == 'riscv64' or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'x86')">true true true diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 2e9a6c76743e5f..8b018c624642f3 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -150,7 +150,7 @@ add_subdirectory(tools/aot/jitinterface) if(NOT CLR_CROSS_COMPONENTS_BUILD) # NativeAOT only buildable for a subset of CoreCLR-supported configurations - if(CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_LOONGARCH64 OR (CLR_CMAKE_HOST_ARCH_I386 AND CLR_CMAKE_HOST_WIN32)) + if(CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_LOONGARCH64 OR CLR_CMAKE_HOST_ARCH_RISCV64 OR (CLR_CMAKE_HOST_ARCH_I386 AND CLR_CMAKE_HOST_WIN32)) add_subdirectory(nativeaot) endif() endif(NOT CLR_CROSS_COMPONENTS_BUILD) diff --git a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs 
index ccff78114d4e9a..7932ba9300dc3a 100644 --- a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs +++ b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs @@ -35,7 +35,7 @@ #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE #define ENREGISTERED_PARAMTYPE_MAXSIZE #elif TARGET_WASM -#elif TARGET_LOONGARCH64 +#elif TARGET_LOONGARCH64 || TARGET_RISCV64 #define CALLDESCR_ARGREGS // CallDescrWorker has ArgumentRegister parameter #define CALLDESCR_FPARGREGS // CallDescrWorker has FloatArgumentRegisters parameter #define ENREGISTERED_RETURNTYPE_MAXSIZE @@ -360,6 +360,60 @@ internal struct ArchitectureConstants public const int STACK_ELEM_SIZE = 8; public static int StackElemSize(int size) { return (((size) + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1)); } } +#elif TARGET_RISCV64 + [StructLayout(LayoutKind.Sequential)] + internal struct ReturnBlock + { + private IntPtr returnValue; + private IntPtr returnValue2; + private IntPtr returnValue3; + private IntPtr returnValue4; + } + + [StructLayout(LayoutKind.Sequential)] + internal struct ArgumentRegisters + { + private IntPtr a0; + private IntPtr a1; + private IntPtr a2; + private IntPtr a3; + private IntPtr a4; + private IntPtr a5; + private IntPtr a6; + private IntPtr a7; + public static unsafe int GetOffsetOfa7() + { + return sizeof(IntPtr) * 7; + } + } + + [StructLayout(LayoutKind.Sequential)] + internal struct FloatArgumentRegisters + { + private double fa0; + private double fa1; + private double fa2; + private double fa3; + private double fa4; + private double fa5; + private double fa6; + private double fa7; + } + + internal struct ArchitectureConstants + { + // To avoid corner case bugs, limit maximum size of the arguments with sufficient margin + public const int MAX_ARG_SIZE = 0xFFFFFF; + + public const int NUM_ARGUMENT_REGISTERS = 8; + public const int ARGUMENTREGISTERS_SIZE = NUM_ARGUMENT_REGISTERS * 8; + public const int ENREGISTERED_RETURNTYPE_MAXSIZE = 32; // bytes (four FP registers: fa0, fa1, fa2, and fa3) + public const int ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE = 16; // bytes (two int registers: a0 and a1) + public const int ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE_PRIMITIVE = 8; + public const int ENREGISTERED_PARAMTYPE_MAXSIZE = 16; // bytes (max value type size that can be passed by value) + public const int STACK_ELEM_SIZE = 8; + public static int StackElemSize(int size) { return (((size) + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1)); } + } #endif // @@ -465,6 +519,20 @@ public static unsafe int GetOffsetOfArgumentRegisters() return sizeof(ReturnBlock); } + public IntPtr m_alignmentPad; +#elif TARGET_RISCV64 + public ReturnBlock m_returnBlock; + public static unsafe int GetOffsetOfReturnValuesBlock() + { + return 0; + } + + public ArgumentRegisters m_argumentRegisters; + public static unsafe int GetOffsetOfArgumentRegisters() + { + return sizeof(ReturnBlock); + } + public IntPtr m_alignmentPad; #else #error Portability problem diff --git a/src/coreclr/nativeaot/Directory.Build.props b/src/coreclr/nativeaot/Directory.Build.props index c01756cfc8aba2..54d44cd4947fbc 100644 --- a/src/coreclr/nativeaot/Directory.Build.props +++ b/src/coreclr/nativeaot/Directory.Build.props @@ -92,6 +92,9 @@ TARGET_64BIT;TARGET_LOONGARCH64;$(DefineConstants) + + TARGET_64BIT;TARGET_RISCV64;$(DefineConstants) + TARGET_WINDOWS;$(DefineConstants) diff --git a/src/coreclr/nativeaot/Runtime/CommonMacros.h b/src/coreclr/nativeaot/Runtime/CommonMacros.h index c429057a140583..22077753082af7 100644 --- 
a/src/coreclr/nativeaot/Runtime/CommonMacros.h
+++ b/src/coreclr/nativeaot/Runtime/CommonMacros.h
@@ -119,7 +119,7 @@ inline bool IS_ALIGNED(T* val, uintptr_t alignment);
 #define LOG2_PTRSIZE 2
 #define POINTER_SIZE 4
 
-#elif defined(HOST_LOONGARCH64)
+#elif defined(HOST_LOONGARCH64) || defined(HOST_RISCV64)
 
 #define LOG2_PTRSIZE 3
 #define POINTER_SIZE 8
diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
index 060e19f3c525e2..22d3b9acf0213a 100644
--- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
@@ -203,6 +203,25 @@ FCIMPL3(void, RhpCopyContextFromExInfo, void * pOSContext, int32_t cbOSContext,
     pContext->Sp = pPalContext->SP;
     pContext->Ra = pPalContext->RA;
     pContext->Pc = pPalContext->IP;
+#elif defined(HOST_RISCV64)
+    pContext->X1  = pPalContext->RA;  // return address (ra)
+    pContext->X2  = pPalContext->SP;  // stack pointer (sp)
+    pContext->X8  = pPalContext->FP;  // frame pointer (s0)
+    pContext->X9  = pPalContext->X9;  // s1
+    pContext->X18 = pPalContext->X18; // s2
+    pContext->X19 = pPalContext->X19; // s3
+    pContext->X20 = pPalContext->X20; // s4
+    pContext->X21 = pPalContext->X21; // s5
+    pContext->X22 = pPalContext->X22; // s6
+    pContext->X23 = pPalContext->X23; // s7
+    pContext->X24 = pPalContext->X24; // s8
+    pContext->X25 = pPalContext->X25; // s9
+    pContext->X26 = pPalContext->X26; // s10
+    pContext->X27 = pPalContext->X27; // s11
+    pContext->Pc  = pPalContext->IP;  // program counter
#elif defined(HOST_WASM)
    // No registers, no work to do yet
 #else
@@ -295,7 +314,7 @@ EXTERN_C CODE_LOCATION RhpCheckedAssignRefEBPAVLocation;
 #endif
 EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation1;
 
-#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64)
+#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_RISCV64)
 EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation2;
 #endif
@@ -328,7 +347,7 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP)
         (uintptr_t)&RhpCheckedAssignRefEBPAVLocation,
 #endif
         (uintptr_t)&RhpByRefAssignRefAVLocation1,
-#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64)
+#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_RISCV64)
         (uintptr_t)&RhpByRefAssignRefAVLocation2,
 #endif
     };
@@ -410,7 +429,7 @@ static uintptr_t UnwindSimpleHelperToCaller(
     pContext->SetSp(sp+sizeof(uintptr_t)); // pop the stack
 #elif defined(HOST_ARM) || defined(HOST_ARM64)
     uintptr_t adjustedFaultingIP = pContext->GetLr();
-#elif defined(HOST_LOONGARCH64)
+#elif defined(HOST_LOONGARCH64) || defined(HOST_RISCV64)
     uintptr_t adjustedFaultingIP = pContext->GetRa();
 #else
     uintptr_t adjustedFaultingIP = 0; // initializing to make the compiler happy
diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h
index d1dbd47e51985e..ec0598b9e45efa 100644
--- a/src/coreclr/nativeaot/Runtime/ICodeManager.h
+++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h
@@ -86,6 +86,27 @@ inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags)
     return returnKind;
 }
 
+#elif defined(TARGET_RISCV64)
+
+// Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back
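Aside, for review: the asserts below only hold because the four GC-ref conditions for the two return-value slots occupy bits 32-35 of the 64-bit frame flags, so a single 32-bit shift converts in both directions. A self-contained sketch of that round trip (the constants mirror the RISC-V values defined by this patch; everything else is local to the sketch):

    #include <cassert>
    #include <cstdint>

    enum GCRefKind : uint32_t {
        GCRK_Scalar       = 0x0,
        GCRK_Object       = 0x1, // first return slot holds an object reference
        GCRK_Byref        = 0x2, // first return slot holds a byref
        GCRK_Scalar_Obj   = 0x4, // second return slot holds an object reference
        GCRK_Scalar_Byref = 0x8, // second return slot holds a byref
    };

    constexpr uint64_t PTFF_X0_IS_GCREF = 0x0000000100000000;
    constexpr uint64_t PTFF_X0_IS_BYREF = 0x0000000200000000;
    constexpr uint64_t PTFF_X1_IS_GCREF = 0x0000000400000000;
    constexpr uint64_t PTFF_X1_IS_BYREF = 0x0000000800000000;

    int main()
    {
        uint64_t flags = (uint64_t)GCRK_Object << 32;   // report slot 0 as an object
        assert(flags == PTFF_X0_IS_GCREF);
        GCRefKind kind = (GCRefKind)((flags & (PTFF_X0_IS_GCREF | PTFF_X0_IS_BYREF |
                                               PTFF_X1_IS_GCREF | PTFF_X1_IS_BYREF)) >> 32);
        assert(kind == GCRK_Object);
        return 0;
    }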
+C_ASSERT(PTFF_X0_IS_GCREF == ((uint64_t)GCRK_Object << 32));
+C_ASSERT(PTFF_X0_IS_BYREF == ((uint64_t)GCRK_Byref << 32));
+C_ASSERT(PTFF_X1_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 32));
+C_ASSERT(PTFF_X1_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 32));
+
+inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind)
+{
+    // just need to report gc ref bits here.
+    // appropriate PTFF_SAVE_ bits will be added by the frame building routine.
+    return ((uint64_t)returnKind << 32);
+}
+
+inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags)
+{
+    GCRefKind returnKind = (GCRefKind)((transFrameFlags & (PTFF_X0_IS_GCREF | PTFF_X0_IS_BYREF | PTFF_X1_IS_GCREF | PTFF_X1_IS_BYREF)) >> 32);
+    ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_X0) && (transFrameFlags & PTFF_SAVE_X1)));
+    return returnKind;
+}
+
 #elif defined(TARGET_AMD64)
 
 // Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back
diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
index 3351326ad3071e..6de3287f7ab79e 100644
--- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
@@ -334,6 +334,7 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg)
         int64_t distToTarget = ((int64_t)pCode[0] << 38) >> 36;
         return (uint8_t *)pCode + distToTarget;
     }
+
 #elif TARGET_LOONGARCH64
     uint32_t * pCode = (uint32_t *)pCodeOrg;
     // is this "addi.d $a0, $a0, 8"?
@@ -369,6 +370,41 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg)
         distToTarget += ((((int64_t)pCode[1] & ~0x3ff) << 38) >> 46);
         return (uint8_t *)((int64_t)pCode + distToTarget);
     }
+
+#elif TARGET_RISCV64
    uint32_t * pCode = (uint32_t *)pCodeOrg;
+    // is this "addi a0, a0, 8"?
+    if (pCode[0] == 0x00850513) // encoding of `addi a0, a0, 8`
+    {
+        // unboxing sequence
+        unboxingStub = true;
+        pCode++;
+    }
+    // is this an indirect jump?
+    // auipc t0, imm20; ld t0, imm12(t0); jalr zero, t0, 0
+    if ((pCode[0] & 0x00000fff) == 0x00000297 &&
+        (pCode[1] & 0x000fffff) == 0x0002b283 &&
+        pCode[2] == 0x00028067)
+    {
+        // normal import stub - dist to IAT cell is relative to the auipc instruction
+        // auipc: imm = SignExtend(imm20:Zeros(12), 64);
+        int64_t distToIatCell = (int64_t)(int32_t)(pCode[0] & 0xfffff000);
+        // ld: offset = SignExtend(imm12, 64);
+        distToIatCell += ((int32_t)pCode[1]) >> 20;
+        uint8_t ** pIatCell = (uint8_t **)((int64_t)pCode + distToIatCell);
+        return *pIatCell;
+    }
+    // is this an unboxing stub followed by a relative jump?
+    // jal zero, imm20
+    else if (unboxingStub && (pCode[0] & 0x00000fff) == 0x0000006f)
+    {
+        // relative jump - dist is relative to the instruction
+        // J-type immediate: imm[20|10:1|11|19:12] packed into bits [31:12]
+        uint32_t instr = pCode[0];
+        int64_t distToTarget = ((int64_t)(int32_t)instr >> 31) << 20; // imm[20] (sign bit)
+        distToTarget |= ((int64_t)((instr >> 21) & 0x3ff)) << 1;      // imm[10:1]
+        distToTarget |= ((int64_t)((instr >> 20) & 0x1)) << 11;       // imm[11]
+        distToTarget |= ((int64_t)((instr >> 12) & 0xff)) << 12;      // imm[19:12]
+        return (uint8_t *)pCode + distToTarget;
+    }
+
 #else
     UNREFERENCED_PARAMETER(unboxingStub);
     PORTABILITY_ASSERT("RhGetCodeTarget");
diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h
index 22f12552530f84..fb9a2ed2724dfc 100644
--- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h
+++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h
@@ -531,6 +531,96 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
     }
 } CONTEXT, *PCONTEXT;
 
+#elif defined(TARGET_RISCV64)
+
+#define CONTEXT_RISCV64 0x01000000L
+
+#define CONTEXT_CONTROL (CONTEXT_RISCV64 | 0x1L)
+#define CONTEXT_INTEGER (CONTEXT_RISCV64 | 0x2L)
+
+// Specify the number of breakpoints and watchpoints that the OS will track.
+
+#define RISCV64_MAX_BREAKPOINTS 8
+#define RISCV64_MAX_WATCHPOINTS 2
+
+typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
+    //
+    // Control flags.
+    //
+    uint32_t ContextFlags;
+
+    //
+    // Integer registers
+    //
+    uint64_t X0;  // Hardwired zero
+    uint64_t X1;  // Return address (ra)
+    uint64_t X2;  // Stack pointer (sp)
+    uint64_t X3;  // Global pointer (gp)
+    uint64_t X4;  // Thread pointer (tp)
+    uint64_t X5;  // Temporary (t0)
+    uint64_t X6;  // Temporary (t1)
+    uint64_t X7;  // Temporary (t2)
+    uint64_t X8;  // Saved register/frame pointer (s0/fp)
+    uint64_t X9;  // Saved register (s1)
+    uint64_t X10; // Argument/return value (a0)
+    uint64_t X11; // Argument/return value (a1)
+    uint64_t X12; // Argument (a2)
+    uint64_t X13; // Argument (a3)
+    uint64_t X14; // Argument (a4)
+    uint64_t X15; // Argument (a5)
+    uint64_t X16; // Argument (a6)
+    uint64_t X17; // Argument (a7)
+    uint64_t X18; // Saved register (s2)
+    uint64_t X19; // Saved register (s3)
+    uint64_t X20; // Saved register (s4)
+    uint64_t X21; // Saved register (s5)
+    uint64_t X22; // Saved register (s6)
+    uint64_t X23; // Saved register (s7)
+    uint64_t X24; // Saved register (s8)
+    uint64_t X25; // Saved register (s9)
+    uint64_t X26; // Saved register (s10)
+    uint64_t X27; // Saved register (s11)
+    uint64_t X28; // Temporary (t3)
+    uint64_t X29; // Temporary (t4)
+    uint64_t X30; // Temporary (t5)
+    uint64_t X31; // Temporary (t6)
+    uint64_t Pc;  // Program counter
+
+    //
+    // Floating Point Registers
+    //
+    uint64_t F[32]; // Floating-point registers
+    uint32_t Fcsr;  // Floating-point control and status register
+
+    //
+    // Debug registers
+    //
+    uint32_t Bcr[RISCV64_MAX_BREAKPOINTS];
+    uint64_t Bvr[RISCV64_MAX_BREAKPOINTS];
+    uint32_t Wcr[RISCV64_MAX_WATCHPOINTS];
+    uint64_t Wvr[RISCV64_MAX_WATCHPOINTS];
+
+    void SetIp(uintptr_t ip) { Pc = ip; }
+    void SetArg0Reg(uintptr_t val) { X10 = val; } // a0 holds the first argument
+    void SetArg1Reg(uintptr_t val) { X11 = val; } // a1 holds the second argument
+    uintptr_t GetIp() { return Pc; }
+    uintptr_t GetLr() { return X1; } // return address (ra)
+    uintptr_t GetSp() { return X2; } // stack pointer (sp)
+    void SetSp(uintptr_t sp) { X2 = sp; }
+
+    template <typename F>
+    void ForEachPossibleObjectRef(F lambda)
+    {
+        // X0 is hardwired to zero, but scanning it is harmless; every other
+        // integer register (including X1/ra) can hold a live reference.
+        for (uint64_t* pReg = &X0; pReg <= &X31; pReg++)
+            lambda((size_t*)pReg);
+    }
+} CONTEXT, *PCONTEXT;
+
 #elif defined(HOST_WASM)
typedef struct DECLSPEC_ALIGN(8) _CONTEXT { diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h index b5e430779c6760..12655bdfcb4116 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h @@ -61,6 +61,7 @@ struct PAL_LIMITED_CONTEXT uintptr_t GetLr() const { return LR; } void SetIp(uintptr_t ip) { IP = ip; } void SetSp(uintptr_t sp) { SP = sp; } + #elif defined(TARGET_ARM64) uintptr_t FP; uintptr_t LR; @@ -91,6 +92,7 @@ struct PAL_LIMITED_CONTEXT uintptr_t GetLr() const { return LR; } void SetIp(uintptr_t ip) { IP = ip; } void SetSp(uintptr_t sp) { SP = sp; } + #elif defined(TARGET_LOONGARCH64) uintptr_t FP; uintptr_t RA; @@ -122,6 +124,56 @@ struct PAL_LIMITED_CONTEXT void SetIp(uintptr_t ip) { IP = ip; } void SetSp(uintptr_t sp) { SP = sp; } +#elif defined(TARGET_RISCV64) + uintptr_t FP; + uintptr_t RA; + + uintptr_t X0; + uintptr_t X1; + uintptr_t X2; + uintptr_t X3; + uintptr_t X4; + uintptr_t X5; + uintptr_t X6; + uintptr_t X7; + uintptr_t X8; + uintptr_t X9; + uintptr_t X10; + uintptr_t X11; + uintptr_t X12; + uintptr_t X13; + uintptr_t X14; + uintptr_t X15; + uintptr_t X16; + uintptr_t X17; + uintptr_t X18; + uintptr_t X19; + uintptr_t X20; + uintptr_t X21; + uintptr_t X22; + uintptr_t X23; + uintptr_t X24; + uintptr_t X25; + uintptr_t X26; + uintptr_t X27; + uintptr_t X28; + uintptr_t X29; + uintptr_t X30; + uintptr_t X31; + + uintptr_t SP; + uintptr_t IP; + + uint64_t F[32 - 16]; // Only the bottom 64-bit values of the F registers F16..F31 need to be preserved + // (F0-F15 are not preserved according to the ABI spec). + + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return SP; } + uintptr_t GetFp() const { return FP; } + uintptr_t GetRa() const { return RA; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { SP = sp; } + #elif defined(UNIX_AMD64_ABI) // Param regs: rdi, rsi, rdx, rcx, r8, r9, scratch: rax, rdx (both return val), preserved: rbp, rbx, r12-r15 uintptr_t IP; diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 5cd5b6dcf2872e..468a9f901078ea 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -287,6 +287,54 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF m_HijackedReturnValueKind = retValueKind; } +#elif defined(TARGET_RISCV64) + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_HOST_MEMBER_TADDR(PInvokeTransitionFrame, pFrame, m_FramePointer); + m_RegDisplay.pRA = (PTR_uintptr_t)PTR_HOST_MEMBER_TADDR(PInvokeTransitionFrame, pFrame, m_RIP); + + ASSERT(!(pFrame->m_Flags & PTFF_SAVE_FP)); // FP should never contain a GC ref + + if (pFrame->m_Flags & PTFF_SAVE_X19) { m_RegDisplay.pX19 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X20) { m_RegDisplay.pX20 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X21) { m_RegDisplay.pX21 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X22) { m_RegDisplay.pX22 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X23) { m_RegDisplay.pX23 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X24) { m_RegDisplay.pX24 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X25) { m_RegDisplay.pX25 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_X26) { m_RegDisplay.pX26 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & 
PTFF_SAVE_X27) { m_RegDisplay.pX27 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X28) { m_RegDisplay.pX28 = pPreservedRegsCursor++; }
+
+    if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; }
+
+    if (pFrame->m_Flags & PTFF_SAVE_X0) { m_RegDisplay.pX0 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X1) { m_RegDisplay.pX1 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X2) { m_RegDisplay.pX2 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X3) { m_RegDisplay.pX3 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X4) { m_RegDisplay.pX4 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X5) { m_RegDisplay.pX5 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X6) { m_RegDisplay.pX6 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X7) { m_RegDisplay.pX7 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X8) { m_RegDisplay.pX8 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X9) { m_RegDisplay.pX9 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X10) { m_RegDisplay.pX10 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X11) { m_RegDisplay.pX11 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X12) { m_RegDisplay.pX12 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X13) { m_RegDisplay.pX13 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X14) { m_RegDisplay.pX14 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X15) { m_RegDisplay.pX15 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X16) { m_RegDisplay.pX16 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X17) { m_RegDisplay.pX17 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_X18) { m_RegDisplay.pX18 = pPreservedRegsCursor++; }
+
+    if (pFrame->m_Flags & PTFF_SAVE_RA) { m_RegDisplay.pRA = pPreservedRegsCursor++; }
+
+    GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags);
+    if (retValueKind != GCRK_Scalar)
+    {
+        m_pHijackedReturnValue = (PTR_OBJECTREF)m_RegDisplay.pX0;
+        m_HijackedReturnValueKind = retValueKind;
+    }
+
 #else // TARGET_ARM
     if (pFrame->m_Flags & PTFF_SAVE_RBX) { m_RegDisplay.pRbx = pPreservedRegsCursor++; }
     if (pFrame->m_Flags & PTFF_SAVE_RSI) { m_RegDisplay.pRsi = pPreservedRegsCursor++; }
@@ -731,6 +779,60 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags)
     m_RegDisplay.pR20 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R20);
     m_RegDisplay.pR21 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R21);
 
+#elif defined(TARGET_RISCV64)
+
+    //
+    // preserved regs
+    //
+    m_RegDisplay.pX8  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X8);
+    m_RegDisplay.pX9  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X9);
+    m_RegDisplay.pX18 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X18);
+    m_RegDisplay.pX19 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X19);
+    m_RegDisplay.pX20 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X20);
+    m_RegDisplay.pX21 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X21);
+    m_RegDisplay.pX22 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X22);
+    m_RegDisplay.pX23 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X23);
+    m_RegDisplay.pX24 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X24);
+    m_RegDisplay.pX25 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X25);
+    m_RegDisplay.pX26 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X26);
+    m_RegDisplay.pX27 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X27);
+    m_RegDisplay.pFP  = (PTR_uintptr_t)PTR_TO_REG(pCtx, Fp);
+    m_RegDisplay.pRA  = (PTR_uintptr_t)PTR_TO_REG(pCtx, Ra);
+
+    //
+    // scratch regs
+    //
+    m_RegDisplay.pX0  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X0);
+    m_RegDisplay.pX1  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X1);
+    m_RegDisplay.pX2  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X2);
+    m_RegDisplay.pX3  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X3);
+    m_RegDisplay.pX4  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X4);
+    m_RegDisplay.pX5  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X5);
+    m_RegDisplay.pX6  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X6);
+    m_RegDisplay.pX7  = (PTR_uintptr_t)PTR_TO_REG(pCtx, X7);
+    m_RegDisplay.pX10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X10);
+    m_RegDisplay.pX11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X11);
+    m_RegDisplay.pX12 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X12);
+    m_RegDisplay.pX13 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X13);
+    m_RegDisplay.pX14 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X14);
+    m_RegDisplay.pX15 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X15);
+    m_RegDisplay.pX16 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X16);
+    m_RegDisplay.pX17 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X17);
+    m_RegDisplay.pX28 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X28);
+    m_RegDisplay.pX29 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X29);
+    m_RegDisplay.pX30 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X30);
+    m_RegDisplay.pX31 = (PTR_uintptr_t)PTR_TO_REG(pCtx, X31);
+
 #else
     PORTABILITY_ASSERT("StackFrameIterator::InternalInit");
 #endif // TARGET_ARM
@@ -887,6 +989,17 @@ void StackFrameIterator::UpdateFromExceptionDispatch(PTR_StackFrameIterator pSourceIterator)
     m_RegDisplay.pR31 = thisFuncletPtrs.pR31;
     m_RegDisplay.pFP = thisFuncletPtrs.pFP;
 
+#elif defined(TARGET_RISCV64)
+    m_RegDisplay.pX19 = thisFuncletPtrs.pX19;
+    m_RegDisplay.pX20 = thisFuncletPtrs.pX20;
+    m_RegDisplay.pX21 = thisFuncletPtrs.pX21;
+    m_RegDisplay.pX22 = thisFuncletPtrs.pX22;
+    m_RegDisplay.pX23 = thisFuncletPtrs.pX23;
+    m_RegDisplay.pX24 = thisFuncletPtrs.pX24;
+    m_RegDisplay.pX25 = thisFuncletPtrs.pX25;
+    m_RegDisplay.pX26 = thisFuncletPtrs.pX26;
+    m_RegDisplay.pX27 = thisFuncletPtrs.pX27;
+    m_RegDisplay.pX28 = thisFuncletPtrs.pX28;
+    m_RegDisplay.pFP  = thisFuncletPtrs.pFP;
+
 #elif defined(UNIX_AMD64_ABI)
     // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code.
     m_RegDisplay.pRbp = thisFuncletPtrs.pRbp;
@@ -1169,12 +1282,52 @@ void StackFrameIterator::UnwindFuncletInvokeThunk()
     m_RegDisplay.pR30 = SP++;
     m_RegDisplay.pR31 = SP++;
 
+#elif defined(TARGET_RISCV64)
+    PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP);
+
+    for (int i = 0; i < 12; i++)
+    {
+        m_RegDisplay.F[i] = *f++;
+    }
+
+    SP = (PTR_uintptr_t)f;
+
+    if (!isFilterInvoke)
+    {
+        // RhpCallCatchFunclet puts a couple of extra things on the stack that aren't put there by the other two
+        // thunks, but we don't need to know what they are here, so we just skip them.
+        SP += EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ? 6 : 4;
+        // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code.
+        m_funcletPtrs.pX19 = m_RegDisplay.pX19;
+        m_funcletPtrs.pX20 = m_RegDisplay.pX20;
+        m_funcletPtrs.pX21 = m_RegDisplay.pX21;
+        m_funcletPtrs.pX22 = m_RegDisplay.pX22;
+        m_funcletPtrs.pX23 = m_RegDisplay.pX23;
+        m_funcletPtrs.pX24 = m_RegDisplay.pX24;
+        m_funcletPtrs.pX25 = m_RegDisplay.pX25;
+        m_funcletPtrs.pX26 = m_RegDisplay.pX26;
+        m_funcletPtrs.pX27 = m_RegDisplay.pX27;
+        m_funcletPtrs.pX28 = m_RegDisplay.pX28;
+        m_funcletPtrs.pFP  = m_RegDisplay.pFP;
+    }
+
+    m_RegDisplay.pFP = SP++;
+
+    m_RegDisplay.SetIP(*SP++);
+
+    m_RegDisplay.pX19 = SP++;
+    m_RegDisplay.pX20 = SP++;
+    m_RegDisplay.pX21 = SP++;
+    m_RegDisplay.pX22 = SP++;
+    m_RegDisplay.pX23 = SP++;
+    m_RegDisplay.pX24 = SP++;
+    m_RegDisplay.pX25 = SP++;
+    m_RegDisplay.pX26 = SP++;
+    m_RegDisplay.pX27 = SP++;
+    m_RegDisplay.pX28 = SP++;
+
 #else
     SP = (PTR_uintptr_t)(m_RegDisplay.SP);
     ASSERT_UNCONDITIONALLY("NYI for this arch");
 #endif
 
-#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64)
+#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
     m_RegDisplay.SetIP(PCODEToPINSTR(*SP++));
 #endif
@@ -1333,6 +1486,29 @@ struct UniversalTransitionStackFrame
         pRegisterSet->pFP = GET_POINTER_TO_FIELD(m_pushedFP);
     }
 
+#elif defined(TARGET_RISCV64)
+
+    // Conservative GC reporting must be applied to everything between the base of the
+    // ReturnBlock and the top of the StackPassedArgs.
+private:
+    uintptr_t m_pushedFP;           // ChildSP+000 CallerSP-100 (0x08 bytes) (fp)
+    uintptr_t m_pushedRA;           // ChildSP+008 CallerSP-0F8 (0x08 bytes) (ra)
+    Fp128 m_fpArgRegs[8];           // ChildSP+010 CallerSP-0F0 (0x80 bytes) (fa0-fa7)
+    uintptr_t m_returnBlock[4];     // ChildSP+090 CallerSP-070 (0x20 bytes)
+    uintptr_t m_intArgRegs[8];      // ChildSP+0B0 CallerSP-050 (0x40 bytes) (a0-a7)
+    uintptr_t m_alignmentPad;       // ChildSP+0F0 CallerSP-010 (0x08 bytes)
+    uintptr_t m_stackPassedArgs[1]; // ChildSP+0F8 CallerSP+000 (unknown size)
+
+public:
+    PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); }
+    PTR_uintptr_t get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_pushedRA); }
+    PTR_uintptr_t get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); }
+
+    void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet)
+    {
+        pRegisterSet->pFP = GET_POINTER_TO_FIELD(m_pushedFP);
+    }
+
 #elif defined(TARGET_WASM)
 private:
     // WASMTODO: #error NYI for this arch
@@ -1410,7 +1586,7 @@ void StackFrameIterator::UnwindUniversalTransitionThunk()
 #define STACK_ALIGN_SIZE 16
 #elif defined(TARGET_X86)
 #define STACK_ALIGN_SIZE 4
-#elif defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 #define STACK_ALIGN_SIZE 16
 #elif defined(TARGET_WASM)
 #define STACK_ALIGN_SIZE 4
@@ -1496,6 +1672,18 @@ void StackFrameIterator::UnwindThrowSiteThunk()
     m_RegDisplay.pR30 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R30);
     m_RegDisplay.pR31 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R31);
     m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP);
+#elif defined(TARGET_RISCV64)
+    m_RegDisplay.pX19 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X19);
+    m_RegDisplay.pX20 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X20);
+    m_RegDisplay.pX21 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X21);
+    m_RegDisplay.pX22 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X22);
+    m_RegDisplay.pX23 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X23);
+    m_RegDisplay.pX24 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X24);
m_RegDisplay.pX25 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X25); + m_RegDisplay.pX26 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X26); + m_RegDisplay.pX27 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X27); + m_RegDisplay.pX28 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X28); + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP); #else ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h index f174edd4c473b2..471185ba2152ca 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h @@ -191,6 +191,18 @@ class StackFrameIterator PTR_uintptr_t pR30; PTR_uintptr_t pR31; PTR_uintptr_t pFP; +#elif defined(TARGET_RISCV64) + PTR_uintptr_t pX19; + PTR_uintptr_t pX20; + PTR_uintptr_t pX21; + PTR_uintptr_t pX22; + PTR_uintptr_t pX23; + PTR_uintptr_t pX24; + PTR_uintptr_t pX25; + PTR_uintptr_t pX26; + PTR_uintptr_t pX27; + PTR_uintptr_t pX28; + PTR_uintptr_t pFP; #elif defined(UNIX_AMD64_ABI) PTR_uintptr_t pRbp; PTR_uintptr_t pRbx; diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index c8f91a07a2819f..794d676e4e76dc 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -22,7 +22,7 @@ #define THUNK_SIZE 20 #elif TARGET_ARM64 #define THUNK_SIZE 16 -#elif TARGET_LOONGARCH64 +#elif TARGET_LOONGARCH64 || TARGET_RISCV64 #define THUNK_SIZE 16 #else #define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 @@ -255,6 +255,27 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() *((uint32_t*)pCurrentThunkAddress) = 0x4C000280; pCurrentThunkAddress += 4; +#elif defined(TARGET_RISCV64) + + // lui t0, %hi(delta) // Load upper immediate with address high bits + // addi t0, t0, %lo(delta) // Add lower immediate + // ld t1, 0(t0) // Load data from address in t0 + // jalr x0, t1, 0 // Jump and link register (set PC to t1) + + int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress); + *((uint32_t*)pCurrentThunkAddress) = 0x0002A013 | (((delta & 0x3FFFFC) >> 2) << 12); // lui + addi + pCurrentThunkAddress += 4; + + delta += OS_PAGE_SIZE - POINTER_SIZE - (i * POINTER_SIZE * 2) - 4; + *((uint32_t*)pCurrentThunkAddress) = 0x0002B014 | (((delta & 0x3FFFFC) >> 2) << 12); // lui + addi + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x0002C294; // Example opcode, specific to RISC-V + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x0004C280; // Example opcode, specific to RISC-V + pCurrentThunkAddress += 4; + #else UNREFERENCED_PARAMETER(pCurrentDataAddress); UNREFERENCED_PARAMETER(pCurrentThunkAddress); diff --git a/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h b/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h index ece8ae50b379e6..ae76c7ba92e2be 100644 --- a/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h +++ b/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h @@ -13,7 +13,7 @@ typedef uint32_t UIntTarget; typedef uint64_t UIntTarget; #elif defined(TARGET_WASM) typedef uint32_t UIntTarget; -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef uint64_t UIntTarget; #else #error unexpected target architecture diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h 
b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index db238e24acbc16..621240f6e77c0e 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -400,6 +400,68 @@ enum PInvokeTransitionFrameFlags : uint64_t PTFF_THREAD_ABORT = 0x0000001000000000, // indicates that ThreadAbortException should be thrown when returning from the transition }; +#elif defined(TARGET_RISCV64) +enum PInvokeTransitionFrameFlags : uint64_t +{ + // NOTE: Keep in sync with src\coreclr\nativeaot\Runtime\riscv64\AsmMacros.h + + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + + // standard preserved registers + PTFF_SAVE_X19 = 0x0000000000000001, + PTFF_SAVE_X20 = 0x0000000000000002, + PTFF_SAVE_X21 = 0x0000000000000004, + PTFF_SAVE_X22 = 0x0000000000000008, + PTFF_SAVE_X23 = 0x0000000000000010, + PTFF_SAVE_X24 = 0x0000000000000020, + PTFF_SAVE_X25 = 0x0000000000000040, + PTFF_SAVE_X26 = 0x0000000000000080, + PTFF_SAVE_X27 = 0x0000000000000100, + PTFF_SAVE_X28 = 0x0000000000000200, + + PTFF_SAVE_SP = 0x0000000000000400, // Used for 'coop pinvokes' in runtime helper routines. Methods with + // PInvokes are required to have a frame pointers, but methods which + // call runtime helpers are not. Therefore, methods that call runtime + // helpers may need SP to seed the stackwalk. + + // Scratch registers + PTFF_SAVE_X0 = 0x0000000000000800, + PTFF_SAVE_X1 = 0x0000000000001000, + PTFF_SAVE_X2 = 0x0000000000002000, + PTFF_SAVE_X3 = 0x0000000000004000, + PTFF_SAVE_X4 = 0x0000000000008000, + PTFF_SAVE_X5 = 0x0000000000010000, + PTFF_SAVE_X6 = 0x0000000000020000, + PTFF_SAVE_X7 = 0x0000000000040000, + PTFF_SAVE_X8 = 0x0000000000080000, + PTFF_SAVE_X9 = 0x0000000000100000, + PTFF_SAVE_X10 = 0x0000000000200000, + PTFF_SAVE_X11 = 0x0000000000400000, + PTFF_SAVE_X12 = 0x0000000000800000, + PTFF_SAVE_X13 = 0x0000000001000000, + PTFF_SAVE_X14 = 0x0000000002000000, + PTFF_SAVE_X15 = 0x0000000004000000, + PTFF_SAVE_X16 = 0x0000000008000000, + PTFF_SAVE_X17 = 0x0000000010000000, + PTFF_SAVE_X18 = 0x0000000020000000, + + PTFF_SAVE_FP = 0x0000000040000000, // should never be used, we require FP frames for methods with + // pinvoke and it is saved into the frame pointer field instead + + PTFF_SAVE_RA = 0x0000000080000000, // this is useful for the case of loop hijacking where we need both + // a return address pointing into the hijacked method and that method's + // ra register, which may hold a gc pointer + + // used by hijack handler to report return value of hijacked method + PTFF_X0_IS_GCREF = 0x0000000100000000, + PTFF_X0_IS_BYREF = 0x0000000200000000, + PTFF_X1_IS_GCREF = 0x0000000400000000, + PTFF_X1_IS_BYREF = 0x0000000800000000, + + PTFF_THREAD_ABORT = 0x0000001000000000, // indicates that ThreadAbortException should be thrown when returning from the transition +}; + #else // TARGET_ARM enum PInvokeTransitionFrameFlags { @@ -461,7 +523,7 @@ struct PInvokeTransitionFrame #else // USE_PORTABLE_HELPERS struct PInvokeTransitionFrame { -#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // The FP and LR registers are pushed in different order when setting up frames TgtPTR_Void m_FramePointer; TgtPTR_Void m_RIP; @@ -473,7 +535,7 @@ struct PInvokeTransitionFrame // can be an invalid 
pointer in universal transition cases (which never need to call GetThread)
 #ifdef TARGET_ARM64
     uint64_t m_Flags;  // PInvokeTransitionFrameFlags
-#elif TARGET_LOONGARCH64
+#elif TARGET_LOONGARCH64 || TARGET_RISCV64
     uint64_t m_Flags;  // PInvokeTransitionFrameFlags
 #else
     uint32_t m_Flags;  // PInvokeTransitionFrameFlags
 #endif
@@ -499,7 +561,7 @@ struct PInvokeTransitionFrame
 #define OFFSETOF__Thread__m_pTransitionFrame 0x40
 #elif defined(TARGET_ARM64)
 #define OFFSETOF__Thread__m_pTransitionFrame 0x40
-#elif defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 #define OFFSETOF__Thread__m_pTransitionFrame 0x40
 #elif defined(TARGET_X86)
 #define OFFSETOF__Thread__m_pTransitionFrame 0x2c
diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h
index cd6f13418b778c..3c385a7ffdd6ab 100644
--- a/src/coreclr/nativeaot/Runtime/regdisplay.h
+++ b/src/coreclr/nativeaot/Runtime/regdisplay.h
@@ -231,6 +231,62 @@ struct REGDISPLAY
     inline void SetSP(uintptr_t SP) { this->SP = SP; }
 };
 
+#elif defined(TARGET_RISCV64)
+
+struct REGDISPLAY
+{
+    PTR_uintptr_t pX0;
+    PTR_uintptr_t pX1;
+    PTR_uintptr_t pX2;
+    PTR_uintptr_t pX3;
+    PTR_uintptr_t pX4;
+    PTR_uintptr_t pX5;
+    PTR_uintptr_t pX6;
+    PTR_uintptr_t pX7;
+    PTR_uintptr_t pX8;
+    PTR_uintptr_t pX9;
+    PTR_uintptr_t pX10;
+    PTR_uintptr_t pX11;
+    PTR_uintptr_t pX12;
+    PTR_uintptr_t pX13;
+    PTR_uintptr_t pX14;
+    PTR_uintptr_t pX15;
+    PTR_uintptr_t pX16;
+    PTR_uintptr_t pX17;
+    PTR_uintptr_t pX18;
+    PTR_uintptr_t pX19;
+    PTR_uintptr_t pX20;
+    PTR_uintptr_t pX21;
+    PTR_uintptr_t pX22;
+    PTR_uintptr_t pX23;
+    PTR_uintptr_t pX24;
+    PTR_uintptr_t pX25;
+    PTR_uintptr_t pX26;
+    PTR_uintptr_t pX27;
+    PTR_uintptr_t pX28;
+    PTR_uintptr_t pX29;
+    PTR_uintptr_t pX30;
+    PTR_uintptr_t pX31;
+    PTR_uintptr_t pFP; // Frame pointer (x8/s0)
+    PTR_uintptr_t pRA; // Return address (x1/ra)
+
+    uintptr_t SP;      // Stack pointer
+
+    PCODE IP;          // Instruction pointer
+
+    uint64_t F[12];    // Only the callee-saved FP registers fs0-fs11
+                       // (f8, f9, f18-f27) need to be tracked, per the
+                       // RISC-V psABI. These need to be unwound during a
+                       // stack walk for EH, but not adjusted, so we only
+                       // need their values, not their addresses.
+
+    inline PCODE GetIP() { return IP; }
+    inline uintptr_t GetSP() { return SP; }
+    inline uintptr_t GetFP() { return *pFP; }
+
+    inline void SetIP(PCODE IP) { this->IP = IP; }
+    inline void SetSP(uintptr_t SP) { this->SP = SP; }
+};
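A note on the shape of this struct: the pX* members are addresses of the stack slots (or context fields) where each register's value lives, which is what lets a stack walker hand the GC a location it can update in place when an object moves. A minimal standalone illustration of that pattern, with hypothetical names:

    #include <cstdint>

    struct MiniRegDisplay {
        uintptr_t* pS1; // points at the spilled s1 value, not a copy of it
    };

    // A moving GC adjusts a root in place through the recorded location.
    static void RelocateRoot(uintptr_t* slot, intptr_t delta) { *slot += delta; }

    int main() {
        uintptr_t spilledS1 = 0x1000;      // stands in for a stack slot
        MiniRegDisplay rd = { &spilledS1 };
        RelocateRoot(rd.pS1, 0x40);        // the object moved by 0x40 bytes
        return spilledS1 == 0x1040 ? 0 : 1;
    }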
 #elif defined(TARGET_WASM)
 
 struct REGDISPLAY
@@ -247,7 +303,7 @@ struct REGDISPLAY
     inline void SetIP(PCODE IP) { }
     inline void SetSP(uintptr_t SP) { }
 };
-#endif // HOST_X86 || HOST_AMD64 || HOST_ARM || HOST_ARM64 || HOST_WASM || HOST_LOONGARCH64
+#endif // HOST_X86 || HOST_AMD64 || HOST_ARM || HOST_ARM64 || HOST_WASM || HOST_LOONGARCH64 || HOST_RISCV64
 
 typedef REGDISPLAY * PREGDISPLAY;
diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp
index e084fb35e391f3..9d8ebcfda14691 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp
@@ -116,6 +116,44 @@
 #define MCREG_S8(mc) ((mc).__gregs[31])
 #define MCREG_Pc(mc) ((mc).__pc)
 
+#elif defined(HOST_RISCV64)
+
+#define MCREG_X0(mc) ((mc).__gregs[0])
+#define MCREG_X1(mc) ((mc).__gregs[1])
+#define MCREG_X2(mc) ((mc).__gregs[2])
+#define MCREG_X3(mc) ((mc).__gregs[3])
+#define MCREG_X4(mc) ((mc).__gregs[4])
+#define MCREG_X5(mc) ((mc).__gregs[5])
+#define MCREG_X6(mc) ((mc).__gregs[6])
+#define MCREG_X7(mc) ((mc).__gregs[7])
+#define MCREG_X8(mc) ((mc).__gregs[8])
+#define MCREG_X9(mc) ((mc).__gregs[9])
+#define MCREG_X10(mc) ((mc).__gregs[10])
+#define MCREG_X11(mc) ((mc).__gregs[11])
+#define MCREG_X12(mc) ((mc).__gregs[12])
+#define MCREG_X13(mc) ((mc).__gregs[13])
+#define MCREG_X14(mc) ((mc).__gregs[14])
+#define MCREG_X15(mc) ((mc).__gregs[15])
+#define MCREG_X16(mc) ((mc).__gregs[16])
+#define MCREG_X17(mc) ((mc).__gregs[17])
+#define MCREG_X18(mc) ((mc).__gregs[18])
+#define MCREG_X19(mc) ((mc).__gregs[19])
+#define MCREG_X20(mc) ((mc).__gregs[20])
+#define MCREG_X21(mc) ((mc).__gregs[21])
+#define MCREG_X22(mc) ((mc).__gregs[22])
+#define MCREG_X23(mc) ((mc).__gregs[23])
+#define MCREG_X24(mc) ((mc).__gregs[24])
+#define MCREG_X25(mc) ((mc).__gregs[25])
+#define MCREG_X26(mc) ((mc).__gregs[26])
+#define MCREG_X27(mc) ((mc).__gregs[27])
+#define MCREG_X28(mc) ((mc).__gregs[28])
+#define MCREG_X29(mc) ((mc).__gregs[29])
+#define MCREG_X30(mc) ((mc).__gregs[30])
+#define MCREG_X31(mc) ((mc).__gregs[31])
+#define MCREG_FP(mc) MCREG_X8(mc) // fp is x8 (s0)
+#define MCREG_SP(mc) MCREG_X2(mc) // sp is x2
+#define MCREG_Pc(mc) ((mc).__pc)
+
 #elif HOST_64BIT
 #define MCREG_Rip(mc) ((mc).__gregs[_REG_RIP])
 #define MCREG_Rsp(mc) ((mc).__gregs[_REG_RSP])
@@ -187,6 +225,42 @@
 #define MCREG_S8(mc) ((mc).__gregs[31])
 #define MCREG_Pc(mc) ((mc).__pc)
 
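For the sets of MCREG_* macros in this file, everything falls out of the flat x0-x31 register file plus the psABI aliases: ra is x1, sp is x2, fp is x8 (s0). A tiny sketch (local enum, not the OS headers) of why the FP/SP macros can simply alias entries of the general-register array:

    #include <cstdio>

    // ABI aliases onto the flat RISC-V integer register file.
    enum RiscVReg { REG_ZERO = 0, REG_RA = 1, REG_SP = 2, REG_GP = 3,
                    REG_TP = 4, REG_FP = 8 /* s0 */, REG_S1 = 9, REG_A0 = 10 };

    int main() {
        unsigned long gregs[32] = {};
        gregs[REG_SP] = 0x7ffff000;
        gregs[REG_FP] = 0x7ffff100;
        printf("sp=%#lx fp=%#lx\n", gregs[REG_SP], gregs[REG_FP]);
        return 0;
    }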
+#elif defined(HOST_RISCV64)
+
+#define MCREG_X0(mc) ((mc).__gregs[0])
+#define MCREG_X1(mc) ((mc).__gregs[1])
+#define MCREG_X2(mc) ((mc).__gregs[2])
+#define MCREG_X3(mc) ((mc).__gregs[3])
+#define MCREG_X4(mc) ((mc).__gregs[4])
+#define MCREG_X5(mc) ((mc).__gregs[5])
+#define MCREG_X6(mc) ((mc).__gregs[6])
+#define MCREG_X7(mc) ((mc).__gregs[7])
+#define MCREG_X8(mc) ((mc).__gregs[8])
+#define MCREG_X9(mc) ((mc).__gregs[9])
+#define MCREG_X10(mc) ((mc).__gregs[10])
+#define MCREG_X11(mc) ((mc).__gregs[11])
+#define MCREG_X12(mc) ((mc).__gregs[12])
+#define MCREG_X13(mc) ((mc).__gregs[13])
+#define MCREG_X14(mc) ((mc).__gregs[14])
+#define MCREG_X15(mc) ((mc).__gregs[15])
+#define MCREG_X16(mc) ((mc).__gregs[16])
+#define MCREG_X17(mc) ((mc).__gregs[17])
+#define MCREG_X18(mc) ((mc).__gregs[18])
+#define MCREG_X19(mc) ((mc).__gregs[19])
+#define MCREG_X20(mc) ((mc).__gregs[20])
+#define MCREG_X21(mc) ((mc).__gregs[21])
+#define MCREG_X22(mc) ((mc).__gregs[22])
+#define MCREG_X23(mc) ((mc).__gregs[23])
+#define MCREG_X24(mc) ((mc).__gregs[24])
+#define MCREG_X25(mc) ((mc).__gregs[25])
+#define MCREG_X26(mc) ((mc).__gregs[26])
+#define MCREG_X27(mc) ((mc).__gregs[27])
+#define MCREG_X28(mc) ((mc).__gregs[28])
+#define MCREG_X29(mc) ((mc).__gregs[29])
+#define MCREG_X30(mc) ((mc).__gregs[30])
+#define MCREG_X31(mc) ((mc).__gregs[31])
+#define MCREG_FP(mc) MCREG_X8(mc) // fp is x8 (s0)
+#define MCREG_SP(mc) MCREG_X2(mc) // sp is x2
+#define MCREG_Pc(mc) ((mc).__pc)
+
 #elif HOST_64BIT
 #define MCREG_Rip(mc) ((mc).gregs[REG_RIP])
 #define MCREG_Rsp(mc) ((mc).gregs[REG_RSP])
@@ -332,6 +406,42 @@
 #define MCREG_S8(mc) ((mc).regs[31])
 #define MCREG_Pc(mc) ((mc).pc)
 
+#elif defined(HOST_RISCV64)
+
+#define MCREG_X0(mc) ((mc).regs[0])
+#define MCREG_X1(mc) ((mc).regs[1])
+#define MCREG_X2(mc) ((mc).regs[2])
+#define MCREG_X3(mc) ((mc).regs[3])
+#define MCREG_X4(mc) ((mc).regs[4])
+#define MCREG_X5(mc) ((mc).regs[5])
+#define MCREG_X6(mc) ((mc).regs[6])
+#define MCREG_X7(mc) ((mc).regs[7])
+#define MCREG_X8(mc) ((mc).regs[8])
+#define MCREG_X9(mc) ((mc).regs[9])
+#define MCREG_X10(mc) ((mc).regs[10])
+#define MCREG_X11(mc) ((mc).regs[11])
+#define MCREG_X12(mc) ((mc).regs[12])
+#define MCREG_X13(mc) ((mc).regs[13])
+#define MCREG_X14(mc) ((mc).regs[14])
+#define MCREG_X15(mc) ((mc).regs[15])
+#define MCREG_X16(mc) ((mc).regs[16])
+#define MCREG_X17(mc) ((mc).regs[17])
+#define MCREG_X18(mc) ((mc).regs[18])
+#define MCREG_X19(mc) ((mc).regs[19])
+#define MCREG_X20(mc) ((mc).regs[20])
+#define MCREG_X21(mc) ((mc).regs[21])
+#define MCREG_X22(mc) ((mc).regs[22])
+#define MCREG_X23(mc) ((mc).regs[23])
+#define MCREG_X24(mc) ((mc).regs[24])
+#define MCREG_X25(mc) ((mc).regs[25])
+#define MCREG_X26(mc) ((mc).regs[26])
+#define MCREG_X27(mc) ((mc).regs[27])
+#define MCREG_X28(mc) ((mc).regs[28])
+#define MCREG_X29(mc) ((mc).regs[29])
+#define MCREG_X30(mc) ((mc).regs[30])
+#define MCREG_X31(mc) ((mc).regs[31])
+#define MCREG_FP(mc) MCREG_X8(mc) // fp is x8 (s0)
+#define MCREG_SP(mc) MCREG_X2(mc) // sp is x2
+#define MCREG_Pc(mc) ((mc).pc)
+
 #else
 
 // For FreeBSD, as found in x86/ucontext.h
@@ -496,6 +606,29 @@
     MCREG_A0(nativeContext->uc_mcontext) = arg0Reg; \
     MCREG_A1(nativeContext->uc_mcontext) = arg1Reg;
 
+#elif defined(HOST_RISCV64)
+
+#define ASSIGN_CONTROL_REGS  \
+    ASSIGN_REG(Pc, IP)       \
+    ASSIGN_REG(SP, SP)       \
+    ASSIGN_REG(FP, FP)       \
+    ASSIGN_REG(X1, RA)
+
+#define ASSIGN_INTEGER_REGS  \
+    ASSIGN_REG(X9, X9)       \
+    ASSIGN_REG(X18, X18)     \
+    ASSIGN_REG(X19, X19)     \
+    ASSIGN_REG(X20, X20)     \
+    ASSIGN_REG(X21, X21)     \
+    ASSIGN_REG(X22, X22)     \
+    ASSIGN_REG(X23, X23)     \
+    ASSIGN_REG(X24, X24)     \
+    ASSIGN_REG(X25, X25)     \
+    ASSIGN_REG(X26, X26)     \
+    ASSIGN_REG(X27, X27)
+
+#define ASSIGN_TWO_ARGUMENT_REGS \
+    MCREG_X10(nativeContext->uc_mcontext) = arg0Reg; \
+    MCREG_X11(nativeContext->uc_mcontext) = arg1Reg;
+
 #elif defined(HOST_WASM)
 // TODO: determine how unwinding will work on WebAssembly
 #define ASSIGN_CONTROL_REGS
@@ -696,6 +829,43 @@ uint64_t GetPC(void* context)
     uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } // R3
     uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); }
 
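The ASSIGN_* lists above work by token pasting: ASSIGN_REG selects the MCREG_* accessor for the native mcontext field and copies the matching PAL_LIMITED_CONTEXT field into it. A compilable miniature of the mechanism, with two stand-in context types (all names local to this sketch):

    #include <cassert>
    #include <cstdint>

    struct Native { uint64_t x1, x2, x8, pc; };
    struct Pal    { uint64_t RA, SP, FP, IP; };

    #define MCREG_X1(mc) ((mc).x1)
    #define MCREG_X2(mc) ((mc).x2)
    #define MCREG_X8(mc) ((mc).x8)
    #define MCREG_Pc(mc) ((mc).pc)

    // Pastes the accessor name, so each list entry becomes one assignment.
    #define ASSIGN_REG(native, pal) MCREG_##native(n) = p.pal;

    int main() {
        Pal p = { 1, 2, 3, 4 };
        Native n = {};
        ASSIGN_REG(X1, RA)  // expands to n.x1 = p.RA;
        ASSIGN_REG(X2, SP)
        ASSIGN_REG(X8, FP)
        ASSIGN_REG(Pc, IP)
        assert(n.x1 == 1 && n.x2 == 2 && n.x8 == 3 && n.pc == 4);
        return 0;
    }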
+#elif defined(HOST_RISCV64)
+
+    uint64_t& UNIX_CONTEXT::X0() { return (uint64_t&)MCREG_X0(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X1() { return (uint64_t&)MCREG_X1(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X2() { return (uint64_t&)MCREG_X2(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X3() { return (uint64_t&)MCREG_X3(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X4() { return (uint64_t&)MCREG_X4(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X5() { return (uint64_t&)MCREG_X5(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X6() { return (uint64_t&)MCREG_X6(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X7() { return (uint64_t&)MCREG_X7(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X8() { return (uint64_t&)MCREG_X8(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X9() { return (uint64_t&)MCREG_X9(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X10() { return (uint64_t&)MCREG_X10(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X11() { return (uint64_t&)MCREG_X11(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X12() { return (uint64_t&)MCREG_X12(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X13() { return (uint64_t&)MCREG_X13(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X14() { return (uint64_t&)MCREG_X14(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X15() { return (uint64_t&)MCREG_X15(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X16() { return (uint64_t&)MCREG_X16(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X17() { return (uint64_t&)MCREG_X17(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X18() { return (uint64_t&)MCREG_X18(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X19() { return (uint64_t&)MCREG_X19(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X20() { return (uint64_t&)MCREG_X20(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X21() { return (uint64_t&)MCREG_X21(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X22() { return (uint64_t&)MCREG_X22(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X23() { return (uint64_t&)MCREG_X23(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X24() { return (uint64_t&)MCREG_X24(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X25() { return (uint64_t&)MCREG_X25(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X26() { return (uint64_t&)MCREG_X26(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X27() { return (uint64_t&)MCREG_X27(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X28() { return (uint64_t&)MCREG_X28(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X29() { return (uint64_t&)MCREG_X29(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X30() { return (uint64_t&)MCREG_X30(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::X31() { return (uint64_t&)MCREG_X31(ctx.uc_mcontext); }
+    uint64_t& UNIX_CONTEXT::Fp() { return (uint64_t&)MCREG_FP(ctx.uc_mcontext); } // x8 (s0)
+    uint64_t& UNIX_CONTEXT::Ra() { return (uint64_t&)MCREG_X1(ctx.uc_mcontext); } // x1
+    uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_SP(ctx.uc_mcontext); } // x2
+    uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); }
+
 #else
     PORTABILITY_ASSERT("UNIX_CONTEXT");
 #endif // TARGET_ARM
diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h
index 662b697715da0a..b1199a85bbfde8 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h
+++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h
@@ -213,6 +213,64 @@ struct UNIX_CONTEXT
         lambda((size_t*)&Ra());
     }
 
+#elif defined(TARGET_RISCV64)
+
+    uint64_t& X0();
+    uint64_t& X1();
+    uint64_t& X2();
+    uint64_t& X3();
+    uint64_t& X4();
+    uint64_t& X5();
+    uint64_t& X6();
+    uint64_t& X7();
+    uint64_t& X8();
+    uint64_t& X9();
+    uint64_t& X10();
+    uint64_t& X11();
+    uint64_t& X12();
+    uint64_t& X13();
+    uint64_t& X14();
+    uint64_t& X15();
+    uint64_t& X16();
+    uint64_t& X17();
+    uint64_t& X18();
+    uint64_t& X19();
+    uint64_t& X20();
+    uint64_t& X21();
+    uint64_t& X22();
+    uint64_t& X23();
+    uint64_t& X24();
+    uint64_t& X25();
+    uint64_t& X26();
+    uint64_t& X27();
+    uint64_t& X28();
+    uint64_t& X29();
+    uint64_t& X30();
+    uint64_t& X31();
+    uint64_t& Fp(); // x8 (s0)
+    uint64_t& Ra(); // x1
+    uint64_t& Sp(); // x2
+    uint64_t& Pc();
+
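The ForEachPossibleObjectRef below walks the accessor results as one contiguous array, which is why it asserts the layout first. The underlying conservative-scan pattern, reduced to a standalone sketch (stand-in storage; in the runtime the lambda reports each slot to the GC):

    #include <cstdint>
    #include <cstdio>

    template <typename F>
    void ForEachReg(uint64_t (&gregs)[32], F lambda)
    {
        // Every integer register slot is a potential object pointer.
        for (uint64_t* p = &gregs[0]; p <= &gregs[31]; p++)
            lambda((size_t*)p);
    }

    int main() {
        uint64_t gregs[32] = {};
        gregs[10] = 0x4000; // a0 might hold an object reference
        size_t candidates = 0;
        ForEachReg(gregs, [&](size_t* slot) { if (*slot != 0) candidates++; });
        printf("%zu candidate slot(s)\n", candidates);
        return 0;
    }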
+    uintptr_t GetIp() { return (uintptr_t)Pc(); }
+    uintptr_t GetSp() { return (uintptr_t)Sp(); }
+
+    template <typename F>
+    void ForEachPossibleObjectRef(F lambda)
+    {
+        // It is doubtful anyone would implement X0-X31 not as a contiguous array
+        // Just in case - here are some asserts.
+        ASSERT(&X0() + 1 == &X1());
+        ASSERT(&X0() + 10 == &X10());
+        ASSERT(&X0() + 20 == &X20());
+
+        // The loop covers X1 (ra) along with every other integer register.
+        for (uint64_t* pReg = &X0(); pReg <= &X31(); pReg++)
+            lambda((size_t*)pReg);
+    }
+
 #else
     PORTABILITY_ASSERT("UNIX_CONTEXT");
 #endif // TARGET_ARM
diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
index b12d63bf726129..664c438d40ab14 100644
--- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp
@@ -399,7 +399,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress)
     ASSERT(((uintptr_t)pvAddress & 1) == 0);
 #endif
 
-#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
     MethodInfo methodInfo;
     FindMethodInfo(pvAddress, &methodInfo);
     pMethodInfo = &methodInfo;
@@ -722,6 +779,63 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddress)
 
     return savedFp && savedRa && establishedFp ? 0 : 1;
 
+#elif defined(TARGET_RISCV64)
+
+// sd rs2, imm(sp)  (S-type store doubleword with sp as the base register)
+// imm[11:5] rs2 00010 011 imm[4:0] 0100011
+#define SD_SP_BITS 0x00013023
+#define SD_SP_MASK 0x000FF07F
+
+// addi fp, sp, imm  (fp is x8/s0; this is how the frame pointer is established)
+// imm[11:0] 00010 000 01000 0010011
+#define ADDI_FP_SP_BITS 0x00010413
+#define ADDI_FP_SP_MASK 0x000FFFFF
+
+// rs2 field of an S-type store, bits [24:20]
+#define SD_RS2_MASK 0x01F00000
+#define SD_RS2_FP   0x00800000 // x8 (fp/s0)
+#define SD_RS2_RA   0x00100000 // x1 (ra)
+
+    UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo;
+    ASSERT(pNativeMethodInfo != NULL);
+
+    uint32_t* start = (uint32_t*)pNativeMethodInfo->pMethodStartAddress;
+    bool savedFp = false;
+    bool savedRa = false;
+    bool establishedFp = false;
+
+    for (uint32_t* pInstr = (uint32_t*)start; pInstr < pvAddress && !(savedFp && savedRa && establishedFp); pInstr++)
+    {
+        uint32_t instr = *pInstr;
+
+        if ((instr & SD_SP_MASK) == SD_SP_BITS &&
+            ((instr & SD_RS2_MASK) == SD_RS2_FP || (instr & SD_RS2_MASK) == SD_RS2_RA))
+        {
+            // SP-relative store of fp or ra
+            savedFp |= (instr & SD_RS2_MASK) == SD_RS2_FP;
+            savedRa |= (instr & SD_RS2_MASK) == SD_RS2_RA;
+        }
+        else if ((instr & ADDI_FP_SP_MASK) == ADDI_FP_SP_BITS)
+        {
+            establishedFp = true;
+        }
+        else
+        {
+            // JIT generates other patterns into the prolog that we currently don't
+            // recognize (stack pointer adjustments, saves of other callee-saved
+            // registers). We don't need to recognize these patterns unless a compact
+            // unwinding code is generated for them in ILC.
+            // https://github.com/dotnet/runtime/issues/76371
+            return -1;
+        }
+    }
+
+    return savedFp && savedRa && establishedFp ? 0 : 1;
+
 #else
 
     return -1;
@@ -1152,6 +1209,62 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMethodInfo, PTR_VOID pvAddress)
         }
     }
 
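The prolog recognizer above keys on two encodings: an S-type store of fp/ra relative to sp, and the addi that establishes the frame pointer. A self-contained check of the store pattern with the fields spelled out (encodings follow the RV64I base spec; this code is illustrative, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // sd rs2, imm(sp): opcode=0x23 (STORE), funct3=0b011 (doubleword), rs1=x2 (sp)
    static bool IsSdToSp(uint32_t instr) { return (instr & 0x000FF07F) == 0x00013023; }
    static unsigned StoreSourceReg(uint32_t instr) { return (instr >> 20) & 0x1F; } // rs2

    int main() {
        // Hand-assembled `sd ra, 8(sp)`: imm[11:5]=0, rs2=x1, rs1=x2, funct3=3, imm[4:0]=8.
        uint32_t sd_ra_8_sp = (0u << 25) | (1u << 20) | (2u << 15) | (3u << 12) | (8u << 7) | 0x23;
        assert(IsSdToSp(sd_ra_8_sp));
        assert(StoreSourceReg(sd_ra_8_sp) == 1); // x1 is ra
        return 0;
    }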
+#elif defined(TARGET_RISCV64)
+
+// Integer loads (LB/LH/LW/LD and the unsigned variants): opcode 0000011 in bits [6:0]
+#define LD_BITS 0x00000003
+#define LD_MASK 0x0000007F
+
+// LUI: opcode 0110111 in bits [6:0]
+#define LUI_BITS 0x00000037
+#define LUI_MASK 0x0000007F
+
+    UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo;
+    ASSERT(pNativeMethodInfo != NULL);
+
+    uint32_t* start = (uint32_t*)pNativeMethodInfo->pMethodStartAddress;
+
+    // Since we stop on branches, the search is roughly limited by the containing basic block.
+    // We typically examine just 1-5 instructions and in rare cases up to 30.
+    //
+    // TODO: we can also limit the search by the longest possible epilogue length, but
+    // we must be sure the longest length considers all possibilities,
+    // which is somewhat nontrivial to derive/prove.
+    // It does not seem urgent, but it could be nice to have a constant upper bound.
+    for (uint32_t* pInstr = (uint32_t*)pvAddress - 1; pInstr > start; pInstr--)
+    {
+        uint32_t instr = *pInstr;
+
+        // Check for branches, jumps and system instructions. If we see one before the
+        // frame restores, we are not in an epilogue. This covers conditional branches
+        // (opcode 0x63), JALR including `ret` (0x67), JAL (0x6f) and ECALL/EBREAK (0x73).
+        uint32_t opcode = instr & 0x7F;
+        if (opcode == 0x63 || opcode == 0x67 || opcode == 0x6F || opcode == 0x73)
+        {
+            // not in an epilogue
+            break;
+        }
+
+        // Check for an instruction that rewrites fp (x8) or ra (x1) - in an epilogue
+        // these are reloaded from the stack with a load (or rematerialized via LUI).
+        int rd = (instr >> 7) & 0x1F;
+        if (rd == 8 || rd == 1)
+        {
+            if ((instr & LD_MASK) == LD_BITS || (instr & LUI_MASK) == LUI_BITS)
+            {
+                return -1;
+            }
+        }
+    }
+
 #endif
 
     return 0;
@@ -1194,9 +1307,9 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo,
 
     // Decode the GC info for the current method to determine its return type
     GcInfoDecoderFlags flags = DECODE_RETURN_KIND;
-#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
     flags = (GcInfoDecoderFlags)(flags | DECODE_HAS_TAILCALLS);
-#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64
+#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
 
     GcInfoDecoder decoder(GCInfoToken(p), flags);
     *pRetValueKind = GetGcRefKind(decoder.GetReturnKind());
@@ -1316,6 +1429,42 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo,
 
     *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->pRA;
     return true;
+
+#elif defined(TARGET_RISCV64)
+
+    if (decoder.HasTailCalls())
+    {
+        // Do not hijack functions that have tail calls, since there are two problems:
+        // 1. When a function that tail calls another one is hijacked, the RA may be
+        //    stored at a different location in the stack frame of the tail call target.
+        //    So just by performing a tail call, the hijacked location becomes invalid
+        //    and unhijacking would corrupt the stack by writing to that location.
+        // 2. There is a small window after the caller pops RA from the stack in its
+        //    epilog and before the tail called function pushes RA in its prolog when
+        //    the hijacked return address would not be on the stack and so we would
+        //    not be able to unhijack.
+ return false; + } + + PTR_uintptr_t pRA = pRegisterSet->pRA; + if (!VirtualUnwind(pMethodInfo, pRegisterSet)) + { + return false; + } + + if (pRegisterSet->pRA == pRA) + { + // This is the case when we are either: + // + // 1) In a leaf method that does not push RA on stack, OR + // 2) In the prolog/epilog of a non-leaf method that has not yet pushed RA on stack + // or has RA already popped off. + return false; + } + + *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->pRA; + return true; + #else return false; #endif // defined(TARGET_AMD64) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index 67701b45dd9489..566f8fc15ff28f 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -36,6 +36,8 @@ using libunwind::Registers_arm64; using libunwind::CompactUnwinder_arm64; #elif defined(TARGET_LOONGARCH64) using libunwind::Registers_loongarch; +#elif defined(TARGET_RISCV64) +using libunwind::Registers_riscv; #elif defined(TARGET_X86) using libunwind::Registers_x86; #else @@ -1088,6 +1090,278 @@ void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) #endif // TARGET_LOONGARCH64 +#if defined(TARGET_RISCV64) + +// Shim that implements methods required by libunwind over REGDISPLAY +struct Registers_REGDISPLAY : REGDISPLAY +{ + inline static int getArch() { return libunwind::REGISTERS_RISCV64; } + inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV64; } + + bool validRegister(int num) const; + bool validFloatRegister(int num) { return false; }; + bool validVectorRegister(int num) const; + + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + + double getFloatRegister(int num) const { abort(); } + void setFloatRegister(int num, double value) { abort(); } + + libunwind::v128 getVectorRegister(int num) const; + void setVectorRegister(int num, libunwind::v128 value); + + uint64_t getSP() const { return SP; } + void setSP(uint64_t value, uint64_t location) { SP = value; } + uint64_t getIP() const { return IP; } + void setIP(uint64_t value, uint64_t location) { IP = value; } + uint64_t getFP() const { return *pFP; } + void setFP(uint64_t value, uint64_t location) { pFP = (PTR_uintptr_t)location; } +}; + +inline bool Registers_REGDISPLAY::validRegister(int num) const { + if (num == UNW_REG_SP || num == UNW_RISCV_X2) + return true; + + if (num == UNW_REG_IP) + return true; + + if (num >= UNW_RISCV_X0 && num <= UNW_RISCV_X31) + return true; + + return false; +} + +bool Registers_REGDISPLAY::validVectorRegister(int num) const +{ + if (num >= UNW_RISCV_V0 && num <= UNW_RISCV_V31) + return true; + + return false; +} + +inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { + if (regNum == UNW_REG_SP || regNum == UNW_RISCV_X2) + return SP; + + if (regNum == UNW_RISCV_X8) + return *pFP; + + if (regNum == UNW_RISCV_X1) + return *pRA; + + if (regNum == UNW_REG_IP) + return IP; + + switch (regNum) + { + case (UNW_RISCV_X0): + return *pX0; + case (UNW_RISCV_X3): + return *pX3; + case (UNW_RISCV_X4): + return *pX4; + case (UNW_RISCV_X5): + return *pX5; + case (UNW_RISCV_X6): + return *pX6; + case (UNW_RISCV_X7): + return *pX7; + case (UNW_RISCV_X9): + return *pX9; + case (UNW_RISCV_X10): + return *pX10; + case (UNW_RISCV_X11): + return *pX11; + case (UNW_RISCV_X12): + return *pX12; + case (UNW_RISCV_X13): + return *pX13; + case (UNW_RISCV_X14): + return 
*pX14; + case (UNW_RISCV_X15): + return *pX15; + case (UNW_RISCV_X16): + return *pX16; + case (UNW_RISCV_X17): + return *pX17; + case (UNW_RISCV_X18): + return *pX18; + case (UNW_RISCV_X19): + return *pX19; + case (UNW_RISCV_X20): + return *pX20; + case (UNW_RISCV_X21): + return *pX21; + case (UNW_RISCV_X22): + return *pX22; + case (UNW_RISCV_X23): + return *pX23; + case (UNW_RISCV_X24): + return *pX24; + case (UNW_RISCV_X25): + return *pX25; + case (UNW_RISCV_X26): + return *pX26; + case (UNW_RISCV_X27): + return *pX27; + case (UNW_RISCV_X28): + return *pX28; + case (UNW_RISCV_X29): + return *pX29; + case (UNW_RISCV_X30): + return *pX30; + case (UNW_RISCV_X31): + return *pX31; + } + + PORTABILITY_ASSERT("unsupported riscv64 register"); +} + +void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) +{ + if (num == UNW_REG_SP || num == UNW_RISCV_X2) { + SP = (uintptr_t)value; + return; + } + + if (num == UNW_RISCV_X8) { + pFP = (PTR_uintptr_t)location; + return; + } + + if (num == UNW_RISCV_X1) { + pRA = (PTR_uintptr_t)location; + return; + } + + if (num == UNW_REG_IP) { + IP = value; + return; + } + + switch (num) + { + case (UNW_RISCV_X0): + pX0 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X3): + pX3 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X4): + pX4 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X5): + pX5 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X6): + pX6 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X7): + pX7 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X9): + pX9 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X10): + pX10 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X11): + pX11 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X12): + pX12 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X13): + pX13 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X14): + pX14 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X15): + pX15 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X16): + pX16 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X17): + pX17 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X18): + pX18 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X19): + pX19 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X20): + pX20 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X21): + pX21 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X22): + pX22 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X23): + pX23 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X24): + pX24 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X25): + pX25 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X26): + pX26 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X27): + pX27 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X28): + pX28 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X29): + pX29 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X30): + pX30 = (PTR_uintptr_t)location; + break; + case (UNW_RISCV_X31): + pX31 = (PTR_uintptr_t)location; + break; + default: + PORTABILITY_ASSERT("unsupported riscv64 register"); + } +} + +libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const +{ + if (num >= UNW_RISCV_V0 && num <= UNW_RISCV_V31) { + libunwind::v128 result; + // Assuming a 128-bit vector register split into four 32-bit elements + result.vec[0] = V[num].v0; + result.vec[1] = V[num].v1; + result.vec[2] = V[num].v2; + result.vec[3] = V[num].v3; + return result; + } + 
PORTABILITY_ASSERT("unsupported riscv64 vector register"); +} + +void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) +{ + if (num >= UNW_RISCV_V0 && num <= UNW_RISCV_V31) { + V[num].v0 = value.vec[0]; + V[num].v1 = value.vec[1]; + V[num].v2 = value.vec[2]; + V[num].v3 = value.vec[3]; + } else { + PORTABILITY_ASSERT("unsupported riscv64 vector register"); + } +} + +#endif // TARGET_RISCV64 + bool UnwindHelpers::StepFrame(REGDISPLAY *regs, unw_word_t start_ip, uint32_t format, unw_word_t unwind_info) { #if _LIBUNWIND_SUPPORT_DWARF_UNWIND @@ -1106,6 +1380,12 @@ bool UnwindHelpers::StepFrame(REGDISPLAY *regs, unw_word_t start_ip, uint32_t fo int stepRet = compactInst.stepWithCompactEncoding(format, start_ip, _addressSpace, *(Registers_REGDISPLAY*)regs); return stepRet == UNW_STEP_SUCCESS; } +#elif defined(TARGET_RISCV64) + if ((format & UNWIND_RISCV64_MODE_MASK) != UNWIND_RISCV64_MODE_DWARF) { + CompactUnwinder_riscv64 compactInst; + int stepRet = compactInst.stepWithCompactEncoding(format, start_ip, _addressSpace, *(Registers_REGDISPLAY*)regs); + return stepRet == UNW_STEP_SUCCESS; + } #elif defined(TARGET_AMD64) if ((format & UNWIND_X86_64_MODE_MASK) != UNWIND_X86_64_MODE_DWARF) { CompactUnwinder_x86_64 compactInst; @@ -1157,6 +1437,8 @@ bool UnwindHelpers::GetUnwindProcInfo(PCODE pc, UnwindInfoSections &uwInfoSectio libunwind::UnwindCursor uc(_addressSpace); #elif defined(HOST_LOONGARCH64) libunwind::UnwindCursor uc(_addressSpace); +#elif defined(HOST_RISCV64) + libunwind::UnwindCursor uc(_addressSpace); #else #error "Unwinding is not implemented for this architecture yet." #endif @@ -1181,6 +1463,12 @@ bool UnwindHelpers::GetUnwindProcInfo(PCODE pc, UnwindInfoSections &uwInfoSectio } else { dwarfOffsetHint = procInfo->format & UNWIND_LOONGARCH64_DWARF_SECTION_OFFSET; } +#elif defined(TARGET_RISCV64) + if ((procInfo->format & UNWIND_RISCV64_MODE_MASK) != UNWIND_RISCV64_MODE_DWARF) { + return true; + } else { + dwarfOffsetHint = procInfo->format & UNWIND_RISCV64_DWARF_SECTION_OFFSET; + } #elif defined(TARGET_AMD64) if ((procInfo->format & UNWIND_X86_64_MODE_MASK) != UNWIND_X86_64_MODE_DWARF) { return true; diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc index 68ba993209e42b..e4ca8268cc2e65 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc @@ -44,4 +44,6 @@ #include "unixasmmacrosx86.inc" #elif defined(HOST_LOONGARCH64) #include "unixasmmacrosloongarch64.inc" +#elif defined(HOST_RISCV64) +#include "unixasmmacrosriscvh64.inc" #endif diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs index c7a96e12ddf177..ab3583c38d1456 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs @@ -44,6 +44,8 @@ public static TypeSystemContext Create() TargetArchitecture.Wasm32, #elif TARGET_LOONGARCH64 TargetArchitecture.LoongArch64, +#elif TARGET_RISCV64 + TargetArchitecture.RiscV64, #else #error Unknown architecture #endif diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 
caa6c733a783c5..7339d06398f557 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -66,6 +66,7 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, { TargetArchitecture.ARM64 => "RhpAssignRefArm64", TargetArchitecture.LoongArch64 => "RhpAssignRefLoongArch64", + TargetArchitecture.RiscV64 => "RhpAssignRefRiscV64", _ => "RhpAssignRef" }; break; diff --git a/src/native/external/llvm-libunwind/include/__libunwind_config.h b/src/native/external/llvm-libunwind/include/__libunwind_config.h index d521890f17f869..c2fc7c9e8a3aa4 100644 --- a/src/native/external/llvm-libunwind/include/__libunwind_config.h +++ b/src/native/external/llvm-libunwind/include/__libunwind_config.h @@ -151,11 +151,11 @@ # else # define RISCV_FLEN 0 # endif -# define _LIBUNWIND_CONTEXT_SIZE (32 * (__riscv_xlen + RISCV_FLEN) / 64) +# define _LIBUNWIND_CONTEXT_SIZE (32 * (__riscv_xlen + RISCV_FLEN) / 64) + 33 # if __riscv_xlen == 32 # define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 7) # elif __riscv_xlen == 64 -# define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 12) +# define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 12) + 33 # else # error "Unsupported RISC-V ABI" # endif diff --git a/src/native/external/llvm-libunwind/src/Registers.hpp b/src/native/external/llvm-libunwind/src/Registers.hpp index b76f24ea67da55..6a6d35467e518c 100644 --- a/src/native/external/llvm-libunwind/src/Registers.hpp +++ b/src/native/external/llvm-libunwind/src/Registers.hpp @@ -4240,13 +4240,14 @@ class _LIBUNWIND_HIDDEN Registers_riscv { bool validRegister(int num) const; reg_t getRegister(int num) const; - void setRegister(int num, reg_t value); + void setRegister(int num, reg_t value, uint64_t location); bool validFloatRegister(int num) const; fp_t getFloatRegister(int num) const; void setFloatRegister(int num, fp_t value); bool validVectorRegister(int num) const; v128 getVectorRegister(int num) const; void setVectorRegister(int num, v128 value); + uint64_t getRegisterLocation(int num) const; static const char *getRegisterName(int num); void jumpto(); static constexpr int lastDwarfRegNum() { @@ -4255,13 +4256,14 @@ class _LIBUNWIND_HIDDEN Registers_riscv { static int getArch() { return REGISTERS_RISCV; } reg_t getSP() const { return _registers[2]; } - void setSP(reg_t value) { _registers[2] = value; } + void setSP(reg_t value, uint64_t location) { _registers[2] = value; } reg_t getIP() const { return _registers[0]; } - void setIP(reg_t value) { _registers[0] = value; } + void setIP(reg_t value, uint64_t location) { _registers[0] = value; } private: // _registers[0] holds the pc reg_t _registers[32]; + reg_t _registerLocations[32]; # if defined(__riscv_flen) fp_t _floats[32]; # endif @@ -4271,6 +4273,7 @@ inline Registers_riscv::Registers_riscv(const void *registers) { static_assert((check_fit::does_fit), "riscv registers do not fit into unw_context_t"); memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); # if __riscv_xlen == 32 static_assert(sizeof(_registers) == 0x80, "expected float registers to be at offset 128"); @@ -4290,6 +4293,7 @@ inline Registers_riscv::Registers_riscv(const void *registers) { inline Registers_riscv::Registers_riscv() { memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); # if defined(__riscv_flen) memset(&_floats, 0, sizeof(_floats)); # endif @@ -4326,20 
+4330,41 @@ inline reg_t Registers_riscv::getRegister(int regNum) const { _LIBUNWIND_ABORT("unsupported riscv register"); } -inline void Registers_riscv::setRegister(int regNum, reg_t value) { - if (regNum == UNW_REG_IP) +inline void Registers_riscv::setRegister(int regNum, reg_t value, uint64_t location) { + if (regNum == UNW_REG_IP) { _registers[0] = value; + _registerLocations[0] = location; + } else if (regNum == UNW_REG_SP) _registers[2] = value; else if (regNum == UNW_RISCV_X0) /* x0 is hardwired to zero */ return; - else if ((regNum > 0) && (regNum < 32)) + else if ((regNum > 0) && (regNum < 32)) { _registers[regNum] = value; + _registerLocations[regNum] = location; + } else _LIBUNWIND_ABORT("unsupported riscv register"); } +inline uint64_t Registers_riscv::getRegisterLocation(int regNum) const { + if (regNum == UNW_REG_IP) + return _registerLocations[0]; + if (regNum == UNW_REG_SP) + return _registerLocations[2]; + if (regNum == UNW_RISCV_X0) + return 0; + if ((regNum > 0) && (regNum < 32)) + return _registerLocations[regNum]; + if (regNum == UNW_RISCV_VLENB) { + reg_t vlenb; + __asm__("csrr %0, 0xC22" : "=r"(vlenb)); + return vlenb; + } + _LIBUNWIND_ABORT("unsupported riscv register"); +} + inline const char *Registers_riscv::getRegisterName(int regNum) { switch (regNum) { case UNW_REG_IP: From 429e67c9c1c658f10d531ccd9043c077f12134b5 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Sat, 10 Aug 2024 11:22:07 +0300 Subject: [PATCH 02/19] part2 --- .../nativeaot/Runtime/riscv64/AllocFast.S | 280 +++ .../nativeaot/Runtime/riscv64/AsmOffsetsCpu.h | 67 ++ .../Runtime/riscv64/ExceptionHandling.S | 775 ++++++++++++++++++ .../nativeaot/Runtime/riscv64/GcProbe.S | 165 ++++ .../Runtime/riscv64/InteropThunksHelpers.S | 51 ++ .../nativeaot/Runtime/riscv64/MiscStubs.S | 5 + .../nativeaot/Runtime/riscv64/PInvoke.S | 55 ++ .../nativeaot/Runtime/riscv64/StubDispatch.S | 113 +++ .../Runtime/riscv64/UniversalTransition.S | 189 +++++ .../nativeaot/Runtime/riscv64/WriteBarriers.S | 365 +++++++++ .../Runtime/unix/unixasmmacrosriscv64.inc | 325 ++++++++ 11 files changed, 2390 insertions(+) create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/MiscStubs.S create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S create mode 100644 src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S create mode 100644 src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S new file mode 100644 index 00000000000000..f8091cc3f77cef --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -0,0 +1,280 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license.
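The fast allocation helpers that follow all implement the same bump-pointer scheme over the thread-local allocation context. As orientation, here is a hedged C++ sketch of the logic RhpNewFast encodes in assembly below; the alloc_ptr/alloc_limit fields mirror the gc_alloc_context offsets used in the patch, while the struct and function names are illustrative only.

#include <cstddef>
#include <cstdint>

struct AllocContextSketch { uint8_t* alloc_ptr; uint8_t* alloc_limit; };

void* TryAllocFast(AllocContextSketch* ctx, size_t baseSize, void* pMethodTable)
{
    uint8_t* obj = ctx->alloc_ptr;
    uint8_t* end = obj + baseSize;   // base size is already 8-byte aligned
    if (end > ctx->alloc_limit)
        return nullptr;              // abandon: fall back to the RhpGcAlloc slow path
    ctx->alloc_ptr = end;            // bump the thread-local allocation pointer
    *(void**)obj = pMethodTable;     // first word of the new object is its MethodTable*
    return obj;
}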
+ +#include +#include "AsmOffsets.inc" + +// GC type flags +GC_ALLOC_FINALIZE = 1 + +// +// Rename fields of nested structs +// +OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// $a0 == MethodTable + LEAF_ENTRY RhpNewFast, _TEXT + + // a1 = GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_1 +#else + INLINE_GETTHREAD $a1 +#endif + + // + // a0 contains MethodTable pointer + // + ld $a2, OFFSETOF__MethodTable__m_uBaseSize($a0) + + // + // a0: MethodTable pointer + // a1: Thread pointer + // a2: base size + // + + // Load potential new object address into t3. + ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a1) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add $a2, $a2, $t3 + ld $t4, OFFSETOF__Thread__m_alloc_context__alloc_limit($a1) + bltu $t4, $a2, RhpNewFast_RarePath + + // Update the alloc pointer to account for the allocation. + sd $a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a1) + + // Set the new objects MethodTable pointer + sd $a0, OFFSETOF__Object__m_pEEType($t3) + + mv $a0, $t3 + j RhpNewFast_Return + +RhpNewFast_RarePath: + mv $a1, x0 + j RhpNewObject + +RhpNewFast_Return: + LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// a0 == MethodTable + LEAF_ENTRY RhpNewFinalizable, _TEXT + li $a1, GC_ALLOC_FINALIZE + j RhpNewObject + LEAF_END RhpNewFinalizable, _TEXT + +// Allocate non-array object. +// a0 == MethodTable +// a1 == alloc flags + NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME $a3 + + // a3: transition frame + + // Preserve the MethodTable in s0 + mv $s0, $a0 + + li $a2, 0 // numElements + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's MethodTable pointer on success. + beq $a0, x0, NewOutOfMemory + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + .cfi_restore_state + +NewOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mv $a0, $s0 // MethodTable pointer + li $a1, 0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + j C_FUNC(RhExceptionHandling_FailedAllocation) + + NESTED_END RhpNewObject, _TEXT + +// Allocate a string. +// a0 == MethodTable +// a1 == element/character count + LEAF_ENTRY RhNewString, _TEXT + // Make sure computing the overall allocation size won't overflow + lui $a2, (MAX_STRING_LENGTH >> 12) & 0xFFFFF + ori $a2, $a2, MAX_STRING_LENGTH & 0xFFF + bltu $a2, $a1, StringSizeOverflow + + // Compute overall allocation size (align(base size + (element size * elements), 8)). 
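+ // Worked example of this computation (values illustrative): for a 10-character
+ // string, size = 10 * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7, and the
+ // andi below clears bits [2:0]; together with the +7 this rounds the unaligned
+ // size up to the next multiple of 8 bytes.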
+ li $a2, STRING_COMPONENT_SIZE + mul $a2, $a1, $a2 // $a2 = (a1 * STRING_COMPONENT_SIZE) + addi $a2, $a2, STRING_BASE_SIZE + 7 // $a2 = $a2 + STRING_BASE_SIZE + 7 + andi $a2, $a2, ~0x7 // clear the bits[2:0] of $a2 + + // a0 == MethodTable + // a1 == element count + // a2 == string size + +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_3 +#else + INLINE_GETTHREAD $a3 +#endif + + // Load potential new object address into t3. + ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add $a2, $a2, $t3 + ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_limit($a3) + bltu $t3, $a2, RhNewString_Rare + + // Reload new object address into t3. + ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + + // Update the alloc pointer to account for the allocation. + sd $a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + + // Set the new object's MethodTable pointer and element count. + sd $a0, OFFSETOF__Object__m_pEEType($t3) + sd $a1, OFFSETOF__Array__m_Length($t3) + + // Return the object allocated in $a0. + mv $a0, $t3 + + j RhNewString_Return + +StringSizeOverflow: + // We get here if the length of the final string object cannot be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. + + // a0 holds MethodTable pointer already + li $a1, 1 // Indicate that we should throw OverflowException + j C_FUNC(RhExceptionHandling_FailedAllocation) + +RhNewString_Rare: + j C_FUNC(RhpNewArrayRare) + +RhNewString_Return: + LEAF_END RhNewString, _TEXT + +// Allocate one-dimensional, zero-based array (SZARRAY). +// $a0 == MethodTable +// $a1 == element count + LEAF_ENTRY RhpNewArray, _TEXT + + // We want to limit the element count to the non-negative 32-bit int range. + // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component + // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst + // case (32-dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. + lui $a2, 0x7ffff + ori $a2, $a2, 0xfff + bltu $a2, $a1, ArraySizeOverflow + + ld $a2, OFFSETOF__MethodTable__m_usComponentSize($a0) + mul $a2, $a1, $a2 + ld $a3, OFFSETOF__MethodTable__m_uBaseSize($a0) + add $a2, $a2, $a3 + addi $a2, $a2, 7 + andi $a2, $a2, ~0x7 // clear the bits[2:0] of $a2 + + // a0 == MethodTable + // a1 == element count + // a2 == array size + + INLINE_GETTHREAD $a3 + + // Load potential new object address into t3. + ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add $a2, $a2, $t3 + ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_limit($a3) + bltu $t3, $a2, RhpNewArray_Rare + + // Reload new object address into t3. + ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + + // Update the alloc pointer to account for the allocation. + sd $a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + + // Set the new object's MethodTable pointer and element count. + sd $a0, OFFSETOF__Object__m_pEEType($t3) + sd $a1, OFFSETOF__Array__m_Length($t3) + + // Return the object allocated in $a0. 
+ mv $a0, $t3 + + j RhpNewArray_Return + +ArraySizeOverflow: + // We get here if the size of the final array object cannot be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // $a0 holds MethodTable pointer already + li $a1, 1 // Indicate that we should throw OverflowException + j C_FUNC(RhExceptionHandling_FailedAllocation) + +RhpNewArray_Rare: + j C_FUNC(RhpNewArrayRare) + +RhpNewArray_Return: + LEAF_END RhpNewArray, _TEXT + +// Allocate one-dimensional, zero-based array (SZARRAY) using the slow path that calls a runtime helper. +// a0 == MethodTable +// a1 == element count +// a2 == array size + Thread::m_alloc_context::alloc_ptr +// a3 == Thread + NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // Recover array size by subtracting the alloc_ptr from a2. + ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + sub $a2, $a2, $t3 + + PUSH_COOP_PINVOKE_FRAME $a3 + + // Preserve data we will need later into the callee saved registers + mv $s0, $a0 // Preserve MethodTable + + mv $a2, $a1 // numElements + li $a1, 0 // uFlags + + // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's MethodTable pointer and length on success. + beq $a0, x0, ArrayOutOfMemory + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + .cfi_restore_state + +ArrayOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mv $a0, $s0 // MethodTable Pointer + li $a1, 0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + j C_FUNC(RhExceptionHandling_FailedAllocation) + + NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h new file mode 100644 index 00000000000000..8b099baacb5ddc --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. 
+// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(320, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(1C, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(20, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(24, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(2D8, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RA) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(268, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(0F8, StackFrameIterator, m_OriginalControlPC) +PLAT_ASM_OFFSET(100, StackFrameIterator, m_pPreviousTransitionFrame) + +PLAT_ASM_SIZEOF(120, PAL_LIMITED_CONTEXT) + +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, RA) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, R4) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, R5) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, R6) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, R7) +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, R8) +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R9) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R10) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R11) +PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, R12) +PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, R13) +PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, R14) +PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, R15) +PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP) +PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, TP) + +PLAT_ASM_SIZEOF(128, REGDISPLAY) + +PLAT_ASM_OFFSET(18, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(B8, REGDISPLAY, pR4) +PLAT_ASM_OFFSET(C0, REGDISPLAY, pR5) +PLAT_ASM_OFFSET(C8, REGDISPLAY, pR6) +PLAT_ASM_OFFSET(D0, REGDISPLAY, pR7) +PLAT_ASM_OFFSET(D8, REGDISPLAY, pR8) +PLAT_ASM_OFFSET(E0, REGDISPLAY, pR9) +PLAT_ASM_OFFSET(E8, REGDISPLAY, pR10) +PLAT_ASM_OFFSET(F0, REGDISPLAY, pR11) +PLAT_ASM_OFFSET(F8, REGDISPLAY, pR12) +PLAT_ASM_OFFSET(100, REGDISPLAY, pFP) +PLAT_ASM_OFFSET(108, REGDISPLAY, pRA) +PLAT_ASM_OFFSET(110, REGDISPLAY, F) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S new file mode 100644 index 00000000000000..3b8267ca43a3ec --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S @@ -0,0 +1,775 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
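The PLAT_ASM_SIZEOF/PLAT_ASM_OFFSET entries above are consumed through AsmOffsets.h to cross-check the assembly constants against the C++ layouts. A hedged sketch of the kind of compile-time check such an entry expands to; the type here is an illustrative stand-in, not the runtime's real declaration.

#include <cstddef>

// illustrative stand-in for the real PInvokeTransitionFrame layout
struct PInvokeTransitionFrameSketch
{
    void* m_FramePointer; // offset 0
    void* m_RA;           // offset 8, per PLAT_ASM_OFFSET(8, ...) above
};

static_assert(offsetof(PInvokeTransitionFrameSketch, m_RA) == 0x8,
              "assembly offset out of sync with the C++ layout");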
+ +#include +#include "AsmOffsets.inc" + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) + +#define HARDWARE_EXCEPTION 1 +#define SOFTWARE_EXCEPTION 0 + +.global RhpTrapThreads + +// ----------------------------------------------------------------------------- +// Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) + .macro ALLOC_THROW_FRAME exceptionType + + addi a3, sp, 0 + + // Setup a PAL_LIMITED_CONTEXT on the stack + .if \exceptionType == HARDWARE_EXCEPTION + addi sp, sp, -80 + .cfi_adjust_cfa_offset 80 + sd a3, 0(sp) // a3 is the SP and a1 is the IP of the fault site + sd a1, 8(sp) + .else + PROLOG_STACK_ALLOC 80 + .cfi_adjust_cfa_offset 80 + sd a3, 0(sp) // a3 is the SP and ra is the IP of the fault site + sd ra, 8(sp) + .endif + fsd f24, 16(sp) + fsd f25, 24(sp) + fsd f26, 32(sp) + fsd f27, 40(sp) + fsd f28, 48(sp) + fsd f29, 56(sp) + fsd f30, 64(sp) + fsd f31, 72(sp) + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 112 + sd zero, 16(sp) // locations reserved for return value, not used for exception handling + sd zero, 24(sp) + PROLOG_SAVE_REG_PAIR 23, 24, 32 + PROLOG_SAVE_REG_PAIR 25, 26, 48 + PROLOG_SAVE_REG_PAIR 27, 28, 64 + PROLOG_SAVE_REG_PAIR 29, 30, 80 + PROLOG_SAVE_REG_PAIR 31, 2, 96 + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + .endm + +// ----------------------------------------------------------------------------- +// Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers + .macro ALLOC_CALL_FUNCLET_FRAME extraStackSize + + // Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,ra, #-60! + // is intentional. Above statement would also emit instruction to save + // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body + // of method. However, this method needs to be able to change fp before calling funclet. + // This is required to access locals in funclet. + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, 96 + PROLOG_SAVE_REG_PAIR 23, 24, 16 + PROLOG_SAVE_REG_PAIR 25, 26, 32 + PROLOG_SAVE_REG_PAIR 27, 28, 48 + PROLOG_SAVE_REG_PAIR 29, 30, 64 + PROLOG_SAVE_REG_PAIR 31, 2, 80 + addi fp, sp, 0 + .cfi_def_cfa_register 22 //fp + + .if \extraStackSize != 0 + PROLOG_STACK_ALLOC \extraStackSize + .endif + .endm + +// ----------------------------------------------------------------------------- +// Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers. +// It needs to match the value passed to the corresponding +// ALLOC_CALL_FUNCLET_FRAME. 
.macro FREE_CALL_FUNCLET_FRAME extraStackSize + + .if \extraStackSize != 0 + EPILOG_STACK_FREE \extraStackSize + .endif + + EPILOG_RESTORE_REG_PAIR 23, 24, 16 + EPILOG_RESTORE_REG_PAIR 25, 26, 32 + EPILOG_RESTORE_REG_PAIR 27, 28, 48 + EPILOG_RESTORE_REG_PAIR 29, 30, 64 + EPILOG_RESTORE_REG_PAIR 31, 2, 80 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 96 + .endm + +// ----------------------------------------------------------------------------- +// Macro used to restore preserved general purpose and FP registers from REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro RESTORE_PRESERVED_REGISTERS regdisplayReg + + ld t3, OFFSETOF__REGDISPLAY__pR23(\regdisplayReg) + ld s0, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR24(\regdisplayReg) + ld s1, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR25(\regdisplayReg) + ld s2, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR26(\regdisplayReg) + ld s3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR27(\regdisplayReg) + ld s4, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR28(\regdisplayReg) + ld s5, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR29(\regdisplayReg) + ld s6, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR30(\regdisplayReg) + ld s7, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR31(\regdisplayReg) + ld s8, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) + ld fp, 0(t3) + + // load FP preserved regs + addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F + fld f24, 0x00(t3) + fld f25, 0x08(t3) + fld f26, 0x10(t3) + fld f27, 0x18(t3) + fld f28, 0x20(t3) + fld f29, 0x28(t3) + fld f30, 0x30(t3) + fld f31, 0x38(t3) + .endm + +// ----------------------------------------------------------------------------- +// Macro used to save preserved general purpose and FP registers to REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro SAVE_PRESERVED_REGISTERS regdisplayReg + ld t3, OFFSETOF__REGDISPLAY__pR23(\regdisplayReg) + sd s0, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR24(\regdisplayReg) + sd s1, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR25(\regdisplayReg) + sd s2, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR26(\regdisplayReg) + sd s3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR27(\regdisplayReg) + sd s4, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR28(\regdisplayReg) + sd s5, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR29(\regdisplayReg) + sd s6, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR30(\regdisplayReg) + sd s7, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR31(\regdisplayReg) + sd s8, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) + sd fp, 0(t3) + + // store FP preserved regs + addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F + fsd f24, 0x00(t3) + fsd f25, 0x08(t3) + fsd f26, 0x10(t3) + fsd f27, 0x18(t3) + fsd f28, 0x20(t3) + fsd f29, 0x28(t3) + fsd f30, 0x30(t3) + fsd f31, 0x38(t3) + .endm
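A hedged C++ rendering of the restore macro above: each pRxx member of REGDISPLAY points at the stack slot ("home") holding a preserved register's saved value, so restoring is one load through each pointer. The names here are an illustrative sketch, not the runtime's declarations.

#include <cstdint>

struct RegDisplaySketch { uintptr_t* pHome[10]; }; // pR23..pR31 plus pFP, say

void RestorePreserved(const RegDisplaySketch* rd, uintptr_t regs[10])
{
    for (int i = 0; i < 10; i++)
        regs[i] = *rd->pHome[i]; // dereference the home slot, as 'ld sN, 0(t3)' does
}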
+// ----------------------------------------------------------------------------- +// Macro used to thrash preserved general purpose registers in REGDISPLAY +// to make sure nobody uses them +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg + +#if _DEBUG + li a3, 0xBAADDEED // recognizable trash value + ld t3, OFFSETOF__REGDISPLAY__pR23(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR24(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR25(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR26(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR27(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR28(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR29(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR30(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pR31(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) + sd a3, 0(t3) +#endif // _DEBUG + .endm + +.macro GetThreadA2 + addi sp, sp, -16 + sd a0, 0(sp) + sd a1, 8(sp) + call C_FUNC(RhpGetThread) + addi a2, a0, 0 + ld a0, 0(sp) + ld a1, 8(sp) + addi sp, sp, 16 +.endm + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + +// +// RhpThrowHwEx +// +// INPUT: a0[31:0]: exception code of fault +// a1: faulting IP +// +// OUTPUT: +// + NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME HARDWARE_EXCEPTION + + GetThreadA2 + + addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + sd zero, 0(a1) // pExInfo->m_exception = null + li a3, 1 + sb a3, 8(a1) // pExInfo->m_passNumber = 1 + li a3, -1 + sw a3, 12(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + li a3, 2 + sb a3, 16(a1) // pExInfo->m_kind = ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext + + // a0[31:0]: exception code + // a1: ExInfo* + call C_FUNC(RhThrowHwEx) + + ALTERNATE_ENTRY RhpThrowHwEx2 + + // no return + EMIT_BREAKPOINT + + NESTED_END RhpThrowHwEx, _TEXT + +// +// RhpThrowEx +// +// INPUT: a0: exception object +// +// OUTPUT: +// + + NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + GetThreadA2 + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. + + // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + // where the tail-calling thread had saved RA, which may not match where we have saved RA. + + ld a1, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + beq a1, zero, NotHijacked + + ld a3, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + + // a0: exception object + // a1: hijacked return address + // a2: pThread + // a3: hijacked return address location + + addi t3, sp, STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT // re-compute SP at callsite + bltu a3, t3, TailCallWasHijacked // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + + // normal case where a valid return address location is hijacked + sd a1, 0(a3) + j ClearThreadState + +TailCallWasHijacked: + + // Abnormal case where the return address location is now invalid because we ended up here via a tail + // call. In this case, our hijacked return address should be the correct caller of this method. + + // stick the previous return address in RA as well as in the right spots in our PAL_LIMITED_CONTEXT.
+ mv ra, a1 + sd ra, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__RA)(sp) + sd ra, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)(sp) + +ClearThreadState: + + // clear the Thread's hijack state + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + +NotHijacked: + + addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 + li a3, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext + + // a0: exception object + // a1: ExInfo* + call C_FUNC(RhThrowEx) + + ALTERNATE_ENTRY RhpThrowEx2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpThrowEx, _TEXT + + +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// + + NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + GetThreadA2 + + addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null + sb zero, OFFSETOF__ExInfo__m_kind(a1) // init to a deterministic value (ExKind.None) + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 + li a3, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + + // link the ExInfo into the thread's ExInfo chain + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + mv a0, a3 // a0 <- current ExInfo + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext + + // a0 contains the currently active ExInfo + // a1 contains the address of the new ExInfo + call C_FUNC(RhRethrow) + + ALTERNATE_ENTRY RhpRethrow2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpRethrow, _TEXT + +// +// void* FASTCALL RhpCallCatchFunclet(OBJECTREF exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: a0: exception object +// a1: handler funclet address +// a2: REGDISPLAY* +// a3: ExInfo* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + ALLOC_CALL_FUNCLET_FRAME 0x70 + fsd f24, 0(sp) + fsd f25, 8(sp) + fsd f26, 16(sp) + fsd f27, 24(sp) + fsd f28, 32(sp) + fsd f29, 40(sp) + fsd f30, 48(sp) + fsd f31, 56(sp) + sd a0, 64(sp) // a0 to a3 are stored to restore them anytime + sd a1, 72(sp) + sd a2, 80(sp) + sd a3, 88(sp) + sd zero, 96(sp) // zero makes space for the local "is_not_handling_thread_abort"; last qword will store the thread obj + +#define rsp_offset_is_not_handling_thread_abort 96
+#define rsp_offset_a0 64 +#define rsp_offset_a1 72 +#define rsp_offset_a2 80 +#define rsp_offset_a3 88 +#define rsp_CatchFunclet_offset_thread 104 + + // + // clear the DoNotTriggerGc flag, trashes a4-a6 + // + + call RhpGetThread + sd a0, rsp_CatchFunclet_offset_thread(sp) + mv a5, a0 + ld a0, rsp_offset_a0(sp) + ld a1, rsp_offset_a1(sp) + ld a2, rsp_offset_a2(sp) + ld a3, rsp_offset_a3(sp) + + ld a4, OFFSETOF__Thread__m_threadAbortException(a5) + sub a4, a4, a0 + sd a4, rsp_offset_is_not_handling_thread_abort(sp) // Non-zero if the exception is not ThreadAbortException + + addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags + + addi a6, zero, -17 // a6 = a6 & ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc=0x10. + and a4, a6, t3 + + // + // set preserved regs to the values expected by the funclet + // + RESTORE_PRESERVED_REGISTERS a2 + // + // trash the values at the old homes to make sure nobody uses them + // + TRASH_PRESERVED_REGISTERS_STORAGE a2 + + // + // call the funclet + // + // a0 still contains the exception object + jalr ra, a1, 0 + + ALTERNATE_ENTRY RhpCallCatchFunclet2 + + // a0 contains resume IP + + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + +#ifdef _DEBUG + // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we + // have to spill all the preserved registers and then refill them after the call. + + sd a0, rsp_offset_a0(sp) + + SAVE_PRESERVED_REGISTERS a2 + + ld a0, rsp_CatchFunclet_offset_thread(sp) // a0 <- Thread* + ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + call RhpValidateExInfoPop + + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + + RESTORE_PRESERVED_REGISTERS a2 + + ld a0, rsp_offset_a0(sp) // reload resume IP +#endif + + ld a1, rsp_CatchFunclet_offset_thread(sp) + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
+ INLINE_THREAD_UNHIJACK a1, a3, t3 // Thread in a1, trashes a3 and t3 + + ld a3, rsp_offset_a3(sp) // a3 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + +PopExInfoLoop: + ld a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3) // a3 <- next ExInfo + beq a3, zero, DonePopping // if (pExInfo == null) { we're done } + blt a3, a2, PopExInfoLoop // if (pExInfo < resume SP) { keep going } + +DonePopping: + sd a3, OFFSETOF__Thread__m_pExInfoStackHead(a1) // store the new head on the Thread + + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 + + andi t2, a3, TrapThreadsFlags_AbortInProgress_Bit + beq t2, zero, NoAbort + + ld a3, rsp_offset_is_not_handling_thread_abort(sp) + bne a3, zero, NoAbort + + // It was the ThreadAbortException, so rethrow it + // reset SP + mv a1, a0 // a1 <- continuation address as exception PC + li a0, STATUS_REDHAWK_THREAD_ABORT + mv sp, a2 + call RhpThrowHwEx + +NoAbort: + // reset SP and jump to continuation address + mv sp, a2 + jalr zero, a0, 0 + +#undef rsp_offset_is_not_handling_thread_abort +#undef rsp_offset_a0 +#undef rsp_offset_a1 +#undef rsp_offset_a2 +#undef rsp_offset_a3 +#undef rsp_CatchFunclet_offset_thread + + NESTED_END RhpCallCatchFunclet, _TEXT + +// +// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +// +// INPUT: a0: handler funclet address +// a1: REGDISPLAY* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler + + ALLOC_CALL_FUNCLET_FRAME 0x60 + fsd f24, 0(sp) + fsd f25, 8(sp) + fsd f26, 16(sp) + fsd f27, 24(sp) + fsd f28, 32(sp) + fsd f29, 40(sp) + fsd f30, 48(sp) + fsd f31, 56(sp) + sd a0, 64(sp) // a0 and a1 are saved so we have them later + sd a1, 72(sp) + +#define rsp_offset_a0 64 +#define rsp_offset_a1 72 +#define rsp_FinallyFunclet_offset_thread 80 + + // We want to suppress hijacking between invocations of subsequent finallys. We do this because we + // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + // method) and then been popped off the stack, leaving behind no trace of its effect. + // + // So we clear the state before and set it after invocation of the handler. + // + + // clear the DoNotTriggerGc flag, trashes a2-a4 + call RhpGetThread + sd a0, rsp_FinallyFunclet_offset_thread(sp) + mv a2, a0 + ld a0, rsp_offset_a0(sp) + ld a1, rsp_offset_a1(sp) + + addi t3, a2, OFFSETOF__Thread__m_ThreadStateFlags + + addi a3, zero, -17 // a3 = a3 & ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc=0x10.
+ and a1, a3, t3 + + fld f24, 0(sp) + fld f25, 8(sp) + fld f26, 16(sp) + fld f27, 24(sp) + fld f28, 32(sp) + fld f29, 40(sp) + fld f30, 48(sp) + fld f31, 56(sp) + + FREE_CALL_FUNCLET_FRAME 0x60 + EPILOG_RETURN + +#undef rsp_offset_a0 +#undef rsp_offset_a1 +#undef rsp_FinallyFunclet_offset_thread + + NESTED_END RhpCallFinallyFunclet, _TEXT + + +// +// void* FASTCALL RhpCallFilterFunclet(OBJECTREF exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +// +// INPUT: a0: exception object +// a1: filter funclet address +// a2: REGDISPLAY* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler + ALLOC_CALL_FUNCLET_FRAME 0x40 + fsd f24, 0(sp) + fsd f25, 8(sp) + fsd f26, 16(sp) + fsd f27, 24(sp) + fsd f28, 32(sp) + fsd f29, 40(sp) + fsd f30, 48(sp) + fsd f31, 56(sp) + + ld t3, OFFSETOF__REGDISPLAY__pFP(a2) + ld fp, 0(t3) + + // call the funclet + // a0 still contains the exception object + jalr ra, a1, 0 + + ALTERNATE_ENTRY RhpCallFilterFunclet2 + + fld f24, 0(sp) + fld f25, 8(sp) + fld f26, 16(sp) + fld f27, 24(sp) + fld f28, 32(sp) + fld f29, 40(sp) + fld f30, 48(sp) + fld f31, 56(sp) + + FREE_CALL_FUNCLET_FRAME 0x40 + EPILOG_RETURN + + NESTED_END RhpCallFilterFunclet, _TEXT + +#ifdef FEATURE_OBJCMARSHAL + +// +// void* FASTCALL RhpCallPropagateExceptionCallback(void* pCallbackContext, void* pCallback, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo, PInvokeTransitionFrame* pPreviousTransitionFrame) +// +// INPUT: a0: callback context +// a1: callback +// a2: REGDISPLAY* +// a3: ExInfo* +// a4: pPreviousTransitionFrame +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallPropagateExceptionCallback, _TEXT, NoHandler + +#define rsp_offset_a0 16 +#define rsp_offset_a1 24 +#define rsp_offset_a2 32 +#define rsp_offset_a3 40 +#define rsp_offset_a4 48 +#define rsp_CallPropagationCallback_offset_thread 56 + + // Using the NO_FP macro so that the debugger unwinds using SP. + // This makes backtraces work even after using RESTORE_PRESERVED_REGISTERS. + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, 64 + mv fp, sp + sd a0, rsp_offset_a0(sp) // a0 to a4 are stored to restore them anytime + sd a1, rsp_offset_a1(sp) + sd a2, rsp_offset_a2(sp) + sd a3, rsp_offset_a3(sp) + sd a4, rsp_offset_a4(sp) + sd zero, rsp_CallPropagationCallback_offset_thread(sp) // zero makes space to store the thread obj + + // clear the DoNotTriggerGc flag, trashes a4-a6 + call RhpGetThread + sd a0, rsp_CallPropagationCallback_offset_thread(sp) + mv a5, a0 + ld a0, rsp_offset_a0(sp) + ld a1, rsp_offset_a1(sp) + ld a2, rsp_offset_a2(sp) + ld a3, rsp_offset_a3(sp) + + addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags + + addi a6, zero, -17 // a6 = a6 & ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc=0x10. + and a4, a6, t3 + + // set preserved regs to the values expected by the funclet + RESTORE_PRESERVED_REGISTERS a2 + // trash the values at the old homes to make sure nobody uses them + TRASH_PRESERVED_REGISTERS_STORAGE a2 + +#ifdef _DEBUG + // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we + // have to spill all the preserved registers and then refill them after the call.
+ + SAVE_PRESERVED_REGISTERS a2 + + ld a0, rsp_CallPropagationCallback_offset_thread(sp) // a0 <- Thread* + ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + call RhpValidateExInfoPop + + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + + RESTORE_PRESERVED_REGISTERS a2 +#endif + + ld a1, rsp_CallPropagationCallback_offset_thread(sp) + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. + INLINE_THREAD_UNHIJACK a1, a3, t3 // Thread in a1, trashes a3 and t3 + + ld a3, rsp_offset_a3(sp) // a3 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + +Propagate_PopExInfoLoop: + ld a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3) // a3 <- next ExInfo + beqz a3, Propagate_DonePopping // if (pExInfo == null) { we're done } + blt a3, a2, Propagate_PopExInfoLoop // if (pExInfo < resume SP) { keep going } + +Propagate_DonePopping: + sd a3, OFFSETOF__Thread__m_pExInfoStackHead(a1) // store the new head on the Thread + + // restore preemptive mode + ld a4, rsp_offset_a4(sp) // pPreviousTransitionFrame + sd a4, OFFSETOF__Thread__m_pTransitionFrame(a1) + + // reset SP and RA and jump to continuation address + ld a0, rsp_offset_a0(sp) // callback context + ld a1, rsp_offset_a1(sp) // callback + ld a2, rsp_offset_a2(sp) // REGDISPLAY* + ld a3, OFFSETOF__REGDISPLAY__pRA(a2) // a3 <- &resume RA value + ld ra, 0(a3) + ld a3, OFFSETOF__REGDISPLAY__SP(a2) // a3 <- resume SP value + mv sp, a3 + jalr zero, a1, 0 // jump without clobbering the RA we just loaded + +#undef rsp_offset_a0 +#undef rsp_offset_a1 +#undef rsp_offset_a2 +#undef rsp_offset_a3 +#undef rsp_offset_a4 +#undef rsp_CallPropagationCallback_offset_thread + + NESTED_END RhpCallPropagateExceptionCallback, _TEXT + +#endif // FEATURE_OBJCMARSHAL diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S new file mode 100644 index 00000000000000..05eb00620f0af6 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -0,0 +1,165 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license.
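A hedged C++ rendering of the PopExInfoLoop/Propagate_PopExInfoLoop pattern used above: ExInfo records live on the stack, so any record sitting below the resume SP is dead and gets unlinked before control resumes. The struct names are illustrative sketches, not the runtime's real declarations.

#include <cstdint>

struct ExInfoSketch { ExInfoSketch* m_pPrevExInfo; };
struct ThreadSketch { ExInfoSketch* m_pExInfoStackHead; };

void PopExInfos(ThreadSketch* pThread, ExInfoSketch* pExInfo, uintptr_t resumeSP)
{
    // walk past every ExInfo that sits below the stack frame we resume into
    while (pExInfo != nullptr && (uintptr_t)pExInfo < resumeSP)
        pExInfo = pExInfo->m_pPrevExInfo;
    pThread->m_pExInfoStackHead = pExInfo; // store the new head on the Thread
}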
+ +#include +#include "AsmOffsets.inc" + +PROBE_FRAME_SIZE = 0xD0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + + // 10 * 8 for callee saved registers + + // 1 * 8 for caller SP + + // 2 * 8 for int returns + + // 1 * 8 for alignment padding + + // 4 * 16 for FP returns + +// Define the prolog for setting up the PInvokeTransitionFrame +.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK + + // Save the current stack frame and registers + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, PROBE_FRAME_SIZE + + // Save callee-saved registers + PROLOG_SAVE_REG_PAIR 23, 24, 0x20 + PROLOG_SAVE_REG_PAIR 25, 26, 0x30 + PROLOG_SAVE_REG_PAIR 27, 28, 0x40 + PROLOG_SAVE_REG_PAIR 29, 30, 0x50 + PROLOG_SAVE_REG_PAIR 31, 2, 0x60 + + // Save caller's SP + sd sp, 0x70(sp) + + // Save integer return registers + sd a0, 0x78(sp) + sd a1, 0x80(sp) + + // Alignment padding + // (No need to explicitly handle alignment in RISC-V assembly if stack size is a multiple of 16 bytes) + + // Save FP return registers + fsd f0, 0x90(sp) + fsd f1, 0x98(sp) + fsd f2, 0xA0(sp) + fsd f3, 0xA8(sp) + + // Initialize the PInvokeTransitionFrame + sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) + sd \BITMASK, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) + + // Save caller's SP in the PInvokeTransitionFrame + addi \trashReg, sp, PROBE_FRAME_SIZE + sd \trashReg, 0x70(sp) + + // Link the frame into the Thread + sd zero, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) +.endm + +// Define the prolog for removing the PInvokeTransitionFrame +.macro POP_PROBE_FRAME + + // Restore integer return registers + ld a0, 0x78(sp) + ld a1, 0x80(sp) + + // Restore FP return registers + fld f0, 0x90(sp) + fld f1, 0x98(sp) + fld f2, 0xA0(sp) + fld f3, 0xA8(sp) + + // Restore callee-saved registers + EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 + EPILOG_RESTORE_REG_PAIR 25, 26, 0x30 + EPILOG_RESTORE_REG_PAIR 27, 28, 0x40 + EPILOG_RESTORE_REG_PAIR 29, 30, 0x50 + EPILOG_RESTORE_REG_PAIR 31, 2, 0x60 + + // Restore stack frame + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, PROBE_FRAME_SIZE +.endm + +// Fix up the hijacked callstack +.macro FixupHijackedCallstack + + // a2 <- GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_2 +#else + INLINE_GETTHREAD a2 +#endif + + // Fix the stack by restoring the original return address + ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + ld t3, OFFSETOF__Thread__m_pvHijackedReturnAddress + 8(a2) + + // Clear hijack state + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + sd zero, OFFSETOF__Thread__m_uHijackedReturnValueFlags(a2) +.endm + +// GC Probe Hijack target +NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler + FixupHijackedCallstack + + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 + andi t8, a3, TrapThreadsFlags_TrapThreads_Bit + bne t8, zero, WaitForGC + jalr ra + +WaitForGC: + lui t7, ((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) >> 12) & 0xfffff + ori t7, t7, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) & 0xfff + or t3, t3, t7 + jal C_FUNC(RhpWaitForGC) +NESTED_END RhpGcProbeHijack + +.global C_FUNC(RhpThrowHwEx) + +// Wait for GC function +NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler + PUSH_PROBE_FRAME a2, a3, t3 + + ld a0, OFFSETOF__Thread__m_pDeferredTransitionFrame(a2) + jal C_FUNC(RhpWaitForGC2) + + ld a2, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) + andi t8, a2, PTFF_THREAD_ABORT_BIT + bne t8, zero, ThrowThreadAbort + + .cfi_remember_state + 
+    POP_PROBE_FRAME
+    EPILOG_RETURN
+
+    .cfi_restore_state
+ThrowThreadAbort:
+    POP_PROBE_FRAME
+    li  a0, STATUS_REDHAWK_THREAD_ABORT
+    mv  a1, ra                       // return address as exception PC
+    j   C_FUNC(RhpThrowHwEx)         // tail call; does not return
+NESTED_END RhpWaitForGC, _TEXT
+
+.global C_FUNC(RhpGcPoll2)
+
+// GC Poll function
+LEAF_ENTRY RhpGcPoll, _TEXT
+    PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a0
+    bne a0, zero, C_FUNC(RhpGcPollRare)
+    ret
+LEAF_END RhpGcPoll, _TEXT
+
+// Rare GC Poll function
+NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler
+    PUSH_COOP_PINVOKE_FRAME a0
+    call C_FUNC(RhpGcPoll2)
+    POP_COOP_PINVOKE_FRAME
+    ret
+NESTED_END RhpGcPollRare, _TEXT
+
+#ifdef FEATURE_GC_STRESS
+
+// GC Stress Hijack targets
+LEAF_ENTRY RhpGcStressHijack, _TEXT
+    // Not Yet Implemented (NYI)
+    EMIT_BREAKPOINT
+LEAF_END RhpGcStressHijack, _TEXT
+
+#endif // FEATURE_GC_STRESS
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S
new file mode 100644
index 00000000000000..fcb803268463c5
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S
@@ -0,0 +1,51 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+POINTER_SIZE = 0x08
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    //
+    // RhCommonStub
+    //
+    // INPUT:   tp: thunk's data block
+    //
+    // TRASHES: t0, t1, tp
+    //
+    LEAF_ENTRY RhCommonStub, _TEXT
+        // There are arbitrary callers passing arguments with arbitrary signatures.
+        // Custom calling convention:
+        //      tp pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers)
+
+#ifdef FEATURE_EMULATED_TLS
+        // If using TLS emulation, fetch the TLS data block address into t0
+        GETTHUNKDATA_ETLS_9
+#else
+        // t0 = address of the thread-local thunk data
+        INLINE_GET_TLS_VAR t0, C_FUNC(tls_thunkData)
+#endif
+
+        // t0 = base address of TLS data
+        // tp = address of context cell in thunk's data
+
+        // Copy the thunk's context pointer into the thread's static storage
+        ld  t1, 0(tp)             // load the context pointer from the thunk's data block
+        sd  t1, 0(t0)             // store it in the thread-local thunk data
+
+        // Load the target address from the data block and tail call to it
+        ld  t1, POINTER_SIZE(tp)  // load the target address from the thunk's data block
+        jr  t1                    // tail call the target; must not link
+    LEAF_END RhCommonStub, _TEXT
+
+    //
+    // IntPtr RhGetCommonStubAddress()
+    //
+    LEAF_ENTRY RhGetCommonStubAddress, _TEXT
+        PREPARE_EXTERNAL_VAR RhCommonStub, a0
+        ret                       // address returned in a0
+    LEAF_END RhGetCommonStubAddress, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/riscv64/MiscStubs.S
new file mode 100644
index 00000000000000..ea5d91a1a1c1f9
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/riscv64/MiscStubs.S
@@ -0,0 +1,5 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S
new file mode 100644
index 00000000000000..8af5681ac5cfd0
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S
@@ -0,0 +1,55 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
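+
+// The helpers below implement the managed <-> native transition: RhpPInvoke records the
+// callsite in a PInvokeTransitionFrame and publishes it on the Thread, and
+// RhpPInvokeReturn clears it and polls for a pending GC. Roughly, in C terms (an
+// illustrative sketch only; the authoritative layout is PInvokeTransitionFrame in
+// rhbinder.h):
+//
+//      frame->m_FramePointer      = fp;
+//      frame->m_RIP               = ra;
+//      frame->m_PreservedRegs[0]  = sp;        // flagged by PTFF_SAVE_SP
+//      frame->m_Flags             = PTFF_SAVE_SP;
+//      frame->m_pThread           = thread;
+//      thread->m_pTransitionFrame = frame;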
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+.global RhpTrapThreads
+
+// Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h
+PTFF_SAVE_SP = 0x00000400
+
+// Bit position of the thread abort flag in PInvokeTransitionFrame::m_Flags; it is above
+// bit 11, so it must be tested with srli + andi rather than a single andi.
+PTFF_THREAD_ABORT_BIT = 36
+
+//
+// RhpPInvoke
+//
+// IN: a0: address of pinvoke frame
+//
+// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite.
+// The code generator must treat the callsite of this helper as GC triggering and generate the GC info for it.
+// Also, the code generator must ensure that there are no live GC references in callee saved registers.
+//
+
+NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
+    sd  fp, OFFSETOF__PInvokeTransitionFrame__m_FramePointer(a0)
+    sd  ra, OFFSETOF__PInvokeTransitionFrame__m_RIP(a0)
+    sd  sp, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs(a0)
+    li  t0, PTFF_SAVE_SP
+    sd  t0, OFFSETOF__PInvokeTransitionFrame__m_Flags(a0)
+
+    // get the Thread* into a1
+#ifdef FEATURE_EMULATED_TLS
+    GETTHREAD_ETLS_1
+#else
+    INLINE_GETTHREAD a1
+#endif
+
+    sd  a1, OFFSETOF__PInvokeTransitionFrame__m_pThread(a0)
+    sd  a0, OFFSETOF__Thread__m_pTransitionFrame(a1)
+    ret
+NESTED_END RhpPInvoke, _TEXT
+
+LEAF_ENTRY RhpPInvokeReturn, _TEXT
+    ld  t0, OFFSETOF__PInvokeTransitionFrame__m_pThread(a0)
+    sd  zero, OFFSETOF__Thread__m_pTransitionFrame(t0)
+
+    PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a5
+
+    beq a5, zero, 1f            // TrapThreadsFlags_None = 0: nothing to do
+    // passing transition frame pointer in a0
+    j   C_FUNC(RhpWaitForGC2)   // tail call
+1:
+    ret
+LEAF_END RhpPInvokeReturn, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S
new file mode 100644
index 00000000000000..625f5f52200934
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S
@@ -0,0 +1,113 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+    .extern RhpCidResolve
+    .extern RhpUniversalTransition_DebugStepTailCall
+
+    // Macro that generates code to check a single cache entry.
+    .macro CHECK_CACHE_ENTRY entry
+        // Check a single entry in the cache.
+        //  t0 : Cache data structure. Also used for target address jump.
+        //  t1 : Instance MethodTable*
+        //  t2 : Indirection cell address, preserved
+        //  t3 : Trashed
+        ld  t3, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))(t0)
+        bne t1, t3, 0f
+        ld  t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)(t0)
+        jr  t0
+0:
+    .endm
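+
+    // Each cache entry is a 16-byte (MethodTable*, target) pair laid out back to back,
+    // which is why the macro above scales the entry index by 16: the first load fetches
+    // the cached MethodTable* for comparison and the second, at +8, the code target.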
+
+    //
+    // Macro that generates a stub consuming a cache with the given number of entries.
+    //
+    .macro DEFINE_INTERFACE_DISPATCH_STUB entries
+
+    NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler
+
+        // t2 holds the indirection cell address. Load the cache pointer.
+        ld  t0, OFFSETOF__InterfaceDispatchCell__m_pCache(t2)
+
+        // Load the MethodTable from the object instance in a0.
+    ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries
+        ld  t1, 0(a0)
+
+        .set CurrentEntry, 0
+        .rept \entries
+            CHECK_CACHE_ENTRY CurrentEntry
+            .set CurrentEntry, CurrentEntry + 1
+        .endr
+
+        // t2 still contains the indirection cell address.
+        j   C_FUNC(RhpInterfaceDispatchSlow)
+
+    NESTED_END "RhpInterfaceDispatch\entries", _TEXT
+
+    .endm
+
+    //
+    // Define all the stub routines we currently need.
+    //
+    DEFINE_INTERFACE_DISPATCH_STUB 1
+    DEFINE_INTERFACE_DISPATCH_STUB 2
+    DEFINE_INTERFACE_DISPATCH_STUB 4
+    DEFINE_INTERFACE_DISPATCH_STUB 8
+    DEFINE_INTERFACE_DISPATCH_STUB 16
+    DEFINE_INTERFACE_DISPATCH_STUB 32
+    DEFINE_INTERFACE_DISPATCH_STUB 64
+
+    //
+    // Initial dispatch on an interface when we don't have a cache yet.
+    //
+    LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+    ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+        // Trigger an AV if we're dispatching on a null this.
+        // The exception handling infrastructure is aware of the fact that this is the first
+        // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        // to a NullReferenceException at the callsite.
+        ld  zero, 0(a0)
+
+        // Just tail call to the cache miss helper.
+        j   C_FUNC(RhpInterfaceDispatchSlow)
+    LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+    //
+    // Stub dispatch routine for dispatch to a vtable slot
+    //
+    LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT
+        // t2 contains the interface dispatch cell address.
+        // Load t3 to point to the vtable offset (which is stored in the m_pCache field).
+        ld  t3, OFFSETOF__InterfaceDispatchCell__m_pCache(t2)
+
+        // Load the MethodTable from the object instance in a0, and add it to the vtable offset
+        // to get the address in the vtable of what we want to dereference
+        ld  t4, 0(a0)
+        add t3, t3, t4
+
+        // Load the target address of the vtable into t3
+        ld  t3, 0(t3)
+
+        jr  t3
+    LEAF_END RhpVTableOffsetDispatch, _TEXT
+
+    //
+    // Cache miss case, call the runtime to resolve the target and update the cache.
+    // Use universal transition helper to allow an exception to flow out of resolution.
+    //
+    LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
+        // t2 contains the interface dispatch cell address.
+        // Calling convention of the universal thunk is:
+        //  t0: target address for the thunk to call
+        //  t1: parameter of the thunk's target
+        mv  t1, t2
+        PREPARE_EXTERNAL_VAR RhpCidResolve, t0
+        j   C_FUNC(RhpUniversalTransition_DebugStepTailCall)
+    LEAF_END RhpInterfaceDispatchSlow, _TEXT
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S
new file mode 100644
index 00000000000000..8e86d82dfefc79
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S
@@ -0,0 +1,189 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
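+
+// RhpUniversalTransition is the generic "call me with anything" helper: it spills every
+// argument register to a well-known frame layout, calls the target in t0 with a pointer
+// to that frame, and then tail calls whatever address the target returns. Illustrative
+// flow for interface dispatch (names from StubDispatch.S above):
+//
+//      RhpInterfaceDispatchSlow
+//        -> RhpUniversalTransition_DebugStepTailCall   (t0 = RhpCidResolve, t1 = cell)
+//             -> RhpCidResolve(TransitionBlock*, cell)  returns the real target
+//        -> tail call the real target with the original a0-a7/fa0-fa7 restored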
+
+#include <unixasmmacros.inc>
+
+#ifdef _DEBUG
+#define TRASH_SAVED_ARGUMENT_REGISTERS
+#endif
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+    .global RhpIntegerTrashValues
+    .global RhpFpTrashValues
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
+// No alignment padding is needed: the ten saved integer registers keep the frame a
+// multiple of 16 bytes.
+#define ALIGNMENT_PADDING_SIZE (0)
+
+// a0-a7 plus the two transition scratch registers t0-t1
+#define COUNT_ARG_REGISTERS (10)
+#define INTEGER_REGISTER_SIZE (8)
+#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE)
+
+// Largest return block is 4 doubles
+#define RETURN_BLOCK_SIZE (32)
+
+#define COUNT_FLOAT_ARG_REGISTERS (8)
+#define FLOAT_REGISTER_SIZE (16)
+#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE)
+
+#define PUSHED_RA_SIZE (8)
+#define PUSHED_FP_SIZE (8)
+
+// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions:
+//
+//      ALIGNMENT_PADDING_SIZE
+//      ARGUMENT_REGISTERS_SIZE
+//      RETURN_BLOCK_SIZE
+//      FLOAT_ARG_REGISTERS_SIZE
+//      PUSHED_RA_SIZE
+//      PUSHED_FP_SIZE
+//
+
+#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE)
+
+#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE)
+
+#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE)
+#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE)
+
+// RhpUniversalTransition
+//
+// At input to this function, a0-a7, fa0-fa7 and the stack may contain any number of arguments.
+//
+// In addition, there are 2 extra arguments passed in the scratch registers:
+//      t0 will contain the managed function that is to be called by this transition function
+//      t1 will contain the pointer sized extra argument to the managed function
+//
+// When invoking the callee:
+//
+//      a0 shall contain a pointer to the TransitionBlock
+//      a1 shall contain the value that was in t1 at entry to this function
+//
+// Frame layout is:
+//
+//      {StackPassedArgs}                           ChildSP+100     CallerSP+000
+//      {IntArgRegs (a0-a7, t0-t1) (0x50 bytes)}    ChildSP+0B0     CallerSP-050
+//      {ReturnBlock (0x20 bytes)}                  ChildSP+090     CallerSP-070
+//         -- The base address of the Return block is the TransitionBlock pointer, the floating point args are
+//            in the neg space of the TransitionBlock pointer.  Note that the callee has knowledge of the exact
+//            layout of all pieces of the frame that lie at or above the pushed floating point registers.
+//      {FpArgRegs (fa0-fa7) (0x80 bytes)}          ChildSP+010     CallerSP-0F0
+//      {PushedRA}                                  ChildSP+008     CallerSP-0F8
+//      {PushedFP}                                  ChildSP+000     CallerSP-100
+//
+// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure
+// must be updated as well.
+//
+// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has
+// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed
+// FpArgRegs.
+//
+// NOTE: The stack walker guarantees that conservative GC reporting will be applied to
+// everything between the base of the ReturnBlock and the top of the StackPassedArgs.
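+//
+// For reference, with the constants above the frame size works out as:
+//
+//      STACK_SIZE = 0x00 (pad) + 0x50 (int args) + 0x20 (return block)
+//                 + 0x80 (fp args) + 0x08 (ra) + 0x08 (fp) = 0x100
+//
+// and DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK = 0x08 + 0x08 + 0x80 = 0x90, matching the
+// ChildSP+090 row in the table above.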
+//
+
+    .text
+
+    .macro UNIVERSAL_TRANSITION FunctionName
+
+    NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler
+
+        // FP and RA registers
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, STACK_SIZE  // Push down stack pointer and store FP and RA
+
+        // Floating point registers
+        fsd fa0, FLOAT_ARG_OFFSET(sp)
+        fsd fa1, (FLOAT_ARG_OFFSET + 8)(sp)
+        fsd fa2, (FLOAT_ARG_OFFSET + 16)(sp)
+        fsd fa3, (FLOAT_ARG_OFFSET + 24)(sp)
+        fsd fa4, (FLOAT_ARG_OFFSET + 32)(sp)
+        fsd fa5, (FLOAT_ARG_OFFSET + 40)(sp)
+        fsd fa6, (FLOAT_ARG_OFFSET + 48)(sp)
+        fsd fa7, (FLOAT_ARG_OFFSET + 56)(sp)
+
+        // Space for the return block (0x20 bytes) lies between the FP args and the integer args
+
+        // Save argument registers
+        sd  a0, ARGUMENT_REGISTERS_OFFSET(sp)
+        sd  a1, (ARGUMENT_REGISTERS_OFFSET + 8)(sp)
+        sd  a2, (ARGUMENT_REGISTERS_OFFSET + 16)(sp)
+        sd  a3, (ARGUMENT_REGISTERS_OFFSET + 24)(sp)
+        sd  a4, (ARGUMENT_REGISTERS_OFFSET + 32)(sp)
+        sd  a5, (ARGUMENT_REGISTERS_OFFSET + 40)(sp)
+        sd  a6, (ARGUMENT_REGISTERS_OFFSET + 48)(sp)
+        sd  a7, (ARGUMENT_REGISTERS_OFFSET + 56)(sp)
+        sd  t0, (ARGUMENT_REGISTERS_OFFSET + 64)(sp)
+        sd  t1, (ARGUMENT_REGISTERS_OFFSET + 72)(sp)
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+        PREPARE_EXTERNAL_VAR RhpFpTrashValues, a1
+
+        fld fa0, 0(a1)
+        fld fa1, 8(a1)
+        fld fa2, 16(a1)
+        fld fa3, 24(a1)
+        fld fa4, 32(a1)
+        fld fa5, 40(a1)
+        fld fa6, 48(a1)
+        fld fa7, 56(a1)
+
+        PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, a1
+
+        // a0 and a1 are in use for the callee's parameters, so only trash a2-a7
+        ld  a2, 16(a1)
+        ld  a3, 24(a1)
+        ld  a4, 32(a1)
+        ld  a5, 40(a1)
+        ld  a6, 48(a1)
+        ld  a7, 56(a1)
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
+        addi a0, sp, DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK  // First parameter to target function is a pointer to the return block
+        mv  a1, t1                                          // Second parameter to target function
+        jalr ra, t0, 0                                      // Call the target (address in t0)
+
+        // We cannot make the label public as that tricks DIA stackwalker into thinking
+        // it's the beginning of a method. For this reason we export an auxiliary variable
+        // holding the address instead.
+    ALTERNATE_ENTRY ReturnFrom\FunctionName
+
+        // Move the result (the target address) to t2 so it doesn't get overridden when we restore the
+        // argument registers.
+        mv  t2, a0
+
+        // Restore floating point registers
+        fld fa0, FLOAT_ARG_OFFSET(sp)
+        fld fa1, (FLOAT_ARG_OFFSET + 8)(sp)
+        fld fa2, (FLOAT_ARG_OFFSET + 16)(sp)
+        fld fa3, (FLOAT_ARG_OFFSET + 24)(sp)
+        fld fa4, (FLOAT_ARG_OFFSET + 32)(sp)
+        fld fa5, (FLOAT_ARG_OFFSET + 40)(sp)
+        fld fa6, (FLOAT_ARG_OFFSET + 48)(sp)
+        fld fa7, (FLOAT_ARG_OFFSET + 56)(sp)
+
+        // Restore the argument registers
+        ld  a0, ARGUMENT_REGISTERS_OFFSET(sp)
+        ld  a1, (ARGUMENT_REGISTERS_OFFSET + 8)(sp)
+        ld  a2, (ARGUMENT_REGISTERS_OFFSET + 16)(sp)
+        ld  a3, (ARGUMENT_REGISTERS_OFFSET + 24)(sp)
+        ld  a4, (ARGUMENT_REGISTERS_OFFSET + 32)(sp)
+        ld  a5, (ARGUMENT_REGISTERS_OFFSET + 40)(sp)
+        ld  a6, (ARGUMENT_REGISTERS_OFFSET + 48)(sp)
+        ld  a7, (ARGUMENT_REGISTERS_OFFSET + 56)(sp)
+        ld  t0, (ARGUMENT_REGISTERS_OFFSET + 64)(sp)
+        ld  t1, (ARGUMENT_REGISTERS_OFFSET + 72)(sp)
+
+        // Restore FP and RA registers, and free the allocated stack block
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, STACK_SIZE
+
+        // Tailcall to the target address.
+        jr  t2
+
+    NESTED_END Rhp\FunctionName, _TEXT
+
+    .endm
+
+    // To enable proper step-in behavior in the debugger, we need to have two instances
+    // of the thunk. For the first one, the debugger steps into the call in the function,
+    // for the other, it steps over it.
+    UNIVERSAL_TRANSITION UniversalTransition
+    UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S
new file mode 100644
index 00000000000000..fc35833337ac0c
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S
@@ -0,0 +1,365 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+
+// Special value written to a shadow heap location to disable checking for it; keep in
+// sync with the value used by the write barriers of the other architectures.
+#define INVALIDGCVALUE 0xCCCCCCCD
+
+// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+// during garbage collections to verify that object references were never written to the heap without using a
+// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing
+// new references to the real heap. Since this cannot be solved perfectly without critical sections around the
+// entire update process, we instead update the shadow location and then re-check the real location (as two
+// ordered operations) and if there is a disparity we will re-write the shadow location with a special value
+// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the
+// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
+#ifdef WRITE_BARRIER_CHECK
+
+    .global g_GCShadow
+    .global g_GCShadowEnd
+
+    // On entry:
+    //  destReg: location to be updated
+    //  refReg:  objectref to be stored
+    //
+    // On exit:
+    //  t3, t4: trashed
+    //  other registers are preserved
+    //
+    .macro UPDATE_GC_SHADOW destReg, refReg
+
+        // If g_GCShadow is 0, don't perform the check.
+        la  t3, g_GCShadow
+        ld  t3, 0(t3)
+        beq t3, zero, 1f
+
+        // Save destReg since we're about to modify it (and we need the original value both within the macro and
+        // once we exit the macro).
+        mv  t4, \destReg
+
+        // Transform destReg into the equivalent address in the shadow heap.
+        la  t3, g_lowest_address
+        ld  t3, 0(t3)
+        sub \destReg, \destReg, t3
+        bltz \destReg, 0f
+
+        la  t3, g_GCShadow
+        ld  t3, 0(t3)
+        add \destReg, \destReg, t3
+
+        la  t3, g_GCShadowEnd
+        ld  t3, 0(t3)
+        bgeu \destReg, t3, 0f
+
+        // Update the shadow heap.
+        sd  \refReg, 0(\destReg)
+
+        // The following read must be strongly ordered with respect to the write we have just performed in order to
+        // prevent race conditions.
+        fence rw, rw
+
+        // Now check that the real heap location still contains the value we just wrote into the shadow heap.
+        ld  t3, 0(t4)
+        beq t3, \refReg, 0f
+
+        // Someone went and updated the real heap. Write INVALIDGCVALUE to the shadow location since we cannot
+        // guarantee whose shadow update won.
+        li  t3, INVALIDGCVALUE
+        sd  t3, 0(\destReg)
+
+0:
+        // Restore original destReg value
+        mv  \destReg, t4
+
+1:
+    .endm
+
+#else // WRITE_BARRIER_CHECK
+
+    .macro UPDATE_GC_SHADOW destReg, refReg
+    .endm
+
+#endif // WRITE_BARRIER_CHECK
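+
+// In rough C-like pseudo-code, UPDATE_GC_SHADOW performs (illustrative only):
+//
+//      shadow = dest - g_lowest_address + g_GCShadow;
+//      if (shadow is within [g_GCShadow, g_GCShadowEnd)) {
+//          *shadow = ref;
+//          full fence;
+//          if (*dest != ref)           // lost a race with another mutator
+//              *shadow = INVALIDGCVALUE;
+//      }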
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+// name of the register that points to the location to be updated and the name of the register that holds the
+// object reference (this should be in upper case as it is used in the definition of the name of the helper).
+
+// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for
+// some interlocked helpers that need an inline barrier.
+
+    // On entry:
+    //   destReg: location to be updated (cannot be t3, t4, t5)
+    //   refReg:  objectref to be stored (cannot be t3, t4, t5)
+    //
+    // On exit:
+    //   t3, t4, t5: trashed
+    //
+    .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+        // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+        // we are in a debug build and write barrier checking has been enabled).
+        UPDATE_GC_SHADOW \destReg, \refReg
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+        // Update the write watch table if necessary
+        la  t3, g_write_watch_table
+        ld  t3, 0(t3)
+        beq t3, zero, 2f
+        srli t5, \destReg, 12               // SoftwareWriteWatch::AddressToTableByteIndexShift
+        add t3, t3, t5
+        lbu t4, 0(t3)
+        bne t4, zero, 2f
+        li  t4, 0xFF
+        sb  t4, 0(t3)
+#endif
+
+2:
+        // We can skip the card table write if the reference is to
+        // an object not on the ephemeral segment.
+        la  t3, g_ephemeral_low
+        ld  t3, 0(t3)
+        bltu \refReg, t3, 0f
+
+        la  t3, g_ephemeral_high
+        ld  t3, 0(t3)
+        bgeu \refReg, t3, 0f
+
+        // Set this object's card, if it has not already been set.
+        la  t3, g_card_table
+        ld  t3, 0(t3)
+        srli t5, \destReg, 11
+        add t4, t3, t5
+
+        // Check that this card has not already been written. Avoiding useless writes is a big win on
+        // multi-processor systems since it avoids cache thrashing.
+        lbu t3, 0(t4)
+        li  t5, 0xFF
+        beq t3, t5, 0f
+
+        li  t3, 0xFF
+        sb  t3, 0(t4)
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        // Check if we need to update the card bundle table
+        la  t3, g_card_bundle_table
+        ld  t3, 0(t3)
+        srli t5, \destReg, 21
+        add t4, t3, t5
+        lbu t3, 0(t4)
+        li  t5, 0xFF
+        beq t3, t5, 0f
+
+        li  t3, 0xFF
+        sb  t3, 0(t4)
+#endif
+
+0:
+        // Exit label
+    .endm
+
+    // On entry:
+    //   destReg: location to be updated
+    //   refReg:  objectref to be stored
+    //
+    // On exit:
+    //   t3, t4, t5: trashed
+    //
+    .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+        // The "check" of this checked write barrier - is destReg
+        // within the heap? If not, early out.
+        la  t3, g_lowest_address
+        ld  t3, 0(t3)
+        bltu \destReg, t3, 0f           // below the heap range: skip the barrier
+
+        la  t3, g_highest_address
+        ld  t3, 0(t3)
+        bgeu \destReg, t3, 0f           // above the heap range: skip the barrier
+
+        INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg
+
+0:
+        // Exit label
+    .endm
+
+// void JIT_ByRefWriteBarrier
+// On entry:
+//   t2: the source address (points to object reference to write)
+//   t6: the destination address (object reference written here)
+//
+// On exit:
+//   t2: incremented by 8
+//   t6: incremented by 8
+//   t1: trashed (holds the object reference)
+//   t3, t4, t5: trashed
+//
+// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+// if you add more trashed registers.
+//
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+LEAF_ENTRY RhpByRefAssignRef, _TEXT
+
+    ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+    ld  t1, 0(t2)
+    addi t2, t2, 8
+    j   C_FUNC(RhpCheckedAssignRef)
+
+LEAF_END RhpByRefAssignRef, _TEXT
+
+// JIT_CheckedWriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that may reside
+// on the managed heap.
+//
+// On entry:
+//   t6: the destination address (LHS of the assignment).
+//       May not be a heap location (hence the checked).
+//   t1: the object reference (RHS of the assignment).
+//
+// On exit:
+//   t3, t4, t5: trashed
+//   t6: incremented by 8
+LEAF_ENTRY RhpCheckedAssignRef, _TEXT
+
+    // is destReg within the heap?
+    la  t3, g_lowest_address
+    ld  t3, 0(t3)
+    bltu t6, t3, NotInHeap
+
+    la  t3, g_highest_address
+    ld  t3, 0(t3)
+    bltu t6, t3, C_FUNC(RhpAssignRefRiscV64)
+
+NotInHeap:
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    sd  t1, 0(t6)
+    addi t6, t6, 8
+    ret
+
+LEAF_END RhpCheckedAssignRef, _TEXT
+
+// JIT_WriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that are known to
+// reside on the managed heap.
+//
+// On entry:
+//   t6: the destination address (LHS of the assignment).
+//   t1: the object reference (RHS of the assignment).
+//
+// On exit:
+//   t3, t4, t5: trashed
+//   t6: incremented by 8
+LEAF_ENTRY RhpAssignRefRiscV64, _TEXT
+
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+    sd  t1, 0(t6)
+
+    INSERT_UNCHECKED_WRITE_BARRIER_CORE t6, t1
+
+    addi t6, t6, 8
+    ret
+
+LEAF_END RhpAssignRefRiscV64, _TEXT
+
+// Same as RhpAssignRefRiscV64, but with standard ABI.
+LEAF_ENTRY RhpAssignRef, _TEXT
+    mv  t6, a0      // t6 = dst
+    mv  t1, a1      // t1 = val
+    j   C_FUNC(RhpAssignRefRiscV64)
+LEAF_END RhpAssignRef, _TEXT
+
+
+// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
+// successful updates.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
+//
+// Interlocked compare exchange on objectref.
+//
+// On entry:
+//   a0: pointer to objectref
+//   a1: exchange value
+//   a2: comparand
+//
+// On exit:
+//   a0: original value of objectref
+//   t0, t1, t3, t4, t5: trashed
+//
+LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
+
+    ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
+1:
+    lr.d.aqrl t1, (a0)              // t1 <- current value of the objectref
+    bne  t1, a2, CmpXchgNoUpdate    // not equal to the comparand: no store
+    sc.d.rl  t0, a1, (a0)           // try to store the exchange value
+    bne  t0, zero, 1b               // reservation was lost: retry
+
+DoCardsCmpXchg:
+    // We have successfully updated the value of the objectref so now we need a GC write barrier.
+    // The following barrier code takes the destination in a0 and the value in a1 so the arguments are
+    // already correctly set up.
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE a0, a1
+
+CmpXchgNoUpdate:
+    // t1 still contains the original value.
+    mv  a0, t1
+
+    ret
+
+LEAF_END RhpCheckedLockCmpXchg, _TEXT
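+
+// Note on the lr.d/sc.d pair above: sc.d writes zero to its destination register on
+// success and non-zero on failure, so the loop retries only when another hart broke the
+// reservation between the load-reserved and the store-conditional. The .aqrl/.rl
+// suffixes give the pair the acquire/release semantics the interlocked contract needs.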
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedXchg(Object** destination, Object* value)
+//
+// Interlocked exchange on objectref.
+//
+// On entry:
+//   a0: pointer to objectref
+//   a1: exchange value
+//
+// On exit:
+//   a0: original value of objectref
+//   t1, t3, t4, t5: trashed
+//
+LEAF_ENTRY RhpCheckedXchg, _TEXT
+
+    ALTERNATE_ENTRY RhpCheckedXchgAVLocation
+    amoswap.d.aqrl t1, a1, (a0)     // atomically swap in the new value; t1 <- original
+
+DoCardsXchg:
+    // We have successfully updated the value of the objectref so now we need a GC write barrier.
+    // The following barrier code takes the destination in a0 and the value in a1 so the arguments are
+    // already correctly set up.
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE a0, a1
+
+    // t1 still contains the original value.
+    mv  a0, t1
+
+    ret
+
+LEAF_END RhpCheckedXchg, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc
new file mode 100644
index 00000000000000..3a76faeaa1bb41
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc
@@ -0,0 +1,325 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmOffsets.inc"
+
+.macro NESTED_ENTRY Name, Section, Handler
+        LEAF_ENTRY \Name, \Section
+        .ifnc \Handler, NoHandler
+        .cfi_personality 0x1b, C_FUNC(\Handler) // 0x1b == DW_EH_PE_pcrel | DW_EH_PE_sdata4 (standard across most platforms)
+        .endif
+.endm
+
+.macro NESTED_END Name, Section
+        LEAF_END \Name, \Section
+.endm
+
+.macro PATCH_LABEL Name
+        .global C_FUNC(\Name)
+C_FUNC(\Name):
+.endm
+
+.macro ALTERNATE_ENTRY Name
+        .global C_FUNC(\Name)
+        .hidden C_FUNC(\Name)
+C_FUNC(\Name):
+.endm
+
+.macro LABELED_RETURN_ADDRESS Name
+        .global C_FUNC(\Name)
+        .hidden C_FUNC(\Name)
+C_FUNC(\Name):
+.endm
+
+.macro LEAF_ENTRY Name, Section
+        .global C_FUNC(\Name)
+        .hidden C_FUNC(\Name)
+        .type \Name, @function
+        .section \Section
+        .align 2
+C_FUNC(\Name):
+        .cfi_startproc
+.endm
+
+.macro LEAF_END Name, Section
+        .size \Name, .-\Name
+        .cfi_endproc
+.endm
+
+// Load the address of an external symbol; la expands to a PC-relative (auipc-based)
+// sequence, so the code stays position-independent.
+.macro PREPARE_EXTERNAL_VAR Name, HelperReg
+        la  \HelperReg, C_FUNC(\Name)
+.endm
+
+.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg
+        la  \HelperReg, C_FUNC(\Name)
+        ld  \HelperReg, 0(\HelperReg)
+.endm
+
+.macro PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg
+        la  \HelperReg, C_FUNC(\Name)
+        lw  \HelperReg, 0(\HelperReg)
+.endm
+
+.macro PROLOG_STACK_ALLOC Size
+        addi sp, sp, -\Size
+        .cfi_adjust_cfa_offset \Size
+.endm
+
+.macro EPILOG_STACK_FREE Size
+        addi sp, sp, \Size
+        .cfi_adjust_cfa_offset -\Size
+.endm
+
+.macro EPILOG_STACK_RESTORE
+        mv  sp, fp
+        .cfi_restore sp
+.endm
+
+.macro PROLOG_SAVE_REG reg, ofs
+        sd  \reg, \ofs(sp)
+        .cfi_rel_offset \reg, \ofs
+.endm
+
+.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs
+        sd  \reg1, \ofs(sp)
+        sd  \reg2, (\ofs + 8)(sp)
+        .cfi_rel_offset \reg1, \ofs
+        .cfi_rel_offset \reg2, \ofs + 8
+        .ifc \reg1, fp
+        mv  fp, sp
+        .cfi_def_cfa_register fp
+        .endif
+.endm
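+
+// Example usage (illustrative only): a nested function that allocates a 0x20-byte frame
+// and preserves one callee-saved pair would open and close with:
+//
+//      PROLOG_SAVE_REG_PAIR_INDEXED    fp, ra, 0x20
+//      PROLOG_SAVE_REG_PAIR            s1, s2, 0x10
+//      ...
+//      EPILOG_RESTORE_REG_PAIR         s1, s2, 0x10
+//      EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x20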
+// Allocate the frame first, then store into it, so the saved registers are never below sp.
+.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ofs
+        addi sp, sp, -\ofs
+        .cfi_adjust_cfa_offset \ofs
+        sd  \reg1, 0(sp)
+        sd  \reg2, 8(sp)
+        .cfi_rel_offset \reg1, 0
+        .cfi_rel_offset \reg2, 8
+        .ifc \reg1, fp
+        mv  fp, sp
+        .cfi_def_cfa_register fp
+        .endif
+.endm
+
+.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs
+        addi sp, sp, -\ofs
+        .cfi_adjust_cfa_offset \ofs
+        sd  \reg1, 0(sp)
+        sd  \reg2, 8(sp)
+        .cfi_rel_offset \reg1, 0
+        .cfi_rel_offset \reg2, 8
+.endm
+
+.macro EPILOG_RESTORE_REG reg, ofs
+        ld  \reg, \ofs(sp)
+        .cfi_restore \reg
+.endm
+
+.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs
+        ld  \reg1, \ofs(sp)
+        ld  \reg2, (\ofs + 8)(sp)
+        .cfi_restore \reg1
+        .cfi_restore \reg2
+.endm
+
+.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ofs
+        ld  \reg1, 0(sp)
+        ld  \reg2, 8(sp)
+        addi sp, sp, \ofs
+        .cfi_restore \reg1
+        .cfi_restore \reg2
+        .cfi_adjust_cfa_offset -\ofs
+.endm
+
+.macro EPILOG_RETURN
+        ret
+.endm
+
+.macro EMIT_BREAKPOINT
+        ebreak
+.endm
+
+.macro EPILOG_BRANCH_REG reg
+        jr  \reg
+.endm
+
+// Loads the address of a thread-local variable into the target register, which cannot
+// be x0. Preserves all other registers. This uses the local-exec TLS model: the runtime
+// is linked into the final executable, so the offset of the variable from the thread
+// pointer (tp) is known at link time.
+.macro INLINE_GET_TLS_VAR target, var
+        .ifc \target, x0
+        .error "target cannot be x0"
+        .endif
+
+        lui  \target, %tprel_hi(\var)
+        add  \target, \target, tp, %tprel_add(\var)
+        addi \target, \target, %tprel_lo(\var)
+.endm
+
+// Inlined version of RhpGetThread. Target cannot be x0.
+.macro INLINE_GETTHREAD target
+        INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread)
+.endm
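+
+// For reference, the local-exec sequence above is the standard RISC-V psABI relaxation
+// form; it computes tp + tprel(var) with three ALU instructions and no memory access:
+//
+//      lui   a2, %tprel_hi(tls_CurrentThread)
+//      add   a2, a2, tp, %tprel_add(tls_CurrentThread)
+//      addi  a2, a2, %tprel_lo(tls_CurrentThread)
+//
+// Other TLS models (initial-exec, general-dynamic) would instead need a GOT load or a
+// call to __tls_get_addr.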
+
+// Do not use these ETLS macros in functions that already create a stack frame.
+// Creating two stack frames in one function can confuse the unwinder/debugger.
+
+.macro GETTHREAD_ETLS_1
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 32     // Push down stack pointer and store FP and RA
+        sd  a0, 16(sp)
+
+        call C_FUNC(RhpGetThread)
+        mv  a1, a0                                  // Thread* returned in a1
+
+        ld  a0, 16(sp)
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32
+.endm
+
+.macro GETTHREAD_ETLS_2
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 32     // Push down stack pointer and store FP and RA
+        sd  a0, 16(sp)
+        sd  a1, 24(sp)
+
+        call C_FUNC(RhpGetThread)
+        mv  a2, a0                                  // Thread* returned in a2
+
+        ld  a0, 16(sp)
+        ld  a1, 24(sp)
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32
+.endm
+
+.macro GETTHREAD_ETLS_3
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 48     // Push down stack pointer and store FP and RA
+        sd  a0, 16(sp)
+        sd  a1, 24(sp)
+        sd  a2, 32(sp)
+
+        call C_FUNC(RhpGetThread)
+        mv  a3, a0                                  // Thread* returned in a3
+
+        ld  a0, 16(sp)
+        ld  a1, 24(sp)
+        ld  a2, 32(sp)
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 48
+.endm
+
+.macro GETTHUNKDATA_ETLS_9
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 96     // Push down stack pointer and store FP and RA
+        sd  a0, 16(sp)
+        sd  a1, 24(sp)
+        sd  a2, 32(sp)
+        sd  a3, 40(sp)
+        sd  a4, 48(sp)
+        sd  a5, 56(sp)
+        sd  a6, 64(sp)
+        sd  a7, 72(sp)
+        sd  tp, 80(sp)
+
+        call C_FUNC(RhpGetThunkData)
+        mv  t0, a0                                  // thunk data address returned in t0
+
+        ld  a0, 16(sp)
+        ld  a1, 24(sp)
+        ld  a2, 32(sp)
+        ld  a3, 40(sp)
+        ld  a4, 48(sp)
+        ld  a5, 56(sp)
+        ld  a6, 64(sp)
+        ld  a7, 72(sp)
+        ld  tp, 80(sp)
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 96
+.endm
+
+.macro InterlockedOperationBarrier
+        fence rw, rw
+.endm
+
+.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2
+        //
+        // Thread::Unhijack()
+        //
+        ld  \trashReg1, OFFSETOF__Thread__m_pvHijackedReturnAddress(\threadReg)
+        beqz \trashReg1, 0f
+
+        ld  \trashReg2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(\threadReg)
+        sd  \trashReg1, 0(\trashReg2)
+        sd  zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(\threadReg)
+        sd  zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(\threadReg)
+0:
+.endm
+
+// Note: these must match the defs in PInvokeTransitionFrameFlags
+PTFF_SAVE_SP = 0x00000400
+PTFF_SAVE_A0 = 0x00000800
+PTFF_SAVE_A1 = 0x00001000
+PTFF_SAVE_ALL_PRESERVED = 0x000003FF    // NOTE: s1-s10
+
+DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP
+
+.macro PUSH_COOP_PINVOKE_FRAME trashReg
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x80   // Push down stack pointer and store FP and RA
+
+        // 0x10 bytes reserved for Thread* and flags
+
+        // Save callee saved registers
+        PROLOG_SAVE_REG_PAIR s1, s2,  0x20
+        PROLOG_SAVE_REG_PAIR s3, s4,  0x30
+        PROLOG_SAVE_REG_PAIR s5, s6,  0x40
+        PROLOG_SAVE_REG_PAIR s7, s8,  0x50
+        PROLOG_SAVE_REG_PAIR s9, s10, 0x60
+
+        // Save the value of SP before stack allocation to the last slot in the frame (slot #15)
+        addi \trashReg, sp, 0x80
+        sd  \trashReg, 0x70(sp)
+
+        // Record the bitmask of saved registers in the frame (slot #3)
+        li  \trashReg, DEFAULT_FRAME_SAVE_FLAGS
+        sd  \trashReg, 0x18(sp)
+
+        mv  \trashReg, sp
+.endm
+
+// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME
+.macro POP_COOP_PINVOKE_FRAME
+        EPILOG_RESTORE_REG_PAIR s1, s2,  0x20
+        EPILOG_RESTORE_REG_PAIR s3, s4,  0x30
+        EPILOG_RESTORE_REG_PAIR s5, s6,  0x40
+        EPILOG_RESTORE_REG_PAIR s7, s8,  0x50
+        EPILOG_RESTORE_REG_PAIR s9, s10, 0x60
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x80
+.endm
+
+// Bit position of the thread abort flag; too high for a 12-bit andi immediate, so test
+// it with srli + andi.
+PTFF_THREAD_ABORT_BIT = 36
+
+//
+// CONSTANTS -- INTEGER
+//
+#define TSF_Attached            0x01
+#define TSF_SuppressGcStress    0x08
+#define TSF_DoNotTriggerGc      0x10
+#define TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC 0x18
+
+// Bit positions for the flags above, to be used with srli + andi (RISC-V has no
+// test-bit-and-branch instruction).
+TrapThreadsFlags_AbortInProgress_Bit = 0
+TrapThreadsFlags_TrapThreads_Bit = 1
+
+// These must match the TrapThreadsFlags enum
+#define TrapThreadsFlags_None            0
+#define TrapThreadsFlags_AbortInProgress 1
+#define TrapThreadsFlags_TrapThreads     2
From e0ef18b973c19de4c699338c1585998e274c73e1 Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Sat, 10 Aug 2024 11:43:14 +0300
Subject: [PATCH 03/19] part3
---
 .../RiscV64ReadyToRunGenericHelperNode.cs     | 11 +++
 .../RiscV64ReadyToRunHelperNode.cs            |  6 +-
 .../ObjectWriter/Dwarf/DwarfBuilder.cs        |  6 ++
 .../Compiler/ObjectWriter/Dwarf/DwarfCie.cs   | 13 +++
 .../Dwarf/DwarfExpressionBuilder.cs           |  4 +
 .../Compiler/ObjectWriter/ElfNative.cs        | 93 +++++++++++++++++++
 .../Compiler/ObjectWriter/ElfObjectWriter.cs  | 47 ++++++++++
 .../JitInterface/CorInfoImpl.RyuJit.cs        |  4 +-
 8 files changed, 179 insertions(+), 5 deletions(-)
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs
index a382331040c4dd..ecf9e903f7ef42 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs
@@ -39,6 +39,17 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref RiscV64Emitter enco
 
             // Load the generic dictionary cell
             encoder.EmitLD(result, context, dictionarySlot * factory.Target.PointerSize);
+
+            // If there are any invalid entries, we need to test for them
+            //
+            // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries
+            // flag can change over the course of compilation and the bad slot helper dependency
+            // should be reported by someone else - the system should not rely on it coming from here.
+ if (!relocsOnly && _hasInvalidEntries) + { + encoder.EmitXOR(encoder.TargetRegister.IntraProcedureCallScratch1, result, 0); + encoder.EmitJE(encoder.TargetRegister.IntraProcedureCallScratch1, GetBadSlotHelper(factory)); + } } protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder, bool relocsOnly) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs index 620878463e3e30..c6ad35d1c76add 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs @@ -140,12 +140,12 @@ protected override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder case ReadyToRunHelperId.ResolveVirtualFunction: { - // Not tested - encoder.EmitBreak(); - MethodDesc targetMethod = (MethodDesc)Target; if (targetMethod.OwningType.IsInterface) { + // Not tested + encoder.EmitBreak(); + encoder.EmitMOV(encoder.TargetRegister.Arg1, factory.InterfaceDispatchCell(targetMethod)); encoder.EmitJMP(factory.ExternSymbol("RhpResolveInterfaceMethod")); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs index 643f14056bd4dc..1a02b7074be777 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs @@ -86,6 +86,12 @@ public DwarfBuilder( _codeRelocType = RelocType.IMAGE_REL_BASED_DIR64; break; + case TargetArchitecture.RISCV64: + _targetPointerSize = 8; + _frameRegister = 8; // FP + _codeRelocType = RelocType.IMAGE_REL_BASED_DIR64; + break; + default: throw new NotSupportedException("Unsupported architecture"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs index e5303e64f1aa77..b78f73b3bf1c9a 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs @@ -93,6 +93,19 @@ public DwarfCie(TargetArchitecture targetArchitecture) InitialCFAOffset = 0; break; + case TargetArchitecture.RISCV64: + CodeAlignFactor = 1; + DataAlignFactor = -8; + ReturnAddressRegister = 1; // RA + Instructions = new byte[] + { + DW_CFA_def_cfa, + 2, // SP + 0, // Offset from SP + }; + InitialCFAOffset = 0; + break; + default: throw new NotSupportedException("Unsupported architecture"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs index 89c21887744160..f32658bb8b193f 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs @@ -165,6 +165,10 @@ public static int DwarfRegNum(TargetArchitecture architecture, int regNum) // Normal registers are directly mapped return regNum; + case TargetArchitecture.RISCV64: + // 
Normal registers are directly mapped
+                return regNum;
+
             default:
                 throw new NotSupportedException();
         }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs
index 8288f7fe8bd35b..58f4eef9d96c56 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs
@@ -25,6 +25,7 @@ internal static class ElfNative
         public const ushort EM_ARM = 40;
         public const ushort EM_X86_64 = 62;
         public const ushort EM_AARCH64 = 183;
+        public const ushort EM_RISCV = 243;
         public const ushort EM_LOONGARCH = 258;
 
         // Section header type
@@ -553,5 +554,97 @@ internal static class ElfNative
         public const uint R_LARCH_TLS_LD_PCREL20_S2 = 124;
         public const uint R_LARCH_TLS_GD_PCREL20_S2 = 125;
         public const uint R_LARCH_TLS_DESC_PCREL20_S2 = 126;
+
+        // Relocations (riscv), per the RISC-V psABI specification
+        public const uint R_RISCV_NONE = 0;
+        public const uint R_RISCV_32 = 1;
+        public const uint R_RISCV_64 = 2;
+        public const uint R_RISCV_RELATIVE = 3;
+        public const uint R_RISCV_COPY = 4;
+        public const uint R_RISCV_JUMP_SLOT = 5;
+        public const uint R_RISCV_TLS_DTPMOD32 = 6;
+        public const uint R_RISCV_TLS_DTPMOD64 = 7;
+        public const uint R_RISCV_TLS_DTPREL32 = 8;
+        public const uint R_RISCV_TLS_DTPREL64 = 9;
+        public const uint R_RISCV_TLS_TPREL32 = 10;
+        public const uint R_RISCV_TLS_TPREL64 = 11;
+        public const uint R_RISCV_TLSDESC = 12;
+        public const uint R_RISCV_BRANCH = 16;
+        public const uint R_RISCV_JAL = 17;
+        public const uint R_RISCV_CALL = 18;
+        public const uint R_RISCV_CALL_PLT = 19;
+        public const uint R_RISCV_GOT_HI20 = 20;
+        public const uint R_RISCV_TLS_GOT_HI20 = 21;
+        public const uint R_RISCV_TLS_GD_HI20 = 22;
+        public const uint R_RISCV_PCREL_HI20 = 23;
+        public const uint R_RISCV_PCREL_LO12_I = 24;
+        public const uint R_RISCV_PCREL_LO12_S = 25;
+        public const uint R_RISCV_HI20 = 26;
+        public const uint R_RISCV_LO12_I = 27;
+        public const uint R_RISCV_LO12_S = 28;
+        public const uint R_RISCV_TPREL_HI20 = 29;
+        public const uint R_RISCV_TPREL_LO12_I = 30;
+        public const uint R_RISCV_TPREL_LO12_S = 31;
+        public const uint R_RISCV_TPREL_ADD = 32;
+        public const uint R_RISCV_ADD8 = 33;
+        public const uint R_RISCV_ADD16 = 34;
+        public const uint R_RISCV_ADD32 = 35;
+        public const uint R_RISCV_ADD64 = 36;
+        public const uint R_RISCV_SUB8 = 37;
+        public const uint R_RISCV_SUB16 = 38;
+        public const uint R_RISCV_SUB32 = 39;
+        public const uint R_RISCV_SUB64 = 40;
+        public const uint R_RISCV_GOT32_PCREL = 41;
+        public const uint R_RISCV_ALIGN = 43;
+        public const uint R_RISCV_RVC_BRANCH = 44;
+        public const uint R_RISCV_RVC_JUMP = 45;
+        public const uint R_RISCV_RELAX = 51;
+        public const uint R_RISCV_SUB6 = 52;
+        public const uint R_RISCV_SET6 = 53;
+        public const uint R_RISCV_SET8 = 54;
+        public const uint R_RISCV_SET16 = 55;
+        public const uint R_RISCV_SET32 = 56;
+        public const uint R_RISCV_32_PCREL = 57;
+        public const uint R_RISCV_IRELATIVE = 58;
+        public const uint R_RISCV_PLT32 = 59;
+        public const uint R_RISCV_SET_ULEB128 = 60;
+        public const uint R_RISCV_SUB_ULEB128 = 61;
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
index 9f3d877b602b61..19961c9de91ed7 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs
@@ -60,6 +60,7 @@ public ElfObjectWriter(NodeFactory factory, ObjectWritingOptions options)
                 TargetArchitecture.ARM => EM_ARM,
                 TargetArchitecture.ARM64 => EM_AARCH64,
                 TargetArchitecture.LoongArch64 => EM_LOONGARCH,
+                TargetArchitecture.RiscV64 => EM_RISCV,
                 _ => throw new NotSupportedException("Unsupported architecture")
             };
             _useInlineRelocationAddends = _machine is EM_386 or EM_ARM;
@@ -362,6 +363,9 @@ private protected override void EmitRelocations(int sectionIndex, List<SymbolicRelocation> relocationList)
+                case EM_RISCV:
+                    EmitRelocationsRiscV64(sectionIndex, relocationList);
+                    break;
 
+        private void EmitRelocationsRiscV64(int sectionIndex, List<SymbolicRelocation> relocationList)
+        {
+            if (relocationList.Count > 0)
+            {
+                Span<byte> relocationEntry = stackalloc byte[24];
+                var relocationStream = new MemoryStream(24 * relocationList.Count);
+                _sections[sectionIndex].RelocationStream = relocationStream;
+
+                foreach (SymbolicRelocation symbolicRelocation in relocationList)
+                {
+                    uint symbolIndex = _symbolNameToIndex[symbolicRelocation.SymbolName];
+                    uint type = symbolicRelocation.Type switch
+                    {
+                        IMAGE_REL_BASED_DIR64 => R_RISCV_64,
+                        IMAGE_REL_BASED_HIGHLOW => R_RISCV_32,
+                        IMAGE_REL_BASED_RELPTR32 => R_RISCV_32_PCREL,
+                        IMAGE_REL_BASED_RISCV_CALL => R_RISCV_CALL_PLT,
+                        IMAGE_REL_BASED_RISCV_JUMP_SLOT => R_RISCV_JUMP_SLOT,
+                        IMAGE_REL_BASED_RISCV_TLS_LE => R_RISCV_TPREL_HI20,
+                        IMAGE_REL_BASED_RISCV_TLS_GD => R_RISCV_TLS_GD_HI20,
+                        IMAGE_REL_BASED_RISCV_TLS_IE => R_RISCV_TLS_GOT_HI20,
+                        _ => throw new NotSupportedException("Unknown relocation type: " + symbolicRelocation.Type)
+                    };
+
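+                    // ELF64 Rela entries are 24 bytes: r_offset (8 bytes), then r_info
+                    // packing the symbol table index into the high 32 bits and the
+                    // relocation type into the low 32 bits, then the signed addend.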
+                    BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry, (ulong)symbolicRelocation.Offset);
+                    BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | type);
+                    BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend);
+                    relocationStream.Write(relocationEntry);
+
+                    // Note: R_RISCV_CALL_PLT covers the whole auipc+jalr pair, so no
+                    // second relocation entry is needed for calls.
+                }
+            }
+        }
+
         private protected override void EmitSectionsAndLayout()
         {
             if (_machine == EM_ARM)
@@ -805,6 +851,7 @@ private void EmitObjectFile(FileStream outputFileStream)
                 {
                     EM_ARM => 0x05000000u, // For ARM32 claim conformance with the EABI specification
                     EM_LOONGARCH => 0x43u, // For LoongArch ELF psABI specify the ABI version (1) and modifiers (64-bit GPRs, 64-bit FPRs)
+                    EM_RISCV => 0x05u, // EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE (RV64GC, LP64D ABI)
                     _ => 0u
                 },
             };
diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
index ee0e6c6a8ea65f..b5afad4eaf6fd7 100644
--- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
+++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs
@@ -1998,12 +1998,12 @@ private int SizeOfPInvokeTransitionFrame
             // m_RIP (1)
             // m_FramePointer (1)
             // m_pThread
-            // m_Flags + align (no align for ARM64/LoongArch64 that has 64 bit m_Flags)
+            // m_Flags + align (no align for ARM64/LoongArch64/RiscV64 that has 64 bit m_Flags)
             // m_PreservedRegs - RSP / R9 (2)
             // No need to save other preserved regs because the JIT ensures that there are
             // no live GC references in callee saved registers around the PInvoke callsite.
             //
-            // (1) On ARM32/ARM64/LoongArch64 the order of m_RIP and m_FramePointer is reverse
+            // (1) On ARM32/ARM64/LoongArch64/RiscV64 the order of m_RIP and m_FramePointer is reverse
             // (2) R9 is saved for ARM32 because it needs to be preserved for methods with stackalloc
 
             int size = 5 * this.PointerSize;
From 3900943487067a9b817851f00d539958dff98bfd Mon Sep 17 00:00:00 2001
From: Adeel <3840695+am11@users.noreply.github.com>
Date: Sun, 11 Aug 2024 19:30:03 +0300
Subject: [PATCH 04/19] Misc.
fixes --- eng/Subsets.props | 2 +- src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 41 +- src/coreclr/nativeaot/Runtime/ICodeManager.h | 16 +- src/coreclr/nativeaot/Runtime/MiscHelpers.cpp | 12 +- src/coreclr/nativeaot/Runtime/PalRedhawk.h | 99 +-- .../nativeaot/Runtime/PalRedhawkCommon.h | 63 +- .../nativeaot/Runtime/StackFrameIterator.cpp | 121 ++- .../nativeaot/Runtime/StackFrameIterator.h | 18 +- src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 90 +- src/coreclr/nativeaot/Runtime/regdisplay.h | 83 +- .../nativeaot/Runtime/riscv64/AllocFast.S | 140 ++-- .../nativeaot/Runtime/riscv64/AsmOffsetsCpu.h | 10 +- .../Runtime/riscv64/ExceptionHandling.S | 770 +++++++++--------- .../nativeaot/Runtime/riscv64/GcProbe.S | 150 ++-- .../nativeaot/Runtime/riscv64/StubDispatch.S | 59 +- .../Runtime/riscv64/UniversalTransition.S | 159 ++-- .../nativeaot/Runtime/riscv64/WriteBarriers.S | 52 +- .../nativeaot/Runtime/unix/PalRedhawkInline.h | 2 +- .../nativeaot/Runtime/unix/UnixContext.cpp | 289 ++++--- .../nativeaot/Runtime/unix/UnixContext.h | 80 +- .../nativeaot/Runtime/unix/UnwindHelpers.cpp | 467 +++++++---- .../nativeaot/Runtime/unix/unixasmmacros.inc | 2 +- .../Runtime/unix/unixasmmacrosriscv64.inc | 97 ++- src/coreclr/vm/gcinfodecoder.cpp | 2 +- .../include/__libunwind_config.h | 4 +- .../llvm-libunwind/src/UnwindCursor.hpp | 22 +- 26 files changed, 1504 insertions(+), 1346 deletions(-) diff --git a/eng/Subsets.props b/eng/Subsets.props index 300e7a6d3696a6..488b8c43847c0c 100644 --- a/eng/Subsets.props +++ b/eng/Subsets.props @@ -120,7 +120,7 @@ <_NativeAotSupportedOS Condition="'$(TargetOS)' == 'windows' or '$(TargetOS)' == 'linux' or '$(TargetOS)' == 'osx' or '$(TargetOS)' == 'maccatalyst' or '$(TargetOS)' == 'iossimulator' or '$(TargetOS)' == 'ios' or '$(TargetOS)' == 'tvossimulator' or '$(TargetOS)' == 'tvos' or '$(TargetOS)' == 'freebsd'">true - <_NativeAotSupportedArch Condition="'$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm' or '$(TargetArchitecture)' == 'loongarch64' or '$(TargetArchitecture)' == 'riscv64' or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'x86')">true + <_NativeAotSupportedArch Condition="'$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm' or '$(TargetArchitecture)' == 'loongarch64' or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'x86')">true true true diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index 22d3b9acf0213a..d3ebdbc80b7508 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -204,24 +204,29 @@ FCIMPL3(void, RhpCopyContextFromExInfo, void * pOSContext, int32_t cbOSContext, pContext->Ra = pPalContext->RA; pContext->Pc = pPalContext->IP; #elif defined(HOST_RISCV64) - pContext->x1 = pPalContext->RA; // Return address - pContext->x2 = pPalContext->SP; // Stack pointer - pContext->x8 = pPalContext->S0; // Saved register/frame pointer - pContext->x9 = pPalContext->S1; // Saved register - pContext->x18 = pPalContext->S2; // Saved register - pContext->x19 = pPalContext->S3; // Saved register - pContext->x20 = pPalContext->S4; // Saved register - pContext->x21 = pPalContext->S5; // Saved register - pContext->x22 = pPalContext->S6; // Saved register - pContext->x23 = pPalContext->S7; // Saved register - pContext->x24 = pPalContext->S8; // Saved register - pContext->x25 = pPalContext->S9; // Saved register - pContext->x26 = 
pPalContext->S10; // Saved register
-    pContext->x27 = pPalContext->S11; // Saved register
-    pContext->Fp = pPalContext->S0; // Frame pointer (alias for x8)
-    pContext->Sp = pPalContext->SP; // Stack pointer (alias for x2)
-    pContext->Ra = pPalContext->RA; // Return address (alias for x1)
-    pContext->Pc = pPalContext->IP; // Program counter
+    pContext->A0 = pPalContext->A0;
+    pContext->A1 = pPalContext->A1;
+    pContext->A2 = pPalContext->A2;
+    pContext->A3 = pPalContext->A3;
+    pContext->A4 = pPalContext->A4;
+    pContext->A5 = pPalContext->A5;
+    pContext->A6 = pPalContext->A6;
+    pContext->A7 = pPalContext->A7;
+    pContext->S1 = pPalContext->S1;
+    pContext->S2 = pPalContext->S2;
+    pContext->S3 = pPalContext->S3;
+    pContext->S4 = pPalContext->S4;
+    pContext->S5 = pPalContext->S5;
+    pContext->S6 = pPalContext->S6;
+    pContext->S7 = pPalContext->S7;
+    pContext->S8 = pPalContext->S8;
+    pContext->S9 = pPalContext->S9;
+    pContext->S10 = pPalContext->S10;
+    pContext->S11 = pPalContext->S11;
+    pContext->Fp = pPalContext->FP;
+    pContext->Sp = pPalContext->SP;
+    pContext->Ra = pPalContext->RA;
+    pContext->Pc = pPalContext->IP;
 #elif defined(HOST_WASM)
     // No registers, no work to do yet
 #else
diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h
index ec0598b9e45efa..3f4c824b2f8940 100644
--- a/src/coreclr/nativeaot/Runtime/ICodeManager.h
+++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h
@@ -88,22 +88,22 @@ inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags)
 #elif defined(TARGET_RISCV64)
 
 // Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back
-C_ASSERT(PTFF_A0_IS_GCREF == ((uint64_t)GCRK_Object << 32));
-C_ASSERT(PTFF_A0_IS_BYREF == ((uint64_t)GCRK_Byref << 32));
-C_ASSERT(PTFF_A1_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 32));
-C_ASSERT(PTFF_A1_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 32));
+C_ASSERT(PTFF_T0_IS_GCREF == ((uint64_t)GCRK_Object << 32));
+C_ASSERT(PTFF_T0_IS_BYREF == ((uint64_t)GCRK_Byref << 32));
+C_ASSERT(PTFF_T1_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 32));
+C_ASSERT(PTFF_T1_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 32));
 
 inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind)
 {
-    // just need to report gc ref bits here.
-    // appropriate PTFF_SAVE_ bits will be added by the frame building routine.
+    // Just need to report GC ref bits here.
+    // Appropriate PTFF_SAVE_ bits will be added by the frame building routine.
     return ((uint64_t)returnKind << 32);
 }
 
 inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags)
 {
-    GCRefKind returnKind = (GCRefKind)((transFrameFlags & (PTFF_A0_IS_GCREF | PTFF_A0_IS_BYREF | PTFF_A1_IS_GCREF | PTFF_A1_IS_BYREF)) >> 32);
-    ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_A0) && (transFrameFlags & PTFF_SAVE_A1)));
+    GCRefKind returnKind = (GCRefKind)((transFrameFlags & (PTFF_T0_IS_GCREF | PTFF_T0_IS_BYREF | PTFF_T1_IS_GCREF | PTFF_T1_IS_BYREF)) >> 32);
+    ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_T0) && (transFrameFlags & PTFF_SAVE_T1)));
     return returnKind;
 }
 
diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
index 6de3287f7ab79e..ae39f7586c1afc 100644
--- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
@@ -374,7 +374,7 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg)
 #elif TARGET_RISCV64
     uint32_t * pCode = (uint32_t *)pCodeOrg;
     // is this "addi a0, a0, 8"?
-    if (pCode[0] == 0x0002_00b3) // Encoding for `addi a0, a0, 8` in 32-bit RISC-V
+    if (pCode[0] == 0x00850513) // RV64I encoding of `addi a0, a0, 8`
     {
         // unboxing sequence
         unboxingStub = true;
@@ -382,9 +382,9 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg)
     }
     // is this an indirect jump?
    // lui t0, imm; jalr t0, t0, imm12
-    if ((pCode[0] & 0x000f_f000) == 0x0002_0000 &&
-        (pCode[1] & 0x000f_f000) == 0x0000_00a0 &&
-        (pCode[2] & 0x0000_000f) == 0x0000_0000)
+    if ((pCode[0] & 0x000ff000) == 0x00020000 &&
+        (pCode[1] & 0x000ff000) == 0x000000a0 &&
+        (pCode[2] & 0x0000000f) == 0x00000000)
     {
         // normal import stub - dist to IAT cell is relative to (PC & ~0xfff)
         // lui: imm = SignExtend(imm20:Zeros(12), 64);
@@ -396,8 +396,8 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg)
     }
     // is this an unboxing stub followed by a relative jump?
     // jal ra, imm
-    else if (unboxingStub && (pCode[0] & 0xffe0_0000) == 0x0000_0000 &&
-             (pCode[1] & 0x0000_ffff) == 0x0000_0000)
+    else if (unboxingStub && (pCode[0] & 0xffe00000) == 0x00000000 &&
+             (pCode[1] & 0x0000ffff) == 0x00000000)
     {
         // relative jump - dist is relative to the instruction
         // offset = SignExtend(imm20:Zeros(12), 64);
diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h
index fb9a2ed2724dfc..6717a5c227710e 100644
--- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h
+++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h
@@ -538,10 +538,6 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
 #define CONTEXT_CONTROL (CONTEXT_RISCV64 | 0x1L)
 #define CONTEXT_INTEGER (CONTEXT_RISCV64 | 0x2L)
 
-// Specify the number of breakpoints and watchpoints that the OS
-// will track. Architecturally, RISC-V supports up to 16. In practice,
-// almost no one implements more than 4 of each.
-
 #define RISCV64_MAX_BREAKPOINTS 8
 #define RISCV64_MAX_WATCHPOINTS 2

@@ -554,70 +550,63 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
     //
     // Integer registers
     //
-    uint64_t X0;  // Zero Register
-    uint64_t X1;  // Return Address
-    uint64_t X2;  // Stack Pointer
-    uint64_t X3;  // Global Pointer
-    uint64_t X4;  // Thread Pointer
-    uint64_t X5;  // Temporary Register
-    uint64_t X6;  // Temporary Register
-    uint64_t X7;  // Temporary Register
-    uint64_t X8;  // Saved Register
-    uint64_t X9;  // Saved Register
-    uint64_t X10; // Function Argument
-    uint64_t X11; // Function Argument
-    uint64_t X12; // Function Argument
-    uint64_t X13; // Function Argument
-    uint64_t X14; // Function Argument
-    uint64_t X15; // Function Argument
-    uint64_t X16; // Function Argument
-    uint64_t X17; // Function Argument
-    uint64_t X18; // Function Argument
-    uint64_t X19; // Function Argument
-    uint64_t X20; // Function Argument
-    uint64_t X21; // Function Argument
-    uint64_t X22; // Function Argument
-    uint64_t X23; // Function Argument
-    uint64_t X24; // Function Argument
-    uint64_t X25; // Function Argument
-    uint64_t X26; // Function Argument
-    uint64_t X27; // Function Argument
-    uint64_t X28; // Function Argument
-    uint64_t X29; // Function Argument
-    uint64_t X30; // Function Argument
-    uint64_t X31; // Function Argument
-    uint64_t Pc;  // Program Counter
+    uint64_t X0;
+    uint64_t Ra;
+    uint64_t Sp;
+    uint64_t Gp;
+    uint64_t Tp;
+    uint64_t T0;
+    uint64_t T1;
+    uint64_t T2;
+    uint64_t Fp;
+    uint64_t S1;
+    uint64_t A0;
+    uint64_t A1;
+    uint64_t A2;
+    uint64_t A3;
+    uint64_t A4;
+    uint64_t A5;
+    uint64_t A6;
+    uint64_t A7;
+    uint64_t S2;
+    uint64_t S3;
+    uint64_t S4;
+    uint64_t S5;
+    uint64_t S6;
+    uint64_t S7;
+    uint64_t S8;
+    uint64_t S9;
+    uint64_t S10;
+    uint64_t S11;
+    uint64_t T3;
+    uint64_t T4;
+    uint64_t T5;
+    uint64_t T6;
+    uint64_t Pc;

     //
     // Floating Point Registers
     //
-    uint64_t F[32];  // Floating-point registers
-    uint32_t Fcsr;   // Floating-point Control and Status Register
-
-    //
-    // Debug registers
-    //
-    uint32_t Bcr[RISCV64_MAX_BREAKPOINTS];
-    uint64_t Bvr[RISCV64_MAX_BREAKPOINTS];
-    uint32_t Wcr[RISCV64_MAX_WATCHPOINTS];
-    uint64_t Wvr[RISCV64_MAX_WATCHPOINTS];
+    uint64_t F[32];  // One 64-bit slot per F register (f0-f31)
+    uint32_t Fcsr;

     void SetIp(uintptr_t ip) { Pc = ip; }
-    void SetArg0Reg(uintptr_t val) { X10 = val; } // X10 typically used for first argument
-    void SetArg1Reg(uintptr_t val) { X11 = val; } // X11 typically used for second argument
+    void SetArg0Reg(uintptr_t val) { A0 = val; }
+    void SetArg1Reg(uintptr_t val) { A1 = val; }
     uintptr_t GetIp() { return Pc; }
-    uintptr_t GetLr() { return X1; } // Return Address
-    uintptr_t GetSp() { return X2; } // Stack Pointer
-    void SetSp(uintptr_t sp) { X2 = sp; }
+    uintptr_t GetRa() { return Ra; }
+    uintptr_t GetSp() { return Sp; }
+    void SetSp(uintptr_t sp) { Sp = sp; }

     template <typename F>
     void ForEachPossibleObjectRef(F lambda)
     {
-        for (uint64_t* pReg = &X0; pReg <= &X31; pReg++)
+        // The X0..T6 range already includes Ra, which can be used as a scratch register
+        for (uint64_t* pReg = &X0; pReg <= &T6; pReg++)
             lambda((size_t*)pReg);
-
-        // X1 (Return Address) can be used as a scratch register
-        lambda((size_t*)&X1);
     }
 } CONTEXT, *PCONTEXT;
diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h
index 12655bdfcb4116..d3506c1c93ba64 100644
--- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h
+++ b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h
@@ -125,47 +125,40 @@ struct PAL_LIMITED_CONTEXT
     void SetSp(uintptr_t sp) { SP = sp; }
 #elif defined(TARGET_RISCV64)
+
uintptr_t FP;
     uintptr_t RA;

-    uintptr_t X0;
-    uintptr_t X1;
-    uintptr_t X2;
-    uintptr_t X3;
-    uintptr_t X4;
-    uintptr_t X5;
-    uintptr_t X6;
-    uintptr_t X7;
-    uintptr_t X8;
-    uintptr_t X9;
-    uintptr_t X10;
-    uintptr_t X11;
-    uintptr_t X12;
-    uintptr_t X13;
-    uintptr_t X14;
-    uintptr_t X15;
-    uintptr_t X16;
-    uintptr_t X17;
-    uintptr_t X18;
-    uintptr_t X19;
-    uintptr_t X20;
-    uintptr_t X21;
-    uintptr_t X22;
-    uintptr_t X23;
-    uintptr_t X24;
-    uintptr_t X25;
-    uintptr_t X26;
-    uintptr_t X27;
-    uintptr_t X28;
-    uintptr_t X29;
-    uintptr_t X30;
-    uintptr_t X31;
-
+    uintptr_t A0;
+    uintptr_t A1;
+    uintptr_t A2;
+    uintptr_t A3;
+    uintptr_t A4;
+    uintptr_t A5;
+    uintptr_t A6;
+    uintptr_t A7;
+    uintptr_t S1;
+    uintptr_t S2;
+    uintptr_t S3;
+    uintptr_t S4;
+    uintptr_t S5;
+    uintptr_t S6;
+    uintptr_t S7;
+    uintptr_t S8;
+    uintptr_t S9;
+    uintptr_t S10;
+    uintptr_t S11;
+    uintptr_t T0;
+    uintptr_t T1;
+    uintptr_t T2;
+    uintptr_t T3;
+    uintptr_t T4;
+    uintptr_t T5;
+    uintptr_t T6;
     uintptr_t SP;
     uintptr_t IP;

-    uint64_t F[32 - 16]; // Only the bottom 64-bit values of the F registers F16..F31 need to be preserved
-                         // (F0-F15 are not preserved according to the ABI spec).
+    uint64_t F[12];      // Callee-saved F registers fs0-fs11 (f8, f9, f18-f27); the others
+                         // are not preserved according to the RISC-V psABI.

     uintptr_t GetIp() const { return IP; }
     uintptr_t GetSp() const { return SP; }
diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
index 468a9f901078ea..64d8f1fb213db1 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -293,45 +293,36 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF
     ASSERT(!(pFrame->m_Flags & PTFF_SAVE_FP)); // FP should never contain a GC ref

-    if (pFrame->m_Flags & PTFF_SAVE_X19) { m_RegDisplay.pX19 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X20) { m_RegDisplay.pX20 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X21) { m_RegDisplay.pX21 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X22) { m_RegDisplay.pX22 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X23) { m_RegDisplay.pX23 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X24) { m_RegDisplay.pX24 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X25) { m_RegDisplay.pX25 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X26) { m_RegDisplay.pX26 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X27) { m_RegDisplay.pX27 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X28) { m_RegDisplay.pX28 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_S1) { m_RegDisplay.pS1 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_S2) { m_RegDisplay.pS2 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_S3) { m_RegDisplay.pS3 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_S4) { m_RegDisplay.pS4 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_S5) { m_RegDisplay.pS5 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_S6) { m_RegDisplay.pS6 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_S7) { m_RegDisplay.pS7 = pPreservedRegsCursor++; }
+    if (pFrame->m_Flags & PTFF_SAVE_S8) { m_RegDisplay.pS8 = pPreservedRegsCursor++; }

     if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; }

-    if (pFrame->m_Flags & PTFF_SAVE_X0) { m_RegDisplay.pX0 = pPreservedRegsCursor++; }
-    if (pFrame->m_Flags & PTFF_SAVE_X1) { m_RegDisplay.pX1 = pPreservedRegsCursor++; }
-    if
(pFrame->m_Flags & PTFF_SAVE_X2) { m_RegDisplay.pX2 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X3) { m_RegDisplay.pX3 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X4) { m_RegDisplay.pX4 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X5) { m_RegDisplay.pX5 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X6) { m_RegDisplay.pX6 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X7) { m_RegDisplay.pX7 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X8) { m_RegDisplay.pX8 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X9) { m_RegDisplay.pX9 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X10) { m_RegDisplay.pX10 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X11) { m_RegDisplay.pX11 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X12) { m_RegDisplay.pX12 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X13) { m_RegDisplay.pX13 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X14) { m_RegDisplay.pX14 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X15) { m_RegDisplay.pX15 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X16) { m_RegDisplay.pX16 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X17) { m_RegDisplay.pX17 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_X18) { m_RegDisplay.pX18 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T0) { m_RegDisplay.pT0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T1) { m_RegDisplay.pT1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T2) { m_RegDisplay.pT2 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T3) { m_RegDisplay.pT3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T4) { m_RegDisplay.pT4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T5) { m_RegDisplay.pT5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T6) { m_RegDisplay.pT6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T7) { m_RegDisplay.pT7 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T8) { m_RegDisplay.pT8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T9) { m_RegDisplay.pT9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T10) { m_RegDisplay.pT10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T11) { m_RegDisplay.pT11 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_RA) { m_RegDisplay.pRA = pPreservedRegsCursor++; } GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags); if (retValueKind != GCRK_Scalar) { - m_pHijackedReturnValue = (PTR_OBJECTREF)m_RegDisplay.pX0; + m_pHijackedReturnValue = (PTR_OBJECTREF)m_RegDisplay.pT0; m_HijackedReturnValueKind = retValueKind; } @@ -990,15 +981,15 @@ void StackFrameIterator::UpdateFromExceptionDispatch(PTR_StackFrameIterator pSou m_RegDisplay.pFP = thisFuncletPtrs.pFP; #elif defined(TARGET_RISCV64) + m_RegDisplay.pS1 = thisFuncletPtrs.pS1; + m_RegDisplay.pS2 = thisFuncletPtrs.pS2; + m_RegDisplay.pS3 = thisFuncletPtrs.pS3; + m_RegDisplay.pS4 = thisFuncletPtrs.pS4; + m_RegDisplay.pS5 = thisFuncletPtrs.pS5; + m_RegDisplay.pS6 = thisFuncletPtrs.pS6; + m_RegDisplay.pS7 = thisFuncletPtrs.pS7; + m_RegDisplay.pS8 = thisFuncletPtrs.pS8; m_RegDisplay.pFP = thisFuncletPtrs.pFP; - m_RegDisplay.pRA = thisFuncletPtrs.pRA; - m_RegDisplay.pX8 = thisFuncletPtrs.pX8; - m_RegDisplay.pX9 = thisFuncletPtrs.pX9; - m_RegDisplay.pX10 = 
thisFuncletPtrs.pX10; - m_RegDisplay.pX11 = thisFuncletPtrs.pX11; - m_RegDisplay.pX12 = thisFuncletPtrs.pX12; - m_RegDisplay.pSP = thisFuncletPtrs.pSP; - m_RegDisplay.pIP = thisFuncletPtrs.pIP; #elif defined(UNIX_AMD64_ABI) // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. @@ -1298,29 +1289,29 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() // thunks, but we don't need to know what they are here, so we just skip them. SP += EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ? 6 : 4; // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. - m_funcletPtrs.pX8 = m_RegDisplay.pX8; - m_funcletPtrs.pX9 = m_RegDisplay.pX9; - m_funcletPtrs.pX10 = m_RegDisplay.pX10; - m_funcletPtrs.pX11 = m_RegDisplay.pX11; - m_funcletPtrs.pX12 = m_RegDisplay.pX12; - m_funcletPtrs.pX13 = m_RegDisplay.pX13; - m_funcletPtrs.pX14 = m_RegDisplay.pX14; - m_funcletPtrs.pX15 = m_RegDisplay.pX15; - m_funcletPtrs.pFP = m_RegDisplay.pFP; + m_funcletPtrs.pS1 = m_RegDisplay.pS1; + m_funcletPtrs.pS2 = m_RegDisplay.pS2; + m_funcletPtrs.pS3 = m_RegDisplay.pS3; + m_funcletPtrs.pS4 = m_RegDisplay.pS4; + m_funcletPtrs.pS5 = m_RegDisplay.pS5; + m_funcletPtrs.pS6 = m_RegDisplay.pS6; + m_funcletPtrs.pS7 = m_RegDisplay.pS7; + m_funcletPtrs.pS8 = m_RegDisplay.pS8; + m_funcletPtrs.pFP = m_RegDisplay.pFP; } m_RegDisplay.pFP = SP++; m_RegDisplay.SetIP(*SP++); - m_RegDisplay.pX8 = SP++; - m_RegDisplay.pX9 = SP++; - m_RegDisplay.pX10 = SP++; - m_RegDisplay.pX11 = SP++; - m_RegDisplay.pX12 = SP++; - m_RegDisplay.pX13 = SP++; - m_RegDisplay.pX14 = SP++; - m_RegDisplay.pX15 = SP++; + m_RegDisplay.pS1 = SP++; + m_RegDisplay.pS2 = SP++; + m_RegDisplay.pS3 = SP++; + m_RegDisplay.pS4 = SP++; + m_RegDisplay.pS5 = SP++; + m_RegDisplay.pS6 = SP++; + m_RegDisplay.pS7 = SP++; + m_RegDisplay.pS8 = SP++; #else SP = (PTR_uintptr_t)(m_RegDisplay.SP); @@ -1673,17 +1664,15 @@ void StackFrameIterator::UnwindThrowSiteThunk() m_RegDisplay.pR31 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R31); m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP); #elif defined(TARGET_RISCV64) - m_RegDisplay.pX19 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X19); - m_RegDisplay.pX20 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X20); - m_RegDisplay.pX21 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X21); - m_RegDisplay.pX22 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X22); - m_RegDisplay.pX23 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X23); - m_RegDisplay.pX24 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X24); - m_RegDisplay.pX25 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X25); - m_RegDisplay.pX26 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X26); - m_RegDisplay.pX27 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X27); - m_RegDisplay.pX28 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, X28); - m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP); + m_RegDisplay.pS1 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S1); + m_RegDisplay.pS2 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S2); + m_RegDisplay.pS3 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S3); + m_RegDisplay.pS4 = 
(PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S4); + m_RegDisplay.pS5 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S5); + m_RegDisplay.pS6 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S6); + m_RegDisplay.pS7 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S7); + m_RegDisplay.pS8 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S8); + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP); #else ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h index 471185ba2152ca..d05b6ba206de13 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h @@ -192,16 +192,14 @@ class StackFrameIterator PTR_uintptr_t pR31; PTR_uintptr_t pFP; #elif defined(TARGET_RISCV64) - PTR_uintptr_t pX19; - PTR_uintptr_t pX20; - PTR_uintptr_t pX21; - PTR_uintptr_t pX22; - PTR_uintptr_t pX23; - PTR_uintptr_t pX24; - PTR_uintptr_t pX25; - PTR_uintptr_t pX26; - PTR_uintptr_t pX27; - PTR_uintptr_t pX28; + PTR_uintptr_t pS1; + PTR_uintptr_t pS2; + PTR_uintptr_t pS3; + PTR_uintptr_t pS4; + PTR_uintptr_t pS5; + PTR_uintptr_t pS6; + PTR_uintptr_t pS7; + PTR_uintptr_t pS8; PTR_uintptr_t pFP; #elif defined(UNIX_AMD64_ABI) PTR_uintptr_t pRbp; diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 621240f6e77c0e..f60d2a2b8d5268 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -403,63 +403,55 @@ enum PInvokeTransitionFrameFlags : uint64_t #elif defined(TARGET_RISCV64) enum PInvokeTransitionFrameFlags : uint64_t { - // NOTE: Keep in sync with src\coreclr\nativeaot\Runtime\riscv64\AsmMacros.h - // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has - // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp) // standard preserved registers - PTFF_SAVE_X19 = 0x0000000000000001, - PTFF_SAVE_X20 = 0x0000000000000002, - PTFF_SAVE_X21 = 0x0000000000000004, - PTFF_SAVE_X22 = 0x0000000000000008, - PTFF_SAVE_X23 = 0x0000000000000010, - PTFF_SAVE_X24 = 0x0000000000000020, - PTFF_SAVE_X25 = 0x0000000000000040, - PTFF_SAVE_X26 = 0x0000000000000080, - PTFF_SAVE_X27 = 0x0000000000000100, - PTFF_SAVE_X28 = 0x0000000000000200, - - PTFF_SAVE_SP = 0x0000000000000400, // Used for 'coop pinvokes' in runtime helper routines. Methods with - // PInvokes are required to have a frame pointers, but methods which - // call runtime helpers are not. Therefore, methods that call runtime - // helpers may need SP to seed the stackwalk. 
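The flag order in the enum below is load-bearing: InternalInit (earlier in this patch) walks m_PreservedRegs with a single cursor, popping one slot per set bit in exactly this order. A minimal C++ model of that contract (names and bit values illustrative, not the runtime's):

    #include <cstdint>

    enum : uint64_t { SAVE_S1 = 0x2, SAVE_S2 = 0x4, SAVE_SP = 0x400 }; // illustrative bits

    struct RegDisplayModel { uintptr_t* pS1; uintptr_t* pS2; uintptr_t SP; };

    // Walk the preserved-register area with one cursor, consuming one slot per
    // set flag, in flag order -- reordering the flags breaks the decode.
    inline void Decode(uint64_t flags, uintptr_t* cursor, RegDisplayModel& rd)
    {
        if (flags & SAVE_S1) rd.pS1 = cursor++;
        if (flags & SAVE_S2) rd.pS2 = cursor++;
        if (flags & SAVE_SP) rd.SP  = *cursor++; // SP is stored by value, not by address
    }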
+    PTFF_SAVE_S0  = 0x0000000000000001,
+    PTFF_SAVE_S1  = 0x0000000000000002,
+    PTFF_SAVE_S2  = 0x0000000000000004,
+    PTFF_SAVE_S3  = 0x0000000000000008,
+    PTFF_SAVE_S4  = 0x0000000000000010,
+    PTFF_SAVE_S5  = 0x0000000000000020,
+    PTFF_SAVE_S6  = 0x0000000000000040,
+    PTFF_SAVE_S7  = 0x0000000000000080,
+    PTFF_SAVE_S8  = 0x0000000000000100,
+    PTFF_SAVE_S9  = 0x0000000000000200,
+
+    PTFF_SAVE_SP  = 0x0000000000000400,     // Used for 'coop pinvokes' in runtime helper routines; methods with
+                                            // PInvokes are required to have frame pointers, but methods that call
+                                            // runtime helpers are not, so they may need SP to seed the stackwalk

     // Scratch registers
-    PTFF_SAVE_X0  = 0x0000000000000800,
-    PTFF_SAVE_X1  = 0x0000000000001000,
-    PTFF_SAVE_X2  = 0x0000000000002000,
-    PTFF_SAVE_X3  = 0x0000000000004000,
-    PTFF_SAVE_X4  = 0x0000000000008000,
-    PTFF_SAVE_X5  = 0x0000000000010000,
-    PTFF_SAVE_X6  = 0x0000000000020000,
-    PTFF_SAVE_X7  = 0x0000000000040000,
-    PTFF_SAVE_X8  = 0x0000000000080000,
-    PTFF_SAVE_X9  = 0x0000000000100000,
-    PTFF_SAVE_X10 = 0x0000000000200000,
-    PTFF_SAVE_X11 = 0x0000000000400000,
-    PTFF_SAVE_X12 = 0x0000000000800000,
-    PTFF_SAVE_X13 = 0x0000000001000000,
-    PTFF_SAVE_X14 = 0x0000000002000000,
-    PTFF_SAVE_X15 = 0x0000000004000000,
-    PTFF_SAVE_X16 = 0x0000000008000000,
-    PTFF_SAVE_X17 = 0x0000000010000000,
-    PTFF_SAVE_X18 = 0x0000000020000000,
-
-    PTFF_SAVE_FP  = 0x0000000040000000,     // should never be used, we require FP frames for methods with
-                                            // pinvoke and it is saved into the frame pointer field instead
-
-    PTFF_SAVE_RA  = 0x0000000080000000,     // this is useful for the case of loop hijacking where we need both
-                                            // a return address pointing into the hijacked method and that method's
-                                            // ra register, which may hold a gc pointer
+    PTFF_SAVE_T0  = 0x0000000000000800,
+    PTFF_SAVE_T1  = 0x0000000000001000,
+    PTFF_SAVE_T2  = 0x0000000000002000,
+    PTFF_SAVE_T3  = 0x0000000000004000,
+    PTFF_SAVE_T4  = 0x0000000000008000,
+    PTFF_SAVE_T5  = 0x0000000000010000,
+    PTFF_SAVE_T6  = 0x0000000000020000,
+    PTFF_SAVE_T7  = 0x0000000000040000,
+    PTFF_SAVE_T8  = 0x0000000000080000,
+    PTFF_SAVE_T9  = 0x0000000000100000,
+    PTFF_SAVE_T10 = 0x0000000000200000,
+    PTFF_SAVE_T11 = 0x0000000000400000,
+    PTFF_SAVE_T12 = 0x0000000000800000,
+    PTFF_SAVE_T13 = 0x0000000001000000,
+    PTFF_SAVE_T14 = 0x0000000002000000,
+    PTFF_SAVE_T15 = 0x0000000004000000,
+    PTFF_SAVE_T16 = 0x0000000008000000,
+    PTFF_SAVE_T17 = 0x0000000010000000,
+    PTFF_SAVE_T18 = 0x0000000020000000,
+
+    PTFF_SAVE_FP  = 0x0000000040000000,     // should never be used; we require FP frames for methods with
+                                            // PInvoke, and FP is saved into the frame pointer field instead
+
+    PTFF_SAVE_RA  = 0x0000000080000000,     // useful for loop hijacking, where we need both a return address
+                                            // pointing into the hijacked method and that method's RA register,
+                                            // which may hold a GC pointer

     // used by hijack handler to report return value of hijacked method
-    PTFF_X0_IS_GCREF = 0x0000000100000000,
-    PTFF_X0_IS_BYREF = 0x0000000200000000,
-    PTFF_X1_IS_GCREF = 0x0000000400000000,
-    PTFF_X1_IS_BYREF = 0x0000000800000000,
+    PTFF_T0_IS_GCREF = 0x0000000100000000,
+    PTFF_T0_IS_BYREF = 0x0000000200000000,
+    PTFF_T1_IS_GCREF = 0x0000000400000000,
+    PTFF_T1_IS_BYREF = 0x0000000800000000,

-    PTFF_THREAD_ABORT = 0x0000001000000000, // indicates that ThreadAbortException should be thrown when returning from the transition
+    PTFF_THREAD_ABORT = 0x0000001000000000, // indicates that a ThreadAbortException should be thrown when returning from the transition
 };

 #else // TARGET_ARM
diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h
index 3c385a7ffdd6ab..0d41b387e4683b 100644
--- a/src/coreclr/nativeaot/Runtime/regdisplay.h
+++ b/src/coreclr/nativeaot/Runtime/regdisplay.h
@@ -235,49 +235,50 @@ struct REGDISPLAY
 struct REGDISPLAY
 {
-    PTR_uintptr_t pX0;
-    PTR_uintptr_t pX1;
-    PTR_uintptr_t pX2;
-    PTR_uintptr_t pX3;
-    PTR_uintptr_t pX4;
-    PTR_uintptr_t pX5;
-    PTR_uintptr_t pX6;
-    PTR_uintptr_t pX7;
-    PTR_uintptr_t pX8;
-    PTR_uintptr_t pX9;
-    PTR_uintptr_t pX10;
-    PTR_uintptr_t pX11;
-    PTR_uintptr_t pX12;
-    PTR_uintptr_t pX13;
-    PTR_uintptr_t pX14;
-    PTR_uintptr_t pX15;
-    PTR_uintptr_t pX16;
-    PTR_uintptr_t pX17;
-    PTR_uintptr_t pX18;
-    PTR_uintptr_t pX19;
-    PTR_uintptr_t pX20;
-    PTR_uintptr_t pX21;
-    PTR_uintptr_t pX22;
-    PTR_uintptr_t pX23;
-    PTR_uintptr_t pX24;
-    PTR_uintptr_t pX25;
-    PTR_uintptr_t pX26;
-    PTR_uintptr_t pX27;
-    PTR_uintptr_t pX28;
-    PTR_uintptr_t pX29;
-    PTR_uintptr_t pX30;
-    PTR_uintptr_t pX31;
-    PTR_uintptr_t pFP; // Frame pointer
-
-    uintptr_t SP;      // Stack pointer
+    PTR_uintptr_t pRA;

-    PCODE IP;          // Instruction pointer
+    PTR_uintptr_t pS1;
+    PTR_uintptr_t pS2;
+    PTR_uintptr_t pS3;
+    PTR_uintptr_t pS4;
+    PTR_uintptr_t pS5;
+    PTR_uintptr_t pS6;
+    PTR_uintptr_t pS7;
+    PTR_uintptr_t pS8;

-    uint64_t F[32-24]; // Only the F registers F24..F31 need to be preserved
-                       // (F0-F23 are not preserved according to the ABI spec).
-                       // These need to be unwound during a stack walk
-                       // for EH, but not adjusted, so we only need
-                       // their values, not their addresses
+    PTR_uintptr_t pFP;
+    uintptr_t SP;
+
+    PTR_uintptr_t pT0;
+    PTR_uintptr_t pT1;
+    PTR_uintptr_t pT2;
+    PTR_uintptr_t pT3;
+    PTR_uintptr_t pT4;
+    PTR_uintptr_t pT5;
+    PTR_uintptr_t pT6;
+    PTR_uintptr_t pT7;
+    PTR_uintptr_t pT8;
+    PTR_uintptr_t pT9;
+    PTR_uintptr_t pT10;
+    PTR_uintptr_t pT11;
+
+    PCODE IP;
+
+    uint64_t F[32]; // Expanded to cover all F registers; only the callee-saved ones are
+                    // needed for unwinding, but the full set keeps context conversion simple

     inline PCODE GetIP() { return IP; }
     inline uintptr_t GetSP() { return SP; }
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S
index f8091cc3f77cef..93fc3229cc9f32 100644
--- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S
+++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S
@@ -15,20 +15,20 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAll

 // Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
 // allocation context then automatically fallback to the slow allocation path.
-// $a0 == MethodTable
+// a0 == MethodTable
 LEAF_ENTRY RhpNewFast, _TEXT

     // a1 = GetThread()
 #ifdef FEATURE_EMULATED_TLS
     GETTHREAD_ETLS_1
 #else
-    INLINE_GETTHREAD $a1
+    INLINE_GETTHREAD a1
 #endif

     //
     // a0 contains MethodTable pointer
     //
-    ld  $a2, OFFSETOF__MethodTable__m_uBaseSize($a0)
+    ld  a2, OFFSETOF__MethodTable__m_uBaseSize(a0)

     //
     // a0: MethodTable pointer
@@ -37,25 +37,25 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAll
     //

     // Load potential new object address into t3.
-    ld  $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a1)
+    ld  t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1)

     // Determine whether the end of the object would lie outside of the current allocation context. If so,
     // we abandon the attempt to allocate the object directly and fall back to the slow helper.
-    add  $a2, $a2, $t3
-    ld   $t4, OFFSETOF__Thread__m_alloc_context__alloc_limit($a1)
-    bltu $t4, $a2, RhpNewFast_RarePath
+    add  a2, a2, t3
+    ld   t4, OFFSETOF__Thread__m_alloc_context__alloc_limit(a1)
+    bltu t4, a2, RhpNewFast_RarePath

     // Update the alloc pointer to account for the allocation.
-    sd  $a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a1)
+    sd  a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1)

     // Set the new objects MethodTable pointer
-    sd  $a0, OFFSETOF__Object__m_pEEType($t3)
+    sd  a0, OFFSETOF__Object__m_pEEType(t3)

-    mv  $a0, $t3
+    mv  a0, t3
     j   RhpNewFast_Return

 RhpNewFast_RarePath:
-    mv  $a1, x0
+    mv  a1, x0
     j   RhpNewObject

 RhpNewFast_Return:
@@ -64,7 +64,7 @@ RhpNewFast_Return:
 // Allocate non-array object with finalizer.
 // a0 == MethodTable
 LEAF_ENTRY RhpNewFinalizable, _TEXT
-    li  $a1, GC_ALLOC_FINALIZE
+    li  a1, GC_ALLOC_FINALIZE
     j   RhpNewObject
 LEAF_END RhpNewFinalizable, _TEXT

@@ -73,21 +73,21 @@ RhpNewFast_Return:
 // a1 == alloc flags
 NESTED_ENTRY RhpNewObject, _TEXT, NoHandler

-    PUSH_COOP_PINVOKE_FRAME $a3
+    PUSH_COOP_PINVOKE_FRAME a3
     // a3: transition frame

     // Preserve the MethodTable in s0
-    mv  $s0, $a0
+    mv  s0, a0

-    li  $a2, 0 // numElements
+    li  a2, 0 // numElements

     // Call the rest of the allocation helper.
     // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
     call  C_FUNC(RhpGcAlloc)

     // Set the new object's MethodTable pointer on success.
-    beq  $a0, x0, NewOutOfMemory
+    beq  a0, x0, NewOutOfMemory

     .cfi_remember_state
     POP_COOP_PINVOKE_FRAME
@@ -99,8 +99,8 @@ NewOutOfMemory:
     // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
     // an out of memory exception that the caller of this allocator understands.

-    mv  $a0, $s0 // MethodTable pointer
-    li  $a1, 0   // Indicate that we should throw OOM.
+    mv  a0, s0 // MethodTable pointer
+    li  a1, 0  // Indicate that we should throw OOM.

     POP_COOP_PINVOKE_FRAME
     j   C_FUNC(RhExceptionHandling_FailedAllocation)
@@ -112,15 +112,15 @@ NewOutOfMemory:
 // a1 == element/character count
 LEAF_ENTRY RhNewString, _TEXT
     // Make sure computing the overall allocation size won't overflow
-    lui  $a2, (MAX_STRING_LENGTH >> 12) & 0xFFFFF
-    ori  $a2, $a2, MAX_STRING_LENGTH & 0xFFF
-    bltu $a2, $a1, StringSizeOverflow
+    li   a2, MAX_STRING_LENGTH              // li expands to whatever lui/addi sequence the constant needs
+    bltu a2, a1, StringSizeOverflow         // Branch if MAX_STRING_LENGTH < element count (overflow)

     // Compute overall allocation size (align(base size + (element size * elements), 8)).
-    li    $a2, STRING_COMPONENT_SIZE
-    mul   $a2, $a1, $a2                  // $a2 = (a1 * STRING_COMPONENT_SIZE)
-    addi  $a2, $a2, STRING_BASE_SIZE + 7 // $a2 = $a2 + STRING_BASE_SIZE + 7
-    andi  $a2, $a2, ~0x7                 // clear the bits[2:0] of $a2
+    li    a3, STRING_COMPONENT_SIZE         // Load STRING_COMPONENT_SIZE into a3
+    mul   a2, a1, a3                        // a2 = a1 * STRING_COMPONENT_SIZE
+    addi  a2, a2, STRING_BASE_SIZE + 7      // a2 = a2 + STRING_BASE_SIZE + 7
+    andi  a2, a2, ~0x7                      // Clear the bits[2:0] of a2 (align to 8 bytes)

     // a0 == MethodTable
     // a1 == element count
@@ -129,30 +129,30 @@ NewOutOfMemory:

 #ifdef FEATURE_EMULATED_TLS
     GETTHREAD_ETLS_3
 #else
-    INLINE_GETTHREAD $a3
+    INLINE_GETTHREAD a3
 #endif

     // Load potential new object address into t3.
-    ld  $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3)
+    ld  t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3)

     // Determine whether the end of the object would lie outside of the current allocation context. If so,
     // we abandon the attempt to allocate the object directly and fall back to the slow helper.
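For reference, the size computation just above is the usual align-to-8 formula; in C++ (the constant values here are placeholders, the real ones come from the generated AsmOffsets):

    #include <cstddef>

    constexpr size_t STRING_BASE_SIZE = 24;      // placeholder value
    constexpr size_t STRING_COMPONENT_SIZE = 2;  // UTF-16 code unit

    constexpr size_t StringAllocSize(size_t elementCount)
    {
        // align(base size + element size * elements, 8)
        return (STRING_BASE_SIZE + elementCount * STRING_COMPONENT_SIZE + 7) & ~(size_t)7;
    }

    static_assert(StringAllocSize(3) % 8 == 0, "string allocations are 8-byte aligned");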
-    add  $a2, $a2, $t3
-    ld   $t3, OFFSETOF__Thread__m_alloc_context__alloc_limit($a3)
-    bltu $t3, $a2, RhNewString_Rare
+    add  a2, a2, t3
+    ld   t3, OFFSETOF__Thread__m_alloc_context__alloc_limit(a3)
+    bltu t3, a2, RhNewString_Rare

     // Reload new object address into t3.
-    ld  $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3)
+    ld  t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3)

     // Update the alloc pointer to account for the allocation.
-    sd  $a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3)
+    sd  a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3)

     // Set the new object's MethodTable pointer and element count.
-    sd  $a0, OFFSETOF__Object__m_pEEType($t3)
-    sd  $a1, OFFSETOF__Array__m_Length($t3)
+    sd  a0, OFFSETOF__Object__m_pEEType(t3)
+    sd  a1, OFFSETOF__Array__m_Length(t3)

-    // Return the object allocated in $a0.
-    mv  $a0, $t3
+    // Return the object allocated in a0.
+    mv  a0, t3

     j   RhNewString_Return

@@ -162,7 +162,7 @@ StringSizeOverflow:
     // an OOM exception that the caller of this allocator understands.

     // a0 holds MethodTable pointer already
-    li  $a1, 1 // Indicate that we should throw OverflowException
+    li  a1, 1 // Indicate that we should throw OverflowException
     j   C_FUNC(RhExceptionHandling_FailedAllocation)

 RhNewString_Rare:
@@ -172,52 +172,52 @@ RhNewString_Return:
 LEAF_END RhNewString, _TEXT

 // Allocate one-dimensional, zero-based array (SZARRAY).
-// $a0 == MethodTable
-// $a1 == element count
+// a0 == MethodTable
+// a1 == element count
 LEAF_ENTRY RhpNewArray, _TEXT

     // We want to limit the element count to the non-negative 32-bit int range.
     // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
     // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst
     // case (32-dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
-    lui  $a2, 0x7ffff
-    ori  $a2, $a2, 0xfff
-    bltu $a2, $a1, ArraySizeOverflow
+    li   a2, 0x7FFFFFFF                     // Maximum non-negative 32-bit int element count
+    bltu a2, a1, ArraySizeOverflow          // Branch if 0x7FFFFFFF < element count (overflow)

-    ld   $a2, OFFSETOF__MethodTable__m_usComponentSize($a0)
-    mul  $a2, $a1, $a2
-    ld   $a3, OFFSETOF__MethodTable__m_uBaseSize($a0)
-    add  $a2, $a2, $a3
-    addi $a2, $a2, 7
-    andi $a2, $a2, ~0x7 // clear the bits[2:0] of $a2
+    ld   a2, OFFSETOF__MethodTable__m_usComponentSize(a0) // Load component size
+    mul  a2, a1, a2                         // a2 = a1 * component size
+    ld   a3, OFFSETOF__MethodTable__m_uBaseSize(a0)       // Load base size
+    add  a2, a2, a3                         // a2 = a2 + base size
+    addi a2, a2, 7                          // a2 = a2 + 7
+    andi a2, a2, ~0x7                       // Clear the bits[2:0] of a2 (align to 8 bytes)

     // a0 == MethodTable
     // a1 == element count
     // a2 == array size

-    INLINE_GETTHREAD $a3
+    INLINE_GETTHREAD a3

     // Load potential new object address into t3.
-    ld  $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3)
+    ld  t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3)

     // Determine whether the end of the object would lie outside of the current allocation context. If so,
     // we abandon the attempt to allocate the object directly and fall back to the slow helper.
-    add  $a2, $a2, $t3
-    ld   $t3, OFFSETOF__Thread__m_alloc_context__alloc_limit($a3)
-    bltu $t3, $a2, RhpNewArray_Rare
+    add  a2, a2, t3
+    ld   t3, OFFSETOF__Thread__m_alloc_context__alloc_limit(a3)
+    bltu t3, a2, RhpNewArray_Rare

     // Reload new object address into t3.
- ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) // Update the alloc pointer to account for the allocation. - sd $a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) + sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) // Set the new object's MethodTable pointer and element count. - sd $a0, OFFSETOF__Object__m_pEEType($t3) - sd $a1, OFFSETOF__Array__m_Length($t3) + sd a0, OFFSETOF__Object__m_pEEType(t3) + sd a1, OFFSETOF__Array__m_Length(t3) - // Return the object allocated in $a0. - mv $a0, $t3 + // Return the object allocated in a0. + mv a0, t3 j RhpNewArray_Return @@ -226,8 +226,8 @@ ArraySizeOverflow: // 32-bit value. We are going to tail-call to a managed helper that will throw // an overflow exception that the caller of this allocator understands. - // $a0 holds MethodTable pointer already - li $a1, 1 // Indicate that we should throw OverflowException + // a0 holds MethodTable pointer already + li a1, 1 // Indicate that we should throw OverflowException j C_FUNC(RhExceptionHandling_FailedAllocation) RhpNewArray_Rare: @@ -244,22 +244,22 @@ RhpNewArray_Return: NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler // Recover array size by subtracting the alloc_ptr from a2. - ld $t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr($a3) - sub $a2, $a2, $t3 + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) + sub a2, a2, t3 - PUSH_COOP_PINVOKE_FRAME $a3 + PUSH_COOP_PINVOKE_FRAME a3 // Preserve data we will need later into the callee saved registers - mv $s0, $a0 // Preserve MethodTable + mv s0, a0 // Preserve MethodTable - mv $a2, $a1 // numElements - li $a1, 0 // uFlags + mv a2, a1 // numElements + li a1, 0 // uFlags // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) call C_FUNC(RhpGcAlloc) // Set the new object's MethodTable pointer and length on success. - beq $a0, x0, ArrayOutOfMemory + beq a0, x0, ArrayOutOfMemory .cfi_remember_state POP_COOP_PINVOKE_FRAME @@ -271,8 +271,8 @@ ArrayOutOfMemory: // This is the OOM failure path. We are going to tail-call to a managed helper that will throw // an out of memory exception that the caller of this allocator understands. - mv $a0, $s0 // MethodTable Pointer - li $a1, 0 // Indicate that we should throw OOM. + mv a0, s0 // MethodTable Pointer + li a1, 0 // Indicate that we should throw OOM. 
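All of the fast paths in this file follow the same bump-pointer shape; as a compact C++ model (hypothetical simplified types, not the runtime's declarations):

    #include <cstddef>
    #include <cstdint>

    struct AllocContext { uint8_t* alloc_ptr; uint8_t* alloc_limit; };

    // Returns the new object's address, or nullptr to signal the slow path.
    inline void* TryBumpAlloc(AllocContext& ac, size_t size, void* methodTable)
    {
        uint8_t* obj = ac.alloc_ptr;
        if (size > (size_t)(ac.alloc_limit - obj))
            return nullptr;                      // fall back to RhpGcAlloc
        ac.alloc_ptr = obj + size;
        *(void**)obj = methodTable;              // first field is the MethodTable*
        return obj;
    }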
POP_COOP_PINVOKE_FRAME j C_FUNC(RhExceptionHandling_FailedAllocation) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h index 8b099baacb5ddc..ef213437a96006 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h +++ b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h @@ -7,7 +7,7 @@ // // NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix -PLAT_ASM_SIZEOF(320, ExInfo) +PLAT_ASM_SIZEOF(848, ExInfo) PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) PLAT_ASM_OFFSET(10, ExInfo, m_exception) @@ -18,19 +18,19 @@ PLAT_ASM_OFFSET(24, ExInfo, m_frameIter) PLAT_ASM_OFFSET(2D8, ExInfo, m_notifyDebuggerSP) PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer) -PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RA) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RIP) PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) -PLAT_ASM_SIZEOF(268, StackFrameIterator) +PLAT_ASM_SIZEOF(552, StackFrameIterator) PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) PLAT_ASM_OFFSET(0F8, StackFrameIterator, m_OriginalControlPC) PLAT_ASM_OFFSET(100, StackFrameIterator, m_pPreviousTransitionFrame) -PLAT_ASM_SIZEOF(120, PAL_LIMITED_CONTEXT) +PLAT_ASM_SIZEOF(336, PAL_LIMITED_CONTEXT) PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP) PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, RA) @@ -49,7 +49,7 @@ PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, R15) PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP) PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, TP) -PLAT_ASM_SIZEOF(128, REGDISPLAY) +PLAT_ASM_SIZEOF(288, REGDISPLAY) PLAT_ASM_OFFSET(18, REGDISPLAY, SP) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S index 3b8267ca43a3ec..145e81564ccc5d 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S @@ -13,42 +13,47 @@ // ----------------------------------------------------------------------------- // Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) - .macro ALLOC_THROW_FRAME exceptionType - - addi a3, sp, 0 - - // Setup a PAL_LIMITED_CONTEXT on the stack - .if \exceptionType == HARDWARE_EXCEPTION - addi sp, sp, -80 - .cfi_adjust_cfa_offset 80 - sd a3, 0(sp) // a3 is the SP and a1 is the IP of the fault site - sd a1, 8(sp) - .else - PROLOG_STACK_ALLOC 80 - .cfi_adjust_cfa_offset 80 - sd a3, 0(sp) // a3 is the SP and ra is the IP of the fault site - sd ra, 8(sp) - .endif - fsd f24, 16(sp) - fsd f25, 24(sp) - fsd f26, 32(sp) - fsd f27, 40(sp) - fsd f28, 48(sp) - fsd f29, 56(sp) - fsd f30, 64(sp) - fsd f31, 72(sp) - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 112 - sd zero, 16(sp) // locations reserved for return value, not used for exception handling - sd zero, 24(sp) - PROLOG_SAVE_REG_PAIR 23, 24, 32 - PROLOG_SAVE_REG_PAIR 25, 26, 48 - PROLOG_SAVE_REG_PAIR 27, 28, 64 - PROLOG_SAVE_REG_PAIR 29, 30, 80 - PROLOG_SAVE_REG_PAIR 31, 2, 96 - // } end PAL_LIMITED_CONTEXT - - PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo - .endm +.macro ALLOC_THROW_FRAME exceptionType + + mv a3, sp + + // Setup a PAL_LIMITED_CONTEXT on the stack { + .if \exceptionType == HARDWARE_EXCEPTION + addi sp, sp, -0x50 + .cfi_adjust_cfa_offset 0x50 + sd a3, 0(sp) 
// a3 is the SP and a1 is the IP of the fault site
+        sd  a1, 8(sp)
+    .else
+        PROLOG_STACK_ALLOC 0x50
+        .cfi_adjust_cfa_offset 0x50
+        sd  a3, 0(sp)            // a3 is the SP and ra is the IP of the fault site
+        sd  ra, 8(sp)
+    .endif
+
+    // Save FP registers into the context area
+    fsd  ft0, 0x10(sp)
+    fsd  ft1, 0x18(sp)
+    fsd  ft2, 0x20(sp)
+    fsd  ft3, 0x28(sp)
+    fsd  ft4, 0x30(sp)
+    fsd  ft5, 0x38(sp)
+    fsd  ft6, 0x40(sp)
+    fsd  ft7, 0x48(sp)
+
+    PROLOG_SAVE_REG_PAIR_INDEXED s10, s11, 0x70
+
+    sd  zero, 0x10(sp)           // locations reserved for return value, not used for exception handling
+    sd  zero, 0x18(sp)
+    PROLOG_SAVE_REG_PAIR s0, s1, 0x20
+    PROLOG_SAVE_REG_PAIR s2, s3, 0x30
+    PROLOG_SAVE_REG_PAIR s4, s5, 0x40
+    PROLOG_SAVE_REG_PAIR s6, s7, 0x50
+    PROLOG_SAVE_REG_PAIR s8, s9, 0x60
+    // } end PAL_LIMITED_CONTEXT
+
+    PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo
+.endm

 // -----------------------------------------------------------------------------
 // Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet)
@@ -61,12 +66,12 @@
     // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body
     // of method. However, this method needs to be able to change fp before calling funclet.
     // This is required to access locals in funclet.
-        PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, 96
-        PROLOG_SAVE_REG_PAIR 23, 24, 16
-        PROLOG_SAVE_REG_PAIR 25, 26, 32
-        PROLOG_SAVE_REG_PAIR 27, 28, 48
-        PROLOG_SAVE_REG_PAIR 29, 30, 64
-        PROLOG_SAVE_REG_PAIR 31, 2, 80
+        PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED x22, x1, 96
+        PROLOG_SAVE_REG_PAIR x23, x24, 16
+        PROLOG_SAVE_REG_PAIR x25, x26, 32
+        PROLOG_SAVE_REG_PAIR x27, x28, 48
+        PROLOG_SAVE_REG_PAIR x29, x30, 64
+        PROLOG_SAVE_REG_PAIR x31, x2, 80

         addi  fp, sp, 0
         .cfi_def_cfa_register 22 //fp

@@ -75,134 +80,148 @@
     .endif
     .endm

-// -----------------------------------------------------------------------------
 // Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet)
 //  extraStackSize - extra stack space that the user of the macro can use to
 //                   store additional registers.
 //                   It needs to match the value passed to the corresponding
 //                   ALLOC_CALL_FUNCLET_FRAME.
-    .macro FREE_CALL_FUNCLET_FRAME extraStackSize
-
-        .if \extraStackSize != 0
-            EPILOG_STACK_FREE \extraStackSize
-        .endif
-
-        EPILOG_RESTORE_REG_PAIR 23, 24, 16
-        EPILOG_RESTORE_REG_PAIR 25, 26, 32
-        EPILOG_RESTORE_REG_PAIR 27, 28, 48
-        EPILOG_RESTORE_REG_PAIR 29, 30, 64
-        EPILOG_RESTORE_REG_PAIR 31, 2, 80
-        EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 96
-    .endm
+.macro FREE_CALL_FUNCLET_FRAME extraStackSize
+
+    .if \extraStackSize != 0
+        EPILOG_STACK_FREE \extraStackSize
+    .endif
+
+    // The restores must mirror ALLOC_CALL_FUNCLET_FRAME exactly
+    EPILOG_RESTORE_REG_PAIR x23, x24, 16
+    EPILOG_RESTORE_REG_PAIR x25, x26, 32
+    EPILOG_RESTORE_REG_PAIR x27, x28, 48
+    EPILOG_RESTORE_REG_PAIR x29, x30, 64
+    EPILOG_RESTORE_REG_PAIR x31, x2, 80
+    EPILOG_RESTORE_REG_PAIR_INDEXED x22, x1, 96
+.endm

 // -----------------------------------------------------------------------------
 // Macro used to restore preserved general purpose and FP registers from REGDISPLAY
 // regdisplayReg - register pointing to the REGDISPLAY structure
     .macro RESTORE_PRESERVED_REGISTERS regdisplayReg
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23
-        ld  s0, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24
-        ld  s1, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25
-        ld  s2, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26
-        ld  s3, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27
-        ld  s4, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28
-        ld  s5, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29
-        ld  s6, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30
-        ld  s7, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31
-        ld  s8, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP
-        ld  fp, t3, 0
-
-        // load FP preserved regs
-        addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F
-        fld  f24, t3, 0x00
-        fld  f25, t3, 0x08
-        fld  f26, t3, 0x10
-        fld  f27, t3, 0x18
-        fld  f28, t3, 0x20
-        fld  f29, t3, 0x28
-        fld  f30, t3, 0x30
-        fld  f31, t3, 0x38
+    // Load the preserved general-purpose registers
+    ld  t3, OFFSETOF__REGDISPLAY__pR4(\regdisplayReg)   // t3 <- pR4 (points at the saved s0)
+    ld  s0, 0(t3)                                       // Restore s0
+    ld  t3, OFFSETOF__REGDISPLAY__pR5(\regdisplayReg)   // t3 <- pR5
+    ld  s1, 0(t3)                                       // Restore s1
+    ld  t3, OFFSETOF__REGDISPLAY__pR6(\regdisplayReg)   // t3 <- pR6
+    ld  s2, 0(t3)                                       // Restore s2
+    ld  t3, OFFSETOF__REGDISPLAY__pR7(\regdisplayReg)   // t3 <- pR7
+    ld  s3, 0(t3)                                       // Restore s3
+    ld  t3, OFFSETOF__REGDISPLAY__pR8(\regdisplayReg)   // t3 <- pR8
+    ld  s4, 0(t3)                                       // Restore s4
+    ld  t3, OFFSETOF__REGDISPLAY__pR9(\regdisplayReg)   // t3 <- pR9
+    ld  s5, 0(t3)                                       // Restore s5
+    ld  t3, OFFSETOF__REGDISPLAY__pR10(\regdisplayReg)  // t3 <- pR10
+    ld  s6, 0(t3)                                       // Restore s6
+    ld  t3, OFFSETOF__REGDISPLAY__pR11(\regdisplayReg)  // t3 <- pR11
+    ld  s7, 0(t3)                                       // Restore s7
+    ld  t3, OFFSETOF__REGDISPLAY__pR12(\regdisplayReg)  // t3 <- pR12
+    ld  s8, 0(t3)                                       // Restore s8
+    ld  t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg)   // t3 <- pFP
+    ld  fp, 0(t3)                                       // Restore fp
+
+    //
+    // Load FP preserved registers
+    //
+    addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F    // Base address of the saved F registers
+    fld  f24, 0(t3)
+    fld  f25, 8(t3)
+    fld  f26, 16(t3)
+    fld  f27, 24(t3)
+    fld  f28, 32(t3)
+    fld  f29, 40(t3)
+    fld  f30, 48(t3)
+    fld  f31, 56(t3)
+
 .endm

 // -----------------------------------------------------------------------------
 // Macro used to save preserved general purpose and FP registers to REGDISPLAY
 // regdisplayReg - register pointing to the REGDISPLAY structure
-    .macro SAVE_PRESERVED_REGISTERS regdisplayReg
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23
-        sd  s0, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24
-        sd  s1, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25
-        sd  s2, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26
-        sd  s3, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27
-        sd  s4, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28
-        sd  s5, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29
-        sd  s6, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30
-        sd  s7, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31
-        sd  s8, t3, 0
-        ld  t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP
-        sd  fp, t3, 0
-
-        // store FP preserved regs
-        addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F
-        fsd  f24, t3, 0x00
-        fsd  f25, t3, 0x08
-        fsd  f26, t3, 0x10
-        fsd  f27, t3, 0x18
-        fsd  f28, t3, 0x20
-        fsd  f29, t3, 0x28
-        fsd  f30, t3, 0x30
-        fsd  f31, t3, 0x38
-    .endm
+.macro SAVE_PRESERVED_REGISTERS regdisplayReg
+
+    // Save general purpose registers
+    ld  t3, OFFSETOF__REGDISPLAY__pR4(\regdisplayReg)
+    sd  s0, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR5(\regdisplayReg)
+    sd  s1, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR6(\regdisplayReg)
+    sd  s2, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR7(\regdisplayReg)
+    sd  s3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR8(\regdisplayReg)
+    sd  s4, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR9(\regdisplayReg)
+    sd  s5, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR10(\regdisplayReg)
+    sd  s6, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR11(\regdisplayReg)
+    sd  s7, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR12(\regdisplayReg)
+    sd  s8, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg)
+    sd  fp, 0(t3)
+
+    // Save floating-point registers
+    addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F
+    fsd  f24, 0(t3)
+    fsd  f25, 8(t3)
+    fsd  f26, 16(t3)
+    fsd  f27, 24(t3)
+    fsd  f28, 32(t3)
+    fsd  f29, 40(t3)
+    fsd  f30, 48(t3)
+    fsd  f31, 56(t3)
+
+.endm

 // -----------------------------------------------------------------------------
 // Macro used to thrash preserved general purpose registers in REGDISPLAY
 // to make sure nobody uses them
 // regdisplayReg - register pointing to the REGDISPLAY structure
-    .macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg
+.macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg
 #if _DEBUG
-        lui  a3, 0xBAD // 0xBAAD
-        ori  a3, a3, 0xEED
-        lui  t3, 0xDDEE // 0xDDEED
-        addi t3, t3, -1110 // 0xBAA
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31
-        sd a3, t3, 0
-        ld t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP
-        sd a3, t3, 0
+    // Build a recognizable trash pattern in a3 (the li pseudo-instruction
+    // expands to whatever instruction sequence the constant requires)
+    li  a3, 0xbaaddeed
+
+    // Store the pattern into each register's location
+    ld  t3, OFFSETOF__REGDISPLAY__pR4(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR5(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR6(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR7(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR8(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR9(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR10(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR11(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pR12(\regdisplayReg)
+    sd  a3, 0(t3)
+    ld  t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg)
+    sd  a3, 0(t3)
 #endif // _DEBUG
-    .endm
+.endm

 .macro GetThreadA2
     addi  sp, sp, -16
@@ -218,6 +237,13 @@

 #define rsp_offsetof_ExInfo  0
 #define rsp_offsetof_Context STACKSIZEOF_ExInfo

+.macro ADD_LARGE_IMM reg, base, imm
+    // Materialize an immediate that may not fit addi's 12-bit field, then add
+    // it to the base register (clobbers t0)
+    lui  t0, %hi(\imm)
+    addi t0, t0, %lo(\imm)
+    add  \reg, \base, t0
+.endm
+
 //
 // RhpThrowHwEx
 //
@@ -226,29 +252,31 @@
 //
 // OUTPUT:
 //
+
 NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler

     ALLOC_THROW_FRAME HARDWARE_EXCEPTION

     GetThreadA2

-    addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo*
-    sd zero, 0(a1)  // pExInfo->m_exception = null
+    // Compute address for ExInfo*
+    ADD_LARGE_IMM a1, sp, rsp_offsetof_ExInfo      // a1 <- ExInfo*
+    sd  zero, OFFSETOF__ExInfo__m_exception(a1)    // pExInfo->m_exception = null
     li  a3, 1
-    sb a3, 8(a1)    // pExInfo->m_passNumber = 1
+    sb  a3, OFFSETOF__ExInfo__m_passNumber(a1)     // pExInfo->m_passNumber = 1
     li  a3, -1
-    sw a3, 12(a1)   // pExInfo->m_idxCurClause = MaxTryRegionIdx
+    sw  a3, OFFSETOF__ExInfo__m_idxCurClause(a1)   // pExInfo->m_idxCurClause = MaxTryRegionIdx
     li  a3, 2
-    sb a3, 16(a1)   // pExInfo->m_kind = ExKind.HardwareFault
+    sb  a3, OFFSETOF__ExInfo__m_kind(a1)           // pExInfo->m_kind = ExKind.HardwareFault

-    // link the ExInfo into the thread's ExInfo chain
+    // Link the ExInfo into the thread's ExInfo chain
     ld  a3, OFFSETOF__Thread__m_pExInfoStackHead(a2)
     sd  a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1)      // pExInfo->m_pPrevExInfo = m_pExInfoStackHead
     sd  a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo

-    // set the exception context field on the ExInfo
-    addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT*
-    sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext
+    // Set the exception context field on the ExInfo
+    ADD_LARGE_IMM a2, sp, rsp_offsetof_Context     // a2 <- PAL_LIMITED_CONTEXT*
+    sd  a2, OFFSETOF__ExInfo__m_pExContext(a1)     // pExInfo->m_pExContext = pContext

     // a0[31:0]: exception code
     // a1: ExInfo*
@@ -256,7 +284,7 @@

     ALTERNATE_ENTRY RhpThrowHwEx2

-    // no return
+    // No return
     EMIT_BREAKPOINT

 NESTED_END RhpThrowHwEx, _TEXT
@@ -275,84 +303,75 @@

     GetThreadA2

-    // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return
-    // address could have been hijacked when we were in that C# code and we must remove the hijack and
-    // reflect the correct return address in our exception context record. The other throw helpers don't
-    // need this because they cannot be tail-called from C#.
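The hijack handling that follows has a simple shape; a C++ model of it (simplified, with hypothetical field names standing in for the Thread offsets used by the assembly):

    #include <cstdint>

    struct ThreadModel { void* hijackedRA; void** hijackedRALocation; };

    // ctxRA models the RA/IP slots of the PAL_LIMITED_CONTEXT being built.
    inline void Unhijack(ThreadModel& t, uintptr_t callsiteSP, void*& ctxRA)
    {
        if (t.hijackedRA == nullptr)
            return;                                // NotHijacked
        if ((uintptr_t)t.hijackedRALocation >= callsiteSP)
            *t.hijackedRALocation = t.hijackedRA;  // the hijacked slot is still live
        else
            ctxRA = t.hijackedRA;                  // tail call: the slot is gone, fix the context
        t.hijackedRALocation = nullptr;            // ClearThreadState
        t.hijackedRA = nullptr;
    }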
-
-    // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location
-    // where the tail-calling thread had saved RA, which may not match where we have saved RA.
+    // Runtime C# code can tail-call to RhpThrowEx via a binder intrinsic, so the return address may
+    // have been hijacked while we were in that C# code; remove the hijack and reflect the correct
+    // return address in our exception context record. We cannot use INLINE_THREAD_UNHIJACK because
+    // it would write into the stack at the location where the tail-calling thread saved RA, which
+    // may not match where we have saved RA.
+    ld  a1, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2)
+    beq a1, zero, NotHijacked

-    ld a1, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2)
-    beq a1, zero, NotHijacked
+    ld  a3, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2)

-    ld a3, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2)
-
-    // a0: exception object
-    // a1: hijacked return address
-    // a2: pThread
-    // a3: hijacked return address location
+    // a0: exception object
+    // a1: hijacked return address
+    // a2: pThread
+    // a3: hijacked return address location

-    addi t3, sp, STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT // re-compute SP at callsite
-    bltu a3, t3, TailCallWasHijacked // if (m_ppvHijackedReturnAddressLocation < SP at callsite)
+    // Recompute SP at callsite
+    ADD_LARGE_IMM t3, sp, (STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT)
+    bltu a3, t3, TailCallWasHijacked       // if (m_ppvHijackedReturnAddressLocation < SP at callsite)

-    // normal case where a valid return address location is hijacked
-    sd a1, 0(a3)
-    b ClearThreadState
+    // Normal case where a valid return address location is hijacked
+    sd  a1, 0(a3)
+    j   ClearThreadState

-TailCallWasHijacked:
+  TailCallWasHijacked:

     // Abnormal case where the return address location is now invalid because we ended up here via a tail
-    // call. In this case, our hijacked return address should be the correct caller of this method.
+    // call. In this case, our hijacked return address should be the correct caller of this method.

-    // stick the previous return address in RA as well as in the right spots in our PAL_LIMITED_CONTEXT.
-    ori ra, a1, zero
-    sd ra, rsp_offsetof_Context(sp) + OFFSETOF__PAL_LIMITED_CONTEXT__RA
-    sd ra, rsp_offsetof_Context(sp) + OFFSETOF__PAL_LIMITED_CONTEXT__IP
+    // Stick the previous return address in RA as well as in the right spots in our PAL_LIMITED_CONTEXT.
+ ori ra, a1, 0 -ClearThreadState: + // Compute offsets for PAL_LIMITED_CONTEXT + ADD_LARGE_IMM t0, sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__RA) + sd ra, 0(t0) + ADD_LARGE_IMM t0, sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP) + sd ra, 0(t0) - // clear the Thread's hijack state - sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) - sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + ClearThreadState: -NotHijacked: + // Clear the Thread's hijack state + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) - addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* - sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null - li a3, 1 - sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 - li a3, -1 - sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx - li a3, 1 - sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.Throw + NotHijacked: - // link the ExInfo into the thread's ExInfo chain - ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) - sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead - sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + // Compute the offset for ExInfo + ADD_LARGE_IMM a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 + li a3, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.Throw - // set the exception context field on the ExInfo - addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* - sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext + // Link the ExInfo into the thread's ExInfo chain + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + + // Set the exception context field on the ExInfo + ADD_LARGE_IMM a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext // a0: exception object // a1: ExInfo* - call RhThrowEx + call C_FUNC(RhThrowEx) ALTERNATE_ENTRY RhpThrowEx2 - // no return - BREAK - NESTED_END RhpThrowEx, _TEXT + // No return + EMIT_BREAKPOINT + NESTED_END RhpThrowEx, _TEXT // // void FASTCALL RhpRethrow() // -// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo -// -// INPUT: +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo // // OUTPUT: // @@ -363,32 +382,42 @@ NotHijacked: GetThreadA2 - addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* - sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null - sb zero, OFFSETOF__ExInfo__m_kind(a1) // init to a deterministic value (ExKind.None) - li a3, 1 - sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 - li a3, -1 - sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + // a1 <- ExInfo* + addi a1, sp, rsp_offsetof_ExInfo + // pExInfo->m_exception = null + sd zero, OFFSETOF__ExInfo__m_exception(a1) + // init to a deterministic value 
(ExKind.None)
+    sb  zero, OFFSETOF__ExInfo__m_kind(a1)
+    // pExInfo->m_passNumber = 1
+    li  a3, 1
+    sb  a3, OFFSETOF__ExInfo__m_passNumber(a1)
+    // pExInfo->m_idxCurClause = MaxTryRegionIdx
+    li  a3, -1
+    sw  a3, OFFSETOF__ExInfo__m_idxCurClause(a1)

     // link the ExInfo into the thread's ExInfo chain
-    ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2)
-    mv a0, a3 // a0 <- current ExInfo
-    sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead
-    sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo
+    ld  a3, OFFSETOF__Thread__m_pExInfoStackHead(a2)
+    // a0 <- current ExInfo
+    mv  a0, a3
+    // pExInfo->m_pPrevExInfo = m_pExInfoStackHead
+    sd  a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1)
+    // m_pExInfoStackHead = pExInfo
+    sd  a1, OFFSETOF__Thread__m_pExInfoStackHead(a2)

     // set the exception context field on the ExInfo
-    addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT*
-    sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext
+    // a2 <- PAL_LIMITED_CONTEXT*
+    ADD_LARGE_IMM a2, sp, rsp_offsetof_Context
+    // pExInfo->m_pExContext = pContext
+    sd  a2, OFFSETOF__ExInfo__m_pExContext(a1)

     // a0 contains the currently active ExInfo
     // a1 contains the address of the new ExInfo
-    call RhRethrow
+    jal  C_FUNC(RhRethrow)

     ALTERNATE_ENTRY RhpRethrow2

     // no return
-    BREAK
+    EMIT_BREAKPOINT
 NESTED_END RhpRethrow, _TEXT

 //
@@ -405,20 +434,25 @@ NotHijacked:

 NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler

+    // Allocate space for the call funclet frame
     ALLOC_CALL_FUNCLET_FRAME 0x70
-    fsd  f24, 0(sp)
-    fsd  f25, 8(sp)
-    fsd  f26, 16(sp)
-    fsd  f27, 24(sp)
-    fsd  f28, 32(sp)
-    fsd  f29, 40(sp)
-    fsd  f30, 48(sp)
-    fsd  f31, 56(sp)
-    sd   a0, 64(sp) // a0 to a3 are stored to restore them anytime
-    sd   a1, 72(sp)
-    sd   a2, 80(sp)
-    sd   a3, 88(sp)
-    sd   zero, 96(sp) // $zero makes space for the local "is_not_handling_thread_abort"; last qword will store the thread obj
+
+    // Save floating-point registers
+    fsd  f24, 0(sp)
+    fsd  f25, 8(sp)
+    fsd  f26, 16(sp)
+    fsd  f27, 24(sp)
+    fsd  f28, 32(sp)
+    fsd  f29, 40(sp)
+    fsd  f30, 48(sp)
+    fsd  f31, 56(sp)
+
+    // Save integer registers
+    sd   a0, 64(sp)   // a0 to a3 are stored so they can be restored at any time
+    sd   a1, 72(sp)
+    sd   a2, 80(sp)
+    sd   a3, 88(sp)
+    sd   zero, 96(sp) // Make space for the local "is_not_handling_thread_abort"; the last qword will store the thread obj

 #define rsp_offset_is_not_handling_thread_abort 96
 #define rsp_offset_a0 64
@@ -427,104 +461,100 @@ NotHijacked:
 #define rsp_offset_a3 88
 #define rsp_CatchFunclet_offset_thread 104

-    //
-    // clear the DoNotTriggerGc flag, trashes a4-a6
-    //
+    // Clear the DoNotTriggerGc flag, trashes a4-a6
+    jal  C_FUNC(RhpGetThread)
+    sd   a0, rsp_CatchFunclet_offset_thread(sp)
+    mv   a5, a0                                // a5 <- Thread*
+    ld   a0, rsp_offset_a0(sp)
+    ld   a1, rsp_offset_a1(sp)
+    ld   a2, rsp_offset_a2(sp)
+    ld   a3, rsp_offset_a3(sp)

-    call RhpGetThread
-    sd a0, rsp_CatchFunclet_offset_thread(sp)
-    mv a5, a0
-    ld a0, rsp_offset_a0(sp)
-    ld a1, rsp_offset_a1(sp)
-    ld a2, rsp_offset_a2(sp)
-    ld a3, rsp_offset_a3(sp)
+    ld   a4, OFFSETOF__Thread__m_threadAbortException(a5)
+    sub  a4, a4, a0
+    sd   a4, rsp_offset_is_not_handling_thread_abort(sp) // Non-zero if the exception is not ThreadAbortException

-    ld a4, OFFSETOF__Thread__m_threadAbortException(a5)
-    sub a4, a4, a0
-    sd a4, rsp_offset_is_not_handling_thread_abort(sp) // Non-zero if the exception is not ThreadAbortException
-
-    addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags
addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags + // Use `andi` with a large immediate value + and a4, a3, t3 # Apply the mask - addi a6, zero, -17 // a6 = a6 & ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc=0x10. - and a4, a6, t3 + // Set preserved regs to the values expected by the funclet + RESTORE_PRESERVED_REGISTERS a2 - // - // set preserved regs to the values expected by the funclet - // - RESTORE_PRESERVED_REGISTERS a2 - // - // trash the values at the old homes to make sure nobody uses them - // - TRASH_PRESERVED_REGISTERS_STORAGE a2 + // Trash the values at the old homes to make sure nobody uses them + TRASH_PRESERVED_REGISTERS_STORAGE a2 - // - // call the funclet - // + // Call the funclet // a0 still contains the exception object - jalr ra, a1, 0 + jalr a1, 0 // Jump to the handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 // a0 contains resume IP - ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* #ifdef _DEBUG - // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we + // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we // have to spill all the preserved registers and then refill them after the call. - sd a0, rsp_offset_a0(sp) + sd a0, rsp_offset_a0(sp) - SAVE_PRESERVED_REGISTERS a2 + SAVE_PRESERVED_REGISTERS a2 - ld a0, rsp_CatchFunclet_offset_thread(sp) // a0 <- Thread* - ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* - ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value - call RhpValidateExInfoPop + ld a0, rsp_CatchFunclet_offset_thread(sp) // a0 <- Thread* + ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + jal C_FUNC(RhpValidateExInfoPop) - ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* - RESTORE_PRESERVED_REGISTERS a2 + RESTORE_PRESERVED_REGISTERS a2 - ld a0, rsp_offset_a0(sp) // reload resume IP + ld a0, rsp_offset_a0(sp) // Reload resume IP #endif - ld a1, rsp_CatchFunclet_offset_thread(sp) + ld a1, rsp_CatchFunclet_offset_thread(sp) // We must unhijack the thread at this point because the section of stack where the hijack is applied - // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. - INLINE_THREAD_UNHIJACK a1, a3, t3 // Thread in a1, trashes a3 and t3 + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
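    // In C++ terms, the unhijack + ExInfo pop sequence below is roughly the
    // following sketch (an illustration only; it assumes, as the offsets imply,
    // that ExInfos are linked newest-first and live on the stack below the
    // resume SP):
    //
    //   ExInfo* pExInfo = pCurrentExInfo;              // the ExInfo* reloaded into a3
    //   do { pExInfo = pExInfo->m_pPrevExInfo; }
    //   while (pExInfo != nullptr && (uintptr_t)pExInfo < resumeSP);
    //   pThread->m_pExInfoStackHead = pExInfo;         // pop every ExInfo below the resume SP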
+    INLINE_THREAD_UNHIJACK a1, a3, t3                 // Thread in a1, trashes a3 and t3

-    ld    a3, rsp_offset_a3(sp)                       // a3 <- current ExInfo*
-    ld    a2, OFFSETOF__REGDISPLAY__SP(a2)            // a2 <- resume SP value
+    ld    a3, rsp_offset_a3(sp)                       // a3 <- current ExInfo*
+    ld    a2, OFFSETOF__REGDISPLAY__SP(a2)            // a2 <- resume SP value

PopExInfoLoop:
-    ld    a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3)     // a3 <- next ExInfo
-    beq   a3, zero, DonePopping                       // if (pExInfo == null) { we're done }
-    blt   a3, a2, PopExInfoLoop                       // if (pExInfo < resume SP} { keep going }
+    ld    a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3)     // a3 <- next ExInfo
+    beq   a3, zero, DonePopping                       // if (pExInfo == null) { we're done }
+    blt   a3, a2, PopExInfoLoop                       // if (pExInfo < resume SP) { keep going }

DonePopping:
-    sd    a3, OFFSETOF__Thread__m_pExInfoStackHead(a1)    // store the new head on the Thread
+    sd    a3, OFFSETOF__Thread__m_pExInfoStackHead(a1)    // Store the new head on the Thread

     PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3

-    andi  t7, a3, TrapThreadsFlags_AbortInProgress_Bit
-    beq   t7, zero, NoAbort
+    // Test the abort-in-progress bit
+    andi  a6, a3, TrapThreadsFlags_AbortInProgress_Bit
+
+    beq   a6, zero, NoAbort

-    ld    a3, rsp_offset_is_not_handling_thread_abort(sp)
-    bne   a3, zero, NoAbort
+    ld    a3, rsp_offset_is_not_handling_thread_abort(sp)
+    bne   a3, zero, NoAbort

     // It was the ThreadAbortException, so rethrow it
-    // reset SP
-    mv    a1, a0                                      // a1 <- continuation address as exception PC
-    li    a0, STATUS_REDHAWK_THREAD_ABORT
-    mv    sp, a2
-    call  RhpThrowHwEx
+    // Reset SP
+    mv    a1, a0                                      // a1 <- continuation address as exception PC
+    li    a0, STATUS_REDHAWK_THREAD_ABORT
+    mv    sp, a2
+    jal   C_FUNC(RhpThrowHwEx)

NoAbort:
-    // reset SP and jump to continuation address
-    mv    sp, a2
-    jalr  zero, a0, 0
+    // Reset SP and jump to continuation address
+    mv    sp, a2
+    jalr  zero, a0                                    // Jump to the continuation address

#undef rsp_offset_is_not_handling_thread_abort
#undef rsp_offset_a0
@@ -546,71 +576,74 @@ NoAbort:

    NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler

+    // Allocate space for the call funclet frame
     ALLOC_CALL_FUNCLET_FRAME 0x60
-    fsd   f24, 0(sp)
-    fsd   f25, 8(sp)
-    fsd   f26, 16(sp)
-    fsd   f27, 24(sp)
-    fsd   f28, 32(sp)
-    fsd   f29, 40(sp)
-    fsd   f30, 48(sp)
-    fsd   f31, 56(sp)
-    sd    a0, 64(sp)          // a0 and a1 are saved so we have them later
-    sd    a1, 72(sp)
+
+    // Save floating-point registers
+    fsd   f24, 0(sp)
+    fsd   f25, 8(sp)
+    fsd   f26, 16(sp)
+    fsd   f27, 24(sp)
+    fsd   f28, 32(sp)
+    fsd   f29, 40(sp)
+    fsd   f30, 48(sp)
+    fsd   f31, 56(sp)
+
+    // Save integer registers
+    sd    a0, 64(sp)          // Save a0 and a1 so we have them later
+    sd    a1, 72(sp)

#define rsp_offset_a1 72
#define rsp_FinallyFunclet_offset_thread 80

-    // We want to suppress hijacking between invocations of subsequent finallys. We do this because we
-    // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the
-    // method) and then been popped off the stack, leaving behind no trace of its effect.
-    //
-    // So we clear the state before and set it after invocation of the handler.
-    //
+    // Suppress hijacking between invocations of subsequent finallys: a GC cannot be
+    // tolerated after one finally has run and been popped off the stack. Clear the
+    // DoNotTriggerGc flag here; it is set again after the handler returns.
+    call  C_FUNC(RhpGetThread)
+    sd    a0, rsp_FinallyFunclet_offset_thread(sp)
+    mv    a2, a0              // a2 <- Thread*
+    ld    a0, 64(sp)
+    ld    a1, 72(sp)

-    // clear the DoNotTriggerGc flag, trashes a2-a4
-    call  RhpGetThread
-    sd    a0, rsp_FinallyFunclet_offset_thread(sp)
-    mv    a2, a0
-    ld    a0, rsp_offset_a0(sp)
-    ld    a1, rsp_offset_a1(sp)
-
-    addi  t3, a2, OFFSETOF__Thread__m_ThreadStateFlags
-
-    addi  a3, zero, -17       // a3 = a3 & ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc=0x10.
-    and   a4, a3, t3

+    addi  t3, a2, OFFSETOF__Thread__m_ThreadStateFlags
+    li    a3, -17             // a3 = ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc = 0x10
+    and   a4, a3, t3

-    // set preserved regs to the values expected by the funclet
+    // Set preserved regs to the values expected by the funclet
     RESTORE_PRESERVED_REGISTERS a1

-    // trash the values at the old homes to make sure nobody uses them
+    // Trash the values at the old homes to make sure nobody uses them
     TRASH_PRESERVED_REGISTERS_STORAGE a1

-    // call the funclet
-    jalr  ra, a0, 0
+    // Call the funclet
+    jalr  ra, a0, 0

    ALTERNATE_ENTRY RhpCallFinallyFunclet2

-    ld    a1, rsp_offset_a1(sp)   // reload REGDISPLAY pointer
+    ld    a1, rsp_offset_a1(sp)   // Reload REGDISPLAY pointer

-    // save new values of preserved regs into REGDISPLAY
+    // Save new values of preserved registers into REGDISPLAY
     SAVE_PRESERVED_REGISTERS a1

-    // set the DoNotTriggerGc flag, trashes a1-a3
-    ld    a2, rsp_FinallyFunclet_offset_thread(sp)
-    addi  t3, a2, OFFSETOF__Thread__m_ThreadStateFlags
-    addi  a3, zero, -17       // a3 = a3 & ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc=0x10.
-    and   a1, a3, t3
-
-    fld   f24, 0(sp)
-    fld   f25, 8(sp)
-    fld   f26, 16(sp)
-    fld   f27, 24(sp)
-    fld   f28, 32(sp)
-    fld   f29, 40(sp)
-    fld   f30, 48(sp)
-    fld   f31, 56(sp)
-
+    // Set the DoNotTriggerGc flag again, trashes a1-a3
+    ld    a2, rsp_FinallyFunclet_offset_thread(sp)
+    addi  t3, a2, OFFSETOF__Thread__m_ThreadStateFlags
+    li    a3, -17             // a3 = ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc = 0x10
+    and   a1, a3, t3
+
+    // Restore floating-point registers
+    fld   f24, 0(sp)
+    fld   f25, 8(sp)
+    fld   f26, 16(sp)
+    fld   f27, 24(sp)
+    fld   f28, 32(sp)
+    fld   f29, 40(sp)
+    fld   f30, 48(sp)
+    fld   f31, 56(sp)
+
+    // Free the call funclet frame
    FREE_CALL_FUNCLET_FRAME 0x60
+
+    // Return
    EPILOG_RETURN

#undef rsp_offset_a1
@@ -618,7 +651,6 @@ NoAbort:

    NESTED_END RhpCallFinallyFunclet, _TEXT

-
 //
 // void* FASTCALL RhpCallFilterFunclet(OBJECTREF exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay)
 //
@@ -631,37 +663,39 @@ NoAbort:

    NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler

     ALLOC_CALL_FUNCLET_FRAME 0x40
-    fsd   f24, 0(sp)
-    fsd   f25, 8(sp)
-    fsd   f26, 16(sp)
-    fsd   f27, 24(sp)
-    fsd   f28, 32(sp)
-    fsd   f29, 40(sp)
-    fsd   f30, 48(sp)
-    fsd   f31, 56(sp)
-
-    ld    t3, OFFSETOF__REGDISPLAY__pFP(a2)
-    ld    fp, t3, 0
+    fsd   f24, 0x00(sp)
+    fsd   f25, 0x08(sp)
+    fsd   f26, 0x10(sp)
+    fsd   f27, 0x18(sp)
+    fsd   f28, 0x20(sp)
+    fsd   f29, 0x28(sp)
+    fsd   f30, 0x30(sp)
+    fsd   f31, 0x38(sp)
+
+    ld    t3, OFFSETOF__REGDISPLAY__pFP(a2)
+    ld    fp, 0(t3)
+
+    //
     // call the funclet
+    //
     // a0 still contains the exception object
-    jalr  ra, a1, 0
+    jalr  ra, a1, 0

    ALTERNATE_ENTRY RhpCallFilterFunclet2

-    fld   f24, 0(sp)
-    fld   f25, 8(sp)
-    fld   f26, 16(sp)
-    fld   f27, 24(sp)
-    fld   f28, 32(sp)
-    fld   f29, 40(sp)
-    fld   f30, 48(sp)
-    fld   f31, 56(sp)
+    fld   f24, 0x00(sp)
+    fld   f25, 0x08(sp)
+    fld   f26, 0x10(sp)
+    fld   f27, 0x18(sp)
+    fld   f28, 0x20(sp)
+    fld   f29, 0x28(sp)
+    fld   f30, 0x30(sp)
+    fld   f31, 0x38(sp)

     FREE_CALL_FUNCLET_FRAME 0x40

     EPILOG_RETURN

-    NESTED_END RhpCallFilterFunclet, _TEXT
+    NESTED_END RhpCallFilterFunclet, _TEXT

#ifdef FEATURE_OBJCMARSHAL

diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S
index 05eb00620f0af6..a4751966659277 100644
--- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S
+++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S
@@ -14,66 +14,71 @@ PROBE_FRAME_SIZE = 0xD0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp,

 // Define the prolog for setting up the PInvokeTransitionFrame
 .macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK

-    // Save the current stack frame and registers
-
PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, PROBE_FRAME_SIZE - - // Save callee-saved registers - PROLOG_SAVE_REG_PAIR 23, 24, 0x20 - PROLOG_SAVE_REG_PAIR 25, 26, 0x30 - PROLOG_SAVE_REG_PAIR 27, 28, 0x40 - PROLOG_SAVE_REG_PAIR 29, 30, 0x50 - PROLOG_SAVE_REG_PAIR 31, 2, 0x60 - - // Save caller's SP - sd sp, 0x70(sp) - - // Save integer return registers - sd a0, 0x78(sp) - sd a1, 0x80(sp) - - // Alignment padding - // (No need to explicitly handle alignment in RISC-V assembly if stack size is a multiple of 16 bytes) - - // Save FP return registers - fsd f0, 0x90(sp) - fsd f1, 0x98(sp) - fsd f2, 0xA0(sp) - fsd f3, 0xA8(sp) - - // Initialize the PInvokeTransitionFrame - sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) - sd \BITMASK, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) - - // Save caller's SP in the PInvokeTransitionFrame - addi \trashReg, sp, PROBE_FRAME_SIZE - sd \trashReg, 0x70(sp) - - // Link the frame into the Thread - sd zero, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) + # Define the method prologue, allocating enough stack space for the PInvokeTransitionFrame and saving + # incoming register values into it. + + # First create PInvokeTransitionFrame + PROLOG_SAVE_REG_PAIR_INDEXED s10, ra, PROBE_FRAME_SIZE # Push down stack pointer and store FP (s10) and RA (ra) + + # Slot at sp+0x10 is reserved for Thread * + # Slot at sp+0x18 is reserved for bitmask of saved registers + + # Save callee-saved registers + PROLOG_SAVE_REG_PAIR s11, s0, 0x20 + PROLOG_SAVE_REG_PAIR s1, s2, 0x30 + PROLOG_SAVE_REG_PAIR s3, s4, 0x40 + PROLOG_SAVE_REG_PAIR s5, s6, 0x50 + PROLOG_SAVE_REG_PAIR s7, s8, 0x60 + + # Slot at sp+0x70 is reserved for caller sp + + # Save the integer return registers + sd a0, 0x78(sp) + sd a1, 0x80(sp) + + # Slot at sp+0x88 is alignment padding + + # Save the FP return registers + fsd f0, 0x90(sp) + fsd f1, 0x98(sp) + fsd f2, 0xa0(sp) + fsd f3, 0xa8(sp) + + # Perform the rest of the PInvokeTransitionFrame initialization. 
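    # For orientation, the frame being built here corresponds roughly to this C++
    # view (a sketch; field names are illustrative and the authoritative offsets
    # come from AsmOffsets.inc):
    #
    #   struct ProbeFrame {
    #       void*     fp;                 // sp+0x00
    #       void*     ra;                 // sp+0x08
    #       Thread*   pThread;            // sp+0x10
    #       uintptr_t flags;              // sp+0x18, bitmask of saved registers
    #       uintptr_t calleeSaved[10];    // sp+0x20..0x68
    #       void*     callersSP;          // sp+0x70
    #       uintptr_t a0, a1;             // sp+0x78, sp+0x80 (padding at 0x88)
    #       double    f0, f1, f2, f3;     // sp+0x90..0xA8
    #   };                                // padded up to PROBE_FRAME_SIZE (0xD0)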
+ sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker) + sd \BITMASK, OFFSETOF__PInvokeTransitionFrame__m_pThread + 8(sp) # Save the register bitmask passed in by caller + + addi \trashReg, sp, PROBE_FRAME_SIZE # Recover value of caller's SP + sd \trashReg, 0x70(sp) # Save caller's SP + + # Link the frame into the Thread + mv \trashReg, sp + sd \trashReg, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) + .endm // Define the prolog for removing the PInvokeTransitionFrame .macro POP_PROBE_FRAME - // Restore integer return registers - ld a0, 0x78(sp) - ld a1, 0x80(sp) - - // Restore FP return registers - fld f0, 0x90(sp) - fld f1, 0x98(sp) - fld f2, 0xA0(sp) - fld f3, 0xA8(sp) - - // Restore callee-saved registers - EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 - EPILOG_RESTORE_REG_PAIR 25, 26, 0x30 - EPILOG_RESTORE_REG_PAIR 27, 28, 0x40 - EPILOG_RESTORE_REG_PAIR 29, 30, 0x50 - EPILOG_RESTORE_REG_PAIR 31, 2, 0x60 - - // Restore stack frame - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, PROBE_FRAME_SIZE + // Restore the integer return registers + ld a0, 0x78(sp) + ld a1, 0x80(sp) + + // Restore the FP return registers + fld f0, 0x90(sp) + fld f1, 0x98(sp) + fld f2, 0xA0(sp) + fld f3, 0xA8(sp) + + // Restore callee saved registers + EPILOG_RESTORE_REG_PAIR t0, t1, 0x20 + EPILOG_RESTORE_REG_PAIR t2, t3, 0x30 + EPILOG_RESTORE_REG_PAIR t4, t5, 0x40 + EPILOG_RESTORE_REG_PAIR t6, s0, 0x50 + EPILOG_RESTORE_REG_PAIR s1, s2, 0x60 + + // Restore the frame pointer and return address + EPILOG_RESTORE_REG_PAIR_INDEXED s3, ra, PROBE_FRAME_SIZE .endm // Fix up the hijacked callstack @@ -96,20 +101,22 @@ PROBE_FRAME_SIZE = 0xD0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, sd zero, OFFSETOF__Thread__m_uHijackedReturnValueFlags(a2) .endm +// // GC Probe Hijack target +// NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler FixupHijackedCallstack PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 - andi t8, a3, TrapThreadsFlags_TrapThreads_Bit - bne t8, zero, WaitForGC - jalr ra + andi t3, a3, TrapThreadsFlags_TrapThreads_Bit + bnez t3, WaitForGC + jr ra WaitForGC: - lui t7, ((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) >> 12) & 0xfffff - ori t7, t7, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) & 0xfff - or t3, t3, t7 - jal C_FUNC(RhpWaitForGC) + lui t6, %hi((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A4 + PTFF_SAVE_A5)) + addi t6, t6, %lo((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A4 + PTFF_SAVE_A5)) + or t3, t3, t6 + j C_FUNC(RhpWaitForGC) NESTED_END RhpGcProbeHijack .global C_FUNC(RhpThrowHwEx) @@ -118,12 +125,15 @@ NESTED_END RhpGcProbeHijack NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler PUSH_PROBE_FRAME a2, a3, t3 - ld a0, OFFSETOF__Thread__m_pDeferredTransitionFrame(a2) - jal C_FUNC(RhpWaitForGC2) + ld t0, OFFSETOF__Thread__m_pDeferredTransitionFrame(a2) + call C_FUNC(RhpWaitForGC2) + + ld t1, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) - ld a2, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) - andi t8, a2, PTFF_THREAD_ABORT_BIT - bne t8, zero, ThrowThreadAbort + # Load PTFF_THREAD_ABORT_BIT into a register, using t2 if needed + li t2, PTFF_THREAD_ABORT_BIT + and t3, t1, t2 + bnez t3, ThrowThreadAbort .cfi_remember_state POP_PROBE_FRAME @@ -132,9 +142,9 @@ NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler .cfi_restore_state ThrowThreadAbort: POP_PROBE_FRAME - li a0, STATUS_REDHAWK_THREAD_ABORT - addi a1, ra, 0 // return address as exception PC - jal RhpThrowHwEx + li a0, STATUS_REDHAWK_THREAD_ABORT + mv a1, ra # Set return address as exception PC + j 
RhpThrowHwEx NESTED_END RhpWaitForGC .global C_FUNC(RhpGcPoll2) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S index 625f5f52200934..3a7e1b1148f7ab 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S @@ -11,16 +11,19 @@ // Macro that generates code to check a single cache entry. .macro CHECK_CACHE_ENTRY entry - // Check a single entry in the cache. - // t0 : Cache data structure. Also used for target address jump. - // t1 : Instance MethodTable* - // t2 : Indirection cell address, preserved - // t3 : Trashed - ld t3, OFFSETOF__InterfaceDispatchCache__m_rgEntries(t0) + (\entry * 16) - bne t1, t3, 0f - ld t0, OFFSETOF__InterfaceDispatchCache__m_rgEntries(t0) + (\entry * 16) + 8 - jalr t0, 0 -0: + // Load cache entry data into a temporary register + ld t6, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))(t0) + + // Compare with MethodTable* in t1 + bne t1, t6, 0f + + // Load the target address from the cache entry + ld t6, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)(t0) + + // Jump to the address in t6 + jalr t0, t6, 0 + + 0: .endm // @@ -28,27 +31,27 @@ // .macro DEFINE_INTERFACE_DISPATCH_STUB entries - NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler + NESTED_ENTRY RhpInterfaceDispatch\entries, _TEXT, NoHandler - // t2 holds the indirection cell address. Load the cache pointer. - ld t0, OFFSETOF__InterfaceDispatchCell__m_pCache(t8) + // t0 holds the indirection cell address. Load the cache pointer. + ld t0, OFFSETOF__InterfaceDispatchCell__m_pCache(a1) // Using a1 as an alternative base register - // Load the MethodTable from the object instance in a0. - ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries - ld t1, 0(a0) + // Load the MethodTable from the object instance in a0. + ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries + ld t1, 0(a0) - .global CurrentEntry - .set CurrentEntry, 0 + .global CurrentEntry + .set CurrentEntry, 0 - .rept \entries - CHECK_CACHE_ENTRY CurrentEntry - .set CurrentEntry, CurrentEntry + 1 - .endr + .rept \entries + CHECK_CACHE_ENTRY CurrentEntry + .set CurrentEntry, CurrentEntry + 1 + .endr - // t2 still contains the indirection cell address. - jal C_FUNC(RhpInterfaceDispatchSlow) + // t0 still contains the indirection cell address. + jal C_FUNC(RhpInterfaceDispatchSlow) - NESTED_END "RhpInterfaceDispatch\entries", _TEXT + NESTED_END RhpInterfaceDispatch\entries, _TEXT .endm @@ -104,9 +107,9 @@ LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT // t2 contains the interface dispatch cell address. 
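    // For reference, each cache probe emitted by CHECK_CACHE_ENTRY above is
    // logically the following C++ sketch (a model only; the entry field names
    // are illustrative, the layout being defined by InterfaceDispatchCache):
    //
    //   if (pCache->m_rgEntries[i].m_pInstanceType == pObjMethodTable)  // MethodTable* in t1
    //       goto *pCache->m_rgEntries[i].m_pTargetCode;                 // jump to cached target
    //   // otherwise fall through to the next entry, and finally into this slow path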
// Calling convention of the universal thunk is: - // t7: target address for the thunk to call - // t8: parameter of the thunk's target - PREPARE_EXTERNAL_VAR RhpCidResolve, t7 + // t3: target address for the thunk to call + // t4: parameter of the thunk's target + PREPARE_EXTERNAL_VAR RhpCidResolve, t3 jal C_FUNC(RhpUniversalTransition_DebugStepTailCall) LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index 8e86d82dfefc79..c808479b6c9379 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -89,99 +89,96 @@ NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler - // FP and RA registers - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, STACK_SIZE // ;; Push down stack pointer and store FP and RA - - // Floating point registers - fsd fa0, FLOAT_ARG_OFFSET($sp) - fsd fa1, FLOAT_ARG_OFFSET+8($sp) - fsd fa2, FLOAT_ARG_OFFSET+16($sp) - fsd fa3, FLOAT_ARG_OFFSET+24($sp) - fsd fa4, FLOAT_ARG_OFFSET+32($sp) - fsd fa5, FLOAT_ARG_OFFSET+40($sp) - fsd fa6, FLOAT_ARG_OFFSET+48($sp) - fsd fa7, FLOAT_ARG_OFFSET+56($sp) + // Save FP and RA registers + PROLOG_SAVE_REG_PAIR_INDEXED s10, ra, STACK_SIZE + + // Save Floating Point registers + fsd f0, 0x100(sp) // Save f0 + fsd f1, 0x108(sp) // Save f1 + fsd f2, 0x110(sp) // Save f2 + fsd f3, 0x118(sp) // Save f3 + fsd f4, 0x120(sp) // Save f4 + fsd f5, 0x128(sp) // Save f5 + fsd f6, 0x130(sp) // Save f6 + fsd f7, 0x138(sp) // Save f7 // Space for return buffer data (0x40 bytes) // Save argument registers - sd a0, ARGUMENT_REGISTERS_OFFSET($sp) - sd a1, ARGUMENT_REGISTERS_OFFSET+8($sp) - sd a2, ARGUMENT_REGISTERS_OFFSET+16($sp) - sd a3, ARGUMENT_REGISTERS_OFFSET+24($sp) - sd a4, ARGUMENT_REGISTERS_OFFSET+32($sp) - sd a5, ARGUMENT_REGISTERS_OFFSET+40($sp) - sd a6, ARGUMENT_REGISTERS_OFFSET+48($sp) - sd a7, ARGUMENT_REGISTERS_OFFSET+56($sp) - sd t0, ARGUMENT_REGISTERS_OFFSET+64($sp) - sd t1, ARGUMENT_REGISTERS_OFFSET+72($sp) - -#ifdef TRASH_SAVED_ARGUMENT_REGISTERS - PREPARE_EXTERNAL_VAR RhpFpTrashValues, a1 - - fld fa0, 0(a1) - fld fa1, 8(a1) - fld fa2, 16(a1) - fld fa3, 24(a1) - fld fa4, 32(a1) - fld fa5, 40(a1) - fld fa6, 48(a1) - fld fa7, 56(a1) - - PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, a1 - - ld a2, 16(a1) - ld a3, 24(a1) - ld a4, 32(a1) - ld a5, 40(a1) - ld a6, 48(a1) - ld a7, 56(a1) -#endif // TRASH_SAVED_ARGUMENT_REGISTERS - - addi a0, sp, DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function is a pointer to the return block - mv a1, t1 // Second parameter to target function - jalr t0, t1, 0 - - // We cannot make the label public as that tricks DIA stackwalker into thinking - // it's the beginning of a method. For this reason we export an auxiliary variable - // holding the address instead. - ALTERNATE_ENTRY ReturnFrom\FunctionName - - // Move the result (the target address) to t2 so it doesn't get overridden when we restore the - // argument registers. 
- mv t2, a0 - - // Restore floating point registers - fld fa0, FLOAT_ARG_OFFSET($sp) - fld fa1, FLOAT_ARG_OFFSET+8($sp) - fld fa2, FLOAT_ARG_OFFSET+16($sp) - fld fa3, FLOAT_ARG_OFFSET+24($sp) - fld fa4, FLOAT_ARG_OFFSET+32($sp) - fld fa5, FLOAT_ARG_OFFSET+40($sp) - fld fa6, FLOAT_ARG_OFFSET+48($sp) - fld fa7, FLOAT_ARG_OFFSET+56($sp) - - // Restore the argument registers - ld a0, ARGUMENT_REGISTERS_OFFSET($sp) - ld a1, ARGUMENT_REGISTERS_OFFSET+8($sp) - ld a2, ARGUMENT_REGISTERS_OFFSET+16($sp) - ld a3, ARGUMENT_REGISTERS_OFFSET+24($sp) - ld a4, ARGUMENT_REGISTERS_OFFSET+32($sp) - ld a5, ARGUMENT_REGISTERS_OFFSET+40($sp) - ld a6, ARGUMENT_REGISTERS_OFFSET+48($sp) - ld a7, ARGUMENT_REGISTERS_OFFSET+56($sp) - ld t0, ARGUMENT_REGISTERS_OFFSET+64($sp) + sd a0, 0x140(sp) // Save a0 + sd a1, 0x148(sp) // Save a1 + sd a2, 0x150(sp) // Save a2 + sd a3, 0x158(sp) // Save a3 + sd a4, 0x160(sp) // Save a4 + sd a5, 0x168(sp) // Save a5 + sd a6, 0x170(sp) // Save a6 + sd a7, 0x178(sp) // Save a7 + sd tp, 0x180(sp) // Save tp + + // Optionally prepare the values to trash saved argument registers + #ifdef TRASH_SAVED_ARGUMENT_REGISTERS + PREPARE_EXTERNAL_VAR RhpFpTrashValues, t0 + + fld f0, 0(t0) // Load f0 from t0 + fld f1, 0x08(t0) // Load f1 from t0 + fld f2, 0x10(t0) // Load f2 from t0 + fld f3, 0x18(t0) // Load f3 from t0 + fld f4, 0x20(t0) // Load f4 from t0 + fld f5, 0x28(t0) // Load f5 from t0 + fld f6, 0x30(t0) // Load f6 from t0 + fld f7, 0x38(t0) // Load f7 from t0 + + PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, t0 + + ld a0, 0x10(t0) // Load a0 from t0 + ld a1, 0x18(t0) // Load a1 from t0 + ld a2, 0x20(t0) // Load a2 from t0 + ld a3, 0x28(t0) // Load a3 from t0 + ld a4, 0x30(t0) // Load a4 from t0 + ld a5, 0x38(t0) // Load a5 from t0 + ld a6, 0x40(t0) // Load a6 from t0 + ld a7, 0x48(t0) // Load a7 from t0 + #endif // TRASH_SAVED_ARGUMENT_REGISTERS + + addi a0, sp, DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function + mv a1, t1 // Second parameter to target function + jalr t0, t1, 0 // Jump to the function in t1 + + // Restore the result address from t2 + mv t2, a0 // Move result to t2 + + // Restore Floating Point registers + fld f0, 0x100(sp) // Restore f0 + fld f1, 0x108(sp) // Restore f1 + fld f2, 0x110(sp) // Restore f2 + fld f3, 0x118(sp) // Restore f3 + fld f4, 0x120(sp) // Restore f4 + fld f5, 0x128(sp) // Restore f5 + fld f6, 0x130(sp) // Restore f6 + fld f7, 0x138(sp) // Restore f7 + + // Restore argument registers + ld a0, 0x140(sp) // Restore a0 + ld a1, 0x148(sp) // Restore a1 + ld a2, 0x150(sp) // Restore a2 + ld a3, 0x158(sp) // Restore a3 + ld a4, 0x160(sp) // Restore a4 + ld a5, 0x168(sp) // Restore a5 + ld a6, 0x170(sp) // Restore a6 + ld a7, 0x178(sp) // Restore a7 + ld tp, 0x180(sp) // Restore tp // Restore FP and RA registers, and free the allocated stack block - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, STACK_SIZE + EPILOG_RESTORE_REG_PAIR_INDEXED s10, ra, STACK_SIZE - // Tailcall to the target address. - jalr t2, t2, 0 + // Tailcall to the target address in t2 + jalr t2, 0 NESTED_END Rhp\FunctionName, _TEXT .endm + // To enable proper step-in behavior in the debugger, we need to have two instances // of the thunk. For the first one, the debugger steps into the call in the function, // for the other, it steps over it. 
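Before moving on to the write barriers, the shape of the thunk above may be easier to
follow in C++ terms. This is only a sketch; the dispatcher type, the function name, and
the explicit returnBlock parameter are illustrative, not the runtime's actual signatures:

// Illustrative model of RhpUniversalTransition: every argument register is spilled
// to the frame, a dispatcher inspects them through the return block, and control
// then tail-calls whatever target address the dispatcher returns.
using Dispatcher = void* (*)(void* returnBlock, void* dispatchArg);

extern "C" void* UniversalTransitionModel(Dispatcher dispatcher, void* dispatchArg,
                                          void* returnBlock)
{
    // 1. Prologue (asm): save fa0-fa7 and a0-a7 plus the transition registers.
    // 2. Hand the dispatcher a pointer to the return block and the extra argument.
    void* target = dispatcher(returnBlock, dispatchArg);
    // 3. Epilogue (asm): reload every argument register, free the frame, and
    //    tail-call 'target', so the thunk never shows up on the callee's stack.
    return target;
}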
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S
index fc35833337ac0c..ced39ea870bd72 100644
--- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S
+++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S
@@ -191,14 +191,14 @@
 // void JIT_ByRefWriteBarrier
 // On entry:
-//  t8 : the source address (points to object reference to write)
-//  t6 : the destination address (object reference written here)
+//  t6 : the source address (points to object reference to write)
+//  t5 : the destination address (object reference written here)
 //
 // On exit:
-//  t8 : incremented by 8
 //  t6 : incremented by 8
-//  t7 : trashed
-//  t3, t4 : trashed
+//  t5 : incremented by 8
+//  t4 : trashed
+//  t2, t3 : trashed
 //
 // NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
 // if you add more trashed registers.
@@ -209,9 +209,9 @@
 LEAF_ENTRY RhpByRefAssignRef, _TEXT

 ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
-    ld    t7, 0(t8)
-    addi  t8, t8, 8
-    b     C_FUNC(RhpCheckedAssignRef)
+    ld    t5, 0(t6)
+    addi  t6, t6, 8
+    j     C_FUNC(RhpCheckedAssignRef)

 LEAF_END RhpByRefAssignRef, _TEXT

@@ -221,13 +221,13 @@ LEAF_END RhpByRefAssignRef, _TEXT
 // on the managed heap.
 //
 // On entry:
-//  t6 : the destination address (LHS of the assignment).
+//  t5 : the destination address (LHS of the assignment).
 //       May not be a heap location (hence the checked).
-//  t7 : the object reference (RHS of the assignment).
+//  t6 : the object reference (RHS of the assignment).
 //
 // On exit:
 //  t3, t4 : trashed
-//  t6 : incremented by 8
+//  t5 : incremented by 8
 LEAF_ENTRY RhpCheckedAssignRef, _TEXT

     // is destReg within the heap?
@@ -239,12 +239,12 @@ LEAF_ENTRY RhpCheckedAssignRef, _TEXT
     ld    t3, 0(t3)
     sltu  t0, t3, t6
     or    t4, t0, t4
-    beq   t4, zero, C_FUNC(RhpAssignRefLoongArch64)
+    beq   t4, zero, C_FUNC(RhpAssignRefRiscV64)

 NotInHeap:
 ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
-    sd    t7, 0(t6)
-    addi  t6, t6, 8
+    sd    t6, 0(t5)
+    addi  t5, t5, 8
     jalr  ra, 0

 LEAF_END RhpCheckedAssignRef, _TEXT

@@ -255,29 +255,29 @@ LEAF_END RhpCheckedAssignRef, _TEXT
 // reside on the managed heap.
 //
 // On entry:
-//  t6 : the destination address (LHS of the assignment).
-//  t7 : the object reference (RHS of the assignment).
+//  t5 : the destination address (LHS of the assignment).
+//  t6 : the object reference (RHS of the assignment).
 //
 // On exit:
 //  t3, t4 : trashed
 //  t6 : incremented by 8
-LEAF_ENTRY RhpAssignRefLoongArch64, _TEXT
+LEAF_ENTRY RhpAssignRefRiscV64, _TEXT
 ALTERNATE_ENTRY RhpAssignRefAVLocation
-    sd    t7, 0(t6)
+    sd    t6, 0(t5)

-    INSERT_UNCHECKED_WRITE_BARRIER_CORE t6, t7
+    INSERT_UNCHECKED_WRITE_BARRIER_CORE t5, t6

-    addi  t6, t6, 8
+    addi  t5, t5, 8
     jalr  ra, 0

-LEAF_END RhpAssignRefLoongArch64, _TEXT
+LEAF_END RhpAssignRefRiscV64, _TEXT

-// Same as RhpAssignRefLoongArch64, but with standard ABI.
+// Same as RhpAssignRefRiscV64, but with standard ABI.
LEAF_ENTRY RhpAssignRef, _TEXT - mv t6, a0 ; t6 = dst - mv t7, a1 ; t7 = val - b C_FUNC(RhpAssignRefLoongArch64) + mv t5, a0 ; t5 = dst + mv t6, a1 ; t6 = val + j C_FUNC(RhpAssignRefRiscV64) LEAF_END RhpAssignRef, _TEXT @@ -307,7 +307,7 @@ LEAF_ENTRY RhpCheckedLockCmpXchg ld t0, 0(a0) beq t0, t1, EndOfExchange mv t1, t0 - b EndOfExchange + j EndOfExchange sd a1, 0(a0) EndOfExchange: diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h index 983f17a36aba0a..7bf519097e474c 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h @@ -87,7 +87,7 @@ FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pD return result; } -#if defined(HOST_AMD64) || defined(HOST_ARM64) +#if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_RISCV64) FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) { __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp index 9d8ebcfda14691..217ba8de7db88b 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp @@ -118,41 +118,38 @@ #elif defined(HOST_RISCV64) -#define MCREG_X0(mc) ((mc).__gregs[0]) -#define MCREG_X1(mc) ((mc).__gregs[1]) -#define MCREG_X2(mc) ((mc).__gregs[2]) -#define MCREG_X3(mc) ((mc).__gregs[3]) -#define MCREG_X4(mc) ((mc).__gregs[4]) -#define MCREG_X5(mc) ((mc).__gregs[5]) -#define MCREG_X6(mc) ((mc).__gregs[6]) -#define MCREG_X7(mc) ((mc).__gregs[7]) -#define MCREG_X8(mc) ((mc).__gregs[8]) -#define MCREG_X9(mc) ((mc).__gregs[9]) -#define MCREG_X10(mc) ((mc).__gregs[10]) -#define MCREG_X11(mc) ((mc).__gregs[11]) -#define MCREG_X12(mc) ((mc).__gregs[12]) -#define MCREG_X13(mc) ((mc).__gregs[13]) -#define MCREG_X14(mc) ((mc).__gregs[14]) -#define MCREG_X15(mc) ((mc).__gregs[15]) -#define MCREG_X16(mc) ((mc).__gregs[16]) -#define MCREG_X17(mc) ((mc).__gregs[17]) -#define MCREG_X18(mc) ((mc).__gregs[18]) -#define MCREG_X19(mc) ((mc).__gregs[19]) -#define MCREG_X20(mc) ((mc).__gregs[20]) -#define MCREG_X21(mc) ((mc).__gregs[21]) -#define MCREG_X22(mc) ((mc).__gregs[22]) -#define MCREG_X23(mc) ((mc).__gregs[23]) -#define MCREG_X24(mc) ((mc).__gregs[24]) -#define MCREG_X25(mc) ((mc).__gregs[25]) -#define MCREG_X26(mc) ((mc).__gregs[26]) -#define MCREG_X27(mc) ((mc).__gregs[27]) -#define MCREG_X28(mc) ((mc).__gregs[28]) -#define MCREG_X29(mc) ((mc).__gregs[29]) -#define MCREG_X30(mc) ((mc).__gregs[30]) -#define MCREG_X31(mc) ((mc).__gregs[31]) -#define MCREG_FP(mc) ((mc).__gregs[32]) // Assuming FP is stored in __gregs[32] -#define MCREG_SP(mc) ((mc).__gregs[33]) // Assuming SP is stored in __gregs[33] -#define MCREG_PC(mc) ((mc).__pc) +#define MCREG_Ra(mc) ((mc).__gregs[1]) +#define MCREG_Sp(mc) ((mc).__gregs[2]) +#define MCREG_Gp(mc) ((mc).__gregs[3]) +#define MCREG_Tp(mc) ((mc).__gregs[4]) +#define MCREG_T0(mc) ((mc).__gregs[5]) +#define MCREG_T1(mc) ((mc).__gregs[6]) +#define MCREG_T2(mc) ((mc).__gregs[7]) +#define MCREG_Fp(mc) ((mc).__gregs[8]) +#define MCREG_S1(mc) ((mc).__gregs[9]) +#define MCREG_A0(mc) ((mc).__gregs[10]) +#define MCREG_A1(mc) ((mc).__gregs[11]) +#define MCREG_A2(mc) ((mc).__gregs[12]) +#define MCREG_A3(mc) ((mc).__gregs[13]) +#define 
MCREG_A4(mc) ((mc).__gregs[14]) +#define MCREG_A5(mc) ((mc).__gregs[15]) +#define MCREG_A6(mc) ((mc).__gregs[16]) +#define MCREG_A7(mc) ((mc).__gregs[17]) +#define MCREG_S2(mc) ((mc).__gregs[18]) +#define MCREG_S3(mc) ((mc).__gregs[19]) +#define MCREG_S4(mc) ((mc).__gregs[20]) +#define MCREG_S5(mc) ((mc).__gregs[21]) +#define MCREG_S6(mc) ((mc).__gregs[22]) +#define MCREG_S7(mc) ((mc).__gregs[23]) +#define MCREG_S8(mc) ((mc).__gregs[24]) +#define MCREG_S9(mc) ((mc).__gregs[25]) +#define MCREG_S10(mc) ((mc).__gregs[26]) +#define MCREG_S11(mc) ((mc).__gregs[27]) +#define MCREG_T3(mc) ((mc).__gregs[28]) +#define MCREG_T4(mc) ((mc).__gregs[29]) +#define MCREG_T5(mc) ((mc).__gregs[30]) +#define MCREG_T6(mc) ((mc).__gregs[31]) +#define MCREG_Pc(mc) ((mc).__gregs[0]) #elif HOST_64BIT #define MCREG_Rip(mc) ((mc).__gregs[_REG_RIP]) @@ -227,39 +224,38 @@ #elif defined(HOST_RISCV64) -#define MCREG_X0(mc) ((mc).__gregs[0]) -#define MCREG_X1(mc) ((mc).__gregs[1]) -#define MCREG_X2(mc) ((mc).__gregs[2]) -#define MCREG_X3(mc) ((mc).__gregs[3]) -#define MCREG_X4(mc) ((mc).__gregs[4]) -#define MCREG_X5(mc) ((mc).__gregs[5]) -#define MCREG_X6(mc) ((mc).__gregs[6]) -#define MCREG_X7(mc) ((mc).__gregs[7]) -#define MCREG_X8(mc) ((mc).__gregs[8]) -#define MCREG_X9(mc) ((mc).__gregs[9]) -#define MCREG_X10(mc) ((mc).__gregs[10]) -#define MCREG_X11(mc) ((mc).__gregs[11]) -#define MCREG_X12(mc) ((mc).__gregs[12]) -#define MCREG_X13(mc) ((mc).__gregs[13]) -#define MCREG_X14(mc) ((mc).__gregs[14]) -#define MCREG_X15(mc) ((mc).__gregs[15]) -#define MCREG_X16(mc) ((mc).__gregs[16]) -#define MCREG_X17(mc) ((mc).__gregs[17]) -#define MCREG_X18(mc) ((mc).__gregs[18]) -#define MCREG_X19(mc) ((mc).__gregs[19]) -#define MCREG_X20(mc) ((mc).__gregs[20]) -#define MCREG_X21(mc) ((mc).__gregs[21]) -#define MCREG_X22(mc) ((mc).__gregs[22]) -#define MCREG_X23(mc) ((mc).__gregs[23]) -#define MCREG_X24(mc) ((mc).__gregs[24]) -#define MCREG_X25(mc) ((mc).__gregs[25]) -#define MCREG_X26(mc) ((mc).__gregs[26]) -#define MCREG_X27(mc) ((mc).__gregs[27]) -#define MCREG_X28(mc) ((mc).__gregs[28]) -#define MCREG_X29(mc) ((mc).__gregs[29]) -#define MCREG_X30(mc) ((mc).__gregs[30]) -#define MCREG_X31(mc) ((mc).__gregs[31]) -#define MCREG_PC(mc) ((mc).__pc) +#define MCREG_Ra(mc) ((mc).__gregs[1]) +#define MCREG_Sp(mc) ((mc).__gregs[2]) +#define MCREG_Gp(mc) ((mc).__gregs[3]) +#define MCREG_Tp(mc) ((mc).__gregs[4]) +#define MCREG_T0(mc) ((mc).__gregs[5]) +#define MCREG_T1(mc) ((mc).__gregs[6]) +#define MCREG_T2(mc) ((mc).__gregs[7]) +#define MCREG_Fp(mc) ((mc).__gregs[8]) +#define MCREG_S1(mc) ((mc).__gregs[9]) +#define MCREG_A0(mc) ((mc).__gregs[10]) +#define MCREG_A1(mc) ((mc).__gregs[11]) +#define MCREG_A2(mc) ((mc).__gregs[12]) +#define MCREG_A3(mc) ((mc).__gregs[13]) +#define MCREG_A4(mc) ((mc).__gregs[14]) +#define MCREG_A5(mc) ((mc).__gregs[15]) +#define MCREG_A6(mc) ((mc).__gregs[16]) +#define MCREG_A7(mc) ((mc).__gregs[17]) +#define MCREG_S2(mc) ((mc).__gregs[18]) +#define MCREG_S3(mc) ((mc).__gregs[19]) +#define MCREG_S4(mc) ((mc).__gregs[20]) +#define MCREG_S5(mc) ((mc).__gregs[21]) +#define MCREG_S6(mc) ((mc).__gregs[22]) +#define MCREG_S7(mc) ((mc).__gregs[23]) +#define MCREG_S8(mc) ((mc).__gregs[24]) +#define MCREG_S9(mc) ((mc).__gregs[25]) +#define MCREG_S10(mc) ((mc).__gregs[26]) +#define MCREG_S11(mc) ((mc).__gregs[27]) +#define MCREG_T3(mc) ((mc).__gregs[28]) +#define MCREG_T4(mc) ((mc).__gregs[29]) +#define MCREG_T5(mc) ((mc).__gregs[30]) +#define MCREG_T6(mc) ((mc).__gregs[31]) +#define MCREG_Pc(mc) ((mc).__gregs[0]) #elif 
HOST_64BIT #define MCREG_Rip(mc) ((mc).gregs[REG_RIP]) @@ -408,38 +404,37 @@ #elif defined(HOST_RISCV64) -#define MCREG_X0(mc) ((mc).regs[0]) -#define MCREG_X1(mc) ((mc).regs[1]) -#define MCREG_X2(mc) ((mc).regs[2]) -#define MCREG_X3(mc) ((mc).regs[3]) -#define MCREG_X4(mc) ((mc).regs[4]) -#define MCREG_X5(mc) ((mc).regs[5]) -#define MCREG_X6(mc) ((mc).regs[6]) -#define MCREG_X7(mc) ((mc).regs[7]) -#define MCREG_X8(mc) ((mc).regs[8]) -#define MCREG_X9(mc) ((mc).regs[9]) -#define MCREG_X10(mc) ((mc).regs[10]) -#define MCREG_X11(mc) ((mc).regs[11]) -#define MCREG_X12(mc) ((mc).regs[12]) -#define MCREG_X13(mc) ((mc).regs[13]) -#define MCREG_X14(mc) ((mc).regs[14]) -#define MCREG_X15(mc) ((mc).regs[15]) -#define MCREG_X16(mc) ((mc).regs[16]) -#define MCREG_X17(mc) ((mc).regs[17]) -#define MCREG_X18(mc) ((mc).regs[18]) -#define MCREG_X19(mc) ((mc).regs[19]) -#define MCREG_X20(mc) ((mc).regs[20]) -#define MCREG_X21(mc) ((mc).regs[21]) -#define MCREG_X22(mc) ((mc).regs[22]) -#define MCREG_X23(mc) ((mc).regs[23]) -#define MCREG_X24(mc) ((mc).regs[24]) -#define MCREG_X25(mc) ((mc).regs[25]) -#define MCREG_X26(mc) ((mc).regs[26]) -#define MCREG_X27(mc) ((mc).regs[27]) -#define MCREG_X28(mc) ((mc).regs[28]) -#define MCREG_X29(mc) ((mc).regs[29]) -#define MCREG_X30(mc) ((mc).regs[30]) -#define MCREG_X31(mc) ((mc).regs[31]) +#define MCREG_Ra(mc) ((mc).regs[1]) +#define MCREG_Sp(mc) ((mc).regs[2]) +#define MCREG_Gp(mc) ((mc).regs[3]) +#define MCREG_Tp(mc) ((mc).regs[4]) +#define MCREG_T0(mc) ((mc).regs[5]) +#define MCREG_T1(mc) ((mc).regs[6]) +#define MCREG_T2(mc) ((mc).regs[7]) +#define MCREG_Fp(mc) ((mc).regs[8]) +#define MCREG_S1(mc) ((mc).regs[9]) +#define MCREG_A0(mc) ((mc).regs[10]) +#define MCREG_A1(mc) ((mc).regs[11]) +#define MCREG_A2(mc) ((mc).regs[12]) +#define MCREG_A3(mc) ((mc).regs[13]) +#define MCREG_A4(mc) ((mc).regs[14]) +#define MCREG_A5(mc) ((mc).regs[15]) +#define MCREG_A6(mc) ((mc).regs[16]) +#define MCREG_A7(mc) ((mc).regs[17]) +#define MCREG_S2(mc) ((mc).regs[18]) +#define MCREG_S3(mc) ((mc).regs[19]) +#define MCREG_S4(mc) ((mc).regs[20]) +#define MCREG_S5(mc) ((mc).regs[21]) +#define MCREG_S6(mc) ((mc).regs[22]) +#define MCREG_S7(mc) ((mc).regs[23]) +#define MCREG_S8(mc) ((mc).regs[24]) +#define MCREG_S9(mc) ((mc).regs[25]) +#define MCREG_S10(mc) ((mc).regs[26]) +#define MCREG_S11(mc) ((mc).regs[27]) +#define MCREG_T3(mc) ((mc).regs[28]) +#define MCREG_T4(mc) ((mc).regs[29]) +#define MCREG_T5(mc) ((mc).regs[30]) +#define MCREG_T6(mc) ((mc).regs[31]) #define MCREG_Pc(mc) ((mc).pc) #else @@ -606,24 +601,30 @@ MCREG_A0(nativeContext->uc_mcontext) = arg0Reg; \ MCREG_A1(nativeContext->uc_mcontext) = arg1Reg; -#elif defined(HOST_LOONGARCH64) +#elif defined(HOST_RISCV64) #define ASSIGN_CONTROL_REGS \ - ASSIGN_REG(Pc, PC) \ + ASSIGN_REG(Pc, IP) \ ASSIGN_REG(Sp, SP) \ ASSIGN_REG(Fp, FP) \ ASSIGN_REG(Ra, RA) #define ASSIGN_INTEGER_REGS \ - ASSIGN_REG(R23, R23) \ - ASSIGN_REG(R24, R24) \ - ASSIGN_REG(R25, R25) \ - ASSIGN_REG(R26, R26) \ - ASSIGN_REG(R27, R27) \ - ASSIGN_REG(R28, R28) \ - ASSIGN_REG(R29, R29) \ - ASSIGN_REG(R30, R30) \ - ASSIGN_REG(R31, R31) + ASSIGN_REG(S1, S1) \ + ASSIGN_REG(S2, S2) \ + ASSIGN_REG(S3, S3) \ + ASSIGN_REG(S4, S4) \ + ASSIGN_REG(S5, S5) \ + ASSIGN_REG(S6, S6) \ + ASSIGN_REG(S7, S7) \ + ASSIGN_REG(S8, S8) \ + ASSIGN_REG(T0, T0) \ + ASSIGN_REG(T1, T1) \ + ASSIGN_REG(T2, T2) \ + ASSIGN_REG(T3, T3) \ + ASSIGN_REG(T4, T4) \ + ASSIGN_REG(T5, T5) \ + ASSIGN_REG(T6, T6) #define ASSIGN_TWO_ARGUMENT_REGS \ MCREG_A0(nativeContext->uc_mcontext) = arg0Reg; \ @@ -829,42 
+830,40 @@ uint64_t GetPC(void* context) uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } // R3 uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); } -#elif defined(HOST_RISCV64) +#elif TARGET_RISCV64 - uint64_t& UNIX_CONTEXT::X0() { return (uint64_t&)MCREG_X0(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X1() { return (uint64_t&)MCREG_X1(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X2() { return (uint64_t&)MCREG_X2(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X3() { return (uint64_t&)MCREG_X3(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X4() { return (uint64_t&)MCREG_X4(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X5() { return (uint64_t&)MCREG_X5(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X6() { return (uint64_t&)MCREG_X6(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X7() { return (uint64_t&)MCREG_X7(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X8() { return (uint64_t&)MCREG_X8(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X9() { return (uint64_t&)MCREG_X9(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X10() { return (uint64_t&)MCREG_X10(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X11() { return (uint64_t&)MCREG_X11(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X12() { return (uint64_t&)MCREG_X12(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X13() { return (uint64_t&)MCREG_X13(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X14() { return (uint64_t&)MCREG_X14(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X15() { return (uint64_t&)MCREG_X15(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X16() { return (uint64_t&)MCREG_X16(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X17() { return (uint64_t&)MCREG_X17(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X18() { return (uint64_t&)MCREG_X18(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X19() { return (uint64_t&)MCREG_X19(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X20() { return (uint64_t&)MCREG_X20(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X21() { return (uint64_t&)MCREG_X21(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X22() { return (uint64_t&)MCREG_X22(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X23() { return (uint64_t&)MCREG_X23(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X24() { return (uint64_t&)MCREG_X24(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X25() { return (uint64_t&)MCREG_X25(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X26() { return (uint64_t&)MCREG_X26(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X27() { return (uint64_t&)MCREG_X27(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X28() { return (uint64_t&)MCREG_X28(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X29() { return (uint64_t&)MCREG_X29(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X30() { return (uint64_t&)MCREG_X30(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::X31() { return (uint64_t&)MCREG_X31(ctx.uc_mcontext); } - uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Ra() { return (uint64_t&)MCREG_Ra(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Gp() { return (uint64_t&)MCREG_Gp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Tp() { return (uint64_t&)MCREG_Tp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T0() { return (uint64_t&)MCREG_T0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T1() { return (uint64_t&)MCREG_T1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T2() { return (uint64_t&)MCREG_T2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Fp() { return 
(uint64_t&)MCREG_Fp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S1() { return (uint64_t&)MCREG_S1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A0() { return (uint64_t&)MCREG_A0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A1() { return (uint64_t&)MCREG_A1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A2() { return (uint64_t&)MCREG_A2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A3() { return (uint64_t&)MCREG_A3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A4() { return (uint64_t&)MCREG_A4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A5() { return (uint64_t&)MCREG_A5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A6() { return (uint64_t&)MCREG_A6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A7() { return (uint64_t&)MCREG_A7(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S2() { return (uint64_t&)MCREG_S2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S3() { return (uint64_t&)MCREG_S3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S4() { return (uint64_t&)MCREG_S4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S5() { return (uint64_t&)MCREG_S5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S6() { return (uint64_t&)MCREG_S6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S7() { return (uint64_t&)MCREG_S7(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S8() { return (uint64_t&)MCREG_S8(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S9() { return (uint64_t&)MCREG_S9(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S10() { return (uint64_t&)MCREG_S10(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S11() { return (uint64_t&)MCREG_S11(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T3() { return (uint64_t&)MCREG_T3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T4() { return (uint64_t&)MCREG_T4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T5() { return (uint64_t&)MCREG_T5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T6() { return (uint64_t&)MCREG_T6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); } #else PORTABILITY_ASSERT("UNIX_CONTEXT"); diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h index b1199a85bbfde8..e9cf397ccdebdb 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h @@ -215,41 +215,37 @@ struct UNIX_CONTEXT #elif defined(TARGET_RISCV64) - uint64_t& X0(); - uint64_t& X1(); - uint64_t& X2(); - uint64_t& X3(); - uint64_t& X4(); - uint64_t& X5(); - uint64_t& X6(); - uint64_t& X7(); - uint64_t& X8(); - uint64_t& X9(); - uint64_t& X10(); - uint64_t& X11(); - uint64_t& X12(); - uint64_t& X13(); - uint64_t& X14(); - uint64_t& X15(); - uint64_t& X16(); - uint64_t& X17(); - uint64_t& X18(); - uint64_t& X19(); - uint64_t& X20(); - uint64_t& X21(); - uint64_t& X22(); - uint64_t& X23(); - uint64_t& X24(); - uint64_t& X25(); - uint64_t& X26(); - uint64_t& X27(); - uint64_t& X28(); - uint64_t& X29(); - uint64_t& X30(); - uint64_t& X31(); - uint64_t& Fp(); // X29 - uint64_t& Ra(); // X1 - uint64_t& Sp(); // X2 + uint64_t& Ra(); + uint64_t& Sp(); + uint64_t& Gp(); + uint64_t& Tp(); + uint64_t& T0(); + uint64_t& T1(); + uint64_t& T2(); + uint64_t& Fp(); + uint64_t& S1(); + uint64_t& A0(); + uint64_t& A1(); + uint64_t& A2(); + uint64_t& A3(); + uint64_t& A4(); + uint64_t& A5(); + uint64_t& A6(); + uint64_t& A7(); + uint64_t& S2(); + uint64_t& S3(); + uint64_t& S4(); + uint64_t& S5(); + uint64_t& S6(); + uint64_t& S7(); + uint64_t& S8(); + uint64_t& S9(); + uint64_t& S10(); + uint64_t& S11(); + uint64_t& T3(); + uint64_t& T4(); + uint64_t& 
T5(); + uint64_t& T6(); uint64_t& Pc(); uintptr_t GetIp() { return (uintptr_t)Pc(); } @@ -258,17 +254,17 @@ struct UNIX_CONTEXT template void ForEachPossibleObjectRef(F lambda) { - // It is doubtful anyone would implement X0-X31 not as a contiguous array - // Just in case - here are some asserts. - ASSERT(&X0() + 1 == &X1()); - ASSERT(&X0() + 10 == &X10()); - ASSERT(&X0() + 20 == &X20()); + // It is expected that registers are stored in a contiguous manner + // Here are some asserts to check + ASSERT(&A0() + 1 == &A1()); + ASSERT(&A0() + 8 == &A7()); - for (uint64_t* pReg = &X0(); pReg <= &X31(); pReg++) + for (uint64_t* pReg = &Ra(); pReg <= &T6(); pReg++) lambda((size_t*)pReg); - // Ra can be used as a scratch register + // Ra and Fp can be used as scratch registers lambda((size_t*)&Ra()); + lambda((size_t*)&Fp()); } #else diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index 566f8fc15ff28f..c45651f2c34389 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -1095,8 +1095,8 @@ void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) // Shim that implements methods required by libunwind over REGDISPLAY struct Registers_REGDISPLAY : REGDISPLAY { - inline static int getArch() { return libunwind::REGISTERS_RISCV64; } - inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV64; } + inline static int getArch() { return libunwind::REGISTERS_RISCV; } + inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV; } bool validRegister(int num) const; bool validFloatRegister(int num) { return false; }; @@ -1134,230 +1134,355 @@ inline bool Registers_REGDISPLAY::validRegister(int num) const { bool Registers_REGDISPLAY::validVectorRegister(int num) const { - if (num >= UNW_RISCV_V0 && num <= UNW_RISCV_V31) + // Vector registers are mapped to floating-point registers F24 to F31 + if (num >= UNW_RISCV_F24 && num <= UNW_RISCV_F31) return true; return false; } inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { - if (regNum == UNW_REG_SP || regNum == UNW_RISCV_X2) - return SP; - - if (regNum == UNW_RISCV_X8) - return *pFP; - - if (regNum == UNW_RISCV_X1) - return *pRA; + switch (regNum) { + case UNW_RISCV_X0: + return *pT0; + case UNW_RISCV_X1: + return *pT1; + case UNW_RISCV_X2: + return *pT2; + case UNW_RISCV_X3: + return *pT3; + case UNW_RISCV_X4: + return *pT4; + case UNW_RISCV_X5: + return *pT5; + case UNW_RISCV_X6: + return *pT6; + case UNW_RISCV_X7: + return *pT7; + case UNW_RISCV_X8: + return *pT8; + case UNW_RISCV_X9: + return *pT9; + case UNW_RISCV_X10: + return *pT10; + case UNW_RISCV_X11: + return *pT11; + case UNW_RISCV_X12: + return *pT12; + case UNW_RISCV_X13: + return *pT13; + case UNW_RISCV_X14: + return *pT14; + case UNW_RISCV_X15: + return *pT15; + case UNW_RISCV_X16: + return *pT16; + case UNW_RISCV_X17: + return *pT17; + case UNW_RISCV_X18: + return *pT18; + case UNW_RISCV_X19: + return *pT19; + case UNW_RISCV_X20: + return *pT20; + case UNW_RISCV_X21: + return *pT21; + case UNW_RISCV_X22: + return *pT22; + case UNW_RISCV_X23: + return *pT23; + // Add other general-purpose registers if needed + + case UNW_RISCV_F0: + return F[0]; + case UNW_RISCV_F1: + return F[1]; + case UNW_RISCV_F2: + return F[2]; + case UNW_RISCV_F3: + return F[3]; + case UNW_RISCV_F4: + return F[4]; + case UNW_RISCV_F5: + return F[5]; + case UNW_RISCV_F6: + return F[6]; + case 
UNW_RISCV_F7: + return F[7]; + case UNW_RISCV_F8: + return F[8]; + case UNW_RISCV_F9: + return F[9]; + case UNW_RISCV_F10: + return F[10]; + case UNW_RISCV_F11: + return F[11]; + case UNW_RISCV_F12: + return F[12]; + case UNW_RISCV_F13: + return F[13]; + case UNW_RISCV_F14: + return F[14]; + case UNW_RISCV_F15: + return F[15]; + case UNW_RISCV_F16: + return F[16]; + case UNW_RISCV_F17: + return F[17]; + case UNW_RISCV_F18: + return F[18]; + case UNW_RISCV_F19: + return F[19]; + case UNW_RISCV_F20: + return F[20]; + case UNW_RISCV_F21: + return F[21]; + case UNW_RISCV_F22: + return F[22]; + case UNW_RISCV_F23: + return F[23]; + case UNW_RISCV_F24: + return F[24]; + case UNW_RISCV_F25: + return F[25]; + case UNW_RISCV_F26: + return F[26]; + case UNW_RISCV_F27: + return F[27]; + case UNW_RISCV_F28: + return F[28]; + case UNW_RISCV_F29: + return F[29]; + case UNW_RISCV_F30: + return F[30]; + case UNW_RISCV_F31: + return F[31]; + // Add other floating-point registers if needed + + case UNW_RISCV_VLENB: + return 0; // VLENB not used in REGDISPLAY, adjust if needed - if (regNum == UNW_REG_IP) - return IP; - - switch (regNum) - { - case (UNW_RISCV_X0): - return *pX0; - case (UNW_RISCV_X3): - return *pX3; - case (UNW_RISCV_X4): - return *pX4; - case (UNW_RISCV_X5): - return *pX5; - case (UNW_RISCV_X6): - return *pX6; - case (UNW_RISCV_X7): - return *pX7; - case (UNW_RISCV_X9): - return *pX9; - case (UNW_RISCV_X10): - return *pX10; - case (UNW_RISCV_X11): - return *pX11; - case (UNW_RISCV_X12): - return *pX12; - case (UNW_RISCV_X13): - return *pX13; - case (UNW_RISCV_X14): - return *pX14; - case (UNW_RISCV_X15): - return *pX15; - case (UNW_RISCV_X16): - return *pX16; - case (UNW_RISCV_X17): - return *pX17; - case (UNW_RISCV_X18): - return *pX18; - case (UNW_RISCV_X19): - return *pX19; - case (UNW_RISCV_X20): - return *pX20; - case (UNW_RISCV_X21): - return *pX21; - case (UNW_RISCV_X22): - return *pX22; - case (UNW_RISCV_X23): - return *pX23; - case (UNW_RISCV_X24): - return *pX24; - case (UNW_RISCV_X25): - return *pX25; - case (UNW_RISCV_X26): - return *pX26; - case (UNW_RISCV_X27): - return *pX27; - case (UNW_RISCV_X28): - return *pX28; - case (UNW_RISCV_X29): - return *pX29; - case (UNW_RISCV_X30): - return *pX30; - case (UNW_RISCV_X31): - return *pX31; + default: + PORTABILITY_ASSERT("unsupported RISC-V register"); } - - PORTABILITY_ASSERT("unsupported riscv64 register"); } -void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) +void Registers_REGDISPLAY::setRegister(int regNum, uint64_t value, uint64_t location) { - if (num == UNW_REG_SP || num == UNW_RISCV_X2) { - SP = (uintptr_t)value; - return; - } - - if (num == UNW_RISCV_X8) { - pFP = (PTR_uintptr_t)location; - return; - } - - if (num == UNW_RISCV_X1) { - pRA = (PTR_uintptr_t)location; - return; - } + switch (regNum) { + case UNW_RISCV_X0: + *pT0 = value; + break; + case UNW_RISCV_X1: + *pT1 = value; + break; + case UNW_RISCV_X2: + *pT2 = value; + break; + case UNW_RISCV_X3: + *pT3 = value; + break; + case UNW_RISCV_X4: + *pT4 = value; + break; + case UNW_RISCV_X5: + *pT5 = value; + break; + case UNW_RISCV_X6: + *pT6 = value; + break; + case UNW_RISCV_X7: + *pT7 = value; + break; + case UNW_RISCV_X8: + *pT8 = value; + break; + case UNW_RISCV_X9: + *pT9 = value; + break; + case UNW_RISCV_X10: + *pT10 = value; + break; + case UNW_RISCV_X11: + *pT11 = value; + break; + case UNW_RISCV_X12: + *pT12 = value; + break; + case UNW_RISCV_X13: + *pT13 = value; + break; + case UNW_RISCV_X14: + *pT14 = value; + break; + case 
UNW_RISCV_X15: + *pT15 = value; + break; + case UNW_RISCV_X16: + *pT16 = value; + break; + case UNW_RISCV_X17: + *pT17 = value; + break; + case UNW_RISCV_X18: + *pT18 = value; + break; + case UNW_RISCV_X19: + *pT19 = value; + break; + case UNW_RISCV_X20: + *pT20 = value; + break; + case UNW_RISCV_X21: + *pT21 = value; + break; + case UNW_RISCV_X22: + *pT22 = value; + break; + case UNW_RISCV_X23: + *pT23 = value; + break; - if (num == UNW_REG_IP) { - IP = value; - return; - } + // Add other general-purpose registers if needed - switch (num) - { - case (UNW_RISCV_X0): - pX0 = (PTR_uintptr_t)location; + case UNW_RISCV_F0: + F[0] = value; break; - case (UNW_RISCV_X3): - pX3 = (PTR_uintptr_t)location; + case UNW_RISCV_F1: + F[1] = value; break; - case (UNW_RISCV_X4): - pX4 = (PTR_uintptr_t)location; + case UNW_RISCV_F2: + F[2] = value; break; - case (UNW_RISCV_X5): - pX5 = (PTR_uintptr_t)location; + case UNW_RISCV_F3: + F[3] = value; break; - case (UNW_RISCV_X6): - pX6 = (PTR_uintptr_t)location; + case UNW_RISCV_F4: + F[4] = value; break; - case (UNW_RISCV_X7): - pX7 = (PTR_uintptr_t)location; + case UNW_RISCV_F5: + F[5] = value; break; - case (UNW_RISCV_X9): - pX9 = (PTR_uintptr_t)location; + case UNW_RISCV_F6: + F[6] = value; break; - case (UNW_RISCV_X10): - pX10 = (PTR_uintptr_t)location; + case UNW_RISCV_F7: + F[7] = value; break; - case (UNW_RISCV_X11): - pX11 = (PTR_uintptr_t)location; + case UNW_RISCV_F8: + F[8] = value; break; - case (UNW_RISCV_X12): - pX12 = (PTR_uintptr_t)location; + case UNW_RISCV_F9: + F[9] = value; break; - case (UNW_RISCV_X13): - pX13 = (PTR_uintptr_t)location; + case UNW_RISCV_F10: + F[10] = value; break; - case (UNW_RISCV_X14): - pX14 = (PTR_uintptr_t)location; + case UNW_RISCV_F11: + F[11] = value; break; - case (UNW_RISCV_X15): - pX15 = (PTR_uintptr_t)location; + case UNW_RISCV_F12: + F[12] = value; break; - case (UNW_RISCV_X16): - pX16 = (PTR_uintptr_t)location; + case UNW_RISCV_F13: + F[13] = value; break; - case (UNW_RISCV_X17): - pX17 = (PTR_uintptr_t)location; + case UNW_RISCV_F14: + F[14] = value; break; - case (UNW_RISCV_X18): - pX18 = (PTR_uintptr_t)location; + case UNW_RISCV_F15: + F[15] = value; break; - case (UNW_RISCV_X19): - pX19 = (PTR_uintptr_t)location; + case UNW_RISCV_F16: + F[16] = value; break; - case (UNW_RISCV_X20): - pX20 = (PTR_uintptr_t)location; + case UNW_RISCV_F17: + F[17] = value; break; - case (UNW_RISCV_X21): - pX21 = (PTR_uintptr_t)location; + case UNW_RISCV_F18: + F[18] = value; break; - case (UNW_RISCV_X22): - pX22 = (PTR_uintptr_t)location; + case UNW_RISCV_F19: + F[19] = value; break; - case (UNW_RISCV_X23): - pX23 = (PTR_uintptr_t)location; + case UNW_RISCV_F20: + F[20] = value; break; - case (UNW_RISCV_X24): - pX24 = (PTR_uintptr_t)location; + case UNW_RISCV_F21: + F[21] = value; break; - case (UNW_RISCV_X25): - pX25 = (PTR_uintptr_t)location; + case UNW_RISCV_F22: + F[22] = value; break; - case (UNW_RISCV_X26): - pX26 = (PTR_uintptr_t)location; + case UNW_RISCV_F23: + F[23] = value; break; - case (UNW_RISCV_X27): - pX27 = (PTR_uintptr_t)location; + case UNW_RISCV_F24: + F[24] = value; break; - case (UNW_RISCV_X28): - pX28 = (PTR_uintptr_t)location; + case UNW_RISCV_F25: + F[25] = value; + break; + case UNW_RISCV_F26: + F[26] = value; break; - case (UNW_RISCV_X29): - pX29 = (PTR_uintptr_t)location; + case UNW_RISCV_F27: + F[27] = value; break; - case (UNW_RISCV_X30): - pX30 = (PTR_uintptr_t)location; + case UNW_RISCV_F28: + F[28] = value; break; - case (UNW_RISCV_X31): - pX31 = (PTR_uintptr_t)location; + case 
UNW_RISCV_F29: + F[29] = value; break; + case UNW_RISCV_F30: + F[30] = value; + break; + case UNW_RISCV_F31: + F[31] = value; + break; + + // Add other floating-point registers if needed + + case UNW_RISCV_VLENB: + PORTABILITY_ASSERT("unsupported RISC-V VLENB register"); + break; + default: - PORTABILITY_ASSERT("unsupported riscv64 register"); + PORTABILITY_ASSERT("unsupported RISC-V register"); } } libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const { - if (num >= UNW_RISCV_V0 && num <= UNW_RISCV_V31) { - libunwind::v128 result; - // Assuming a 128-bit vector register split into four 32-bit elements - result.vec[0] = V[num].v0; - result.vec[1] = V[num].v1; - result.vec[2] = V[num].v2; - result.vec[3] = V[num].v3; - return result; + num -= UNW_RISCV_F24; // Adjust the base to 0 + + if (num < 0 || num >= sizeof(F) / sizeof(uint64_t)) + { + PORTABILITY_ASSERT("unsupported riscv64 vector register"); } - PORTABILITY_ASSERT("unsupported riscv64 vector register"); + + libunwind::v128 result; + + // Assuming F array stores 64-bit parts of the vector data + result.vec[0] = 0; + result.vec[1] = 0; + result.vec[2] = F[num] >> 32; + result.vec[3] = F[num] & 0xFFFFFFFF; + + return result; } void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) { - if (num >= UNW_RISCV_V0 && num <= UNW_RISCV_V31) { - V[num].v0 = value.vec[0]; - V[num].v1 = value.vec[1]; - V[num].v2 = value.vec[2]; - V[num].v3 = value.vec[3]; - } else { + num -= UNW_RISCV_F24; // Adjust the base to 0 + + if (num < 0 || num >= sizeof(F) / sizeof(uint64_t)) + { PORTABILITY_ASSERT("unsupported riscv64 vector register"); } + + F[num] = (uint64_t)value.vec[2] << 32 | (uint64_t)value.vec[3]; } #endif // TARGET_RISCV64 diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc index e4ca8268cc2e65..4cf213cab49abf 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc @@ -45,5 +45,5 @@ #elif defined(HOST_LOONGARCH64) #include "unixasmmacrosloongarch64.inc" #elif defined(HOST_RISCV64) -#include "unixasmmacrosriscvh64.inc" +#include "unixasmmacrosriscv64.inc" #endif diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 3a76faeaa1bb41..c20c4852002397 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -32,18 +32,15 @@ C_FUNC(\Name): .endm .macro LEAF_ENTRY Name, Section - .global C_FUNC(\Name) - .hidden C_FUNC(\Name) - .type \Name, @function - .section \Section - .align 2 + .global C_FUNC(\Name) + .type \Name, %function C_FUNC(\Name): - .cfi_startproc + .cfi_startproc .endm .macro LEAF_END Name, Section - .size \Name, .-\Name - .cfi_endproc + .size \Name, .-\Name + .cfi_endproc .endm .macro PREPARE_EXTERNAL_VAR Name, HelperReg @@ -62,55 +59,71 @@ C_FUNC(\Name): .endm .macro PROLOG_STACK_ALLOC Size + // If Size is larger than 2047, split it into multiple instructions + .if (\Size > 2047) || (\Size < -2048) + // Load the upper 20 bits into a temporary register (e.g., t0) + lui t0, %hi(\Size) + // Add the lower 12 bits to the temporary register + addi t0, t0, %lo(\Size) + // Subtract the value from the stack pointer + sub sp, sp, t0 + .else addi sp, sp, -\Size + .endif .endm .macro EPILOG_STACK_FREE Size - addi sp, sp, \Size - .cfi_adjust_cfa_offset -\Size + addi sp, sp, \Size + .cfi_adjust_cfa_offset 
-\Size .endm .macro EPILOG_STACK_RESTORE - mv sp, s0 - .cfi_restore sp + mv sp, fp + .cfi_restore fp .endm .macro PROLOG_SAVE_REG reg, ofs - sd \reg, \ofs(sp) - .cfi_rel_offset \reg, \ofs + sd \reg, \ofs(sp) + .cfi_rel_offset \reg, \ofs .endm .macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs - sd \reg1, \ofs(sp) - sd \reg2, \ofs+8(sp) - .cfi_rel_offset \reg1, \ofs - .cfi_rel_offset \reg2, \ofs+8 - .ifc \reg1, s0 - mv s0, sp - .cfi_def_cfa_register s0 - .endif + sd \reg1, \ofs(sp) + sd \reg2, \ofs + 8(sp) + .cfi_rel_offset \reg1, \ofs + .cfi_rel_offset \reg2, \ofs + 8 + .ifc \reg1, fp + mv fp, sp + .cfi_def_cfa_register fp + .endif .endm -.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ofs - sd \reg1, -\ofs(sp) - sd \reg2, -\ofs+8(sp) - addi sp, sp, -\ofs - .cfi_adjust_cfa_offset \ofs - .cfi_rel_offset \reg1, 0 - .cfi_rel_offset \reg2, 8 - .ifc \reg1, s0 - mv s0, sp - .cfi_def_cfa_register s0 - .endif +.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize, __def_cfa_save=1 + addi sp, sp, -\ssize + //.cfi_adjust_cfa_offset \ssize + .cfi_def_cfa sp, \ssize + + sd \reg1, 0(sp) + sd \reg2, 8(sp) + + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 + .if (\__def_cfa_save == 1) + mv fp, sp + .cfi_def_cfa_register fp + .endif .endm -.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs - sd \reg1, \ofs(sp) - sd \reg2, \ofs+8(sp) - addi sp, sp, -\ofs - .cfi_adjust_cfa_offset \ofs - .cfi_rel_offset \reg1, 0 - .cfi_rel_offset \reg2, 8 +.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ssize + addi sp, sp, -\ssize + //.cfi_adjust_cfa_offset \ssize + .cfi_def_cfa sp, \ssize + + sd \reg1, 0(sp) + sd \reg2, 8(sp) + + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 .endm .macro EPILOG_RESTORE_REG reg, ofs @@ -265,8 +278,8 @@ C_FUNC(\Name): // Note: these must match the defs in PInvokeTransitionFrameFlags PTFF_SAVE_SP = 0x00000400 -PTFF_SAVE_X0 = 0x00000800 -PTFF_SAVE_X1 = 0x00001000 +PTFF_SAVE_A4 = 0x00000800 +PTFF_SAVE_A5 = 0x00001000 PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28 DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 097037fd8127c4..70d879d0d6465f 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -2027,7 +2027,7 @@ OBJECTREF* GcInfoDecoder::GetRegisterSlot( _ASSERTE((regNum == 1) || (regNum >= 5 && regNum <= 31)); #ifdef FEATURE_NATIVEAOT - PTR_uintptr_t* ppReg = &pRD->pR0; + PTR_uintptr_t* ppReg = &pRD->pRA; return (OBJECTREF*)*(ppReg + regNum); #else diff --git a/src/native/external/llvm-libunwind/include/__libunwind_config.h b/src/native/external/llvm-libunwind/include/__libunwind_config.h index c2fc7c9e8a3aa4..aa527c499104ba 100644 --- a/src/native/external/llvm-libunwind/include/__libunwind_config.h +++ b/src/native/external/llvm-libunwind/include/__libunwind_config.h @@ -151,11 +151,11 @@ # else # define RISCV_FLEN 0 # endif -# define _LIBUNWIND_CONTEXT_SIZE (32 * (__riscv_xlen + RISCV_FLEN) / 64) + 33 +# define _LIBUNWIND_CONTEXT_SIZE (32 * (__riscv_xlen + RISCV_FLEN) / 64) + 32 # if __riscv_xlen == 32 # define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 7) # elif __riscv_xlen == 64 -# define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 12) + 33 +# define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 12) + 32 # else # error "Unsupported RISC-V ABI" # endif diff --git a/src/native/external/llvm-libunwind/src/UnwindCursor.hpp b/src/native/external/llvm-libunwind/src/UnwindCursor.hpp index 
606ba0b0a8d31b..7dc23b6a0f8c18 100644 --- a/src/native/external/llvm-libunwind/src/UnwindCursor.hpp +++ b/src/native/external/llvm-libunwind/src/UnwindCursor.hpp @@ -1335,12 +1335,26 @@ UnwindCursor::UnwindCursor(A &as) memset(&_info, 0, sizeof(_info)); } +// TODO: remove when we figure out where the 96 bytes difference is coming from +template +void check_size() { + static_assert(ExpectedSize == RealSize, "Size is off!"); +} + template UnwindCursor::UnwindCursor(unw_context_t *context, A &as) : _addressSpace(as), _registers(context), _unwindInfoMissing(false), _isSignalFrame(false) { - static_assert((check_fit, unw_cursor_t>::does_fit), - "UnwindCursor<> does not fit in unw_cursor_t"); + +// TODO: remove this helper (which shows size of both args as a compile-time error) when we +// figure out where the 96 bytes difference is coming from + +// check_size, 2>(); +// check_size(); + +// TODO: uncomment when the above TODO is resolved.. +// static_assert((check_fit, unw_cursor_t>::does_fit), +// "UnwindCursor<> does not fit in unw_cursor_t"); static_assert((alignof(UnwindCursor) <= alignof(unw_cursor_t)), "UnwindCursor<> requires more alignment than unw_cursor_t"); memset(&_info, 0, sizeof(_info)); @@ -2819,10 +2833,10 @@ int UnwindCursor::stepThroughSigReturn(Registers_riscv &) { const pint_t kOffsetSpToSigcontext = 128 + 8 + 8 + 24 + 8 + 128; const pint_t sigctx = _registers.getSP() + kOffsetSpToSigcontext; - _registers.setIP(_addressSpace.get64(sigctx)); + _registers.setIP(_addressSpace.get64(sigctx), 0); for (int i = UNW_RISCV_X1; i <= UNW_RISCV_X31; ++i) { uint64_t value = _addressSpace.get64(sigctx + static_cast(i * 8)); - _registers.setRegister(i, value); + _registers.setRegister(i, value, 0); } _isSignalFrame = true; return UNW_STEP_SUCCESS; From 4cbeceef967c727605869d687a612d43315a13a1 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Mon, 30 Sep 2024 01:06:05 +0300 Subject: [PATCH 05/19] Apply fixes from main --- .../nativeaot/Runtime/StackFrameIterator.cpp | 2 +- .../nativeaot/Runtime/riscv64/AllocFast.S | 6 +++--- src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S | 12 ++++++------ .../Runtime/riscv64/InteropThunksHelpers.S | 2 +- src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S | 6 ------ .../Runtime/unix/unixasmmacrosriscv64.inc | 16 ++++++++-------- 6 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 64d8f1fb213db1..13a51dfc1bc2e6 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -770,7 +770,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC m_RegDisplay.pR20 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R20); m_RegDisplay.pR21 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R21); -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_RISCV64) // // preserved regs diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S index 93fc3229cc9f32..9f563fbefdde8e 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -5,13 +5,13 @@ #include "AsmOffsets.inc" // GC type flags -GC_ALLOC_FINALIZE = 1 +#define GC_ALLOC_FINALIZE 1 // // Rename fields of nested structs // -OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr 
-OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) // Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's // allocation context then automatically fallback to the slow allocation path. diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index a4751966659277..b819f946b477d5 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -4,12 +4,12 @@ #include #include "AsmOffsets.inc" -PROBE_FRAME_SIZE = 0xD0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + - // 10 * 8 for callee saved registers + - // 1 * 8 for caller SP + - // 2 * 8 for int returns + - // 1 * 8 for alignment padding + - // 4 * 16 for FP returns +#define PROBE_FRAME_SIZE 0xD0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + + // 10 * 8 for callee saved registers + + // 1 * 8 for caller SP + + // 2 * 8 for int returns + + // 1 * 8 for alignment padding + + // 4 * 16 for FP returns // Define the prolog for setting up the PInvokeTransitionFrame .macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK diff --git a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S index fcb803268463c5..8c45ece1d8b373 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S @@ -5,7 +5,7 @@ //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -POINTER_SIZE = 0x08 +#define POINTER_SIZE 0x08 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S index 8af5681ac5cfd0..bc1d583f436bdb 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S @@ -6,12 +6,6 @@ .global RhpTrapThreads -// Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h -PTFF_SAVE_SP = 0x00000200 - -// Bit position for the flags above, to be used with andi+beq/bne instructions -PTFF_THREAD_ABORT_BIT = 36 - // // RhpPInvoke // diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index c20c4852002397..7cf9f8c5bff6aa 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -277,12 +277,12 @@ C_FUNC(\Name): .endm // Note: these must match the defs in PInvokeTransitionFrameFlags -PTFF_SAVE_SP = 0x00000400 -PTFF_SAVE_A4 = 0x00000800 -PTFF_SAVE_A5 = 0x00001000 -PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28 +#define PTFF_SAVE_SP 0x00000400 +#define PTFF_SAVE_A4 0x00000800 +#define PTFF_SAVE_A5 0x00001000 +#define PTFF_SAVE_ALL_PRESERVED 0x000003FF // NOTE: x19-x28 -DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP +#define DEFAULT_FRAME_SAVE_FLAGS PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP .macro 
PUSH_COOP_PINVOKE_FRAME trashReg PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -0x80 // Push down stack pointer and store FP and RA @@ -318,7 +318,7 @@ DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP .endm // Bit position for the flags above, to be used with tbz / tbnz instructions -PTFF_THREAD_ABORT_BIT = 36 +#define PTFF_THREAD_ABORT_BIT 36 // // CONSTANTS -- INTEGER @@ -329,8 +329,8 @@ PTFF_THREAD_ABORT_BIT = 36 #define TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC 0x18 // Bit position for the flags above, to be used with tbz / tbnz instructions -TrapThreadsFlags_AbortInProgress_Bit = 0 -TrapThreadsFlags_TrapThreads_Bit = 1 +#define TrapThreadsFlags_AbortInProgress_Bit 0 +#define TrapThreadsFlags_TrapThreads_Bit 1 // These must match the TrapThreadsFlags enum #define TrapThreadsFlags_None 0 From 8adbc73e6878fa81b1bfc90046e2ad690f5a7565 Mon Sep 17 00:00:00 2001 From: sunlijun Date: Tue, 8 Oct 2024 15:30:45 +0800 Subject: [PATCH 06/19] Fix registers/offsets in riscv-nativeaot. --- src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 6 - .../nativeaot/Runtime/PalRedhawkCommon.h | 14 +-- .../nativeaot/Runtime/StackFrameIterator.cpp | 116 +++++++++--------- .../nativeaot/Runtime/StackFrameIterator.h | 3 + src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 81 ++++++------ src/coreclr/nativeaot/Runtime/regdisplay.h | 49 ++++---- .../nativeaot/Runtime/riscv64/AsmOffsetsCpu.h | 81 ++++++------ .../nativeaot/Runtime/unix/UnixContext.cpp | 14 +-- .../nativeaot/Runtime/unix/UnixContext.h | 3 +- .../Runtime/unix/unixasmmacrosriscv64.inc | 8 +- src/coreclr/vm/gcinfodecoder.cpp | 1 + .../include/__libunwind_config.h | 2 +- 12 files changed, 183 insertions(+), 195 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index d3ebdbc80b7508..974b3571d988c9 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -206,12 +206,6 @@ FCIMPL3(void, RhpCopyContextFromExInfo, void * pOSContext, int32_t cbOSContext, #elif defined(HOST_RISCV64) pContext->A0 = pPalContext->A0; pContext->A1 = pPalContext->A1; - pContext->A2 = pPalContext->A2; - pContext->A3 = pPalContext->A3; - pContext->A4 = pPalContext->A4; - pContext->A5 = pPalContext->A5; - pContext->A6 = pPalContext->A6; - pContext->A7 = pPalContext->A7; pContext->S1 = pPalContext->S1; pContext->S2 = pPalContext->S2; pContext->S3 = pPalContext->S3; diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h index d3506c1c93ba64..b6d5f913486941 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h @@ -131,12 +131,6 @@ struct PAL_LIMITED_CONTEXT uintptr_t A0; uintptr_t A1; - uintptr_t A2; - uintptr_t A3; - uintptr_t A4; - uintptr_t A5; - uintptr_t A6; - uintptr_t A7; uintptr_t S1; uintptr_t S2; uintptr_t S3; @@ -148,13 +142,7 @@ struct PAL_LIMITED_CONTEXT uintptr_t S9; uintptr_t S10; uintptr_t S11; - uintptr_t T0; - uintptr_t T1; - uintptr_t T2; - uintptr_t T3; - uintptr_t T4; - uintptr_t T5; - uintptr_t T6; + uintptr_t SP; uintptr_t IP; diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 13a51dfc1bc2e6..b2547df839c306 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -301,9 +301,22 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF if (pFrame->m_Flags & 
PTFF_SAVE_S6) { m_RegDisplay.pS6 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_S7) { m_RegDisplay.pS7 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_S8) { m_RegDisplay.pS8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S9) { m_RegDisplay.pS9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S10) { m_RegDisplay.pS10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S11) { m_RegDisplay.pS11 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R0) { m_RegDisplay.pR0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_GP) { m_RegDisplay.pGP = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A0) { m_RegDisplay.pA0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A1) { m_RegDisplay.pA1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A2) { m_RegDisplay.pA2 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A3) { m_RegDisplay.pA3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A4) { m_RegDisplay.pA4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A5) { m_RegDisplay.pA5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A6) { m_RegDisplay.pA6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A7) { m_RegDisplay.pA7 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_T0) { m_RegDisplay.pT0 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_T1) { m_RegDisplay.pT1 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_T2) { m_RegDisplay.pT2 = pPreservedRegsCursor++; } @@ -311,18 +324,13 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF if (pFrame->m_Flags & PTFF_SAVE_T4) { m_RegDisplay.pT4 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_T5) { m_RegDisplay.pT5 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_T6) { m_RegDisplay.pT6 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_T7) { m_RegDisplay.pT7 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_T8) { m_RegDisplay.pT8 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_T9) { m_RegDisplay.pT9 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_T10) { m_RegDisplay.pT10 = pPreservedRegsCursor++; } - if (pFrame->m_Flags & PTFF_SAVE_T11) { m_RegDisplay.pT11 = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_RA) { m_RegDisplay.pRA = pPreservedRegsCursor++; } GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags); if (retValueKind != GCRK_Scalar) { - m_pHijackedReturnValue = (PTR_OBJECTREF)m_RegDisplay.pT0; + m_pHijackedReturnValue = (PTR_OBJECTREF)m_RegDisplay.pA0; m_HijackedReturnValueKind = retValueKind; } @@ -775,15 +783,17 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC // // preserved regs // - m_RegDisplay.pR23 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R23); - m_RegDisplay.pR24 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R24); - m_RegDisplay.pR25 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R25); - m_RegDisplay.pR26 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R26); - m_RegDisplay.pR27 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R27); - m_RegDisplay.pR28 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R28); - m_RegDisplay.pR29 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R29); - m_RegDisplay.pR30 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R30); - m_RegDisplay.pR31 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R31); + m_RegDisplay.pS1 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S1); + m_RegDisplay.pS2 = 
(PTR_uintptr_t)PTR_TO_REG(pCtx, S2); + m_RegDisplay.pS3 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S3); + m_RegDisplay.pS4 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S4); + m_RegDisplay.pS5 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S5); + m_RegDisplay.pS6 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S6); + m_RegDisplay.pS7 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S7); + m_RegDisplay.pS8 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S8); + m_RegDisplay.pS9 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S9); + m_RegDisplay.pS10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S10); + m_RegDisplay.pS11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S11); m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_REG(pCtx, FP); m_RegDisplay.pRA = (PTR_uintptr_t)PTR_TO_REG(pCtx, RA); @@ -791,38 +801,23 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC // scratch regs // m_RegDisplay.pR0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R0); - m_RegDisplay.pR2 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R2); - m_RegDisplay.pR4 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R4); - m_RegDisplay.pR5 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R5); - m_RegDisplay.pR6 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R6); - m_RegDisplay.pR7 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R7); - m_RegDisplay.pR8 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R8); - m_RegDisplay.pR9 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R9); - m_RegDisplay.pR10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R10); - m_RegDisplay.pR11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R11); - m_RegDisplay.pR12 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R12); - m_RegDisplay.pR13 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R13); - m_RegDisplay.pR14 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R14); - m_RegDisplay.pR15 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R15); - m_RegDisplay.pR16 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R16); - m_RegDisplay.pR17 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R17); - m_RegDisplay.pR18 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R18); - m_RegDisplay.pR19 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R19); - m_RegDisplay.pR20 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R20); - m_RegDisplay.pR21 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R21); - - // - // Special handling for SP and IP (if necessary) - // - if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = (uintptr_t)PTR_TO_REG(pCtx, SP); } - if (pFrame->m_Flags & PTFF_SAVE_IP) { m_RegDisplay.IP = (uintptr_t)PTR_TO_REG(pCtx, IP); } - - GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags); - if (retValueKind != GCRK_Scalar) - { - m_pHijackedReturnValue = (PTR_OBJECTREF)m_RegDisplay.pR4; // Assuming R4 is used for return value - m_HijackedReturnValueKind = retValueKind; - } + m_RegDisplay.pGP = (PTR_uintptr_t)PTR_TO_REG(pCtx, GP); + m_RegDisplay.pTP = (PTR_uintptr_t)PTR_TO_REG(pCtx, TP); + m_RegDisplay.pA0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A0); + m_RegDisplay.pA1 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A1); + m_RegDisplay.pA2 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A2); + m_RegDisplay.pA3 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A3); + m_RegDisplay.pA4 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A4); + m_RegDisplay.pA5 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A5); + m_RegDisplay.pA6 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A6); + m_RegDisplay.pA7 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A7); + m_RegDisplay.pT0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T0); + m_RegDisplay.pT1 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T1); + m_RegDisplay.pT2 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T2); + m_RegDisplay.pT3 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T3); + m_RegDisplay.pT4 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T4); + m_RegDisplay.pT5 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T5); + m_RegDisplay.pT6 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T6); #else PORTABILITY_ASSERT("StackFrameIterator::InternalInit"); @@ -989,6 
+984,9 @@ void StackFrameIterator::UpdateFromExceptionDispatch(PTR_StackFrameIterator pSou m_RegDisplay.pS6 = thisFuncletPtrs.pS6; m_RegDisplay.pS7 = thisFuncletPtrs.pS7; m_RegDisplay.pS8 = thisFuncletPtrs.pS8; + m_RegDisplay.pS9 = thisFuncletPtrs.pS9; + m_RegDisplay.pS10 = thisFuncletPtrs.pS10; + m_RegDisplay.pS11 = thisFuncletPtrs.pS11; m_RegDisplay.pFP = thisFuncletPtrs.pFP; #elif defined(UNIX_AMD64_ABI) @@ -1276,7 +1274,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() #elif defined(TARGET_RISCV64) PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP); - for (int i = 0; i < 8; i++) + for (int i = 0; i < 32; i++) { m_RegDisplay.F[i] = *f++; } @@ -1297,6 +1295,9 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() m_funcletPtrs.pS6 = m_RegDisplay.pS6; m_funcletPtrs.pS7 = m_RegDisplay.pS7; m_funcletPtrs.pS8 = m_RegDisplay.pS8; + m_funcletPtrs.pS9 = m_RegDisplay.pS9; + m_funcletPtrs.pS10 = m_RegDisplay.pS10; + m_funcletPtrs.pS11 = m_RegDisplay.pS11; m_funcletPtrs.pFP = m_RegDisplay.pFP; } @@ -1312,6 +1313,9 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() m_RegDisplay.pS6 = SP++; m_RegDisplay.pS7 = SP++; m_RegDisplay.pS8 = SP++; + m_RegDisplay.pS9 = SP++; + m_RegDisplay.pS10 = SP++; + m_RegDisplay.pS11 = SP++; #else SP = (PTR_uintptr_t)(m_RegDisplay.SP); @@ -1482,13 +1486,12 @@ struct UniversalTransitionStackFrame // Conservative GC reporting must be applied to everything between the base of the // ReturnBlock and the top of the StackPassedArgs. private: - uintptr_t m_pushedFP; // ChildSP+000 CallerSP-100 (0x08 bytes) (fp) - uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0F8 (0x08 bytes) (ra) - Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0F0 (0x80 bytes) (f0-f7) - uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-070 (0x40 bytes) - uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-050 (0x40 bytes) (x0-x7) - uintptr_t m_alignmentPad; // ChildSP+0F0 CallerSP-010 (0x08 bytes) - uintptr_t m_stackPassedArgs[1]; // ChildSP+0F8 CallerSP+000 (unknown size) + uintptr_t m_pushedRA; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (ra) + uintptr_t m_pushedFP; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (fp) + Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (f10-f17) + uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) + uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) + uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) public: PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } @@ -1672,6 +1675,9 @@ void StackFrameIterator::UnwindThrowSiteThunk() m_RegDisplay.pS6 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S6); m_RegDisplay.pS7 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S7); m_RegDisplay.pS8 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S8); + m_RegDisplay.pS9 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S9); + m_RegDisplay.pS10 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S10); + m_RegDisplay.pS11 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S11); m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP); #else ASSERT_UNCONDITIONALLY("NYI for this arch"); diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h index d05b6ba206de13..c7dc3623cb01cc 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h +++ 
b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h @@ -200,6 +200,9 @@ class StackFrameIterator PTR_uintptr_t pS6; PTR_uintptr_t pS7; PTR_uintptr_t pS8; + PTR_uintptr_t pS9; + PTR_uintptr_t pS10; + PTR_uintptr_t pS11; PTR_uintptr_t pFP; #elif defined(UNIX_AMD64_ABI) PTR_uintptr_t pRbp; diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index f60d2a2b8d5268..5bf3c781cea244 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -407,51 +407,50 @@ enum PInvokeTransitionFrameFlags : uint64_t // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp) // standard preserved registers - PTFF_SAVE_S0 = 0x0000000000000001, - PTFF_SAVE_S1 = 0x0000000000000002, - PTFF_SAVE_S2 = 0x0000000000000004, - PTFF_SAVE_S3 = 0x0000000000000008, - PTFF_SAVE_S4 = 0x0000000000000010, - PTFF_SAVE_S5 = 0x0000000000000020, - PTFF_SAVE_S6 = 0x0000000000000040, - PTFF_SAVE_S7 = 0x0000000000000080, - PTFF_SAVE_S8 = 0x0000000000000100, - PTFF_SAVE_S9 = 0x0000000000000200, - - PTFF_SAVE_SP = 0x0000000000000400, + PTFF_SAVE_S1 = 0x0000000000000001, + PTFF_SAVE_S2 = 0x0000000000000002, + PTFF_SAVE_S3 = 0x0000000000000004, + PTFF_SAVE_S4 = 0x0000000000000008, + PTFF_SAVE_S5 = 0x0000000000000010, + PTFF_SAVE_S6 = 0x0000000000000020, + PTFF_SAVE_S7 = 0x0000000000000040, + PTFF_SAVE_S8 = 0x0000000000000080, + PTFF_SAVE_S9 = 0x0000000000000100, + PTFF_SAVE_S10 = 0x0000000000000200, + PTFF_SAVE_S11 = 0x0000000000000400, + + PTFF_SAVE_SP = 0x0000000000000800, // Scratch registers - PTFF_SAVE_T0 = 0x0000000000000800, - PTFF_SAVE_T1 = 0x0000000000001000, - PTFF_SAVE_T2 = 0x0000000000002000, - PTFF_SAVE_T3 = 0x0000000000004000, - PTFF_SAVE_T4 = 0x0000000000008000, - PTFF_SAVE_T5 = 0x0000000000010000, - PTFF_SAVE_T6 = 0x0000000000020000, - PTFF_SAVE_T7 = 0x0000000000040000, - PTFF_SAVE_T8 = 0x0000000000080000, - PTFF_SAVE_T9 = 0x0000000000100000, - PTFF_SAVE_T10 = 0x0000000000200000, - PTFF_SAVE_T11 = 0x0000000000400000, - PTFF_SAVE_T12 = 0x0000000000800000, - PTFF_SAVE_T13 = 0x0000000001000000, - PTFF_SAVE_T14 = 0x0000000002000000, - PTFF_SAVE_T15 = 0x0000000004000000, - PTFF_SAVE_T16 = 0x0000000008000000, - PTFF_SAVE_T17 = 0x0000000010000000, - PTFF_SAVE_T18 = 0x0000000020000000, - - PTFF_SAVE_FP = 0x0000000040000000, - - PTFF_SAVE_RA = 0x0000000080000000, + PTFF_SAVE_R0 = 0x0000000000001000, + PTFF_SAVE_GP = 0x0000000000002000, + PTFF_SAVE_A0 = 0x0000000000004000, + PTFF_SAVE_A1 = 0x0000000000008000, + PTFF_SAVE_A2 = 0x0000000000010000, + PTFF_SAVE_A3 = 0x0000000000020000, + PTFF_SAVE_A4 = 0x0000000000040000, + PTFF_SAVE_A5 = 0x0000000000080000, + PTFF_SAVE_A6 = 0x0000000000100000, + PTFF_SAVE_A7 = 0x0000000000200000, + PTFF_SAVE_T0 = 0x0000000000400000, + PTFF_SAVE_T1 = 0x0000000000800000, + PTFF_SAVE_T2 = 0x0000000001000000, + PTFF_SAVE_T3 = 0x0000000002000000, + PTFF_SAVE_T4 = 0x0000000004000000, + PTFF_SAVE_T5 = 0x0000000008000000, + PTFF_SAVE_T6 = 0x0000000010000000, + + PTFF_SAVE_FP = 0x0000000020000000, + + PTFF_SAVE_RA = 0x0000000040000000, // used by hijack handler to report return value of hijacked method - PTFF_T0_IS_GCREF = 0x0000000100000000, - PTFF_T0_IS_BYREF = 0x0000000200000000, - PTFF_T1_IS_GCREF = 0x0000000400000000, - PTFF_T1_IS_BYREF = 0x0000000800000000, + PTFF_A0_IS_GCREF = 0x0000000080000000, + PTFF_A0_IS_BYREF = 0x0000000100000000, + PTFF_A1_IS_GCREF = 0x0000000200000000, + PTFF_A1_IS_BYREF = 0x0000000400000000, - PTFF_THREAD_ABORT = 
0x0000001000000000, + PTFF_THREAD_ABORT = 0x0000000800000000, }; #else // TARGET_ARM @@ -515,7 +514,7 @@ struct PInvokeTransitionFrame #else // USE_PORTABLE_HELPERS struct PInvokeTransitionFrame { -#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // The FP and LR registers are pushed in different order when setting up frames TgtPTR_Void m_FramePointer; TgtPTR_Void m_RIP; diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index 0d41b387e4683b..881f9b5e2a14cc 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -235,9 +235,24 @@ struct REGDISPLAY struct REGDISPLAY { + PTR_uintptr_t pR0; PTR_uintptr_t pRA; + PTR_uintptr_t pGP; + PTR_uintptr_t pTP; + PTR_uintptr_t pT0; + PTR_uintptr_t pT1; + PTR_uintptr_t pT2; + PTR_uintptr_t pFP; PTR_uintptr_t pS1; + PTR_uintptr_t pA0; + PTR_uintptr_t pA1; + PTR_uintptr_t pA2; + PTR_uintptr_t pA3; + PTR_uintptr_t pA4; + PTR_uintptr_t pA5; + PTR_uintptr_t pA6; + PTR_uintptr_t pA7; PTR_uintptr_t pS2; PTR_uintptr_t pS3; PTR_uintptr_t pS4; @@ -245,38 +260,16 @@ struct REGDISPLAY PTR_uintptr_t pS6; PTR_uintptr_t pS7; PTR_uintptr_t pS8; - - PTR_uintptr_t pFP; - uintptr_t SP; - - PTR_uintptr_t pT0; - PTR_uintptr_t pT1; - PTR_uintptr_t pT2; + PTR_uintptr_t pS9; + PTR_uintptr_t pS10; + PTR_uintptr_t pS11; PTR_uintptr_t pT3; PTR_uintptr_t pT4; PTR_uintptr_t pT5; PTR_uintptr_t pT6; - PTR_uintptr_t pT7; - PTR_uintptr_t pT8; - PTR_uintptr_t pT9; - PTR_uintptr_t pT10; - PTR_uintptr_t pT11; - - // Adding missing registers - PTR_uintptr_t pT12; - PTR_uintptr_t pT13; - PTR_uintptr_t pT14; - PTR_uintptr_t pT15; - PTR_uintptr_t pT16; - PTR_uintptr_t pT17; - PTR_uintptr_t pT18; - PTR_uintptr_t pT19; - PTR_uintptr_t pT20; - PTR_uintptr_t pT21; - PTR_uintptr_t pT22; - PTR_uintptr_t pT23; - - PCODE IP; + + uintptr_t SP; + PCODE IP; uint64_t F[32]; // Expanded to cover all F registers diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h index ef213437a96006..721c194f8effa9 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h +++ b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h @@ -7,61 +7,64 @@ // // NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix -PLAT_ASM_SIZEOF(848, ExInfo) +PLAT_ASM_SIZEOF(350, ExInfo) PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) PLAT_ASM_OFFSET(10, ExInfo, m_exception) PLAT_ASM_OFFSET(18, ExInfo, m_kind) -PLAT_ASM_OFFSET(1C, ExInfo, m_passNumber) -PLAT_ASM_OFFSET(20, ExInfo, m_idxCurClause) -PLAT_ASM_OFFSET(24, ExInfo, m_frameIter) -PLAT_ASM_OFFSET(2D8, ExInfo, m_notifyDebuggerSP) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1C, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(348, ExInfo, m_notifyDebuggerSP) -PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer) -PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) -PLAT_ASM_SIZEOF(552, StackFrameIterator) +PLAT_ASM_SIZEOF(328, StackFrameIterator) PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) 
PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) -PLAT_ASM_OFFSET(0F8, StackFrameIterator, m_OriginalControlPC) -PLAT_ASM_OFFSET(100, StackFrameIterator, m_pPreviousTransitionFrame) +PLAT_ASM_OFFSET(318, StackFrameIterator, m_OriginalControlPC) +PLAT_ASM_OFFSET(320, StackFrameIterator, m_pPreviousTransitionFrame) -PLAT_ASM_SIZEOF(336, PAL_LIMITED_CONTEXT) +PLAT_ASM_SIZEOF(E8, PAL_LIMITED_CONTEXT) PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP) PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, RA) -PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, R4) -PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, R5) -PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, R6) -PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, R7) -PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, R8) -PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R9) -PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R10) -PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R11) -PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, R12) -PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, R13) -PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, R14) -PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, R15) -PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP) -PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, TP) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, A0) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, A1) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, S1) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, S2) +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, S3) +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, S4) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, S5) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, S6) +PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, S7) +PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, S8) +PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, S9) +PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, S10) +PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, S11) +PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, SP) +PLAT_ASM_OFFSET(80, PAL_LIMITED_CONTEXT, IP) -PLAT_ASM_SIZEOF(288, REGDISPLAY) +PLAT_ASM_SIZEOF(208, REGDISPLAY) -PLAT_ASM_OFFSET(18, REGDISPLAY, SP) +PLAT_ASM_OFFSET(F8, REGDISPLAY, SP) -PLAT_ASM_OFFSET(B8, REGDISPLAY, pR4) -PLAT_ASM_OFFSET(C0, REGDISPLAY, pR5) -PLAT_ASM_OFFSET(C8, REGDISPLAY, pR6) -PLAT_ASM_OFFSET(D0, REGDISPLAY, pR7) -PLAT_ASM_OFFSET(D8, REGDISPLAY, pR8) -PLAT_ASM_OFFSET(E0, REGDISPLAY, pR9) -PLAT_ASM_OFFSET(E8, REGDISPLAY, pR10) -PLAT_ASM_OFFSET(F0, REGDISPLAY, pR11) -PLAT_ASM_OFFSET(F8, REGDISPLAY, pR12) -PLAT_ASM_OFFSET(100, REGDISPLAY, pFP) -PLAT_ASM_OFFSET(108, REGDISPLAY, pRA) -PLAT_ASM_OFFSET(110, REGDISPLAY, F) +PLAT_ASM_OFFSET(40, REGDISPLAY, pS1) +PLAT_ASM_OFFSET(88, REGDISPLAY, pS2) +PLAT_ASM_OFFSET(90, REGDISPLAY, pS3) +PLAT_ASM_OFFSET(98, REGDISPLAY, pS4) +PLAT_ASM_OFFSET(A0, REGDISPLAY, pS5) +PLAT_ASM_OFFSET(A8, REGDISPLAY, pS6) +PLAT_ASM_OFFSET(B0, REGDISPLAY, pS7) +PLAT_ASM_OFFSET(B8, REGDISPLAY, pS8) +PLAT_ASM_OFFSET(C0, REGDISPLAY, pS9) +PLAT_ASM_OFFSET(C8, REGDISPLAY, pS10) +PLAT_ASM_OFFSET(D0, REGDISPLAY, pS11) +PLAT_ASM_OFFSET(38, REGDISPLAY, pFP) +PLAT_ASM_OFFSET(8, REGDISPLAY, pRA) +PLAT_ASM_OFFSET(108, REGDISPLAY, F) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp index 217ba8de7db88b..53b15c5e2de725 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp @@ -118,6 +118,7 @@ #elif defined(HOST_RISCV64) +#define MCREG_R0(mc) ((mc).__gregs[0]) #define MCREG_Ra(mc) ((mc).__gregs[1]) #define MCREG_Sp(mc) ((mc).__gregs[2]) #define MCREG_Gp(mc) ((mc).__gregs[3]) @@ -224,6 +225,7 @@ #elif defined(HOST_RISCV64) 
+#define MCREG_R0(mc) ((mc).__gregs[0]) #define MCREG_Ra(mc) ((mc).__gregs[1]) #define MCREG_Sp(mc) ((mc).__gregs[2]) #define MCREG_Gp(mc) ((mc).__gregs[3]) @@ -404,6 +406,7 @@ #elif defined(HOST_RISCV64) +#define MCREG_R0(mc) ((mc).regs[0]) #define MCREG_Ra(mc) ((mc).regs[1]) #define MCREG_Sp(mc) ((mc).regs[2]) #define MCREG_Gp(mc) ((mc).regs[3]) @@ -618,13 +621,9 @@ ASSIGN_REG(S6, S6) \ ASSIGN_REG(S7, S7) \ ASSIGN_REG(S8, S8) \ - ASSIGN_REG(T0, T0) \ - ASSIGN_REG(T1, T1) \ - ASSIGN_REG(T2, T2) \ - ASSIGN_REG(T3, T3) \ - ASSIGN_REG(T4, T4) \ - ASSIGN_REG(T5, T5) \ - ASSIGN_REG(T6, T6) + ASSIGN_REG(S9, S9) \ + ASSIGN_REG(S10, S10) \ + ASSIGN_REG(S11, S11) #define ASSIGN_TWO_ARGUMENT_REGS \ MCREG_A0(nativeContext->uc_mcontext) = arg0Reg; \ @@ -832,6 +831,7 @@ uint64_t GetPC(void* context) #elif TARGET_RISCV64 + uint64_t& UNIX_CONTEXT::R0() { return (uint64_t&)MCREG_R0(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::Ra() { return (uint64_t&)MCREG_Ra(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::Gp() { return (uint64_t&)MCREG_Gp(ctx.uc_mcontext); } diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h index e9cf397ccdebdb..68c5f0a99f6048 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h @@ -215,6 +215,7 @@ struct UNIX_CONTEXT #elif defined(TARGET_RISCV64) + uint64_t& R0(); uint64_t& Ra(); uint64_t& Sp(); uint64_t& Gp(); @@ -257,7 +258,7 @@ struct UNIX_CONTEXT // It is expected that registers are stored in a contiguous manner // Here are some asserts to check ASSERT(&A0() + 1 == &A1()); - ASSERT(&A0() + 8 == &A7()); + ASSERT(&A0() + 7 == &A7()); for (uint64_t* pReg = &Ra(); pReg <= &T6(); pReg++) lambda((size_t*)pReg); diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 7cf9f8c5bff6aa..52d8978f4fb2f6 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -277,10 +277,10 @@ C_FUNC(\Name): .endm // Note: these must match the defs in PInvokeTransitionFrameFlags -#define PTFF_SAVE_SP 0x00000400 -#define PTFF_SAVE_A4 0x00000800 -#define PTFF_SAVE_A5 0x00001000 -#define PTFF_SAVE_ALL_PRESERVED 0x000003FF // NOTE: x19-x28 +#define PTFF_SAVE_SP 0x00000800 +#define PTFF_SAVE_A0 0x00004000 +#define PTFF_SAVE_A1 0x00008000 +#define PTFF_SAVE_ALL_PRESERVED 0x000007FF // NOTE: S1-S11 #define DEFAULT_FRAME_SAVE_FLAGS PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 70d879d0d6465f..39a961ef543840 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -1902,6 +1902,7 @@ OBJECTREF* GcInfoDecoder::GetRegisterSlot( #ifdef FEATURE_NATIVEAOT PTR_uintptr_t* ppReg = &pRD->pR0; + if (regNum >= 3) regNum--; // sp is skipped in NativeAOT RegDisplay return (OBJECTREF*)*(ppReg + regNum); #else if(regNum == 1) diff --git a/src/native/external/llvm-libunwind/include/__libunwind_config.h b/src/native/external/llvm-libunwind/include/__libunwind_config.h index aa527c499104ba..39d48af424f5f6 100644 --- a/src/native/external/llvm-libunwind/include/__libunwind_config.h +++ b/src/native/external/llvm-libunwind/include/__libunwind_config.h @@ -155,7 +155,7 @@ # if __riscv_xlen == 32 # define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 7) # elif 
__riscv_xlen == 64 -# define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 12) + 32 +# define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 12) # else # error "Unsupported RISC-V ABI" # endif From ec0776cd7f221b818268f0a4a4af1405931573a8 Mon Sep 17 00:00:00 2001 From: sunlijun Date: Thu, 14 Nov 2024 17:25:25 +0800 Subject: [PATCH 07/19] Revert the changes in UnwindCursor.hpp and REGDISPLAY. --- .../nativeaot/Runtime/StackFrameIterator.cpp | 2 +- src/coreclr/nativeaot/Runtime/regdisplay.h | 3 ++- .../nativeaot/Runtime/riscv64/AsmOffsetsCpu.h | 24 +++++++++---------- src/coreclr/vm/gcinfodecoder.cpp | 1 - .../llvm-libunwind/src/UnwindCursor.hpp | 18 ++------------ 5 files changed, 17 insertions(+), 31 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index b2547df839c306..de6f518a4188af 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1488,7 +1488,7 @@ struct UniversalTransitionStackFrame private: uintptr_t m_pushedRA; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (ra) uintptr_t m_pushedFP; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (fp) - Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (f10-f17) + Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index 881f9b5e2a14cc..b3158741c44d50 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -238,6 +238,8 @@ struct REGDISPLAY PTR_uintptr_t pR0; PTR_uintptr_t pRA; + uintptr_t SP; + PTR_uintptr_t pGP; PTR_uintptr_t pTP; PTR_uintptr_t pT0; @@ -268,7 +270,6 @@ struct REGDISPLAY PTR_uintptr_t pT5; PTR_uintptr_t pT6; - uintptr_t SP; PCODE IP; uint64_t F[32]; // Expanded to cover all F registers diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h index 721c194f8effa9..d739c5691cd6c0 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h +++ b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h @@ -54,17 +54,17 @@ PLAT_ASM_SIZEOF(208, REGDISPLAY) PLAT_ASM_OFFSET(F8, REGDISPLAY, SP) -PLAT_ASM_OFFSET(40, REGDISPLAY, pS1) -PLAT_ASM_OFFSET(88, REGDISPLAY, pS2) -PLAT_ASM_OFFSET(90, REGDISPLAY, pS3) -PLAT_ASM_OFFSET(98, REGDISPLAY, pS4) -PLAT_ASM_OFFSET(A0, REGDISPLAY, pS5) -PLAT_ASM_OFFSET(A8, REGDISPLAY, pS6) -PLAT_ASM_OFFSET(B0, REGDISPLAY, pS7) -PLAT_ASM_OFFSET(B8, REGDISPLAY, pS8) -PLAT_ASM_OFFSET(C0, REGDISPLAY, pS9) -PLAT_ASM_OFFSET(C8, REGDISPLAY, pS10) -PLAT_ASM_OFFSET(D0, REGDISPLAY, pS11) -PLAT_ASM_OFFSET(38, REGDISPLAY, pFP) +PLAT_ASM_OFFSET(48, REGDISPLAY, pS1) +PLAT_ASM_OFFSET(90, REGDISPLAY, pS2) +PLAT_ASM_OFFSET(98, REGDISPLAY, pS3) +PLAT_ASM_OFFSET(A0, REGDISPLAY, pS4) +PLAT_ASM_OFFSET(A8, REGDISPLAY, pS5) +PLAT_ASM_OFFSET(B0, REGDISPLAY, pS6) +PLAT_ASM_OFFSET(B8, REGDISPLAY, pS7) +PLAT_ASM_OFFSET(C0, REGDISPLAY, pS8) +PLAT_ASM_OFFSET(C8, REGDISPLAY, pS9) +PLAT_ASM_OFFSET(D0, REGDISPLAY, pS10) +PLAT_ASM_OFFSET(D8, REGDISPLAY, pS11) +PLAT_ASM_OFFSET(40, REGDISPLAY, pFP) PLAT_ASM_OFFSET(8, REGDISPLAY, pRA) PLAT_ASM_OFFSET(108, REGDISPLAY, F) diff --git a/src/coreclr/vm/gcinfodecoder.cpp 
b/src/coreclr/vm/gcinfodecoder.cpp index 39a961ef543840..70d879d0d6465f 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -1902,7 +1902,6 @@ OBJECTREF* GcInfoDecoder::GetRegisterSlot( #ifdef FEATURE_NATIVEAOT PTR_uintptr_t* ppReg = &pRD->pR0; - if (regNum >= 3) regNum--; // sp is skipped in NativeAOT RegDisplay return (OBJECTREF*)*(ppReg + regNum); #else if(regNum == 1) diff --git a/src/native/external/llvm-libunwind/src/UnwindCursor.hpp b/src/native/external/llvm-libunwind/src/UnwindCursor.hpp index 7dc23b6a0f8c18..0920b2e5e0a795 100644 --- a/src/native/external/llvm-libunwind/src/UnwindCursor.hpp +++ b/src/native/external/llvm-libunwind/src/UnwindCursor.hpp @@ -1335,26 +1335,12 @@ UnwindCursor::UnwindCursor(A &as) memset(&_info, 0, sizeof(_info)); } -// TODO: remove when we figure out where the 96 bytes difference is coming from -template -void check_size() { - static_assert(ExpectedSize == RealSize, "Size is off!"); -} - template UnwindCursor::UnwindCursor(unw_context_t *context, A &as) : _addressSpace(as), _registers(context), _unwindInfoMissing(false), _isSignalFrame(false) { - -// TODO: remove this helper (which shows size of both args as a compile-time error) when we -// figure out where the 96 bytes difference is coming from - -// check_size, 2>(); -// check_size(); - -// TODO: uncomment when the above TODO is resolved.. -// static_assert((check_fit, unw_cursor_t>::does_fit), -// "UnwindCursor<> does not fit in unw_cursor_t"); + static_assert((check_fit, unw_cursor_t>::does_fit), + "UnwindCursor<> does not fit in unw_cursor_t"); static_assert((alignof(UnwindCursor) <= alignof(unw_cursor_t)), "UnwindCursor<> requires more alignment than unw_cursor_t"); memset(&_info, 0, sizeof(_info)); From 6fce606a879639117524767f8593ab1c0d716f0a Mon Sep 17 00:00:00 2001 From: sunlijun Date: Fri, 29 Nov 2024 17:03:03 +0800 Subject: [PATCH 08/19] Fix *.S files in riscv-nativeaot. 
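The GCRefKind conversions in ICodeManager.h still shifted by 32 and tested the
old PTFF_T0/T1 bits; RISC-V returns managed values in a0/a1, and the flag
layout from the earlier register fix puts the four GC-ref bits at bits 31..34
of the transition-frame flags, so the shift becomes 31. A minimal stand-alone
sketch of the round-trip checked by the C_ASSERTs below, assuming the GCRK_*
numbering those asserts imply (Scalar=0, Object=1, Byref=2, Scalar_Obj=4,
Scalar_Byref=8):

    #include <assert.h>
    #include <stdint.h>

    // Flag values mirror the RISC-V PInvokeTransitionFrameFlags in rhbinder.h
    #define PTFF_A0_IS_GCREF 0x0000000080000000ull  // bit 31
    #define PTFF_A0_IS_BYREF 0x0000000100000000ull  // bit 32
    #define PTFF_A1_IS_GCREF 0x0000000200000000ull  // bit 33
    #define PTFF_A1_IS_BYREF 0x0000000400000000ull  // bit 34

    // GCRK_* numbering assumed from the C_ASSERTs in ICodeManager.h
    typedef enum { GCRK_Scalar = 0, GCRK_Object = 1, GCRK_Byref = 2,
                   GCRK_Scalar_Obj = 4, GCRK_Scalar_Byref = 8 } GCRefKind;

    // Only the GC-ref bits are reported here; the PTFF_SAVE_A0/A1 bits are
    // added separately by the frame building routine.
    static uint64_t ReturnKindToTransitionFrameFlags(GCRefKind kind)
    {
        return (uint64_t)kind << 31;
    }

    static GCRefKind TransitionFrameFlagsToReturnKind(uint64_t flags)
    {
        return (GCRefKind)((flags & (PTFF_A0_IS_GCREF | PTFF_A0_IS_BYREF |
                                     PTFF_A1_IS_GCREF | PTFF_A1_IS_BYREF)) >> 31);
    }

    int main(void)
    {
        assert(ReturnKindToTransitionFrameFlags(GCRK_Object) == PTFF_A0_IS_GCREF);
        assert(TransitionFrameFlagsToReturnKind(PTFF_A1_IS_BYREF) == GCRK_Scalar_Byref);
        assert(TransitionFrameFlagsToReturnKind(0) == GCRK_Scalar); // no GC-ref bits set
        return 0;
    }

With the GC-ref bits at 31..34, PTFF_THREAD_ABORT lands at bit 35
(0x0000000800000000), matching the rhbinder.h values from the earlier patch in
this series.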
--- src/coreclr/nativeaot/Runtime/ICodeManager.h | 14 +- src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 2 +- .../nativeaot/Runtime/riscv64/AllocFast.S | 106 ++- .../Runtime/riscv64/ExceptionHandling.S | 668 +++++++++--------- .../nativeaot/Runtime/riscv64/GcProbe.S | 126 ++-- .../Runtime/riscv64/InteropThunksHelpers.S | 4 +- .../nativeaot/Runtime/riscv64/PInvoke.S | 8 +- .../nativeaot/Runtime/riscv64/StubDispatch.S | 20 +- .../Runtime/riscv64/UniversalTransition.S | 100 ++- .../Runtime/unix/unixasmmacrosriscv64.inc | 2 +- .../external/llvm-libunwind/src/Registers.hpp | 2 +- 11 files changed, 532 insertions(+), 520 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index 3f4c824b2f8940..295ba936080b3d 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -88,22 +88,22 @@ inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) #elif defined(TARGET_RISCV64) // Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back -C_ASSERT( PTFF_T0_IS_GCREF == ((uint64_t)GCRK_Object << 32)); -C_ASSERT(PTFF_T0_IS_BYREF == ((uint64_t)GCRK_Byref << 32)); -C_ASSERT(PTFF_T1_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 32)); -C_ASSERT(PTFF_T1_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 32)); +C_ASSERT(PTFF_A0_IS_GCREF == ((uint64_t)GCRK_Object << 31)); +C_ASSERT(PTFF_A0_IS_BYREF == ((uint64_t)GCRK_Byref << 31)); +C_ASSERT(PTFF_A1_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 31)); +C_ASSERT(PTFF_A1_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 31)); inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) { // Just need to report GC ref bits here. // Appropriate PTFF_SAVE_ bits will be added by the frame building routine. 
- return ((uint64_t)returnKind << 32); + return ((uint64_t)returnKind << 31); } inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) { - GCRefKind returnKind = (GCRefKind)((transFrameFlags & ( PTFF_T0_IS_GCREF | PTFF_T0_IS_BYREF | PTFF_T1_IS_GCREF | PTFF_T1_IS_BYREF)) >> 32); - ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_T0) && (transFrameFlags & PTFF_SAVE_T1))); + GCRefKind returnKind = (GCRefKind)((transFrameFlags & ( PTFF_A0_IS_GCREF | PTFF_A0_IS_BYREF | PTFF_A1_IS_GCREF | PTFF_A1_IS_BYREF)) >> 31); + ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_A0) && (transFrameFlags & PTFF_SAVE_A1))); return returnKind; } diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 5bf3c781cea244..1a8675b6899c46 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -514,7 +514,7 @@ struct PInvokeTransitionFrame #else // USE_PORTABLE_HELPERS struct PInvokeTransitionFrame { -#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // The FP and LR registers are pushed in different order when setting up frames TgtPTR_Void m_FramePointer; TgtPTR_Void m_RIP; diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S index 9f563fbefdde8e..919e601a914740 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -10,8 +10,8 @@ // // Rename fields of nested structs // -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) // Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's // allocation context then automatically fallback to the slow allocation path. @@ -28,7 +28,7 @@ // // a0 contains MethodTable pointer // - ld a2, OFFSETOF__MethodTable__m_uBaseSize(a0) + lw a2, OFFSETOF__MethodTable__m_uBaseSize(a0) // // a0: MethodTable pointer @@ -37,35 +37,33 @@ // // Load potential new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add a2, a2, t3 - ld t4, OFFSETOF__Thread__m_alloc_context__alloc_limit(a1) - bltu t4, a2, RhpNewFast_RarePath + add a2, a2, t3 + ld t4, OFFSETOF__Thread__m_alloc_context__alloc_limit(a1) + bltu t4, a2, LOCAL_LABEL(RhpNewFast_RarePath) // Update the alloc pointer to account for the allocation. 
- sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) + sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) // Set the new objects MethodTable pointer - sd a0, OFFSETOF__Object__m_pEEType(t3) - - mv a0, t3 - j RhpNewFast_Return + sd a0, OFFSETOF__Object__m_pEEType(t3) -RhpNewFast_RarePath: - mv a1, x0 - j RhpNewObject + mv a0, t3 + ret -RhpNewFast_Return: +LOCAL_LABEL(RhpNewFast_RarePath): + mv a1, zero + tail RhpNewObject LEAF_END RhpNewFast, _TEXT // Allocate non-array object with finalizer. // a0 == MethodTable LEAF_ENTRY RhpNewFinalizable, _TEXT - li a1, GC_ALLOC_FINALIZE - j RhpNewObject + li a1, GC_ALLOC_FINALIZE + tail RhpNewObject LEAF_END RhpNewFinalizable, _TEXT // Allocate non-array object. @@ -78,16 +76,16 @@ RhpNewFast_Return: // a3: transition frame // Preserve the MethodTable in s0 - mv s0, a0 + mv s0, a0 - li a2, 0 // numElements + li a2, 0 // numElements // Call the rest of the allocation helper. // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) call C_FUNC(RhpGcAlloc) // Set the new object's MethodTable pointer on success. - beq a0, x0, NewOutOfMemory + beq a0, zero, LOCAL_LABEL(NewOutOfMemory) .cfi_remember_state POP_COOP_PINVOKE_FRAME @@ -95,15 +93,15 @@ RhpNewFast_Return: .cfi_restore_state -NewOutOfMemory: +LOCAL_LABEL(NewOutOfMemory): // This is the OOM failure path. We are going to tail-call to a managed helper that will throw // an out of memory exception that the caller of this allocator understands. - mv a0, s0 // MethodTable pointer - li a1, 0 // Indicate that we should throw OOM. + mv a0, s0 // MethodTable pointer + li a1, 0 // Indicate that we should throw OOM. POP_COOP_PINVOKE_FRAME - j C_FUNC(RhExceptionHandling_FailedAllocation) + tail C_FUNC(RhExceptionHandling_FailedAllocation) NESTED_END RhpNewObject, _TEXT @@ -112,15 +110,14 @@ NewOutOfMemory: // a1 == element/character count LEAF_ENTRY RhNewString, _TEXT // Make sure computing the overall allocation size won't overflow - lui a2, ((MAX_STRING_LENGTH >> 12) & 0xFFFFF) // Load upper 20 bits of MAX_STRING_LENGTH - addi a2, a2, ((MAX_STRING_LENGTH & 0xFFF) - 4096) // Adjust lower 12 bits with addi - bltu a1, a2, StringSizeOverflow // Branch if a1 < a2 (overflow) + li a2, MAX_STRING_LENGTH + bltu a2, a1, LOCAL_LABEL(StringSizeOverflow) // Branch if a2 < a1 (overflow) // Compute overall allocation size (align(base size + (element size * elements), 8)). - li a3, STRING_COMPONENT_SIZE // Load STRING_COMPONENT_SIZE into a3 - mul a2, a1, a3 // a2 = a1 * STRING_COMPONENT_SIZE - addi a2, a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7 - andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) + li a3, STRING_COMPONENT_SIZE // Load STRING_COMPONENT_SIZE into a3 + mul a2, a1, a3 // a2 = a1 * STRING_COMPONENT_SIZE + addi a2, a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7 + andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) // a0 == MethodTable // a1 == element count @@ -138,8 +135,8 @@ NewOutOfMemory: // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add a2, a2, t3 - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_limit(a3) - bltu t3, a2, RhNewString_Rare + ld t3, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a3) + bltu t3, a2, LOCAL_LABEL(RhNewString_Rare) // Reload new object address into t3. 
ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) @@ -154,21 +151,19 @@ NewOutOfMemory: // Return the object allocated in a0. mv a0, t3 - j RhNewString_Return + ret -StringSizeOverflow: +LOCAL_LABEL(StringSizeOverflow): // We get here if the length of the final string object cannot be represented as an unsigned // 32-bit value. We are going to tail-call to a managed helper that will throw // an OOM exception that the caller of this allocator understands. // a0 holds MethodTable pointer already - li a1, 1 // Indicate that we should throw OverflowException - j C_FUNC(RhExceptionHandling_FailedAllocation) - -RhNewString_Rare: - j C_FUNC(RhpNewArrayRare) + li a1, 1 // Indicate that we should throw OverflowException + tail C_FUNC(RhExceptionHandling_FailedAllocation) -RhNewString_Return: +LOCAL_LABEL(RhNewString_Rare): + tail C_FUNC(RhpNewArrayRare) LEAF_END RhNewString, _TEXT // Allocate one-dimensional, zero-based array (SZARRAY). @@ -180,9 +175,8 @@ RhNewString_Return: // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst // case (32-dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. - lui a2, 0x7ffff // Load upper 20 bits of 0x7FFFFFFF into a2 - addi a2, a2, 0x3ff // Adjust the lower 12 bits to get 0x7FFFFFFF (maximum 32-bit int) - bltu a1, a2, ArraySizeOverflow // Branch if a1 < a2 (check for overflow) + li a2, 0x7fffffff + bltu a2, a1, LOCAL_LABEL(ArraySizeOverflow) // Branch if a2 < a1 (check for overflow) ld a2, OFFSETOF__MethodTable__m_usComponentSize(a0) // Load component size mul a2, a1, a2 // a2 = a1 * component size @@ -203,8 +197,8 @@ RhNewString_Return: // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add a2, a2, t3 - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_limit(a3) - bltu t3, a2, RhpNewArray_Rare + ld t3, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a3) + bltu t3, a2, LOCAL_LABEL(RhpNewArray_Rare) // Reload new object address into t3. ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) @@ -219,21 +213,19 @@ RhNewString_Return: // Return the object allocated in a0. mv a0, t3 - j RhpNewArray_Return + ret -ArraySizeOverflow: +LOCAL_LABEL(ArraySizeOverflow): // We get here if the size of the final array object cannot be represented as an unsigned // 32-bit value. We are going to tail-call to a managed helper that will throw // an overflow exception that the caller of this allocator understands. // a0 holds MethodTable pointer already li a1, 1 // Indicate that we should throw OverflowException - j C_FUNC(RhExceptionHandling_FailedAllocation) - -RhpNewArray_Rare: - j C_FUNC(RhpNewArrayRare) + tail C_FUNC(RhExceptionHandling_FailedAllocation) -RhpNewArray_Return: +LOCAL_LABEL(RhpNewArray_Rare): + tail C_FUNC(RhpNewArrayRare) LEAF_END RhpNewArray, _TEXT // Allocate one-dimensional, zero-based array (SZARRAY) using the slow path that calls a runtime helper. @@ -259,7 +251,7 @@ RhpNewArray_Return: call C_FUNC(RhpGcAlloc) // Set the new object's MethodTable pointer and length on success. - beq a0, x0, ArrayOutOfMemory + beq a0, zero, LOCAL_LABEL(ArrayOutOfMemory) .cfi_remember_state POP_COOP_PINVOKE_FRAME @@ -267,7 +259,7 @@ RhpNewArray_Return: .cfi_restore_state -ArrayOutOfMemory: +LOCAL_LABEL(ArrayOutOfMemory): // This is the OOM failure path. 
We are going to tail-call to a managed helper that will throw // an out of memory exception that the caller of this allocator understands. @@ -275,6 +267,6 @@ ArrayOutOfMemory: li a1, 0 // Indicate that we should throw OOM. POP_COOP_PINVOKE_FRAME - j C_FUNC(RhExceptionHandling_FailedAllocation) + tail C_FUNC(RhExceptionHandling_FailedAllocation) NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S index 145e81564ccc5d..d236a518ae2ecb 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S @@ -19,37 +19,41 @@ // Setup a PAL_LIMITED_CONTEXT on the stack { .if \exceptionType == HARDWARE_EXCEPTION - addi sp, sp, -0x50 - .cfi_adjust_cfa_offset 0x50 + addi sp, sp, -0x70 + .cfi_adjust_cfa_offset 0x70 sd a3, 0(sp) // a3 is the SP and a1 is the IP of the fault site sd a1, 8(sp) .else - PROLOG_STACK_ALLOC 0x50 - .cfi_adjust_cfa_offset 0x50 + PROLOG_STACK_ALLOC 0x70 + .cfi_adjust_cfa_offset 0x70 sd a3, 0(sp) // a3 is the SP and ra is the IP of the fault site sd ra, 8(sp) .endif // Safely using available registers for floating-point saves - fsd ft0, 0x10(sp) - fsd ft1, 0x18(sp) - fsd ft2, 0x20(sp) - fsd ft3, 0x28(sp) - fsd ft4, 0x30(sp) - fsd ft5, 0x38(sp) - fsd ft6, 0x40(sp) - fsd ft7, 0x48(sp) - - // Adjust the registers used in the following line - PROLOG_SAVE_REG_PAIR_INDEXED s10, s11, 0x70 + fsd fs0, 0x10(sp) + fsd fs1, 0x18(sp) + fsd fs2, 0x20(sp) + fsd fs3, 0x28(sp) + fsd fs4, 0x30(sp) + fsd fs5, 0x38(sp) + fsd fs6, 0x40(sp) + fsd fs7, 0x48(sp) + fsd fs8, 0x50(sp) + fsd fs9, 0x58(sp) + fsd fs10, 0x60(sp) + fsd fs11, 0x68(sp) + + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x78 sd zero, 0x10(sp) // locations reserved for return value, not used for exception handling sd zero, 0x18(sp) - PROLOG_SAVE_REG_PAIR s0, s1, 0x20 - PROLOG_SAVE_REG_PAIR s2, s3, 0x30 - PROLOG_SAVE_REG_PAIR s4, s5, 0x40 - PROLOG_SAVE_REG_PAIR s6, s7, 0x50 - PROLOG_SAVE_REG_PAIR s8, s9, 0x60 + PROLOG_SAVE_REG_PAIR s1, s2, 0x20 + PROLOG_SAVE_REG_PAIR s3, s4, 0x30 + PROLOG_SAVE_REG_PAIR s5, s6, 0x40 + PROLOG_SAVE_REG_PAIR s7, s8, 0x50 + PROLOG_SAVE_REG_PAIR s9, s10, 0x60 + PROLOG_SAVE_REG s11, 0x70 // } end PAL_LIMITED_CONTEXT PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo @@ -66,14 +70,15 @@ // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body // of method. However, this method needs to be able to change fp before calling funclet. // This is required to access locals in funclet. 
- PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED x22, x1, 96 - PROLOG_SAVE_REG_PAIR x23, x24, 16 - PROLOG_SAVE_REG_PAIR x25, x26, 32 - PROLOG_SAVE_REG_PAIR x27, x28, 48 - PROLOG_SAVE_REG_PAIR x29, x30, 64 - PROLOG_SAVE_REG_PAIR x31, x2, 80 - addi fp, sp, 0 - .cfi_def_cfa_register 22 //fp + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, ra, 0x68 + PROLOG_SAVE_REG_PAIR s1, s2, 0x10 + PROLOG_SAVE_REG_PAIR s3, s4, 0x20 + PROLOG_SAVE_REG_PAIR s5, s6, 0x30 + PROLOG_SAVE_REG_PAIR s7, s8, 0x40 + PROLOG_SAVE_REG_PAIR s9, s10, 0x50 + PROLOG_SAVE_REG s11, 0x60 + mv fp, sp + .cfi_def_cfa_register fp .if \extraStackSize != 0 PROLOG_STACK_ALLOC \extraStackSize @@ -91,12 +96,13 @@ EPILOG_STACK_FREE \extraStackSize .endif - EPILOG_RESTORE_REG_PAIR t0, t1, 16 - EPILOG_RESTORE_REG_PAIR t2, t3, 32 - EPILOG_RESTORE_REG_PAIR t4, t5, 48 - EPILOG_RESTORE_REG_PAIR t6, s0, 64 - EPILOG_RESTORE_REG_PAIR s1, s2, 80 - EPILOG_RESTORE_REG_PAIR_INDEXED s3, ra, 96 + EPILOG_RESTORE_REG_PAIR s1, s2, 0x10 + EPILOG_RESTORE_REG_PAIR s3, s4, 0x20 + EPILOG_RESTORE_REG_PAIR s5, s6, 0x30 + EPILOG_RESTORE_REG_PAIR s7, s8, 0x40 + EPILOG_RESTORE_REG_PAIR s9, s10, 0x50 + EPILOG_RESTORE_REG s11, 0x60 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x68 .endm // ----------------------------------------------------------------------------- @@ -105,39 +111,47 @@ .macro RESTORE_PRESERVED_REGISTERS regdisplayReg // Load general-purpose registers that are defined - ld t3, OFFSETOF__REGDISPLAY__pR4(\regdisplayReg) // Load address of pR4 - ld s0, 0(t3) // Load pR4 into s0 - ld t3, OFFSETOF__REGDISPLAY__pR5(\regdisplayReg) // Load address of pR5 - ld s1, 0(t3) // Load pR5 into s1 - ld t3, OFFSETOF__REGDISPLAY__pR6(\regdisplayReg) // Load address of pR6 - ld s2, 0(t3) // Load pR6 into s2 - ld t3, OFFSETOF__REGDISPLAY__pR7(\regdisplayReg) // Load address of pR7 - ld s3, 0(t3) // Load pR7 into s3 - ld t3, OFFSETOF__REGDISPLAY__pR8(\regdisplayReg) // Load address of pR8 - ld s4, 0(t3) // Load pR8 into s4 - ld t3, OFFSETOF__REGDISPLAY__pR9(\regdisplayReg) // Load address of pR9 - ld s5, 0(t3) // Load pR9 into s5 - ld t3, OFFSETOF__REGDISPLAY__pR10(\regdisplayReg) // Load address of pR10 - ld s6, 0(t3) // Load pR10 into s6 - ld t3, OFFSETOF__REGDISPLAY__pR11(\regdisplayReg) // Load address of pR11 - ld s7, 0(t3) // Load pR11 into s7 - ld t3, OFFSETOF__REGDISPLAY__pR12(\regdisplayReg) // Load address of pR12 - ld s8, 0(t3) // Load pR12 into s8 - ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) // Load address of pFP - ld fp, 0(t3) // Load pFP into fp + ld t3, OFFSETOF__REGDISPLAY__pS1(\regdisplayReg) // Load address of pS1 + ld s1, 0(t3) // Load pS1 into s1 + ld t3, OFFSETOF__REGDISPLAY__pS2(\regdisplayReg) // Load address of pS2 + ld s2, 0(t3) // Load pS2 into s2 + ld t3, OFFSETOF__REGDISPLAY__pS3(\regdisplayReg) // Load address of pS3 + ld s3, 0(t3) // Load pS3 into s3 + ld t3, OFFSETOF__REGDISPLAY__pS4(\regdisplayReg) // Load address of pS4 + ld s4, 0(t3) // Load pS4 into s4 + ld t3, OFFSETOF__REGDISPLAY__pS5(\regdisplayReg) // Load address of pS5 + ld s5, 0(t3) // Load pS5 into s5 + ld t3, OFFSETOF__REGDISPLAY__pS6(\regdisplayReg) // Load address of pS6 + ld s6, 0(t3) // Load pS6 into s6 + ld t3, OFFSETOF__REGDISPLAY__pS7(\regdisplayReg) // Load address of pS7 + ld s7, 0(t3) // Load pS7 into s7 + ld t3, OFFSETOF__REGDISPLAY__pS8(\regdisplayReg) // Load address of pS8 + ld s8, 0(t3) // Load pS8 into s8 + ld t3, OFFSETOF__REGDISPLAY__pS9(\regdisplayReg) // Load address of pS9 + ld s9, 0(t3) // Load pS9 into s9 + ld t3, OFFSETOF__REGDISPLAY__pS10(\regdisplayReg) // Load 
address of pS10 + ld s10, 0(t3) // Load pS10 into s10 + ld t3, OFFSETOF__REGDISPLAY__pS11(\regdisplayReg) // Load address of pS11 + ld s11, 0(t3) // Load pS11 into s11 + ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) // Load address of pFP + ld fp, 0(t3) // Load pFP into fp // // Load FP preserved registers // addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F // Base address of floating-point registers - fld f24, 0(t3) // Load f24 - fld f25, 8(t3) // Load f25 - fld f26, 16(t3) // Load f26 - fld f27, 24(t3) // Load f27 - fld f28, 32(t3) // Load f28 - fld f29, 40(t3) // Load f29 - fld f30, 48(t3) // Load f30 - fld f31, 56(t3) // Load f31 + fld fs0, 0x40(t3) // Load fs0 + fld fs1, 0x48(t3) // Load fs1 + fld fs2, 0x90(t3) // Load fs2 + fld fs3, 0x98(t3) // Load fs3 + fld fs4, 0xa0(t3) // Load fs4 + fld fs5, 0xa8(t3) // Load fs5 + fld fs6, 0xb0(t3) // Load fs6 + fld fs7, 0xb8(t3) // Load fs7 + fld fs8, 0xc0(t3) // Load fs8 + fld fs9, 0xc8(t3) // Load fs9 + fld fs10, 0xd0(t3) // Load fs10 + fld fs11, 0xd8(t3) // Load fs11 .endm @@ -147,37 +161,45 @@ .macro SAVE_PRESERVED_REGISTERS regdisplayReg // Save general purpose registers - ld t3, OFFSETOF__REGDISPLAY__pR4(\regdisplayReg) - sd s0, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR5(\regdisplayReg) - sd s1, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR6(\regdisplayReg) - sd s2, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR7(\regdisplayReg) - sd s3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR8(\regdisplayReg) - sd s4, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR9(\regdisplayReg) - sd s5, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR10(\regdisplayReg) - sd s6, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR11(\regdisplayReg) - sd s7, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR12(\regdisplayReg) - sd s8, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) - sd fp, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS1(\regdisplayReg) + sd s1, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS2(\regdisplayReg) + sd s2, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS3(\regdisplayReg) + sd s3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS4(\regdisplayReg) + sd s4, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS5(\regdisplayReg) + sd s5, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS6(\regdisplayReg) + sd s6, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS7(\regdisplayReg) + sd s7, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS8(\regdisplayReg) + sd s8, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS9(\regdisplayReg) + sd s9, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS10(\regdisplayReg) + sd s10, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS11(\regdisplayReg) + sd s11, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) + sd fp, 0(t3) // Save floating-point registers - addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F - fsd f24, 0(t3) - fsd f25, 8(t3) - fsd f26, 16(t3) - fsd f27, 24(t3) - fsd f28, 32(t3) - fsd f29, 40(t3) - fsd f30, 48(t3) - fsd f31, 56(t3) + addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F + fsd fs0, 0x40(t3) + fsd fs1, 0x48(t3) + fsd fs2, 0x90(t3) + fsd fs3, 0x98(t3) + fsd fs4, 0xa0(t3) + fsd fs5, 0xa8(t3) + fsd fs6, 0xb0(t3) + fsd fs7, 0xb8(t3) + fsd fs8, 0xc0(t3) + fsd fs9, 0xc8(t3) + fsd fs10, 0xd0(t3) + fsd fs11, 0xd8(t3) .endm @@ -189,34 +211,33 @@ #if _DEBUG // Create a pattern to store - li a3, 0xbaadd // Load immediate lower 16 bits - slli a3, a3, 16 // Shift left by 16 bits - li t0, 0xeed // Load immediate lower bits - or a3, a3, t0 // Combine both parts into a3 - li t1, 0xddeed // Load 32-bit value - sd a3, 0(t1) // Store double (64-bit) into a3 - li t2, 0xbaa // Load lower part - slli t2, t2, 16 // Shift left by 16 bits - or a3, a3, t2 // 
Combine into a3 + li a3, 0xbaaddeed + mv t0, a3 + slli a3, a3, 32 + or a3, a3, t0 // Store the pattern into each register's location - ld t3, OFFSETOF__REGDISPLAY__pR4(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS1(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS2(\regdisplayReg) sd a3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR5(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS3(\regdisplayReg) sd a3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR6(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS4(\regdisplayReg) sd a3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR7(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS5(\regdisplayReg) sd a3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR8(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS6(\regdisplayReg) sd a3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR9(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS7(\regdisplayReg) sd a3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR10(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS8(\regdisplayReg) sd a3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR11(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS9(\regdisplayReg) sd a3, 0(t3) - ld t3, OFFSETOF__REGDISPLAY__pR12(\regdisplayReg) + ld t3, OFFSETOF__REGDISPLAY__pS10(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS11(\regdisplayReg) sd a3, 0(t3) ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) sd a3, 0(t3) @@ -228,7 +249,7 @@ sd a0, 0(sp) sd a1, 8(sp) call C_FUNC(RhpGetThread) - addi a2, a0, 0 + mv a2, a0 ld a0, 0(sp) ld a1, 8(sp) addi sp, sp, 16 @@ -237,13 +258,6 @@ #define rsp_offsetof_ExInfo 0 #define rsp_offsetof_Context STACKSIZEOF_ExInfo -.macro ADD_LARGE_IMM reg, base, imm - // Handle cases where the immediate is within the 12-bit range - lui t0, %hi(\imm) // Load upper 20 bits of the immediate - addi t0, t0, %lo(\imm) // Add lower 12 bits of the immediate - add \reg, \base, t0 // Add the result to the base register -.endm - // // RhpThrowHwEx // @@ -260,23 +274,23 @@ GetThreadA2 // Compute address for ExInfo* - ADD_LARGE_IMM a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* - sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null - li a3, 1 - sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 - li a3, -1 - sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx - li a3, 2 - sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.HardwareFault + addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 + addiw a3, zero, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + li a3, 2 + sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.HardwareFault // Link the ExInfo into the thread's ExInfo chain - ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) - sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead - sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo // Set the exception context field on the ExInfo - ADD_LARGE_IMM a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* - sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext + addi a2, sp, 
rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext // a0[31:0]: exception code // a1: ExInfo* @@ -303,59 +317,57 @@ GetThreadA2 - ld a1, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) - beq a1, zero, NotHijacked + ld a1, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + beq a1, zero, LOCAL_LABEL(NotHijacked) - ld a3, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + ld a3, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) // Recompute SP at callsite - ADD_LARGE_IMM t3, sp, (STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) - bltu a3, t3, TailCallWasHijacked // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + addi t3, sp, (STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + bltu a3, t3, LOCAL_LABEL(TailCallWasHijacked) // if (m_ppvHijackedReturnAddressLocation < SP at callsite) // Normal case where a valid return address location is hijacked - sd a1, 0(a3) - j ClearThreadState + sd a1, 0(a3) + tail ClearThreadState - TailCallWasHijacked: +LOCAL_LABEL(TailCallWasHijacked): // Abnormal case where the return address location is now invalid because we ended up here via a tail // call. In this case, our hijacked return address should be the correct caller of this method. // Stick the previous return address in RA as well as in the right spots in our PAL_LIMITED_CONTEXT. - ori ra, a1, 0 + mv ra, a1 // Compute offsets for PAL_LIMITED_CONTEXT - ADD_LARGE_IMM t0, sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__RA) - sd ra, 0(t0) - ADD_LARGE_IMM t0, sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP) - sd ra, 0(t0) + sd ra, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__RA)(sp) + sd ra, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)(sp) - ClearThreadState: +LOCAL_LABEL(ClearThreadState): // Clear the Thread's hijack state - sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) - sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) - NotHijacked: +LOCAL_LABEL(NotHijacked): // Compute the offset for ExInfo - ADD_LARGE_IMM a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* - sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null - li a3, 1 - sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 - li a3, -1 - sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx - li a3, 1 - sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.Throw + addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 + addiw a3, zero, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.Throw // Link the ExInfo into the thread's ExInfo chain - ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) - sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead - sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo // Set the exception 
context field on the ExInfo - ADD_LARGE_IMM a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* - sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext + addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext // a0: exception object // a1: ExInfo* @@ -383,20 +395,20 @@ GetThreadA2 // a1 <- ExInfo* - addi a1, sp, rsp_offsetof_ExInfo + addi a1, sp, rsp_offsetof_ExInfo // pExInfo->m_exception = null - sd zero, OFFSETOF__ExInfo__m_exception(a1) + sd zero, OFFSETOF__ExInfo__m_exception(a1) // init to a deterministic value (ExKind.None) - sb zero, OFFSETOF__ExInfo__m_kind(a1) + sb zero, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_passNumber = 1 - li a3, 1 - sb a3, OFFSETOF__ExInfo__m_passNumber(a1) + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx - li a3, -1 - sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) + addiw a3, zero, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // link the ExInfo into the thread's ExInfo chain - ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) // a0 <- current ExInfo mv a0, a3 // pExInfo->m_pPrevExInfo = m_pExInfoStackHead @@ -406,18 +418,18 @@ // set the exception context field on the ExInfo // a2 <- PAL_LIMITED_CONTEXT* - ADD_LARGE_IMM a2, sp, rsp_offsetof_Context + addi a2, sp, rsp_offsetof_Context // pExInfo->m_pExContext = pContext - sd a2, OFFSETOF__ExInfo__m_pExContext(a1) + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // a0 contains the currently active ExInfo // a1 contains the address of the new ExInfo - jal C_FUNC(RhRethrow) + call C_FUNC(RhRethrow) ALTERNATE_ENTRY RhpRethrow2 // no return - ebreak + EMIT_BREAKPOINT NESTED_END RhpRethrow, _TEXT // @@ -435,51 +447,53 @@ NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler // Allocate space for the call funclet frame - ALLOC_CALL_FUNCLET_FRAME 0x70 + ALLOC_CALL_FUNCLET_FRAME 0x90 // Save floating-point registers - fsd f24, 0(sp) - fsd f25, 8(sp) - fsd f26, 16(sp) - fsd f27, 24(sp) - fsd f28, 32(sp) - fsd f29, 40(sp) - fsd f30, 48(sp) - fsd f31, 56(sp) + fsd fs0, 0x00(sp) + fsd fs1, 0x08(sp) + fsd fs2, 0x10(sp) + fsd fs3, 0x18(sp) + fsd fs4, 0x20(sp) + fsd fs5, 0x28(sp) + fsd fs6, 0x30(sp) + fsd fs7, 0x38(sp) + fsd fs8, 0x40(sp) + fsd fs9, 0x48(sp) + fsd fs10, 0x50(sp) + fsd fs11, 0x58(sp) // Save integer registers - sd a0, 64(sp) // Save a0 to a3 - sd a1, 72(sp) - sd a2, 80(sp) - sd a3, 88(sp) - sd zero, 96(sp) // Make space for local "is_not_handling_thread_abort"; last qword will store the thread obj - -#define rsp_offset_is_not_handling_thread_abort 96 -#define rsp_offset_a0 64 -#define rsp_offset_a1 72 -#define rsp_offset_a2 80 -#define rsp_offset_a3 88 -#define rsp_CatchFunclet_offset_thread 104 + sd a0, 0x60(sp) // Save a0 to a3 + sd a1, 0x68(sp) + sd a2, 0x70(sp) + sd a3, 0x78(sp) + sd zero, 0x80(sp) // Make space for local "is_not_handling_thread_abort"; last qword will store the thread obj + +#define rsp_offset_is_not_handling_thread_abort 0x80 +#define rsp_offset_a0 0x60 +#define rsp_offset_a1 0x68 +#define rsp_offset_a2 0x70 +#define rsp_offset_a3 0x78 +#define rsp_CatchFunclet_offset_thread 0x88 // Clear the DoNotTriggerGc flag, trashes a4-a6 - jal C_FUNC(RhpGetThread) // Call the RhpGetThread function - sd a0, rsp_CatchFunclet_offset_thread(sp) - li a5, 0 - ld a0, rsp_offset_a0(sp) - ld a1, rsp_offset_a1(sp) - ld a2, rsp_offset_a2(sp) - ld a3, rsp_offset_a3(sp) - - ld a4, 
OFFSETOF__Thread__m_threadAbortException(a5) + call C_FUNC(RhpGetThread) // Call the RhpGetThread function + sd a0, rsp_CatchFunclet_offset_thread(sp) + mv a5, a0 + ld a0, rsp_offset_a0(sp) + ld a1, rsp_offset_a1(sp) + ld a2, rsp_offset_a2(sp) + ld a3, rsp_offset_a3(sp) + + ld a4, OFFSETOF__Thread__m_threadAbortException(a5) sub a4, a4, a0 - sd a4, rsp_offset_is_not_handling_thread_abort(sp) // Non-zero if the exception is not ThreadAbortException + sd a4, rsp_offset_is_not_handling_thread_abort(sp) // Non-zero if the exception is not ThreadAbortException - // Handle large immediate values - lui t3, %hi(0xFFFFFFEF) # Load upper 20 bits of 0xFFFFFFEF - addi t3, t3, %lo(0xFFFFFFEF) # Adjust with the lower 12 bits + addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags - // Use `andi` with a large immediate value - and a4, a3, t3 # Apply the mask + addiw a6, zero, -17 // Mask value (0xFFFFFFEF) + amoand.w a4, t3, a6 // Set preserved regs to the values expected by the funclet RESTORE_PRESERVED_REGISTERS a2 @@ -489,72 +503,71 @@ // Call the funclet // a0 still contains the exception object - jalr a1, 0 // Jump to the handler funclet + jalr a1 // Jump to the handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 // a0 contains resume IP - ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* #ifdef _DEBUG // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we // have to spill all the preserved registers and then refill them after the call. - sd a0, rsp_offset_a0(sp) + sd a0, rsp_offset_a0(sp) SAVE_PRESERVED_REGISTERS a2 - ld a0, rsp_CatchFunclet_offset_thread(sp) // a0 <- Thread* - ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* - ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value - jal C_FUNC(RhpValidateExInfoPop) + ld a0, rsp_CatchFunclet_offset_thread(sp) // a0 <- Thread* + ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + call C_FUNC(RhpValidateExInfoPop) - ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* RESTORE_PRESERVED_REGISTERS a2 - ld a0, rsp_offset_a0(sp) // Reload resume IP + ld a0, rsp_offset_a0(sp) // Reload resume IP #endif - ld a1, rsp_CatchFunclet_offset_thread(sp) + ld a1, rsp_CatchFunclet_offset_thread(sp) // We must unhijack the thread at this point because the section of stack where the hijack is applied // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
INLINE_THREAD_UNHIJACK a1, a3, t3 // Thread in a1, trashes a3 and t3 - ld a3, rsp_offset_a3(sp) // a3 <- current ExInfo* - ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + ld a3, rsp_offset_a3(sp) // a3 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value -PopExInfoLoop: - ld a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3) // a3 <- next ExInfo - beq a3, zero, DonePopping // if (pExInfo == null) { we're done } - blt a3, a2, PopExInfoLoop // if (pExInfo < resume SP} { keep going } +LOCAL_LABEL(PopExInfoLoop): + ld a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3) // a3 <- next ExInfo + beq a3, zero, LOCAL_LABEL(DonePopping) // if (pExInfo == null) { we're done } + blt a3, a2, LOCAL_LABEL(PopExInfoLoop) // if (pExInfo < resume SP} { keep going } -DonePopping: - sd a3, OFFSETOF__Thread__m_pExInfoStackHead(a1) // Store the new head on the Thread +LOCAL_LABEL(DonePopping): + sd a3, OFFSETOF__Thread__m_pExInfoStackHead(a1) // Store the new head on the Thread PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 // Use `andi` with the immediate value 0 - andi a6, a3, TrapThreadsFlags_AbortInProgress_Bit // Apply the mask directly + andi a6, a3, (1 << TrapThreadsFlags_AbortInProgress_Bit) // Apply the mask directly + beq a6, zero, LOCAL_LABEL(NoAbort) - beq a6, zero, NoAbort - - ld a3, rsp_offset_is_not_handling_thread_abort(sp) - bne a3, zero, NoAbort + ld a3, rsp_offset_is_not_handling_thread_abort(sp) + bne a3, zero, LOCAL_LABEL(NoAbort) // It was the ThreadAbortException, so rethrow it // Reset SP - mv a1, a0 // a1 <- continuation address as exception PC - li a0, STATUS_REDHAWK_THREAD_ABORT - mv sp, a2 - jal C_FUNC(RhpThrowHwEx) + mv a1, a0 // a1 <- continuation address as exception PC + li a0, STATUS_REDHAWK_THREAD_ABORT + mv sp, a2 + tail C_FUNC(RhpThrowHwEx) -NoAbort: +LOCAL_LABEL(NoAbort): // Reset SP and jump to continuation address - mv sp, a2 - jalr zero, a0 // Jump to the continuation address + mv sp, a2 + jalr zero, 0(a0) // Jump to the continuation address #undef rsp_offset_is_not_handling_thread_abort #undef rsp_offset_a0 @@ -577,36 +590,40 @@ NoAbort: NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler // Allocate space for the call funclet frame - ALLOC_CALL_FUNCLET_FRAME 0x60 + ALLOC_CALL_FUNCLET_FRAME 0x80 // Save floating-point registers - fsd f24, 0(sp) - fsd f25, 8(sp) - fsd f26, 16(sp) - fsd f27, 24(sp) - fsd f28, 32(sp) - fsd f29, 40(sp) - fsd f30, 48(sp) - fsd f31, 56(sp) + fsd fs0, 0x00(sp) + fsd fs1, 0x08(sp) + fsd fs2, 0x10(sp) + fsd fs3, 0x18(sp) + fsd fs4, 0x20(sp) + fsd fs5, 0x28(sp) + fsd fs6, 0x30(sp) + fsd fs7, 0x38(sp) + fsd fs8, 0x40(sp) + fsd fs9, 0x48(sp) + fsd fs10, 0x50(sp) + fsd fs11, 0x58(sp) // Save integer registers - sd a0, 64(sp) // Save a0 to 0x40 - sd a1, 72(sp) // Save a1 to 0x48 + sd a0, 0x60(sp) // Save a0 to 0x60 + sd a1, 0x68(sp) // Save a1 to 0x68 -#define rsp_offset_a1 72 -#define rsp_FinallyFunclet_offset_thread 80 +#define rsp_offset_a1 0x68 +#define rsp_FinallyFunclet_offset_thread 0x70 // Clear the DoNotTriggerGc flag - call C_FUNC(RhpGetThread) - sd a0, rsp_FinallyFunclet_offset_thread(sp) - li a2, 0 - ld a0, 64(sp) - ld a1, 72(sp) + call C_FUNC(RhpGetThread) + sd a0, rsp_FinallyFunclet_offset_thread(sp) + mv a2, a0 + ld a0, 0x60(sp) + ld a1, 0x68(sp) // Set the DoNotTriggerGc flag - la t3, OFFSETOF__Thread__m_ThreadStateFlags - li a3, -17 // Mask value (0xFFFFFFEF) - and a4, a3, t3 + addi t3, a2, OFFSETOF__Thread__m_ThreadStateFlags + addiw a3, zero, -17 // Mask value (0xFFFFFFEF) + amoand.w a4, t3, a3 // Restore 
preserved registers RESTORE_PRESERVED_REGISTERS a1 @@ -615,33 +632,38 @@ NoAbort: TRASH_PRESERVED_REGISTERS_STORAGE a1 // Call the funclet - jalr a0, 0 // Jump to the funclet + jalr a0 // Jump to the funclet ALTERNATE_ENTRY RhpCallFinallyFunclet2 - ld a1, rsp_offset_a1(sp) // Reload REGDISPLAY pointer + ld a1, rsp_offset_a1(sp) // Reload REGDISPLAY pointer // Save new values of preserved registers into REGDISPLAY SAVE_PRESERVED_REGISTERS a1 // Restore the DoNotTriggerGc flag - ld a2, rsp_FinallyFunclet_offset_thread(sp) - la t3, OFFSETOF__Thread__m_ThreadStateFlags - li a3, -17 // Mask value (0xFFFFFFEF) - and a1, a3, t3 + ld a2, rsp_FinallyFunclet_offset_thread(sp) + + addi t3, a2, OFFSETOF__Thread__m_ThreadStateFlags + addiw a3, zero, 16 // Mask value (0x10) + amoor.w a1, t3, a3 // Restore floating-point registers - fld f24, 0(sp) - fld f25, 8(sp) - fld f26, 16(sp) - fld f27, 24(sp) - fld f28, 32(sp) - fld f29, 40(sp) - fld f30, 48(sp) - fld f31, 56(sp) + fld fs0, 0x00(sp) + fld fs1, 0x08(sp) + fld fs2, 0x10(sp) + fld fs3, 0x18(sp) + fld fs4, 0x20(sp) + fld fs5, 0x28(sp) + fld fs6, 0x30(sp) + fld fs7, 0x38(sp) + fld fs8, 0x40(sp) + fld fs9, 0x48(sp) + fld fs10, 0x50(sp) + fld fs11, 0x58(sp) // Free call funclet frame - FREE_CALL_FUNCLET_FRAME 0x60 + FREE_CALL_FUNCLET_FRAME 0x80 // Return EPILOG_RETURN @@ -662,15 +684,19 @@ NoAbort: // NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler - ALLOC_CALL_FUNCLET_FRAME 0x40 - fsd f24, 0x00(sp) - fsd f25, 0x08(sp) - fsd f26, 0x10(sp) - fsd f27, 0x18(sp) - fsd f28, 0x20(sp) - fsd f29, 0x28(sp) - fsd f30, 0x30(sp) - fsd f31, 0x38(sp) + ALLOC_CALL_FUNCLET_FRAME 0x60 + fsd fs0, 0x00(sp) + fsd fs1, 0x08(sp) + fsd fs2, 0x10(sp) + fsd fs3, 0x18(sp) + fsd fs4, 0x20(sp) + fsd fs5, 0x28(sp) + fsd fs6, 0x30(sp) + fsd fs7, 0x38(sp) + fsd fs8, 0x40(sp) + fsd fs9, 0x48(sp) + fsd fs10, 0x50(sp) + fsd fs11, 0x58(sp) ld t3, OFFSETOF__REGDISPLAY__pFP(a2) ld fp, 0(t3) @@ -679,20 +705,24 @@ NoAbort: // call the funclet // // a0 still contains the exception object - jalr ra, a1, 0 + jalr a1 ALTERNATE_ENTRY RhpCallFilterFunclet2 - fld f24, 0x00(sp) - fld f25, 0x08(sp) - fld f26, 0x10(sp) - fld f27, 0x18(sp) - fld f28, 0x20(sp) - fld f29, 0x28(sp) - fld f30, 0x30(sp) - fld f31, 0x38(sp) + fld fs0, 0x00(sp) + fld fs1, 0x08(sp) + fld fs2, 0x10(sp) + fld fs3, 0x18(sp) + fld fs4, 0x20(sp) + fld fs5, 0x28(sp) + fld fs6, 0x30(sp) + fld fs7, 0x38(sp) + fld fs8, 0x40(sp) + fld fs9, 0x48(sp) + fld fs10, 0x50(sp) + fld fs11, 0x58(sp) - FREE_CALL_FUNCLET_FRAME 0x40 + FREE_CALL_FUNCLET_FRAME 0x60 EPILOG_RETURN NESTED_END RhpCallFilterFunclet, Text @@ -714,16 +744,16 @@ NoAbort: NESTED_ENTRY RhpCallPropagateExceptionCallback, _TEXT, NoHandler -#define rsp_offset_a0 16 -#define rsp_offset_a1 24 -#define rsp_offset_a2 32 -#define rsp_offset_a3 40 -#define rsp_offset_a4 48 -#define rsp_CallPropagationCallback_offset_thread 56 +#define rsp_offset_a0 0x10 +#define rsp_offset_a1 0x18 +#define rsp_offset_a2 0x20 +#define rsp_offset_a3 0x28 +#define rsp_offset_a4 0x30 +#define rsp_CallPropagationCallback_offset_thread 0x38 // Using the NO_FP macro so that the debugger unwinds using SP. // This makes backtraces work even after using RESTORE_PRESERVED_REGISTERS. 
- PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, 64 + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, ra, 0x40 mv fp, sp sd a0, rsp_offset_a0(sp) // a0 to a4 are stored to restore them anytime sd a1, rsp_offset_a1(sp) @@ -733,7 +763,7 @@ NoAbort: sd zero, rsp_CallPropagationCallback_offset_thread(sp) // zero makes space to store the thread obj // clear the DoNotTriggerGc flag, trashes a4-a6 - call RhpGetThread + call C_FUNC(RhpGetThread) sd a0, rsp_CallPropagationCallback_offset_thread(sp) mv a5, a0 ld a0, rsp_offset_a0(sp) @@ -743,8 +773,8 @@ NoAbort: addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags - addi a6, zero, -17 // a6 = a6 & ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc=0x10. - and a4, a6, t3 + addiw a6, zero, -17 // Mask value (0xFFFFFFEF) + amoand.w a4, t3, a6 // set preserved regs to the values expected by the funclet RESTORE_PRESERVED_REGISTERS a2 @@ -759,8 +789,8 @@ NoAbort: ld a0, rsp_CallPropagationCallback_offset_thread(sp) // a0 <- Thread* ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* - ld a2, a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value - call RhpValidateExInfoPop + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + call C_FUNC(RhpValidateExInfoPop) ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* @@ -771,32 +801,32 @@ NoAbort: // We must unhijack the thread at this point because the section of stack where the hijack is applied // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. - INLINE_THREAD_UNHIJACK a1, a3, t3 // Thread in a1, trashes a3 and t3 + INLINE_THREAD_UNHIJACK a1, a3, t3 // Thread in a1, trashes a3 and t3 ld a3, rsp_offset_a3(sp) // a3 <- current ExInfo* - ld a2, a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value -Propagate_PopExInfoLoop: - ld a3, a3, OFFSETOF__ExInfo__m_pPrevExInfo // a3 <- next ExInfo - beqz a3, Propagate_DonePopping // if (pExInfo == null) { we're done } - blt a3, a2, Propagate_PopExInfoLoop // if (pExInfo < resume SP) { keep going } +LOCAL_LABEL(Propagate_PopExInfoLoop): + ld a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3) // a3 <- next ExInfo + beqz a3, LOCAL_LABEL(Propagate_DonePopping) // if (pExInfo == null) { we're done } + blt a3, a2, LOCAL_LABEL(Propagate_PopExInfoLoop) // if (pExInfo < resume SP) { keep going } -Propagate_DonePopping: - sd a3, a1, OFFSETOF__Thread__m_pExInfoStackHead // store the new head on the Thread +LOCAL_LABEL(Propagate_DonePopping): + sd a3, OFFSETOF__Thread__m_pExInfoStackHead(a1) // store the new head on the Thread // restore preemptive mode - ld a4, rsp_offset_a4(sp) // pPreviousTransitionFrame - sd a4, a1, OFFSETOF__Thread__m_pTransitionFrame + ld a4, rsp_offset_a4(sp) // pPreviousTransitionFrame + sd a4, OFFSETOF__Thread__m_pTransitionFrame(a1) // reset SP and RA and jump to continuation address ld a0, rsp_offset_a0(sp) // callback context ld a1, rsp_offset_a1(sp) // callback ld a2, rsp_offset_a2(sp) // REGDISPLAY* - ld a3, a2, OFFSETOF__REGDISPLAY__pRA // a3 <- &resume RA value - ld ra, a3 - ld a3, a2, OFFSETOF__REGDISPLAY__SP // a3 <- resume SP value + ld a3, OFFSETOF__REGDISPLAY__pRA(a2) // a3 <- &resume RA value + ld ra, 0(a3) + ld a3, OFFSETOF__REGDISPLAY__SP(a2) // a3 <- resume SP value mv sp, a3 - jalr ra, a1, 0 + jalr zero, 0(a1) #undef rsp_offset_a0 #undef rsp_offset_a1 diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index b819f946b477d5..22b3c60fa3ec4f 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ 
b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S
@@ -4,12 +4,11 @@

#include <unixasmmacros.inc>
#include "AsmOffsets.inc"

-#define PROBE_FRAME_SIZE 0xD0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) +
-                              // 10 * 8 for callee saved registers +
-                              // 1 * 8 for caller SP +
-                              // 2 * 8 for int returns +
-                              // 1 * 8 for alignment padding +
-                              // 4 * 16 for FP returns
+#define PROBE_FRAME_SIZE 0xB0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) +
+                              // 11 * 8 for callee saved registers +
+                              // 1 * 8 for caller SP +
+                              // 2 * 8 for int returns +
+                              // 4 * 8 for FP returns

// Define the prolog for setting up the PInvokeTransitionFrame
.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK

    # incoming register values into it.

    # First create PInvokeTransitionFrame
-   PROLOG_SAVE_REG_PAIR_INDEXED   s10, ra, PROBE_FRAME_SIZE  # Push down stack pointer and store FP (s10) and RA (ra)
+   PROLOG_SAVE_REG_PAIR_INDEXED   fp, ra, PROBE_FRAME_SIZE   # Push down stack pointer and store FP and RA

    # Slot at sp+0x10 is reserved for Thread *
    # Slot at sp+0x18 is reserved for bitmask of saved registers

    # Save callee-saved registers
-   PROLOG_SAVE_REG_PAIR s11, s0, 0x20
-   PROLOG_SAVE_REG_PAIR s1, s2, 0x30
-   PROLOG_SAVE_REG_PAIR s3, s4, 0x40
-   PROLOG_SAVE_REG_PAIR s5, s6, 0x50
-   PROLOG_SAVE_REG_PAIR s7, s8, 0x60
+   PROLOG_SAVE_REG_PAIR s1, s2, 0x20
+   PROLOG_SAVE_REG_PAIR s3, s4, 0x30
+   PROLOG_SAVE_REG_PAIR s5, s6, 0x40
+   PROLOG_SAVE_REG_PAIR s7, s8, 0x50
+   PROLOG_SAVE_REG_PAIR s9, s10, 0x60
+   PROLOG_SAVE_REG s11, 0x70

-   # Slot at sp+0x70 is reserved for caller sp
+   # Slot at sp+0x78 is reserved for caller sp

    # Save the integer return registers
-   sd a0, 0x78(sp)
-   sd a1, 0x80(sp)
-
-   # Slot at sp+0x88 is alignment padding
+   sd a0, 0x80(sp)
+   sd a1, 0x88(sp)

    # Save the FP return registers
-   fsd f0, 0x90(sp)
-   fsd f1, 0x98(sp)
-   fsd f2, 0xa0(sp)
-   fsd f3, 0xa8(sp)
+   fsd fa0, 0x90(sp)
+   fsd fa1, 0x98(sp)
+   fsd fa2, 0xa0(sp)
+   fsd fa3, 0xa8(sp)

    # Perform the rest of the PInvokeTransitionFrame initialization.
- sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker) - sd \BITMASK, OFFSETOF__PInvokeTransitionFrame__m_pThread + 8(sp) # Save the register bitmask passed in by caller + sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker) + sd \BITMASK, (OFFSETOF__PInvokeTransitionFrame__m_pThread + 8)(sp) # Save the register bitmask passed in by caller - addi \trashReg, sp, PROBE_FRAME_SIZE # Recover value of caller's SP - sd \trashReg, 0x70(sp) # Save caller's SP + addi \trashReg, sp, PROBE_FRAME_SIZE # Recover value of caller's SP + sd \trashReg, 0x78(sp) # Save caller's SP # Link the frame into the Thread - mv \trashReg, sp - sd \trashReg, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) + mv \trashReg, sp + sd \trashReg, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) .endm @@ -61,24 +59,25 @@ .macro POP_PROBE_FRAME // Restore the integer return registers - ld a0, 0x78(sp) - ld a1, 0x80(sp) + ld a0, 0x80(sp) + ld a1, 0x88(sp) // Restore the FP return registers - fld f0, 0x90(sp) - fld f1, 0x98(sp) - fld f2, 0xA0(sp) - fld f3, 0xA8(sp) + fld fa0, 0x90(sp) + fld fa1, 0x98(sp) + fld fa2, 0xa0(sp) + fld fa3, 0xa8(sp) // Restore callee saved registers - EPILOG_RESTORE_REG_PAIR t0, t1, 0x20 - EPILOG_RESTORE_REG_PAIR t2, t3, 0x30 - EPILOG_RESTORE_REG_PAIR t4, t5, 0x40 - EPILOG_RESTORE_REG_PAIR t6, s0, 0x50 - EPILOG_RESTORE_REG_PAIR s1, s2, 0x60 + EPILOG_RESTORE_REG_PAIR s1, s2, 0x20 + EPILOG_RESTORE_REG_PAIR s3, s4, 0x30 + EPILOG_RESTORE_REG_PAIR s5, s6, 0x40 + EPILOG_RESTORE_REG_PAIR s7, s8, 0x50 + EPILOG_RESTORE_REG_PAIR s9, s10, 0x60 + EPILOG_RESTORE_REG s11, 0x70 // Restore the frame pointer and return address - EPILOG_RESTORE_REG_PAIR_INDEXED s3, ra, PROBE_FRAME_SIZE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, PROBE_FRAME_SIZE .endm // Fix up the hijacked callstack @@ -92,13 +91,13 @@ #endif // Fix the stack by restoring the original return address - ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) - ld t3, OFFSETOF__Thread__m_pvHijackedReturnAddress + 8(a2) + ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + ld t3, (OFFSETOF__Thread__m_pvHijackedReturnAddress + 8)(a2) // Clear hijack state - sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) - sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) - sd zero, OFFSETOF__Thread__m_uHijackedReturnValueFlags(a2) + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + sd zero, OFFSETOF__Thread__m_uHijackedReturnValueFlags(a2) .endm // @@ -108,15 +107,14 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler FixupHijackedCallstack PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 - andi t3, a3, TrapThreadsFlags_TrapThreads_Bit - bnez t3, WaitForGC - jr ra + andi t3, a3, 1 << TrapThreadsFlags_TrapThreads_Bit + bnez t3, LOCAL_LABEL(WaitForGC) + jalr zero, 0(ra) -WaitForGC: - lui t6, %hi((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A4 + PTFF_SAVE_A5)) - addi t6, t6, %lo((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A4 + PTFF_SAVE_A5)) +LOCAL_LABEL(WaitForGC): + li t6, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1) or t3, t3, t6 - j C_FUNC(RhpWaitForGC) + tail C_FUNC(RhpWaitForGC) NESTED_END RhpGcProbeHijack .global C_FUNC(RhpThrowHwEx) @@ -125,26 +123,26 @@ NESTED_END RhpGcProbeHijack NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler PUSH_PROBE_FRAME a2, a3, t3 - ld t0, OFFSETOF__Thread__m_pDeferredTransitionFrame(a2) - call C_FUNC(RhpWaitForGC2) + ld a0, 
OFFSETOF__Thread__m_pDeferredTransitionFrame(a2) + call C_FUNC(RhpWaitForGC2) - ld t1, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) + ld a2, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) # Load PTFF_THREAD_ABORT_BIT into a register, using t2 if needed - li t2, PTFF_THREAD_ABORT_BIT - and t3, t1, t2 - bnez t3, ThrowThreadAbort + slli t2, a2, (63-PTFF_THREAD_ABORT_BIT) + srli t2, t2, 63 + bnez t2, LOCAL_LABEL(ThrowThreadAbort) .cfi_remember_state POP_PROBE_FRAME EPILOG_RETURN .cfi_restore_state -ThrowThreadAbort: +LOCAL_LABEL(ThrowThreadAbort): POP_PROBE_FRAME - li a0, STATUS_REDHAWK_THREAD_ABORT - mv a1, ra # Set return address as exception PC - j RhpThrowHwEx + li a0, STATUS_REDHAWK_THREAD_ABORT + mv a1, ra # Set return address as exception PC + call C_FUNC(RhpThrowHwEx) NESTED_END RhpWaitForGC .global C_FUNC(RhpGcPoll2) @@ -153,15 +151,15 @@ NESTED_END RhpWaitForGC LEAF_ENTRY RhpGcPoll PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a0 bne a0, zero, C_FUNC(RhpGcPollRare) - jalr ra + jalr zero, 0(ra) LEAF_END RhpGcPoll // Rare GC Poll function NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler PUSH_COOP_PINVOKE_FRAME a0 - jal RhpGcPoll2 + call RhpGcPoll2 POP_COOP_PINVOKE_FRAME - jalr ra + jalr zero, 0(ra) NESTED_END RhpGcPollRare #ifdef FEATURE_GC_STRESS diff --git a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S index 8c45ece1d8b373..91ba5da0cccb4d 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S @@ -38,7 +38,7 @@ // Load the target address from the data block and jump to it ld t1, POINTER_SIZE(t0) // Load target address into t1 from the data block - jalr t1 // Jump to the target address in t1 + jalr t1 // Jump to the target address in t1 LEAF_END RhCommonStub, _TEXT @@ -47,5 +47,5 @@ // LEAF_ENTRY RhGetCommonStubAddress, _TEXT PREPARE_EXTERNAL_VAR RhCommonStub, a0 - jalr a0 // Return address in a0 + jalr zero, 0(ra) // Return address in ra LEAF_END RhGetCommonStubAddress, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S index bc1d583f436bdb..6329ac703e0057 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S @@ -32,7 +32,7 @@ NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler sd a1, OFFSETOF__PInvokeTransitionFrame__m_pThread(a0) sd a0, OFFSETOF__Thread__m_pTransitionFrame(a1) - jalr ra, 0 + jalr zero, 0(ra) NESTED_END RhpPInvoke, _TEXT LEAF_ENTRY RhpPInvokeReturn, _TEXT @@ -41,9 +41,9 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a5 - beq t0, zero, 1f // TrapThreadsFlags_None = 0 - jalr ra, 0 + bne t0, zero, 1f // TrapThreadsFlags_None = 0 + jalr zero, 0(ra) 1: // passing transition frame pointer in a0 - jal C_FUNC(RhpWaitForGC2) + tail C_FUNC(RhpWaitForGC2) LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S index 3a7e1b1148f7ab..0427e4a45f8c25 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S @@ -18,10 +18,10 @@ bne t1, t6, 0f // Load the target address from the cache entry - ld t6, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)(t0) + ld t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)(t0) // Jump to the address in t6 - jalr t0, t6, 0 + jalr zero, 
0(t0) 0: .endm @@ -34,11 +34,11 @@ NESTED_ENTRY RhpInterfaceDispatch\entries, _TEXT, NoHandler // t0 holds the indirection cell address. Load the cache pointer. - ld t0, OFFSETOF__InterfaceDispatchCell__m_pCache(a1) // Using a1 as an alternative base register + ld t0, OFFSETOF__InterfaceDispatchCell__m_pCache(a1) // Using a1 as an alternative base register // Load the MethodTable from the object instance in a0. ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries - ld t1, 0(a0) + ld t1, 0(a0) .global CurrentEntry .set CurrentEntry, 0 @@ -49,7 +49,7 @@ .endr // t0 still contains the indirection cell address. - jal C_FUNC(RhpInterfaceDispatchSlow) + tail C_FUNC(RhpInterfaceDispatchSlow) NESTED_END RhpInterfaceDispatch\entries, _TEXT @@ -75,10 +75,10 @@ // The exception handling infrastructure is aware of the fact that this is the first // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here // to a NullReferenceException at the callsite. - ld zero, 0(a0) + ld zero, 0(a0) // Just tail call to the cache miss helper. - jal C_FUNC(RhpInterfaceDispatchSlow) + tail C_FUNC(RhpInterfaceDispatchSlow) LEAF_END RhpInitialInterfaceDispatch, _TEXT // @@ -87,7 +87,7 @@ LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT // t2 contains the interface dispatch cell address. // Load t3 to point to the vtable offset (which is stored in the m_pCache field). - ld t3, OFFSETOF__InterfaceDispatchCell__m_pCache(t2) + ld t3, OFFSETOF__InterfaceDispatchCell__m_pCache(t2) // Load the MethodTable from the object instance in a0, and add it to the vtable offset // to get the address in the vtable of what we want to dereference @@ -97,7 +97,7 @@ // Load the target address of the vtable into t3 ld t3, 0(t3) - jalr t3, 0 + jalr zero, 0(t3) LEAF_END RhpVTableOffsetDispatch, _TEXT // @@ -110,7 +110,7 @@ // t3: target address for the thunk to call // t4: parameter of the thunk's target PREPARE_EXTERNAL_VAR RhpCidResolve, t3 - jal C_FUNC(RhpUniversalTransition_DebugStepTailCall) + tail C_FUNC(RhpUniversalTransition_DebugStepTailCall) LEAF_END RhpInterfaceDispatchSlow, _TEXT #endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index c808479b6c9379..13ae2fe0502100 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -12,10 +12,7 @@ .global RhpFpTrashValues #endif // TRASH_SAVED_ARGUMENT_REGISTERS -// Padding to account for the odd number of saved integer registers -#define ALIGNMENT_PADDING_SIZE (8) - -#define COUNT_ARG_REGISTERS (9) +#define COUNT_ARG_REGISTERS (8) #define INTEGER_REGISTER_SIZE (8) #define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) @@ -31,7 +28,6 @@ // From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: // -// ALIGNMENT_PADDING_SIZE // ARGUMENT_REGISTERS_SIZE // RETURN_BLOCK_SIZE // FLOAT_ARG_REGISTERS_SIZE @@ -41,14 +37,14 @@ #define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE) -#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) +#define STACK_SIZE (ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) #define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE) #define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + 
FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE)

// RhpUniversalTransition
//
-// At input to this function, a0-7/tp, f0-7 and the stack may contain any number of arguments.
+// At input to this function, a0-a7, fa0-fa7 and the stack may contain any number of arguments.
//
// In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register:
// t7 will contain the managed function that is to be called by this transition function
@@ -61,16 +57,15 @@
//
// Frame layout is:
//
-// {StackPassedArgs}                    ChildSP+100 CallerSP+000
-// {AlignmentPad (0x8 bytes)}           ChildSP+0F8 CallerSP-008
-// {IntArgRegs (a0-a7/tp) (0x48 bytes)} ChildSP+0B0 CallerSP-050
-// {ReturnBlock (0x20 bytes)}           ChildSP+090 CallerSP-070
+// {StackPassedArgs}                    ChildSP+0F0 CallerSP+000
+// {IntArgRegs (a0-a7) (0x40 bytes)}    ChildSP+0B0 CallerSP-040
+// {ReturnBlock (0x20 bytes)}           ChildSP+090 CallerSP-060
//  -- The base address of the Return block is the TransitionBlock pointer, the floating point args are
//     in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact
//     layout of all pieces of the frame that lie at or above the pushed floating point registers.
-// {FpArgRegs (f0-f7) (0x80 bytes)}     ChildSP+010 CallerSP-0F0
-// {PushedRA}                           ChildSP+008 CallerSP-0F8
-// {PushedFP}                           ChildSP+000 CallerSP-100
+// {FpArgRegs (fa0-fa7) (0x80 bytes)}   ChildSP+010 CallerSP-0E0
+// {PushedRA}                           ChildSP+008 CallerSP-0E8
+// {PushedFP}                           ChildSP+000 CallerSP-0F0
//
// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure
// must be updated as well.
@@ -90,54 +85,51 @@

    NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler

    // Save FP and RA registers
-   PROLOG_SAVE_REG_PAIR_INDEXED s10, ra, STACK_SIZE
+   PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, STACK_SIZE

    // Save Floating Point registers
-   fsd f0, 0x100(sp) // Save f0
-   fsd f1, 0x108(sp) // Save f1
-   fsd f2, 0x110(sp) // Save f2
-   fsd f3, 0x118(sp) // Save f3
-   fsd f4, 0x120(sp) // Save f4
-   fsd f5, 0x128(sp) // Save f5
-   fsd f6, 0x130(sp) // Save f6
-   fsd f7, 0x138(sp) // Save f7
+   fsd fa0, 0x10(sp) // Save fa0
+   fsd fa1, 0x18(sp) // Save fa1
+   fsd fa2, 0x20(sp) // Save fa2
+   fsd fa3, 0x28(sp) // Save fa3
+   fsd fa4, 0x30(sp) // Save fa4
+   fsd fa5, 0x38(sp) // Save fa5
+   fsd fa6, 0x40(sp) // Save fa6
+   fsd fa7, 0x48(sp) // Save fa7

    // Space for return buffer data (0x40 bytes)

    // Save argument registers
-   sd a0, 0x140(sp) // Save a0
-   sd a1, 0x148(sp) // Save a1
-   sd a2, 0x150(sp) // Save a2
-   sd a3, 0x158(sp) // Save a3
-   sd a4, 0x160(sp) // Save a4
-   sd a5, 0x168(sp) // Save a5
-   sd a6, 0x170(sp) // Save a6
-   sd a7, 0x178(sp) // Save a7
-   sd tp, 0x180(sp) // Save tp
+   sd a0, 0xb0(sp) // Save a0
+   sd a1, 0xb8(sp) // Save a1
+   sd a2, 0xc0(sp) // Save a2
+   sd a3, 0xc8(sp) // Save a3
+   sd a4, 0xd0(sp) // Save a4
+   sd a5, 0xd8(sp) // Save a5
+   sd a6, 0xe0(sp) // Save a6
+   sd a7, 0xe8(sp) // Save a7

    // Optionally prepare the values to trash saved argument registers
#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
-   PREPARE_EXTERNAL_VAR RhpFpTrashValues, t0
-
-   fld f0, 0(t0) // Load f0 from t0
-   fld f1, 0x08(t0) // Load f1 from t0
-   fld f2, 0x10(t0) // Load f2 from t0
-   fld f3, 0x18(t0) // Load f3 from t0
-   fld f4, 0x20(t0) // Load f4 from t0
-   fld f5, 0x28(t0) // Load f5 from t0
-   fld f6, 0x30(t0) // Load f6 from t0
-   fld f7, 0x38(t0) // Load f7 from t0
-
-   PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, t0
-
-   ld a0, 0x10(t0) // Load a0 from t0
-   ld a1, 0x18(t0) // Load a1 from t0
-   ld a2, 0x20(t0) // Load a2 from t0
-   ld a3, 0x28(t0) // Load a3 from t0
-   ld
a4, 0x30(t0) // Load a4 from t0 - ld a5, 0x38(t0) // Load a5 from t0 - ld a6, 0x40(t0) // Load a6 from t0 - ld a7, 0x48(t0) // Load a7 from t0 + PREPARE_EXTERNAL_VAR RhpFpTrashValues, a1 + + fld fa0, 0(a1) // Load fa0 from a1 + fld fa1, 0x08(a1) // Load fa1 from a1 + fld fa2, 0x10(a1) // Load fa2 from a1 + fld fa3, 0x18(a1) // Load fa3 from a1 + fld fa4, 0x20(a1) // Load fa4 from a1 + fld fa5, 0x28(a1) // Load fa5 from a1 + fld fa6, 0x30(a1) // Load fa6 from a1 + fld fa7, 0x38(a1) // Load fa7 from a1 + + PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, a1 + + ld a2, 0x10(a1) // Load a2 from a1 + ld a3, 0x18(a1) // Load a3 from a1 + ld a4, 0x20(a1) // Load a4 from a1 + ld a5, 0x28(a1) // Load a5 from a1 + ld a6, 0x30(a1) // Load a6 from a1 + ld a7, 0x38(a1) // Load a7 from a1 #endif // TRASH_SAVED_ARGUMENT_REGISTERS addi a0, sp, DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function @@ -169,7 +161,7 @@ ld tp, 0x180(sp) // Restore tp // Restore FP and RA registers, and free the allocated stack block - EPILOG_RESTORE_REG_PAIR_INDEXED s10, ra, STACK_SIZE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, STACK_SIZE // Tailcall to the target address in t2 jalr t2, 0 diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index 52d8978f4fb2f6..f169d45529b809 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -318,7 +318,7 @@ C_FUNC(\Name): .endm // Bit position for the flags above, to be used with tbz / tbnz instructions -#define PTFF_THREAD_ABORT_BIT 36 +#define PTFF_THREAD_ABORT_BIT 35 // // CONSTANTS -- INTEGER diff --git a/src/native/external/llvm-libunwind/src/Registers.hpp b/src/native/external/llvm-libunwind/src/Registers.hpp index 6a6d35467e518c..600c56bb92ff39 100644 --- a/src/native/external/llvm-libunwind/src/Registers.hpp +++ b/src/native/external/llvm-libunwind/src/Registers.hpp @@ -4342,7 +4342,7 @@ inline void Registers_riscv::setRegister(int regNum, reg_t value, uint64_t locat return; else if ((regNum > 0) && (regNum < 32)) { _registers[regNum] = value; - _registerLocations[regNum - UNW_LOONGARCH_R0] = location; + _registerLocations[regNum - UNW_RISCV_X0] = location; } else _LIBUNWIND_ABORT("unsupported riscv register"); From 4dbe95c7e6ba68eb5eaa169f6fa16c0eb46592b2 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Fri, 13 Dec 2024 13:31:31 +0200 Subject: [PATCH 09/19] Fix riscv64 nativeaot native build --- .../nativeaot/Runtime/AsmOffsetsVerify.cpp | 7 +- .../nativeaot/Runtime/StackFrameIterator.cpp | 7 +- .../nativeaot/Runtime/riscv64/AllocFast.S | 2 +- .../nativeaot/Runtime/riscv64/AsmOffsetsCpu.h | 6 +- .../Runtime/riscv64/ExceptionHandling.S | 6 +- .../nativeaot/Runtime/riscv64/WriteBarriers.S | 2 +- .../nativeaot/Runtime/unix/UnwindHelpers.cpp | 85 ------------------- 7 files changed, 15 insertions(+), 100 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp index d27884dbdf1ff3..21059545ed9ba8 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp +++ b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -28,9 +28,12 @@ class AsmOffsets static_assert(offsetof(Array, m_Length) == offsetof(String, m_Length), "The length field of String and Array have different offsets"); static_assert(sizeof(((Array*)0)->m_Length) == sizeof(((String*)0)->m_Length), "The length field of String and 
Array have different sizes"); +#define TO_STRING(x) #x +#define OFFSET_STRING(cls, member) TO_STRING(offsetof(cls, member)) + +// Macro definition #define PLAT_ASM_OFFSET(offset, cls, member) \ - static_assert((offsetof(cls, member) == 0x##offset) || (offsetof(cls, member) > 0x##offset), "Bad asm offset for '" #cls "." #member "', the actual offset is smaller than 0x" #offset "."); \ - static_assert((offsetof(cls, member) == 0x##offset) || (offsetof(cls, member) < 0x##offset), "Bad asm offset for '" #cls "." #member "', the actual offset is larger than 0x" #offset "."); + static_assert(offsetof(cls, member) == 0x##offset, "Bad asm offset for '" #cls "." #member "'. Actual offset: " OFFSET_STRING(cls, member)); #define PLAT_ASM_SIZEOF(size, cls ) \ static_assert((sizeof(cls) == 0x##size) || (sizeof(cls) > 0x##size), "Bad asm size for '" #cls "', the actual size is smaller than 0x" #size "."); \ diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index c7b10c98301c2b..c2dc0efd669048 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1,5 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + #include "common.h" #include "gcenv.h" #include "CommonTypes.h" @@ -794,15 +795,11 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC m_RegDisplay.pS9 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S9); m_RegDisplay.pS10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S10); m_RegDisplay.pS11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S11); - m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_REG(pCtx, FP); - m_RegDisplay.pRA = (PTR_uintptr_t)PTR_TO_REG(pCtx, RA); - + // // scratch regs // m_RegDisplay.pR0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R0); - m_RegDisplay.pGP = (PTR_uintptr_t)PTR_TO_REG(pCtx, GP); - m_RegDisplay.pTP = (PTR_uintptr_t)PTR_TO_REG(pCtx, TP); m_RegDisplay.pA0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A0); m_RegDisplay.pA1 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A1); m_RegDisplay.pA2 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A2); diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S index 919e601a914740..aac4ed1a410a32 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -42,7 +42,7 @@ // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add a2, a2, t3 - ld t4, OFFSETOF__Thread__m_alloc_context__alloc_limit(a1) + ld t4, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a1) bltu t4, a2, LOCAL_LABEL(RhpNewFast_RarePath) // Update the alloc pointer to account for the allocation. 
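For reference, the combined_limit comparison that this AllocFast.S hunk (and the matching RhNewString/RhpNewArray hunks in part 1) switches to is the usual bump-allocation fast path. A minimal C++ sketch, assuming a two-field view of the runtime's ee_alloc_context (the real type carries more state; AllocContextSketch and TryAllocFast are illustrative names, not runtime declarations):

#include <cstddef>
#include <cstdint>

// Minimal sketch of the allocation fast path encoded in the assembly above.
struct AllocContextSketch
{
    uint8_t* alloc_ptr;       // next free byte in the thread's allocation context
    uint8_t* combined_limit;  // roughly min(alloc_limit, next instrumentation trigger)
};

static void* TryAllocFast(AllocContextSketch* ac, void* pMethodTable, size_t size)
{
    uint8_t* obj = ac->alloc_ptr;
    uint8_t* end = obj + size;       // asm: add a2, a2, t3
    if (end > ac->combined_limit)    // asm: bltu t4, a2, <rare path>
        return nullptr;              // abandon the fast path; call the slow helper
    ac->alloc_ptr = end;             // bump the pointer
    *(void**)obj = pMethodTable;     // object's first pointer-sized field is its MethodTable*
    return obj;
}

Because combined_limit is maintained by the runtime as (roughly) the smaller of the true allocation limit and the next sampling/instrumentation point, the single unsigned compare in the assembly covers both conditions at once.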
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h index d739c5691cd6c0..00ec9e8284c86c 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h +++ b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h @@ -17,8 +17,8 @@ PLAT_ASM_OFFSET(1C, ExInfo, m_idxCurClause) PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) PLAT_ASM_OFFSET(348, ExInfo, m_notifyDebuggerSP) -PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) -PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RIP) PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) @@ -52,7 +52,7 @@ PLAT_ASM_OFFSET(80, PAL_LIMITED_CONTEXT, IP) PLAT_ASM_SIZEOF(208, REGDISPLAY) -PLAT_ASM_OFFSET(F8, REGDISPLAY, SP) +PLAT_ASM_OFFSET(10, REGDISPLAY, SP) PLAT_ASM_OFFSET(48, REGDISPLAY, pS1) PLAT_ASM_OFFSET(90, REGDISPLAY, pS2) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S index d236a518ae2ecb..36707233d18f57 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S @@ -493,7 +493,7 @@ LOCAL_LABEL(NotHijacked): addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags addiw a6, zero, -17 // Mask value (0xFFFFFFEF) - amoand.w a4, t3, a6 + amoand.w a4, a6, (t3) // Set preserved regs to the values expected by the funclet RESTORE_PRESERVED_REGISTERS a2 @@ -623,7 +623,7 @@ LOCAL_LABEL(NoAbort): // Set the DoNotTriggerGc flag addi t3, a2, OFFSETOF__Thread__m_ThreadStateFlags addiw a3, zero, -17 // Mask value (0xFFFFFFEF) - amoand.w a4, t3, a3 + amoand.w a4, a3, (t3) // Restore preserved registers RESTORE_PRESERVED_REGISTERS a1 @@ -646,7 +646,7 @@ LOCAL_LABEL(NoAbort): addi t3, a2, OFFSETOF__Thread__m_ThreadStateFlags addiw a3, zero, 16 // Mask value (0x10) - amoor.w a1, t3, a3 + amoor.w a1, a3, (t3) // Restore floating-point registers fld fs0, 0x00(sp) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index ced39ea870bd72..d854a60da415f3 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -261,7 +261,7 @@ LEAF_END RhpCheckedAssignRef, _TEXT // On exit: // t3, t4 : trashed // t6 : incremented by 8 -LEAF_ENTRY RhpAssignRefRiscVh64, _TEXT +LEAF_ENTRY RhpAssignRefRiscV64, _TEXT ALTERNATE_ENTRY RhpAssignRefAVLocation sd t6, 0(t5) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index c45651f2c34389..3f1e9dcfccca4c 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -1157,40 +1157,6 @@ inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { return *pT5; case UNW_RISCV_X6: return *pT6; - case UNW_RISCV_X7: - return *pT7; - case UNW_RISCV_X8: - return *pT8; - case UNW_RISCV_X9: - return *pT9; - case UNW_RISCV_X10: - return *pT10; - case UNW_RISCV_X11: - return *pT11; - case UNW_RISCV_X12: - return *pT12; - case UNW_RISCV_X13: - return *pT13; - case UNW_RISCV_X14: - return *pT14; - case UNW_RISCV_X15: - return *pT15; - case UNW_RISCV_X16: - return *pT16; - case UNW_RISCV_X17: - return *pT17; - case UNW_RISCV_X18: - 
return *pT18; - case UNW_RISCV_X19: - return *pT19; - case UNW_RISCV_X20: - return *pT20; - case UNW_RISCV_X21: - return *pT21; - case UNW_RISCV_X22: - return *pT22; - case UNW_RISCV_X23: - return *pT23; // Add other general-purpose registers if needed case UNW_RISCV_F0: @@ -1291,57 +1257,6 @@ void Registers_REGDISPLAY::setRegister(int regNum, uint64_t value, uint64_t loca case UNW_RISCV_X6: *pT6 = value; break; - case UNW_RISCV_X7: - *pT7 = value; - break; - case UNW_RISCV_X8: - *pT8 = value; - break; - case UNW_RISCV_X9: - *pT9 = value; - break; - case UNW_RISCV_X10: - *pT10 = value; - break; - case UNW_RISCV_X11: - *pT11 = value; - break; - case UNW_RISCV_X12: - *pT12 = value; - break; - case UNW_RISCV_X13: - *pT13 = value; - break; - case UNW_RISCV_X14: - *pT14 = value; - break; - case UNW_RISCV_X15: - *pT15 = value; - break; - case UNW_RISCV_X16: - *pT16 = value; - break; - case UNW_RISCV_X17: - *pT17 = value; - break; - case UNW_RISCV_X18: - *pT18 = value; - break; - case UNW_RISCV_X19: - *pT19 = value; - break; - case UNW_RISCV_X20: - *pT20 = value; - break; - case UNW_RISCV_X21: - *pT21 = value; - break; - case UNW_RISCV_X22: - *pT22 = value; - break; - case UNW_RISCV_X23: - *pT23 = value; - break; // Add other general-purpose registers if needed From c01dc7bbae7789bbf992d827ce9fe662aeb17c79 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:52:43 +0200 Subject: [PATCH 10/19] Managed build fixes --- .../Compiler/DependencyAnalysis/Relocation.cs | 1 + .../RiscV64ReadyToRunGenericHelperNode.cs | 4 ++-- .../Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs | 2 +- .../Compiler/ObjectWriter/Dwarf/DwarfCie.cs | 2 +- .../Dwarf/DwarfExpressionBuilder.cs | 2 +- .../Compiler/ObjectWriter/ElfNative.cs | 3 --- .../Compiler/ObjectWriter/ElfObjectWriter.cs | 17 ++++++----------- 7 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs index 07d9d0ccb4a061..68e299378af592 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs @@ -20,6 +20,7 @@ public enum RelocType IMAGE_REL_BASED_LOONGARCH64_PC = 0x16, // LoongArch64: pcalau12i+imm12 IMAGE_REL_BASED_LOONGARCH64_JIR = 0x17, // LoongArch64: pcaddu18i+jirl IMAGE_REL_BASED_RISCV64_PC = 0x18, // RiscV64: auipc + IMAGE_REL_BASED_RISCV64_JALR = 0x19, // RiscV64: jalr (indirect jump) IMAGE_REL_BASED_RELPTR32 = 0x7C, // 32-bit relative address from byte starting reloc // This is a special NGEN-specific relocation type // for relative pointer (used to make NGen relocation diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs index ecf9e903f7ef42..a285069cfada2d 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs @@ -47,8 +47,8 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref RiscV64Emitter enco // should be reported by someone else - the system should not rely on it coming from here. 
if (!relocsOnly && _hasInvalidEntries) { - encoder.EmitXOR(encoder.TargetRegister.IntraProcedureCallScratch1, result, 0); - encoder.EmitJE(encoder.TargetRegister.IntraProcedureCallScratch1, GetBadSlotHelper(factory)); + encoder.EmitXORI(encoder.TargetRegister.IntraProcedureCallScratch1, result, 0); + encoder.EmitJALR(Register.X0, encoder.TargetRegister.IntraProcedureCallScratch1, 0); } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs index 1a02b7074be777..99a1d8b53ac948 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs @@ -86,7 +86,7 @@ public DwarfBuilder( _codeRelocType = RelocType.IMAGE_REL_BASED_DIR64; break; - case TargetArchitecture.RISCV64: + case TargetArchitecture.RiscV64: _targetPointerSize = 8; _frameRegister = 8; // FP _codeRelocType = RelocType.IMAGE_REL_BASED_DIR64; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs index b78f73b3bf1c9a..b183131115725c 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs @@ -93,7 +93,7 @@ public DwarfCie(TargetArchitecture targetArchitecture) InitialCFAOffset = 0; break; - case TargetArchitecture.RISCV64: + case TargetArchitecture.RiscV64: CodeAlignFactor = 1; DataAlignFactor = -8; ReturnAddressRegister = 1; // RA diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs index f32658bb8b193f..28a17af35a35fb 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs @@ -165,7 +165,7 @@ public static int DwarfRegNum(TargetArchitecture architecture, int regNum) // Normal registers are directly mapped return regNum; - case TargetArchitecture.RISCV64: + case TargetArchitecture.RiscV64: // Normal registers are directly mapped return regNum; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs index 58f4eef9d96c56..cc37123883c7aa 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs @@ -607,9 +607,6 @@ internal static class ElfNative public const uint R_RISCV_PLT64 = 48; public const uint R_RISCV_COPY64 = 49; public const uint R_RISCV_RELATIVE64 = 50; - public const uint R_RISCV_TLS_DTPMOD64 = 51; - public const uint R_RISCV_TLS_DTPREL64 = 52; - public const uint R_RISCV_TLS_TPREL64 = 53; public const uint R_RISCV_64_ADD = 54; public const uint R_RISCV_64_SUB = 55; public const uint R_RISCV_64_HI20 = 56; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs index 19961c9de91ed7..688e96b68bb7ab 100644 --- 
a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs @@ -60,7 +60,7 @@ public ElfObjectWriter(NodeFactory factory, ObjectWritingOptions options) TargetArchitecture.ARM => EM_ARM, TargetArchitecture.ARM64 => EM_AARCH64, TargetArchitecture.LoongArch64 => EM_LOONGARCH, - TargetArchitecture.RiscV64 => EN_RISCV, + TargetArchitecture.RiscV64 => EM_RISCV, _ => throw new NotSupportedException("Unsupported architecture") }; _useInlineRelocationAddends = _machine is EM_386 or EM_ARM; @@ -537,7 +537,7 @@ private void EmitRelocationsLoongArch64(int sectionIndex, List<SymbolicRelocation> relocationList) - private void EmitRelocationsRiscV(int sectionIndex, List<SymbolicRelocation> relocationList) + private void EmitRelocationsRiscV64(int sectionIndex, List<SymbolicRelocation> relocationList) { if (relocationList.Count > 0) { @@ -553,12 +553,8 @@ private void EmitRelocationsRiscV(int sectionIndex, List<SymbolicRelocation> rel IMAGE_REL_BASED_DIR64 => R_RISCV_64, IMAGE_REL_BASED_HIGHLOW => R_RISCV_32, IMAGE_REL_BASED_RELPTR32 => R_RISCV_RELATIVE, - IMAGE_REL_BASED_RISCV_CALL => R_RISCV_CALL, - IMAGE_REL_BASED_RISCV_JUMP_SLOT => R_RISCV_JUMP_SLOT, - IMAGE_REL_BASED_RISCV_TLS_LE => R_RISCV_TLS_LE, - IMAGE_REL_BASED_RISCV_TLS_GD => R_RISCV_TLS_GD, - IMAGE_REL_BASED_RISCV_TLS_IE => R_RISCV_TLS_IE, - IMAGE_REL_BASED_RISCV_TLS_LD => R_RISCV_TLS_LD, + IMAGE_REL_BASED_RISCV64_PC => R_RISCV_PCREL_HI20, + IMAGE_REL_BASED_RISCV64_JALR => R_RISCV_CALL32, _ => throw new NotSupportedException("Unknown relocation type: " + symbolicRelocation.Type) }; @@ -567,11 +563,10 @@ private void EmitRelocationsRiscV(int sectionIndex, List<SymbolicRelocation> rel BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend); relocationStream.Write(relocationEntry); - if (symbolicRelocation.Type is IMAGE_REL_BASED_RISCV_CALL) + if (symbolicRelocation.Type is IMAGE_REL_BASED_RISCV64_PC) { - // Add an additional entry for the CALL relocation type BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry, (ulong)symbolicRelocation.Offset + 4); - BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | (type + 1)); + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | type + 1); BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend); relocationStream.Write(relocationEntry); } From bd3641bcd5622f390666b5193aea4dcdb9314d32 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:36:22 +0200 Subject: [PATCH 11/19] Address feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomek Sowiński --- src/coreclr/nativeaot/Runtime/MiscHelpers.cpp | 39 +++++++++++-------- src/coreclr/nativeaot/Runtime/PalRedhawk.h | 4 +- .../nativeaot/Runtime/StackFrameIterator.cpp | 4 +- .../nativeaot/Runtime/ThunksMapping.cpp | 11 +++--- .../nativeaot/Runtime/riscv64/AllocFast.S | 2 +- .../nativeaot/Runtime/riscv64/WriteBarriers.S | 3 +- .../Runtime/unix/UnixNativeCodeManager.cpp | 16 ++++---- .../nativeaot/Runtime/unix/UnwindHelpers.cpp | 31 ++------------- .../Runtime/unix/unixasmmacrosriscv64.inc | 19 +++++---- 9 files changed, 55 insertions(+), 74 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp index 9bccc5c5240a72..41605c7b72f869 100644 --- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp @@ -375,7 +375,7 @@ FCIMPL1(uint8_t *,
RhGetCodeTarget, uint8_t * pCodeOrg) #elif TARGET_RISCV64 uint32_t * pCode = (uint32_t *)pCodeOrg; // is this "addi a0, a0, 8"? - if (pCode[0] == 0x000200b3) // Encoding for `addi a0, a0, 8` in 32-bit RISC-V + if (pCode[0] == 0x00850513) // Encoding for `addi a0, a0, 8` in 32-bit instruction format { // unboxing sequence unboxingStub = true; @@ -383,27 +383,32 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) } // is this an indirect jump? // lui t0, imm; jalr t0, t0, imm12 - if ((pCode[0] & 0x000ff000) == 0x00020000 && - (pCode[1] & 0x000ff000) == 0x000000a0 && - (pCode[2] & 0x0000000f) == 0x00000000) + if ((pCode[0] & 0x7f) == 0x17 && // auipc + (pCode[1] & 0x707f) == 0x3003 && // ld with funct3=011 + (pCode[2] & 0x707f) == 0x0067) // jr (jalr with x0 as rd and funct3=000) { - // normal import stub - dist to IAT cell is relative to (PC & ~0xfff) - // lui: imm = SignExtend(imm20:Zeros(12), 64); - int64_t distToIatCell = ((((int64_t)pCode[0] & ~0xfff) << 12) >> 32); - // jalr: offset = SignExtend(imm12, 64); - distToIatCell += (int64_t)(pCode[1] & 0xfff); + // Compute the distance to the IAT cell + int64_t distToIatCell = (((int64_t)pCode[0] & 0xfffff000) >> 12); // Extract imm20 from auipc + distToIatCell += ((int64_t)(pCode[1] & 0xfff)); // Add imm12 from ld + uint8_t ** pIatCell = (uint8_t **)(((int64_t)pCode & ~0xfff) + distToIatCell); return *pIatCell; } - // is this an unboxing stub followed by a relative jump? - // jal ra, imm - else if (unboxingStub && (pCode[0] & 0xffe00000) == 0x00000000 && - (pCode[1] & 0x0000ffff) == 0x00000000) + + // Is this an unboxing stub followed by a relative jump? + // auipc t0, imm20; jalr ra, imm12(t0) + else if (unboxingStub && + (pCode[0] & 0x7f) == 0x17 && // auipc opcode + (pCode[1] & 0x707f) == 0x0067) // jalr opcode with funct3=000 { - // relative jump - dist is relative to the instruction - // offset = SignExtend(imm20:Zeros(12), 64); - int64_t distToTarget = ((int64_t)(pCode[1] & 0xffff) << 12) >> 12; - return (uint8_t *)pCode + distToTarget; + // Extract imm20 from auipc + int64_t distToTarget = (((int64_t)pCode[0] & 0xfffff000) >> 12); // Extract imm20 (bits 31:12) + + // Extract imm12 from jalr + distToTarget += ((int64_t)pCode[1] & 0xfff); // Extract imm12 (bits 31:20) + + // Calculate the final target address relative to PC + return (uint8_t *)((int64_t)pCode + distToTarget); } #else diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index 50ee05060bbce2..fa86e5b050b888 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -583,13 +583,11 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { uint64_t T5; uint64_t T6; uint64_t Pc; - uint64_t T61; - uint64_t Pc1; // // Floating Point Registers // - uint64_t F[4*32]; + uint64_t F[32]; uint32_t Fcsr; void SetIp(uintptr_t ip) { Pc = ip; } diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index c2dc0efd669048..9b314f3812a627 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1462,7 +1462,7 @@ struct UniversalTransitionStackFrame private: uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (fp) uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (ra) - Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) + Fp64 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) uintptr_t 
m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) @@ -1484,7 +1484,7 @@ struct UniversalTransitionStackFrame private: uintptr_t m_pushedRA; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (ra) uintptr_t m_pushedFP; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (fp) - Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) + Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index 794d676e4e76dc..974defde6ef45e 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -22,8 +22,10 @@ #define THUNK_SIZE 20 #elif TARGET_ARM64 #define THUNK_SIZE 16 -#elif TARGET_LOONGARCH64 || TARGET_RISCV64 +#elif TARGET_LOONGARCH64 #define THUNK_SIZE 16 +#elif TARGET_RISCV64 +#define THUNK_SIZE 12 #else #define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 #endif @@ -257,10 +259,9 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() #elif defined(TARGET_RISCV64) - // lui t0, %hi(delta) // Load upper immediate with address high bits - // addi t0, t0, %lo(delta) // Add lower immediate - // ld t1, 0(t0) // Load data from address in t0 - // jalr x0, t1, 0 // Jump and link register (set PC to t1) + // auipc t0, %hi(delta) // Load upper immediate with address high bits + // ld t1, %lo(delta)(t0) // Load data from address in (t0 + lower immediate) + // jr t1 // Jump and don't link register int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress); *((uint32_t*)pCurrentThunkAddress) = 0x0002A013 | (((delta & 0x3FFFFC) >> 2) << 12); // lui + addi diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S index aac4ed1a410a32..5caa170b748fe1 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -115,7 +115,7 @@ LOCAL_LABEL(NewOutOfMemory): // Compute overall allocation size (align(base size + (element size * elements), 8)). li a3, STRING_COMPONENT_SIZE // Load STRING_COMPONENT_SIZE into a3 - mul a2, a1, a3 // a2 = a1 * STRING_COMPONENT_SIZE + slli a2, a1, 1 // a2 = a1 * STRING_COMPONENT_SIZE, where STRING_COMPONENT_SIZE == 2 addi a2, a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7 andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index d854a60da415f3..878d596afb3e5f 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -65,8 +65,7 @@ // Someone went and updated the real heap. We need to invalidate INVALIDGCVALUE the shadow location since we cannot // guarantee whose shadow update won. 
- lui t3, (INVALIDGCVALUE >> 12) & 0xFFFFF - ori t3, t3, INVALIDGCVALUE & 0xFFF + li t3, INVALIDGCVALUE sd t3, 0(\destReg) 0: diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 96850baa1b3bcf..454aadf184b22a 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -1227,7 +1227,7 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho // BEQ, BNE, JAL, etc. // 1100 0000 0000 0000 0000 0000 0000 0000 #define BEGS_BITS 0x00000063 -#define BEGS_MASK 0x7F000000 +#define BEGS_MASK 0x0000007F UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; ASSERT(pNativeMethodInfo != NULL); @@ -1245,20 +1245,20 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho { uint32_t instr = *pInstr; - // check for Branches, Jumps, System calls. - // If we see such instruction before seeing registers restored, we are not in an epilog. - // Note: this includes RET, branches, jumps, system calls, etc... + // Check for branches, jumps, or system calls. + // If we see such instructions before registers are restored, we are not in an epilogue. + // Note: this includes RET, branches, jumps, and system calls. if ((instr & BEGS_MASK) == BEGS_BITS) { // not in an epilogue break; } - // check for restoring registers with LD or LUI - int rd = (instr >> 7) & 0x1F; - if (rd == 2 || rd == 1) // example register numbers for FP or RA + // Check for restoring registers (FP or RA) with `ld` + int rd = (instr >> 7) & 0x1F; // Extract the destination register + if (rd == 8 || rd == 1) // Check for FP (x8) or RA (x1) { - if ((instr & LD_MASK) == LD_BITS || (instr & LUI_MASK) == LUI_BITS) + if ((instr & LD_MASK) == LD_BITS) // Match `ld` instruction { return -1; } diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index 3f1e9dcfccca4c..650ff860bfba93 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -1134,10 +1134,7 @@ inline bool Registers_REGDISPLAY::validRegister(int num) const { bool Registers_REGDISPLAY::validVectorRegister(int num) const { - // Vector registers are mapped to floating-point registers F24 to F31 - if (num >= UNW_RISCV_F24 && num <= UNW_RISCV_F31) - return true; - + // Vector registers currently unsupported return false; } @@ -1370,34 +1367,12 @@ void Registers_REGDISPLAY::setRegister(int regNum, uint64_t value, uint64_t loca libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const { - num -= UNW_RISCV_F24; // Adjust the base to 0 - - if (num < 0 || num >= sizeof(F) / sizeof(uint64_t)) - { - PORTABILITY_ASSERT("unsupported riscv64 vector register"); - } - - libunwind::v128 result; - - // Assuming F array stores 64-bit parts of the vector data - result.vec[0] = 0; - result.vec[1] = 0; - result.vec[2] = F[num] >> 32; - result.vec[3] = F[num] & 0xFFFFFFFF; - - return result; + PORTABILITY_ASSERT("Vector registers currently unsupported on RISC-V"); } void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) { - num -= UNW_RISCV_F24; // Adjust the base to 0 - - if (num < 0 || num >= sizeof(F) / sizeof(uint64_t)) - { - PORTABILITY_ASSERT("unsupported riscv64 vector register"); - } - - F[num] = (uint64_t)value.vec[2] << 32 | (uint64_t)value.vec[3]; + PORTABILITY_ASSERT("Vector registers 
currently unsupported on RISC-V"); } #endif // TARGET_RISCV64 diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index f169d45529b809..7f8ac78d34c1fd 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -61,20 +61,23 @@ C_FUNC(\Name): .macro PROLOG_STACK_ALLOC Size // If Size is larger than 2047, split it into multiple instructions .if (\Size > 2047) || (\Size < -2048) - // Load the upper 20 bits into a temporary register (e.g., t0) - lui t0, %hi(\Size) - // Add the lower 12 bits to the temporary register - addi t0, t0, %lo(\Size) - // Subtract the value from the stack pointer - sub sp, sp, t0 + li t0, -\Size + add sp, sp, t0 .else addi sp, sp, -\Size .endif .endm .macro EPILOG_STACK_FREE Size - addi sp, sp, \Size - .cfi_adjust_cfa_offset -\Size + // If Size is larger than 2047 or smaller than -2048, split into multiple instructions + .if (\Size > 2047) || (\Size < -2048) + li t0, \Size // Load the large Size value into a temporary register + add sp, sp, t0 // Use the add instruction for full 64-bit addition + .cfi_adjust_cfa_offset -\Size + .else + addi sp, sp, \Size // Handle small immediate directly with addi + .cfi_adjust_cfa_offset -\Size + .endif .endm .macro EPILOG_STACK_RESTORE From ddbb05e0d2b22f7fd0a2e22157647ec0820689f9 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Mon, 16 Dec 2024 16:02:02 +0200 Subject: [PATCH 12/19] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomasz Sowiński --- src/coreclr/nativeaot/Runtime/MiscHelpers.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp index 41605c7b72f869..e59087580e14fa 100644 --- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp @@ -388,8 +388,8 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) (pCode[2] & 0x707f) == 0x0067) // jr (jalr with x0 as rd and funct3=000) { // Compute the distance to the IAT cell - int64_t distToIatCell = (((int64_t)pCode[0] & 0xfffff000) >> 12); // Extract imm20 from auipc - distToIatCell += ((int64_t)(pCode[1] & 0xfff)); // Add imm12 from ld + int64_t distToIatCell = (((int32_t)pCode[0]) >> 12) << 12; // Extract imm20 from auipc + distToIatCell += ((int32_t)pCode[1]) >> 20; // Add imm12 from ld uint8_t ** pIatCell = (uint8_t **)(((int64_t)pCode & ~0xfff) + distToIatCell); return *pIatCell; @@ -402,10 +402,10 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) (pCode[1] & 0x707f) == 0x0067) // jalr opcode with funct3=000 { // Extract imm20 from auipc - int64_t distToTarget = (((int64_t)pCode[0] & 0xfffff000) >> 12); // Extract imm20 (bits 31:12) + int64_t distToTarget = (((int32_t)pCode[0]) >> 12) << 12; // Extract imm20 (bits 31:12) // Extract imm12 from jalr - distToTarget += ((int64_t)pCode[1] & 0xfff); // Extract imm12 (bits 31:20) + distToTarget += ((int32_t)pCode[1]) >> 20; // Extract imm12 (bits 31:20) // Calculate the final target address relative to PC return (uint8_t *)((int64_t)pCode + distToTarget); From 60f3f1d789fc9f99db1d915ac10c2bce4070991d Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:14:54 +0200 Subject: [PATCH 13/19] Update 
src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S --- src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S index 13ae2fe0502100..45d61e749700f1 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -158,7 +158,6 @@ ld a5, 0x168(sp) // Restore a5 ld a6, 0x170(sp) // Restore a6 ld a7, 0x178(sp) // Restore a7 - ld tp, 0x180(sp) // Restore tp // Restore FP and RA registers, and free the allocated stack block EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, STACK_SIZE From 03e43942762a9dacb42bca566585ec9d56733c7c Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:13:07 +0200 Subject: [PATCH 14/19] Apply suggestions from code review Co-authored-by: Tymoteusz Wenerski --- src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S | 6 +++--- .../nativeaot/Runtime/riscv64/InteropThunksHelpers.S | 2 +- src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S | 4 ++-- src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S | 4 ++-- src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S index 22b3c60fa3ec4f..f6a51199671722 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -109,7 +109,7 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 andi t3, a3, 1 << TrapThreadsFlags_TrapThreads_Bit bnez t3, LOCAL_LABEL(WaitForGC) - jalr zero, 0(ra) + jr ra LOCAL_LABEL(WaitForGC): li t6, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1) @@ -151,7 +151,7 @@ NESTED_END RhpWaitForGC LEAF_ENTRY RhpGcPoll PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a0 bne a0, zero, C_FUNC(RhpGcPollRare) - jalr zero, 0(ra) + jr ra LEAF_END RhpGcPoll // Rare GC Poll function @@ -159,7 +159,7 @@ NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler PUSH_COOP_PINVOKE_FRAME a0 call RhpGcPoll2 POP_COOP_PINVOKE_FRAME - jalr zero, 0(ra) + jr ra NESTED_END RhpGcPollRare #ifdef FEATURE_GC_STRESS diff --git a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S index 91ba5da0cccb4d..c508edfb2ffce6 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S @@ -47,5 +47,5 @@ // LEAF_ENTRY RhGetCommonStubAddress, _TEXT PREPARE_EXTERNAL_VAR RhCommonStub, a0 - jalr zero, 0(ra) // Return address in ra + jr ra // Return address in ra LEAF_END RhGetCommonStubAddress, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S index 6329ac703e0057..96295ab0af779d 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S @@ -32,7 +32,7 @@ NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler sd a1, OFFSETOF__PInvokeTransitionFrame__m_pThread(a0) sd a0, OFFSETOF__Thread__m_pTransitionFrame(a1) - jalr zero, 0(ra) + jr ra NESTED_END RhpPInvoke, _TEXT LEAF_ENTRY RhpPInvokeReturn, _TEXT @@ -42,7 +42,7 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a5 bne t0, zero, 1f // 
TrapThreadsFlags_None = 0 - jalr zero, 0(ra) + jr ra 1: // passing transition frame pointer in a0 tail C_FUNC(RhpWaitForGC2) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S index 0427e4a45f8c25..6ba375c91f1b16 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S @@ -21,7 +21,7 @@ ld t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)(t0) // Jump to the address in t6 - jalr zero, 0(t0) + jr t0 0: .endm @@ -97,7 +97,7 @@ // Load the target address of the vtable into t3 ld t3, 0(t3) - jalr zero, 0(t3) + jr t3 LEAF_END RhpVTableOffsetDispatch, _TEXT // diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S index 878d596afb3e5f..21457e69fab880 100644 --- a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -244,7 +244,7 @@ NotInHeap: ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation sd t6, 0(t5) addi t5, t5, 8 - jalr ra, 0 + jalr ra LEAF_END RhpCheckedAssignRef, _TEXT @@ -268,7 +268,7 @@ LEAF_ENTRY RhpAssignRefRiscV64, _TEXT INSERT_UNCHECKED_WRITE_BARRIER_CORE t5, t6 addi t5, t5, 8 - jalr ra, 0 + jalr ra LEAF_END RhpAssignRefRiscV64, _TEXT @@ -323,7 +323,7 @@ CmpXchgNoUpdate: // t1 still contains the original value. mv a0, t1 - jalr ra, 0 + jalr ra LEAF_END RhpCheckedLockCmpXchg, _TEXT @@ -359,6 +359,6 @@ DoCardsXchg: // t1 still contains the original value. mv a0, t1 - jalr ra, 0 + jalr ra LEAF_END RhpCheckedXchg, _TEXT From 56e5a872edcbbe03d5ce59ab4fe2639ab30ee39b Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:54:43 +0200 Subject: [PATCH 15/19] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tomasz Sowiński --- src/coreclr/nativeaot/Runtime/MiscHelpers.cpp | 1 - src/coreclr/nativeaot/Runtime/ThunksMapping.cpp | 13 ++++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp index e59087580e14fa..cd5f37e5199928 100644 --- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp @@ -374,7 +374,6 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) #elif TARGET_RISCV64 uint32_t * pCode = (uint32_t *)pCodeOrg; - // is this "addi a0, a0, 8"? 
if (pCode[0] == 0x00850513) // Encoding for `addi a0, a0, 8` in 32-bit instruction format { // unboxing sequence diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index 974defde6ef45e..9f7211a2ee7623 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -264,17 +264,16 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() // jr t1 // Jump and don't link register int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress); - *((uint32_t*)pCurrentThunkAddress) = 0x0002A013 | (((delta & 0x3FFFFC) >> 2) << 12); // lui + addi + uint32_t deltaHi = (delta + 0x800) & 0xfffff000; + uint32_t deltaLo = delta << (32 - 12); + + *((uint32_t*)pCurrentThunkAddress) = 0x00000297 | deltaHi; // auipc pCurrentThunkAddress += 4; - delta += OS_PAGE_SIZE - POINTER_SIZE - (i * POINTER_SIZE * 2) - 4; - *((uint32_t*)pCurrentThunkAddress) = 0x0002B014 | (((delta & 0x3FFFFC) >> 2) << 12); // lui + addi - pCurrentThunkAddress += 4; - - *((uint32_t*)pCurrentThunkAddress) = 0x0002C294; // Example opcode, specific to RISC-V + *((uint32_t*)pCurrentThunkAddress) = 0x0002B303 | deltaLo; // ld pCurrentThunkAddress += 4; - *((uint32_t*)pCurrentThunkAddress) = 0x0004C280; // Example opcode, specific to RISC-V + *((uint32_t*)pCurrentThunkAddress) = 0x00030067; // jr pCurrentThunkAddress += 4; #else From b96fc630bd307e01c1056e43bedf06ebc12f9476 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid <3840695+am11@users.noreply.github.com> Date: Thu, 9 Jan 2025 02:56:29 +0200 Subject: [PATCH 16/19] Define GetReturnAddressRegisterLocation --- src/coreclr/nativeaot/Runtime/regdisplay.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index 7818086eccb726..1e9856c98e795c 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -281,6 +281,7 @@ struct REGDISPLAY inline PCODE GetIP() { return IP; } inline uintptr_t GetSP() { return SP; } inline uintptr_t GetFP() { return *pFP; } + inline PTR_uintptr_t GetReturnAddressRegisterLocation() { return pRA; } inline void SetIP(PCODE IP) { this->IP = IP; } inline void SetSP(uintptr_t SP) { this->SP = SP; } From 3949dfa839885f2fbd3e7ed8d8feae51abd911d7 Mon Sep 17 00:00:00 2001 From: Adeel <3840695+am11@users.noreply.github.com> Date: Thu, 9 Jan 2025 03:37:43 +0200 Subject: [PATCH 17/19] Fix merge conflict --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index d1818020264760..05dc2b1507fdda 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1442,7 +1442,7 @@ struct UniversalTransitionStackFrame private: uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (fp) uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (ra) - Fp64 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) + Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) From a33d91c1dd1a24371f231a43806aea9a50878bbd Mon Sep 17 00:00:00 2001 From: Adeel Mujahid
<3840695+am11@users.noreply.github.com> Date: Thu, 9 Jan 2025 06:27:10 +0200 Subject: [PATCH 18/19] Update other macros in AsmOffsetsVerify --- src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp index 21059545ed9ba8..a0ca5a4c0081cc 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp +++ b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -35,13 +35,11 @@ class AsmOffsets #define PLAT_ASM_OFFSET(offset, cls, member) \ static_assert(offsetof(cls, member) == 0x##offset, "Bad asm offset for '" #cls "." #member "'. Actual offset: " OFFSET_STRING(cls, member)); -#define PLAT_ASM_SIZEOF(size, cls ) \ - static_assert((sizeof(cls) == 0x##size) || (sizeof(cls) > 0x##size), "Bad asm size for '" #cls "', the actual size is smaller than 0x" #size "."); \ - static_assert((sizeof(cls) == 0x##size) || (sizeof(cls) < 0x##size), "Bad asm size for '" #cls "', the actual size is larger than 0x" #size "."); +#define PLAT_ASM_SIZEOF(size, cls) \ + static_assert(sizeof(cls) == 0x##size, "Bad asm size for '" #cls "'. Actual size: " OFFSET_STRING(cls, 0x##size)); #define PLAT_ASM_CONST(constant, expr) \ - static_assert(((expr) == 0x##constant) || ((expr) > 0x##constant), "Bad asm constant for '" #expr "', the actual value is smaller than 0x" #constant "."); \ - static_assert(((expr) == 0x##constant) || ((expr) < 0x##constant), "Bad asm constant for '" #expr "', the actual value is larger than 0x" #constant "."); + static_assert((expr) == 0x##constant, "Bad asm constant for '" #expr "'. Actual value: " OFFSET_STRING(expr, 0x##constant)); #include "AsmOffsets.h" From c3c07cd38161f862f81a4000260116120931dedf Mon Sep 17 00:00:00 2001 From: Jan Kotas <jkotas@microsoft.com> Date: Wed, 8 Jan 2025 20:37:43 -0800 Subject: [PATCH 19/19] Update src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs Co-authored-by: Adeel Mujahid <3840695+am11@users.noreply.github.com> --- .../Compiler/ObjectWriter/ElfObjectWriter.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs index 688e96b68bb7ab..d39e095db8247f 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs @@ -846,7 +846,9 @@ private void EmitObjectFile(FileStream outputFileStream) { EM_ARM => 0x05000000u, // For ARM32 claim conformance with the EABI specification EM_LOONGARCH => 0x43u, // For LoongArch ELF psABI specify the ABI version (1) and modifiers (64-bit GPRs, 64-bit FPRs) - EM_RISCV => 0x08u, // For RISC-V, specify the ABI or architecture-specific version if applicable + // TODO: update once RISC-V runtime supports "C" extension (compressed instructions) + // it should be 0x0005u EF_RISCV_RVC (0x0001) | EF_RISCV_FLOAT_ABI_DOUBLE (0x0004) + EM_RISCV => 0x0004u, // EF_RISCV_FLOAT_ABI_DOUBLE (double precision floating-point ABI). _ => 0u }, };
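[Illustrative aside, not part of the patch series.] Several hunks above (the ThunksMapping.cpp thunk emitter and RhGetCodeTarget in MiscHelpers.cpp) split a PC-relative delta across an auipc imm20 and an ld imm12. Because the imm12 is sign-extended, the hi part is biased by 0x800 before masking; decoding sign-extends both fields and adds them back. The standalone C sketch below round-trips that split. The two base encodings match the constants used in the patches (0x00000297 is auipc t0, 0 and 0x0002B303 is ld t1, 0(t0)); everything else is illustrative.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

// Split a 32-bit delta into the auipc imm20 field (bits 31:12) and the
// ld imm12 field (bits 31:20), mirroring deltaHi/deltaLo above.
static void split_delta(int32_t delta, uint32_t* deltaHi, uint32_t* deltaLo)
{
    *deltaHi = ((uint32_t)delta + 0x800) & 0xfffff000; // bias for imm12 sign extension
    *deltaLo = (uint32_t)delta << 20;                  // low 12 bits into bits 31:20
}

// Recover the delta from the two instruction words, as RhGetCodeTarget does:
// auipc contributes sext(imm20 << 12), ld contributes sext(imm12).
static int64_t join_delta(uint32_t auipcInstr, uint32_t ldInstr)
{
    int64_t hi = (int32_t)(auipcInstr & 0xfffff000); // imm20 already in place, sign-extended
    int64_t lo = (int32_t)ldInstr >> 20;             // arithmetic shift sign-extends imm12
    return hi + lo;
}

int main(void)
{
    const uint32_t AUIPC_T0 = 0x00000297; // auipc t0, 0
    const uint32_t LD_T1_T0 = 0x0002B303; // ld t1, 0(t0)

    for (int32_t delta = -0x12345; delta <= 0x12345; delta += 3)
    {
        uint32_t hi, lo;
        split_delta(delta, &hi, &lo);
        assert(join_delta(AUIPC_T0 | hi, LD_T1_T0 | lo) == delta);
    }
    puts("auipc+ld immediate split round-trips");
    return 0;
}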