-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Implement INIT/ADJUST_TRAMPOLINE #70267
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Carlos Eduardo Seo (ceseo) ChangesAdd support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes #65573 Full diff: https://github.com/llvm/llvm-project/pull/70267.diff 4 Files Affected:
diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index 2d213d95f333af3..2034378f7831c02 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -272,6 +272,11 @@ switch32
switch8
switchu8
+// This function generates a custom trampoline function with the specific
+// realFunc and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr);
+
// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
// SJLJ for exceptions, each function with a catch clause or destructors needs
diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c
index 844eb2794414285..71b9a7b4c755eca 100644
--- a/compiler-rt/lib/builtins/trampoline_setup.c
+++ b/compiler-rt/lib/builtins/trampoline_setup.c
@@ -41,3 +41,30 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)
+
+// The AArch64 compiler generates calls to __trampoline_setup() when creating
+// trampoline functions on the stack for use with nested functions.
+// This function creates a custom 20-byte trampoline function on the stack
+// which loads x18 with a pointer to the outer function's locals
+// and then jumps to the target nested function.
+
+#if __aarch64__
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+ int trampSizeAllocated,
+ const void *realFunc, void *localsPtr) {
+ // should never happen, but if compiler did not allocate
+ // enough space on stack for the trampoline, abort
+ if (trampSizeAllocated < 20)
+ compilerrt_abort();
+
+ // create trampoline
+ trampOnStack[0] = 0x580000b1; // ldr x17, realFunc
+ trampOnStack[1] = 0x580000d2; // ldr x18, localsPtr
+ trampOnStack[2] = 0xd61f0220; // br x17
+ trampOnStack[3] = (uint32_t)realFunc;
+ trampOnStack[4] = (uint32_t)localsPtr;
+
+ // clear instruction cache
+ __clear_cache(trampOnStack, &trampOnStack[10]);
+}
+#endif // __aarch64__
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7211607fee528a6..58063dd2c5130eb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5946,6 +5946,52 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
return Final;
}
+SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ report_fatal_error("ADJUST_TRAMPOLINE operation is only supported on Linux.");
+
+ return Op.getOperand(0);
+}
+
+SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ SDLoc dl(Op);
+
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+ Entry.Node = DAG.getConstant(20, dl, MVT::i64);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -5961,6 +6007,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE:
+ return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE:
+ return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 52e519cd8a0c93c..b50a63b812510d4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1045,6 +1045,8 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
9e7230f
to
b641619
Compare
@ceseo Thanks for this patch. |
I missed that one. Thanks for pointing that out. So, I think I agree that it is better to do the same as gcc does. The code will probably be very untidy, since we have to write the actual instructions (like in the x86 implementation), but it should work too. The only observation I have is that, whatever the approach is, we cannot use If the submitter of that patch wants to do this, I'm OK with that since I'll be away for a month or so. |
b641619
to
99dead3
Compare
Probably fixes #56625 too? Edit: not yet, if this is limited to Linux |
Yes, we need to sort out the nest parameter register for Windows and macOS. Both use |
Any other comments on this? |
We will try another approach that avoids executable stack when procedure pointers support is fully implemented. |
I think this change is useful on its own for aarch64 Linux, and I think it can be merged as is. |
@ceseo What is your plan here? Do you have a different patch? |
Not for now. I'm waiting until the support for procedure pointers is fully implemented and then I'll give it another shot to avoid executable stack. If you think this is useful as is until then, please feel free to merge it. |
@ceseo Could you reopen the PR? |
Can you change the description? It will be used as the default commit message when you click "Squash and merge". |
OK. I'll fix the test and the description and resubmit. |
8512cfc
to
8dd35ab
Compare
I also simplified the test. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, but wait for @MaskRay to approve.
Could you update the NOTE about aarch64 in https://github.com/llvm/llvm-project/blob/main/flang/docs/InternalProcedureTrampolines.md to note that this is implemented now, but requires compiler-rt (and -rtlib=compiler-rt)? That can be in a separate NFC patch without review after -rtlib=compiler-rt goes in, if you merge this first.
Ping? |
@kiranchandramohan do you want this in for LLVM 19, or should we leave it for LLVM 20? |
Yes, it will be great to have this for LLVM 19 since this enables an important HPC application. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Ideally @compnerd could remove the "Request changes".
I agree that this will be nice to be merged into relase/19.x
I'll just make the small changes requested by @MaskRay and resubmit the final version. |
8dd35ab
to
7c18ac7
Compare
@kiranchandramohan I'll commit this once all the checks pass. |
7c18ac7
to
60f9048
Compare
Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes llvm#65573 Fixes llvm#76927 Fixes llvm#83555 Updates llvm#66157
60f9048
to
2e20622
Compare
I will merge this PR. The Windows failure is due to something else and it was fixed by 7b61973 I'll revert it if it breaks post-commit |
Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes llvm#65573 Fixes llvm#76927 Fixes llvm#83555 Updates llvm#66157 (cherry picked from commit c4b66bf)
Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes llvm#65573 Fixes llvm#76927 Fixes llvm#83555 Updates llvm#66157 (cherry picked from commit c4b66bf)
Summary: Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes #65573 Fixes #76927 Fixes #83555 Updates #66157 Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250624
Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes llvm#65573 Fixes llvm#76927 Fixes llvm#83555 Updates llvm#66157 (cherry picked from commit c4b66bf)
It looks like LLVM 19 will support this. |
Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for
AArch64.
Fixes #65573
Fixes #76927
Fixes #83555
Updates #66157