-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[ARM] Prevent stack argument overwrite during tail calls #166492
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
For tail-calls we want to re-use the caller stack-frame and potentially
need to copy stack arguments.
For large stack arguments, such as by-val structs, this can lead to
overwriting incoming stack arguments when preparing outgoing ones by
copying them. E.g., in cases like
%"struct.s1" = type { [19 x i32] }
define void @f0(ptr byval(%"struct.s1") %0, ptr %1) {
tail call void @F1(ptr %1, ptr byval(%"struct.s1") %0)
ret void
}
declare void @F1(ptr, ptr)
that swap arguments, the last bytes of %0 are on the stack, followed by
%1. To prepare the outgoing arguments, %0 needs to be copied and %1
needs to be loaded into r0. However, currently the copy of %0
overwrites the location of %1, resulting in loading garbage into r0.
We fix that by forcing the load to the pointer stack argument to happen
before the copy.
|
@llvm/pr-subscribers-backend-arm Author: David Tellenbach (dtellenbach) ChangesFor tail-calls we want to re-use the caller stack-frame and potentially For large stack arguments, such as by-val structs, this can lead to that swap arguments, the last bytes of %0 are on the stack, followed by We fix that by forcing the load to the pointer stack argument to happen Full diff: https://github.com/llvm/llvm-project/pull/166492.diff 2 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6b0653457cbaf..46ab962e05bbd 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2510,9 +2510,44 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
Chain = DAG.getStackArgumentTokenFactor(Chain);
- if (ByValTempChain)
+ if (ByValTempChain) {
+ // In case of large byval copies, re-using the stackframe for tail-calls
+ // can lead to overwriting incoming arguments on the stack. Force
+ // loading these stack arguments before the copy to avoid that.
+ SmallVector<SDValue, 8> IncomingLoad;
+ for (unsigned I = 0; I < OutVals.size(); ++I) {
+ if (Outs[I].Flags.isByVal())
+ continue;
+
+ SDValue OutVal = OutVals[I];
+ LoadSDNode *OutLN = dyn_cast_or_null<LoadSDNode>(OutVal);
+ if (!OutLN)
+ continue;
+
+ FrameIndexSDNode *FIN =
+ dyn_cast_or_null<FrameIndexSDNode>(OutLN->getBasePtr());
+ if (!FIN)
+ continue;
+
+ if (!MFI.isFixedObjectIndex(FIN->getIndex()))
+ continue;
+
+ for (const CCValAssign &VA : ArgLocs) {
+ if (VA.isMemLoc())
+ IncomingLoad.push_back(OutVal.getValue(1));
+ }
+ }
+
+ // Update the chain to force loads for potentially clobbered argument
+ // loads to happen before the byval copy.
+ if (!IncomingLoad.empty()) {
+ IncomingLoad.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, IncomingLoad);
+ }
+
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
ByValTempChain);
+ }
AfterFormalArgLoads = true;
}
diff --git a/llvm/test/CodeGen/ARM/byval_struct_copy_tailcall.ll b/llvm/test/CodeGen/ARM/byval_struct_copy_tailcall.ll
new file mode 100644
index 0000000000000..50c676c425ce7
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/byval_struct_copy_tailcall.ll
@@ -0,0 +1,69 @@
+; RUN: llc -mtriple thumbv7em-apple-darwin -o - < %s | FileCheck %s
+
+%"struct.s1" = type { [19 x i32] }
+
+define void @f0(ptr byval(%"struct.s1") %0, ptr %1) #1 {
+; CHECK-LABEL: _f0: @ @f0
+; CHECK-NEXT: @ %bb.0:
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #76
+; CHECK-NEXT: add.w r9, sp, #84
+; CHECK-NEXT: stm.w r9, {r0, r1, r2, r3}
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: add r1, sp, #84
+; CHECK-NEXT: movs r2, #76
+; CHECK-NEXT: mov r3, r0
+; CHECK-NEXT: LBB0_1: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr r4, [r1], #4
+; CHECK-NEXT: subs r2, #4
+; CHECK-NEXT: str r4, [r3], #4
+; CHECK-NEXT: bne LBB0_1
+; CHECK-NEXT: @ %bb.2:
+; CHECK-NEXT: add.w r1, r0, #12
+; CHECK-NEXT: add r2, sp, #100
+; CHECK-NEXT: ldr r0, [sp, #160]
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldr r3, [r1], #4
+; CHECK-NEXT: str r3, [r2], #4
+; CHECK-NEXT: ldm.w sp, {r1, r2, r3}
+; CHECK-NEXT: add sp, #76
+; CHECK-NEXT: pop.w {r4, lr}
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: b.w _f1
+ tail call void @f1(ptr %1, ptr byval(%"struct.s1") %0)
+ ret void
+}
+
+declare void @f1(ptr, ptr)
+
+attributes #1 = { nounwind "frame-pointes"="non-leaf" }
|
|
Seems to be happening since |
| ; CHECK-NEXT: @ %bb.2: | ||
| ; CHECK-NEXT: add.w r1, r0, #12 | ||
| ; CHECK-NEXT: add r2, sp, #100 | ||
| ; CHECK-NEXT: ldr r0, [sp, #160] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the load that had happened after the copy and got overwritten.
For tail-calls we want to re-use the caller stack-frame and potentially
need to copy stack arguments.
For large stack arguments, such as by-val structs, this can lead to
overwriting incoming stack arguments when preparing outgoing ones by
copying them. E.g., in cases like
that swap arguments, the last bytes of %0 are on the stack, followed by
%1. To prepare the outgoing arguments, %0 needs to be copied and %1
needs to be loaded into r0. However, currently the copy of %0
overwrites the location of %1, resulting in loading garbage into r0.
We fix that by forcing the load to the pointer stack argument to happen
before the copy.