From 75e36e847e04a671c9de955ad01ba0a3327b2247 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 8 May 2015 23:52:00 +0000 Subject: [PATCH] ScheduleDAGInstrs: In functions with tail calls PseudoSourceValues are not non-aliasing distinct objects The code that builds the dependence graph assumes that two PseudoSourceValues don't alias. In a tail calling function two FixedStackObjects might refer to the same location. Worse 'immutable' fixed stack objects like function arguments are not immutable and will be clobbered. Change this so that a load from a FixedStackObject is not invariant in a tail calling function and don't return a PseudoSourceValue for an instruction in tail calling functions when building the dependence graph so that we handle function arguments conservatively. Fix for PR23459. rdar://20740035 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236916 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineFrameInfo.h | 13 ++++++ lib/CodeGen/ScheduleDAGInstrs.cpp | 8 ++++ lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +- lib/Target/ARM/ARMISelLowering.cpp | 4 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 4 +- lib/Target/PowerPC/PPCISelLowering.cpp | 1 + lib/Target/X86/X86ISelLowering.cpp | 1 + .../CodeGen/AArch64/tailcall_misched_graph.ll | 42 +++++++++++++++++++ test/CodeGen/ARM/debug-frame.ll | 6 +-- test/CodeGen/ARM/ehabi.ll | 16 +++---- test/CodeGen/X86/tailcallstack64.ll | 4 +- 11 files changed, 87 insertions(+), 16 deletions(-) create mode 100644 test/CodeGen/AArch64/tailcall_misched_graph.ll diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index d4eaaf8ea66..40f3b4944cc 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -246,6 +246,11 @@ class MachineFrameInfo { /// True if this is a varargs function that contains a musttail call. bool HasMustTailInVarArgFunc; + /// True if this function contains a tail call. If so immutable objects like + /// function arguments are no longer so. A tail call *can* override fixed + /// stack objects like arguments so we can't treat them as immutable. + bool HasTailCall; + /// Not null, if shrink-wrapping found a better place for the prologue. MachineBasicBlock *Save; /// Not null, if shrink-wrapping found a better place for the epilogue. @@ -281,6 +286,7 @@ public: HasMustTailInVarArgFunc = false; Save = nullptr; Restore = nullptr; + HasTailCall = false; } /// hasStackObjects - Return true if there are any stack objects in this @@ -508,6 +514,10 @@ public: bool hasMustTailInVarArgFunc() const { return HasMustTailInVarArgFunc; } void setHasMustTailInVarArgFunc(bool B) { HasMustTailInVarArgFunc = B; } + /// Returns true if the function contains a tail call. + bool hasTailCall() const { return HasTailCall; } + void setHasTailCall() { HasTailCall = true; } + /// getMaxCallFrameSize - Return the maximum size of a call frame that must be /// allocated for an outgoing function call. This is only available if /// CallFrameSetup/Destroy pseudo instructions are used by the target, and @@ -545,6 +555,9 @@ public: /// isImmutableObjectIndex - Returns true if the specified index corresponds /// to an immutable object. bool isImmutableObjectIndex(int ObjectIdx) const { + // Tail calling functions can clobber their function arguments. + if (HasTailCall) + return false; assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); return Objects[ObjectIdx+NumFixedObjects].isImmutable; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 755faf631af..583ed38f313 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -143,6 +144,13 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, if (const PseudoSourceValue *PSV = (*MI->memoperands_begin())->getPseudoValue()) { + // Function that contain tail calls don't have unique PseudoSourceValue + // objects. Two PseudoSourceValues might refer to the same or overlapping + // locations. The client code calling this function assumes this is not the + // case. So return a conservative answer of no known object. + if (MFI->hasTailCall()) + return; + // For now, ignore PseudoSourceValues which may alias LLVM IR values // because the code that uses this function has no way to cope with // such aliases. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index cbb5a32098f..6d2a95129c3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2873,8 +2873,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // If we're doing a tall call, use a TC_RETURN here rather than an // actual call instruction. - if (IsTailCall) + if (IsTailCall) { + MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); + } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b1c0b85e500..a3743d452e0 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1847,8 +1847,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(InFlag); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - if (isTailCall) + if (isTailCall) { + MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); + } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 9055e7ee532..ed5676c1fbb 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -637,8 +637,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (InFlag.getNode()) Ops.push_back(InFlag); - if (isTailCall) + if (isTailCall) { + MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); + } int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3; Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c7016bc2dcf..8da10f69ff6 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4215,6 +4215,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, isa(Callee)) && "Expecting an global address, external symbol, absolute value or register"); + MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7f24ab43490..4563354f1f3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3135,6 +3135,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This isn't right, although it's probably harmless on x86; liveouts // should be computed from returns not tail calls. Consider a void // function making a tail call to a function returning int. + MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); } diff --git a/test/CodeGen/AArch64/tailcall_misched_graph.ll b/test/CodeGen/AArch64/tailcall_misched_graph.ll new file mode 100644 index 00000000000..57f96874c0d --- /dev/null +++ b/test/CodeGen/AArch64/tailcall_misched_graph.ll @@ -0,0 +1,42 @@ +; RUN: llc -mcpu=cyclone -debug-only=misched < %s 2>&1 | FileCheck %s + +; REQUIRE: asserts + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +define void @caller2(i8* %a0, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9) { +entry: + tail call void @callee2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a0) + ret void +} + +declare void @callee2(i8*, i8*, i8*, i8*, i8*, + i8*, i8*, i8*, i8*, i8*) + +; Make sure there is a dependence between the load and store to the same stack +; location during a tail call. Tail calls clobber the incoming argument area and +; therefore it is not safe to assume argument locations are invariant. +; PR23459 has a test case that we where miscompiling because of this at the +; time. + +; CHECK: Frame Objects +; CHECK: fi#-4: {{.*}} fixed, at location [SP+8] +; CHECK: fi#-3: {{.*}} fixed, at location [SP] +; CHECK: fi#-2: {{.*}} fixed, at location [SP+8] +; CHECK: fi#-1: {{.*}} fixed, at location [SP] + +; CHECK: [[VRA:%vreg.*]] = LDRXui +; CHECK: [[VRB:%vreg.*]] = LDRXui +; CHECK: STRXui %vreg{{.*}}, +; CHECK: STRXui [[VRB]], + +; Make sure that there is an dependence edge between fi#-2 and fi#-4. +; Without this edge the scheduler would be free to move the store accross the load. + +; CHECK: SU({{.*}}): [[VRB]] = LDRXui +; CHECK-NOT: SU +; CHECK: Successors: +; CHECK: ch SU([[DEPSTORE:.*]]): Latency=0 + +; CHECK: SU([[DEPSTORE]]): STRXui %vreg0, diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll index f7caaeadd58..134829254e3 100644 --- a/test/CodeGen/ARM/debug-frame.ll +++ b/test/CodeGen/ARM/debug-frame.ll @@ -179,7 +179,7 @@ declare void @_ZSt9terminatev() ; CHECK-FP: .cfi_offset r4, -36 ; CHECK-FP: add r11, sp, #28 ; CHECK-FP: .cfi_def_cfa r11, 8 -; CHECK-FP: sub sp, sp, #28 +; CHECK-FP: sub sp, sp, #44 ; CHECK-FP: .cfi_endproc ; CHECK-FP-ELIM-LABEL: _Z4testiiiiiddddd: @@ -195,8 +195,8 @@ declare void @_ZSt9terminatev() ; CHECK-FP-ELIM: .cfi_offset r6, -28 ; CHECK-FP-ELIM: .cfi_offset r5, -32 ; CHECK-FP-ELIM: .cfi_offset r4, -36 -; CHECK-FP-ELIM: sub sp, sp, #28 -; CHECK-FP-ELIM: .cfi_def_cfa_offset 64 +; CHECK-FP-ELIM: sub sp, sp, #36 +; CHECK-FP-ELIM: .cfi_def_cfa_offset 72 ; CHECK-FP-ELIM: .cfi_endproc ; CHECK-V7-FP-LABEL: _Z4testiiiiiddddd: diff --git a/test/CodeGen/ARM/ehabi.ll b/test/CodeGen/ARM/ehabi.ll index ebf0c2a0033..088e48d2d79 100644 --- a/test/CodeGen/ARM/ehabi.ll +++ b/test/CodeGen/ARM/ehabi.ll @@ -146,8 +146,8 @@ declare void @_ZSt9terminatev() ; CHECK-FP: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-FP: .setfp r11, sp, #28 ; CHECK-FP: add r11, sp, #28 -; CHECK-FP: .pad #28 -; CHECK-FP: sub sp, sp, #28 +; CHECK-FP: .pad #44 +; CHECK-FP: sub sp, sp, #44 ; CHECK-FP: .personality __gxx_personality_v0 ; CHECK-FP: .handlerdata ; CHECK-FP: .fnend @@ -156,8 +156,8 @@ declare void @_ZSt9terminatev() ; CHECK-FP-ELIM: .fnstart ; CHECK-FP-ELIM: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-FP-ELIM: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FP-ELIM: .pad #28 -; CHECK-FP-ELIM: sub sp, sp, #28 +; CHECK-FP-ELIM: .pad #36 +; CHECK-FP-ELIM: sub sp, sp, #36 ; CHECK-FP-ELIM: .personality __gxx_personality_v0 ; CHECK-FP-ELIM: .handlerdata ; CHECK-FP-ELIM: .fnend @@ -205,7 +205,7 @@ declare void @_ZSt9terminatev() ; DWARF-FP: .cfi_offset r4, -36 ; DWARF-FP: add r11, sp, #28 ; DWARF-FP: .cfi_def_cfa r11, 8 -; DWARF-FP: sub sp, sp, #28 +; DWARF-FP: sub sp, sp, #44 ; DWARF-FP: sub sp, r11, #28 ; DWARF-FP: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; DWARF-FP: mov pc, lr @@ -226,9 +226,9 @@ declare void @_ZSt9terminatev() ; DWARF-FP-ELIM: .cfi_offset r6, -28 ; DWARF-FP-ELIM: .cfi_offset r5, -32 ; DWARF-FP-ELIM: .cfi_offset r4, -36 -; DWARF-FP-ELIM: sub sp, sp, #28 -; DWARF-FP-ELIM: .cfi_def_cfa_offset 64 -; DWARF-FP-ELIM: add sp, sp, #28 +; DWARF-FP-ELIM: sub sp, sp, #36 +; DWARF-FP-ELIM: .cfi_def_cfa_offset 72 +; DWARF-FP-ELIM: add sp, sp, #36 ; DWARF-FP-ELIM: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; DWARF-FP-ELIM: mov pc, lr ; DWARF-FP-ELIM: .cfi_endproc diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll index bff5f9924f6..158b777fe1f 100644 --- a/test/CodeGen/X86/tailcallstack64.ll +++ b/test/CodeGen/X86/tailcallstack64.ll @@ -12,9 +12,9 @@ ; Add %in1 %p1 to a different temporary register (%eax). ; CHECK: addl {{%edi|%ecx}}, [[R1]] ; Move param %in2 to stack. -; CHECK: movl [[R2]], [[A1]](%rsp) +; CHECK-DAG: movl [[R2]], [[A1]](%rsp) ; Move result of addition to stack. -; CHECK: movl [[R1]], [[A2]](%rsp) +; CHECK-DAG: movl [[R1]], [[A2]](%rsp) ; Eventually, do a TAILCALL ; CHECK: TAILCALL