mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-14 17:34:41 +00:00
Follow up to r165072. Try a different approach: only move the load when it's going to be folded into the call. rdar://12437604
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165287 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fca3f4021a
commit
2a2947885a
@ -387,21 +387,12 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
|
||||
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
|
||||
Load.getOperand(1), Load.getOperand(2));
|
||||
|
||||
bool IsGlued = Call.getOperand(0).getNode()->getGluedUser() == Call.getNode();
|
||||
unsigned NumOps = Call.getNode()->getNumOperands();
|
||||
Ops.clear();
|
||||
Ops.push_back(SDValue(Load.getNode(), 1));
|
||||
for (unsigned i = 1, e = NumOps; i != e; ++i)
|
||||
Ops.push_back(Call.getOperand(i));
|
||||
if (!IsGlued)
|
||||
CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
|
||||
else
|
||||
// If call's chain was glued to the call (tailcall), and now the load
|
||||
// is moved between them. Remove the glue to avoid a cycle (where the
|
||||
// call is glued to its old chain and the load is using the old chain
|
||||
// as its new chain).
|
||||
CurDAG->MorphNodeTo(Call.getNode(), Call.getOpcode(),
|
||||
Call.getNode()->getVTList(), &Ops[0], NumOps-1);
|
||||
CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
|
||||
}
|
||||
|
||||
/// isCalleeLoad - Return true if call address is a load and it can be
|
||||
@ -410,6 +401,10 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
|
||||
/// In the case of a tail call, there isn't a callseq node between the call
|
||||
/// chain and the load.
|
||||
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
|
||||
// The transformation is somewhat dangerous if the call's chain was glued to
|
||||
// the call. After MoveBelowOrigChain the load is moved between the call and
|
||||
// the chain; this can create a cycle if the load is not folded. So it is
|
||||
// *really* important that we are sure the load will be folded.
|
||||
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
|
||||
return false;
|
||||
LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
|
||||
@ -447,7 +442,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
|
||||
|
||||
if (OptLevel != CodeGenOpt::None &&
|
||||
(N->getOpcode() == X86ISD::CALL ||
|
||||
N->getOpcode() == X86ISD::TC_RETURN)) {
|
||||
(N->getOpcode() == X86ISD::TC_RETURN &&
|
||||
// Only do this if the load can be folded into TC_RETURN.
|
||||
(Subtarget->is64Bit() ||
|
||||
getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
|
||||
/// Also try moving call address load from outside callseq_start to just
|
||||
/// before the call to allow it to be folded.
|
||||
///
|
||||
|
@ -1,4 +1,6 @@
|
||||
; RUN: llc -mtriple=i386-apple-macosx -relocation-model=pic < %s
|
||||
; RUN: llc -mtriple=x86_64-apple-macosx -relocation-model=pic < %s
|
||||
|
||||
; rdar://12393897
|
||||
|
||||
%TRp = type { i32, %TRH*, i32, i32 }
|
||||
@ -14,3 +16,37 @@ entry:
|
||||
%call = tail call i32 %1(%TRp* inreg %rp) nounwind optsize
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
%btConeShape = type { %btConvexInternalShape, float, float, float, [3 x i32] }
|
||||
%btConvexInternalShape = type { %btConvexShape, %btVector, %btVector, float, float }
|
||||
%btConvexShape = type { %btCollisionShape }
|
||||
%btCollisionShape = type { i32 (...)**, i32, i8* }
|
||||
%btVector = type { [4 x float] }
|
||||
|
||||
define { <2 x float>, <2 x float> } @t2(%btConeShape* %this) unnamed_addr uwtable ssp align 2 {
|
||||
entry:
|
||||
%0 = getelementptr inbounds %btConeShape* %this, i64 0, i32 0
|
||||
br i1 undef, label %if.then, label %if.end17
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%vecnorm.sroa.2.8.copyload = load float* undef, align 4
|
||||
%cmp4 = fcmp olt float undef, 0x3D10000000000000
|
||||
%vecnorm.sroa.2.8.copyload36 = select i1 %cmp4, float -1.000000e+00, float %vecnorm.sroa.2.8.copyload
|
||||
%call.i.i.i = tail call float @sqrtf(float 0.000000e+00) nounwind readnone
|
||||
%div.i.i = fdiv float 1.000000e+00, %call.i.i.i
|
||||
%mul7.i.i.i = fmul float %div.i.i, %vecnorm.sroa.2.8.copyload36
|
||||
%1 = load float (%btConvexInternalShape*)** undef, align 8
|
||||
%call12 = tail call float %1(%btConvexInternalShape* %0)
|
||||
%mul7.i.i = fmul float %call12, %mul7.i.i.i
|
||||
%retval.sroa.0.4.insert = insertelement <2 x float> zeroinitializer, float undef, i32 1
|
||||
%add13.i = fadd float undef, %mul7.i.i
|
||||
%retval.sroa.1.8.insert = insertelement <2 x float> undef, float %add13.i, i32 0
|
||||
br label %if.end17
|
||||
|
||||
if.end17: ; preds = %if.then, %entry
|
||||
%retval.sroa.1.8.load3338 = phi <2 x float> [ %retval.sroa.1.8.insert, %if.then ], [ undef, %entry ]
|
||||
%retval.sroa.0.0.load3137 = phi <2 x float> [ %retval.sroa.0.4.insert, %if.then ], [ undef, %entry ]
|
||||
ret { <2 x float>, <2 x float> } undef
|
||||
}
|
||||
|
||||
declare float @sqrtf(float) nounwind readnone
|
||||
|
Loading…
x
Reference in New Issue
Block a user