diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 98778c3091e..b79dd99d8fa 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -387,21 +387,12 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); - bool IsGlued = Call.getOperand(0).getNode()->getGluedUser() == Call.getNode(); unsigned NumOps = Call.getNode()->getNumOperands(); Ops.clear(); Ops.push_back(SDValue(Load.getNode(), 1)); for (unsigned i = 1, e = NumOps; i != e; ++i) Ops.push_back(Call.getOperand(i)); - if (!IsGlued) - CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps); - else - // If call's chain was glued to the call (tailcall), and now the load - // is moved between them. Remove the glue to avoid a cycle (where the - // call is glued to its old chain and the load is using the old chain - // as its new chain). - CurDAG->MorphNodeTo(Call.getNode(), Call.getOpcode(), - Call.getNode()->getVTList(), &Ops[0], NumOps-1); + CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -410,6 +401,10 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, /// In the case of a tail call, there isn't a callseq node between the call /// chain and the load. static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { + // The transformation is somewhat dangerous if the call's chain was glued to + // the call. After MoveBelowOrigChain the load is moved between the call and + // the chain, this can create a cycle if the load is not folded. So it is + // *really* important that we are sure the load will be folded. 
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) return false; LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); @@ -447,7 +442,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && (N->getOpcode() == X86ISD::CALL || - N->getOpcode() == X86ISD::TC_RETURN)) { + (N->getOpcode() == X86ISD::TC_RETURN && + // Only do this if the load can be folded into TC_RETURN. + (Subtarget->is64Bit() || + getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. /// diff --git a/test/CodeGen/X86/2012-10-02-DAGCycle.ll b/test/CodeGen/X86/2012-10-02-DAGCycle.ll index 9d2b7ea8525..8d914db3315 100644 --- a/test/CodeGen/X86/2012-10-02-DAGCycle.ll +++ b/test/CodeGen/X86/2012-10-02-DAGCycle.ll @@ -1,4 +1,6 @@ ; RUN: llc -mtriple=i386-apple-macosx -relocation-model=pic < %s +; RUN: llc -mtriple=x86_64-apple-macosx -relocation-model=pic < %s + ; rdar://12393897 %TRp = type { i32, %TRH*, i32, i32 } @@ -14,3 +16,37 @@ entry: %call = tail call i32 %1(%TRp* inreg %rp) nounwind optsize ret i32 %call } + +%btConeShape = type { %btConvexInternalShape, float, float, float, [3 x i32] } +%btConvexInternalShape = type { %btConvexShape, %btVector, %btVector, float, float } +%btConvexShape = type { %btCollisionShape } +%btCollisionShape = type { i32 (...)**, i32, i8* } +%btVector = type { [4 x float] } + +define { <2 x float>, <2 x float> } @t2(%btConeShape* %this) unnamed_addr uwtable ssp align 2 { +entry: + %0 = getelementptr inbounds %btConeShape* %this, i64 0, i32 0 + br i1 undef, label %if.then, label %if.end17 + +if.then: ; preds = %entry + %vecnorm.sroa.2.8.copyload = load float* undef, align 4 + %cmp4 = fcmp olt float undef, 0x3D10000000000000 + %vecnorm.sroa.2.8.copyload36 = select i1 %cmp4, float -1.000000e+00, float %vecnorm.sroa.2.8.copyload + %call.i.i.i = tail call float @sqrtf(float 0.000000e+00) nounwind readnone + %div.i.i = fdiv 
float 1.000000e+00, %call.i.i.i + %mul7.i.i.i = fmul float %div.i.i, %vecnorm.sroa.2.8.copyload36 + %1 = load float (%btConvexInternalShape*)** undef, align 8 + %call12 = tail call float %1(%btConvexInternalShape* %0) + %mul7.i.i = fmul float %call12, %mul7.i.i.i + %retval.sroa.0.4.insert = insertelement <2 x float> zeroinitializer, float undef, i32 1 + %add13.i = fadd float undef, %mul7.i.i + %retval.sroa.1.8.insert = insertelement <2 x float> undef, float %add13.i, i32 0 + br label %if.end17 + +if.end17: ; preds = %if.then, %entry + %retval.sroa.1.8.load3338 = phi <2 x float> [ %retval.sroa.1.8.insert, %if.then ], [ undef, %entry ] + %retval.sroa.0.0.load3137 = phi <2 x float> [ %retval.sroa.0.4.insert, %if.then ], [ undef, %entry ] + ret { <2 x float>, <2 x float> } undef +} + +declare float @sqrtf(float) nounwind readnone