diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 52c47767caa..748668cdf67 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -131,28 +131,16 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, } } -static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { - SmallVector VTs; - SDNode *GlueDestNode = Glue.getNode(); - - // Don't add glue from a node to itself. - if (GlueDestNode == N) return; - - // Don't add glue to something that already has it, either as a use or value. - if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return; - - for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) - VTs.push_back(N->getValueType(I)); - - if (AddGlue) - VTs.push_back(MVT::Glue); - +// Helper for AddGlue to clone node operands. +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, + SmallVectorImpl &VTs, + SDValue ExtraOper = SDValue()) { SmallVector Ops; for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) Ops.push_back(N->getOperand(I)); - if (GlueDestNode) - Ops.push_back(Glue); + if (ExtraOper.getNode()) + Ops.push_back(ExtraOper); SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); MachineSDNode::mmo_iterator Begin = 0, End = 0; @@ -171,6 +159,46 @@ static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { MN->setMemRefs(Begin, End); } +static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { + SmallVector VTs; + SDNode *GlueDestNode = Glue.getNode(); + + // Don't add glue from a node to itself. + if (GlueDestNode == N) return false; + + // Don't add a glue operand to something that already uses glue. + if (GlueDestNode && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { + return false; + } + // Don't add glue to something that already has a glue value. + if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false; + + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + VTs.push_back(N->getValueType(I)); + + if (AddGlue) + VTs.push_back(MVT::Glue); + + CloneNodeWithValues(N, DAG, VTs, Glue); + + return true; +} + +// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the +// node even though simply shrinking the value list is sufficient. +static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { + assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue && + !N->hasAnyUseOfValue(N->getNumValues() - 1)) && + "expected an unused glue value"); + + SmallVector VTs; + for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I) + VTs.push_back(N->getValueType(I)); + + CloneNodeWithValues(N, DAG, VTs); +} + /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. /// This function finds loads of the same base and different offsets. If the /// offsets are not far apart (target specific), it add MVT::Glue inputs and @@ -238,19 +266,23 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { // Cluster loads by adding MVT::Glue outputs and inputs. This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - AddGlue(Lead, SDValue(0, 0), true, DAG); - - SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1); + SDValue InGlue = SDValue(0, 0); + if (AddGlue(Lead, InGlue, true, DAG)) + InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { bool OutGlue = I < E - 1; SDNode *Load = Loads[I]; - AddGlue(Load, InGlue, OutGlue, DAG); + // If AddGlue fails, we could leave an unsused glue value. This should not + // cause any + if (AddGlue(Load, InGlue, OutGlue, DAG)) { + if (OutGlue) + InGlue = SDValue(Load, Load->getNumValues() - 1); - if (OutGlue) - InGlue = SDValue(Load, Load->getNumValues() - 1); - - ++LoadsClustered; + ++LoadsClustered; + } + else if (!OutGlue && InGlue.getNode()) + RemoveUnusedGlue(InGlue.getNode(), DAG); } } diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll new file mode 100644 index 00000000000..9543587747a --- /dev/null +++ b/test/CodeGen/X86/2012-04-26-sdglue.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx | FileCheck %s +; rdar://11314175: SD Scheduler, BuildSchedUnits assert: +; N->getNodeId() == -1 && "Node already inserted! + +; It's hard to test for the ISEL condition because CodeGen optimizes +; away the bugpointed code. Just ensure the basics are still there. +;CHECK: func: +;CHECK: vmovups +;CHECK: vpshufd +;CHECK: vpshufd +;CHECK: vmulps +;CHECK: vmulps +;CHECK: ret + +define void @func() nounwind ssp { + %tmp = load <4 x float>* null, align 1 + %tmp14 = getelementptr <4 x float>* null, i32 2 + %tmp15 = load <4 x float>* %tmp14, align 1 + %tmp16 = shufflevector <4 x float> %tmp, <4 x float> , <8 x i32> + %tmp17 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %tmp16, <4 x float> undef, i8 1) + %tmp18 = bitcast <4 x float> %tmp to <16 x i8> + %tmp19 = shufflevector <16 x i8> %tmp18, <16 x i8> undef, <16 x i32> + %tmp20 = bitcast <16 x i8> %tmp19 to <4 x float> + %tmp21 = bitcast <4 x float> %tmp15 to <16 x i8> + %tmp22 = shufflevector <16 x i8> undef, <16 x i8> %tmp21, <16 x i32> + %tmp23 = bitcast <16 x i8> %tmp22 to <4 x float> + %tmp24 = shufflevector <4 x float> %tmp20, <4 x float> , <8 x i32> + %tmp25 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %tmp24, <4 x float> %tmp23, i8 1) + %tmp26 = fmul <8 x float> %tmp17, undef + %tmp27 = fmul <8 x float> %tmp25, undef + %tmp28 = fadd <8 x float> %tmp26, %tmp27 + %tmp29 = fadd <8 x float> %tmp28, undef + %tmp30 = shufflevector <8 x float> %tmp29, <8 x float> undef, <4 x i32> + %tmp31 = fmul <4 x float> undef, %tmp30 + %tmp32 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> zeroinitializer, <4 x float> %tmp31, i8 1) + %tmp33 = fadd <8 x float> undef, %tmp32 + %tmp34 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %tmp33, <8 x float> undef) nounwind + %tmp35 = fsub <8 x float> %tmp34, undef + %tmp36 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> zeroinitializer, <8 x float> %tmp35) nounwind + store <8 x float> %tmp36, <8 x float>* undef, align 32 + ret void +} + +declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone + +declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone