diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 7b349f6a16f..65f6d27744e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -209,8 +209,8 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, - SDValue N, SDValue &Base, SDValue &Scale, + bool SelectScalarSSELoad(SDNode *Root, SDValue N, + SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &InChain, SDValue &OutChain); @@ -1317,7 +1317,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, +bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, @@ -1327,10 +1327,10 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, InChain = N.getOperand(0).getValue(1); if (ISD::isNON_EXTLoad(InChain.getNode()) && InChain.getValue(0).hasOneUse() && - IsProfitableToFold(N, Pred.getNode(), Op) && - IsLegalToFold(N, Pred.getNode(), Op)) { + IsProfitableToFold(N.getOperand(0), InChain.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { LoadSDNode *LD = cast(InChain); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; OutChain = LD->getChain(); return true; @@ -1344,10 +1344,12 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && - N.getOperand(0).getOperand(0).hasOneUse()) { + N.getOperand(0).getOperand(0).hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast(N.getOperand(0).getOperand(0)); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; OutChain = LD->getChain(); InChain = SDValue(LD, 1); @@ -1424,7 +1426,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast(N); diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll index 42831f00225..a44e08dc84a 100644 --- a/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/test/CodeGen/X86/vec_ss_load_fold.ll @@ -45,3 +45,28 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) + + +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) +declare <4 x float> @f() + +define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> undef, float %a, i32 0 + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test3: +; CHECK: roundss $4, (%eax), %xmm0 +} + +define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> undef, float %a, i32 0 + %q = call <4 x float> @f() + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test4: +; CHECK: movss (%eax), %xmm +; CHECK: call +; CHECK: roundss $4, %xmm{{.*}}, %xmm0 +} \ No newline at end of file diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp index e1d09a89496..8774c529652 100644 --- a/utils/TableGen/DAGISelEmitter.cpp +++ b/utils/TableGen/DAGISelEmitter.cpp @@ -510,7 +510,6 @@ void PatternCodeEmitter::EmitMatchCode(TreePatternNode *N, TreePatternNode *P, const std::string &RootName, const std::string &ChainSuffix, bool &FoundChain) { - // Save loads/stores matched by a pattern. if (!N->isLeaf() && N->getName().empty()) { if (N->NodeHasProperty(SDNPMemOperand, CGP)) @@ -573,7 +572,7 @@ void PatternCodeEmitter::EmitMatchCode(TreePatternNode *N, TreePatternNode *P, // use. If the node has multiple uses and the pattern has a load as // an operand, then we can't fold the load. emitCheck(getValueName(RootName) + ".hasOneUse()"); - } else { + } else if (!N->isLeaf()) { // ComplexPatterns do their own legality check. // If the immediate use can somehow reach this node through another // path, then can't fold it either or it will create a cycle. // e.g. In the following diagram, XX can reach ld through YY. If @@ -627,8 +626,11 @@ void PatternCodeEmitter::EmitMatchCode(TreePatternNode *N, TreePatternNode *P, } else FoundChain = true; ChainName = "Chain" + ChainSuffix; - emitInit("SDValue " + ChainName + " = " + getNodeName(RootName) + - "->getOperand(0);"); + + if (!N->getComplexPatternInfo(CGP) || + isRoot) + emitInit("SDValue " + ChainName + " = " + getNodeName(RootName) + + "->getOperand(0);"); } } @@ -686,7 +688,7 @@ void PatternCodeEmitter::EmitMatchCode(TreePatternNode *N, TreePatternNode *P, // Handle cases when root is a complex pattern. const ComplexPattern *CP; - if (isRoot && N->isLeaf() && (CP = N->getComplexPatternInfo(CGP))) { + if (N->isLeaf() && (CP = N->getComplexPatternInfo(CGP))) { std::string Fn = CP->getSelectFunc(); unsigned NumOps = CP->getNumOperands(); for (unsigned i = 0; i < NumOps; ++i) { @@ -700,9 +702,8 @@ void PatternCodeEmitter::EmitMatchCode(TreePatternNode *N, TreePatternNode *P, emitCode("SDValue Chain" + ChainSuffix + ";"); } - std::string Code = Fn + "(" + - getNodeName(RootName) + ", " + - getValueName(RootName); + std::string Code = Fn + "(N, "; // always pass in the root. + Code += getValueName(RootName); for (unsigned i = 0; i < NumOps; i++) Code += ", CPTmp" + RootName + "_" + utostr(i); if (CP->hasProperty(SDNPHasChain)) { @@ -736,6 +737,24 @@ void PatternCodeEmitter::EmitChildMatchCode(TreePatternNode *Child, FoldedFlag = std::make_pair(getValueName(RootName), CInfo.getNumResults() + (unsigned)HasChain); } + } else if (const ComplexPattern *CP = Child->getComplexPatternInfo(CGP)) { + if (CP->getSelectFunc() == "SelectScalarSSELoad") + errs() << "FOUND IT\n"; + EmitMatchCode(Child, Parent, RootName, ChainSuffix, FoundChain); + bool HasChain = false; + + if (Child->NodeHasProperty(SDNPHasChain, CGP)) { + HasChain = true; + const SDNodeInfo &PInfo = CGP.getSDNodeInfo(Parent->getOperator()); + FoldedChains.push_back(std::make_pair("CPInChain", + PInfo.getNumResults())); + } + if (Child->NodeHasProperty(SDNPOutFlag, CGP)) { + assert(FoldedFlag.first == "" && FoldedFlag.second == 0 && + "Pattern folded multiple nodes which produce flags?"); + FoldedFlag = std::make_pair(getValueName(RootName), + CP->getNumOperands() + (unsigned)HasChain); + } } else { // If this child has a name associated with it, capture it in VarMap. If // we already saw this in the pattern, emit code to verify dagness. @@ -762,37 +781,6 @@ void PatternCodeEmitter::EmitChildMatchCode(TreePatternNode *Child, // Handle register references. Nothing to do here. } else if (LeafRec->isSubClassOf("Register")) { // Handle register references. - } else if (LeafRec->isSubClassOf("ComplexPattern")) { - // Handle complex pattern. - const ComplexPattern *CP = Child->getComplexPatternInfo(CGP); - std::string Fn = CP->getSelectFunc(); - unsigned NumOps = CP->getNumOperands(); - for (unsigned i = 0; i < NumOps; ++i) { - emitDecl("CPTmp" + RootName + "_" + utostr(i)); - emitCode("SDValue CPTmp" + RootName + "_" + utostr(i) + ";"); - } - if (CP->hasProperty(SDNPHasChain)) { - const SDNodeInfo &PInfo = CGP.getSDNodeInfo(Parent->getOperator()); - FoldedChains.push_back(std::make_pair("CPInChain", - PInfo.getNumResults())); - ChainName = "Chain" + ChainSuffix; - emitDecl("CPInChain"); - emitDecl(ChainName); - emitCode("SDValue CPInChain;"); - emitCode("SDValue " + ChainName + ";"); - } - - std::string Code = Fn + "(N, "; - if (CP->hasProperty(SDNPHasChain)) { - std::string ParentName(RootName.begin(), RootName.end()-1); - Code += getValueName(ParentName) + ", "; - } - Code += getValueName(RootName); - for (unsigned i = 0; i < NumOps; i++) - Code += ", CPTmp" + RootName + "_" + utostr(i); - if (CP->hasProperty(SDNPHasChain)) - Code += ", CPInChain, Chain" + ChainSuffix; - emitCheck(Code + ")"); } else if (LeafRec->getName() == "srcvalue") { // Place holder for SRCVALUE nodes. Nothing to do here. } else if (LeafRec->isSubClassOf("ValueType")) { diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp index 84e9a3d03dd..d32d3a7b132 100644 --- a/utils/TableGen/DAGISelMatcherGen.cpp +++ b/utils/TableGen/DAGISelMatcherGen.cpp @@ -212,7 +212,6 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N, // const TreePatternNode *Root = Pattern.getSrcPattern(); if (N != Root) { // Not the root of the pattern. - // If there is a node between the root and this node, then we definitely // need to emit the check. bool NeedCheck = !Root->hasChild(N);