diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 28535d12737..5434fb632b4 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -477,6 +477,11 @@ public: const SDOperand *Ops, unsigned NumOps); SDNode *getTargetNode(unsigned Opcode, std::vector &ResultTys, const SDOperand *Ops, unsigned NumOps); + + /// getNodeIfExists - Look up the node with the given opcode, value type list and operands and return it if it's already available, or + /// else return NULL. No node is created by this call; NULL is also returned, without a lookup, when the last value type is MVT::Flag. + SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, + const SDOperand *Ops, unsigned NumOps); /// DAGUpdateListener - Clients of various APIs that cause global effects on /// the DAG can optionally implement this interface. This allows the clients diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 579870e6d07..9feefb9e303 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -758,6 +758,23 @@ SDOperand DAGCombiner::combine(SDNode *N) { } } + // If N is a commutative binary node with a single result and no earlier combine produced a value, try commuting it to enable more + // sdisel CSE: when a node with the two operands swapped already exists and has no more uses than N, that node is returned as the result. + if (RV.Val == 0 && + SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + N->getNumValues() == 1) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + // Constant operands are canonicalized to RHS. 
+ if (isa(N0) || !isa(N1)) { + SDOperand Ops[] = { N1, N0 }; + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), + Ops, 2); + if (CSENode && CSENode->use_size() <= N->use_size()) + return SDOperand(CSENode, 0); + } + } + return RV; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9331e8b7ee9..94eba83e05d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3291,6 +3291,20 @@ SDNode *SelectionDAG::getTargetNode(unsigned Opcode, Ops, NumOps).Val; } +/// getNodeIfExists - Look up the node with this opcode, value type list and operands in CSEMap and return it if it's already available, or +/// else return NULL. No node is created. When the last value type in VTList is MVT::Flag the lookup is skipped and NULL is returned. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + const SDOperand *Ops, unsigned NumOps) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return E; + } + return NULL; +} + /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. 
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll new file mode 100644 index 00000000000..7427678db00 --- /dev/null +++ b/test/CodeGen/X86/dagcombine-cse.ll @@ -0,0 +1,27 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& grep asm-printer | grep 14 + +define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind { +entry: + %tmp7 = mul i32 %idxY, %ref_frame_stride ; [#uses=2] + %tmp9 = add i32 %tmp7, %idxX ; [#uses=1] here %tmp7 is the left operand of the add + %tmp11 = getelementptr i8* %ref_frame_ptr, i32 %tmp9 ; [#uses=1] + %tmp1112 = bitcast i8* %tmp11 to i32* ; [#uses=1] + %tmp13 = load i32* %tmp1112, align 4 ; [#uses=1] + %tmp18 = add i32 %idxX, 4 ; [#uses=1] + %tmp20.sum = add i32 %tmp18, %tmp7 ; [#uses=1] here %tmp7 is the right operand of the add; NOTE(review): presumably this pair of adds is what exercises CSE of commuted operands - confirm + %tmp21 = getelementptr i8* %ref_frame_ptr, i32 %tmp20.sum ; [#uses=1] + %tmp2122 = bitcast i8* %tmp21 to i16* ; [#uses=1] + %tmp23 = load i16* %tmp2122, align 2 ; [#uses=1] + %tmp2425 = zext i16 %tmp23 to i64 ; [#uses=1] + %tmp26 = shl i64 %tmp2425, 32 ; [#uses=1] + %tmp2728 = zext i32 %tmp13 to i64 ; [#uses=1] + %tmp29 = or i64 %tmp26, %tmp2728 ; [#uses=1] + %tmp3454 = bitcast i64 %tmp29 to double ; [#uses=1] + %tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0 ; <<2 x double>> [#uses=1] + %tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1 ; <<2 x double>> [#uses=1] + %tmp42 = bitcast <2 x double> %tmp36 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp48 = extractelement <4 x i32> %tmp47, i32 0 ; [#uses=1] + ret i32 %tmp48 +}