From 9bfa03c6fd8e02b738e0077fd1af7b18eeeeb4c1 Mon Sep 17 00:00:00 2001 From: Evan Cheng <evan.cheng@apple.com> Date: Mon, 12 May 2008 23:04:07 +0000 Subject: [PATCH] Xform bitconvert(build_pair(load a, load b)) to a single load if the load locations are at the right offset from each other. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51008 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 2 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 49 +++++++++++++++++++ lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- lib/Target/X86/README-SSE.txt | 54 --------------------- lib/Target/X86/X86ISelLowering.cpp | 24 ++------- test/CodeGen/X86/combine-lds.ll | 7 +++ 6 files changed, 63 insertions(+), 75 deletions(-) create mode 100644 test/CodeGen/X86/combine-lds.ll diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 3b704298a87..ffc927a56c6 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -691,7 +691,7 @@ public: /// loading 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
bool isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist, - MachineFrameInfo *MFI) const; + const MachineFrameInfo *MFI) const; /// PerformDAGCombine - This method will be invoked for all target nodes and /// for any target-independent nodes that the target has registered with diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 28f32d3d3b9..684b2f66a59 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -177,6 +177,7 @@ namespace { SDOperand visitSIGN_EXTEND_INREG(SDNode *N); SDOperand visitTRUNCATE(SDNode *N); SDOperand visitBIT_CONVERT(SDNode *N); + SDOperand visitBUILD_PAIR(SDNode *N); SDOperand visitFADD(SDNode *N); SDOperand visitFSUB(SDNode *N); SDOperand visitFMUL(SDNode *N); @@ -217,6 +218,7 @@ namespace { ISD::CondCode Cond, bool foldBooleans = true); SDOperand SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); + SDOperand CombineConsecutiveLoads(SDNode *N, MVT::ValueType VT); SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType); SDOperand BuildSDIV(SDNode *N); SDOperand BuildUDIV(SDNode *N); @@ -710,6 +712,7 @@ SDOperand DAGCombiner::visit(SDNode *N) { case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); @@ -3356,6 +3359,40 @@ SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) { return ReduceLoadWidth(N); } +static SDNode *getBuildPairElt(SDNode *N, unsigned i) { + SDOperand Elt = N->getOperand(i); + if (Elt.getOpcode() != ISD::MERGE_VALUES) + return Elt.Val; + return Elt.getOperand(Elt.ResNo).Val; +} + +/// CombineConsecutiveLoads - build_pair (load, load) -> load +/// if load locations are consecutive. 
+SDOperand DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT::ValueType VT) { + assert(N->getOpcode() == ISD::BUILD_PAIR); + + SDNode *LD1 = getBuildPairElt(N, 0); + if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + return SDOperand(); + MVT::ValueType LD1VT = LD1->getValueType(0); + SDNode *LD2 = getBuildPairElt(N, 1); + const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + if (ISD::isNON_EXTLoad(LD2) && + LD2->hasOneUse() && + TLI.isConsecutiveLoad(LD2, LD1, MVT::getSizeInBits(LD1VT)/8, 1, MFI)) { + LoadSDNode *LD = cast<LoadSDNode>(LD1); + unsigned Align = LD->getAlignment(); + unsigned NewAlign = TLI.getTargetMachine().getTargetData()-> + getABITypeAlignment(MVT::getTypeForValueType(VT)); + if ((!AfterLegalize || TLI.isTypeLegal(VT)) && + TLI.isOperationLegal(ISD::LOAD, VT) && NewAlign <= Align) + return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), Align); + } + return SDOperand(); +} + SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDOperand N0 = N->getOperand(0); MVT::ValueType VT = N->getValueType(0); @@ -3463,10 +3500,22 @@ SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) { return DAG.getNode(ISD::OR, VT, X, Cst); } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. + if (N0.getOpcode() == ISD::BUILD_PAIR) { + SDOperand CombineLD = CombineConsecutiveLoads(N0.Val, VT); + if (CombineLD.Val) + return CombineLD; + } return SDOperand(); } +SDOperand DAGCombiner::visitBUILD_PAIR(SDNode *N) { + MVT::ValueType VT = N->getValueType(0); + return CombineConsecutiveLoads(N, VT); +} + /// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. 
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e57813dfe7f..408a5b23b4f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1514,7 +1514,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, /// location that the 'Base' load is loading from. bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist, - MachineFrameInfo *MFI) const { + const MachineFrameInfo *MFI) const { if (LD->getOperand(0).Val != Base->getOperand(0).Val) return false; MVT::ValueType VT = LD->getValueType(0); diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 1a5d9045b05..34b949a6018 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -426,60 +426,6 @@ entry: ret void } -//===---------------------------------------------------------------------===// - -Consider (PR2108): - -#include <emmintrin.h> -__m128i doload64(unsigned long long x) { return _mm_loadl_epi64(&x);} -__m128i doload64_2(unsigned long long *x) { return _mm_loadl_epi64(x);} - -These are very similar routines, but we generate significantly worse code for -the first one on x86-32: - -_doload64: - subl $12, %esp - movl 20(%esp), %eax - movl %eax, 4(%esp) - movl 16(%esp), %eax - movl %eax, (%esp) - movsd (%esp), %xmm0 - addl $12, %esp - ret -_doload64_2: - movl 4(%esp), %eax - movsd (%eax), %xmm0 - ret - -The problem is that the argument lowering logic splits the i64 argument into -2x i32 loads early, the f64 insert doesn't match. Here's a reduced testcase: - -define fastcc double @doload64(i64 %x) nounwind { -entry: - %tmp717 = bitcast i64 %x to double ; <double> [#uses=1] - ret double %tmp717 -} - -compiles to: - -_doload64: - subl $12, %esp - movl 20(%esp), %eax - movl %eax, 4(%esp) - movl 16(%esp), %eax - movl %eax, (%esp) - movsd (%esp), %xmm0 - addl $12, %esp - ret - -instead of movsd from the stack. 
This is actually not too bad to implement. The -best way to do this is to implement a dag combine that turns -bitconvert(build_pair(load a, load b)) into one load of the right type. The -only trick to this is writing the predicate that determines that a/b are at the -right offset from each other. For the enterprising hacker, InferAlignment is a -helpful place to start poking if interested. - - //===---------------------------------------------------------------------===// __m128d test1( __m128d A, __m128d B) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5d50e36535a..806b626456a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6285,13 +6285,7 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, LD->getAlignment()); } -static SDNode *getBuildPairElt(SDNode *N, unsigned i) { - SDOperand Elt = N->getOperand(i); - if (Elt.getOpcode() != ISD::MERGE_VALUES) - return Elt.Val; - return Elt.getOperand(Elt.ResNo).Val; -} - +/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd. static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI) { @@ -6312,25 +6306,17 @@ static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, return SDOperand(); // Value must be a load. 
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDNode *Base = N->getOperand(0).Val; if (!isa<LoadSDNode>(Base)) { - if (Base->getOpcode() == ISD::BIT_CONVERT) - Base = Base->getOperand(0).Val; - if (Base->getOpcode() != ISD::BUILD_PAIR) + if (Base->getOpcode() != ISD::BIT_CONVERT) return SDOperand(); - SDNode *Pair = Base; - Base = getBuildPairElt(Pair, 0); - if (!ISD::isNON_EXTLoad(Base)) - return SDOperand(); - SDNode *NextLD = getBuildPairElt(Pair, 1); - if (!ISD::isNON_EXTLoad(NextLD) || - !TLI.isConsecutiveLoad(NextLD, Base, 4/*32 bits*/, 1, MFI)) + Base = Base->getOperand(0).Val; + if (!isa<LoadSDNode>(Base)) return SDOperand(); } - LoadSDNode *LD = cast<LoadSDNode>(Base); // Transform it into VZEXT_LOAD addr. + LoadSDNode *LD = cast<LoadSDNode>(Base); return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr()); } diff --git a/test/CodeGen/X86/combine-lds.ll b/test/CodeGen/X86/combine-lds.ll new file mode 100644 index 00000000000..5e0ad996678 --- /dev/null +++ b/test/CodeGen/X86/combine-lds.ll @@ -0,0 +1,7 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1 + +define fastcc double @doload64(i64 %x) nounwind { + %tmp717 = bitcast i64 %x to double + ret double %tmp717 +}