mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-24 06:30:19 +00:00
Xform bitconvert(build_pair(load a, load b)) to a single load if the load locations are at the right offset from each other.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51008 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
38eb9f9ae6
commit
9bfa03c6fd
@ -691,7 +691,7 @@ public:
|
||||
/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
|
||||
/// location that the 'Base' load is loading from.
|
||||
bool isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist,
|
||||
MachineFrameInfo *MFI) const;
|
||||
const MachineFrameInfo *MFI) const;
|
||||
|
||||
/// PerformDAGCombine - This method will be invoked for all target nodes and
|
||||
/// for any target-independent nodes that the target has registered with
|
||||
|
@ -177,6 +177,7 @@ namespace {
|
||||
SDOperand visitSIGN_EXTEND_INREG(SDNode *N);
|
||||
SDOperand visitTRUNCATE(SDNode *N);
|
||||
SDOperand visitBIT_CONVERT(SDNode *N);
|
||||
SDOperand visitBUILD_PAIR(SDNode *N);
|
||||
SDOperand visitFADD(SDNode *N);
|
||||
SDOperand visitFSUB(SDNode *N);
|
||||
SDOperand visitFMUL(SDNode *N);
|
||||
@ -217,6 +218,7 @@ namespace {
|
||||
ISD::CondCode Cond, bool foldBooleans = true);
|
||||
SDOperand SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
|
||||
unsigned HiOp);
|
||||
SDOperand CombineConsecutiveLoads(SDNode *N, MVT::ValueType VT);
|
||||
SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType);
|
||||
SDOperand BuildSDIV(SDNode *N);
|
||||
SDOperand BuildUDIV(SDNode *N);
|
||||
@ -710,6 +712,7 @@ SDOperand DAGCombiner::visit(SDNode *N) {
|
||||
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
|
||||
case ISD::TRUNCATE: return visitTRUNCATE(N);
|
||||
case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
|
||||
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
|
||||
case ISD::FADD: return visitFADD(N);
|
||||
case ISD::FSUB: return visitFSUB(N);
|
||||
case ISD::FMUL: return visitFMUL(N);
|
||||
@ -3356,6 +3359,40 @@ SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) {
|
||||
return ReduceLoadWidth(N);
|
||||
}
|
||||
|
||||
/// getBuildPairElt - Return the node that defines operand 'i' of N.  When that
/// operand is a MERGE_VALUES node, return instead the node feeding the merge
/// operand selected by the operand's result number.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDOperand Op = N->getOperand(i);
  if (Op.getOpcode() == ISD::MERGE_VALUES)
    return Op.getOperand(Op.ResNo).Val;
  return Op.Val;
}
|
||||
|
||||
/// CombineConsecutiveLoads - build_pair (load, load) -> load
|
||||
/// if load locations are consecutive.
|
||||
SDOperand DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT::ValueType VT) {
|
||||
assert(N->getOpcode() == ISD::BUILD_PAIR);
|
||||
|
||||
SDNode *LD1 = getBuildPairElt(N, 0);
|
||||
if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
|
||||
return SDOperand();
|
||||
MVT::ValueType LD1VT = LD1->getValueType(0);
|
||||
SDNode *LD2 = getBuildPairElt(N, 1);
|
||||
const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
if (ISD::isNON_EXTLoad(LD2) &&
|
||||
LD2->hasOneUse() &&
|
||||
TLI.isConsecutiveLoad(LD2, LD1, MVT::getSizeInBits(LD1VT)/8, 1, MFI)) {
|
||||
LoadSDNode *LD = cast<LoadSDNode>(LD1);
|
||||
unsigned Align = LD->getAlignment();
|
||||
unsigned NewAlign = TLI.getTargetMachine().getTargetData()->
|
||||
getABITypeAlignment(MVT::getTypeForValueType(VT));
|
||||
if ((!AfterLegalize || TLI.isTypeLegal(VT)) &&
|
||||
TLI.isOperationLegal(ISD::LOAD, VT) && NewAlign <= Align)
|
||||
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(),
|
||||
LD->getSrcValue(), LD->getSrcValueOffset(),
|
||||
LD->isVolatile(), Align);
|
||||
}
|
||||
return SDOperand();
|
||||
}
|
||||
|
||||
SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) {
|
||||
SDOperand N0 = N->getOperand(0);
|
||||
MVT::ValueType VT = N->getValueType(0);
|
||||
@ -3464,9 +3501,21 @@ SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) {
|
||||
return DAG.getNode(ISD::OR, VT, X, Cst);
|
||||
}
|
||||
|
||||
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
|
||||
if (N0.getOpcode() == ISD::BUILD_PAIR) {
|
||||
SDOperand CombineLD = CombineConsecutiveLoads(N0.Val, VT);
|
||||
if (CombineLD.Val)
|
||||
return CombineLD;
|
||||
}
|
||||
|
||||
return SDOperand();
|
||||
}
|
||||
|
||||
/// visitBUILD_PAIR - Combine hook for BUILD_PAIR nodes: hands the node and its
/// own result type to CombineConsecutiveLoads and returns whatever it yields.
SDOperand DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  return CombineConsecutiveLoads(N, N->getValueType(0));
}
|
||||
|
||||
/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
|
||||
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
|
||||
/// destination element value type.
|
||||
|
@ -1514,7 +1514,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
|
||||
/// location that the 'Base' load is loading from.
|
||||
bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
|
||||
unsigned Bytes, int Dist,
|
||||
MachineFrameInfo *MFI) const {
|
||||
const MachineFrameInfo *MFI) const {
|
||||
if (LD->getOperand(0).Val != Base->getOperand(0).Val)
|
||||
return false;
|
||||
MVT::ValueType VT = LD->getValueType(0);
|
||||
|
@ -426,60 +426,6 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Consider (PR2108):
|
||||
|
||||
#include <xmmintrin.h>
|
||||
__m128i doload64(unsigned long long x) { return _mm_loadl_epi64(&x);}
|
||||
__m128i doload64_2(unsigned long long *x) { return _mm_loadl_epi64(x);}
|
||||
|
||||
These are very similar routines, but we generate significantly worse code for
|
||||
the first one on x86-32:
|
||||
|
||||
_doload64:
|
||||
subl $12, %esp
|
||||
movl 20(%esp), %eax
|
||||
movl %eax, 4(%esp)
|
||||
movl 16(%esp), %eax
|
||||
movl %eax, (%esp)
|
||||
movsd (%esp), %xmm0
|
||||
addl $12, %esp
|
||||
ret
|
||||
_doload64_2:
|
||||
movl 4(%esp), %eax
|
||||
movsd (%eax), %xmm0
|
||||
ret
|
||||
|
||||
The problem is that the argument lowering logic splits the i64 argument into
|
||||
2x i32 loads early, so the f64 insert doesn't match. Here's a reduced testcase:
|
||||
|
||||
define fastcc double @doload64(i64 %x) nounwind {
|
||||
entry:
|
||||
%tmp717 = bitcast i64 %x to double ; <double> [#uses=1]
|
||||
ret double %tmp717
|
||||
}
|
||||
|
||||
compiles to:
|
||||
|
||||
_doload64:
|
||||
subl $12, %esp
|
||||
movl 20(%esp), %eax
|
||||
movl %eax, 4(%esp)
|
||||
movl 16(%esp), %eax
|
||||
movl %eax, (%esp)
|
||||
movsd (%esp), %xmm0
|
||||
addl $12, %esp
|
||||
ret
|
||||
|
||||
instead of movsd from the stack. This is actually not too bad to implement. The
|
||||
best way to do this is to implement a dag combine that turns
|
||||
bitconvert(build_pair(load a, load b)) into one load of the right type. The
|
||||
only trick to this is writing the predicate that determines that a/b are at the
|
||||
right offset from each other. For the enterprising hacker, InferAlignment is a
|
||||
helpful place to start poking if interested.
|
||||
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
__m128d test1( __m128d A, __m128d B) {
|
||||
|
@ -6285,13 +6285,7 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
||||
LD->getAlignment());
|
||||
}
|
||||
|
||||
/// getBuildPairElt - Return the node that defines operand 'i' of N.  When that
/// operand is a MERGE_VALUES node, return instead the node feeding the merge
/// operand selected by the operand's result number.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
|
||||
SDOperand Elt = N->getOperand(i);
|
||||
if (Elt.getOpcode() != ISD::MERGE_VALUES)
|
||||
return Elt.Val;
|
||||
return Elt.getOperand(Elt.ResNo).Val;
|
||||
}
|
||||
|
||||
/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
|
||||
static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget,
|
||||
const TargetLowering &TLI) {
|
||||
@ -6312,25 +6306,17 @@ static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDOperand();
|
||||
|
||||
// Value must be a load.
|
||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
SDNode *Base = N->getOperand(0).Val;
|
||||
if (!isa<LoadSDNode>(Base)) {
|
||||
if (Base->getOpcode() == ISD::BIT_CONVERT)
|
||||
if (Base->getOpcode() != ISD::BIT_CONVERT)
|
||||
return SDOperand();
|
||||
Base = Base->getOperand(0).Val;
|
||||
if (Base->getOpcode() != ISD::BUILD_PAIR)
|
||||
return SDOperand();
|
||||
SDNode *Pair = Base;
|
||||
Base = getBuildPairElt(Pair, 0);
|
||||
if (!ISD::isNON_EXTLoad(Base))
|
||||
return SDOperand();
|
||||
SDNode *NextLD = getBuildPairElt(Pair, 1);
|
||||
if (!ISD::isNON_EXTLoad(NextLD) ||
|
||||
!TLI.isConsecutiveLoad(NextLD, Base, 4/*32 bits*/, 1, MFI))
|
||||
if (!isa<LoadSDNode>(Base))
|
||||
return SDOperand();
|
||||
}
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Base);
|
||||
|
||||
// Transform it into VZEXT_LOAD addr.
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Base);
|
||||
return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
|
||||
}
|
||||
|
||||
|
7
test/CodeGen/X86/combine-lds.ll
Normal file
7
test/CodeGen/X86/combine-lds.ll
Normal file
@ -0,0 +1,7 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1
|
||||
|
||||
define fastcc double @doload64(i64 %x) nounwind {
|
||||
%tmp717 = bitcast i64 %x to double
|
||||
ret double %tmp717
|
||||
}
|
Loading…
Reference in New Issue
Block a user