mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-05 01:31:05 +00:00
Improve the loading of load-anyext vectors by allowing the codegen to load
multiple scalars and insert them into a vector. Next, we shuffle the elements into the correct places, as before. Also fix a small dagcombine bug in SimplifyBinOpWithSameOpcodeHands, where the migration of bitcasts happened too late in the SelectionDAG process.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159991 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fae96f17b4
commit
2dd83eb1ab
lib
test/CodeGen/X86
@ -2340,7 +2340,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
|
||||
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
|
||||
// on scalars.
|
||||
if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
|
||||
&& Level == AfterLegalizeVectorOps) {
|
||||
&& Level == AfterLegalizeTypes) {
|
||||
SDValue In0 = N0.getOperand(0);
|
||||
SDValue In1 = N1.getOperand(0);
|
||||
EVT In0Ty = In0.getValueType();
|
||||
|
@ -14425,7 +14425,8 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
|
||||
static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
LoadSDNode *Ld = cast<LoadSDNode>(N);
|
||||
EVT RegVT = Ld->getValueType(0);
|
||||
EVT MemVT = Ld->getMemoryVT();
|
||||
@ -14447,47 +14448,73 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
|
||||
unsigned RegSz = RegVT.getSizeInBits();
|
||||
unsigned MemSz = MemVT.getSizeInBits();
|
||||
assert(RegSz > MemSz && "Register size must be greater than the mem size");
|
||||
// All sizes must be a power of two
|
||||
if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue();
|
||||
|
||||
// Attempt to load the original value using a single load op.
|
||||
// Find a scalar type which is equal to the loaded word size.
|
||||
// All sizes must be a power of two.
|
||||
if (!isPowerOf2_32(RegSz * MemSz * NumElems))
|
||||
return SDValue();
|
||||
|
||||
// Attempt to load the original value using scalar loads.
|
||||
// Find the largest scalar type that divides the total loaded size.
|
||||
MVT SclrLoadTy = MVT::i8;
|
||||
for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
|
||||
tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
|
||||
MVT Tp = (MVT::SimpleValueType)tp;
|
||||
if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) {
|
||||
if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
|
||||
SclrLoadTy = Tp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Proceed if a load word is found.
|
||||
if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue();
|
||||
// Calculate the number of scalar loads that we need to perform
|
||||
// in order to load our vector from memory.
|
||||
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
|
||||
|
||||
// Represent our vector as a sequence of elements which are the
|
||||
// largest scalar that we can load.
|
||||
EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
|
||||
RegSz/SclrLoadTy.getSizeInBits());
|
||||
|
||||
// Represent the data using the same element type that is stored in
|
||||
// memory. In practice, we ''widen'' MemVT.
|
||||
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
|
||||
RegSz/MemVT.getScalarType().getSizeInBits());
|
||||
// Can't shuffle using an illegal type.
|
||||
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
|
||||
|
||||
// Perform a single load.
|
||||
SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
|
||||
Ld->getBasePtr(),
|
||||
Ld->getPointerInfo(), Ld->isVolatile(),
|
||||
Ld->isNonTemporal(), Ld->isInvariant(),
|
||||
Ld->getAlignment());
|
||||
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
|
||||
"Invalid vector type");
|
||||
|
||||
// Insert the word loaded into a vector.
|
||||
SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
|
||||
LoadUnitVecVT, ScalarLoad);
|
||||
// We can't shuffle using an illegal type.
|
||||
if (!TLI.isTypeLegal(WideVecVT))
|
||||
return SDValue();
|
||||
|
||||
SmallVector<SDValue, 8> Chains;
|
||||
SDValue Ptr = Ld->getBasePtr();
|
||||
SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8,
|
||||
TLI.getPointerTy());
|
||||
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
|
||||
|
||||
for (unsigned i = 0; i < NumLoads; ++i) {
|
||||
// Perform a single load.
|
||||
SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
|
||||
Ptr, Ld->getPointerInfo(),
|
||||
Ld->isVolatile(), Ld->isNonTemporal(),
|
||||
Ld->isInvariant(), Ld->getAlignment());
|
||||
Chains.push_back(ScalarLoad.getValue(1));
|
||||
// Create the first element using SCALAR_TO_VECTOR in order to avoid
|
||||
// another round of DAGCombining.
|
||||
if (i == 0)
|
||||
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
|
||||
else
|
||||
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
|
||||
ScalarLoad, DAG.getIntPtrConstant(i));
|
||||
|
||||
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
|
||||
}
|
||||
|
||||
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
|
||||
Chains.size());
|
||||
|
||||
// Bitcast the loaded value to a vector of the original element type, in
|
||||
// the size of the target vector type.
|
||||
SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT,
|
||||
ScalarInVector);
|
||||
SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
|
||||
unsigned SizeRatio = RegSz/MemSz;
|
||||
|
||||
// Redistribute the loaded elements into the different locations.
|
||||
@ -14503,8 +14530,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
|
||||
Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
|
||||
// Replace the original load with the new sequence
|
||||
// and return the new chain.
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff);
|
||||
return SDValue(ScalarLoad.getNode(), 1);
|
||||
return DCI.CombineTo(N, Shuff, TF, true);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
@ -14574,8 +14600,9 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
|
||||
// Can't shuffle using an illegal type
|
||||
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
|
||||
// Can't shuffle using an illegal type.
|
||||
if (!TLI.isTypeLegal(WideVecVT))
|
||||
return SDValue();
|
||||
|
||||
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
|
||||
DAG.getUNDEF(WideVecVT),
|
||||
@ -15308,7 +15335,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
|
||||
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
|
||||
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG);
|
||||
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
|
||||
|
13
test/CodeGen/X86/2012-07-10-extload64.ll
Normal file
13
test/CodeGen/X86/2012-07-10-extload64.ll
Normal file
@ -0,0 +1,13 @@
|
||||
; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 | FileCheck %s
|
||||
|
||||
; CHECK: load_store
|
||||
define void @load_store(<4 x i16>* %in) {
|
||||
entry:
|
||||
%A27 = load <4 x i16>* %in, align 4
|
||||
%A28 = add <4 x i16> %A27, %A27
|
||||
store <4 x i16> %A28, <4 x i16>* %in, align 4
|
||||
ret void
|
||||
; CHECK: movd
|
||||
; CHECK: pinsrd
|
||||
; CHECK: ret
|
||||
}
|
@ -10,8 +10,7 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
|
||||
entry:
|
||||
; CHECK: cfi_def_cfa_offset
|
||||
; CHECK-NOT: set
|
||||
; CHECK: movzwl
|
||||
; CHECK: movzwl
|
||||
; CHECK: punpcklwd
|
||||
; CHECK: pshufd
|
||||
; CHECK: pshufb
|
||||
%shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
|
||||
|
@ -1,18 +1,12 @@
|
||||
; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
|
||||
; RUN: llc < %s -o - -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s -check-prefix=WIN64
|
||||
; PR4891
|
||||
|
||||
; Both loads should happen before either store.
|
||||
|
||||
; CHECK: movd ({{.*}}), {{.*}}
|
||||
; CHECK: movd ({{.*}}), {{.*}}
|
||||
; CHECK: movd {{.*}}, ({{.*}})
|
||||
; CHECK: movd {{.*}}, ({{.*}})
|
||||
|
||||
; WIN64: movd ({{.*}}), {{.*}}
|
||||
; WIN64: movd ({{.*}}), {{.*}}
|
||||
; WIN64: movd {{.*}}, ({{.*}})
|
||||
; WIN64: movd {{.*}}, ({{.*}})
|
||||
; CHECK: movl ({{.*}}), {{.*}}
|
||||
; CHECK: movl ({{.*}}), {{.*}}
|
||||
; CHECK: movl {{.*}}, ({{.*}})
|
||||
; CHECK: movl {{.*}}, ({{.*}})
|
||||
|
||||
define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
|
||||
entry:
|
||||
|
Loading…
x
Reference in New Issue
Block a user