mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 17:32:19 +00:00
Add target hook to prevent folding some bitcasted loads.
This is to avoid this transformation in some cases: fold (conv (load x)) -> (load (conv*)x). On architectures that don't natively support some vector loads efficiently, casting the load to a smaller vector of larger types and loading is more efficient. Patch by Micah Villmow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194783 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a0846f4143
commit
509a492442
@ -204,6 +204,17 @@ public:
|
|||||||
return PredictableSelectIsExpensive;
|
return PredictableSelectIsExpensive;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isLoadBitCastBeneficial() - Return true if the following transform
|
||||||
|
/// is beneficial.
|
||||||
|
/// fold (conv (load x)) -> (load (conv*)x)
|
||||||
|
/// On architectures that don't natively support some vector loads efficiently,
|
||||||
|
/// casting the load to a smaller vector of larger types and loading
|
||||||
|
/// is more efficient, however, this can be undone by optimizations in
|
||||||
|
/// dag combiner.
|
||||||
|
virtual bool isLoadBitCastBeneficial(EVT /* Load */, EVT /* Bitcast */) const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// Return the ValueType of the result of SETCC operations. Also used to
|
/// Return the ValueType of the result of SETCC operations. Also used to
|
||||||
/// obtain the target's preferred type for the condition operand of SELECT and
|
/// obtain the target's preferred type for the condition operand of SELECT and
|
||||||
/// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other
|
/// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other
|
||||||
|
@ -5768,7 +5768,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
|
|||||||
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
|
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
|
||||||
// Do not change the width of a volatile load.
|
// Do not change the width of a volatile load.
|
||||||
!cast<LoadSDNode>(N0)->isVolatile() &&
|
!cast<LoadSDNode>(N0)->isVolatile() &&
|
||||||
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
|
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
|
||||||
|
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
|
||||||
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
||||||
unsigned Align = TLI.getDataLayout()->
|
unsigned Align = TLI.getDataLayout()->
|
||||||
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
|
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
|
||||||
|
@ -196,6 +196,18 @@ MVT AMDGPUTargetLowering::getVectorIdxTy() const {
|
|||||||
return MVT::i32;
|
return MVT::i32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// AMDGPU implementation of the load/bitcast folding hook.
///
/// Returns true whenever the two types differ in total bit width. For types
/// of equal width it returns false only when the loaded scalar is wider than
/// the bitcast scalar, the bitcast scalar is narrower than 32 bits, and the
/// loaded scalar is at least 32 bits; every other combination returns true.
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
                                                   EVT CastTy) const {
  // Types of different total width are always reported as beneficial.
  if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
    return true;

  const unsigned LoadEltBits = LoadTy.getScalarType().getSizeInBits();
  const unsigned CastEltBits = CastTy.getScalarType().getSizeInBits();

  // De Morgan form of
  //   (LoadEltBits <= CastEltBits) || (CastEltBits >= 32) || (LoadEltBits < 32)
  // i.e. reject only when all three of those conditions fail.
  const bool NarrowsWideElts = LoadEltBits > CastEltBits &&
                               CastEltBits < 32 &&
                               LoadEltBits >= 32;
  return !NarrowsWideElts;
}
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
// Target Properties
|
// Target Properties
|
||||||
|
@ -77,6 +77,7 @@ public:
|
|||||||
virtual bool isFAbsFree(EVT VT) const;
|
virtual bool isFAbsFree(EVT VT) const;
|
||||||
virtual bool isFNegFree(EVT VT) const;
|
virtual bool isFNegFree(EVT VT) const;
|
||||||
virtual MVT getVectorIdxTy() const;
|
virtual MVT getVectorIdxTy() const;
|
||||||
|
virtual bool isLoadBitCastBeneficial(EVT, EVT) const LLVM_OVERRIDE;
|
||||||
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
||||||
bool isVarArg,
|
bool isVarArg,
|
||||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||||
|
42
test/CodeGen/R600/combine_vloads.ll
Normal file
42
test/CodeGen/R600/combine_vloads.ll
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
|
||||||
|
|
||||||
|
;
|
||||||
|
; kernel void combine_vloads(global char8* src, global char8* result) {
|
||||||
|
; for (int i = 0; i < 1024; ++i)
|
||||||
|
; result[i] = src[0] + src[1] + src[2] + src[3];
|
||||||
|
; }
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
; 128-bit loads instead of many 8-bit
|
||||||
|
; EG-LABEL: @combine_vloads:
|
||||||
|
; EG: VTX_READ_128
|
||||||
|
; EG: VTX_READ_128
|
||||||
|
; Each loop iteration loads one <8 x i32> from %src, reinterprets it as
; <32 x i8>, adds its four 8-byte quarters together, and stores the <8 x i8>
; sum (bitcast to <2 x i32>) at %result[%i.01]. The counter starts at 0 and
; the loop exits when it reaches 1024.
define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.exit: ; preds = %for.body
|
||||||
|
ret void
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body, %entry
|
||||||
|
%i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ] ; loop counter
|
||||||
|
%arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
|
||||||
|
%0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
|
||||||
|
%vecload2 = load <8 x i32> addrspace(1)* %0, align 32 ; one wide load of 8 x 32 bits
|
||||||
|
%1 = bitcast <8 x i32> %vecload2 to <32 x i8> ; view the loaded value as 32 bytes
|
||||||
|
%tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; bytes 0-7
|
||||||
|
%tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; bytes 8-15
|
||||||
|
%tmp9 = add nsw <8 x i8> %tmp5, %tmp8
|
||||||
|
%tmp12 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ; bytes 16-23
|
||||||
|
%tmp13 = add nsw <8 x i8> %tmp9, %tmp12
|
||||||
|
%tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; bytes 24-31
|
||||||
|
%tmp17 = add nsw <8 x i8> %tmp13, %tmp16 ; sum of all four quarters
|
||||||
|
%scevgep = getelementptr <8 x i8> addrspace(1)* %result, i32 %i.01 ; address of result[i]
|
||||||
|
%2 = bitcast <8 x i8> %tmp17 to <2 x i32>
|
||||||
|
%3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)*
|
||||||
|
store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8
|
||||||
|
%tmp19 = add nsw i32 %i.01, 1
|
||||||
|
%exitcond = icmp eq i32 %tmp19, 1024
|
||||||
|
br i1 %exitcond, label %for.exit, label %for.body
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user