mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-05-13 17:38:39 +00:00
Fix PR15267
- When extloading from a vector whose elements are not byte-addressable, e.g. <4 x i1>, the current logic breaks. Extend the current logic to handle the case where the element type is not byte-addressable by loading all bytes and then bit-extracting/packing each element. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175642 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9b5b8b0b94
commit
eedff3547d
@ -363,30 +363,135 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
|
|||||||
EVT SrcVT = LD->getMemoryVT();
|
EVT SrcVT = LD->getMemoryVT();
|
||||||
ISD::LoadExtType ExtType = LD->getExtensionType();
|
ISD::LoadExtType ExtType = LD->getExtensionType();
|
||||||
|
|
||||||
SmallVector<SDValue, 8> LoadVals;
|
SmallVector<SDValue, 8> Vals;
|
||||||
SmallVector<SDValue, 8> LoadChains;
|
SmallVector<SDValue, 8> LoadChains;
|
||||||
unsigned NumElem = SrcVT.getVectorNumElements();
|
unsigned NumElem = SrcVT.getVectorNumElements();
|
||||||
unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
|
|
||||||
|
|
||||||
for (unsigned Idx=0; Idx<NumElem; Idx++) {
|
EVT SrcEltVT = SrcVT.getScalarType();
|
||||||
SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
|
EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
|
||||||
Op.getNode()->getValueType(0).getScalarType(),
|
|
||||||
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
|
|
||||||
SrcVT.getScalarType(),
|
|
||||||
LD->isVolatile(), LD->isNonTemporal(),
|
|
||||||
LD->getAlignment());
|
|
||||||
|
|
||||||
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
|
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
|
||||||
DAG.getIntPtrConstant(Stride));
|
// When elements in a vector are not byte-addressable, we cannot directly
|
||||||
|
// load each element by advancing pointer, which could only address bytes.
|
||||||
|
// Instead, we load all significant words, mask bits off, and concatenate
|
||||||
|
// them to form each element. Finally, they are extended to destination
|
||||||
|
// scalar type to build the destination vector.
|
||||||
|
EVT WideVT = TLI.getPointerTy();
|
||||||
|
|
||||||
LoadVals.push_back(ScalarLoad.getValue(0));
|
assert(WideVT.isRound() &&
|
||||||
LoadChains.push_back(ScalarLoad.getValue(1));
|
"Could not handle the sophisticated case when the widest integer is"
|
||||||
|
" not power of 2.");
|
||||||
|
assert(WideVT.bitsGE(SrcEltVT) &&
|
||||||
|
"Type is not legalized?");
|
||||||
|
|
||||||
|
unsigned WideBytes = WideVT.getStoreSize();
|
||||||
|
unsigned Offset = 0;
|
||||||
|
unsigned RemainingBytes = SrcVT.getStoreSize();
|
||||||
|
SmallVector<SDValue, 8> LoadVals;
|
||||||
|
|
||||||
|
while (RemainingBytes > 0) {
|
||||||
|
SDValue ScalarLoad;
|
||||||
|
unsigned LoadBytes = WideBytes;
|
||||||
|
|
||||||
|
if (RemainingBytes >= LoadBytes) {
|
||||||
|
ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
|
||||||
|
LD->getPointerInfo().getWithOffset(Offset),
|
||||||
|
LD->isVolatile(), LD->isNonTemporal(),
|
||||||
|
LD->isInvariant(), LD->getAlignment());
|
||||||
|
} else {
|
||||||
|
EVT LoadVT = WideVT;
|
||||||
|
while (RemainingBytes < LoadBytes) {
|
||||||
|
LoadBytes >>= 1; // Reduce the load size by half.
|
||||||
|
LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
|
||||||
|
}
|
||||||
|
ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
|
||||||
|
LD->getPointerInfo().getWithOffset(Offset),
|
||||||
|
LoadVT, LD->isVolatile(),
|
||||||
|
LD->isNonTemporal(), LD->getAlignment());
|
||||||
|
}
|
||||||
|
|
||||||
|
RemainingBytes -= LoadBytes;
|
||||||
|
Offset += LoadBytes;
|
||||||
|
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
|
||||||
|
DAG.getIntPtrConstant(LoadBytes));
|
||||||
|
|
||||||
|
LoadVals.push_back(ScalarLoad.getValue(0));
|
||||||
|
LoadChains.push_back(ScalarLoad.getValue(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract bits, pack and extend/trunc them into destination type.
|
||||||
|
unsigned SrcEltBits = SrcEltVT.getSizeInBits();
|
||||||
|
SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
|
||||||
|
|
||||||
|
unsigned BitOffset = 0;
|
||||||
|
unsigned WideIdx = 0;
|
||||||
|
unsigned WideBits = WideVT.getSizeInBits();
|
||||||
|
|
||||||
|
for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
|
||||||
|
SDValue Lo, Hi, ShAmt;
|
||||||
|
|
||||||
|
if (BitOffset < WideBits) {
|
||||||
|
ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
|
||||||
|
Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
|
||||||
|
Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
BitOffset += SrcEltBits;
|
||||||
|
if (BitOffset >= WideBits) {
|
||||||
|
WideIdx++;
|
||||||
|
Offset -= WideBits;
|
||||||
|
if (Offset > 0) {
|
||||||
|
ShAmt = DAG.getConstant(SrcEltBits - Offset,
|
||||||
|
TLI.getShiftAmountTy(WideVT));
|
||||||
|
Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
|
||||||
|
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Hi.getNode())
|
||||||
|
Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
|
||||||
|
|
||||||
|
switch (ExtType) {
|
||||||
|
default: llvm_unreachable("Unknown extended-load op!");
|
||||||
|
case ISD::EXTLOAD:
|
||||||
|
Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
|
||||||
|
break;
|
||||||
|
case ISD::ZEXTLOAD:
|
||||||
|
Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
|
||||||
|
break;
|
||||||
|
case ISD::SEXTLOAD:
|
||||||
|
ShAmt = DAG.getConstant(WideBits - SrcEltBits,
|
||||||
|
TLI.getShiftAmountTy(WideVT));
|
||||||
|
Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
|
||||||
|
Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
|
||||||
|
Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Vals.push_back(Lo);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
|
||||||
|
|
||||||
|
for (unsigned Idx=0; Idx<NumElem; Idx++) {
|
||||||
|
SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
|
||||||
|
Op.getNode()->getValueType(0).getScalarType(),
|
||||||
|
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
|
||||||
|
SrcVT.getScalarType(),
|
||||||
|
LD->isVolatile(), LD->isNonTemporal(),
|
||||||
|
LD->getAlignment());
|
||||||
|
|
||||||
|
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
|
||||||
|
DAG.getIntPtrConstant(Stride));
|
||||||
|
|
||||||
|
Vals.push_back(ScalarLoad.getValue(0));
|
||||||
|
LoadChains.push_back(ScalarLoad.getValue(1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||||
&LoadChains[0], LoadChains.size());
|
&LoadChains[0], LoadChains.size());
|
||||||
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
|
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||||
Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
|
Op.getNode()->getValueType(0), &Vals[0], Vals.size());
|
||||||
|
|
||||||
AddLegalizedOperand(Op.getValue(0), Value);
|
AddLegalizedOperand(Op.getValue(0), Value);
|
||||||
AddLegalizedOperand(Op.getValue(1), NewChain);
|
AddLegalizedOperand(Op.getValue(1), NewChain);
|
||||||
|
66
test/CodeGen/X86/pr15267.ll
Normal file
66
test/CodeGen/X86/pr15267.ll
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
|
||||||
|
|
||||||
|
define <4 x i3> @test1(<4 x i3>* %in) nounwind {
|
||||||
|
%ret = load <4 x i3>* %in, align 1
|
||||||
|
ret <4 x i3> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: test1
|
||||||
|
; CHECK: movzwl
|
||||||
|
; CHECK: shrl $3
|
||||||
|
; CHECK: andl $7
|
||||||
|
; CHECK: andl $7
|
||||||
|
; CHECK: vmovd
|
||||||
|
; CHECK: pinsrd $1
|
||||||
|
; CHECK: shrl $6
|
||||||
|
; CHECK: andl $7
|
||||||
|
; CHECK: pinsrd $2
|
||||||
|
; CHECK: shrl $9
|
||||||
|
; CHECK: andl $7
|
||||||
|
; CHECK: pinsrd $3
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
define <4 x i1> @test2(<4 x i1>* %in) nounwind {
|
||||||
|
%ret = load <4 x i1>* %in, align 1
|
||||||
|
ret <4 x i1> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: test2
|
||||||
|
; CHECK: movzbl
|
||||||
|
; CHECK: shrl
|
||||||
|
; CHECK: andl $1
|
||||||
|
; CHECK: andl $1
|
||||||
|
; CHECK: vmovd
|
||||||
|
; CHECK: pinsrd $1
|
||||||
|
; CHECK: shrl $2
|
||||||
|
; CHECK: andl $1
|
||||||
|
; CHECK: pinsrd $2
|
||||||
|
; CHECK: shrl $3
|
||||||
|
; CHECK: andl $1
|
||||||
|
; CHECK: pinsrd $3
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
define <4 x i64> @test3(<4 x i1>* %in) nounwind {
|
||||||
|
%wide.load35 = load <4 x i1>* %in, align 1
|
||||||
|
%sext = sext <4 x i1> %wide.load35 to <4 x i64>
|
||||||
|
ret <4 x i64> %sext
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: test3
|
||||||
|
; CHECK: movzbl
|
||||||
|
; CHECK: shrl
|
||||||
|
; CHECK: andl $1
|
||||||
|
; CHECK: andl $1
|
||||||
|
; CHECK: vmovd
|
||||||
|
; CHECK: pinsrd $1
|
||||||
|
; CHECK: shrl $2
|
||||||
|
; CHECK: andl $1
|
||||||
|
; CHECK: pinsrd $2
|
||||||
|
; CHECK: shrl $3
|
||||||
|
; CHECK: andl $1
|
||||||
|
; CHECK: pinsrd $3
|
||||||
|
; CHECK: pslld
|
||||||
|
; CHECK: psrad
|
||||||
|
; CHECK: pmovsxdq
|
||||||
|
; CHECK: pmovsxdq
|
||||||
|
; CHECK: ret
|
Loading…
x
Reference in New Issue
Block a user