mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-16 14:31:59 +00:00
Fix for PR18045:
http://llvm.org/bugs/show_bug.cgi?id=18045 Short issue description: For X86 machines with SSE < SSE4.1 we got failures for some particular load/store vector sequences: $ clang-trunk -m32 -O2 test-case.c fatal error: error in backend: Cannot select: 0x4200920: v4i32,ch = load 0x41d6ab0, 0x4205850, 0x41dcb10<LD16[getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)](align=4)> [ORD=82] [ID=58] 0x4205850: i32 = X86ISD::Wrapper 0x41d5490 [ORD=26] [ID=43] 0x41d5490: i32 = TargetGlobalAddress<[4 x i32]* @e> 0 [ORD=26] [ID=23] 0x41dcb10: i32 = undef [ID=2] The reason is that EltsFromConsecutiveLoads could emit such a load instruction both before and after the legalize stage, even though this instruction is not legal for machines with SSSE3 and lower. The fix: In EltsFromConsecutiveLoads, if we have already passed the legalize stage, we check whether the nodes it emits are legal. P.S.: If you see a failure between 12:00 and 22:00 (UTC-8), I may be slow to respond, so it is better to revert this commit. Thanks! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197492 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
efa95f887f
commit
5842a037fb
@ -5426,7 +5426,8 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
|
|||||||
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
|
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
|
||||||
/// There's even a handy isZeroNode for that purpose.
|
/// There's even a handy isZeroNode for that purpose.
|
||||||
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
|
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
|
||||||
SDLoc &DL, SelectionDAG &DAG) {
|
SDLoc &DL, SelectionDAG &DAG,
|
||||||
|
bool isAfterLegalize) {
|
||||||
EVT EltVT = VT.getVectorElementType();
|
EVT EltVT = VT.getVectorElementType();
|
||||||
unsigned NumElems = Elts.size();
|
unsigned NumElems = Elts.size();
|
||||||
|
|
||||||
@ -5462,7 +5463,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
|
|||||||
// load of the entire vector width starting at the base pointer. If we found
|
// load of the entire vector width starting at the base pointer. If we found
|
||||||
// consecutive loads for the low half, generate a vzext_load node.
|
// consecutive loads for the low half, generate a vzext_load node.
|
||||||
if (LastLoadedElt == NumElems - 1) {
|
if (LastLoadedElt == NumElems - 1) {
|
||||||
|
|
||||||
|
if (isAfterLegalize &&
|
||||||
|
!DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
SDValue NewLd = SDValue();
|
SDValue NewLd = SDValue();
|
||||||
|
|
||||||
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
|
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
|
||||||
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
|
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
|
||||||
LDBase->getPointerInfo(),
|
LDBase->getPointerInfo(),
|
||||||
@ -6106,7 +6113,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
V[i] = Op.getOperand(i);
|
V[i] = Op.getOperand(i);
|
||||||
|
|
||||||
// Check for elements which are consecutive loads.
|
// Check for elements which are consecutive loads.
|
||||||
SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
|
SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
|
||||||
if (LD.getNode())
|
if (LD.getNode())
|
||||||
return LD;
|
return LD;
|
||||||
|
|
||||||
@ -16379,7 +16386,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
|
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
|
||||||
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
|
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
|
||||||
|
|
||||||
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
|
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// PerformTruncateCombine - Converts truncate operation to
|
/// PerformTruncateCombine - Converts truncate operation to
|
||||||
|
27
test/CodeGen/X86/v4i32load-crash.ll
Normal file
27
test/CodeGen/X86/v4i32load-crash.ll
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
; RUN: llc --mcpu=x86-64 --mattr=ssse3 < %s
|
||||||
|
|
||||||
|
;PR18045:
|
||||||
|
;Issue of selection for 'v4i32 load'.
|
||||||
|
;This instruction is not legal for X86 CPUs with sse < 'sse4.1'.
|
||||||
|
;This node was generated by X86ISelLowering.cpp, EltsFromConsecutiveLoads
|
||||||
|
;static function after the legalize stage.
|
||||||
|
|
||||||
|
@e = external global [4 x i32], align 4
|
||||||
|
@f = external global [4 x i32], align 4
|
||||||
|
|
||||||
|
; Function Attrs: nounwind
|
||||||
|
define void @fn3(i32 %el) {
|
||||||
|
entry:
|
||||||
|
%0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)
|
||||||
|
%1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1)
|
||||||
|
%2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2)
|
||||||
|
%3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3)
|
||||||
|
%4 = insertelement <4 x i32> undef, i32 %0, i32 0
|
||||||
|
%5 = insertelement <4 x i32> %4, i32 %1, i32 1
|
||||||
|
%6 = insertelement <4 x i32> %5, i32 %2, i32 2
|
||||||
|
%7 = insertelement <4 x i32> %6, i32 %3, i32 3
|
||||||
|
%8 = add <4 x i32> %6, %7
|
||||||
|
store <4 x i32> %8, <4 x i32>* bitcast ([4 x i32]* @f to <4 x i32>*)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user