mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-04 10:30:01 +00:00
Fixed a bug when lowering build_vector (PR19694)
When lowering a build_vector to an insertps, we would still lower it even if the source vectors weren't v4x32. This would break on AVX if the source was a v8x32. We now check the type of the source vectors.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208487 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e283f74133
commit
4ccf0ebb19
@ -5458,7 +5458,12 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
|
||||
return SDValue();
|
||||
|
||||
SDValue V = FirstNonZero.getOperand(0);
|
||||
unsigned FirstNonZeroDst = cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
|
||||
MVT VVT = V.getSimpleValueType();
|
||||
if (VVT != MVT::v4f32 && VVT != MVT::v4i32)
|
||||
return SDValue();
|
||||
|
||||
unsigned FirstNonZeroDst =
|
||||
cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
|
||||
unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
|
||||
unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
|
||||
unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
|
||||
@ -5498,8 +5503,8 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
|
||||
else
|
||||
ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
|
||||
|
||||
SDValue InsertpsMask = DAG.getIntPtrConstant(
|
||||
ElementMoveMask | (~NonZeros & 0xf));
|
||||
SDValue InsertpsMask =
|
||||
DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
|
||||
return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
|
||||
}
|
||||
|
||||
|
@ -314,3 +314,21 @@ define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
|
||||
%1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
;; Ensure we don't use insertps from non v4x32 vectors.
|
||||
;; On SSE4.1 it works because bigger vectors use more than 1 register.
|
||||
;; On AVX they get passed in a single register.
|
||||
;; FIXME: We could probably optimize this case, if we're only using the
|
||||
;; first 4 indices.
|
||||
define <4 x i32> @insert_from_diff_size(<8 x i32> %x) {
|
||||
; CHECK-LABEL: insert_from_diff_size:
|
||||
; CHECK-NOT: insertps
|
||||
; CHECK: ret
|
||||
%vecext = extractelement <8 x i32> %x, i32 0
|
||||
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
|
||||
%vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
|
||||
%vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
|
||||
%a.0 = extractelement <8 x i32> %x, i32 0
|
||||
%vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a.0, i32 3
|
||||
ret <4 x i32> %vecinit3
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user