mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
The VPSHUFB 256-bit instruction may be generated when one of the input vectors is undefined or zeroinitializer.
I've added the "zeroinitializer" case in this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163506 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1ac4587eb3
commit
8100d244ff
@ -6030,13 +6030,24 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
|
||||
SDValue V1 = SVOp->getOperand(0);
|
||||
SDValue V2 = SVOp->getOperand(1);
|
||||
DebugLoc dl = SVOp->getDebugLoc();
|
||||
ArrayRef<int> MaskVals = SVOp->getMask();
|
||||
SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
|
||||
|
||||
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
|
||||
bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
|
||||
bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
|
||||
|
||||
if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() || !V2IsUndef)
|
||||
// VPSHUFB may be generated if
|
||||
// (1) one of the input vectors is undefined or zeroinitializer.
|
||||
// The mask value 0x80 puts 0 in the corresponding slot of the vector.
|
||||
// And (2) the mask indexes don't cross the 128-bit lane.
|
||||
if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() ||
|
||||
(!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
|
||||
return SDValue();
|
||||
|
||||
if (V1IsAllZero && !V2IsAllZero) {
|
||||
CommuteVectorShuffleMask(MaskVals, 32);
|
||||
V1 = V2;
|
||||
}
|
||||
SmallVector<SDValue, 32> pshufbMask;
|
||||
for (unsigned i = 0; i != 32; i++) {
|
||||
int EltIdx = MaskVals[i];
|
||||
|
@ -37,3 +37,26 @@ define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind {
|
||||
i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
|
||||
ret <32 x i8>%S
|
||||
}
|
||||
|
||||
; CHECK: vpshufb1_test
|
||||
; CHECK: vpshufb {{.*\(%r.*}}, %ymm
|
||||
; CHECK: ret
|
||||
define <32 x i8> @vpshufb1_test(<32 x i8> %a) nounwind {
|
||||
%S = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
|
||||
i32 1, i32 9, i32 36, i32 11, i32 5, i32 13, i32 7, i32 15,
|
||||
i32 18, i32 49, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25,
|
||||
i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
|
||||
ret <32 x i8>%S
|
||||
}
|
||||
|
||||
|
||||
; CHECK: vpshufb2_test
|
||||
; CHECK: vpshufb {{.*\(%r.*}}, %ymm
|
||||
; CHECK: ret
|
||||
define <32 x i8> @vpshufb2_test(<32 x i8> %a) nounwind {
|
||||
%S = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
|
||||
i32 1, i32 9, i32 36, i32 11, i32 5, i32 13, i32 7, i32 15,
|
||||
i32 18, i32 49, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25,
|
||||
i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
|
||||
ret <32 x i8>%S
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user