mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
Simplify handling of v16i8 shuffles and fix a missed optimization.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157043 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ad75364815
commit
b82b5abf78
@ -5770,21 +5770,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
|
||||
DebugLoc dl = SVOp->getDebugLoc();
|
||||
ArrayRef<int> MaskVals = SVOp->getMask();
|
||||
|
||||
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
|
||||
|
||||
// If we have SSSE3, case 1 is generated when all result bytes come from
|
||||
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
|
||||
// present, fall back to case 3.
|
||||
// FIXME: kill V2Only once shuffles are canonicalized by getNode.
|
||||
bool V1Only = true;
|
||||
bool V2Only = true;
|
||||
for (unsigned i = 0; i < 16; ++i) {
|
||||
int EltIdx = MaskVals[i];
|
||||
if (EltIdx < 0)
|
||||
continue;
|
||||
if (EltIdx < 16)
|
||||
V2Only = false;
|
||||
else
|
||||
V1Only = false;
|
||||
}
|
||||
|
||||
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
|
||||
if (TLI.getSubtarget()->hasSSSE3()) {
|
||||
@ -5796,23 +5786,16 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
|
||||
// Otherwise, we have elements from both input vectors, and must zero out
|
||||
// elements that come from V2 in the first mask, and V1 in the second mask
|
||||
// so that we can OR them together.
|
||||
bool TwoInputs = !(V1Only || V2Only);
|
||||
for (unsigned i = 0; i != 16; ++i) {
|
||||
int EltIdx = MaskVals[i];
|
||||
if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
|
||||
pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
|
||||
continue;
|
||||
}
|
||||
if (EltIdx < 0 || EltIdx >= 16)
|
||||
EltIdx = 0x80;
|
||||
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
|
||||
}
|
||||
// If all the elements are from V2, assign it to V1 and return after
|
||||
// building the first pshufb.
|
||||
if (V2Only)
|
||||
V1 = V2;
|
||||
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||
MVT::v16i8, &pshufbMask[0], 16));
|
||||
if (!TwoInputs)
|
||||
if (V2IsUndef)
|
||||
return V1;
|
||||
|
||||
// Calculate the shuffle mask for the second input, shuffle it, and
|
||||
@ -5820,10 +5803,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
|
||||
pshufbMask.clear();
|
||||
for (unsigned i = 0; i != 16; ++i) {
|
||||
int EltIdx = MaskVals[i];
|
||||
if (EltIdx < 16) {
|
||||
pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
|
||||
continue;
|
||||
}
|
||||
EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16;
|
||||
pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
|
||||
}
|
||||
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
|
||||
@ -5837,7 +5817,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
|
||||
// the 16 different words that comprise the two doublequadword input vectors.
|
||||
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
|
||||
V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
|
||||
SDValue NewV = V2Only ? V2 : V1;
|
||||
SDValue NewV = V1;
|
||||
for (int i = 0; i != 8; ++i) {
|
||||
int Elt0 = MaskVals[i*2];
|
||||
int Elt1 = MaskVals[i*2+1];
|
||||
@ -5847,9 +5827,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
|
||||
continue;
|
||||
|
||||
// This word of the result is already in the correct place, skip it.
|
||||
if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
|
||||
continue;
|
||||
if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
|
||||
if ((Elt0 == i*2) && (Elt1 == i*2+1))
|
||||
continue;
|
||||
|
||||
SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
|
||||
|
@ -249,9 +249,10 @@ entry:
|
||||
; X64: t16:
|
||||
; X64: pextrw $8, %xmm0, %eax
|
||||
; X64: pslldq $2, %xmm0
|
||||
; X64: movd %xmm0, %ecx
|
||||
; X64: pextrw $1, %xmm0, %edx
|
||||
; X64: pinsrw $0, %ecx, %xmm0
|
||||
; X64: pextrw $1, %xmm0, %ecx
|
||||
; X64: movzbl %cl, %ecx
|
||||
; X64: orl %eax, %ecx
|
||||
; X64: pinsrw $1, %ecx, %xmm0
|
||||
; X64: ret
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
|
||||
; RUN: grep pextrw %t | count 13
|
||||
; RUN: grep pinsrw %t | count 14
|
||||
; RUN: grep pextrw %t | count 12
|
||||
; RUN: grep pinsrw %t | count 13
|
||||
; RUN: grep rolw %t | count 13
|
||||
; RUN: not grep esp %t
|
||||
; RUN: not grep ebp %t
|
||||
|
Loading…
Reference in New Issue
Block a user