Make sure all possible shuffles are matched.

Use pshufd, pshufhw, and pshuflw to shuffle v4f32 if shufps doesn't match.
Use shufps to shuffle v4i32 if pshufd, pshufhw, and pshuflw don't match.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27259 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-03-30 19:54:57 +00:00
parent 99e746feba
commit 7d9061e300
2 changed files with 89 additions and 30 deletions

View File

@ -1486,23 +1486,17 @@ bool X86::isSHUFPMask(SDNode *N) {
if (NumElems != 4) return false;
// Each half must refer to only one of the vector.
SDOperand Elt = N->getOperand(0);
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
for (unsigned i = 1; i < NumElems / 2; ++i) {
for (unsigned i = 0; i < 2; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
cast<ConstantSDNode>(Elt)->getValue())
return false;
unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
if (Val >= 4) return false;
}
Elt = N->getOperand(NumElems / 2);
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) {
for (unsigned i = 2; i < 4; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
cast<ConstantSDNode>(Elt)->getValue())
return false;
unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
if (Val < 4) return false;
}
return true;
@ -2489,11 +2483,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
unsigned NumElems = PermMask.getNumOperands();
// Splat && PSHUFD's 2nd vector must be undef.
if (X86::isSplatMask(PermMask.Val) ||
((MVT::isInteger(VT) &&
(X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val))))) {
if (X86::isSplatMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
@ -2505,9 +2495,34 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
// Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
return SDOperand();
if (NumElems == 2 ||
X86::isSHUFPMask(PermMask.Val)) {
if (NumElems == 2)
return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
// If VT is integer, try PSHUF* first, then SHUFP*.
if (MVT::isInteger(VT)) {
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return SDOperand();
}
if (X86::isSHUFPMask(PermMask.Val))
return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return SDOperand();
}
}
assert(0 && "Unexpected VECTOR_SHUFFLE to lower");

View File

@ -106,14 +106,32 @@ def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFLWMask(N);
}], SHUFFLE_get_pshuflw_imm>;
// Only use PSHUF* for v4f32 if SHUFP does not match.
// Each of the three leaves below matches a build_vector mask and rejects it
// outright when X86::isSHUFPMask(N) is true, so the PSHUF* form is only
// considered for masks SHUFP cannot handle.
//
// PSHUFD variant: requires X86::isPSHUFDMask(N); SHUFFLE_get_shuf_imm is
// attached as the operand transform.
def PSHUFD_fp_shuffle_mask : PatLeaf<(build_vector), [{
return !X86::isSHUFPMask(N) &&
X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;
// PSHUFHW variant: requires X86::isPSHUFHWMask(N); SHUFFLE_get_pshufhw_imm is
// attached as the operand transform.
def PSHUFHW_fp_shuffle_mask : PatLeaf<(build_vector), [{
return !X86::isSHUFPMask(N) &&
X86::isPSHUFHWMask(N);
}], SHUFFLE_get_pshufhw_imm>;
// PSHUFLW variant: requires X86::isPSHUFLWMask(N); SHUFFLE_get_pshuflw_imm is
// attached as the operand transform.
def PSHUFLW_fp_shuffle_mask : PatLeaf<(build_vector), [{
return !X86::isSHUFPMask(N) &&
X86::isPSHUFLWMask(N);
}], SHUFFLE_get_pshuflw_imm>;
// Unconditional SHUFP mask leaf: matches any build_vector mask accepted by
// X86::isSHUFPMask(N), with SHUFFLE_get_shuf_imm attached as the operand
// transform (presumably yielding the instruction immediate -- confirm against
// its definition).
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
// Only use SHUFP for v4i32 if no other options are available.
// FIXME: add tblgen hook to reduce the complexity of pattern.
def SHUFP_v4i32_shuffle_mask : PatLeaf<(build_vector), [{
return !X86::isUNPCKHMask(N) && !X86::isPSHUFDMask(N) && X86::isSHUFPMask(N);
// Only use SHUFP for v4i32 if PSHUF* do not match.
// The predicate rejects any mask accepted by X86::isPSHUFDMask,
// X86::isPSHUFHWMask or X86::isPSHUFLWMask, and only then requires
// X86::isSHUFPMask(N). SHUFFLE_get_shuf_imm is attached as the operand
// transform.
def SHUFP_int_shuffle_mask : PatLeaf<(build_vector), [{
return !X86::isPSHUFDMask(N) &&
!X86::isPSHUFHWMask(N) &&
!X86::isPSHUFLWMask(N) &&
X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
@ -1278,14 +1296,14 @@ def PSHUFHWrm : Ii8<0x70, MRMDestMem,
// SSE2 with ImmT == Imm8 and XD prefix.
def PSHUFLWrr : Ii8<0x70, MRMDestReg,
(ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
"pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}",
"pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
VR128:$src1, (undef),
PSHUFLW_shuffle_mask:$src2)))]>,
XD, Requires<[HasSSE2]>;
def PSHUFLWrm : Ii8<0x70, MRMDestMem,
(ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
"pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}",
"pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
(bc_v8i16 (loadv2i64 addr:$src1)), (undef),
PSHUFLW_shuffle_mask:$src2)))]>,
@ -1593,15 +1611,41 @@ def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
(v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>,
Requires<[HasSSE1]>;
// Shuffle v4i32 if others do not match
// Shuffle v4i32 with SHUFP* if others do not match.
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
SHUFP_shuffle_mask:$sm),
SHUFP_int_shuffle_mask:$sm),
(v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
SHUFP_shuffle_mask:$sm),
SHUFP_int_shuffle_mask:$sm),
(v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
// Shuffle v4f32 with PSHUF* if others do not match.
// Every pattern below takes a single v4f32 source (the second shuffle operand
// is undef) and is gated on HasSSE2. Each instruction gets a register-source
// form (rr) and a form whose source is loaded from memory (rm). The *_fp_*
// mask leaves are used on both the match side and the result side.
//
// PSHUFD: selected for masks matching PSHUFD_fp_shuffle_mask.
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
PSHUFD_fp_shuffle_mask:$sm),
(v4f32 (PSHUFDrr VR128:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
PSHUFD_fp_shuffle_mask:$sm),
(v4f32 (PSHUFDrm addr:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
Requires<[HasSSE2]>;
// PSHUFHW: selected for masks matching PSHUFHW_fp_shuffle_mask.
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
PSHUFHW_fp_shuffle_mask:$sm),
(v4f32 (PSHUFHWrr VR128:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
PSHUFHW_fp_shuffle_mask:$sm),
(v4f32 (PSHUFHWrm addr:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
Requires<[HasSSE2]>;
// PSHUFLW: selected for masks matching PSHUFLW_fp_shuffle_mask.
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
PSHUFLW_fp_shuffle_mask:$sm),
(v4f32 (PSHUFLWrr VR128:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
PSHUFLW_fp_shuffle_mask:$sm),
(v4f32 (PSHUFLWrm addr:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
Requires<[HasSSE2]>;
// Logical ops
def : Pat<(and (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),