- Only use pshufd for v4i32 vector shuffles.

- Other shuffle-related fixes.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27244 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-03-29 01:30:51 +00:00
parent f3a627262c
commit 4f5633883b
2 changed files with 83 additions and 61 deletions

View File

@ -1583,15 +1583,21 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
return Mask; return Mask;
} }
/// CommuteVectorShuffleIfNeeded - Swap vector_shuffle operands (as well as /// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as
/// values in the permute mask) if needed. Return an empty SDOperand if it is /// values in the permute mask) if needed. Use V1 as second vector if it is
/// already well formed. /// undef. Return an empty SDOperand if it is already well formed.
static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2, static SDOperand NormalizeVectorShuffle(SDOperand V1, SDOperand V2,
SDOperand Mask, MVT::ValueType VT, SDOperand Mask, MVT::ValueType VT,
SelectionDAG &DAG) { SelectionDAG &DAG) {
unsigned NumElems = Mask.getNumOperands(); unsigned NumElems = Mask.getNumOperands();
SDOperand Half1 = Mask.getOperand(0); SDOperand Half1 = Mask.getOperand(0);
SDOperand Half2 = Mask.getOperand(NumElems/2); SDOperand Half2 = Mask.getOperand(NumElems/2);
bool V2Undef = false;
if (V2.getOpcode() == ISD::UNDEF) {
V2Undef = true;
V2 = V1;
}
if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems && if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems &&
cast<ConstantSDNode>(Half2)->getValue() < NumElems) { cast<ConstantSDNode>(Half2)->getValue() < NumElems) {
// Swap the operands and change mask. // Swap the operands and change mask.
@ -1604,6 +1610,10 @@ static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2,
DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec); DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
} }
if (V2Undef)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
return SDOperand(); return SDOperand();
} }
@ -2387,8 +2397,26 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType(); MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands(); unsigned NumElems = PermMask.getNumOperands();
if (NumElems == 2) if (X86::isUNPCKLMask(PermMask.Val) ||
return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG); X86::isUNPCKHMask(PermMask.Val))
// Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
return SDOperand();
// PSHUFD's 2nd vector must be undef.
if (MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val))
if (V2.getOpcode() == ISD::UNDEF)
return SDOperand();
else
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),
PermMask);
if (NumElems == 2 ||
X86::isSplatMask(PermMask.Val) ||
X86::isSHUFPMask(PermMask.Val)) {
return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
}
#if 0
else if (X86::isSplatMask(PermMask.Val)) { else if (X86::isSplatMask(PermMask.Val)) {
// Handle splat cases. // Handle splat cases.
if (V2.getOpcode() == ISD::UNDEF) if (V2.getOpcode() == ISD::UNDEF)
@ -2400,10 +2428,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()), DAG.getNode(ISD::UNDEF, V1.getValueType()),
PermMask); PermMask);
} else if (X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val)) {
// Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
return SDOperand();
} else if (X86::isPSHUFDMask(PermMask.Val)) { } else if (X86::isPSHUFDMask(PermMask.Val)) {
if (V2.getOpcode() == ISD::UNDEF) if (V2.getOpcode() == ISD::UNDEF)
// Leave the VECTOR_SHUFFLE alone. It matches PSHUFD. // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
@ -2414,7 +2438,8 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
DAG.getNode(ISD::UNDEF, V1.getValueType()), DAG.getNode(ISD::UNDEF, V1.getValueType()),
PermMask); PermMask);
} else if (X86::isSHUFPMask(PermMask.Val)) } else if (X86::isSHUFPMask(PermMask.Val))
return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG); return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
#endif
assert(0 && "Unexpected VECTOR_SHUFFLE to lower"); assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
abort(); abort();

View File

@ -79,9 +79,8 @@ def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKHMask(N); return X86::isUNPCKHMask(N);
}]>; }]>;
// Only use PSHUF if it is not a splat.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{ def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
return !X86::isSplatMask(N) && X86::isPSHUFDMask(N); return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>; }], SHUFFLE_get_shuf_imm>;
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
@ -918,86 +917,92 @@ def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>; "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrr : PDIi8<0x70, MRMDestReg, def PSHUFDrr : PDIi8<0x70, MRMDestReg,
(ops VR128:$dst, VR128:$src1, i8imm:$src2), (ops VR128:$dst, VR128:$src1, i8imm:$src2),
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>; "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle
VR128:$src1, (undef),
PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem, def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
(ops VR128:$dst, i128mem:$src1, i8imm:$src2), (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>; "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle
(load addr:$src1), (undef),
PSHUFD_shuffle_mask:$src2)))]>;
let isTwoAddress = 1 in { let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg, def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3), (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}", "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (vector_shuffle [(set VR128:$dst, (v4f32 (vector_shuffle
(v4f32 VR128:$src1), (v4f32 VR128:$src2), VR128:$src1, VR128:$src2,
SHUFP_shuffle_mask:$src3))]>; SHUFP_shuffle_mask:$src3)))]>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem, def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3), (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}", "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (vector_shuffle [(set VR128:$dst, (v4f32 (vector_shuffle
(v4f32 VR128:$src1), (load addr:$src2), VR128:$src1, (load addr:$src2),
SHUFP_shuffle_mask:$src3))]>; SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg, def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3), (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (vector_shuffle [(set VR128:$dst, (v2f64 (vector_shuffle
(v2f64 VR128:$src1), (v2f64 VR128:$src2), VR128:$src1, VR128:$src2,
SHUFP_shuffle_mask:$src3))]>; SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem, def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3), (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (vector_shuffle [(set VR128:$dst, (v2f64 (vector_shuffle
(v2f64 VR128:$src1), (load addr:$src2), VR128:$src1, (load addr:$src2),
SHUFP_shuffle_mask:$src3))]>; SHUFP_shuffle_mask:$src3)))]>;
def UNPCKHPSrr : PSI<0x15, MRMSrcReg, def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2), (ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpckhps {$src2, $dst|$dst, $src2}", "unpckhps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst, (v4f32 (vector_shuffle
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>; UNPCKH_shuffle_mask)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem, def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2), (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpckhps {$src2, $dst|$dst, $src2}", "unpckhps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst, (v4f32 (vector_shuffle
(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), VR128:$src1, (load addr:$src2),
UNPCKH_shuffle_mask)))]>; UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg, def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2), (ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpckhpd {$src2, $dst|$dst, $src2}", "unpckhpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst, (v2f64 (vector_shuffle
(v2f64 (vector_shuffle VR128:$src1, VR128:$src2, VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>; UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem, def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2), (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpckhpd {$src2, $dst|$dst, $src2}", "unpckhpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst, (v2f64 (vector_shuffle
(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), VR128:$src1, (load addr:$src2),
UNPCKH_shuffle_mask)))]>; UNPCKH_shuffle_mask)))]>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg, def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2), (ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpcklps {$src2, $dst|$dst, $src2}", "unpcklps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst, (v4f32 (vector_shuffle
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>; UNPCKL_shuffle_mask)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem, def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2), (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpcklps {$src2, $dst|$dst, $src2}", "unpcklps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst, (v4f32 (vector_shuffle
(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), VR128:$src1, (load addr:$src2),
UNPCKL_shuffle_mask)))]>; UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg, def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2), (ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpcklpd {$src2, $dst|$dst, $src2}", "unpcklpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst, (v2f64 (vector_shuffle
(v2f64 (vector_shuffle VR128:$src1, VR128:$src2, VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>; UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem, def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2), (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpcklpd {$src2, $dst|$dst, $src2}", "unpcklpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst, (v2f64 (vector_shuffle
(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), VR128:$src1, (load addr:$src2),
UNPCKL_shuffle_mask)))]>; UNPCKL_shuffle_mask)))]>;
} }
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -1354,11 +1359,3 @@ def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
(v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>; (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm), def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
(v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>; (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
(v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
(v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
Requires<[HasSSE2]>;