mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-17 21:35:07 +00:00
- Only use pshufd for v4i32 vector shuffles.
- Other shuffle related fixes.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27244 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f3a627262c
commit
4f5633883b
@ -1583,15 +1583,21 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
|
|||||||
return Mask;
|
return Mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// CommuteVectorShuffleIfNeeded - Swap vector_shuffle operands (as well as
|
/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as
|
||||||
/// values in ther permute mask if needed. Return an empty SDOperand is it is
|
/// values in the permute mask) if needed. Use V1 as second vector if V2 is
|
||||||
/// already well formed.
|
/// undef. Return an empty SDOperand if it is already well formed.
|
||||||
static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2,
|
static SDOperand NormalizeVectorShuffle(SDOperand V1, SDOperand V2,
|
||||||
SDOperand Mask, MVT::ValueType VT,
|
SDOperand Mask, MVT::ValueType VT,
|
||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
unsigned NumElems = Mask.getNumOperands();
|
unsigned NumElems = Mask.getNumOperands();
|
||||||
SDOperand Half1 = Mask.getOperand(0);
|
SDOperand Half1 = Mask.getOperand(0);
|
||||||
SDOperand Half2 = Mask.getOperand(NumElems/2);
|
SDOperand Half2 = Mask.getOperand(NumElems/2);
|
||||||
|
bool V2Undef = false;
|
||||||
|
if (V2.getOpcode() == ISD::UNDEF) {
|
||||||
|
V2Undef = true;
|
||||||
|
V2 = V1;
|
||||||
|
}
|
||||||
|
|
||||||
if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems &&
|
if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems &&
|
||||||
cast<ConstantSDNode>(Half2)->getValue() < NumElems) {
|
cast<ConstantSDNode>(Half2)->getValue() < NumElems) {
|
||||||
// Swap the operands and change mask.
|
// Swap the operands and change mask.
|
||||||
@ -1604,6 +1610,10 @@ static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2,
|
|||||||
DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
|
DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
|
||||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
|
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (V2Undef)
|
||||||
|
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
|
||||||
|
|
||||||
return SDOperand();
|
return SDOperand();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2387,8 +2397,26 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
MVT::ValueType VT = Op.getValueType();
|
MVT::ValueType VT = Op.getValueType();
|
||||||
unsigned NumElems = PermMask.getNumOperands();
|
unsigned NumElems = PermMask.getNumOperands();
|
||||||
|
|
||||||
if (NumElems == 2)
|
if (X86::isUNPCKLMask(PermMask.Val) ||
|
||||||
return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG);
|
X86::isUNPCKHMask(PermMask.Val))
|
||||||
|
// Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL* / {P}UNPCKH*.
|
||||||
|
return SDOperand();
|
||||||
|
|
||||||
|
// PSHUFD's 2nd vector must be undef.
|
||||||
|
if (MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val))
|
||||||
|
if (V2.getOpcode() == ISD::UNDEF)
|
||||||
|
return SDOperand();
|
||||||
|
else
|
||||||
|
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
|
||||||
|
DAG.getNode(ISD::UNDEF, V1.getValueType()),
|
||||||
|
PermMask);
|
||||||
|
|
||||||
|
if (NumElems == 2 ||
|
||||||
|
X86::isSplatMask(PermMask.Val) ||
|
||||||
|
X86::isSHUFPMask(PermMask.Val)) {
|
||||||
|
return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
else if (X86::isSplatMask(PermMask.Val)) {
|
else if (X86::isSplatMask(PermMask.Val)) {
|
||||||
// Handle splat cases.
|
// Handle splat cases.
|
||||||
if (V2.getOpcode() == ISD::UNDEF)
|
if (V2.getOpcode() == ISD::UNDEF)
|
||||||
@ -2400,10 +2428,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
|
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
|
||||||
DAG.getNode(ISD::UNDEF, V1.getValueType()),
|
DAG.getNode(ISD::UNDEF, V1.getValueType()),
|
||||||
PermMask);
|
PermMask);
|
||||||
} else if (X86::isUNPCKLMask(PermMask.Val) ||
|
|
||||||
X86::isUNPCKHMask(PermMask.Val)) {
|
|
||||||
// Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
|
|
||||||
return SDOperand();
|
|
||||||
} else if (X86::isPSHUFDMask(PermMask.Val)) {
|
} else if (X86::isPSHUFDMask(PermMask.Val)) {
|
||||||
if (V2.getOpcode() == ISD::UNDEF)
|
if (V2.getOpcode() == ISD::UNDEF)
|
||||||
// Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
|
// Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
|
||||||
@ -2414,7 +2438,8 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
DAG.getNode(ISD::UNDEF, V1.getValueType()),
|
DAG.getNode(ISD::UNDEF, V1.getValueType()),
|
||||||
PermMask);
|
PermMask);
|
||||||
} else if (X86::isSHUFPMask(PermMask.Val))
|
} else if (X86::isSHUFPMask(PermMask.Val))
|
||||||
return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG);
|
return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
|
||||||
|
#endif
|
||||||
|
|
||||||
assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
|
assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
|
||||||
abort();
|
abort();
|
||||||
|
@ -79,9 +79,8 @@ def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
|
|||||||
return X86::isUNPCKHMask(N);
|
return X86::isUNPCKHMask(N);
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
// Only use PSHUF if it is not a splat.
|
|
||||||
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
|
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
|
return X86::isPSHUFDMask(N);
|
||||||
}], SHUFFLE_get_shuf_imm>;
|
}], SHUFFLE_get_shuf_imm>;
|
||||||
|
|
||||||
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
|
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
@ -918,86 +917,92 @@ def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
|
|||||||
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
|
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
|
||||||
def PSHUFDrr : PDIi8<0x70, MRMDestReg,
|
def PSHUFDrr : PDIi8<0x70, MRMDestReg,
|
||||||
(ops VR128:$dst, VR128:$src1, i8imm:$src2),
|
(ops VR128:$dst, VR128:$src1, i8imm:$src2),
|
||||||
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
|
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[(set VR128:$dst, (v4i32 (vector_shuffle
|
||||||
|
VR128:$src1, (undef),
|
||||||
|
PSHUFD_shuffle_mask:$src2)))]>;
|
||||||
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
|
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
|
||||||
(ops VR128:$dst, i128mem:$src1, i8imm:$src2),
|
(ops VR128:$dst, i128mem:$src1, i8imm:$src2),
|
||||||
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
|
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[(set VR128:$dst, (v4i32 (vector_shuffle
|
||||||
|
(load addr:$src1), (undef),
|
||||||
|
PSHUFD_shuffle_mask:$src2)))]>;
|
||||||
|
|
||||||
let isTwoAddress = 1 in {
|
let isTwoAddress = 1 in {
|
||||||
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
|
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||||
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
|
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst, (vector_shuffle
|
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||||
(v4f32 VR128:$src1), (v4f32 VR128:$src2),
|
VR128:$src1, VR128:$src2,
|
||||||
SHUFP_shuffle_mask:$src3))]>;
|
SHUFP_shuffle_mask:$src3)))]>;
|
||||||
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
|
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
|
||||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
|
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
|
||||||
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
|
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst, (vector_shuffle
|
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||||
(v4f32 VR128:$src1), (load addr:$src2),
|
VR128:$src1, (load addr:$src2),
|
||||||
SHUFP_shuffle_mask:$src3))]>;
|
SHUFP_shuffle_mask:$src3)))]>;
|
||||||
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
|
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
|
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst, (vector_shuffle
|
[(set VR128:$dst, (v2f64 (vector_shuffle
|
||||||
(v2f64 VR128:$src1), (v2f64 VR128:$src2),
|
VR128:$src1, VR128:$src2,
|
||||||
SHUFP_shuffle_mask:$src3))]>;
|
SHUFP_shuffle_mask:$src3)))]>;
|
||||||
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
|
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
|
||||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
|
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
|
||||||
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
|
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst, (vector_shuffle
|
[(set VR128:$dst, (v2f64 (vector_shuffle
|
||||||
(v2f64 VR128:$src1), (load addr:$src2),
|
VR128:$src1, (load addr:$src2),
|
||||||
SHUFP_shuffle_mask:$src3))]>;
|
SHUFP_shuffle_mask:$src3)))]>;
|
||||||
|
|
||||||
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
|
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"unpckhps {$src2, $dst|$dst, $src2}",
|
"unpckhps {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||||
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
|
VR128:$src1, VR128:$src2,
|
||||||
UNPCKH_shuffle_mask)))]>;
|
UNPCKH_shuffle_mask)))]>;
|
||||||
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
|
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
|
||||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||||
"unpckhps {$src2, $dst|$dst, $src2}",
|
"unpckhps {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||||
(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
|
VR128:$src1, (load addr:$src2),
|
||||||
UNPCKH_shuffle_mask)))]>;
|
UNPCKH_shuffle_mask)))]>;
|
||||||
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
|
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"unpckhpd {$src2, $dst|$dst, $src2}",
|
"unpckhpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst, (v2f64 (vector_shuffle
|
||||||
(v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
|
VR128:$src1, VR128:$src2,
|
||||||
UNPCKH_shuffle_mask)))]>;
|
UNPCKH_shuffle_mask)))]>;
|
||||||
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
|
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
|
||||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||||
"unpckhpd {$src2, $dst|$dst, $src2}",
|
"unpckhpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst, (v2f64 (vector_shuffle
|
||||||
(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
|
VR128:$src1, (load addr:$src2),
|
||||||
UNPCKH_shuffle_mask)))]>;
|
UNPCKH_shuffle_mask)))]>;
|
||||||
|
|
||||||
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
|
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"unpcklps {$src2, $dst|$dst, $src2}",
|
"unpcklps {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||||
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
|
VR128:$src1, VR128:$src2,
|
||||||
UNPCKL_shuffle_mask)))]>;
|
UNPCKL_shuffle_mask)))]>;
|
||||||
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
|
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
|
||||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||||
"unpcklps {$src2, $dst|$dst, $src2}",
|
"unpcklps {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||||
(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
|
VR128:$src1, (load addr:$src2),
|
||||||
UNPCKL_shuffle_mask)))]>;
|
UNPCKL_shuffle_mask)))]>;
|
||||||
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
|
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
|
||||||
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
(ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||||
"unpcklpd {$src2, $dst|$dst, $src2}",
|
"unpcklpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst, (v2f64 (vector_shuffle
|
||||||
(v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
|
VR128:$src1, VR128:$src2,
|
||||||
UNPCKL_shuffle_mask)))]>;
|
UNPCKL_shuffle_mask)))]>;
|
||||||
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
|
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
|
||||||
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
|
||||||
"unpcklpd {$src2, $dst|$dst, $src2}",
|
"unpcklpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst, (v2f64 (vector_shuffle
|
||||||
(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
|
VR128:$src1, (load addr:$src2),
|
||||||
UNPCKL_shuffle_mask)))]>;
|
UNPCKL_shuffle_mask)))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1354,11 +1359,3 @@ def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
|
|||||||
(v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
|
(v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
|
||||||
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
|
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
|
||||||
(v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
|
(v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
|
|
||||||
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
|
|
||||||
(v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
|
|
||||||
Requires<[HasSSE2]>;
|
|
||||||
def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
|
|
||||||
(v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
|
|
||||||
Requires<[HasSSE2]>;
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user