mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-16 14:31:59 +00:00
x86 vector shuffle cleanup/fixes:
1. rename the movhp patfrag to movlhps, since that's what it actually matches. 2. eliminate the bogus movhps load and store patterns; they were incorrect. The load transforms are already handled (correctly) by shufps/unpack. 3. revert a recent test change to its correct form. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@86415 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
31c24bf5b3
commit
0b10b91465
@ -2528,6 +2528,21 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
|
||||
isUndefOrEqual(N->getMaskElt(3), 3);
|
||||
}
|
||||
|
||||
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
|
||||
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
|
||||
/// <2, 3, 2, 3>
|
||||
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
|
||||
unsigned NumElems = N->getValueType(0).getVectorNumElements();
|
||||
|
||||
if (NumElems != 4)
|
||||
return false;
|
||||
|
||||
return isUndefOrEqual(N->getMaskElt(0), 2) &&
|
||||
isUndefOrEqual(N->getMaskElt(1), 3) &&
|
||||
isUndefOrEqual(N->getMaskElt(2), 2) &&
|
||||
isUndefOrEqual(N->getMaskElt(3), 3);
|
||||
}
|
||||
|
||||
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
|
||||
bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
|
||||
@ -2547,10 +2562,9 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
|
||||
/// and MOVLHPS.
|
||||
bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
|
||||
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
|
||||
bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
|
||||
unsigned NumElems = N->getValueType(0).getVectorNumElements();
|
||||
|
||||
if (NumElems != 2 && NumElems != 4)
|
||||
@ -2567,21 +2581,6 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
|
||||
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
|
||||
/// <2, 3, 2, 3>
|
||||
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
|
||||
unsigned NumElems = N->getValueType(0).getVectorNumElements();
|
||||
|
||||
if (NumElems != 4)
|
||||
return false;
|
||||
|
||||
return isUndefOrEqual(N->getMaskElt(0), 2) &&
|
||||
isUndefOrEqual(N->getMaskElt(1), 3) &&
|
||||
isUndefOrEqual(N->getMaskElt(2), 2) &&
|
||||
isUndefOrEqual(N->getMaskElt(3), 3);
|
||||
}
|
||||
|
||||
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
|
||||
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
|
||||
@ -4275,7 +4274,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
|
||||
X86::isMOVSLDUPMask(SVOp) ||
|
||||
X86::isMOVHLPSMask(SVOp) ||
|
||||
X86::isMOVHPMask(SVOp) ||
|
||||
X86::isMOVLHPSMask(SVOp) ||
|
||||
X86::isMOVLPMask(SVOp)))
|
||||
return Op;
|
||||
|
||||
|
@ -286,7 +286,7 @@ namespace llvm {
|
||||
/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
|
||||
/// as well as MOVLHPS.
|
||||
bool isMOVHPMask(ShuffleVectorSDNode *N);
|
||||
bool isMOVLHPSMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
|
||||
|
@ -225,9 +225,9 @@ def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movhp : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
|
||||
return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movlp : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
@ -735,7 +735,7 @@ let Constraints = "$src1 = $dst" in {
|
||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||
"movhps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(movhp VR128:$src1,
|
||||
(movlhps VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
|
||||
} // AddedComplexity
|
||||
} // Constraints = "$src1 = $dst"
|
||||
@ -760,7 +760,7 @@ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
"movlhps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
|
||||
(v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
|
||||
|
||||
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
@ -1494,7 +1494,7 @@ let Constraints = "$src1 = $dst" in {
|
||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||
"movhpd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (movhp VR128:$src1,
|
||||
(v2f64 (movlhps VR128:$src1,
|
||||
(scalar_to_vector (loadf64 addr:$src2)))))]>;
|
||||
} // AddedComplexity
|
||||
} // Constraints = "$src1 = $dst"
|
||||
@ -3035,7 +3035,7 @@ def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
|
||||
def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
|
||||
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
|
||||
@ -3051,48 +3051,26 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
|
||||
// vector_shuffle v1, (load v2) <6, 7, 2, 3> using MOVHPS
|
||||
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4f32 (movhlps (load addr:$src1), VR128:$src2)),
|
||||
(MOVHPSrm VR128:$src2, addr:$src1)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2f64 (movhlps (load addr:$src1), VR128:$src2)),
|
||||
(MOVHPDrm VR128:$src2, addr:$src1)>, Requires<[HasSSE2]>;
|
||||
|
||||
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
|
||||
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
|
||||
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
|
||||
// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
|
||||
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v4f32 (movhlps (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2f64 (movhlps (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
|
||||
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
|
||||
addr:$src1),
|
||||
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
|
||||
addr:$src1),
|
||||
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
|
||||
|
||||
let AddedComplexity = 15 in {
|
||||
// Setting the lowest element in the vector.
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
|
||||
; RUN: grep movlhps %t | count 1
|
||||
; RUN: grep movhps %t | count 1
|
||||
; RUN: grep movhlps %t | count 1
|
||||
|
||||
define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
|
||||
%tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
|
||||
|
Loading…
x
Reference in New Issue
Block a user