Enable lowering ZERO_EXTEND/ANY_EXTEND to PMOVZX from SSE4.1
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166486 91177308-0d34-0410-b5e6-96231b3b80d8
parent: bf261f11a0
commit: d9d09600ee
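With this change, the shuffle sequences that type legalization emits for vector zero- and any-extension are recognized and selected as a single pmovzx* instruction on SSE4.1-capable targets. A minimal IR sketch of the kind of code that benefits (the function name and types are illustrative, not taken from this patch's tests):

define <4 x i32> @zext_v4i16(<4 x i16>* %p) {
  %v = load <4 x i16>* %p
  %z = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %z
}

Compiled with llc -mattr=+sse4.1, the extension is expected to select to pmovzxwd instead of a punpcklwd/pshufd sequence, as the updated tests below check.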
@@ -6562,6 +6562,78 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
                               getShuffleSHUFImmediate(SVOp), DAG);
 }
 
+// Reduce a vector shuffle to zext.
+SDValue
+X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+  // PMOVZX is only available from SSE41.
+  if (!Subtarget->hasSSE41())
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+
+  // Only AVX2 supports 256-bit vector integer extension.
+  if (!Subtarget->hasAVX2() && VT.is256BitVector())
+    return SDValue();
+
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  unsigned NumElems = VT.getVectorNumElements();
+
+  // Extension is a unary operation, and the element type of the source
+  // vector must be smaller than i64.
+  if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
+      VT.getVectorElementType() == MVT::i64)
+    return SDValue();
+
+  // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
+  unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
+  while ((1 << Shift) < NumElems) {
+    if (SVOp->getMaskElt(1 << Shift) == 1)
+      break;
+    Shift += 1;
+    // The maximal ratio is 8, i.e. from i8 to i64.
+    if (Shift > 3)
+      return SDValue();
+  }
+
+  // Check the shuffle mask.
+  unsigned Mask = (1U << Shift) - 1;
+  for (unsigned i = 0; i != NumElems; ++i) {
+    int EltIdx = SVOp->getMaskElt(i);
+    if ((i & Mask) != 0 && EltIdx != -1)
+      return SDValue();
+    if ((i & Mask) == 0 && EltIdx != (i >> Shift))
+      return SDValue();
+  }
+
+  unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
+  EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+
+  if (!isTypeLegal(NVT))
+    return SDValue();
+
+  // Simplify the operand before it is fed into the zero-extension.
+  unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
+  if (V1.getOpcode() == ISD::BITCAST &&
+      V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+      V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      V1.getOperand(0)
+        .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+    // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
+    SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
+    // If it's foldable, i.e. a normal load with a single use, let instruction
+    // selection fold it. Otherwise, shorten the conversion sequence here.
+    if (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())
+      V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+  }
+
+  return DAG.getNode(ISD::BITCAST, DL, VT,
+                     DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
+}
+
 SDValue
 X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
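For reference, the shuffle shape this routine matches can be written in IR as below (an illustrative sketch, not a test from this patch). With an expansion ratio of 2 (Shift == 1), every even mask element selects the next source element and every odd element is undef, so the shuffle is rebuilt as a bitcast of an X86ISD::VZEXT node:

; v8i16 shuffle acting as a ratio-2 extension; viewed as v4i32 it is a
; zero/any-extend of the low four i16 elements, i.e. a pmovzxwd candidate.
%e = shufflevector <8 x i16> %x, <8 x i16> undef,
     <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>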
@@ -6592,6 +6664,11 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
     return PromoteSplat(SVOp, DAG);
   }
 
+  // Check integer expanding shuffles.
+  SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+  if (NewOp.getNode())
+    return NewOp;
+
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
   if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
@@ -11825,6 +11902,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
   case X86ISD::VSEXT_MOVL:         return "X86ISD::VSEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VZEXT:              return "X86ISD::VZEXT";
+  case X86ISD::VSEXT:              return "X86ISD::VSEXT";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
@@ -16529,6 +16608,21 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
   return OptimizeConditionalInDecrement(N, DAG);
 }
 
+/// performVZEXTCombine - Performs VZEXT combines.
+static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const X86Subtarget *Subtarget) {
+  // (vzext (bitcast (vzext x))) -> (vzext x)
+  SDValue In = N->getOperand(0);
+  while (In.getOpcode() == ISD::BITCAST)
+    In = In.getOperand(0);
+
+  if (In.getOpcode() != X86ISD::VZEXT)
+    return SDValue();
+
+  return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+                     In.getOperand(0));
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
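The combine strips any bitcasts sitting between two nested X86ISD::VZEXT nodes and keeps only the outer extension; it runs on the selection DAG, not on IR. A sketch of IR whose lowering can produce such a chain (an assumed scenario for illustration, not a test from this patch):

define <4 x i32> @double_zext(<4 x i8> %x) {
  %w = zext <4 x i8> %x to <4 x i16>
  %d = zext <4 x i16> %w to <4 x i32>
  ret <4 x i32> %d
}

Once both steps are lowered to VZEXT nodes with a bitcast in between, the pair collapses to a single vzext of %x, matching the (vzext (bitcast (vzext x))) -> (vzext x) comment above.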
@@ -16569,6 +16663,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SETCC:          return PerformISDSETCCCombine(N, DAG);
   case X86ISD::SETCC:       return PerformSETCCCombine(N, DAG, DCI, Subtarget);
   case X86ISD::BRCOND:      return PerformBrCondCombine(N, DAG, DCI, Subtarget);
+  case X86ISD::VZEXT:       return performVZEXTCombine(N, DAG, DCI, Subtarget);
   case X86ISD::SHUFP:       // Handle all target specific shuffles
   case X86ISD::PALIGN:
   case X86ISD::UNPCKH:
@@ -236,6 +236,12 @@ namespace llvm {
       // VSEXT_MOVL - Vector move low and sign extend.
       VSEXT_MOVL,
 
+      // VZEXT - Vector integer zero-extend.
+      VZEXT,
+
+      // VSEXT - Vector integer sign-extend.
+      VSEXT,
+
       // VFPEXT - Vector FP extend.
       VFPEXT,
 
@@ -832,6 +838,8 @@ namespace llvm {
 
     SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
+
     virtual SDValue
     LowerFormalArguments(SDValue Chain,
                          CallingConv::ID CallConv, bool isVarArg,
@@ -90,6 +90,14 @@ def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
+def X86vzext   : SDNode<"X86ISD::VZEXT",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisInt<0>, SDTCisInt<1>]>>;
+
+def X86vsext   : SDNode<"X86ISD::VSEXT",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisInt<0>, SDTCisInt<1>]>>;
+
 def X86vfpext  : SDNode<"X86ISD::VFPEXT",
                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                              SDTCisFP<0>, SDTCisFP<1>]>>;
@@ -5841,6 +5841,81 @@ let Predicates = [UseSSE41] in {
             (PMOVZXBQrm addr:$src)>;
 }
 
+let Predicates = [HasAVX2] in {
+  def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>;
+  def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>;
+  def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>;
+
+  def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>;
+  def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;
+
+  def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>;
+  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>;
+  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>;
+
+  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>;
+  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>;
+
+  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>;
+
+  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVZXBWrm addr:$src)>;
+  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVZXBWrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+            (VPMOVZXBDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+            (VPMOVZXBQrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVZXWDrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVZXWDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+            (VPMOVZXWQrm addr:$src)>;
+
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVZXDQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>;
+  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>;
+  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>;
+
+  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>;
+  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>;
+
+  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>;
+
+  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVZXBWrm addr:$src)>;
+  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVZXBWrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+            (PMOVZXBDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+            (PMOVZXBQrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVZXWDrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVZXWDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+            (PMOVZXWQrm addr:$src)>;
+
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVZXDQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVZXDQrm addr:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Extract Instructions
 //===----------------------------------------------------------------------===//
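The rm patterns above fold the source load into the extension: the legalizer expresses a narrow vector load as a scalar load wrapped in scalar_to_vector plus a bitconvert, which is exactly the shape these patterns match. A minimal IR trigger (an illustrative sketch, not a test from this patch):

define <8 x i16> @zext_load_v8i8(<8 x i8>* %p) {
  %v = load <8 x i8>* %p
  %z = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %z
}

The 64-bit load of %v is expected to appear in the DAG as (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr))))), so the whole function can select to a single pmovzxbw from memory.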
@@ -2,8 +2,8 @@
 
 ;CHECK: vcast
 define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
-;CHECK: pshufd
-;CHECK: pshufd
+;CHECK: pmovzxdq
+;CHECK: pmovzxdq
 %af = bitcast <2 x float> %a to <2 x i32>
 %bf = bitcast <2 x float> %b to <2 x i32>
 %x = sub <2 x i32> %af, %bf
@@ -4,7 +4,7 @@
 define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone {
 entry:
   %out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK: shufb
+; CHECK: pmovzxbd
   ret <4 x i8> %out
 ; CHECK: ret
 }
@@ -3,7 +3,7 @@
 ; CHECK: load_store
 define void @load_store(<4 x i16>* %in) {
 entry:
-; CHECK: movsd
+; CHECK: pmovzxwd
   %A27 = load <4 x i16>* %in, align 4
   %A28 = add <4 x i16> %A27, %A27
 ; CHECK: movlpd
@@ -27,6 +27,6 @@ define <2 x i32> @load_64(<2 x i32>* %ptr) {
 BB:
   %t = load <2 x i32>* %ptr
   ret <2 x i32> %t
-;CHECK: movsd
+;CHECK: pmovzxdq
 ;CHECK: ret
 }
@@ -81,8 +81,7 @@ define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind {
 entry:
   %G = load <4 x i8>* %p
 ;CHECK: movl
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxbd
 ;CHECK: pand
   %K = inttoptr <4 x i8> %G to <4 x i32*>
 ;CHECK: ret
@@ -105,7 +104,7 @@ define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
 entry:
   %G = load <2 x i8*>* %p
 ;CHECK: movl
-;CHECK: movsd
+;CHECK: pmovzxdq
   %T = bitcast <2 x i8*> %G to <2 x i32*>
 ;CHECK: ret
   ret <2 x i32*> %T
@@ -20,7 +20,7 @@ entry:
 ; CHECK: shuff_f
 define i32 @shuff_f(<4 x i8>* %A) {
 entry:
-; CHECK: pshufb
+; CHECK: pmovzxbd
 ; CHECK: paddd
 ; CHECK: pshufb
   %0 = load <4 x i8>* %A, align 8
@@ -2,8 +2,7 @@
 
 ;CHECK: load_2_i8
 ; A single 16-bit load
-;CHECK: movzwl
-;CHECK: pshufb
+;CHECK: pmovzxbq
 ;CHECK: paddq
 ;CHECK: pshufb
 ; A single 16-bit store
@@ -19,8 +18,7 @@ define void @load_2_i8(<2 x i8>* %A) {
 
 ;CHECK: load_2_i16
 ; Read 32-bits
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxwq
 ;CHECK: paddq
 ;CHECK: pshufb
 ;CHECK: movd
@@ -33,7 +31,7 @@ define void @load_2_i16(<2 x i16>* %A) {
 }
 
 ;CHECK: load_2_i32
-;CHECK: pshufd
+;CHECK: pmovzxdq
 ;CHECK: paddq
 ;CHECK: pshufd
 ;CHECK: ret
@@ -45,8 +43,7 @@ define void @load_2_i32(<2 x i32>* %A) {
 }
 
 ;CHECK: load_4_i8
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxbd
 ;CHECK: paddd
 ;CHECK: pshufb
 ;CHECK: ret
@@ -58,7 +55,7 @@ define void @load_4_i8(<4 x i8>* %A) {
 }
 
 ;CHECK: load_4_i16
-;CHECK: punpcklwd
+;CHECK: pmovzxwd
 ;CHECK: paddd
 ;CHECK: pshufb
 ;CHECK: ret
@@ -70,7 +67,7 @@ define void @load_4_i16(<4 x i16>* %A) {
 }
 
 ;CHECK: load_8_i8
-;CHECK: punpcklbw
+;CHECK: pmovzxbw
 ;CHECK: paddw
 ;CHECK: pshufb
 ;CHECK: ret
@@ -10,8 +10,7 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
 entry:
 ; CHECK: cfi_def_cfa_offset
 ; CHECK-NOT: set
-; CHECK: punpcklwd
-; CHECK: pshufd
+; CHECK: pmovzxwq
 ; CHECK: pshufb
   %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
   %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
@@ -170,7 +170,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
 ; CHECK: rot
 %i8vec3pack = type { <3 x i8>, i8 }
 define %i8vec3pack @rot() nounwind {
-; CHECK: movd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
+; CHECK: pmovzxbd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
 entry:
   %X = alloca %i8vec3pack, align 4
   %rot = alloca %i8vec3pack, align 4