mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-09-30 19:55:11 +00:00
ZERO_EXTEND/SIGN_EXTEND/TRUNCATE optimization for AVX2
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155309 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
eb23f9e92e
commit
1da5867236
@@ -4520,8 +4520,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
|
|||||||
SDValue Op = N0.getOperand(0);
|
SDValue Op = N0.getOperand(0);
|
||||||
if (Op.getValueType().bitsLT(VT)) {
|
if (Op.getValueType().bitsLT(VT)) {
|
||||||
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
|
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
|
||||||
|
AddToWorkList(Op.getNode());
|
||||||
} else if (Op.getValueType().bitsGT(VT)) {
|
} else if (Op.getValueType().bitsGT(VT)) {
|
||||||
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
|
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
|
||||||
|
AddToWorkList(Op.getNode());
|
||||||
}
|
}
|
||||||
return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
|
return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
|
||||||
N0.getValueType().getScalarType());
|
N0.getValueType().getScalarType());
|
||||||
|
@@ -1222,6 +1222,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setTargetDAGCombine(ISD::LOAD);
|
setTargetDAGCombine(ISD::LOAD);
|
||||||
setTargetDAGCombine(ISD::STORE);
|
setTargetDAGCombine(ISD::STORE);
|
||||||
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
||||||
|
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||||
setTargetDAGCombine(ISD::TRUNCATE);
|
setTargetDAGCombine(ISD::TRUNCATE);
|
||||||
setTargetDAGCombine(ISD::SINT_TO_FP);
|
setTargetDAGCombine(ISD::SINT_TO_FP);
|
||||||
@@ -13033,6 +13034,20 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
|
|
||||||
if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
|
if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
|
||||||
|
|
||||||
|
if (Subtarget->hasAVX2()) {
|
||||||
|
// AVX2: v4i64 -> v4i32
|
||||||
|
|
||||||
|
// VPERMD
|
||||||
|
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
|
||||||
|
|
||||||
|
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op);
|
||||||
|
Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32),
|
||||||
|
ShufMask);
|
||||||
|
|
||||||
|
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op, DAG.getIntPtrConstant(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// AVX: v4i64 -> v4i32
|
||||||
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
|
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
|
||||||
DAG.getIntPtrConstant(0));
|
DAG.getIntPtrConstant(0));
|
||||||
|
|
||||||
@@ -13057,6 +13072,40 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
}
|
}
|
||||||
if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
|
if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
|
||||||
|
|
||||||
|
if (Subtarget->hasAVX2()) {
|
||||||
|
// AVX2: v8i32 -> v8i16
|
||||||
|
|
||||||
|
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op);
|
||||||
|
// PSHUFB
|
||||||
|
SmallVector<SDValue,32> pshufbMask;
|
||||||
|
for (unsigned i = 0; i < 2; ++i) {
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
|
||||||
|
for (unsigned j = 0; j < 8; ++j)
|
||||||
|
pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
|
||||||
|
}
|
||||||
|
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8, &pshufbMask[0],
|
||||||
|
32);
|
||||||
|
Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV);
|
||||||
|
|
||||||
|
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op);
|
||||||
|
|
||||||
|
static const int ShufMask[] = {0, 2, -1, -1};
|
||||||
|
Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64),
|
||||||
|
&ShufMask[0]);
|
||||||
|
|
||||||
|
Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
|
||||||
|
DAG.getIntPtrConstant(0));
|
||||||
|
|
||||||
|
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
|
||||||
|
}
|
||||||
|
|
||||||
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
|
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
|
||||||
DAG.getIntPtrConstant(0));
|
DAG.getIntPtrConstant(0));
|
||||||
|
|
||||||
@@ -14822,6 +14871,18 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
if (!Subtarget->hasAVX())
|
if (!Subtarget->hasAVX())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
EVT VT = N->getValueType(0);
|
||||||
|
SDValue Op = N->getOperand(0);
|
||||||
|
EVT OpVT = Op.getValueType();
|
||||||
|
DebugLoc dl = N->getDebugLoc();
|
||||||
|
|
||||||
|
if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
|
||||||
|
(VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
|
||||||
|
|
||||||
|
if (Subtarget->hasAVX2()) {
|
||||||
|
return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op);
|
||||||
|
}
|
||||||
|
|
||||||
// Optimize vectors in AVX mode
|
// Optimize vectors in AVX mode
|
||||||
// Sign extend v8i16 to v8i32 and
|
// Sign extend v8i16 to v8i32 and
|
||||||
// v4i32 to v4i64
|
// v4i32 to v4i64
|
||||||
@@ -14831,14 +14892,6 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
|
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
|
||||||
// concat the vectors to original VT
|
// concat the vectors to original VT
|
||||||
|
|
||||||
EVT VT = N->getValueType(0);
|
|
||||||
SDValue Op = N->getOperand(0);
|
|
||||||
EVT OpVT = Op.getValueType();
|
|
||||||
DebugLoc dl = N->getDebugLoc();
|
|
||||||
|
|
||||||
if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
|
|
||||||
(VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
|
|
||||||
|
|
||||||
unsigned NumElems = OpVT.getVectorNumElements();
|
unsigned NumElems = OpVT.getVectorNumElements();
|
||||||
SmallVector<int,8> ShufMask1(NumElems, -1);
|
SmallVector<int,8> ShufMask1(NumElems, -1);
|
||||||
for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i;
|
for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i;
|
||||||
@@ -14906,6 +14959,9 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
|
if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
|
||||||
((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
|
((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
|
||||||
|
|
||||||
|
if (Subtarget->hasAVX2())
|
||||||
|
return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0);
|
||||||
|
|
||||||
SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
|
SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
|
||||||
SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec,
|
SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec,
|
||||||
DAG);
|
DAG);
|
||||||
@@ -15108,6 +15164,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||||||
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
|
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
|
||||||
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
|
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
|
||||||
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
|
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
|
||||||
|
case ISD::ANY_EXTEND:
|
||||||
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget);
|
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget);
|
||||||
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
|
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
|
||||||
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
|
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
|
||||||
|
@@ -71,6 +71,11 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
|
|||||||
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
|
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
|
||||||
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
|
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
|
||||||
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
|
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
|
||||||
|
|
||||||
|
def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
|
||||||
|
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
|
||||||
|
SDTCisOpSmallerThanOp<1, 0> ]>>;
|
||||||
|
|
||||||
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
|
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
|
||||||
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
|
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
|
||||||
|
|
||||||
|
@@ -5730,14 +5730,26 @@ let Predicates = [HasSSE41] in {
|
|||||||
(PMOVZXDQrm addr:$src)>;
|
(PMOVZXDQrm addr:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let Predicates = [HasAVX2] in {
|
||||||
|
let AddedComplexity = 15 in {
|
||||||
|
def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))),
|
||||||
|
(VPMOVZXDQYrr VR128:$src)>;
|
||||||
|
def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))),
|
||||||
|
(VPMOVZXWDYrr VR128:$src)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
|
||||||
|
def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
|
||||||
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
|
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
|
||||||
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
|
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasSSE41] in {
|
let Predicates = [HasSSE41] in {
|
||||||
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
|
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
|
||||||
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
|
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
68
test/CodeGen/X86/avx2-conversions.ll
Executable file
68
test/CodeGen/X86/avx2-conversions.ll
Executable file
@@ -0,0 +1,68 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK: trunc4
|
||||||
|
; CHECK: vpermd
|
||||||
|
; CHECK-NOT: vinsert
|
||||||
|
; CHECK: ret
|
||||||
|
define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
|
||||||
|
%B = trunc <4 x i64> %A to <4 x i32>
|
||||||
|
ret <4 x i32>%B
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: trunc8
|
||||||
|
; CHECK: vpshufb
|
||||||
|
; CHECK-NOT: vinsert
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
|
||||||
|
%B = trunc <8 x i32> %A to <8 x i16>
|
||||||
|
ret <8 x i16>%B
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: sext4
|
||||||
|
; CHECK: vpmovsxdq
|
||||||
|
; CHECK-NOT: vinsert
|
||||||
|
; CHECK: ret
|
||||||
|
define <4 x i64> @sext4(<4 x i32> %A) nounwind {
|
||||||
|
%B = sext <4 x i32> %A to <4 x i64>
|
||||||
|
ret <4 x i64>%B
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: sext8
|
||||||
|
; CHECK: vpmovsxwd
|
||||||
|
; CHECK-NOT: vinsert
|
||||||
|
; CHECK: ret
|
||||||
|
define <8 x i32> @sext8(<8 x i16> %A) nounwind {
|
||||||
|
%B = sext <8 x i16> %A to <8 x i32>
|
||||||
|
ret <8 x i32>%B
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: zext4
|
||||||
|
; CHECK: vpmovzxdq
|
||||||
|
; CHECK-NOT: vinsert
|
||||||
|
; CHECK: ret
|
||||||
|
define <4 x i64> @zext4(<4 x i32> %A) nounwind {
|
||||||
|
%B = zext <4 x i32> %A to <4 x i64>
|
||||||
|
ret <4 x i64>%B
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: zext8
|
||||||
|
; CHECK: vpmovzxwd
|
||||||
|
; CHECK-NOT: vinsert
|
||||||
|
; CHECK: ret
|
||||||
|
define <8 x i32> @zext8(<8 x i16> %A) nounwind {
|
||||||
|
%B = zext <8 x i16> %A to <8 x i32>
|
||||||
|
ret <8 x i32>%B
|
||||||
|
}
|
||||||
|
; CHECK: zext_8i8_8i32
|
||||||
|
; CHECK: vpmovzxwd
|
||||||
|
; CHECK: vpand
|
||||||
|
; CHECK: ret
|
||||||
|
define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
|
||||||
|
%B = zext <8 x i8> %A to <8 x i32>
|
||||||
|
ret <8 x i32>%B
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user