X86: use vpsllvd (& friends) for 16-bit shifts on Haswell

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201558 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2014-02-18 11:15:32 +00:00
parent 257ed1ed69
commit d729dfc96e
2 changed files with 47 additions and 0 deletions

View File

@ -13161,6 +13161,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
@ -13204,6 +13205,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return R;
}
// It's worth extending once and using the v8i32 shifts for 16-bit types, but
// the extra overheads to get from v16i8 to v8i32 make the existing SSE
// solution better.
if (Subtarget->hasInt256() && VT == MVT::v8i16) {
MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16;
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, NewVT, R);
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
}
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();

View File

@ -266,3 +266,36 @@ define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
%c = sext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
}
; Variable (per-lane) left shift of eight 16-bit elements.
; The FileCheck directives below expect both inputs to be zero-extended into
; ymm registers with vpmovzxwd (the two DAG-style checks allow either order),
; followed by a vpsllvd that consumes the two captured registers, then a
; vpshufb and a vpermq -- presumably the narrowing back to 16-bit lanes.
; NOTE(review): the checks assume %lhs arrives in xmm0 and %rhs in xmm1;
; confirm against the RUN line / calling convention of this test file.
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_shl16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
%res = shl <8 x i16> %lhs, %rhs
ret <8 x i16> %res
}
; Variable (per-lane) arithmetic right shift of eight 16-bit elements.
; The FileCheck directives below expect the register captured as AMT to be
; zero-extended (vpmovzxwd) while the register captured as LHS is
; sign-extended (vpmovsxwd), in either order, followed by a vpsravd on the
; two captured ymm registers, then a vpshufb and a vpermq -- presumably the
; narrowing back to 16-bit lanes.
; NOTE(review): the checks assume %lhs arrives in xmm0 and %rhs in xmm1;
; confirm against the RUN line / calling convention of this test file.
define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_ashr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
%res = ashr <8 x i16> %lhs, %rhs
ret <8 x i16> %res
}
; Variable (per-lane) logical right shift of eight 16-bit elements.
; The FileCheck directives below expect both inputs to be zero-extended into
; ymm registers with vpmovzxwd (the two DAG-style checks allow either order),
; followed by a vpsrlvd that consumes the two captured registers, then a
; vpshufb and a vpermq -- presumably the narrowing back to 16-bit lanes.
; NOTE(review): the checks assume %lhs arrives in xmm0 and %rhs in xmm1;
; confirm against the RUN line / calling convention of this test file.
define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_lshr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
%res = lshr <8 x i16> %lhs, %rhs
ret <8 x i16> %res
}