From f6aa6b12f132b41b1337fef14110696458a0f323 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 1 Nov 2011 21:18:39 +0000 Subject: [PATCH] Teach the x86 backend a couple tricks for dealing with v16i8 sra by a constant splat value. Fixes PR11289. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143498 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++++++++ test/CodeGen/X86/x86-shifts.ll | 20 ++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2ddb1b7163e..c9b642242ae 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -966,6 +966,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v4i32, Custom); setOperationAction(ISD::SRA, MVT::v8i16, Custom); + setOperationAction(ISD::SRA, MVT::v16i8, Custom); } if (Subtarget->hasSSE42() || Subtarget->hasAVX()) @@ -9994,6 +9995,23 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32), R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) { + if (ShiftAmt == 7) { + // R s>> 7 === R s< 0 + SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl); + return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R); + } + + // R s>> a === ((R u>> a) ^ m) - m + SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); + SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt, + MVT::i8)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16); + Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); + Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); + return Res; + } } } diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll index 3e44eafa240..20bccab8ff7 100644 --- a/test/CodeGen/X86/x86-shifts.ll +++ 
b/test/CodeGen/X86/x86-shifts.ll @@ -170,3 +170,23 @@ define <16 x i8> @shr9(<16 x i8> %A) nounwind { ; CHECK: pand ; CHECK: ret } + +define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind { + %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + ret <16 x i8> %B +; CHECK: sra_v16i8_7: +; CHECK: pxor +; CHECK: pcmpgtb +; CHECK: ret +} + +define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind { + %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %B +; CHECK: sra_v16i8: +; CHECK: psrlw $3 +; CHECK: pand +; CHECK: pxor +; CHECK: psubb +; CHECK: ret +}