Add AVX matching patterns to Packed Bit Test intrinsics.

Apply the same approach as the SSE4.1 ptest intrinsics, but
create a new x86 node "testp", since AVX introduces the
vtest{ps}{pd} instructions, which set ZF and CF depending on
the AND and ANDN of the sign bits of the packed floating-point sources.

This is slightly different from what "ptest" does.
Tests are coming with the other 256-bit intrinsics tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110744 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-08-10 23:25:42 +00:00
parent 625051be7e
commit 045573ce21
4 changed files with 78 additions and 33 deletions

View File

@ -6987,24 +6987,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond); DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
} }
// ptest intrinsics. The intrinsic these come from are designed to return // ptest and testp intrinsics. The intrinsic these come from are designed to
// an integer value, not just an instruction so lower it to the ptest // return an integer value, not just an instruction so lower it to the ptest
// pattern and a setcc for the result. // or testp pattern and a setcc for the result.
case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:{ case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestz_256:
case Intrinsic::x86_avx_ptestc_256:
case Intrinsic::x86_avx_ptestnzc_256:
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
unsigned X86CC = 0; unsigned X86CC = 0;
switch (IntNo) { switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
// ZF = 1 // ZF = 1
X86CC = X86::COND_E; X86CC = X86::COND_E;
break; break;
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
// CF = 1 // CF = 1
X86CC = X86::COND_B; X86CC = X86::COND_B;
break; break;
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestnzc: case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0 // ZF and CF = 0
X86CC = X86::COND_A; X86CC = X86::COND_A;
break; break;
@ -7012,7 +7046,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDValue LHS = Op.getOperand(1); SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2); SDValue RHS = Op.getOperand(2);
SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS); unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue CC = DAG.getConstant(X86CC, MVT::i8); SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
@ -8033,6 +8068,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::AND: return "X86ISD::AND"; case X86ISD::AND: return "X86ISD::AND";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
} }

View File

@ -248,6 +248,9 @@ namespace llvm {
// PTEST - Vector bitwise comparisons // PTEST - Vector bitwise comparisons
PTEST, PTEST,
// TESTP - Vector packed fp sign bitwise comparisons
TESTP,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded // according to %al. An operator is needed so that this can be expanded
// with control flow. // with control flow.

View File

@ -117,9 +117,10 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVT<1, v4f32>, SDTCisVec<1>,
SDTCisVT<2, v4f32>]>; SDTCisSameAs<2, 1>]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// SSE Complex Patterns // SSE Complex Patterns

View File

@ -4647,46 +4647,51 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}", "vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
OpSize, VEX; OpSize, VEX;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}", "vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
OpSize, VEX;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
OpSize, VEX; OpSize, VEX;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX; "vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
OpSize, VEX;
} }
let Defs = [EFLAGS] in { let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest \t{$src2, $src1|$src1, $src2}", "ptest \t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
OpSize; OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"ptest \t{$src2, $src1|$src1, $src2}", "ptest \t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
OpSize; OpSize;
} }
// The bit test instructions below are AVX only // The bit test instructions below are AVX only
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop> { X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[]>, OpSize, VEX; [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[]>, OpSize, VEX; [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
OpSize, VEX;
} }
let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem>; defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem>; defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem>; defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem>; defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
} }
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//