mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 00:32:55 +00:00
Add AVX matching patterns to Packed Bit Test intrinsics.
Apply the same approach as the SSE4.1 ptest intrinsics, but create a new x86 node "testp", since AVX introduces the vtest{ps}{pd} instructions, which set ZF and CF depending on the sign-bit AND and ANDN of packed floating-point sources. This is slightly different from what "ptest" does. Tests are coming with the other 256-bit intrinsics tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110744 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
625051be7e
commit
045573ce21
@ -6987,24 +6987,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
|
||||
DAG.getConstant(X86CC, MVT::i8), Cond);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
// ptest intrinsics. The intrinsic these come from are designed to return
|
||||
// an integer value, not just an instruction so lower it to the ptest
|
||||
// pattern and a setcc for the result.
|
||||
// ptest and testp intrinsics. The intrinsic these come from are designed to
|
||||
// return an integer value, not just an instruction so lower it to the ptest
|
||||
// or testp pattern and a setcc for the result.
|
||||
case Intrinsic::x86_sse41_ptestz:
|
||||
case Intrinsic::x86_sse41_ptestc:
|
||||
case Intrinsic::x86_sse41_ptestnzc:{
|
||||
case Intrinsic::x86_sse41_ptestnzc:
|
||||
case Intrinsic::x86_avx_ptestz_256:
|
||||
case Intrinsic::x86_avx_ptestc_256:
|
||||
case Intrinsic::x86_avx_ptestnzc_256:
|
||||
case Intrinsic::x86_avx_vtestz_ps:
|
||||
case Intrinsic::x86_avx_vtestc_ps:
|
||||
case Intrinsic::x86_avx_vtestnzc_ps:
|
||||
case Intrinsic::x86_avx_vtestz_pd:
|
||||
case Intrinsic::x86_avx_vtestc_pd:
|
||||
case Intrinsic::x86_avx_vtestnzc_pd:
|
||||
case Intrinsic::x86_avx_vtestz_ps_256:
|
||||
case Intrinsic::x86_avx_vtestc_ps_256:
|
||||
case Intrinsic::x86_avx_vtestnzc_ps_256:
|
||||
case Intrinsic::x86_avx_vtestz_pd_256:
|
||||
case Intrinsic::x86_avx_vtestc_pd_256:
|
||||
case Intrinsic::x86_avx_vtestnzc_pd_256: {
|
||||
bool IsTestPacked = false;
|
||||
unsigned X86CC = 0;
|
||||
switch (IntNo) {
|
||||
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
|
||||
case Intrinsic::x86_avx_vtestz_ps:
|
||||
case Intrinsic::x86_avx_vtestz_pd:
|
||||
case Intrinsic::x86_avx_vtestz_ps_256:
|
||||
case Intrinsic::x86_avx_vtestz_pd_256:
|
||||
IsTestPacked = true; // Fallthrough
|
||||
case Intrinsic::x86_sse41_ptestz:
|
||||
case Intrinsic::x86_avx_ptestz_256:
|
||||
// ZF = 1
|
||||
X86CC = X86::COND_E;
|
||||
break;
|
||||
case Intrinsic::x86_avx_vtestc_ps:
|
||||
case Intrinsic::x86_avx_vtestc_pd:
|
||||
case Intrinsic::x86_avx_vtestc_ps_256:
|
||||
case Intrinsic::x86_avx_vtestc_pd_256:
|
||||
IsTestPacked = true; // Fallthrough
|
||||
case Intrinsic::x86_sse41_ptestc:
|
||||
case Intrinsic::x86_avx_ptestc_256:
|
||||
// CF = 1
|
||||
X86CC = X86::COND_B;
|
||||
break;
|
||||
case Intrinsic::x86_avx_vtestnzc_ps:
|
||||
case Intrinsic::x86_avx_vtestnzc_pd:
|
||||
case Intrinsic::x86_avx_vtestnzc_ps_256:
|
||||
case Intrinsic::x86_avx_vtestnzc_pd_256:
|
||||
IsTestPacked = true; // Fallthrough
|
||||
case Intrinsic::x86_sse41_ptestnzc:
|
||||
case Intrinsic::x86_avx_ptestnzc_256:
|
||||
// ZF and CF = 0
|
||||
X86CC = X86::COND_A;
|
||||
break;
|
||||
@ -7012,7 +7046,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
|
||||
|
||||
SDValue LHS = Op.getOperand(1);
|
||||
SDValue RHS = Op.getOperand(2);
|
||||
SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
|
||||
unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
|
||||
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
|
||||
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
@ -8033,6 +8068,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::AND: return "X86ISD::AND";
|
||||
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
|
||||
case X86ISD::PTEST: return "X86ISD::PTEST";
|
||||
case X86ISD::TESTP: return "X86ISD::TESTP";
|
||||
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
|
||||
case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
|
||||
}
|
||||
|
@ -248,6 +248,9 @@ namespace llvm {
|
||||
// PTEST - Vector bitwise comparisons
|
||||
PTEST,
|
||||
|
||||
// TESTP - Vector packed fp sign bitwise comparisons
|
||||
TESTP,
|
||||
|
||||
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
|
||||
// according to %al. An operator is needed so that this can be expanded
|
||||
// with control flow.
|
||||
|
@ -117,9 +117,10 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
|
||||
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
|
||||
|
||||
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
|
||||
SDTCisVT<1, v4f32>,
|
||||
SDTCisVT<2, v4f32>]>;
|
||||
SDTCisVec<1>,
|
||||
SDTCisSameAs<2, 1>]>;
|
||||
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
|
||||
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE Complex Patterns
|
||||
|
@ -4646,47 +4646,52 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
|
||||
// the intel intrinsic that corresponds to this.
|
||||
let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
|
||||
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
|
||||
OpSize, VEX;
|
||||
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
|
||||
OpSize, VEX;
|
||||
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
|
||||
OpSize, VEX;
|
||||
|
||||
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
|
||||
OpSize, VEX;
|
||||
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
|
||||
OpSize, VEX;
|
||||
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
|
||||
"vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
|
||||
"vptest\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
|
||||
OpSize, VEX;
|
||||
}
|
||||
|
||||
let Defs = [EFLAGS] in {
|
||||
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
|
||||
"ptest \t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
|
||||
"ptest \t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
|
||||
OpSize;
|
||||
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
|
||||
"ptest \t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
|
||||
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
|
||||
"ptest \t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
// The bit test instructions below are AVX only
|
||||
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
X86MemOperand x86memop> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[]>, OpSize, VEX;
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[]>, OpSize, VEX;
|
||||
X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
|
||||
[(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
|
||||
[(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
|
||||
OpSize, VEX;
|
||||
}
|
||||
|
||||
let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
|
||||
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem>;
|
||||
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem>;
|
||||
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem>;
|
||||
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem>;
|
||||
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
|
||||
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
|
||||
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
|
||||
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
Loading…
x
Reference in New Issue
Block a user