Mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2025-01-14 16:33:28 +00:00
Add AVX matching patterns to Packed Bit Test intrinsics.
Apply the same approach as the SSE4.1 ptest intrinsics, but create a new x86 node "testp", since AVX introduces the vtest{ps,pd} instructions, which set ZF and CF depending on the sign-bit AND and ANDN of packed floating-point sources. This is slightly different from what "ptest" does. Tests are coming with the other 256-bit intrinsics tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110744 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 625051be7e
commit 045573ce21
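For context, here is a minimal scalar sketch (not part of this commit; the struct and helper name are illustrative only) of the flag semantics the new TESTP node models, following the description above: vtestps sets ZF when the sign bits of (src1 AND src2) are all zero and CF when the sign bits of ((NOT src1) AND src2) are all zero, whereas ptest applies the same test to every bit of the operands.

// Illustrative model only -- not code from this commit.
#include <cstdint>

struct PackedTestFlags { bool ZF, CF; };

// Scalar model of vtestps over four packed single-precision lanes,
// treating each lane as its raw 32-bit pattern.
static PackedTestFlags VTestPSModel(const uint32_t Src1[4],
                                    const uint32_t Src2[4]) {
  uint32_t SignAnd = 0, SignAndN = 0;
  for (int i = 0; i != 4; ++i) {
    SignAnd  |= (Src1[i] &  Src2[i]) & 0x80000000u; // sign bit of AND
    SignAndN |= (~Src1[i] & Src2[i]) & 0x80000000u; // sign bit of ANDN
  }
  // testz   -> ZF == 1 (lowered below via X86::COND_E)
  // testc   -> CF == 1 (lowered below via X86::COND_B)
  // testnzc -> ZF == 0 and CF == 0 (lowered below via X86::COND_A)
  return { SignAnd == 0, SignAndN == 0 };
}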
@@ -6987,24 +6987,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
                                 DAG.getConstant(X86CC, MVT::i8), Cond);
     return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
   }
-  // ptest intrinsics. The intrinsic these come from are designed to return
-  // an integer value, not just an instruction so lower it to the ptest
-  // pattern and a setcc for the result.
+  // ptest and testp intrinsics. The intrinsic these come from are designed to
+  // return an integer value, not just an instruction so lower it to the ptest
+  // or testp pattern and a setcc for the result.
   case Intrinsic::x86_sse41_ptestz:
   case Intrinsic::x86_sse41_ptestc:
-  case Intrinsic::x86_sse41_ptestnzc:{
+  case Intrinsic::x86_sse41_ptestnzc:
+  case Intrinsic::x86_avx_ptestz_256:
+  case Intrinsic::x86_avx_ptestc_256:
+  case Intrinsic::x86_avx_ptestnzc_256:
+  case Intrinsic::x86_avx_vtestz_ps:
+  case Intrinsic::x86_avx_vtestc_ps:
+  case Intrinsic::x86_avx_vtestnzc_ps:
+  case Intrinsic::x86_avx_vtestz_pd:
+  case Intrinsic::x86_avx_vtestc_pd:
+  case Intrinsic::x86_avx_vtestnzc_pd:
+  case Intrinsic::x86_avx_vtestz_ps_256:
+  case Intrinsic::x86_avx_vtestc_ps_256:
+  case Intrinsic::x86_avx_vtestnzc_ps_256:
+  case Intrinsic::x86_avx_vtestz_pd_256:
+  case Intrinsic::x86_avx_vtestc_pd_256:
+  case Intrinsic::x86_avx_vtestnzc_pd_256: {
+    bool IsTestPacked = false;
     unsigned X86CC = 0;
     switch (IntNo) {
     default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+    case Intrinsic::x86_avx_vtestz_ps:
+    case Intrinsic::x86_avx_vtestz_pd:
+    case Intrinsic::x86_avx_vtestz_ps_256:
+    case Intrinsic::x86_avx_vtestz_pd_256:
+      IsTestPacked = true; // Fallthrough
     case Intrinsic::x86_sse41_ptestz:
+    case Intrinsic::x86_avx_ptestz_256:
      // ZF = 1
      X86CC = X86::COND_E;
      break;
+    case Intrinsic::x86_avx_vtestc_ps:
+    case Intrinsic::x86_avx_vtestc_pd:
+    case Intrinsic::x86_avx_vtestc_ps_256:
+    case Intrinsic::x86_avx_vtestc_pd_256:
+      IsTestPacked = true; // Fallthrough
     case Intrinsic::x86_sse41_ptestc:
+    case Intrinsic::x86_avx_ptestc_256:
      // CF = 1
      X86CC = X86::COND_B;
      break;
+    case Intrinsic::x86_avx_vtestnzc_ps:
+    case Intrinsic::x86_avx_vtestnzc_pd:
+    case Intrinsic::x86_avx_vtestnzc_ps_256:
+    case Intrinsic::x86_avx_vtestnzc_pd_256:
+      IsTestPacked = true; // Fallthrough
     case Intrinsic::x86_sse41_ptestnzc:
+    case Intrinsic::x86_avx_ptestnzc_256:
      // ZF and CF = 0
      X86CC = X86::COND_A;
      break;
@@ -7012,7 +7046,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
 
     SDValue LHS = Op.getOperand(1);
     SDValue RHS = Op.getOperand(2);
-    SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
+    unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+    SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
     SDValue CC = DAG.getConstant(X86CC, MVT::i8);
     SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
     return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
@@ -8033,6 +8068,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::AND:                return "X86ISD::AND";
   case X86ISD::MUL_IMM:            return "X86ISD::MUL_IMM";
   case X86ISD::PTEST:              return "X86ISD::PTEST";
+  case X86ISD::TESTP:              return "X86ISD::TESTP";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
   case X86ISD::MINGW_ALLOCA:       return "X86ISD::MINGW_ALLOCA";
   }
@@ -248,6 +248,9 @@ namespace llvm {
       // PTEST - Vector bitwise comparisons
       PTEST,
 
+      // TESTP - Vector packed fp sign bitwise comparisons
+      TESTP,
+
       // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
       // according to %al. An operator is needed so that this can be expanded
       // with control flow.
@@ -117,9 +117,10 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
 def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
 
 def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
-                                          SDTCisVT<1, v4f32>,
-                                          SDTCisVT<2, v4f32>]>;
+                                          SDTCisVec<1>,
+                                          SDTCisSameAs<2, 1>]>;
 def X86ptest   : SDNode<"X86ISD::PTEST",  SDTX86CmpPTest>;
+def X86testp   : SDNode<"X86ISD::TESTP",  SDTX86CmpPTest>;
 
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
@@ -4646,47 +4646,52 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
 // the intel intrinsic that corresponds to this.
 let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
 def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
-               [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+               [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
                OpSize, VEX;
-def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
-               "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+               "vptest\t{$src2, $src1|$src1, $src2}",
+               [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+               OpSize, VEX;
 
-def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
-               [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+               [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                OpSize, VEX;
 def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
-               "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
+               "vptest\t{$src2, $src1|$src1, $src2}",
+               [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+               OpSize, VEX;
 }
 
 let Defs = [EFLAGS] in {
 def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
               "ptest \t{$src2, $src1|$src1, $src2}",
-              [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+              [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
               OpSize;
-def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
               "ptest \t{$src2, $src1|$src1, $src2}",
-              [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+              [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
              OpSize;
 }
 
 // The bit test instructions below are AVX only
 multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                       X86MemOperand x86memop> {
-  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                 []>, OpSize, VEX;
-  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                 []>, OpSize, VEX;
+                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                 [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                 [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+                 OpSize, VEX;
 }
 
 let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
-defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem>;
-defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem>;
-defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem>;
-defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem>;
+defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
 }
 
 //===----------------------------------------------------------------------===//