mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-02 07:32:52 +00:00
Add support for gcc __builtin_ia32_ptest{z,c,nzc} intrinsics. Lower
to ptest instruction plus setcc. Revamp ptest instruction. Add test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77407 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
28daa10246
commit
71c6753d03
@ -864,6 +864,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
// Test instruction with bitwise comparison.
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MMX
|
||||
|
@ -6200,6 +6200,36 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
DAG.getConstant(X86CC, MVT::i8), Cond);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
// ptest intrinsics. The intrinsics these come from are designed to return
|
||||
// a boolean value, not just an instruction, so lower them to the ptest
|
||||
// pattern and a setcc (zero-extended to i32) that produces the result.
|
||||
case Intrinsic::x86_sse41_ptestz:
|
||||
case Intrinsic::x86_sse41_ptestc:
|
||||
case Intrinsic::x86_sse41_ptestnzc:{
|
||||
unsigned X86CC = 0;
|
||||
switch (IntNo) {
|
||||
default: break;
|
||||
case Intrinsic::x86_sse41_ptestz:
|
||||
// ZF = 1
|
||||
X86CC = X86::COND_E;
|
||||
break;
|
||||
case Intrinsic::x86_sse41_ptestc:
|
||||
// CF = 1
|
||||
X86CC = X86::COND_B;
|
||||
break;
|
||||
case Intrinsic::x86_sse41_ptestnzc:
|
||||
// ZF = 0 and CF = 0
|
||||
X86CC = X86::COND_A;
|
||||
break;
|
||||
}
|
||||
|
||||
SDValue LHS = Op.getOperand(1);
|
||||
SDValue RHS = Op.getOperand(2);
|
||||
SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
|
||||
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
|
||||
// Fix vector shift instructions where the last operand is a non-immediate
|
||||
// i32 value.
|
||||
@ -7048,6 +7078,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::INC: return "X86ISD::INC";
|
||||
case X86ISD::DEC: return "X86ISD::DEC";
|
||||
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
|
||||
case X86ISD::PTEST: return "X86ISD::PTEST";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -244,7 +244,10 @@ namespace llvm {
|
||||
INC, DEC,
|
||||
|
||||
// MUL_IMM - X86 specific multiply by immediate.
|
||||
MUL_IMM
|
||||
MUL_IMM,
|
||||
|
||||
// PTEST - Vector bitwise comparisons
|
||||
PTEST
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -69,6 +69,9 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
|
||||
def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
|
||||
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
|
||||
|
||||
def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4f32>]>;
|
||||
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE Complex Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3618,11 +3621,17 @@ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
|
||||
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
|
||||
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
|
||||
|
||||
// ptest instruction. We lower to this in X86ISelLowering, primarily from
|
||||
// the Intel intrinsics that correspond to it.
|
||||
let Defs = [EFLAGS] in {
|
||||
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
|
||||
"ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
|
||||
"ptest \t{$src2, $src1|$src1, $src2}",
|
||||
[(X86ptest VR128:$src1, VR128:$src2),
|
||||
(implicit EFLAGS)]>, OpSize;
|
||||
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
|
||||
"ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
|
||||
"ptest \t{$src2, $src1|$src1, $src2}",
|
||||
[(X86ptest VR128:$src1, (load addr:$src2)),
|
||||
(implicit EFLAGS)]>, OpSize;
|
||||
}
|
||||
|
||||
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
|
@ -181,4 +181,19 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
|
||||
|
||||
; X64: _insertps_3:
|
||||
; X64: insertps $0, %xmm1, %xmm0
|
||||
}
|
||||
}
|
||||
|
||||
define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
|
||||
%tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
|
||||
ret i32 %tmp1
|
||||
; X32: _ptestz_1:
|
||||
; X32: ptest %xmm1, %xmm0
|
||||
; X32: sete %al
|
||||
|
||||
; X64: _ptestz_1:
|
||||
; X64: ptest %xmm1, %xmm0
|
||||
; X64: sete %al
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user