From 09df2ae0d056f846850732b4ec1ab49dee9791cc Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Wed, 7 Jul 2010 01:14:56 +0000 Subject: [PATCH] Add AVX SSE4.1 insertps, ptest and movntdqa instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107747 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 57 +++++++++++++++++-------- test/MC/AsmParser/X86/x86_32-encoding.s | 20 +++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 20 +++++++++ 3 files changed, 79 insertions(+), 18 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a1c25acb9cc..72619ad1715 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4602,33 +4602,49 @@ let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in // are optimized inserts that won't zero arbitrary elements in the destination // vector. The next one matches the intrinsic and could zero arbitrary elements // in the target vector. -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insertf32 opc, string OpcodeStr> { - def rr : SS4AIi8, +multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, OpSize; - def rm : SS4AIi8, OpSize; - } + def rm : SS4AIi8, OpSize; } -defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. +let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { +def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + OpSize, VEX; +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, + OpSize, VEX; +} + let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest \t{$src2, $src1|$src1, $src2}", @@ -4640,6 +4656,11 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in +def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + OpSize, VEX; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index f0a879bb902..eae0f6584f8 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -12102,3 +12102,23 @@ // CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07] vpinsrd $7, (%eax), %xmm2, %xmm5 +// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07] + vinsertps $7, %xmm2, %xmm5, %xmm1 + +// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07] + vinsertps $7, (%eax), %xmm5, %xmm1 + +// CHECK: vptest %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea] + vptest %xmm2, %xmm5 + +// CHECK: vptest (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10] + vptest (%eax), %xmm2 + +// CHECK: vmovntdqa (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] + vmovntdqa (%eax), %xmm2 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 013e2a52716..29529f3901f 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -2166,3 +2166,23 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] vpinsrq $7, (%rax), %xmm12, %xmm10 +// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07] + vinsertps $7, %xmm12, %xmm10, %xmm11 + +// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07] + vinsertps $7, (%rax), %xmm10, %xmm11 + +// CHECK: vptest %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4] + vptest %xmm12, %xmm10 + +// CHECK: vptest (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20] + vptest (%rax), %xmm12 + +// CHECK: vmovntdqa (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20] + vmovntdqa (%rax), %xmm12 +