From 876085dcfa0b16071b73d4b1bb2f531bf897417a Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Wed, 30 Jun 2010 04:06:39 +0000 Subject: [PATCH] Add AVX SSE2 integer unpack instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107246 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 67 +++++++++++++++++++++---- test/MC/AsmParser/X86/x86_32-encoding.s | 64 +++++++++++++++++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 64 +++++++++++++++++++++++ 3 files changed, 185 insertions(+), 10 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e0a058b3bf4..a93841f181d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2702,19 +2702,66 @@ let Predicates = [HasSSE2] in { //===---------------------------------------------------------------------===// let ExeDomain = SSEPackedInt in { - multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, - PatFrag unp_frag, PatFrag bc_frag> { + PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> { def rr : PDI<opc, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>; + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>; def rm : PDI<opc, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (unp_frag VR128:$src1, - (bc_frag (memopv2i64 - addr:$src2))))]>; + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (unp_frag VR128:$src1, + (bc_frag (memopv2i64 + addr:$src2))))]>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in { + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8, + 0>, VEX_4V; + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16, + 0>, VEX_4V; + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32, + 0>, VEX_4V; + + /// FIXME: we could 
eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. + def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V; + def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v2i64 (unpckl VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; + + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8, + 0>, VEX_4V; + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16, + 0>, VEX_4V; + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32, + 0>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. 
+ def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V; + def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v2i64 (unpckh VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; } let Constraints = "$src1 = $dst" in { diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 03e12fbc60e..a06aa2665c1 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -11418,3 +11418,67 @@ // CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04] vpshuflw $4, (%eax), %xmm3 +// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x60,0xd9] + vpunpcklbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x60,0x18] + vpunpcklbw (%eax), %xmm2, %xmm3 + +// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x61,0xd9] + vpunpcklwd %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x61,0x18] + vpunpcklwd (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x62,0xd9] + vpunpckldq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckldq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x62,0x18] + vpunpckldq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9] + vpunpcklqdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6c,0x18] + vpunpcklqdq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x68,0xd9] + vpunpckhbw %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3 +// CHECK: encoding: 
[0xc5,0xe9,0x68,0x18] + vpunpckhbw (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x69,0xd9] + vpunpckhwd %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x69,0x18] + vpunpckhwd (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9] + vpunpckhdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6a,0x18] + vpunpckhdq (%eax), %xmm2, %xmm3 + +// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9] + vpunpckhqdq %xmm1, %xmm2, %xmm3 + +// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0x6d,0x18] + vpunpckhqdq (%eax), %xmm2, %xmm3 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index a0be0478dfc..79510974798 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -1458,3 +1458,67 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04] vpshuflw $4, (%rax), %xmm13 +// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb] + vpunpcklbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x60,0x28] + vpunpcklbw (%rax), %xmm12, %xmm13 + +// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb] + vpunpcklwd %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x61,0x28] + vpunpcklwd (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb] + vpunpckldq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckldq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x62,0x28] + vpunpckldq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: 
[0xc4,0x41,0x19,0x6c,0xeb] + vpunpcklqdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6c,0x28] + vpunpcklqdq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb] + vpunpckhbw %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x68,0x28] + vpunpckhbw (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb] + vpunpckhwd %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x69,0x28] + vpunpckhwd (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb] + vpunpckhdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6a,0x28] + vpunpckhdq (%rax), %xmm12, %xmm13 + +// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb] + vpunpckhqdq %xmm11, %xmm12, %xmm13 + +// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0x6d,0x28] + vpunpckhqdq (%rax), %xmm12, %xmm13 +