Add AVX SSE2 integer unpack instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107246 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-06-30 04:06:39 +00:00
parent d252fec7ae
commit 876085dcfa
3 changed files with 185 additions and 10 deletions

View File

@ -2702,19 +2702,66 @@ let Predicates = [HasSSE2] in {
//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
// sse2_unpack: defines a register-source (rr) and a memory-source (rm) PDI
// record for one integer unpack opcode.
//   opc       - opcode byte handed to PDI.
//   OpcodeStr - mnemonic prepended to the operand string.
//   vt        - value type wrapped around the result of the rr pattern.
//   unp_frag  - pattern fragment applied to the two source operands.
//   bc_frag   - pattern fragment wrapped around the memopv2i64 load in rm.
//   Is2Addr   - defaults to 1; when set the asm string names only $src2 and
//               $dst, when clear it names $src2, $src1 and $dst.
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of the commit (the parameter line and each def's operand
// list occur twice) -- confirm against the actual .td file before relying on
// it as compilable TableGen.
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
PatFrag unp_frag, PatFrag bc_frag> {
PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
// Register source form (MRMSrcReg): both inputs are VR128 registers.
def rr : PDI<opc, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
// Is2Addr picks between the two-operand and three-operand asm strings.
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
// Memory source form (MRMSrcMem): the second input is an i128mem operand
// loaded with memopv2i64 and passed through bc_frag.
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (unp_frag VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))]>;
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
// Same asm-string selection as in the rr form.
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (unp_frag VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))]>;
}
// "v"-prefixed forms of the integer unpack definitions. Every record in this
// block is tagged VEX_4V, is marked isAsmParserOnly, and is predicated on
// HasAVX and HasSSE2. The sse2_unpack instantiations pass 0 as the final
// (Is2Addr) argument, which selects the asm string that lists
// $src2, $src1 and $dst; the hand-written QDQ records spell out that same
// three-operand string directly.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
// unpckl-based definitions: opcodes 0x60-0x62 come from the multiclass,
// opcode 0x6C (v2i64) is written out by hand below.
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
0>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
0>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
0>, VEX_4V;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
// Memory form: the load is used as memopv2i64 directly, with no bc_frag
// wrapper (see the FIXME above).
def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v2i64 (unpckl VR128:$src1,
(memopv2i64 addr:$src2))))]>, VEX_4V;
// unpckh-based definitions: opcodes 0x68-0x6A come from the multiclass,
// opcode 0x6D (v2i64) is written out by hand below.
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
0>, VEX_4V;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
0>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
0>, VEX_4V;
/// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
/// knew to collapse (bitconvert VT to VT) into its operand.
def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
// Memory form: the load is used as memopv2i64 directly, with no bc_frag
// wrapper (see the FIXME above).
def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v2i64 (unpckh VR128:$src1,
(memopv2i64 addr:$src2))))]>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {

View File

@ -11418,3 +11418,67 @@
// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04]
vpshuflw $4, (%eax), %xmm3
// vpunpckl{bw,wd,dq,qdq} and vpunpckh{bw,wd,dq,qdq}: each mnemonic is
// exercised twice, once with a register source (%xmm1) and once with a
// memory source ((%eax)), always with %xmm2 as the middle operand and %xmm3
// as the last operand. Every case states the expected printed form and the
// expected encoding bytes ahead of the instruction itself. All encodings
// here start with the 0xc5 byte followed by 0xe9 and the opcode.
// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x60,0xd9]
vpunpcklbw %xmm1, %xmm2, %xmm3
// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x60,0x18]
vpunpcklbw (%eax), %xmm2, %xmm3
// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x61,0xd9]
vpunpcklwd %xmm1, %xmm2, %xmm3
// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x61,0x18]
vpunpcklwd (%eax), %xmm2, %xmm3
// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x62,0xd9]
vpunpckldq %xmm1, %xmm2, %xmm3
// CHECK: vpunpckldq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x62,0x18]
vpunpckldq (%eax), %xmm2, %xmm3
// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9]
vpunpcklqdq %xmm1, %xmm2, %xmm3
// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x6c,0x18]
vpunpcklqdq (%eax), %xmm2, %xmm3
// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x68,0xd9]
vpunpckhbw %xmm1, %xmm2, %xmm3
// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x68,0x18]
vpunpckhbw (%eax), %xmm2, %xmm3
// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x69,0xd9]
vpunpckhwd %xmm1, %xmm2, %xmm3
// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x69,0x18]
vpunpckhwd (%eax), %xmm2, %xmm3
// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9]
vpunpckhdq %xmm1, %xmm2, %xmm3
// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x6a,0x18]
vpunpckhdq (%eax), %xmm2, %xmm3
// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9]
vpunpckhqdq %xmm1, %xmm2, %xmm3
// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x6d,0x18]
vpunpckhqdq (%eax), %xmm2, %xmm3

View File

@ -1458,3 +1458,67 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04]
vpshuflw $4, (%rax), %xmm13
// vpunpckl{bw,wd,dq,qdq} and vpunpckh{bw,wd,dq,qdq}: each mnemonic is
// exercised twice, once with a register source (%xmm11) and once with a
// memory source ((%rax)), always with %xmm12 as the middle operand and
// %xmm13 as the last operand. Every case states the expected printed form
// and the expected encoding bytes ahead of the instruction itself. The
// register-source cases expect a prefix starting with 0xc4,0x41,0x19, while
// the memory-source cases expect the shorter 0xc5,0x19 prefix.
// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb]
vpunpcklbw %xmm11, %xmm12, %xmm13
// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x60,0x28]
vpunpcklbw (%rax), %xmm12, %xmm13
// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb]
vpunpcklwd %xmm11, %xmm12, %xmm13
// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x61,0x28]
vpunpcklwd (%rax), %xmm12, %xmm13
// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb]
vpunpckldq %xmm11, %xmm12, %xmm13
// CHECK: vpunpckldq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x62,0x28]
vpunpckldq (%rax), %xmm12, %xmm13
// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb]
vpunpcklqdq %xmm11, %xmm12, %xmm13
// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x6c,0x28]
vpunpcklqdq (%rax), %xmm12, %xmm13
// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb]
vpunpckhbw %xmm11, %xmm12, %xmm13
// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x68,0x28]
vpunpckhbw (%rax), %xmm12, %xmm13
// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb]
vpunpckhwd %xmm11, %xmm12, %xmm13
// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x69,0x28]
vpunpckhwd (%rax), %xmm12, %xmm13
// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb]
vpunpckhdq %xmm11, %xmm12, %xmm13
// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x6a,0x28]
vpunpckhdq (%rax), %xmm12, %xmm13
// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb]
vpunpckhqdq %xmm11, %xmm12, %xmm13
// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x6d,0x28]
vpunpckhqdq (%rax), %xmm12, %xmm13