From 2bfb8f6ef8ac6970acfd90f4b93f5e58b7d2e62c Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Fri, 9 Jul 2010 21:20:35 +0000 Subject: [PATCH] Add AVX 256-bit unpack and interleave git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 17 +++++++++++++ test/MC/AsmParser/X86/x86_32-encoding.s | 32 +++++++++++++++++++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 32 +++++++++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e8736524f30..45b48012b42 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -165,6 +165,10 @@ def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; +// FIXME: move this to a more appropriate place after all AVX is done. +def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; +def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; + // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. // FIXME: 8 byte alignment for mmx reads is not required @@ -1539,6 +1543,19 @@ let AddedComplexity = 10 in { defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedDouble>, OpSize, VEX_4V; + + defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, + VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, + VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, + VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, + VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index e4bf29e0b24..35ef1fa303d 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -12354,3 +12354,35 @@ // CHECK: encoding: [0xc5,0xfd,0x11,0x08] vmovupd %ymm1, (%eax) +// CHECK: vunpckhps %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xec,0x15,0xe1] + vunpckhps %ymm1, %ymm2, %ymm4 + +// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xed,0x15,0xe1] + vunpckhpd %ymm1, %ymm2, %ymm4 + +// CHECK: vunpcklps %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xec,0x14,0xe1] + vunpcklps %ymm1, %ymm2, %ymm4 + +// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4 +// CHECK: encoding: [0xc5,0xed,0x14,0xe1] + vunpcklpd %ymm1, %ymm2, %ymm4 + +// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc] + vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc] + vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc] + vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5 + +// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc] + vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 452aa532a81..a875cbc6092 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -2428,3 +2428,35 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc5,0x7d,0x11,0x18] vmovupd %ymm11, (%rax) +// CHECK: vunpckhps %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3] + vunpckhps %ymm11, %ymm12, %ymm4 + +// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3] + vunpckhpd %ymm11, %ymm12, %ymm4 + +// CHECK: vunpcklps %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3] + vunpcklps %ymm11, %ymm12, %ymm4 + +// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4 +// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3] + vunpcklpd %ymm11, %ymm12, %ymm4 + +// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc] + vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc] + vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc] + vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10 + +// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc] + vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10 +