Reapply support for AVX unpack and interleave instructions, this time with test cases.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106593 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-06-22 23:02:38 +00:00
parent 186b3d2606
commit 0caca3967b
3 changed files with 106 additions and 44 deletions

View File

@ -444,6 +444,22 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
RC:$src1, (mem_frag addr:$src2)))], d>;
}
/// sse12_unpack_interleave - Shared definition for the SSE 1 & 2 unpack and
/// interleave instructions. Every instantiation yields two records:
///   rr - both sources are registers of class RC (MRMSrcReg form);
///   rm - the second source is read through mem_frag from an x86memop
///        operand (MRMSrcMem form).
/// Both patterns apply OpNode to the two sources, type the result as vt and
/// assign it to RC:$dst. opc is the opcode byte, asm the assembly string and
/// d the domain handed on to PI.
multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   Domain d> {
  // Register-register form.
  def rr : PI<opc, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, RC:$src2), asm,
              [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;

  // Register-memory form.
  def rm : PI<opc, MRMSrcMem,
              (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm,
              [(set RC:$dst,
                    (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], d>;
}
//===----------------------------------------------------------------------===//
// SSE1 Instructions
//===----------------------------------------------------------------------===//
@ -1388,50 +1404,32 @@ let Constraints = "$src1 = $dst" in {
VR128:$src1, (memopv2f64 addr:$src2))))]>;
let AddedComplexity = 10 in {
// UNPCKHPS (opcode 0x15), written out as explicit register-register and
// register-memory records. Both patterns select unpckh and type the result
// as v4f32; the memory form loads its second source via memopv4f32.
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckh VR128:$src1,
(memopv4f32 addr:$src2))))]>;
// UNPCKLPS (opcode 0x14), written out as explicit register-register and
// register-memory records selecting unpckl.
// NOTE(review): unlike the rr form, the rm pattern carries no explicit v4f32
// wrapper; presumably the type is inferred from memopv4f32 - confirm.
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
// UNPCKHPD (opcode 0x15), written out as explicit register-register and
// register-memory records. Both patterns select unpckh and type the result
// as v2f64; the memory form loads its second source via memopv2f64.
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckh VR128:$src1,
(memopv2f64 addr:$src2))))]>;
// UNPCKLPD (opcode 0x14), written out as explicit register-register and
// register-memory records selecting unpckl.
// NOTE(review): unlike the rr form, the rm pattern carries no explicit v2f64
// wrapper; presumably the type is inferred from memopv2f64 - confirm.
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
// AVX (VEX_4V) three-operand forms of the unpack/interleave instructions.
// The destination is a separate operand here, so the enclosing
// "$src1 = $dst" constraint is cleared. The records are marked
// isAsmParserOnly. AVX mnemonics carry a 'v' prefix (vunpckhps, vunpckhpd,
// vunpcklps, vunpcklpd); without it the assembler would not recognise the
// VEX spellings and would print the legacy names for these encodings.
let Constraints = "", isAsmParserOnly = 1 in {
  defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
        VR128, f128mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       SSEPackedSingle>, VEX_4V;
  defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
        VR128, f128mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       SSEPackedDouble>, OpSize, VEX_4V;
  defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
        VR128, f128mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       SSEPackedSingle>, VEX_4V;
  defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
        VR128, f128mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       SSEPackedDouble>, OpSize, VEX_4V;
}
// Legacy SSE two-operand forms, instantiated from sse12_unpack_interleave.
// The packed-single variants take the TB prefix only; the packed-double
// variants add OpSize.

// High halves (opcode 0x15).
defm UNPCKHPS: sse12_unpack_interleave<
                   0x15, unpckh, v4f32, memopv4f32, VR128, f128mem,
                   "unpckhps\t{$src2, $dst|$dst, $src2}",
                   SSEPackedSingle>, TB;
defm UNPCKHPD: sse12_unpack_interleave<
                   0x15, unpckh, v2f64, memopv2f64, VR128, f128mem,
                   "unpckhpd\t{$src2, $dst|$dst, $src2}",
                   SSEPackedDouble>, TB, OpSize;

// Low halves (opcode 0x14).
defm UNPCKLPS: sse12_unpack_interleave<
                   0x14, unpckl, v4f32, memopv4f32, VR128, f128mem,
                   "unpcklps\t{$src2, $dst|$dst, $src2}",
                   SSEPackedSingle>, TB;
defm UNPCKLPD: sse12_unpack_interleave<
                   0x14, unpckl, v2f64, memopv2f64, VR128, f128mem,
                   "unpcklpd\t{$src2, $dst|$dst, $src2}",
                   SSEPackedDouble>, TB, OpSize;
} // AddedComplexity
} // Constraints = "$src1 = $dst"

View File

@ -10318,3 +10318,35 @@
// CHECK: encoding: [0xc5,0xeb,0x10,0xec]
vmovsd %xmm4, %xmm2, %xmm5
// AVX unpack/interleave, register-register forms. Each case verifies the
// printed instruction and its encoding; all expected encodings start with
// the 0xc5 byte.
// CHECK: vunpckhps %xmm1, %xmm2, %xmm4
// CHECK: encoding: [0xc5,0xe8,0x15,0xe1]
vunpckhps %xmm1, %xmm2, %xmm4
// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4
// CHECK: encoding: [0xc5,0xe9,0x15,0xe1]
vunpckhpd %xmm1, %xmm2, %xmm4
// CHECK: vunpcklps %xmm1, %xmm2, %xmm4
// CHECK: encoding: [0xc5,0xe8,0x14,0xe1]
vunpcklps %xmm1, %xmm2, %xmm4
// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4
// CHECK: encoding: [0xc5,0xe9,0x14,0xe1]
vunpcklpd %xmm1, %xmm2, %xmm4
// AVX unpack/interleave, register-memory forms, using a base + scaled index
// + 8-bit displacement address (-4 is emitted as the trailing 0xfc byte).
// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc]
vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc]
vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc]
vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc]
vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5

View File

@ -382,3 +382,35 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: fixup A - offset: 5, value: CPI1_0-4
pshufb CPI1_0(%rip), %xmm1
// AVX unpack/interleave, register-register forms using xmm12-xmm15. The
// expected encodings here start with the 0xc4 byte (3-byte prefix form).
// CHECK: vunpckhps %xmm15, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef]
vunpckhps %xmm15, %xmm12, %xmm13
// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef]
vunpckhpd %xmm15, %xmm12, %xmm13
// CHECK: vunpcklps %xmm15, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef]
vunpcklps %xmm15, %xmm12, %xmm13
// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef]
vunpcklpd %xmm15, %xmm12, %xmm13
// AVX unpack/interleave, register-memory forms. The address uses only
// %rbx/%rcx, and the expected encodings start with the 0xc5 byte.
// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15
// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc]
vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15
// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15
// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc]
vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15
// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15
// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc]
vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15
// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15
// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc]
vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15