Move SSE3 Move patterns to a more appropriate section

Add AVX SSE3 packed horizontal add & sub instructions


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107405 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-07-01 17:35:02 +00:00
parent 7144821c61
commit c6fcdeb8f9
3 changed files with 119 additions and 30 deletions

View File

@ -3221,10 +3221,20 @@ let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
// Move Unaligned Integer
// VLDDQUrm - VEX-encoded spelling ("vlddqu") of the LDDQUrm load defined just
// below: same opcode (0xF0), same i128mem -> VR128 operands and the same
// int_x86_sse3_ldu_dq pattern, but flagged isAsmParserOnly.
// NOTE(review): the `let` here sets only isAsmParserOnly, whereas the `let`
// clauses around the other AVX definitions visible in this file also set
// Predicates = [HasAVX, HasSSE3] -- confirm the omission is intentional.
let isAsmParserOnly = 1 in
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
// LDDQUrm - Non-VEX "lddqu": reads an i128mem operand into a VR128 register
// and is the instruction selected for the int_x86_sse3_ldu_dq intrinsic.
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
// movddup of a 64-bit integer load that was wrapped in scalar_to_vector and
// bitcast to v2f64 (second shuffle operand undef): fold the load and emit the
// memory form MOVDDUPrm directly. Only available with SSE3.
def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
(undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
// Several Move patterns
let AddedComplexity = 5 in {
def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
@ -3236,6 +3246,22 @@ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
}
// vector_shuffle v1, <undef> <1, 1, 3, 3>
// Integer (v4i32) shuffles that match movshdup are mapped onto the SSE3
// MOVSHDUP instructions. The first pattern takes the source in a register;
// the second matches a v2i64 memop bitcast to v4i32 and uses the memory form.
// The memory pattern carries the larger AddedComplexity (20 vs. 15);
// presumably so the load-folding form wins when both match -- TODO confirm.
let AddedComplexity = 15 in
def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
(MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in
def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
(MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
// vector_shuffle v1, <undef> <0, 0, 2, 2>
// Integer (v4i32) shuffles that match movsldup are mapped onto the SSE3
// MOVSLDUP instructions. The first pattern takes the source in a register;
// the second matches a v2i64 memop bitcast to v4i32 and uses the memory form.
// The memory pattern carries the larger AddedComplexity (20 vs. 15);
// presumably so the load-folding form wins when both match -- TODO confirm.
let AddedComplexity = 15 in
def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
(MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in
def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
@ -3275,28 +3301,43 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
// SSE3 Instructions
//===---------------------------------------------------------------------===//
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
// Horizontal ops
// S3D_Intrr - Register/register intrinsic instruction derived from S3DI.
//   o         - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   IntId     - intrinsic applied to ($src1, $src2); the result is a v4f32
//               written to $dst.
//   Is2Addr   - chooses the asm string: when 1 (the default) only $src2 and
//               $dst are printed; when 0 $src1 is printed as well.
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
// S3D_Intrm - Register/memory intrinsic instruction derived from S3DI.
//   o         - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   IntId     - intrinsic applied to ($src1, memop of $src2); the result is a
//               v4f32 written to $dst. $src2 is an f128mem operand.
//   Is2Addr   - chooses the asm string: when 1 (the default) only $src2 and
//               $dst are printed; when 0 $src1 is printed as well.
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
// S3_Intrr - Register/register intrinsic instruction derived from S3I.
//   o         - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   IntId     - intrinsic applied to ($src1, $src2); the result is a v2f64
//               written to $dst.
//   Is2Addr   - chooses the asm string: when 1 (the default) only $src2 and
//               $dst are printed; when 0 $src1 is printed as well.
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
// S3_Intrm - Register/memory intrinsic instruction derived from S3I.
//   o         - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   IntId     - intrinsic applied to ($src1, memopv2f64 of $src2); the result
//               is a v2f64 written to $dst. $src2 is an f128mem operand.
//   Is2Addr   - chooses the asm string: when 1 (the default) only $src2 and
//               $dst are printed; when 0 $src1 is printed as well.
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
// AVX (VEX_4V) forms of the packed horizontal add/subtract instructions.
// They reuse the SSE3 opcodes (0x7C for hadd, 0x7D for hsub) and intrinsics,
// but pass 0 as the final class argument (Is2Addr) so that the asm string
// prints both source operands in addition to $dst. All eight definitions are
// flagged isAsmParserOnly and predicated on HasAVX and HasSSE3.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
@ -3308,32 +3349,16 @@ let Constraints = "$src1 = $dst" in {
def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
}
//===---------------------------------------------------------------------===//
// SSSE3 Instructions
//===---------------------------------------------------------------------===//
// Thread synchronization
// MONITOR and MWAIT declare no explicit operands ((outs) and (ins) are both
// empty); their patterns instead bind the intrinsic arguments to fixed
// registers: EAX, ECX, EDX for monitor and ECX, EAX for mwait. Both share
// opcode 0x01 under the TB prefix and differ only in the ModRM form
// (MRM_C8 vs. MRM_C9). Both require SSE3.
def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
[(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
[(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
// vector_shuffle v1, <undef> <1, 1, 3, 3>
let AddedComplexity = 15 in
def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
(MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in
def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
(MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
// vector_shuffle v1, <undef> <0, 0, 2, 2>
let AddedComplexity = 15 in
def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
(MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in
def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
//===---------------------------------------------------------------------===//
// SSSE3 Instructions
//===---------------------------------------------------------------------===//
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, Intrinsic IntId128> {

View File

@ -11582,3 +11582,35 @@
// CHECK: encoding: [0xc5,0xf1,0xd0,0x10]
vaddsubpd (%eax), %xmm1, %xmm2
// AVX packed horizontal add/subtract (vhaddps, vhaddpd, vhsubps, vhsubpd).
// Each mnemonic is exercised twice: once with a register source (%xmm1) and
// once with a memory source ((%eax)). Every case lists the expected printed
// instruction, the expected encoding bytes, and then the input instruction.
// CHECK: vhaddps %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9]
vhaddps %xmm1, %xmm2, %xmm3
// CHECK: vhaddps (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xeb,0x7c,0x18]
vhaddps (%eax), %xmm2, %xmm3
// CHECK: vhaddpd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9]
vhaddpd %xmm1, %xmm2, %xmm3
// CHECK: vhaddpd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x7c,0x18]
vhaddpd (%eax), %xmm2, %xmm3
// CHECK: vhsubps %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9]
vhsubps %xmm1, %xmm2, %xmm3
// CHECK: vhsubps (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xeb,0x7d,0x18]
vhsubps (%eax), %xmm2, %xmm3
// CHECK: vhsubpd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9]
vhsubpd %xmm1, %xmm2, %xmm3
// CHECK: vhsubpd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
vhsubpd (%eax), %xmm2, %xmm3

View File

@ -1630,3 +1630,35 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc5,0x21,0xd0,0x20]
vaddsubpd (%rax), %xmm11, %xmm12
// AVX packed horizontal add/subtract (vhaddps, vhaddpd, vhsubps, vhsubpd)
// using the high registers %xmm11-%xmm13. Each mnemonic is exercised with a
// register source (%xmm11) and with a memory source ((%rax)). In the expected
// encodings below, the register-source cases start with 0xc4 and the
// memory-source cases start with 0xc5. Every case lists the expected printed
// instruction, the expected encoding bytes, and then the input instruction.
// CHECK: vhaddps %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb]
vhaddps %xmm11, %xmm12, %xmm13
// CHECK: vhaddps (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x1b,0x7c,0x28]
vhaddps (%rax), %xmm12, %xmm13
// CHECK: vhaddpd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb]
vhaddpd %xmm11, %xmm12, %xmm13
// CHECK: vhaddpd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x7c,0x28]
vhaddpd (%rax), %xmm12, %xmm13
// CHECK: vhsubps %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb]
vhsubps %xmm11, %xmm12, %xmm13
// CHECK: vhsubps (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x1b,0x7d,0x28]
vhsubps (%rax), %xmm12, %xmm13
// CHECK: vhsubpd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb]
vhsubpd %xmm11, %xmm12, %xmm13
// CHECK: vhsubpd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x7d,0x28]
vhsubpd (%rax), %xmm12, %xmm13