mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
[X86][SSE] Add SSE MOVQ instructions to SSEPackedInt domain
Patch to explicitly add the SSE MOVQ (rr, mr, rm) instructions to the SSEPackedInt domain - this prevents a number of costly domain switches.
Differential Revision: http://reviews.llvm.org/D7600
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229439 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2deb1d0b54
commit
0638f4e115
@ -5011,7 +5011,7 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
|
||||
// Move Quadword Int to Packed Quadword Int
|
||||
//
|
||||
|
||||
let SchedRW = [WriteLoad] in {
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteLoad] in {
|
||||
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
@ -5023,12 +5023,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
|
||||
IIC_SSE_MOVDQ>, XS,
|
||||
Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
|
||||
} // SchedRW
|
||||
} // ExeDomain, SchedRW
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Move Packed Quadword Int to Quadword Int
|
||||
//
|
||||
let SchedRW = [WriteStore] in {
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
|
||||
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
|
||||
"movq\t{$src, $dst|$dst, $src}",
|
||||
[(store (i64 (vector_extract (v2i64 VR128:$src),
|
||||
@ -5039,7 +5039,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
|
||||
[(store (i64 (vector_extract (v2i64 VR128:$src),
|
||||
(iPTR 0))), addr:$dst)],
|
||||
IIC_SSE_MOVDQ>;
|
||||
} // SchedRW
|
||||
} // ExeDomain, SchedRW
|
||||
|
||||
// For disassembler only
|
||||
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
|
||||
@ -5060,7 +5060,7 @@ let Predicates = [UseSSE2] in
|
||||
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
|
||||
(MOVPQI2QImr addr:$dst, VR128:$src)>;
|
||||
|
||||
let isCodeGenOnly = 1, AddedComplexity = 20 in {
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, AddedComplexity = 20 in {
|
||||
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
@ -5076,7 +5076,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||
(loadi64 addr:$src))))))],
|
||||
IIC_SSE_MOVDQ>,
|
||||
XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
|
||||
}
|
||||
} // ExeDomain, isCodeGenOnly, AddedComplexity
|
||||
|
||||
let Predicates = [UseAVX], AddedComplexity = 20 in {
|
||||
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
|
||||
@ -5102,7 +5102,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
|
||||
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
|
||||
// IA32 document. movq xmm1, xmm2 does clear the high bits.
|
||||
//
|
||||
let SchedRW = [WriteVecLogic] in {
|
||||
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
|
||||
let AddedComplexity = 15 in
|
||||
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
@ -5115,9 +5115,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
|
||||
IIC_SSE_MOVQ_RR>,
|
||||
XS, Requires<[UseSSE2]>;
|
||||
} // SchedRW
|
||||
} // ExeDomain, SchedRW
|
||||
|
||||
let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
|
||||
let AddedComplexity = 20 in
|
||||
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
@ -5133,7 +5133,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
IIC_SSE_MOVDQ>,
|
||||
XS, Requires<[UseSSE2]>;
|
||||
}
|
||||
} // isCodeGenOnly, SchedRW
|
||||
} // ExeDomain, isCodeGenOnly, SchedRW
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
let Predicates = [UseAVX] in {
|
||||
|
@ -26,7 +26,7 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: zero_test
|
||||
; CHECK: xorps %xmm0, %xmm0
|
||||
; CHECK: pxor %xmm0, %xmm0
|
||||
; CHECK: ret
|
||||
|
||||
define void @zero_test() {
|
||||
|
@ -255,7 +255,7 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
|
||||
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test20:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: orps %xmm1, %xmm0
|
||||
; CHECK-NEXT: por %xmm1, %xmm0
|
||||
; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; CHECK-NEXT: retq
|
||||
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
|
||||
|
@ -54,7 +54,7 @@ define void @test1() {
|
||||
;
|
||||
; X64-LABEL: test1:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64-NEXT: xorps %xmm0, %xmm0
|
||||
; X64-NEXT: pxor %xmm0, %xmm0
|
||||
; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
|
||||
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
|
||||
|
@ -9,8 +9,8 @@
|
||||
; SSE: movaps %xmm0, (%rsp)
|
||||
; SSE: callq killcommon
|
||||
|
||||
; AVX: vmovaps compl+128(%rip), %xmm0
|
||||
; AVX: vmovaps %xmm0, (%rsp)
|
||||
; AVX: vmovdqa compl+128(%rip), %xmm0
|
||||
; AVX: vmovdqa %xmm0, (%rsp)
|
||||
; AVX: callq killcommon
|
||||
|
||||
@compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1]
|
||||
|
Loading…
Reference in New Issue
Block a user