mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-20 14:29:27 +00:00
[x86] Remove the 2-addr-to-3-addr "optimization" from shufps to pshufd.
This trades a (register-renamer-friendly) movaps for a floating point / integer domain cross. That is a very bad trade, even on architectures where domain crossing is relatively fast. On any chip where there is even a cycle stall, this is a Very Bad Idea. It doesn't even seem likely to cause a spill to be introduced because the reason for the copy is to destructively shuffle in place.

Thanks to Ben Kramer for fixing a bug in this code that my new shuffle lowering exposed and highlighting that perhaps it should just go away. =]

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219090 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
560bddce20
commit
1d02acb7a0
@ -2139,34 +2139,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
|
||||
|
||||
unsigned MIOpc = MI->getOpcode();
|
||||
switch (MIOpc) {
|
||||
case X86::SHUFPSrri: {
|
||||
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
|
||||
if (!Subtarget.hasSSE2()) return nullptr;
|
||||
|
||||
unsigned B = MI->getOperand(1).getReg();
|
||||
unsigned C = MI->getOperand(2).getReg();
|
||||
if (B != C) return nullptr;
|
||||
int64_t M = MI->getOperand(3).getImm();
|
||||
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
|
||||
.addOperand(Dest).addOperand(Src).addImm(M);
|
||||
break;
|
||||
}
|
||||
case X86::SHUFPDrri: {
|
||||
assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
|
||||
if (!Subtarget.hasSSE2()) return nullptr;
|
||||
|
||||
unsigned B = MI->getOperand(1).getReg();
|
||||
unsigned C = MI->getOperand(2).getReg();
|
||||
if (B != C) return nullptr;
|
||||
unsigned M = MI->getOperand(3).getImm();
|
||||
|
||||
// Convert to PSHUFD mask.
|
||||
M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
|
||||
|
||||
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
|
||||
.addOperand(Dest).addOperand(Src).addImm(M);
|
||||
break;
|
||||
}
|
||||
case X86::SHL64ri: {
|
||||
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
|
||||
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
|
||||
|
@ -1,11 +0,0 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin13 -mcpu=pentium4 | FileCheck %s
|
||||
|
||||
define <4 x float> @test1(<4 x i32>, <4 x float> %b) {
|
||||
%s = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
|
||||
ret <4 x float> %s
|
||||
|
||||
; We convert shufps -> pshufd here to save a move.
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: pshufd $-27, %xmm1, %xmm0
|
||||
; CHECK-NEXT: ret
|
||||
}
|
@ -15,7 +15,8 @@ entry:
|
||||
|
||||
; OSX_SINCOS-LABEL: test1:
|
||||
; OSX_SINCOS: callq ___sincosf_stret
|
||||
; OSX_SINCOS: pshufd {{.*}} ## xmm1 = xmm0[1,1,2,3]
|
||||
; OSX_SINCOS: movaps %xmm0, %xmm1
|
||||
; OSX_SINCOS: shufps {{.*}} ## xmm1 = xmm1[1,1,2,3]
|
||||
; OSX_SINCOS: addss %xmm0, %xmm1
|
||||
|
||||
; OSX_NOOPT: test1
|
||||
|
@ -291,7 +291,8 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
|
||||
; X32-LABEL: buildvector:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; X32-NEXT: movaps %xmm0, %xmm2
|
||||
; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
|
||||
; X32-NEXT: addss %xmm1, %xmm0
|
||||
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X32-NEXT: addss %xmm2, %xmm1
|
||||
@ -300,7 +301,8 @@ define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
|
||||
;
|
||||
; X64-LABEL: buildvector:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; X64-NEXT: movaps %xmm0, %xmm2
|
||||
; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
|
||||
; X64-NEXT: addss %xmm1, %xmm0
|
||||
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X64-NEXT: addss %xmm2, %xmm1
|
||||
|
@ -5,7 +5,8 @@
|
||||
define void @test1(<2 x float> %Q, float *%P2) nounwind {
|
||||
; X64-LABEL: test1:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X64-NEXT: movaps %xmm0, %xmm1
|
||||
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X64-NEXT: addss %xmm0, %xmm1
|
||||
; X64-NEXT: movss %xmm1, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
@ -13,7 +14,8 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
|
||||
; X32-LABEL: test1:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X32-NEXT: movaps %xmm0, %xmm1
|
||||
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X32-NEXT: addss %xmm0, %xmm1
|
||||
; X32-NEXT: movss %xmm1, (%eax)
|
||||
; X32-NEXT: retl
|
||||
|
@ -186,7 +186,8 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
|
||||
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE-LABEL: shuffle_v2f64_32:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
|
||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v2f64_32:
|
||||
|
@ -396,14 +396,16 @@ define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i3
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: andps %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_bitwise_ops_test4b:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: andps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_bitwise_ops_test4b:
|
||||
@ -435,14 +437,16 @@ define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i3
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: orps %xmm1, %xmm0
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_bitwise_ops_test5b:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: orps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_bitwise_ops_test5b:
|
||||
@ -1124,7 +1128,8 @@ define <4 x float> @combine_test1(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test1:
|
||||
@ -1252,7 +1257,8 @@ define <4 x i32> @combine_test6(<4 x i32> %a, <4 x i32> %b) {
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test6:
|
||||
@ -1651,7 +1657,8 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test2b:
|
||||
@ -1660,7 +1667,8 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_test2b:
|
||||
@ -1668,7 +1676,8 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_test2b:
|
||||
|
Loading…
x
Reference in New Issue
Block a user