; RUN: llc < %s -march=x86-64 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mcpu=yonah -march=x86 -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32
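
; Basic codegen tests for <2 x float> (v2f32), which is not a legal type on
; x86: the value should be widened and kept in SSE registers rather than
; scalarized or routed through MMX.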
; PR7518
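; test1: extract both lanes of a v2f32 argument, add them as scalars, and
; store the result; the checks below expect a short SSE-only sequence with no
; MMX registers or stack spills.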
define void @test1(<2 x float> %Q, float *%P2) nounwind {
%a = extractelement <2 x float> %Q, i32 0
%b = extractelement <2 x float> %Q, i32 1
%c = fadd float %a, %b
store float %c, float* %P2
ret void
; X64: test1:
; X64-NEXT: pshufd $1, %xmm0, %xmm1
; X64-NEXT: addss %xmm0, %xmm1
; X64-NEXT: movss %xmm1, (%rdi)
; X64-NEXT: ret
; X32: test1:
; X32-NEXT: pshufd $1, %xmm0, %xmm1
; X32-NEXT: addss %xmm0, %xmm1
; X32-NEXT: movl 4(%esp), %eax
; X32-NEXT: movss %xmm1, (%eax)
; X32-NEXT: ret
}
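
; test2: a plain v2f32 add of two arguments; after widening this should boil
; down to a single addps on the incoming XMM registers.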
define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind {
%Z = fadd <2 x float> %Q, %R
ret <2 x float> %Z
; X64: test2:
; X64-NEXT: addps %xmm1, %xmm0
; X64-NEXT: ret
}
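
; test3: a v2f32 value shuffled out of a v4f32 argument, added to itself, and
; returned; the add should still be a single addps.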
define <2 x float> @test3(<4 x float> %A) nounwind {
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%C = fadd <2 x float> %B, %B
ret <2 x float> %C
; CHECK: test3:
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: ret
}
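
; test4: like test3, but the v2f32 value comes directly from the argument.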
define <2 x float> @test4(<2 x float> %A) nounwind {
%C = fadd <2 x float> %A, %A
ret <2 x float> %C
; CHECK: test4:
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: ret
}
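
; test5: a v2f32 value that is live across a basic-block boundary
; (rdar://8230384); with widening, the two fadds become two addps instructions
; with no insert/extract shuffles in between.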
define <4 x float> @test5(<4 x float> %A) nounwind {
%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%C = fadd <2 x float> %B, %B
br label %BB
BB:
%D = fadd <2 x float> %C, %C
%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
ret <4 x float> %E
; CHECK: _test5:
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: ret
}