mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-05 17:39:16 +00:00
update this.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113116 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3ae0924d6b
commit
f0f5780b39
@ -20,7 +20,28 @@ __m128i shift_right(__m128i value, unsigned long offset) {
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
SSE has instructions for doing operations on complex numbers; we should pattern
|
||||
match them. Compiling this:
|
||||
match them. For example, this should turn into a horizontal add:
|
||||
|
||||
typedef float __attribute__((vector_size(16))) v4f32;
|
||||
float f32(v4f32 A) {
|
||||
return A[0]+A[1]+A[2]+A[3];
|
||||
}
|
||||
|
||||
Instead we get this:
|
||||
|
||||
_f32: ## @f32
|
||||
pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0]
|
||||
addss %xmm0, %xmm1
|
||||
pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0]
|
||||
movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1]
|
||||
movaps %xmm0, %xmm3
|
||||
addss %xmm1, %xmm3
|
||||
movdqa %xmm2, %xmm0
|
||||
addss %xmm3, %xmm0
|
||||
ret
|
||||
|
||||
Also, there are cases where some simple local SLP would improve codegen a bit.
|
||||
Compiling this:
|
||||
|
||||
_Complex float f32(_Complex float A, _Complex float B) {
|
||||
return A+B;
|
||||
@ -28,19 +49,17 @@ _Complex float f32(_Complex float A, _Complex float B) {
|
||||
|
||||
into:
|
||||
|
||||
_f32:
|
||||
_f32: ## @f32
|
||||
movdqa %xmm0, %xmm2
|
||||
addss %xmm1, %xmm2
|
||||
pshufd $16, %xmm2, %xmm2
|
||||
pshufd $1, %xmm1, %xmm1
|
||||
pshufd $1, %xmm0, %xmm0
|
||||
addss %xmm1, %xmm0
|
||||
pshufd $16, %xmm0, %xmm1
|
||||
movdqa %xmm2, %xmm0
|
||||
unpcklps %xmm1, %xmm0
|
||||
pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0]
|
||||
pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0]
|
||||
addss %xmm1, %xmm3
|
||||
movaps %xmm2, %xmm0
|
||||
unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
|
||||
ret
|
||||
|
||||
seems silly.
|
||||
seems silly when it could just be one addps.
|
||||
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
Loading…
x
Reference in New Issue
Block a user