mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 17:32:19 +00:00
Finish implementing a readme entry: when inserting an i64 variable
into a vector of zeros or undef, and when the top part is obviously
zero, we can just use movd + shuffle. This allows us to compile
vec_set-B.ll into:

_test3:
        movl    $1234567, %eax
        andl    4(%esp), %eax
        movd    %eax, %xmm0
        ret

instead of:

_test3:
        subl    $28, %esp
        movl    $1234567, %eax
        andl    32(%esp), %eax
        movl    %eax, (%esp)
        movl    $0, 4(%esp)
        movq    (%esp), %xmm0
        addl    $28, %esp
        ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48090 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
529de8a457
commit
67f453aae7
@ -781,41 +781,3 @@ LLVM should be able to generate the same thing as gcc. This looks like it is
|
||||
just a matter of matching (scalar_to_vector (load x)) to movd.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
These two functions should compile to identical code on x86-32:
|
||||
|
||||
define <2 x i64> @test2(i64 %arg) {
|
||||
entry:
|
||||
%A = and i64 %arg, 1234567
|
||||
%B = insertelement <2 x i64> undef, i64 %A, i32 0
|
||||
ret <2 x i64> %B
|
||||
}
|
||||
|
||||
define <2 x i64> @test2(i64 %arg) {
|
||||
entry:
|
||||
%A = and i64 %arg, 1234567
|
||||
%B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
|
||||
ret <2 x i64> %B
|
||||
}
|
||||
|
||||
The latter compiles to:
|
||||
|
||||
_test2:
|
||||
movl $1234567, %eax
|
||||
andl 4(%esp), %eax
|
||||
movd %eax, %xmm0
|
||||
ret
|
||||
|
||||
the former compiles to:
|
||||
|
||||
_test2:
|
||||
subl $28, %esp
|
||||
movl $1234567, %eax
|
||||
andl 32(%esp), %eax
|
||||
movl %eax, (%esp)
|
||||
movl $0, 4(%esp)
|
||||
movaps (%esp), %xmm0
|
||||
addl $28, %esp
|
||||
ret
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
@ -3063,11 +3063,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(ISD::UNDEF, VT);
|
||||
}
|
||||
|
||||
// Splat is obviously ok. Let legalizer expand it to a shuffle.
|
||||
if (Values.size() == 1)
|
||||
return SDOperand();
|
||||
|
||||
// Special case for single non-zero element.
|
||||
// Special case for single non-zero, non-undef, element.
|
||||
if (NumNonZero == 1 && NumElems <= 4) {
|
||||
unsigned Idx = CountTrailingZeros_32(NonZeros);
|
||||
SDOperand Item = Op.getOperand(Idx);
|
||||
@ -3141,6 +3137,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
|
||||
}
|
||||
}
|
||||
|
||||
// Splat is obviously ok. Let legalizer expand it to a shuffle.
|
||||
if (Values.size() == 1)
|
||||
return SDOperand();
|
||||
|
||||
// A vector full of immediates; various special cases are already
|
||||
// handled, so this is best done with a single constant-pool load.
|
||||
if (IsAllConstants)
|
||||
|
24
test/CodeGen/X86/vec_set-B.ll
Normal file
24
test/CodeGen/X86/vec_set-B.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 | not grep movaps
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep esp | count 2
|
||||
|
||||
; These should both generate something like this:
|
||||
;_test3:
|
||||
; movl $1234567, %eax
|
||||
; andl 4(%esp), %eax
|
||||
; movd %eax, %xmm0
|
||||
; ret
|
||||
|
||||
define <2 x i64> @test3(i64 %arg) {
|
||||
entry:
|
||||
%A = and i64 %arg, 1234567
|
||||
%B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
|
||||
ret <2 x i64> %B
|
||||
}
|
||||
|
||||
define <2 x i64> @test2(i64 %arg) {
|
||||
entry:
|
||||
%A = and i64 %arg, 1234567
|
||||
%B = insertelement <2 x i64> undef, i64 %A, i32 0
|
||||
ret <2 x i64> %B
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user