Optimize bitcast(trunc(bitcast(x))), where the result is a float and 'x'

is a vector, into a vector element extraction.  This allows clang to
compile:

struct S { float A, B, C, D; };
float foo(struct S A) { return A.A + A.B+A.C+A.D; }

into:

_foo:                                   ## @foo
## BB#0:                                ## %entry
	movd	%xmm0, %rax
	shrq	$32, %rax
	movd	%eax, %xmm2
	addss	%xmm0, %xmm2
	movapd	%xmm1, %xmm3
	addss	%xmm2, %xmm3
	movd	%xmm1, %rax
	shrq	$32, %rax
	movd	%eax, %xmm0
	addss	%xmm3, %xmm0
	ret

instead of:

_foo:                                   ## @foo
## BB#0:                                ## %entry
	movd	%xmm0, %rax
	movd	%eax, %xmm0
	shrq	$32, %rax
	movd	%eax, %xmm2
	addss	%xmm0, %xmm2
	movd	%xmm1, %rax
	movd	%eax, %xmm1
	addss	%xmm2, %xmm1
	shrq	$32, %rax
	movd	%eax, %xmm0
	addss	%xmm1, %xmm0
	ret

... eliminating half of the horribleness.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112227 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2010-08-26 21:55:42 +00:00
parent a6140a1444
commit e5a1426174
2 changed files with 56 additions and 0 deletions

View File

@ -1335,6 +1335,35 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
return new ShuffleVectorInst(InVal, V2, Mask);
}
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast.  The various long double bitcasts can't get in here.
///
/// Recognizes bitcast(trunc(bitcast(Vec))) where Vec has vector type and
/// rewrites it as an extractelement from Vec, first bitcasting Vec to a vector
/// of the destination type when its element type differs but its total size
/// is a multiple of the destination size.
///
/// \param CI  the int->float/double bitcast being visited.
/// \param IC  the combiner; its Builder is used to create any helper bitcast
///            and the constant element index.
/// \returns a new, not yet inserted, ExtractElementInst that replaces CI, or
///          null if the pattern does not match.
static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC) {
  Value *Src = CI.getOperand(0);

  // If this is a bitcast from int to float, check to see if the int is an
  // extraction from a vector.  When match() fails the '||' short-circuits, so
  // VecInput is only dereferenced after it has been bound.
  Value *VecInput = 0;
  if (!match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) ||
      !isa<VectorType>(VecInput->getType()))
    return 0;

  const VectorType *VecTy = cast<VectorType>(VecInput->getType());
  const Type *DestTy = CI.getType();

  // If the element type of the vector doesn't match the result type, but the
  // vector type's size is a multiple of the result type, bitcast it to be a
  // vector type we can extract from.  Otherwise there is nothing to extract.
  if (VecTy->getElementType() != DestTy) {
    unsigned VecBits = VecTy->getPrimitiveSizeInBits();
    unsigned DestBits = DestTy->getPrimitiveSizeInBits();
    if (VecBits % DestBits != 0)
      return 0;
    VecTy = VectorType::get(DestTy, VecBits / DestBits);
    VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
  }

  // The trunc keeps the low bits of the integer view of the vector.  Those
  // bits are element 0 only on little-endian targets; on big-endian targets
  // they are the last element.  When no target data is available, keep the
  // historical element-0 choice.
  unsigned EltNo = 0;
  if (IC.getTargetData() && IC.getTargetData()->isBigEndian())
    EltNo = VecTy->getNumElements() - 1;

  return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(EltNo));
}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
@ -1386,6 +1415,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
((Instruction*)NULL));
}
}
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
return I;
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {

View File

@ -13,3 +13,25 @@ define i32 @test1(i64 %a) {
; CHECK: ret i32 0
}
; Optimize bitcasts that are extracting low element of vector. This happens
; because of SRoA.
; rdar://7892780
;
; Each argument goes through the same chain: bitcast the whole vector to i64,
; trunc to i32, then bitcast the i32 to float. The CHECK lines below expect
; each chain to collapse into an extractelement of element 0 -- taken straight
; from %A, and from %B after a single bitcast to <2 x float>.
define float @test2(<2 x float> %A, <2 x i32> %B) {
; First chain: the source vector already has float elements.
%tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2]
%tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1]
%tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1]
; Second chain: the source vector has i32 elements, so the expected output
; contains an extra vector bitcast before the extract.
%tmp = bitcast <2 x i32> %B to i64
%tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1]
%tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1]
%add = fadd float %tmp24, %tmp4
ret float %add
; CHECK: @test2
; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0
; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float>
; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 0
; CHECK-NEXT: %add = fadd float %tmp24, %tmp4
; CHECK-NEXT: ret float %add
}