From 790077954384a4546fa68247744104936fd9b6cf Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Sat, 28 Aug 2010 01:50:57 +0000
Subject: [PATCH] handle the constant case of vector insertion.  For something
 like this:

struct S { float A, B, C, D; };

struct S g;
struct S bar() {
  struct S A = g;
  ++A.B;
  A.A = 42;
  return A;
}

we now generate:

_bar:                                   ## @bar
## BB#0:                                ## %entry
	movq	_g@GOTPCREL(%rip), %rax
	movss	12(%rax), %xmm0
	pshufd	$16, %xmm0, %xmm0
	movss	4(%rax), %xmm2
	movss	8(%rax), %xmm1
	pshufd	$16, %xmm1, %xmm1
	unpcklps	%xmm0, %xmm1
	addss	LCPI1_0(%rip), %xmm2
	pshufd	$16, %xmm2, %xmm2
	movss	LCPI1_1(%rip), %xmm0
	pshufd	$16, %xmm0, %xmm0
	unpcklps	%xmm2, %xmm0
	ret

instead of:

_bar:                                   ## @bar
## BB#0:                                ## %entry
	movq	_g@GOTPCREL(%rip), %rax
	movss	12(%rax), %xmm0
	pshufd	$16, %xmm0, %xmm0
	movss	4(%rax), %xmm2
	movss	8(%rax), %xmm1
	pshufd	$16, %xmm1, %xmm1
	unpcklps	%xmm0, %xmm1
	addss	LCPI1_0(%rip), %xmm2
	movd	%xmm2, %eax
	shlq	$32, %rax
	addq	$1109917696, %rax       ## imm = 0x42280000
	movd	%rax, %xmm0
	ret



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112345 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineCasts.cpp          | 35 +++++++++++++++++--
 test/Transforms/InstCombine/bitcast.ll        | 12 +++++++
 2 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 27eab7516f1..0434d6ba0fd 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1366,7 +1366,7 @@ static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
   return Value % Ty->getPrimitiveSizeInBits() == 0;
 }
 
-static bool getTypeSizeIndex(unsigned Value, const Type *Ty) {
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
   return Value / Ty->getPrimitiveSizeInBits();
 }
 
@@ -1384,6 +1384,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
   // If we got down to a value of the right type, we win, try inserting into the
   // right element.
   if (V->getType() == VecEltTy) {
+    // Inserting null doesn't actually insert any elements.
+    if (Constant *C = dyn_cast<Constant>(V))
+      if (C->isNullValue())
+        return true;
+    
     // Fail if multiple elements are inserted into this slot.
     if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
       return false;
@@ -1392,10 +1397,34 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
     return true;
   }
   
-  //if (Constant *C = dyn_cast<Constant>(V)) {
+  if (Constant *C = dyn_cast<Constant>(V)) {
     // Figure out the # elements this provides, and bitcast it or slice it up
     // as required.
-  //}
+    unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+                                        VecEltTy);
+    // If the constant is the size of a vector element, we just need to bitcast
+    // it to the right type so it gets properly inserted.
+    if (NumElts == 1)
+      return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+                                      ElementIndex, Elements, VecEltTy);
+    
+    // Okay, this is a constant that covers multiple elements.  Slice it up into
+    // pieces and insert each element-sized piece into the vector.
+    if (!isa<IntegerType>(C->getType()))
+      C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+                                       C->getType()->getPrimitiveSizeInBits()));
+    unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+    const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+    
+    for (unsigned i = 0; i != NumElts; ++i) {
+      Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+                                                               i*ElementSize));
+      Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+      if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+        return false;
+    }
+    return true;
+  }
   
   if (!V->hasOneUse()) return false;
   
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 87e413ea27f..0718b8a3aee 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -91,3 +91,15 @@ define <2 x float> @test5(float %A, float %B) {
   ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1
   ; CHECK-NEXT: ret <2 x float> 
 }
+
+define <2 x float> @test6(float %A){
+  %tmp23 = bitcast float %A to i32              ; <i32> [#uses=1]
+  %tmp24 = zext i32 %tmp23 to i64                 ; <i64> [#uses=1]
+  %tmp25 = shl i64 %tmp24, 32                     ; <i64> [#uses=1]
+  %mask20 = or i64 %tmp25, 1109917696             ; <i64> [#uses=1]
+  %tmp35 = bitcast i64 %mask20 to <2 x float>     ; <<2 x float>> [#uses=1]
+  ret <2 x float> %tmp35
+; CHECK: @test6
+; CHECK-NEXT: insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1
+; CHECK: ret
+}