From 784f333aef929fbc5867ce17a165837401378c17 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Fri, 27 Aug 2010 18:31:05 +0000
Subject: [PATCH] Add an instcombine to clean up a common pattern produced by
 the SRoA "promote to large integer" code, eliminating some type conversions
 like this:

  %94 = zext i16 %93 to i32        ; [#uses=2]
  %96 = lshr i32 %94, 8            ; [#uses=1]
  %101 = trunc i32 %96 to i8       ; [#uses=1]

This also unblocks other xforms from happening, now clang is able to compile:

struct S { float A, B, C, D; };
float foo(struct S A) { return A.A + A.B+A.C+A.D; }

into:

_foo:                                   ## @foo
## BB#0:                                ## %entry
        pshufd  $1, %xmm0, %xmm2
        addss   %xmm0, %xmm2
        movdqa  %xmm1, %xmm3
        addss   %xmm2, %xmm3
        pshufd  $1, %xmm1, %xmm0
        addss   %xmm3, %xmm0
        ret

on x86-64, instead of:

_foo:                                   ## @foo
## BB#0:                                ## %entry
        movd    %xmm0, %rax
        shrq    $32, %rax
        movd    %eax, %xmm2
        addss   %xmm0, %xmm2
        movapd  %xmm1, %xmm3
        addss   %xmm2, %xmm3
        movd    %xmm1, %rax
        shrq    $32, %rax
        movd    %eax, %xmm0
        addss   %xmm3, %xmm0
        ret

This seems pretty close to optimal to me, at least without using horizontal
adds.  This also triggers in lots of other code, including SPEC.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112278 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineCasts.cpp | 23 +++++++++++++
 test/Transforms/InstCombine/trunc.ll | 32 +++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 82c359194f7..1372a1891fa 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -454,6 +454,29 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     Value *Zero = Constant::getNullValue(Src->getType());
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
+
+  // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+  Value *A = 0; ConstantInt *Cst = 0;
+  if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
+      Src->hasOneUse()) {
+    // We have three types to worry about here, the type of A, the source of
+    // the truncate (MidSize), and the destination of the truncate.  We know that
+    // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+    // between ASize and ResultSize.
+    unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+    // If the shift amount is larger than the size of A, then the result is
+    // known to be zero because all the input bits got shifted out.
+    if (Cst->getZExtValue() >= ASize)
+      return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+    // Since we're doing an lshr and a zero extend, and know that the shift
+    // amount is smaller than ASize, it is always safe to do the shift in A's
+    // type, then zero extend or truncate to the result.
+    Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+    Shift->takeName(Src);
+    return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+  }
 
   return 0;
 }
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
index 2fa5d37b0f4..5cb34746676 100644
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -48,3 +48,35 @@ define i64 @test4(i64 %a) {
 ; CHECK: %d = xor i64 {{.*}}, 8
 ; CHECK: ret i64 %d
 }
+
+define i32 @test5(i32 %A) {
+  %B = zext i32 %A to i128
+  %C = lshr i128 %B, 16
+  %D = trunc i128 %C to i32
+  ret i32 %D
+; CHECK: @test5
+; CHECK: %C = lshr i32 %A, 16
+; CHECK: ret i32 %C
+}
+
+define i32 @test6(i64 %A) {
+  %B = zext i64 %A to i128
+  %C = lshr i128 %B, 32
+  %D = trunc i128 %C to i32
+  ret i32 %D
+; CHECK: @test6
+; CHECK: %C = lshr i64 %A, 32
+; CHECK: %D = trunc i64 %C to i32
+; CHECK: ret i32 %D
+}
+
+define i92 @test7(i64 %A) {
+  %B = zext i64 %A to i128
+  %C = lshr i128 %B, 32
+  %D = trunc i128 %C to i92
+  ret i92 %D
+; CHECK: @test7
+; CHECK: %C = lshr i64 %A, 32
+; CHECK: %D = zext i64 %C to i92
+; CHECK: ret i92 %D
+}
\ No newline at end of file
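
A minimal before/after sketch of the fold, using a hypothetical function
@shrink_shift (the function and value names are illustrative, not taken from
the patch): on the same zext/lshr/trunc shape shown in the commit message,
-instcombine should now perform the shift in the narrow source type and drop
the intermediate i32 entirely, just as test5 above checks for i128.

define i8 @shrink_shift(i16 %x) {
  %b = zext i16 %x to i32
  %c = lshr i32 %b, 8
  %d = trunc i32 %c to i8
  ret i8 %d
}
; roughly expected after -instcombine:
;   %c = lshr i16 %x, 8
;   %d = trunc i16 %c to i8
;   ret i8 %d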