From 6dc868581b20380802e6a011de8dd9766790cf7a Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Thu, 15 Apr 2010 05:40:59 +0000
Subject: [PATCH] teach codegen to turn trunc(zextload) into load when
 possible. This doesn't occur much at all; it only seems to be formed in the
 case when the trunc optimization kicks in due to phase ordering.  In that case
 it saves a few bytes on x86-32.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101350 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 test/CodeGen/X86/store-narrow.ll         | 52 ++++++++++++++++++------
 2 files changed, 40 insertions(+), 14 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5974aff03ca..69a26169053 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3637,7 +3637,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   // Do not generate loads of non-round integer types since these can
   // be expensive (and would be wrong if the type is not byte sized).
   if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
-      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
       // Do not change the width of a volatile load.
       !cast<LoadSDNode>(N0)->isVolatile()) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 23aa6164778..f47bf31098b 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -1,5 +1,6 @@
 ; rdar://7860110
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s | FileCheck %s -check-prefix=X64
+; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.2"
 
@@ -12,8 +13,12 @@ entry:
   store i32 %D, i32* %a0, align 4
   ret void
   
-; CHECK: test1:
-; CHECK: movb	%sil, (%rdi)
+; X64: test1:
+; X64: movb	%sil, (%rdi)
+
+; X32: test1:
+; X32: movb	8(%esp), %al
+; X32: movb	%al, (%{{.*}})
 }
 
 define void @test2(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
@@ -25,8 +30,12 @@ entry:
   %D = or i32 %B, %CS
   store i32 %D, i32* %a0, align 4
   ret void
-; CHECK: test2:
-; CHECK: movb	%sil, 1(%rdi)
+; X64: test2:
+; X64: movb	%sil, 1(%rdi)
+
+; X32: test2:
+; X32: movb	8(%esp), %al
+; X32: movb	%al, 1(%{{.*}})
 }
 
 define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -37,8 +46,12 @@ entry:
   %D = or i32 %B, %C
   store i32 %D, i32* %a0, align 4
   ret void
-; CHECK: test3:
-; CHECK: movw	%si, (%rdi)
+; X64: test3:
+; X64: movw	%si, (%rdi)
+
+; X32: test3:
+; X32: movw	8(%esp), %ax
+; X32: movw	%ax, (%{{.*}})
 }
 
 define void @test4(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -50,8 +63,12 @@ entry:
   %D = or i32 %B, %CS
   store i32 %D, i32* %a0, align 4
   ret void
-; CHECK: test4:
-; CHECK: movw	%si, 2(%rdi)
+; X64: test4:
+; X64: movw	%si, 2(%rdi)
+
+; X32: test4:
+; X32: movw	8(%esp), %ax
+; X32: movw	%ax, 2(%{{.*}})
 }
 
 define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -63,8 +80,12 @@ entry:
   %D = or i64 %B, %CS
   store i64 %D, i64* %a0, align 4
   ret void
-; CHECK: test5:
-; CHECK: movw	%si, 2(%rdi)
+; X64: test5:
+; X64: movw	%si, 2(%rdi)
+
+; X32: test5:
+; X32: movw	8(%esp), %ax
+; X32: movw	%ax, 2(%{{.*}})
 }
 
 define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
@@ -76,6 +97,11 @@ entry:
   %D = or i64 %B, %CS
   store i64 %D, i64* %a0, align 4
   ret void
-; CHECK: test6:
-; CHECK: movb	%sil, 5(%rdi)
+; X64: test6:
+; X64: movb	%sil, 5(%rdi)
+
+
+; X32: test6:
+; X32: movb	8(%esp), %al
+; X32: movb	%al, 5(%{{.*}})
 }