From d6139425f5f5c303d5ae6a2202fb40a8f6dde9a6 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Tue, 20 Apr 2010 23:18:40 +0000
Subject: [PATCH] teach the x86 address matching stuff to handle (shl (or x,c),
 3) the same as (shl (add x, c), 3) when x doesn't have any bits from c set.

This finishes off PR1135.  Before we compiled the block to:
to:

LBB0_3:                                 ## %bb
	cmpb	$4, %dl
	sete	%dl
	addb	%dl, %cl
	movb	%cl, %dl
	shlb	$2, %dl
	addb	%r8b, %dl
	shlb	$2, %dl
	movzbl	%dl, %edx
	movl	%esi, (%rdi,%rdx,4)
	leaq	2(%rdx), %r9
	movl	%esi, (%rdi,%r9,4)
	leaq	1(%rdx), %r9
	movl	%esi, (%rdi,%r9,4)
	addq	$3, %rdx
	movl	%esi, (%rdi,%rdx,4)
	incb	%r8b
	decb	%al
	movb	%r8b, %dl
	jne	LBB0_1

Now we produce:

LBB0_3:                                 ## %bb
	cmpb	$4, %dl
	sete	%dl
	addb	%dl, %cl
	movb	%cl, %dl
	shlb	$2, %dl
	addb	%r8b, %dl
	shlb	$2, %dl
	movzbl	%dl, %edx
	movl	%esi, (%rdi,%rdx,4)
	movl	%esi, 8(%rdi,%rdx,4)
	movl	%esi, 4(%rdi,%rdx,4)
	movl	%esi, 12(%rdi,%rdx,4)
	incb	%r8b
	decb	%al
	movb	%r8b, %dl
	jne	LBB0_1



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101958 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp | 29 +++++++++++++-----
 test/CodeGen/X86/or-address.ll     | 47 ++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 7 deletions(-)
 create mode 100644 test/CodeGen/X86/or-address.ll
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index b0eb58a273d..d6744d16df3 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -694,6 +694,25 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
   return false;
 }
 
+/// isLogicallyAddWithConstant - Return true if this node is semantically an
+/// add of a value with a constantint.
+static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
+  // Check for (add x, Cst)
+  if (V->getOpcode() == ISD::ADD)
+    return isa<ConstantSDNode>(V->getOperand(1));
+
+  // Check for (or x, Cst), where Cst & x == 0.
+  if (V->getOpcode() != ISD::OR ||
+      !isa<ConstantSDNode>(V->getOperand(1)))
+    return false;
+  
+  // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+  ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
+    
+  // Check to see if the LHS & C is zero.
+  return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
+}
+
 bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                               X86ISelListener &DeadNodes,
                                               unsigned Depth) {
@@ -785,8 +804,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
         // Okay, we know that we have a scale by now.  However, if the scaled
         // value is an add of something and a constant, we can fold the
         // constant into the disp field here.
-        if (ShVal.getNode()->getOpcode() == ISD::ADD &&
-            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
+        if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
           AM.IndexReg = ShVal.getNode()->getOperand(0);
           ConstantSDNode *AddVal =
             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
@@ -973,14 +991,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 
   case ISD::OR:
     // Handle "X | C" as "X + C" iff X is known to have C bits clear.
-    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    if (isLogicallyAddWithConstant(N, CurDAG)) {
       X86ISelAddressMode Backup = AM;
+      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
       uint64_t Offset = CN->getSExtValue();
 
-      // Check to see if the LHS & C is zero.
-      if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue()))
-        break;
-
       // Start with the LHS as an addr mode.
       if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) &&
           // Address could not have picked a GV address for the displacement.
diff --git a/test/CodeGen/X86/or-address.ll b/test/CodeGen/X86/or-address.ll
new file mode 100644
index 00000000000..df0e1f936a3
--- /dev/null
+++ b/test/CodeGen/X86/or-address.ll
@@ -0,0 +1,47 @@
+; PR1135
+; RUN: llc %s -o - | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.3"
+
+
+; CHECK: 	movl	%{{.*}},   (%rdi,%rdx,4)
+; CHECK:	movl	%{{.*}},  8(%rdi,%rdx,4)
+; CHECK:	movl	%{{.*}},  4(%rdi,%rdx,4)
+; CHECK:	movl	%{{.*}}, 12(%rdi,%rdx,4)
+
+define void @test(i32* nocapture %array, i32 %r0) nounwind ssp noredzone {
+bb.nph:
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+  %j.010 = phi i8 [ 0, %bb.nph ], [ %14, %bb ]    ; <i8> [#uses=1]
+  %k.19 = phi i8 [ 0, %bb.nph ], [ %.k.1, %bb ]   ; <i8> [#uses=1]
+  %i0.08 = phi i8 [ 0, %bb.nph ], [ %15, %bb ]    ; <i8> [#uses=3]
+  %0 = icmp slt i8 %i0.08, 4                      ; <i1> [#uses=1]
+  %iftmp.0.0 = select i1 %0, i8 %i0.08, i8 0      ; <i8> [#uses=2]
+  %1 = icmp eq i8 %i0.08, 4                       ; <i1> [#uses=1]
+  %2 = zext i1 %1 to i8                           ; <i8> [#uses=1]
+  %.k.1 = add i8 %2, %k.19                        ; <i8> [#uses=2]
+  %3 = shl i8 %.k.1, 2                            ; <i8> [#uses=1]
+  %4 = add i8 %3, %iftmp.0.0                      ; <i8> [#uses=1]
+  %5 = shl i8 %4, 2                               ; <i8> [#uses=1]
+  %6 = zext i8 %5 to i64                          ; <i64> [#uses=4]
+  %7 = getelementptr inbounds i32* %array, i64 %6 ; <i32*> [#uses=1]
+  store i32 %r0, i32* %7, align 4
+  %8 = or i64 %6, 2                               ; <i64> [#uses=1]
+  %9 = getelementptr inbounds i32* %array, i64 %8 ; <i32*> [#uses=1]
+  store i32 %r0, i32* %9, align 4
+  %10 = or i64 %6, 1                              ; <i64> [#uses=1]
+  %11 = getelementptr inbounds i32* %array, i64 %10 ; <i32*> [#uses=1]
+  store i32 %r0, i32* %11, align 4
+  %12 = or i64 %6, 3                              ; <i64> [#uses=1]
+  %13 = getelementptr inbounds i32* %array, i64 %12 ; <i32*> [#uses=1]
+  store i32 %r0, i32* %13, align 4
+  %14 = add nsw i8 %j.010, 1                      ; <i8> [#uses=2]
+  %15 = add i8 %iftmp.0.0, 1                      ; <i8> [#uses=1]
+  %exitcond = icmp eq i8 %14, 32                  ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb
+  ret void
+}