From 77502c93442c5953c05e39fcd4c17d9e2aca766f Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Mar 2009 02:25:09 +0000 Subject: [PATCH] Enhance address-mode folding of ISD::ADD to handle cases where the operands can't both be fully folded at the same time. For example, in the included testcase, a global variable is being added with an add of two values. The global variable wants RIP-relative addressing, so it can't share the address with another base register, but it's still possible to fold the initial add. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@66865 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 13 +++++++++++++ test/CodeGen/X86/fold-add.ll | 25 +++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 test/CodeGen/X86/fold-add.ll diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 855c890120b..f81ab6f8a7b 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -907,6 +907,19 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, !MatchAddress(N.getNode()->getOperand(0), AM, false, Depth+1)) return false; AM = Backup; + + // If we couldn't fold both operands into the address at the same time, + // see if we can just put each operand into a register and fold at least + // the add. 
+      if (AM.BaseType == X86ISelAddressMode::RegBase &&
+          !AM.Base.Reg.getNode() &&
+          !AM.IndexReg.getNode() &&
+          !AM.isRIPRel) {
+        AM.Base.Reg = N.getNode()->getOperand(0);
+        AM.IndexReg = N.getNode()->getOperand(1);
+        AM.Scale = 1;
+        return false;
+      }
       break;
     }
 
diff --git a/test/CodeGen/X86/fold-add.ll b/test/CodeGen/X86/fold-add.ll
new file mode 100644
index 00000000000..2828ad22efb
--- /dev/null
+++ b/test/CodeGen/X86/fold-add.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {cmpb \$0, (%r.\*,%r.\*)}
+; Purpose: the i8 load from @window + (%1 + %2), compared against 0, must be emitted as a cmpb whose memory operand is a (%reg,%reg) base+index pair, as the grep above requires.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.6"
+@prev_length = internal global i32 0  ; <i32*> [#uses=1]
+@window = internal global [65536 x i8] zeroinitializer, align 32  ; <[65536 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @longest_match to i8*)]  ; <[1 x i8*]*> [#uses=0] -- references @longest_match; presumably keeps the internal function from being dropped
+
+define fastcc i32 @longest_match(i32 %cur_match) nounwind {
+entry:
+  %0 = load i32* @prev_length, align 4  ; <i32> [#uses=3]
+  %1 = zext i32 %cur_match to i64  ; <i64> [#uses=1]
+  %2 = sext i32 %0 to i64  ; <i64> [#uses=1]
+  %.sum3 = add i64 %1, %2  ; <i64> [#uses=1] -- the two-register add the RUN line expects to see folded into the address
+  %3 = getelementptr [65536 x i8]* @window, i64 0, i64 %.sum3  ; <i8*> [#uses=1] -- address is @window + %.sum3
+  %4 = load i8* %3, align 1  ; <i8> [#uses=1]
+  %5 = icmp eq i8 %4, 0  ; <i1> [#uses=1] -- presumably this compare plus the load above becomes the grep'd "cmpb $0, ..."
+  br i1 %5, label %bb5, label %bb23
+
+bb5:  ; preds = %entry
+  ret i32 %0
+
+bb23:  ; preds = %entry
+  ret i32 %0
+}