diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 9765485b5d5..91d702a5660 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" @@ -70,6 +71,10 @@ static cl::opt DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); +static cl::opt AddrSinkUsingGEPs( + "addr-sink-using-gep", cl::Hidden, cl::init(false), + cl::desc("Address sinking in CGP using GEPs.")); + static cl::opt EnableAndCmpSinking( "enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinkinig and/cmp into branches.")); @@ -2423,6 +2428,127 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && + TM && TM->getSubtarget().useAA())) { + // By default, we use the GEP-based method when AA is used later. This + // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Value *ResultPtr = 0, *ResultIndex = 0; + + // First, find the pointer. + if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { + ResultPtr = AddrMode.BaseReg; + AddrMode.BaseReg = 0; + } + + if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { + // We can't add more than one pointer together, nor can we scale a + // pointer (both of which seem meaningless). + if (ResultPtr || AddrMode.Scale != 1) + return false; + + ResultPtr = AddrMode.ScaledReg; + AddrMode.Scale = 0; + } + + if (AddrMode.BaseGV) { + if (ResultPtr) + return false; + + ResultPtr = AddrMode.BaseGV; + } + + // If the real base value actually came from an inttoptr, then the matcher + // will look through it and provide only the integer value. In that case, + // use it here. + if (!ResultPtr && AddrMode.BaseReg) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); + AddrMode.BaseReg = 0; + } else if (!ResultPtr && AddrMode.Scale == 1) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); + AddrMode.Scale = 0; + } + + if (!ResultPtr && + !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { + SunkAddr = Constant::getNullValue(Addr->getType()); + } else if (!ResultPtr) { + return false; + } else { + Type *I8PtrTy = + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + + // Start with the base register. Do this first so that subsequent address + // matching finds it last, which will prevent it from trying to match it + // as the scaled value in case it happens to be a mul. That would be + // problematic if we've sunk a different mul for the scale, because then + // we'd end up sinking both muls. + if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType() != IntPtrTy) + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); + + ResultIndex = V; + } + + // Add the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (cast(IntPtrTy)->getBitWidth() < + cast(V->getType())->getBitWidth()) { + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); + } else { + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + Instruction *I = dyn_cast_or_null(ResultIndex); + if (I && (ResultIndex != AddrMode.BaseReg)) + I->eraseFromParent(); + return false; + } + + if (AddrMode.Scale != 1) + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); + if (ResultIndex) + ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); + else + ResultIndex = V; + } + + // Add in the Base Offset if present. + if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (ResultIndex) { + // We need to add this separately from the scale above to help with + // SDAG consecutive load/store merging. + if (ResultPtr->getType() != I8PtrTy) + ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + } + + ResultIndex = V; + } + + if (!ResultIndex) { + SunkAddr = ResultPtr; + } else { + if (ResultPtr->getType() != I8PtrTy) + ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + } + + if (SunkAddr->getType() != Addr->getType()) + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll index 94bced5200c..5a8f623e6f5 100644 --- a/test/CodeGen/ARM/phi.ll +++ b/test/CodeGen/ARM/phi.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v4t -addr-sink-using-gep=1 %s -o - | FileCheck %s ; diff --git a/test/CodeGen/ARM64/dagcombiner-indexed-load.ll b/test/CodeGen/ARM64/dagcombiner-indexed-load.ll index 6cea039ad85..2e4b658f1c9 100644 --- a/test/CodeGen/ARM64/dagcombiner-indexed-load.ll +++ b/test/CodeGen/ARM64/dagcombiner-indexed-load.ll @@ -1,4 +1,5 @@ ; RUN: llc -O3 < %s | FileCheck %s +; RUN: llc -O3 -addr-sink-using-gep=1 < %s | FileCheck %s ; Test case for a DAG combiner bug where we combined an indexed load ; with an extension (sext, zext, or any) into a regular extended load, ; i.e., dropping the indexed value. diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll index ccf52974a67..df83f8b191c 100644 --- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll +++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll @@ -1,4 +1,5 @@ ; RUN: llc -mcpu=g5 < %s | FileCheck %s +; RUN: llc -mcpu=g5 -addr-sink-using-gep=1 < %s | FileCheck %s ;; Formerly crashed, see PR 1508 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc64-apple-darwin8" diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll index e1f890192d1..4d7c3a185a8 100644 --- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll +++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \ -; RUN: grep push | count 3 +; RUN: llc < %s -march=x86 -mtriple=i686-darwin | FileCheck %s +; RUN: llc < %s -march=x86 -mtriple=i686-darwin -addr-sink-using-gep=1 | FileCheck %s define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind { entry: +; CHECK-LABEL: @foo +; CHECK: push +; CHECK: push +; CHECK: push +; CHECK-NOT: push + icmp sgt i32 %size, 0 ; :0 [#uses=1] br i1 %0, label %bb.preheader, label %return diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll index 0ef3aa5b6f0..f6d68520b7b 100644 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s +; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll index e3d6b349d6e..78e1dd287f6 100644 --- a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll +++ b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -codegenprepare %s -o - | FileCheck %s +; RUN: opt -S -codegenprepare -addr-sink-using-gep=1 %s -o - | FileCheck -check-prefix=CHECK-GEP %s ; This file tests the different cases what are involved when codegen prepare ; tries to get sign extension out of the way of addressing mode. ; This tests require an actual target as addressing mode decisions depends @@ -281,6 +282,25 @@ define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) { ; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32* ; CHECK: load i32* [[ADDR2]] ; CHECK: ret +; CHECK-GEP-LABEL: @checkProfitability +; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64 +; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64 +; CHECK-GEP: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1 +; CHECK-GEP: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2 +; CHECK-GEP: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64 +; BB then +; CHECK-GEP: [[BASE1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32* +; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8* +; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC1]], i64 48 +; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32* +; CHECK-GEP: load i32* [[ADDR1]] +; BB else +; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32* +; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8* +; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC2]], i64 48 +; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32* +; CHECK-GEP: load i32* [[ADDR2]] +; CHECK-GEP: ret define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) { %shl = shl nsw i32 %arg1, 1 %add1 = add nsw i32 %shl, %arg2 diff --git a/test/CodeGen/X86/codegen-prepare.ll b/test/CodeGen/X86/codegen-prepare.ll index 316accfa41a..4ff0f1c0ba2 100644 --- a/test/CodeGen/X86/codegen-prepare.ll +++ b/test/CodeGen/X86/codegen-prepare.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -addr-sink-using-gep=1 | FileCheck %s ; Check that the CodeGenPrepare Pass ; does not wrongly rewrite the address computed by Instruction %4 diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll index 458f19dfc4f..e4af9b67f95 100644 --- a/test/CodeGen/X86/isel-sink.ll +++ b/test/CodeGen/X86/isel-sink.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -addr-sink-using-gep=1 | FileCheck %s define i32 @test(i32* %X, i32 %B) { ; CHECK-LABEL: test: diff --git a/test/CodeGen/X86/merge_store.ll b/test/CodeGen/X86/merge_store.ll index 940688c6252..f98963d8e90 100644 --- a/test/CodeGen/X86/merge_store.ll +++ b/test/CodeGen/X86/merge_store.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -addr-sink-using-gep=1 | FileCheck %s define void @merge_store(i32* nocapture %a) { ; CHECK-LABEL: merge_store: diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll index c24eda3ddc8..f4edf092641 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll +++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll @@ -1,4 +1,5 @@ ; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9 +; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 -addr-sink-using-gep=1 %s -o - | FileCheck %s -check-prefix=A9 ; @simple is the most basic chain of address induction variables. Chaining ; saves at least one register and avoids complex addressing and setup diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll index e42b67fd35a..937791dca41 100644 --- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64 ; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X32 ; @simple is the most basic chain of address induction variables. Chaining ; saves at least one register and avoids complex addressing and setup