diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 321151802bf..bb91d805893 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -39,7 +39,7 @@ template class SmallVectorImpl; /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - const TargetData *TD = 0); + unsigned Align, const TargetData *TD = 0); //===----------------------------------------------------------------------===// // Local constant propagation. diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 422ffd03a89..2d13298300d 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -200,14 +200,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // if (SelectInst *SI = dyn_cast(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). - if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, TD) && - isSafeToLoadUnconditionally(SI->getOperand(2), SI, TD)) { + unsigned Align = LI.getAlignment(); + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) { LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"); + SI->getOperand(1)->getName()+".val"); LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"); - V1->setAlignment(LI.getAlignment()); - V2->setAlignment(LI.getAlignment()); + SI->getOperand(2)->getName()+".val"); + V1->setAlignment(Align); + V2->setAlignment(Align); return SelectInst::Create(SI->getCondition(), V1, V2); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 9c184526650..6e709523af3 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1651,7 +1651,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (!allSingleSucc && // FIXME: REEVALUTE THIS. !isSafeToLoadUnconditionally(LoadPtr, - UnavailablePred->getTerminator(), TD)) { + UnavailablePred->getTerminator(), + LI->getAlignment(), TD)) { assert(NewInsts.empty() && "Should not have inserted instructions"); return false; } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 4119cb9db41..162d902cfa4 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -211,7 +211,8 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // FIXME: Writes to memory only matter if they may alias the pointer // being loaded from. if (CI->mayWriteToMemory() || - !isSafeToLoadUnconditionally(L->getPointerOperand(), L)) + !isSafeToLoadUnconditionally(L->getPointerOperand(), L, + L->getAlignment())) return false; } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 425f5572969..62c0ae0d844 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -75,31 +75,38 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - const TargetData *TD) { + unsigned Align, const TargetData *TD) { uint64_t ByteOffset = 0; Value *Base = V; if (TD) Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); const Type *BaseType = 0; - if (const AllocaInst *AI = dyn_cast(Base)) - // If it is an alloca it is always safe to load from. + unsigned BaseAlign = 0; + if (const AllocaInst *AI = dyn_cast(Base)) { + // An alloca is safe to load from as load as it is suitably aligned. BaseType = AI->getAllocatedType(); - else if (const GlobalValue *GV = dyn_cast(Base)) { + BaseAlign = AI->getAlignment(); + } else if (const GlobalValue *GV = dyn_cast(Base)) { // Global variables are safe to load from but their size cannot be // guaranteed if they are overridden. - if (!isa(GV) && !GV->mayBeOverridden()) + if (!isa(GV) && !GV->mayBeOverridden()) { BaseType = GV->getType()->getElementType(); + BaseAlign = GV->getAlignment(); + } } + if (TD && BaseType && BaseAlign == 0) + BaseAlign = TD->getPrefTypeAlignment(BaseType); - if (BaseType) { + if (BaseType && Align <= BaseAlign) { if (!TD) return true; // Loading directly from an alloca or global is OK. if (BaseType->isSized()) { // Check if the load is within the bounds of the underlying object. const PointerType *AddrTy = cast(V->getType()); uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); - if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType)) + if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && + (Align == 0 || (ByteOffset % Align) == 0)) return true; } } diff --git a/test/Transforms/GVN/load-pre-align.ll b/test/Transforms/GVN/load-pre-align.ll new file mode 100644 index 00000000000..3a66c0ba62f --- /dev/null +++ b/test/Transforms/GVN/load-pre-align.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -gvn -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" + +@p = external global i32 + +define arm_apcscc i32 @test(i32 %n) nounwind { +; CHECK: @test +entry: + br label %for.cond + +; loads aligned greater than the memory should not be moved past conditionals +; CHECK-NOT: load +; CHECK: br i1 + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %indvar.next, %for.inc ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: +; ...but PRE can still move the load out of for.end to here. +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: load + br label %for.end + +for.body: + %tmp3 = load i32* @p, align 8 + %dec = add i32 %tmp3, -1 + store i32 %dec, i32* @p + %cmp6 = icmp slt i32 %dec, 0 + br i1 %cmp6, label %for.body.for.end_crit_edge, label %for.inc + +for.body.for.end_crit_edge: + br label %for.end + +for.inc: + %indvar.next = add i32 %i.0, 1 + br label %for.cond + +for.end: + %tmp9 = load i32* @p, align 8 + ret i32 %tmp9 +}