diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 66add6ca01e..c9cfb87737e 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -151,6 +151,11 @@ namespace { /// bool isSafeToExecuteUnconditionally(Instruction &I); + /// isGuaranteedToExecute - Check that the instruction is guaranteed to + /// execute. + /// + bool isGuaranteedToExecute(Instruction &I); + /// pointerInvalidatedByLoop - Return true if the body of this loop may /// store into the memory location pointed to by V. /// @@ -577,6 +582,10 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { if (Inst.isSafeToSpeculativelyExecute()) return true; + return isGuaranteedToExecute(Inst); +} + +bool LICM::isGuaranteedToExecute(Instruction &Inst) { // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. @@ -713,34 +722,37 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // If there is an non-load/store instruction in the loop, we can't promote // it. - unsigned InstAlignment; - if (LoadInst *load = dyn_cast(Use)) { + if (isa(Use)) { assert(!cast(Use)->isVolatile() && "AST broken"); - InstAlignment = load->getAlignment(); } else if (StoreInst *store = dyn_cast(Use)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (Use->getOperand(1) != ASIV) continue; - InstAlignment = store->getAlignment(); + unsigned InstAlignment = store->getAlignment(); assert(!cast(Use)->isVolatile() && "AST broken"); + + // Note that we only check GuaranteedToExecute inside the store case + // so that we do not introduce stores where they did not exist before + // (which would break the LLVM concurrency model). + + // If the alignment of this instruction allows us to specify a more + // restrictive (and performant) alignment and if we are sure this + // instruction will be executed, update the alignment. + // Larger is better, with the exception of 0 being the best alignment. + if ((InstAlignment > Alignment || InstAlignment == 0) + && (Alignment != 0)) + if (isGuaranteedToExecute(*Use)) { + GuaranteedToExecute = true; + Alignment = InstAlignment; + } + + if (!GuaranteedToExecute) + GuaranteedToExecute = isGuaranteedToExecute(*Use); + } else return; // Not a load or store. - // If the alignment of this instruction allows us to specify a more - // restrictive (and performant) alignment and if we are sure this - // instruction will be executed, update the alignment. - // Larger is better, with the exception of 0 being the best alignment. - if ((InstAlignment > Alignment || InstAlignment == 0) - && (Alignment != 0)) - if (isSafeToExecuteUnconditionally(*Use)) { - GuaranteedToExecute = true; - Alignment = InstAlignment; - } - - if (!GuaranteedToExecute) - GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); - LoopUses.push_back(Use); } } diff --git a/test/Transforms/LICM/scalar-promote-memmodel.ll b/test/Transforms/LICM/scalar-promote-memmodel.ll new file mode 100644 index 00000000000..23d70f51351 --- /dev/null +++ b/test/Transforms/LICM/scalar-promote-memmodel.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -basicaa -licm -S | FileCheck %s + +; Make sure we don't hoist a conditionally-executed store out of the loop; +; it would violate the concurrency memory model + +@g = common global i32 0, align 4 + +define void @bar(i32 %n, i32 %b) nounwind uwtable ssp { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tobool = icmp eq i32 %b, 0 + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %tmp3 = load i32* @g, align 4 + %inc = add nsw i32 %tmp3, 1 + store i32 %inc, i32* @g, align 4 + br label %for.inc + +; CHECK: load i32* +; CHECK-NEXT: add +; CHECK-NEXT: store i32 + +for.inc: ; preds = %for.body, %if.then + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}