From 56efe24431b045be120d1fd5f6b0aa43a6b01c48 Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Wed, 17 Aug 2011 22:22:24 +0000
Subject: [PATCH] Atomic load/store handling for the passes using memdep (GVN, DSE, memcpyopt).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137888 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../Scalar/DeadStoreElimination.cpp           |   8 +-
 lib/Transforms/Scalar/GVN.cpp                 |   6 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp     |   6 +-
 .../Transforms/DeadStoreElimination/atomic.ll | 107 ++++++++++++++++++
 test/Transforms/GVN/atomic.ll                 |  80 +++++++++++++
 test/Transforms/MemCpyOpt/atomic.ll           |  41 +++++++
 6 files changed, 239 insertions(+), 9 deletions(-)
 create mode 100644 test/Transforms/DeadStoreElimination/atomic.ll
 create mode 100644 test/Transforms/GVN/atomic.ll
 create mode 100644 test/Transforms/MemCpyOpt/atomic.ll

diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a645f921162..8559147b716 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -213,9 +213,9 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
 /// isRemovable - If the value of this instruction and the memory it writes to
 /// is unused, may we delete this instruction?
 static bool isRemovable(Instruction *I) {
-  // Don't remove volatile stores.
+  // Don't remove volatile/atomic stores.
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return !SI->isVolatile();
+    return SI->isUnordered();
 
   IntrinsicInst *II = cast<IntrinsicInst>(I);
   switch (II->getIntrinsicID()) {
@@ -447,7 +447,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
         if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
-            SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+            SI->getOperand(0) == DepLoad && isRemovable(SI)) {
           DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
                        << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
 
@@ -670,6 +670,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
 
     // If we encounter a use of the pointer, it is no longer considered dead
     if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+      if (!L->isUnordered()) // Be conservative with atomic/volatile load
+        break;
       LoadedLoc = AA->getLocation(L);
     } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
       LoadedLoc = AA->getLocation(V);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 9785b318924..3b4c252990a 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -982,8 +982,8 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
   unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
   unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
   if (Offset+LoadSize > SrcValSize) {
-    assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
-    assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
     // If we have a load/load clobber an DepLI can be widened to cover this
     // load, then we should widen it to the next power of 2 size big enough!
     unsigned NewLoadSize = Offset+LoadSize;
@@ -1669,7 +1669,7 @@ bool GVN::processLoad(LoadInst *L) {
   if (!MD)
     return false;
 
-  if (L->isVolatile())
+  if (!L->isSimple())
     return false;
 
   if (L->use_empty()) {
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index ba5ee68ebb0..a2ef70f12e3 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -384,7 +384,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
     if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
       // If this is a store, see if we can merge it in.
-      if (NextStore->isVolatile()) break;
+      if (!NextStore->isSimple()) break;
 
       // Check to see if this stored value is of the same byte-splattable value.
       if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
@@ -479,7 +479,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
 
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
-  if (SI->isVolatile()) return false;
+  if (!SI->isSimple()) return false;
 
   if (TD == 0) return false;
 
@@ -487,7 +487,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   // happen to be using a load-store pair to implement it, rather than
   // a memcpy.
   if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
-    if (!LI->isVolatile() && LI->hasOneUse() &&
+    if (LI->isSimple() && LI->hasOneUse() &&
         LI->getParent() == SI->getParent()) {
       MemDepResult ldep = MD->getDependency(LI);
       CallInst *C = 0;
diff --git a/test/Transforms/DeadStoreElimination/atomic.ll b/test/Transforms/DeadStoreElimination/atomic.ll
new file mode 100644
index 00000000000..2e84298ad40
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/atomic.ll
@@ -0,0 +1,107 @@
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; Sanity tests for atomic stores.
+; Note that it turns out essentially every transformation DSE does is legal on
+; atomic ops, just some transformations are not allowed across them.
+
+@x = common global i32 0, align 4
+@y = common global i32 0, align 4
+
+declare void @randomop(i32*)
+
+; DSE across unordered store (allowed)
+define void @test1() nounwind uwtable ssp {
+; CHECK: test1
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+entry:
+  store i32 0, i32* @x
+  store atomic i32 0, i32* @y unordered, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE across seq_cst load (allowed in theory; not implemented ATM)
+define i32 @test2() nounwind uwtable ssp {
+; CHECK: test2
+; CHECK: store i32 0
+; CHECK: store i32 1
+entry:
+  store i32 0, i32* @x
+  %x = load atomic i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; DSE across seq_cst store (store before atomic store must not be removed)
+define void @test3() nounwind uwtable ssp {
+; CHECK: test3
+; CHECK: store i32
+; CHECK: store atomic i32 2
+entry:
+  store i32 0, i32* @x
+  store atomic i32 2, i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE remove unordered store (allowed)
+define void @test4() nounwind uwtable ssp {
+; CHECK: test4
+; CHECK-NOT: store atomic
+; CHECK: store i32 1
+entry:
+  store atomic i32 0, i32* @x unordered, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE unordered store overwriting non-atomic store (allowed)
+define void @test5() nounwind uwtable ssp {
+; CHECK: test5
+; CHECK: store atomic i32 1
+entry:
+  store i32 0, i32* @x
+  store atomic i32 1, i32* @x unordered, align 4
+  ret void
+}
+
+; DSE no-op unordered atomic store (allowed)
+define void @test6() nounwind uwtable ssp {
+; CHECK: test6
+; CHECK-NOT: store
+; CHECK: ret void
+entry:
+  %x = load atomic i32* @x unordered, align 4
+  store atomic i32 %x, i32* @x unordered, align 4
+  ret void
+}
+
+; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
+; to reason about atomic operations).
+define void @test7() nounwind uwtable ssp {
+; CHECK: test7
+; CHECK: store atomic
+entry:
+  %a = alloca i32
+  store atomic i32 0, i32* %a seq_cst, align 4
+  ret void
+}
+
+; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
+; to reason about atomic operations).
+define i32 @test8() nounwind uwtable ssp {
+; CHECK: test8
+; CHECK: store
+; CHECK: load atomic
+entry:
+  %a = alloca i32
+  call void @randomop(i32* %a)
+  store i32 0, i32* %a, align 4
+  %x = load atomic i32* @x seq_cst, align 4
+  ret i32 %x
+}
+
diff --git a/test/Transforms/GVN/atomic.ll b/test/Transforms/GVN/atomic.ll
new file mode 100644
index 00000000000..094e22bd07e
--- /dev/null
+++ b/test/Transforms/GVN/atomic.ll
@@ -0,0 +1,80 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@x = common global i32 0, align 4
+@y = common global i32 0, align 4
+
+; GVN across unordered store (allowed)
+define i32 @test1() nounwind uwtable ssp {
+; CHECK: test1
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x unordered, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across seq_cst store (allowed in theory; not implemented ATM)
+define i32 @test2() nounwind uwtable ssp {
+; CHECK: test2
+; CHECK: add i32 %x, %y
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x seq_cst, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across unordered load (allowed)
+define i32 @test3() nounwind uwtable ssp {
+; CHECK: test3
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  %y = load atomic i32* @x unordered, align 4
+  %z = load i32* @y
+  %a = add i32 %x, %z
+  %b = add i32 %y, %a
+  ret i32 %b
+}
+
+; GVN across acquire load (load after atomic load must not be removed)
+define i32 @test4() nounwind uwtable ssp {
+; CHECK: test4
+; CHECK: load atomic i32* @x
+; CHECK: load i32* @y
+entry:
+  %x = load i32* @y
+  %y = load atomic i32* @x seq_cst, align 4
+  %x2 = load i32* @y
+  %x3 = add i32 %x, %x2
+  %y2 = add i32 %y, %x3
+  ret i32 %y2
+}
+
+; GVN load to unordered load (allowed)
+define i32 @test5() nounwind uwtable ssp {
+; CHECK: test5
+; CHECK: add i32 %x, %x
+entry:
+  %x = load atomic i32* @x unordered, align 4
+  %y = load i32* @x
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN unordered load to load (unordered load must not be removed)
+define i32 @test6() nounwind uwtable ssp {
+; CHECK: test6
+; CHECK: load atomic i32* @x unordered
+entry:
+  %x = load i32* @x
+  %x2 = load atomic i32* @x unordered, align 4
+  %x3 = add i32 %x, %x2
+  ret i32 %x3
+}
diff --git a/test/Transforms/MemCpyOpt/atomic.ll b/test/Transforms/MemCpyOpt/atomic.ll
new file mode 100644
index 00000000000..f351e6792e2
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/atomic.ll
@@ -0,0 +1,41 @@
+; RUN: opt -basicaa -memcpyopt -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@x = global i32 0
+
+declare void @otherf(i32*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; memcpyopt should not touch atomic ops
+define void @test1() nounwind uwtable ssp {
+; CHECK: test1
+; CHECK: store atomic
+  %x = alloca [101 x i32], align 16
+  %bc = bitcast [101 x i32]* %x to i8*
+  call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i32 16, i1 false)
+  %gep1 = getelementptr inbounds [101 x i32]* %x, i32 0, i32 100
+  store atomic i32 0, i32* %gep1 unordered, align 4
+  %gep2 = getelementptr inbounds [101 x i32]* %x, i32 0, i32 0
+  call void @otherf(i32* %gep2)
+  ret void
+}
+
+; memcpyopt across unordered store
+define void @test2() nounwind uwtable ssp {
+; CHECK: test2
+; CHECK: call
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: call
+  %old = alloca i32
+  %new = alloca i32
+  call void @otherf(i32* nocapture %old)
+  store atomic i32 0, i32* @x unordered, align 4
+  %v = load i32* %old
+  store i32 %v, i32* %new
+  call void @otherf(i32* nocapture %new)
+  ret void
+}
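
For context on the predicates the patch switches to: isVolatile() checks only volatility, isSimple() additionally rejects any atomic access, and isUnordered() sits in between, accepting non-volatile accesses whose ordering is at most "unordered". Below is a minimal sketch (illustrative only, not part of the commit; the helper names are invented) of how a memdep-based pass might classify an access with these LoadInst/StoreInst members, assuming LLVM headers of roughly this era.

// Illustrative sketch -- not part of the patch above. Helper names are made up;
// the predicates are the real LoadInst/StoreInst members used in the commit.
#include "llvm/Instructions.h"
using namespace llvm;

// Neither volatile nor atomic: eligible for the most aggressive transformations.
static bool isSimpleAccess(Instruction *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->isSimple();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->isSimple();
  return false;
}

// Non-volatile and at most 'unordered' atomic: e.g. a dead store of this kind
// may still be deleted, while more strongly ordered operations are treated as
// barriers that block elimination across them.
static bool isUnorderedAccess(Instruction *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->isUnordered();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->isUnordered();
  return false;
}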