From 61db1f56d0b717d67557bbb2a9d83af1449458cb Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Sun, 26 Dec 2010 22:57:41 +0000
Subject: [PATCH] start using irbuilder to make mem intrinsics in a few passes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122572 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/MemCpyOptimizer.cpp | 79 +++++--------------
 .../Scalar/ScalarReplAggregates.cpp       | 63 ++++-----------
 test/Transforms/MemCpyOpt/align.ll        |  2 +-
 3 files changed, 36 insertions(+), 108 deletions(-)

diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 00ee1457857..d7da538fa3e 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
 #include <list>
@@ -332,8 +333,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     }
   }
 
-  LLVMContext &Context = SI->getContext();
-
   // There are two cases that are interesting for this code to handle: memcpy
   // and memset.  Right now we only handle memset.
 
@@ -345,7 +344,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     return false;
 
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  Module *M = SI->getParent()->getParent()->getParent();
 
   // Okay, so we now have a single store that can be splatable.  Scan to find
   // all subsequent stores of the same value to offset from the same pointer.
@@ -431,28 +429,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
       Alignment = TD->getABITypeAlignment(EltType);
     }
 
-    // Cast the start ptr to be i8* as memset requires.
-    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
-    const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
-                                                  StartPTy->getAddressSpace());
-    if (StartPTy!= i8Ptr)
-      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
-                                 InsertPt);
-
-    Value *Ops[] = {
-      StartPtr, ByteVal,  // Start, value
-      // size
-      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
-      // align
-      ConstantInt::get(Type::getInt32Ty(Context), Alignment),
-      // volatile
-      ConstantInt::getFalse(Context),
-    };
-    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-
-    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-
-    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
+    IRBuilder<> Builder(InsertPt);
+    Value *C =
+      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
     DEBUG(dbgs() << "Replace stores:\n";
           for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
             dbgs() << *Range.TheStores[i] << '\n';
@@ -663,20 +643,11 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
   // If the dest of the second might alias the source of the first, then the
   // source and dest might overlap.  We still want to eliminate the intermediate
   // value, but we have to generate a memmove instead of memcpy.
-  Intrinsic::ID ResultFn = Intrinsic::memcpy;
-  if (AA.alias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)) !=
-      AliasAnalysis::NoAlias)
-    ResultFn = Intrinsic::memmove;
+  bool UseMemMove = false;
+  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+    UseMemMove = true;
 
   // If all checks passed, then we can transform M.
-  const Type *ArgTys[3] = {
-    M->getRawDest()->getType(),
-    MDep->getRawSource()->getType(),
-    M->getLength()->getType()
-  };
-  Function *MemCpyFun =
-    Intrinsic::getDeclaration(MDep->getParent()->getParent()->getParent(),
-                              ResultFn, ArgTys, 3);
 
   // Make sure to use the lesser of the alignment of the source and the dest
   // since we're changing where we're reading from, but don't want to increase
@@ -684,14 +655,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
   // TODO: Is this worth it if we're creating a less aligned memcpy?  For
   // example we could be moving from movaps -> movq on x86.
   unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
-  Value *Args[5] = {
-    M->getRawDest(),
-    MDep->getRawSource(),
-    M->getLength(),
-    ConstantInt::get(Type::getInt32Ty(MemCpyFun->getContext()), Align),
-    M->getVolatileCst()
-  };
-  CallInst::Create(MemCpyFun, Args, Args+5, "", M);
+
+  IRBuilder<> Builder(M);
+  if (UseMemMove)
+    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+                          Align, M->isVolatile());
+  else
+    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+                         Align, M->isVolatile());
 
   // Remove the instruction we're replacing.
   MD->removeInstruction(M);
@@ -722,17 +693,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
     if (GV->isConstant() && GV->hasDefinitiveInitializer())
       if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
-        Value *Ops[] = {
-          M->getRawDest(), ByteVal,               // Start, value
-          CopySize,                               // Size
-          M->getAlignmentCst(),                   // Alignment
-          ConstantInt::getFalse(M->getContext()), // volatile
-        };
-        const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-        Module *Mod = M->getParent()->getParent()->getParent();
-        Function *MemSetF = Intrinsic::getDeclaration(Mod, Intrinsic::memset,
-                                                      Tys, 2);
-        CallInst::Create(MemSetF, Ops, Ops+5, "", M);
+        IRBuilder<> Builder(M);
+        Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+                             M->getAlignment(), false);
         MD->removeInstruction(M);
         M->eraseFromParent();
         ++NumCpyToSet;
@@ -765,9 +728,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
 
   // See if the pointers alias.
-  if (AA.alias(AA.getLocationForDest(M),
-               AA.getLocationForSource(M)) !=
-      AliasAnalysis::NoAlias)
+  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
     return false;
 
   DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 5bd927c507d..5271f56d1e4 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -425,7 +425,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       continue;
     }
 
-    IRBuilder<> Builder(User->getParent(), User);
+    IRBuilder<> Builder(User);
 
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       // The load is a bit extract from NewAI shifted right by Offset bits.
@@ -1353,8 +1353,6 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
   }
 
   // Process each element of the aggregate.
-  Value *TheFn = MI->getCalledValue();
-  const Type *BytePtrTy = MI->getRawDest()->getType();
   bool SROADest = MI->getRawDest() == Inst;
 
   Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
@@ -1448,55 +1446,24 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
         // Otherwise, if we're storing a byte variable, use a memset call for
         // this element.
       }
     }
-
-    // Cast the element pointer to BytePtrTy.
-    if (EltPtr->getType() != BytePtrTy)
-      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
-
-    // Cast the other pointer (if we have one) to BytePtrTy.
-    if (OtherElt && OtherElt->getType() != BytePtrTy) {
-      // Preserve address space of OtherElt
-      const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
-      const PointerType* PTy = cast<PointerType>(BytePtrTy);
-      if (OtherPTy->getElementType() != PTy->getElementType()) {
-        Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
-                                             OtherPTy->getAddressSpace());
-        OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
-                                   OtherElt->getName(), MI);
-      }
-    }
-
+    unsigned EltSize = TD->getTypeAllocSize(EltTy);
+    IRBuilder<> Builder(MI);
+
     // Finally, insert the meminst for this element.
-    if (isa<MemTransferInst>(MI)) {
-      Value *Ops[] = {
-        SROADest ? EltPtr : OtherElt,  // Dest ptr
-        SROADest ? OtherElt : EltPtr,  // Src ptr
-        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
-        // Align
-        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
-        MI->getVolatileCst()
-      };
-      // In case we fold the address space overloaded memcpy of A to B
-      // with memcpy of B to C, change the function to be a memcpy of A to C.
-      const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
-                            Ops[2]->getType() };
-      Module *M = MI->getParent()->getParent()->getParent();
-      TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
-      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+    if (isa<MemSetInst>(MI)) {
+      Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+                           MI->isVolatile());
     } else {
-      assert(isa<MemSetInst>(MI));
-      Value *Ops[] = {
-        EltPtr, MI->getArgOperand(1),  // Dest, Value,
-        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
-        Zero,  // Align
-        ConstantInt::getFalse(MI->getContext()) // isVolatile
-      };
-      const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-      Module *M = MI->getParent()->getParent()->getParent();
-      TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+      assert(isa<MemTransferInst>(MI));
+      Value *Dst = SROADest ? EltPtr : OtherElt;  // Dest ptr
+      Value *Src = SROADest ? OtherElt : EltPtr;  // Src ptr
+
+      if (isa<MemCpyInst>(MI))
+        Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+      else
+        Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
     }
   }
 
   DeadInsts.push_back(MI);
diff --git a/test/Transforms/MemCpyOpt/align.ll b/test/Transforms/MemCpyOpt/align.ll
index b0ae5f413a5..72ea66c20a0 100644
--- a/test/Transforms/MemCpyOpt/align.ll
+++ b/test/Transforms/MemCpyOpt/align.ll
@@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 ; The resulting memset is only 4-byte aligned, despite containing
 ; a 16-byte alignmed store in the middle.
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %a01, i8 0, i64 16, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
 
 define void @foo(i32* %p) {
   %a0 = getelementptr i32* %p, i64 0
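
A minimal sketch of the helpers this patch switches to, assuming the
IRBuilder API of this era as declared in llvm/Support/IRBuilder.h (the header
the first hunk adds); the emitMemSetBefore() wrapper below is hypothetical and
only illustrates the call, it is not code from the patch:

    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Emit "memset(Ptr, Val, Size)" with the given alignment immediately
    // before InsertPt.  CreateMemSet looks up the right llvm.memset.*
    // overload, bitcasts Ptr to i8* in its address space, and materializes
    // the size/align/volatile operands itself -- exactly the boilerplate
    // the hunks above delete.
    static Value *emitMemSetBefore(Instruction *InsertPt, Value *Ptr,
                                   Value *Val, uint64_t Size, unsigned Align) {
      IRBuilder<> Builder(InsertPt);
      return Builder.CreateMemSet(Ptr, Val, Size, Align, /*isVolatile=*/false);
    }

CreateMemCpy and CreateMemMove have matching signatures on the same builder,
which is what lets processMemCpyMemCpyDependence choose between the two with a
plain if/else instead of threading an Intrinsic::ID and explicit overload types
through Intrinsic::getDeclaration.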