//===- RSProfiling.cpp - Various profiling using random sampling ----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // These passes implement a random sampling based profiling. Different methods // of choosing when to sample are supported, as well as different types of // profiling. This is done as two passes. The first is a sequence of profiling // passes which insert profiling into the program, and remember what they // inserted. // // The second stage duplicates all instructions in a function, ignoring the // profiling code, then connects the two versions togeather at the entry and at // backedges. At each connection point a choice is made as to whether to jump // to the profiled code (take a sample) or execute the unprofiled code. // // It is highly recommended that after this pass one runs mem2reg and adce // (instcombine load-vn gdce dse also are good to run afterwards) // // This design is intended to make the profiling passes independent of the RS // framework, but any profiling pass that implements the RSProfiling interface // is compatible with the rs framework (and thus can be sampled) // // TODO: obviously the block and function profiling are almost identical to the // existing ones, so they can be unified (esp since these passes are valid // without the rs framework). // TODO: Fix choice code so that frequency is not hard coded // //===----------------------------------------------------------------------===// #include "llvm/Pass.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Intrinsics.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "RSProfiling.h" #include #include #include using namespace llvm; namespace { enum RandomMeth { GBV, GBVO, HOSTCC }; } static cl::opt RandomMethod("profile-randomness", cl::desc("How to randomly choose to profile:"), cl::values( clEnumValN(GBV, "global", "global counter"), clEnumValN(GBVO, "ra_global", "register allocated global counter"), clEnumValN(HOSTCC, "rdcc", "cycle counter"), clEnumValEnd)); namespace { /// NullProfilerRS - The basic profiler that does nothing. It is the default /// profiler and thus terminates RSProfiler chains. It is useful for /// measuring framework overhead class VISIBILITY_HIDDEN NullProfilerRS : public RSProfilers { public: static char ID; // Pass identification, replacement for typeid bool isProfiling(Value* v) { return false; } bool runOnModule(Module &M) { return false; } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } }; } static RegisterAnalysisGroup A("Profiling passes"); static RegisterPass NP("insert-null-profiling-rs", "Measure profiling framework overhead"); static RegisterAnalysisGroup NPT(NP); namespace { /// Chooser - Something that chooses when to make a sample of the profiled code class VISIBILITY_HIDDEN Chooser { public: /// ProcessChoicePoint - is called for each basic block inserted to choose /// between normal and sample code virtual void ProcessChoicePoint(BasicBlock*) = 0; /// PrepFunction - is called once per function before other work is done. /// This gives the opertunity to insert new allocas and such. virtual void PrepFunction(Function*) = 0; virtual ~Chooser() {} }; //Things that implement sampling policies //A global value that is read-mod-stored to choose when to sample. //A sample is taken when the global counter hits 0 class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser { GlobalVariable* Counter; Value* ResetValue; const IntegerType* T; public: GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval); virtual ~GlobalRandomCounter(); virtual void PrepFunction(Function* F); virtual void ProcessChoicePoint(BasicBlock* bb); }; //Same is GRC, but allow register allocation of the global counter class VISIBILITY_HIDDEN GlobalRandomCounterOpt : public Chooser { GlobalVariable* Counter; Value* ResetValue; AllocaInst* AI; const IntegerType* T; public: GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval); virtual ~GlobalRandomCounterOpt(); virtual void PrepFunction(Function* F); virtual void ProcessChoicePoint(BasicBlock* bb); }; //Use the cycle counter intrinsic as a source of pseudo randomness when //deciding when to sample. class VISIBILITY_HIDDEN CycleCounter : public Chooser { uint64_t rm; Constant *F; public: CycleCounter(Module& m, uint64_t resetmask); virtual ~CycleCounter(); virtual void PrepFunction(Function* F); virtual void ProcessChoicePoint(BasicBlock* bb); }; /// ProfilerRS - Insert the random sampling framework struct VISIBILITY_HIDDEN ProfilerRS : public FunctionPass { static char ID; // Pass identification, replacement for typeid ProfilerRS() : FunctionPass(&ID) {} std::map TransCache; std::set ChoicePoints; Chooser* c; //Translate and duplicate values for the new profile free version of stuff Value* Translate(Value* v); //Duplicate an entire function (with out profiling) void Duplicate(Function& F, RSProfilers& LI); //Called once for each backedge, handle the insertion of choice points and //the interconection of the two versions of the code void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F); bool runOnFunction(Function& F); bool doInitialization(Module &M); virtual void getAnalysisUsage(AnalysisUsage &AU) const; }; } static RegisterPass X("insert-rs-profiling-framework", "Insert random sampling instrumentation framework"); char RSProfilers::ID = 0; char NullProfilerRS::ID = 0; char ProfilerRS::ID = 0; //Local utilities static void ReplacePhiPred(BasicBlock* btarget, BasicBlock* bold, BasicBlock* bnew); static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc); template static void recBackEdge(BasicBlock* bb, T& BackEdges, std::map& color, std::map& depth, std::map& finish, int& time); //find the back edges and where they go to template static void getBackEdges(Function& F, T& BackEdges); /////////////////////////////////////// // Methods of choosing when to profile /////////////////////////////////////// GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval) : T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); ResetValue = Init; Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, Init, "RandomSteeringCounter"); } GlobalRandomCounter::~GlobalRandomCounter() {} void GlobalRandomCounter::PrepFunction(Function* F) {} void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) { BranchInst* t = cast(bb->getTerminator()); //decrement counter LoadInst* l = new LoadInst(Counter, "counter", t); ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), "countercc"); Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), "counternew", t); new StoreInst(nv, Counter, t); t->setCondition(s); //reset counter BasicBlock* oldnext = t->getSuccessor(0); BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), "reset", oldnext->getParent(), oldnext); TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); t->setSuccessor(0, resetblock); new StoreInst(ResetValue, Counter, t2); ReplacePhiPred(oldnext, bb, resetblock); } GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval) : AI(0), T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); ResetValue = Init; Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, Init, "RandomSteeringCounter"); } GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {} void GlobalRandomCounterOpt::PrepFunction(Function* F) { //make a local temporary to cache the global BasicBlock& bb = F->getEntryBlock(); BasicBlock::iterator InsertPt = bb.begin(); AI = new AllocaInst(T, 0, "localcounter", InsertPt); LoadInst* l = new LoadInst(Counter, "counterload", InsertPt); new StoreInst(l, AI, InsertPt); //modify all functions and return values to restore the local variable to/from //the global variable for(Function::iterator fib = F->begin(), fie = F->end(); fib != fie; ++fib) for(BasicBlock::iterator bib = fib->begin(), bie = fib->end(); bib != bie; ++bib) if (isa(bib)) { LoadInst* l = new LoadInst(AI, "counter", bib); new StoreInst(l, Counter, bib); l = new LoadInst(Counter, "counter", ++bib); new StoreInst(l, AI, bib--); } else if (isa(bib)) { LoadInst* l = new LoadInst(AI, "counter", bib); new StoreInst(l, Counter, bib); BasicBlock* bb = cast(bib)->getNormalDest(); BasicBlock::iterator i = bb->getFirstNonPHI(); l = new LoadInst(Counter, "counter", i); bb = cast(bib)->getUnwindDest(); i = bb->getFirstNonPHI(); l = new LoadInst(Counter, "counter", i); new StoreInst(l, AI, i); } else if (isa(&*bib) || isa(&*bib)) { LoadInst* l = new LoadInst(AI, "counter", bib); new StoreInst(l, Counter, bib); } } void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) { BranchInst* t = cast(bb->getTerminator()); //decrement counter LoadInst* l = new LoadInst(AI, "counter", t); ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), "countercc"); Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), "counternew", t); new StoreInst(nv, AI, t); t->setCondition(s); //reset counter BasicBlock* oldnext = t->getSuccessor(0); BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), "reset", oldnext->getParent(), oldnext); TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); t->setSuccessor(0, resetblock); new StoreInst(ResetValue, AI, t2); ReplacePhiPred(oldnext, bb, resetblock); } CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) { F = Intrinsic::getDeclaration(&m, Intrinsic::readcyclecounter); } CycleCounter::~CycleCounter() {} void CycleCounter::PrepFunction(Function* F) {} void CycleCounter::ProcessChoicePoint(BasicBlock* bb) { BranchInst* t = cast(bb->getTerminator()); CallInst* c = CallInst::Create(F, "rdcc", t); BinaryOperator* b = BinaryOperator::CreateAnd(c, ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm), "mrdcc", t); ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b, ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0), "mrdccc"); t->setCondition(s); } /////////////////////////////////////// // Profiling: /////////////////////////////////////// bool RSProfilers_std::isProfiling(Value* v) { if (profcode.find(v) != profcode.end()) return true; //else RSProfilers& LI = getAnalysis(); return LI.isProfiling(v); } void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray) { // Insert the increment after any alloca or PHI instructions... BasicBlock::iterator InsertPos = BB->getFirstNonPHI(); while (isa(InsertPos)) ++InsertPos; // Create the getelementptr constant expression std::vector Indices(2); Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext())); Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum); Constant *ElementPtr =ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], 2); // Load, increment and store the value back. Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos); profcode.insert(OldVal); Value *NewVal = BinaryOperator::CreateAdd(OldVal, ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1), "NewCounter", InsertPos); profcode.insert(NewVal); profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos)); } void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const { //grab any outstanding profiler, or get the null one AU.addRequired(); } /////////////////////////////////////// // RS Framework /////////////////////////////////////// Value* ProfilerRS::Translate(Value* v) { if(TransCache[v]) return TransCache[v]; if (BasicBlock* bb = dyn_cast(v)) { if (bb == &bb->getParent()->getEntryBlock()) TransCache[bb] = bb; //don't translate entry block else TransCache[bb] = BasicBlock::Create(v->getContext(), "dup_" + bb->getName(), bb->getParent(), NULL); return TransCache[bb]; } else if (Instruction* i = dyn_cast(v)) { //we have already translated this //do not translate entry block allocas if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) { TransCache[i] = i; return i; } else { //translate this Instruction* i2 = i->clone(v->getContext()); if (i->hasName()) i2->setName("dup_" + i->getName()); TransCache[i] = i2; //NumNewInst++; for (unsigned x = 0; x < i2->getNumOperands(); ++x) i2->setOperand(x, Translate(i2->getOperand(x))); return i2; } } else if (isa(v) || isa(v) || isa(v)) { TransCache[v] = v; return v; } llvm_unreachable("Value not handled"); return 0; } void ProfilerRS::Duplicate(Function& F, RSProfilers& LI) { //perform a breadth first search, building up a duplicate of the code std::queue worklist; std::set seen; //This loop ensures proper BB order, to help performance for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib) worklist.push(fib); while (!worklist.empty()) { Translate(worklist.front()); worklist.pop(); } //remember than reg2mem created a new entry block we don't want to duplicate worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0)); seen.insert(&F.getEntryBlock()); while (!worklist.empty()) { BasicBlock* bb = worklist.front(); worklist.pop(); if(seen.find(bb) == seen.end()) { BasicBlock* bbtarget = cast(Translate(bb)); BasicBlock::InstListType& instlist = bbtarget->getInstList(); for (BasicBlock::iterator iib = bb->begin(), iie = bb->end(); iib != iie; ++iib) { //NumOldInst++; if (!LI.isProfiling(&*iib)) { Instruction* i = cast(Translate(iib)); instlist.insert(bbtarget->end(), i); } } //updated search state; seen.insert(bb); TerminatorInst* ti = bb->getTerminator(); for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) { BasicBlock* bbs = ti->getSuccessor(x); if (seen.find(bbs) == seen.end()) { worklist.push(bbs); } } } } } void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) { //given a backedge from B -> A, and translations A' and B', //a: insert C and C' //b: add branches in C to A and A' and in C' to A and A' //c: mod terminators@B, replace A with C //d: mod terminators@B', replace A' with C' //e: mod phis@A for pred B to be pred C // if multiple entries, simplify to one //f: mod phis@A' for pred B' to be pred C' // if multiple entries, simplify to one //g: for all phis@A with pred C using x // add in edge from C' using x' // add in edge from C using x in A' //a: Function::iterator BBN = src; ++BBN; BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN); //ChoicePoints.insert(bbC); BBN = cast(Translate(src)); BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN); ChoicePoints.insert(bbCp); //b: BranchInst::Create(cast(Translate(dst)), bbC); BranchInst::Create(dst, cast(Translate(dst)), ConstantInt::get(Type::getInt1Ty(src->getContext()), true), bbCp); //c: { TerminatorInst* iB = src->getTerminator(); for (unsigned x = 0; x < iB->getNumSuccessors(); ++x) if (iB->getSuccessor(x) == dst) iB->setSuccessor(x, bbC); } //d: { TerminatorInst* iBp = cast(Translate(src->getTerminator())); for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x) if (iBp->getSuccessor(x) == cast(Translate(dst))) iBp->setSuccessor(x, bbCp); } //e: ReplacePhiPred(dst, src, bbC); //src could be a switch, in which case we are replacing several edges with one //thus collapse those edges int the Phi CollapsePhi(dst, bbC); //f: ReplacePhiPred(cast(Translate(dst)), cast(Translate(src)),bbCp); CollapsePhi(cast(Translate(dst)), bbCp); //g: for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie; ++ib) if (PHINode* phi = dyn_cast(&*ib)) { for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x) if(bbC == phi->getIncomingBlock(x)) { phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp); cast(Translate(phi))->addIncoming(phi->getIncomingValue(x), bbC); } phi->removeIncomingValue(bbC); } } bool ProfilerRS::runOnFunction(Function& F) { if (!F.isDeclaration()) { std::set > BackEdges; RSProfilers& LI = getAnalysis(); getBackEdges(F, BackEdges); Duplicate(F, LI); //assume that stuff worked. now connect the duplicated basic blocks //with the originals in such a way as to preserve ssa. yuk! for (std::set >::iterator ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib) ProcessBackEdge(ib->first, ib->second, F); //oh, and add the edge from the reg2mem created entry node to the //duplicated second node TerminatorInst* T = F.getEntryBlock().getTerminator(); ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0), cast( Translate(T->getSuccessor(0))), ConstantInt::get(Type::getInt1Ty(F.getContext()), true))); //do whatever is needed now that the function is duplicated c->PrepFunction(&F); //add entry node to choice points ChoicePoints.insert(&F.getEntryBlock()); for (std::set::iterator ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii) c->ProcessChoicePoint(*ii); ChoicePoints.clear(); TransCache.clear(); return true; } return false; } bool ProfilerRS::doInitialization(Module &M) { switch (RandomMethod) { case GBV: c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()), (1 << 14) - 1); break; case GBVO: c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()), (1 << 14) - 1); break; case HOSTCC: c = new CycleCounter(M, (1 << 14) - 1); break; }; return true; } void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequiredID(DemoteRegisterToMemoryID); } /////////////////////////////////////// // Utilities: /////////////////////////////////////// static void ReplacePhiPred(BasicBlock* btarget, BasicBlock* bold, BasicBlock* bnew) { for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end(); ib != ie; ++ib) if (PHINode* phi = dyn_cast(&*ib)) { for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x) if(bold == phi->getIncomingBlock(x)) phi->setIncomingBlock(x, bnew); } } static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) { for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end(); ib != ie; ++ib) if (PHINode* phi = dyn_cast(&*ib)) { std::map counter; for(unsigned i = 0; i < phi->getNumIncomingValues(); ) { if (counter[phi->getIncomingBlock(i)]) { assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]); phi->removeIncomingValue(i, false); } else { counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i); ++i; } } } } template static void recBackEdge(BasicBlock* bb, T& BackEdges, std::map& color, std::map& depth, std::map& finish, int& time) { color[bb] = 1; ++time; depth[bb] = time; TerminatorInst* t= bb->getTerminator(); for(unsigned i = 0; i < t->getNumSuccessors(); ++i) { BasicBlock* bbnew = t->getSuccessor(i); if (color[bbnew] == 0) recBackEdge(bbnew, BackEdges, color, depth, finish, time); else if (color[bbnew] == 1) { BackEdges.insert(std::make_pair(bb, bbnew)); //NumBackEdges++; } } color[bb] = 2; ++time; finish[bb] = time; } //find the back edges and where they go to template static void getBackEdges(Function& F, T& BackEdges) { std::map color; std::map depth; std::map finish; int time = 0; recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time); DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n"); } //Creation functions ModulePass* llvm::createNullProfilerRSPass() { return new NullProfilerRS(); } FunctionPass* llvm::createRSProfilingPass() { return new ProfilerRS(); }