llvm-6502/lib/Transforms/Instrumentation/RSProfiling.cpp

//===- RSProfiling.cpp - Various profiling using random sampling ----------===//
//
//                      The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// These passes implement a random sampling based profiling.  Different methods
// of choosing when to sample are supported, as well as different types of
// profiling.  This is done as two passes.  The first is a sequence of profiling
// passes which insert profiling into the program, and remember what they 
// inserted.
//
// The second stage duplicates all instructions in a function, ignoring the 
// profiling code, then connects the two versions togeather at the entry and at
// backedges.  At each connection point a choice is made as to whether to jump
// to the profiled code (take a sample) or execute the unprofiled code.
//
// It is highly recommeneded that after this pass one runs mem2reg and adce
// (instcombine load-vn gdce dse also are good to run afterwards)
//
// This design is intended to make the profiling passes independent of the RS
// framework, but any profiling pass that implements the RSProfiling interface
// is compatible with the rs framework (and thus can be sampled)
//
// TODO: obviously the block and function profiling are almost identical to the
// existing ones, so they can be unified (esp since these passes are valid
// without the rs framework).
// TODO: Fix choice code so that frequency is not hard coded
//
//===----------------------------------------------------------------------===//

#include "llvm/Pass.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Instrumentation.h"
//#include "ProfilingUtils.h"
#include "RSProfiling.h"

#include <set>
#include <map>
#include <queue>
#include <list>
#include <iostream>

using namespace llvm;

namespace {
  Statistic<> NumBackEdges("bedge", "Number of BackEdges");

  enum RandomMeth {
    GBV, GBVO, HOSTCC
  };

  cl::opt<RandomMeth> RandomMethod("profile-randomness",
      cl::desc("How to randomly choose to profile:"),
      cl::values(
                 clEnumValN(GBV, "global", "global counter"),
                 clEnumValN(GBVO, "ra_global", 
			    "register allocated global counter"),
                 clEnumValN(HOSTCC, "rdcc", "cycle counter"),
                 clEnumValEnd));
  
  /// NullProfilerRS - The basic profiler that does nothing.  It is the default
  /// profiler and thus terminates RSProfiler chains.  It is useful for 
  /// measuring framework overhead
  class NullProfilerRS : public RSProfilers {
  public:
    bool isProfiling(Value* v) {
      return false;
    }
    bool runOnModule(Module &M) {
      return false;
    }
    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
    }
  };

  static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
  static RegisterOpt<NullProfilerRS> NP("insert-null-profiling-rs",
					"Measure profiling framework overhead");
  static RegisterAnalysisGroup<RSProfilers, NullProfilerRS, true> NPT;

  /// Chooser - Something that chooses when to make a sample of the profiled code
  class Chooser {
  public:
    /// ProcessChoicePoint - is called for each basic block inserted to choose 
    /// between normal and sample code
    virtual void ProcessChoicePoint(BasicBlock*) = 0;
    /// PrepFunction - is called once per function before other work is done.
    /// This gives the opertunity to insert new allocas and such.
    virtual void PrepFunction(Function*) = 0;
    virtual ~Chooser() {}
  };

  //Things that implement sampling policies
  //A global value that is read-mod-stored to choose when to sample.
  //A sample is taken when the global counter hits 0
  class GlobalRandomCounter : public Chooser {
    GlobalVariable* Counter;
    Value* ResetValue;
    const Type* T;
  public:
    GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
    virtual ~GlobalRandomCounter();
    virtual void PrepFunction(Function* F);
    virtual void ProcessChoicePoint(BasicBlock* bb);
  };

  //Same is GRC, but allow register allocation of the global counter
  class GlobalRandomCounterOpt : public Chooser {
    GlobalVariable* Counter;
    Value* ResetValue;
    AllocaInst* AI;
    const Type* T;
  public:
    GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
    virtual ~GlobalRandomCounterOpt();
    virtual void PrepFunction(Function* F);
    virtual void ProcessChoicePoint(BasicBlock* bb);
  };

  //Use the cycle counter intrinsic as a source of pseudo randomness when
  //deciding when to sample.
  class CycleCounter : public Chooser {
    uint64_t rm;
    Function* F;
  public:
    CycleCounter(Module& m, uint64_t resetmask);
    virtual ~CycleCounter();
    virtual void PrepFunction(Function* F);
    virtual void ProcessChoicePoint(BasicBlock* bb);
  };

  /// ProfilerRS - Insert the random sampling framework
  struct ProfilerRS : public FunctionPass {
    std::map<Value*, Value*> TransCache;
    std::set<BasicBlock*> ChoicePoints;
    Chooser* c;

    //Translate and duplicate values for the new profile free version of stuff
    Value* Translate(Value* v);
    //Duplicate an entire function (with out profiling)
    void Duplicate(Function& F, RSProfilers& LI);
    //Called once for each backedge, handle the insertion of choice points and
    //the interconection of the two versions of the code
    void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
    bool runOnFunction(Function& F);
    bool doInitialization(Module &M);
    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  };

  RegisterOpt<ProfilerRS> X("insert-rs-profiling-framework",
			   "Insert random sampling instrumentation  framework");
};

//Local utilities
static void ReplacePhiPred(BasicBlock* btarget, 
                           BasicBlock* bold, BasicBlock* bnew);

static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);

template<class T>
static void recBackEdge(BasicBlock* bb, T& BackEdges, 
                        std::map<BasicBlock*, int>& color,
                        std::map<BasicBlock*, int>& depth,
                        std::map<BasicBlock*, int>& finish,
                        int& time);

//find the back edges and where they go to
template<class T>
static void getBackEdges(Function& F, T& BackEdges);


///////////////////////////////////////
// Methods of choosing when to profile
///////////////////////////////////////
  
GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t, 
                                         uint64_t resetval) : T(t) {
  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
                               ConstantUInt::get(T, resetval),
                               "RandomSteeringCounter", &M);
  ResetValue = ConstantUInt::get(T, resetval);
}

GlobalRandomCounter::~GlobalRandomCounter() {}

void GlobalRandomCounter::PrepFunction(Function* F) {}

void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
  BranchInst* t = cast<BranchInst>(bb->getTerminator());
  
  //decrement counter
  LoadInst* l = new LoadInst(Counter, "counter", t);
  
  SetCondInst* s = new SetCondInst(Instruction::SetEQ, l, 
				   ConstantUInt::get(T, 0), 
                                   "countercc", t);
  Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
                                     "counternew", t);
  new StoreInst(nv, Counter, t);
  t->setCondition(s);
  
  //reset counter
  BasicBlock* oldnext = t->getSuccessor(0);
  BasicBlock* resetblock = new BasicBlock("reset", oldnext->getParent(), 
					  oldnext);
  TerminatorInst* t2 = new BranchInst(oldnext, resetblock);
  t->setSuccessor(0, resetblock);
  new StoreInst(ResetValue, Counter, t2);
  ReplacePhiPred(oldnext, bb, resetblock);
}

GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t, 
                                               uint64_t resetval) 
  : AI(0), T(t) {
  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
                               ConstantUInt::get(T, resetval),
                               "RandomSteeringCounter", &M);
  ResetValue = ConstantUInt::get(T, resetval);
}

GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}

void GlobalRandomCounterOpt::PrepFunction(Function* F) {
  //make a local temporary to cache the global
  BasicBlock& bb = F->getEntryBlock();
  AI = new AllocaInst(T, 0, "localcounter", bb.begin());
  LoadInst* l = new LoadInst(Counter, "counterload", AI->getNext());
  new StoreInst(l, AI, l->getNext());
  
  //modify all functions and return values to restore the local variable to/from
  //the global variable
  for(Function::iterator fib = F->begin(), fie = F->end();
      fib != fie; ++fib)
    for(BasicBlock::iterator bib = fib->begin(), bie = fib->end();
        bib != bie; ++bib)
      if (isa<CallInst>(&*bib)) {
        LoadInst* l = new LoadInst(AI, "counter", bib);
        new StoreInst(l, Counter, bib);
        l = new LoadInst(Counter, "counter", bib->getNext());
        new StoreInst(l, AI, l->getNext());
      } else if (isa<InvokeInst>(&*bib)) {
        LoadInst* l = new LoadInst(AI, "counter", bib);
        new StoreInst(l, Counter, bib);
        
        BasicBlock* bb = cast<InvokeInst>(&*bib)->getNormalDest();
        Instruction* i = bb->begin();
        while (isa<PHINode>(i)) i = i->getNext();
        l = new LoadInst(Counter, "counter", i);
        
        bb = cast<InvokeInst>(&*bib)->getUnwindDest();
        i = bb->begin();
        while (isa<PHINode>(i)) i = i->getNext();
        l = new LoadInst(Counter, "counter", i);
        new StoreInst(l, AI, l->getNext());
      } else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) {
        LoadInst* l = new LoadInst(AI, "counter", bib);
        new StoreInst(l, Counter, bib);
      }
}

void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
  BranchInst* t = cast<BranchInst>(bb->getTerminator());
  
  //decrement counter
  LoadInst* l = new LoadInst(AI, "counter", t);
  
  SetCondInst* s = new SetCondInst(Instruction::SetEQ, l, 
				   ConstantUInt::get(T, 0), 
                                   "countercc", t);
  Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
                                     "counternew", t);
  new StoreInst(nv, AI, t);
  t->setCondition(s);
  
  //reset counter
  BasicBlock* oldnext = t->getSuccessor(0);
  BasicBlock* resetblock = new BasicBlock("reset", oldnext->getParent(), 
					  oldnext);
  TerminatorInst* t2 = new BranchInst(oldnext, resetblock);
  t->setSuccessor(0, resetblock);
  new StoreInst(ResetValue, AI, t2);
  ReplacePhiPred(oldnext, bb, resetblock);
}


CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) {
  F = m.getOrInsertFunction("llvm.readcyclecounter", Type::ULongTy, NULL);
}

CycleCounter::~CycleCounter() {}

void CycleCounter::PrepFunction(Function* F) {}

void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
  BranchInst* t = cast<BranchInst>(bb->getTerminator());
  
  CallInst* c = new CallInst(F, "rdcc", t);
  BinaryOperator* b = 
    BinaryOperator::createAnd(c, ConstantUInt::get(Type::ULongTy, rm),
			      "mrdcc", t);
  
  SetCondInst* s = new SetCondInst(Instruction::SetEQ, b, 
				   ConstantUInt::get(Type::ULongTy, 0), 
                                   "mrdccc", t);
  t->setCondition(s);
}

///////////////////////////////////////
// Profiling:
///////////////////////////////////////
bool RSProfilers_std::isProfiling(Value* v) {
  if (profcode.find(v) != profcode.end())
    return true;
  //else
  RSProfilers& LI = getAnalysis<RSProfilers>();
  return LI.isProfiling(v);
}

void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
                                          GlobalValue *CounterArray) {
  // Insert the increment after any alloca or PHI instructions...
  BasicBlock::iterator InsertPos = BB->begin();
  while (isa<AllocaInst>(InsertPos) || isa<PHINode>(InsertPos))
    ++InsertPos;
  
  // Create the getelementptr constant expression
  std::vector<Constant*> Indices(2);
  Indices[0] = Constant::getNullValue(Type::IntTy);
  Indices[1] = ConstantSInt::get(Type::IntTy, CounterNum);
  Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray, Indices);
  
  // Load, increment and store the value back.
  Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
  profcode.insert(OldVal);
  Value *NewVal = BinaryOperator::createAdd(OldVal,
					    ConstantInt::get(Type::UIntTy, 1),
					    "NewCounter", InsertPos);
  profcode.insert(NewVal);
  profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
}

void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const {
  //grab any outstanding profiler, or get the null one
  AU.addRequired<RSProfilers>();
}

///////////////////////////////////////
// RS Framework
///////////////////////////////////////

Value* ProfilerRS::Translate(Value* v) {
  if(TransCache[v])
    return TransCache[v];
  
  if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) {
    if (bb == &bb->getParent()->getEntryBlock())
      TransCache[bb] = bb; //don't translate entry block
    else
      TransCache[bb] = new BasicBlock("dup_" + bb->getName(), bb->getParent(), 
				      NULL);
    return TransCache[bb];
  } else if (Instruction* i = dyn_cast<Instruction>(v)) {
    //we have already translated this
    //do not translate entry block allocas
    if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) {
      TransCache[i] = i;
      return i;
    } else {
      //translate this
      Instruction* i2 = i->clone();
      if (i->hasName())
        i2->setName("dup_" + i->getName());
      TransCache[i] = i2;
      //NumNewInst++;
      for (unsigned x = 0; x < i2->getNumOperands(); ++x)
        i2->setOperand(x, Translate(i2->getOperand(x)));
      return i2;
    }
  } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) {
    TransCache[v] = v;
    return v;
  }
  assert(0 && "Value not handled");
  return 0;
}

void ProfilerRS::Duplicate(Function& F, RSProfilers& LI)
{
  //perform a breadth first search, building up a duplicate of the code
  std::queue<BasicBlock*> worklist;
  std::set<BasicBlock*> seen;
  
  //This loop ensures proper BB order, to help performance
  for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib)
    worklist.push(fib);
  while (!worklist.empty()) {
    Translate(worklist.front());
    worklist.pop();
  }
  
  //remember than reg2mem created a new entry block we don't want to duplicate
  worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0));
  seen.insert(&F.getEntryBlock());
  
  while (!worklist.empty()) {
    BasicBlock* bb = worklist.front();
    worklist.pop();
    if(seen.find(bb) == seen.end()) {
      BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb));
      BasicBlock::InstListType& instlist = bbtarget->getInstList();
      for (BasicBlock::iterator iib = bb->begin(), iie = bb->end(); 
           iib != iie; ++iib) {
        //NumOldInst++;
        if (!LI.isProfiling(&*iib)) {
          Instruction* i = cast<Instruction>(Translate(iib));
          instlist.insert(bbtarget->end(), i);
        }
      }
      //updated search state;
      seen.insert(bb);
      TerminatorInst* ti = bb->getTerminator();
      for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) {
        BasicBlock* bbs = ti->getSuccessor(x);
        if (seen.find(bbs) == seen.end()) {
          worklist.push(bbs);
        }
      }
    }
  }
}

void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) {
  //given a backedge from B -> A, and translations A' and B',
  //a: insert C and C'
  //b: add branches in C to A and A' and in C' to A and A'
  //c: mod terminators@B, replace A with C
  //d: mod terminators@B', replace A' with C'
  //e: mod phis@A for pred B to be pred C
  //       if multiple entries, simplify to one
  //f: mod phis@A' for pred B' to be pred C'
  //       if multiple entries, simplify to one
  //g: for all phis@A with pred C using x
  //       add in edge from C' using x'
  //       add in edge from C using x in A'
  
  //a:
  BasicBlock* bbC = new BasicBlock("choice", &F, src->getNext() );
  //ChoicePoints.insert(bbC);
  BasicBlock* bbCp = 
    new BasicBlock("choice", &F, cast<BasicBlock>(Translate(src))->getNext() );
  ChoicePoints.insert(bbCp);
  
  //b:
  new BranchInst(cast<BasicBlock>(Translate(dst)), bbC);
  new BranchInst(dst, cast<BasicBlock>(Translate(dst)), 
		 ConstantBool::get(true), bbCp);
  //c:
  {
    TerminatorInst* iB = src->getTerminator();
    for (unsigned x = 0; x < iB->getNumSuccessors(); ++x)
      if (iB->getSuccessor(x) == dst)
        iB->setSuccessor(x, bbC);
  }
  //d:
  {
    TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator()));
    for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x)
      if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst)))
        iBp->setSuccessor(x, bbCp);
  }
  //e:
  ReplacePhiPred(dst, src, bbC);
  //src could be a switch, in which case we are replacing several edges with one
  //thus collapse those edges int the Phi
  CollapsePhi(dst, bbC);
  //f:
  ReplacePhiPred(cast<BasicBlock>(Translate(dst)),
		 cast<BasicBlock>(Translate(src)),bbCp);
  CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp);
  //g:
  for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie;
      ++ib)
    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
      for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
        if(bbC == phi->getIncomingBlock(x)) {
          phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp);
          cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x), 
						     bbC);
        }
      phi->removeIncomingValue(bbC);
    }
}

bool ProfilerRS::runOnFunction(Function& F) {
  if (!F.isExternal()) {
    std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges;
    RSProfilers& LI = getAnalysis<RSProfilers>();
    
    getBackEdges(F, BackEdges);
    Duplicate(F, LI);
    //assume that stuff worked.  now connect the duplicated basic blocks 
    //with the originals in such a way as to preserve ssa.  yuk!
    for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator 
	   ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib)
      ProcessBackEdge(ib->first, ib->second, F);
    
    //oh, and add the edge from the reg2mem created entry node to the 
    //duplicated second node
    TerminatorInst* T = F.getEntryBlock().getTerminator();
    ReplaceInstWithInst(T, new BranchInst(T->getSuccessor(0),
			       cast<BasicBlock>(Translate(T->getSuccessor(0))),
					  ConstantBool::get(true)));
    
    //do whatever is needed now that the function is duplicated
    c->PrepFunction(&F);
    
    //add entry node to choice points
    ChoicePoints.insert(&F.getEntryBlock());
    
    for (std::set<BasicBlock*>::iterator 
	   ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii)
      c->ProcessChoicePoint(*ii);
    
    ChoicePoints.clear();
    TransCache.clear();
    
    return true;
  }
  return false;
}

bool ProfilerRS::doInitialization(Module &M) {
  switch (RandomMethod) {
  case GBV:
    c = new GlobalRandomCounter(M, Type::UIntTy, (1 << 14) - 1);
    break;
  case GBVO:
    c = new GlobalRandomCounterOpt(M, Type::UIntTy, (1 << 14) - 1);
    break;
  case HOSTCC:
    c = new CycleCounter(M, (1 << 14) - 1);
    break;
  };
  return true;
}

void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<RSProfilers>();
  AU.addRequiredID(DemoteRegisterToMemoryID);
}

///////////////////////////////////////
// Utilities:
///////////////////////////////////////
static void ReplacePhiPred(BasicBlock* btarget, 
                           BasicBlock* bold, BasicBlock* bnew) {
  for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
      ib != ie; ++ib)
    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
      for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
        if(bold == phi->getIncomingBlock(x))
          phi->setIncomingBlock(x, bnew);
    }
}

static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) {
  for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
      ib != ie; ++ib)
    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
      unsigned total = phi->getNumIncomingValues();
      std::map<BasicBlock*, Value*> counter;
      for(unsigned i = 0; i < phi->getNumIncomingValues(); ) {
        if (counter[phi->getIncomingBlock(i)]) {
          assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]);
          phi->removeIncomingValue(i, false);
        } else {
          counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i);
          ++i;
        }
      }
    } 
}

template<class T>
static void recBackEdge(BasicBlock* bb, T& BackEdges, 
                        std::map<BasicBlock*, int>& color,
                        std::map<BasicBlock*, int>& depth,
                        std::map<BasicBlock*, int>& finish,
                        int& time)
{
  color[bb] = 1;
  ++time;
  depth[bb] = time;
  TerminatorInst* t= bb->getTerminator();
  for(unsigned i = 0; i < t->getNumSuccessors(); ++i) {
    BasicBlock* bbnew = t->getSuccessor(i);
    if (color[bbnew] == 0)
      recBackEdge(bbnew, BackEdges, color, depth, finish, time);
    else if (color[bbnew] == 1) {
      BackEdges.insert(std::make_pair(bb, bbnew));
      //NumBackEdges++;
    }
  }
  color[bb] = 2;
  ++time;
  finish[bb] = time;
}


//find the back edges and where they go to
template<class T>
static void getBackEdges(Function& F, T& BackEdges) {
  std::map<BasicBlock*, int> color;
  std::map<BasicBlock*, int> depth;
  std::map<BasicBlock*, int> finish;
  int time = 0;
  recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
  DEBUG(std::cerr << F.getName() << " " << BackEdges.size() << "\n");
}


//Creation functions
ModulePass* llvm::createNullProfilerRSPass() {
  return new NullProfilerRS();
}

FunctionPass* llvm::createRSProfilingPass() {
  return new ProfilerRS();
}