Add a new optimization pass: Stack Coloring, that merges disjoint static allocations (allocas). Allocas are known to be

disjoint if they are marked by disjoint lifetime markers (@llvm.lifetime.XXX intrinsics).



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163299 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2012-09-06 09:17:37 +00:00
parent 6c822eea47
commit c05d30601c
22 changed files with 1058 additions and 17 deletions

View File

@ -499,6 +499,13 @@ Release Notes</a>.</h1>
<div>
<p>Stack Coloring - We have implemented a new optimization pass
to merge stack objects which are used in disjoin areas of the code.
This optimization reduces the required stack space significantly, in cases
where it is clear to the optimizer that the stack slot is not shared.
We use the lifetime markers to tell the codegen that a certain alloca
is used within a region.</p>
<p>We have put a significant amount of work into the code generator
infrastructure, which allows us to implement more aggressive algorithms and
make it run faster:</p>

View File

@ -311,7 +311,7 @@ public:
return !(*this == RHS);
}
// Intersection, union, disjoint union.
/// Intersection, union, disjoint union.
BitVector &operator&=(const BitVector &RHS) {
unsigned ThisWords = NumBitWords(size());
unsigned RHSWords = NumBitWords(RHS.size());
@ -328,7 +328,7 @@ public:
return *this;
}
// reset - Reset bits that are set in RHS. Same as *this &= ~RHS.
/// reset - Reset bits that are set in RHS. Same as *this &= ~RHS.
BitVector &reset(const BitVector &RHS) {
unsigned ThisWords = NumBitWords(size());
unsigned RHSWords = NumBitWords(RHS.size());
@ -338,6 +338,23 @@ public:
return *this;
}
/// test - Check if (This - RHS) is zero.
/// This is the same as reset(RHS) and any().
bool test(const BitVector &RHS) const {
unsigned ThisWords = NumBitWords(size());
unsigned RHSWords = NumBitWords(RHS.size());
unsigned i;
for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
if ((Bits[i] & ~RHS.Bits[i]) != 0)
return true;
for (; i != ThisWords ; ++i)
if (Bits[i] != 0)
return true;
return false;
}
BitVector &operator|=(const BitVector &RHS) {
if (size() < RHS.size())
resize(RHS.size());

View File

@ -637,6 +637,10 @@ namespace ISD {
ATOMIC_LOAD_UMIN,
ATOMIC_LOAD_UMAX,
/// This corresponds to the llvm.lifetime.* intrinsics. The first operand
/// is the chain and the second operand is the alloca pointer.
LIFETIME_START, LIFETIME_END,
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific pre-isel opcode values start here.
BUILTIN_OP_END

View File

@ -28,6 +28,7 @@ class MachineFunction;
class MachineBasicBlock;
class TargetFrameLowering;
class BitVector;
class Value;
/// The CalleeSavedInfo class tracks the information need to locate where a
/// callee saved register is in the current frame.
@ -103,14 +104,18 @@ class MachineFrameInfo {
// protector.
bool MayNeedSP;
/// Alloca - If this stack object is originated from an Alloca instruction
/// this value saves the original IR allocation. Can be NULL.
const Value *Alloca;
// PreAllocated - If true, the object was mapped into the local frame
// block and doesn't need additional handling for allocation beyond that.
bool PreAllocated;
StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
bool isSS, bool NSP)
bool isSS, bool NSP, const Value *Val)
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
isSpillSlot(isSS), MayNeedSP(NSP), PreAllocated(false) {}
isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {}
};
/// Objects - The list of stack objects allocated...
@ -362,6 +367,14 @@ public:
ensureMaxAlignment(Align);
}
/// getObjectAllocation - Return the underlying Alloca of the specified
/// stack object if it exists. Returns 0 if none exists.
const Value* getObjectAllocation(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].Alloca;
}
/// NeedsStackProtector - Returns true if the object may need stack
/// protectors.
bool MayNeedStackProtector(int ObjectIdx) const {
@ -482,9 +495,10 @@ public:
/// a nonnegative identifier to represent it.
///
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
bool MayNeedSP = false) {
bool MayNeedSP = false, const Value *Alloca = 0) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP));
Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
Alloca));
int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
ensureMaxAlignment(Alignment);
@ -516,7 +530,7 @@ public:
///
int CreateVariableSizedObject(unsigned Alignment) {
HasVarSizedObjects = true;
Objects.push_back(StackObject(0, Alignment, 0, false, false, true));
Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
ensureMaxAlignment(Alignment);
return (int)Objects.size()-NumFixedObjects-1;
}

View File

@ -404,6 +404,10 @@ namespace llvm {
/// inserting cmov instructions.
extern char &EarlyIfConverterID;
/// StackSlotColoring - This pass performs stack coloring and merging.
/// It merges disjoint allocas to reduce the stack size.
extern char &StackColoringID;
/// IfConverter - This pass performs machine code if conversion.
extern char &IfConverterID;

View File

@ -232,6 +232,7 @@ void initializeSinkingPass(PassRegistry&);
void initializeSlotIndexesPass(PassRegistry&);
void initializeSpillPlacementPass(PassRegistry&);
void initializeStackProtectorPass(PassRegistry&);
void initializeStackColoringPass(PassRegistry&);
void initializeStackSlotColoringPass(PassRegistry&);
void initializeStripDeadDebugInfoPass(PassRegistry&);
void initializeStripDeadPrototypesPassPass(PassRegistry&);

View File

@ -745,6 +745,18 @@ def BUNDLE : Instruction {
let InOperandList = (ins variable_ops);
let AsmString = "BUNDLE";
}
def LIFETIME_START : Instruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "LIFETIME_START";
let neverHasSideEffects = 1;
}
def LIFETIME_END : Instruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "LIFETIME_END";
let neverHasSideEffects = 1;
}
}
//===----------------------------------------------------------------------===//

View File

@ -87,7 +87,11 @@ namespace TargetOpcode {
/// BUNDLE - This instruction represents an instruction bundle. Instructions
/// which immediately follow a BUNDLE instruction which are marked with
/// 'InsideBundle' flag are inside the bundle.
BUNDLE
BUNDLE = 14,
/// Lifetime markers.
LIFETIME_START = 15,
LIFETIME_END = 16
};
} // end namespace TargetOpcode
} // end namespace llvm

View File

@ -95,6 +95,7 @@ add_llvm_library(LLVMCodeGen
SplitKit.cpp
StackProtector.cpp
StackSlotColoring.cpp
StackColoring.cpp
StrongPHIElimination.cpp
TailDuplication.cpp
TargetFrameLoweringImpl.cpp

View File

@ -56,6 +56,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRegisterCoalescerPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackColoringPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeStrongPHIEliminationPass(Registry);
initializeTailDuplicatePassPass(Registry);

View File

@ -458,7 +458,9 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
unsigned StackAlign = TFI.getStackAlignment();
unsigned Align = MinAlign(SPOffset, StackAlign);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/false, false));
/*isSS*/ false,
/*NeedSP*/ false,
/*Alloca*/ 0));
return -++NumFixedObjects;
}

View File

@ -529,6 +529,10 @@ void TargetPassConfig::addMachineSSAOptimization() {
// instructions dead.
addPass(&OptimizePHIsID);
// This pass merges large allocas. StackSlotColoring is a different pass
// which merges spill slots.
addPass(&StackColoringID);
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
addPass(&LocalStackSlotAllocationID);

View File

@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
MayNeedSP);
MayNeedSP, AI);
}
for (; BB != EB; ++BB)

View File

@ -873,6 +873,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
case ISD::LIFETIME_START:
case ISD::LIFETIME_END: {
unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
.addFrameIndex(FI->getIndex());
break;
}
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)

View File

@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
break;
case ISD::MERGE_VALUES:
case ISD::TokenFactor:
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
case ISD::CopyToReg:
case ISD::CopyFromReg:
case ISD::EH_LABEL:

View File

@ -4161,6 +4161,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
Opcode == ISD::LIFETIME_START ||
Opcode == ISD::LIFETIME_END ||
(Opcode <= INT_MAX &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");

View File

@ -19,6 +19,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/CallingConv.h"
#include "llvm/DebugInfo.h"
@ -5215,14 +5216,30 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
rw==1)); /* write */
return 0;
}
case Intrinsic::invariant_start:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
SDValue Ops[2];
AllocaInst *LifetimeObject =dyn_cast_or_null<AllocaInst>(
GetUnderlyingObject(I.getArgOperand(1), TD));
// Could not find an Alloca.
if (!LifetimeObject)
return 0;
int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
Ops[0] = getRoot();
Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
DAG.setRoot(Res);
return 0;
}
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
return 0;
case Intrinsic::invariant_end:
case Intrinsic::lifetime_end:
// Discard region information.
return 0;
case Intrinsic::donothing:

View File

@ -267,6 +267,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
case ISD::DEBUGTRAP: return "debugtrap";
case ISD::LIFETIME_START: return "lifetime.start";
case ISD::LIFETIME_END: return "lifetime.end";
// Bit manipulation
case ISD::BSWAP: return "bswap";

View File

@ -1801,10 +1801,13 @@ WalkChainUsers(const SDNode *ChainedNode,
User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
continue;
if (User->getOpcode() == ISD::CopyToReg ||
User->getOpcode() == ISD::CopyFromReg ||
User->getOpcode() == ISD::INLINEASM ||
User->getOpcode() == ISD::EH_LABEL) {
unsigned UserOpcode = User->getOpcode();
if (UserOpcode == ISD::CopyToReg ||
UserOpcode == ISD::CopyFromReg ||
UserOpcode == ISD::INLINEASM ||
UserOpcode == ISD::EH_LABEL ||
UserOpcode == ISD::LIFETIME_START ||
UserOpcode == ISD::LIFETIME_END) {
// If their node ID got reset to -1 then they've already been selected.
// Treat them like a MachineOpcode.
if (User->getNodeId() == -1)
@ -2220,6 +2223,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::CopyFromReg:
case ISD::CopyToReg:
case ISD::EH_LABEL:
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
NodeToMatch->setNodeId(-1); // Mark selected.
return 0;
case ISD::AssertSext:

View File

@ -0,0 +1,657 @@
//===-- StackColoring.cpp -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements the stack-coloring optimization that looks for
// lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
// which represent the possible lifetime of stack slots. It attempts to
// merge disjoint stack slots and reduce the used stack space.
// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
//
// TODO: In the future we plan to improve stack coloring in the following ways:
// 1. Allow merging multiple small slots into a single larger slot at different
// offsets.
// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
// spill slots.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "stackcoloring"
#include "MachineTraceMetrics.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/DebugInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<bool>
DisableColoring("no-stack-coloring",
cl::init(false), cl::Hidden,
cl::desc("Suppress stack coloring"));
STATISTIC(NumMarkerSeen, "Number of life markers found.");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
//===----------------------------------------------------------------------===//
// StackColoring Pass
//===----------------------------------------------------------------------===//
namespace {
/// StackColoring - A machine pass for merging disjoint stack allocations,
/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
class StackColoring : public MachineFunctionPass {
MachineFrameInfo *MFI;
MachineFunction *MF;
/// A class representing liveness information for a single basic block.
/// Each bit in the BitVector represents the liveness property
/// for a different stack slot.
struct BlockLifetimeInfo {
/// Which slots BEGINs in each basic block.
BitVector Begin;
/// Which slots ENDs in each basic block.
BitVector End;
/// Which slots are marked as LIVE_IN, coming into each basic block.
BitVector LiveIn;
/// Which slots are marked as LIVE_OUT, coming out of each basic block.
BitVector LiveOut;
};
/// Maps active slots (per bit) for each basic block.
DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness;
/// Maps serial numbers to basic blocks.
DenseMap<MachineBasicBlock*, int> BasicBlocks;
/// Maps basic blocks to a serial number.
SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering;
/// Maps liveness intervals for each slot.
SmallVector<LiveInterval*, 16> Intervals;
/// VNInfo is used for the construction of LiveIntervals.
VNInfo::Allocator VNInfoAllocator;
/// SlotIndex analysis object.
SlotIndexes* Indexes;
/// The list of lifetime markers found. These markers are to be removed
/// once the coloring is done.
SmallVector<MachineInstr*, 8> Markers;
/// SlotSizeSorter - A Sort utility for arranging stack slots according
/// to their size.
struct SlotSizeSorter {
MachineFrameInfo *MFI;
SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
bool operator()(int LHS, int RHS) {
// We use -1 to denote a uninteresting slot. Place these slots at the end.
if (LHS == -1) return false;
if (RHS == -1) return true;
// Sort according to size.
return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
}
};
public:
static char ID;
StackColoring() : MachineFunctionPass(ID) {
initializeStackColoringPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const;
bool runOnMachineFunction(MachineFunction &MF);
private:
/// Debug.
void dump();
/// Removes all of the lifetime marker instructions from the function.
/// \returns true if any markers were removed.
bool removeAllMarkers();
/// Scan the machine function and find all of the lifetime markers.
/// Record the findings in the BEGIN and END vectors.
/// \returns the number of markers found.
unsigned collectMarkers(unsigned NumSlot);
/// Perform the dataflow calculation and calculate the lifetime for each of
/// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
/// LifetimeLIVE_OUT maps that represent which stack slots are live coming
/// in and out blocks.
void calculateLocalLiveness();
/// Construct the LiveIntervals for the slots.
void calculateLiveIntervals(unsigned NumSlots);
/// Go over the machine function and change instructions which use stack
/// slots to use the joint slots.
void remapInstructions(DenseMap<int, int> &SlotRemap);
/// Map entries which point to other entries to their destination.
/// A->B->C becomes A->C.
void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
};
} // end anonymous namespace
char StackColoring::ID = 0;
char &llvm::StackColoringID = StackColoring::ID;
INITIALIZE_PASS_BEGIN(StackColoring,
"stack-coloring", "Merge disjoint stack slots", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_END(StackColoring,
"stack-coloring", "Merge disjoint stack slots", false, false)
void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
void StackColoring::dump() {
for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
FI != FE; ++FI) {
unsigned Num = BasicBlocks[*FI];
DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n");
Num = 0;
DEBUG(dbgs()<<"BEGIN : {");
for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i)
DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"END : {");
for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i)
DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"LIVE_IN: {");
for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i)
DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"LIVEOUT: {");
for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i)
DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
}
}
unsigned StackColoring::collectMarkers(unsigned NumSlot) {
unsigned MarkersFound = 0;
// Scan the function to find all lifetime markers.
// NOTE: We use the a reverse-post-order iteration to ensure that we obtain a
// deterministic numbering, and because we'll need a post-order iteration
// later for solving the liveness dataflow problem.
for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
FI != FE; ++FI) {
// Assign a serial number to this basic block.
BasicBlocks[*FI] = BasicBlockNumbering.size();;
BasicBlockNumbering.push_back(*FI);
BlockLiveness[*FI].Begin.resize(NumSlot);
BlockLiveness[*FI].End.resize(NumSlot);
for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
BI != BE; ++BI) {
if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
BI->getOpcode() != TargetOpcode::LIFETIME_END)
continue;
Markers.push_back(BI);
bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
MachineOperand &MI = BI->getOperand(0);
unsigned Slot = MI.getIndex();
MarkersFound++;
const Value* Allocation = MFI->getObjectAllocation(Slot);
if (Allocation) {
DEBUG(dbgs()<<"Found lifetime marker for allocation: "<<
Allocation->getName()<<"\n");
}
if (IsStart) {
BlockLiveness[*FI].Begin.set(Slot);
} else {
if (BlockLiveness[*FI].Begin.test(Slot)) {
// Allocas that start and end within a single block are handled
// specially when computing the LiveIntervals to avoid pessimizing
// the liveness propagation.
BlockLiveness[*FI].Begin.reset(Slot);
} else {
BlockLiveness[*FI].End.set(Slot);
}
}
}
}
// Update statistics.
NumMarkerSeen += MarkersFound;
return MarkersFound;
}
void StackColoring::calculateLocalLiveness() {
// Perform a standard reverse dataflow computation to solve for
// global liveness. The BEGIN set here is equivalent to KILL in the standard
// formulation, and END is equivalent to GEN. The result of this computation
// is a map from blocks to bitvectors where the bitvectors represent which
// allocas are live in/out of that block.
SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
BasicBlockNumbering.end());
unsigned NumSSMIters = 0;
bool changed = true;
while (changed) {
changed = false;
++NumSSMIters;
SmallPtrSet<MachineBasicBlock*, 8> NextBBSet;
for (SmallVector<MachineBasicBlock*, 8>::iterator
PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
PI != PE; ++PI) {
MachineBasicBlock *BB = *PI;
if (!BBSet.count(BB)) continue;
BitVector LocalLiveIn;
BitVector LocalLiveOut;
// Forward propagation from begins to ends.
for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
PE = BB->pred_end(); PI != PE; ++PI)
LocalLiveIn |= BlockLiveness[*PI].LiveOut;
LocalLiveIn |= BlockLiveness[BB].End;
LocalLiveIn.reset(BlockLiveness[BB].Begin);
// Reverse propagation from ends to begins.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
LocalLiveOut |= BlockLiveness[*SI].LiveIn;
LocalLiveOut |= BlockLiveness[BB].Begin;
LocalLiveOut.reset(BlockLiveness[BB].End);
LocalLiveIn |= LocalLiveOut;
LocalLiveOut |= LocalLiveIn;
// After adopting the live bits, we need to turn-off the bits which
// are de-activated in this block.
LocalLiveOut.reset(BlockLiveness[BB].End);
LocalLiveIn.reset(BlockLiveness[BB].Begin);
if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) {
changed = true;
BlockLiveness[BB].LiveIn |= LocalLiveIn;
for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
PE = BB->pred_end(); PI != PE; ++PI)
NextBBSet.insert(*PI);
}
if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) {
changed = true;
BlockLiveness[BB].LiveOut |= LocalLiveOut;
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
NextBBSet.insert(*SI);
}
}
BBSet = NextBBSet;
}// while changed.
}
void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
SmallVector<SlotIndex, 16> Starts;
SmallVector<SlotIndex, 16> Finishes;
// For each block, find which slots are active within this block
// and update the live intervals.
for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end();
MBB != MBBe; ++MBB) {
Starts.clear();
Starts.resize(NumSlots);
Finishes.clear();
Finishes.resize(NumSlots);
BitVector Alive = BlockLiveness[MBB].LiveIn;
Alive |= BlockLiveness[MBB].LiveOut;
if (Alive.any()) {
for (int pos = Alive.find_first(); pos != -1;
pos = Alive.find_next(pos)) {
Starts[pos] = Indexes->getMBBStartIdx(MBB);
Finishes[pos] = Indexes->getMBBEndIdx(MBB);
}
}
for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(),
e = Markers.end(); it != e; ++it) {
MachineInstr *MI = *it;
assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
"Invalid Lifetime marker");
if (MI->getParent() == MBB) {
bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
MachineOperand &Mo = MI->getOperand(0);
int Slot = Mo.getIndex();
assert(Slot >= 0 && "Invalid slot");
if (IsStart) {
Starts[Slot] = Indexes->getInstructionIndex(MI);
} else {
Finishes[Slot] = Indexes->getInstructionIndex(MI);
}
}
}
for (unsigned i = 0; i < NumSlots; ++i) {
assert(!!Starts[i] == !!Finishes[i] && "Unmatched range");
if (Starts[i] == Finishes[i])
continue;
assert(Starts[i] && Finishes[i] && "Invalid interval");
VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
SlotIndex S = Starts[i];
SlotIndex F = Finishes[i];
if (S < F) {
// We have a single consecutive region.
Intervals[i]->addRange(LiveRange(S, F, ValNum));
} else {
// We have two non consecutive regions. This happens when
// LIFETIME_START appears after the LIFETIME_END marker.
SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
}
}
}
}
bool StackColoring::removeAllMarkers() {
unsigned Count = 0;
for (unsigned i = 0; i < Markers.size(); ++i) {
Markers[i]->eraseFromParent();
Count++;
}
Markers.clear();
DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
return Count;
}
void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
unsigned FixedInstr = 0;
unsigned FixedMemOp = 0;
unsigned FixedDbg = 0;
MachineModuleInfo *MMI = &MF->getMMI();
// Remap debug information that refers to stack slots.
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
VE = VMap.end(); VI != VE; ++VI) {
const MDNode *Var = VI->first;
if (!Var) continue;
std::pair<unsigned, DebugLoc> &VP = VI->second;
if (SlotRemap.count(VP.first)) {
DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
VP.first = SlotRemap[VP.first];
FixedDbg++;
}
}
// Keep a list of *allocas* which need to be remapped.
DenseMap<const Value*, const Value*> Allocas;
for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
e = SlotRemap.end(); it != e; ++it) {
const Value* From = MFI->getObjectAllocation(it->first);
const Value* To = MFI->getObjectAllocation(it->second);
assert(To && From && "Invalid allocation object");
Allocas[From] = To;
}
// Remap all instructions to the new stack slots.
MachineFunction::iterator BB, BBE;
MachineBasicBlock::iterator I, IE;
for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
// Update the MachineMemOperand to use the new alloca.
for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
E = I->memoperands_end(); MM != E; ++MM) {
MachineMemOperand *MMO = *MM;
const Value *V = MMO->getValue();
if (!V)
continue;
// Climb up and find the original alloca.
V = GetUnderlyingObject(V);
// If we did not find one, or if the one that we found is not in our
// map, then move on.
if (!V || !Allocas.count(V))
continue;
MMO->setValue(Allocas[V]);
FixedMemOp++;
}
// Update all of the machine instruction operands.
for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
MachineOperand &MO = I->getOperand(i);
if (!MO.isFI())
continue;
int FromSlot = MO.getIndex();
// Don't touch arguments.
if (FromSlot<0)
continue;
// Only look at mapped slots.
if (!SlotRemap.count(FromSlot))
continue;
// Fix the machine instructions.
int ToSlot = SlotRemap[FromSlot];
MO.setIndex(ToSlot);
FixedInstr++;
}
}
DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
}
void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
unsigned NumSlots) {
// Expunge slot remap map.
for (unsigned i=0; i < NumSlots; ++i) {
// If we are remapping i
if (SlotRemap.count(i)) {
int Target = SlotRemap[i];
// As long as our target is mapped to something else, follow it.
while (SlotRemap.count(Target)) {
Target = SlotRemap[Target];
SlotRemap[i] = Target;
}
}
}
}
bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
DEBUG(dbgs() << "********** Stack Coloring **********\n"
<< "********** Function: "
<< ((Value*)Func.getFunction())->getName() << '\n');
MF = &Func;
MFI = MF->getFrameInfo();
Indexes = &getAnalysis<SlotIndexes>();
BlockLiveness.clear();
BasicBlocks.clear();
BasicBlockNumbering.clear();
Markers.clear();
Intervals.clear();
VNInfoAllocator.Reset();
unsigned NumSlots = MFI->getObjectIndexEnd();
// If there are no stack slots then there are no markers to remove.
if (!NumSlots)
return false;
SmallVector<int, 8> SortedSlots;
SortedSlots.reserve(NumSlots);
Intervals.reserve(NumSlots);
unsigned NumMarkers = collectMarkers(NumSlots);
unsigned TotalSize = 0;
DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
DEBUG(dbgs()<<"Slot structure:\n");
for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
TotalSize += MFI->getObjectSize(i);
}
DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
// Don't continue because there are not enough lifetime markers, or the
// stack or too small, or we are told not to optimize the slots.
if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
DEBUG(dbgs()<<"Will not try to merge slots.\n");
return removeAllMarkers();
}
for (unsigned i=0; i < NumSlots; ++i) {
LiveInterval *LI = new LiveInterval(i, 0);
Intervals.push_back(LI);
LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
SortedSlots.push_back(i);
}
// Calculate the liveness of each block.
calculateLocalLiveness();
// Propagate the liveness information.
calculateLiveIntervals(NumSlots);
// Maps old slots to new slots.
DenseMap<int, int> SlotRemap;
unsigned RemovedSlots = 0;
unsigned ReducedSize = 0;
// Do not bother looking at empty intervals.
for (unsigned I = 0; I < NumSlots; ++I) {
if (Intervals[SortedSlots[I]]->empty())
SortedSlots[I] = -1;
}
// This is a simple greedy algorithm for merging allocas. First, sort the
// slots, placing the largest slots first. Next, perform an n^2 scan and look
// for disjoint slots. When you find disjoint slots, merge the samller one
// into the bigger one and update the live interval. Remove the small alloca
// and continue.
// Sort the slots according to their size. Place unused slots at the end.
std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
bool Chanded = true;
while (Chanded) {
Chanded = false;
for (unsigned I = 0; I < NumSlots; ++I) {
if (SortedSlots[I] == -1)
continue;
for (unsigned J=0; J < NumSlots; ++J) {
if (SortedSlots[J] == -1)
continue;
int FirstSlot = SortedSlots[I];
int SecondSlot = SortedSlots[J];
LiveInterval *First = Intervals[FirstSlot];
LiveInterval *Second = Intervals[SecondSlot];
assert (!First->empty() && !Second->empty() && "Found an empty range");
// Merge disjoint slots.
if (!First->overlaps(*Second)) {
Chanded = true;
First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
DEBUG(dbgs()<<"Merging #"<<I<<" and slots #"<<J<<" together.\n");
unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
MFI->getObjectAlignment(SecondSlot));
assert(MFI->getObjectSize(FirstSlot) >=
MFI->getObjectSize(SecondSlot) &&
"Merging a small object into a larger one");
RemovedSlots+=1;
ReducedSize += MFI->getObjectSize(SecondSlot);
MFI->setObjectAlignment(FirstSlot, MaxAlignment);
MFI->RemoveStackObject(SecondSlot);
}
}
}
}// While changed.
// Record statistics.
StackSpaceSaved += ReducedSize;
StackSlotMerged += RemovedSlots;
DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
ReducedSize<<" bytes\n");
// Scan the entire function and update all machine operands that use frame
// indices to use the remapped frame index.
expungeSlotMap(SlotRemap, NumSlots);
remapInstructions(SlotRemap);
// Release the intervals.
for (unsigned I = 0; I < NumSlots; ++I) {
delete Intervals[I];
}
return removeAllMarkers();
}

View File

@ -0,0 +1,272 @@
; RUN: llc < %s | FileCheck %s --check-prefix=YESCOLOR
; RUN: llc -no-stack-coloring < %s | FileCheck %s --check-prefix=NOCOLOR
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
;YESCOLOR: subq $136, %rsp
;NOCOLOR: subq $264, %rsp
define i32 @myCall_w2(i32 %in) {
entry:
%a = alloca [17 x i8*], align 8
%a2 = alloca [16 x i8*], align 8
%b = bitcast [17 x i8*]* %a to i8*
%b2 = bitcast [16 x i8*]* %a2 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %b)
%t1 = call i32 @foo(i32 %in, i8* %b)
%t2 = call i32 @foo(i32 %in, i8* %b)
call void @llvm.lifetime.end(i64 -1, i8* %b)
call void @llvm.lifetime.start(i64 -1, i8* %b2)
%t3 = call i32 @foo(i32 %in, i8* %b2)
%t4 = call i32 @foo(i32 %in, i8* %b2)
call void @llvm.lifetime.end(i64 -1, i8* %b2)
%t5 = add i32 %t1, %t2
%t6 = add i32 %t3, %t4
%t7 = add i32 %t5, %t6
ret i32 %t7
}
;YESCOLOR: subq $272, %rsp
;NOCOLOR: subq $272, %rsp
define i32 @myCall2_no_merge(i32 %in, i1 %d) {
entry:
%a = alloca [17 x i8*], align 8
%a2 = alloca [16 x i8*], align 8
%b = bitcast [17 x i8*]* %a to i8*
%b2 = bitcast [16 x i8*]* %a2 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %b)
%t1 = call i32 @foo(i32 %in, i8* %b)
%t2 = call i32 @foo(i32 %in, i8* %b)
br i1 %d, label %bb2, label %bb3
bb2:
call void @llvm.lifetime.start(i64 -1, i8* %b2)
%t3 = call i32 @foo(i32 %in, i8* %b2)
%t4 = call i32 @foo(i32 %in, i8* %b2)
call void @llvm.lifetime.end(i64 -1, i8* %b2)
%t5 = add i32 %t1, %t2
%t6 = add i32 %t3, %t4
%t7 = add i32 %t5, %t6
call void @llvm.lifetime.end(i64 -1, i8* %b)
ret i32 %t7
bb3:
call void @llvm.lifetime.end(i64 -1, i8* %b)
ret i32 0
}
;YESCOLOR: subq $144, %rsp
;NOCOLOR: subq $272, %rsp
define i32 @myCall2_w2(i32 %in, i1 %d) {
entry:
%a = alloca [17 x i8*], align 8
%a2 = alloca [16 x i8*], align 8
%b = bitcast [17 x i8*]* %a to i8*
%b2 = bitcast [16 x i8*]* %a2 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %b)
%t1 = call i32 @foo(i32 %in, i8* %b)
%t2 = call i32 @foo(i32 %in, i8* %b)
call void @llvm.lifetime.end(i64 -1, i8* %b)
br i1 %d, label %bb2, label %bb3
bb2:
call void @llvm.lifetime.start(i64 -1, i8* %b2)
%t3 = call i32 @foo(i32 %in, i8* %b2)
%t4 = call i32 @foo(i32 %in, i8* %b2)
call void @llvm.lifetime.end(i64 -1, i8* %b2)
%t5 = add i32 %t1, %t2
%t6 = add i32 %t3, %t4
%t7 = add i32 %t5, %t6
ret i32 %t7
bb3:
ret i32 0
}
;YESCOLOR: subq $208, %rsp
;NOCOLOR: subq $400, %rsp
define i32 @myCall_w4(i32 %in) {
entry:
%a1 = alloca [14 x i8*], align 8
%a2 = alloca [13 x i8*], align 8
%a3 = alloca [12 x i8*], align 8
%a4 = alloca [11 x i8*], align 8
%b1 = bitcast [14 x i8*]* %a1 to i8*
%b2 = bitcast [13 x i8*]* %a2 to i8*
%b3 = bitcast [12 x i8*]* %a3 to i8*
%b4 = bitcast [11 x i8*]* %a4 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %b4)
call void @llvm.lifetime.start(i64 -1, i8* %b1)
%t1 = call i32 @foo(i32 %in, i8* %b1)
%t2 = call i32 @foo(i32 %in, i8* %b1)
call void @llvm.lifetime.end(i64 -1, i8* %b1)
call void @llvm.lifetime.start(i64 -1, i8* %b2)
%t9 = call i32 @foo(i32 %in, i8* %b2)
%t8 = call i32 @foo(i32 %in, i8* %b2)
call void @llvm.lifetime.end(i64 -1, i8* %b2)
call void @llvm.lifetime.start(i64 -1, i8* %b3)
%t3 = call i32 @foo(i32 %in, i8* %b3)
%t4 = call i32 @foo(i32 %in, i8* %b3)
call void @llvm.lifetime.end(i64 -1, i8* %b3)
%t11 = call i32 @foo(i32 %in, i8* %b4)
call void @llvm.lifetime.end(i64 -1, i8* %b4)
%t5 = add i32 %t1, %t2
%t6 = add i32 %t3, %t4
%t7 = add i32 %t5, %t6
ret i32 %t7
}
;YESCOLOR: subq $112, %rsp
;NOCOLOR: subq $400, %rsp
define i32 @myCall2_w4(i32 %in) {
entry:
%a1 = alloca [14 x i8*], align 8
%a2 = alloca [13 x i8*], align 8
%a3 = alloca [12 x i8*], align 8
%a4 = alloca [11 x i8*], align 8
%b1 = bitcast [14 x i8*]* %a1 to i8*
%b2 = bitcast [13 x i8*]* %a2 to i8*
%b3 = bitcast [12 x i8*]* %a3 to i8*
%b4 = bitcast [11 x i8*]* %a4 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %b1)
%t1 = call i32 @foo(i32 %in, i8* %b1)
%t2 = call i32 @foo(i32 %in, i8* %b1)
call void @llvm.lifetime.end(i64 -1, i8* %b1)
call void @llvm.lifetime.start(i64 -1, i8* %b2)
%t9 = call i32 @foo(i32 %in, i8* %b2)
%t8 = call i32 @foo(i32 %in, i8* %b2)
call void @llvm.lifetime.end(i64 -1, i8* %b2)
call void @llvm.lifetime.start(i64 -1, i8* %b3)
%t3 = call i32 @foo(i32 %in, i8* %b3)
%t4 = call i32 @foo(i32 %in, i8* %b3)
call void @llvm.lifetime.end(i64 -1, i8* %b3)
br i1 undef, label %bb2, label %bb3
bb2:
call void @llvm.lifetime.start(i64 -1, i8* %b4)
%t11 = call i32 @foo(i32 %in, i8* %b4)
call void @llvm.lifetime.end(i64 -1, i8* %b4)
%t5 = add i32 %t1, %t2
%t6 = add i32 %t3, %t4
%t7 = add i32 %t5, %t6
ret i32 %t7
bb3:
ret i32 0
}
;YESCOLOR: subq $144, %rsp
;NOCOLOR: subq $272, %rsp
define i32 @myCall2_noend(i32 %in, i1 %d) {
entry:
%a = alloca [17 x i8*], align 8
%a2 = alloca [16 x i8*], align 8
%b = bitcast [17 x i8*]* %a to i8*
%b2 = bitcast [16 x i8*]* %a2 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %b)
%t1 = call i32 @foo(i32 %in, i8* %b)
%t2 = call i32 @foo(i32 %in, i8* %b)
call void @llvm.lifetime.end(i64 -1, i8* %b)
br i1 %d, label %bb2, label %bb3
bb2:
call void @llvm.lifetime.start(i64 -1, i8* %b2)
%t3 = call i32 @foo(i32 %in, i8* %b2)
%t4 = call i32 @foo(i32 %in, i8* %b2)
%t5 = add i32 %t1, %t2
%t6 = add i32 %t3, %t4
%t7 = add i32 %t5, %t6
ret i32 %t7
bb3:
ret i32 0
}
;YESCOLOR: subq $144, %rsp
;NOCOLOR: subq $272, %rsp
define i32 @myCall2_noend2(i32 %in, i1 %d) {
entry:
%a = alloca [17 x i8*], align 8
%a2 = alloca [16 x i8*], align 8
%b = bitcast [17 x i8*]* %a to i8*
%b2 = bitcast [16 x i8*]* %a2 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %b)
%t1 = call i32 @foo(i32 %in, i8* %b)
%t2 = call i32 @foo(i32 %in, i8* %b)
br i1 %d, label %bb2, label %bb3
bb2:
call void @llvm.lifetime.end(i64 -1, i8* %b)
call void @llvm.lifetime.start(i64 -1, i8* %b2)
%t3 = call i32 @foo(i32 %in, i8* %b2)
%t4 = call i32 @foo(i32 %in, i8* %b2)
%t5 = add i32 %t1, %t2
%t6 = add i32 %t3, %t4
%t7 = add i32 %t5, %t6
ret i32 %t7
bb3:
ret i32 0
}
;YESCOLOR: subq $144, %rsp
;NOCOLOR: subq $272, %rsp
define i32 @myCall2_nostart(i32 %in, i1 %d) {
entry:
%a = alloca [17 x i8*], align 8
%a2 = alloca [16 x i8*], align 8
%b = bitcast [17 x i8*]* %a to i8*
%b2 = bitcast [16 x i8*]* %a2 to i8*
%t1 = call i32 @foo(i32 %in, i8* %b)
%t2 = call i32 @foo(i32 %in, i8* %b)
call void @llvm.lifetime.end(i64 -1, i8* %b)
br i1 %d, label %bb2, label %bb3
bb2:
call void @llvm.lifetime.start(i64 -1, i8* %b2)
%t3 = call i32 @foo(i32 %in, i8* %b2)
%t4 = call i32 @foo(i32 %in, i8* %b2)
%t5 = add i32 %t1, %t2
%t6 = add i32 %t3, %t4
%t7 = add i32 %t5, %t6
ret i32 %t7
bb3:
ret i32 0
}
; Adopt the test from Transforms/Inline/array_merge.ll'
;YESCOLOR: subq $816, %rsp
;NOCOLOR: subq $1616, %rsp
define void @array_merge() nounwind ssp {
entry:
%A.i1 = alloca [100 x i32], align 4
%B.i2 = alloca [100 x i32], align 4
%A.i = alloca [100 x i32], align 4
%B.i = alloca [100 x i32], align 4
%0 = bitcast [100 x i32]* %A.i to i8*
call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind
%1 = bitcast [100 x i32]* %B.i to i8*
call void @llvm.lifetime.start(i64 -1, i8* %1) nounwind
call void @bar([100 x i32]* %A.i, [100 x i32]* %B.i) nounwind
call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
call void @llvm.lifetime.end(i64 -1, i8* %1) nounwind
%2 = bitcast [100 x i32]* %A.i1 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %2) nounwind
%3 = bitcast [100 x i32]* %B.i2 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %3) nounwind
call void @bar([100 x i32]* %A.i1, [100 x i32]* %B.i2) nounwind
call void @llvm.lifetime.end(i64 -1, i8* %2) nounwind
call void @llvm.lifetime.end(i64 -1, i8* %3) nounwind
ret void
}
declare void @bar([100 x i32]* , [100 x i32]*) nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
declare i32 @foo(i32, i8*)

View File

@ -300,6 +300,8 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
"REG_SEQUENCE",
"COPY",
"BUNDLE",
"LIFETIME_START",
"LIFETIME_END",
0
};
const DenseMap<const Record*, CodeGenInstruction*> &Insts = getInstructions();