Mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2024-12-14 11:32:34 +00:00
* Remove function to find "main" in a Module; there's a method for that
* Remove extraneous empty space and empty comment lines

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12014 91177308-0d34-0410-b5e6-96231b3b80d8
parent cbee990a29
commit 99cc88bb64
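The first bullet above replaces a hand-rolled scan over the Module with the accessor the Module class already provides. A minimal sketch of the two patterns, restated from the hunks below (the LLVM 1.x-era API names are taken from this diff, not from any other version):

    // Before: walk every Function in the Module and compare names.
    static Function* FindMain(Module& M) {
      for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
        if (FI->getName() == std::string("main"))
          return FI;
      return NULL;
    }

    // After: ask the Module directly.
    Function* mainFunc = M.getMainFunction();
    if (!mainFunc)
      return false;   // no main, e.g. an incomplete program: nothing to do

The second bullet is a pure reformatting change (brace placement, blank lines, empty `//` lines), which is why most hunks below only reshuffle whitespace.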
@@ -91,10 +91,7 @@ static bool isSync(const CallInst& CI) {
 // Code generation pass that transforms code to identify where Cilk keywords
 // should be inserted. This relies on `llvm-dis -c' to print out the keywords.
 //----------------------------------------------------------------------------
-
-
-class Cilkifier: public InstVisitor<Cilkifier>
-{
+class Cilkifier: public InstVisitor<Cilkifier> {
   Function* DummySyncFunc;
 
   // Data used when transforming each function.
@@ -124,16 +121,14 @@ public:
 };
 
 
-Cilkifier::Cilkifier(Module& M)
-{
+Cilkifier::Cilkifier(Module& M) {
   // create the dummy Sync function and add it to the Module
   DummySyncFunc = M.getOrInsertFunction(DummySyncFuncName, Type::VoidTy, 0);
 }
 
 void Cilkifier::TransformFunc(Function* F,
                               const hash_set<Function*>& _cilkFunctions,
-                              PgmDependenceGraph& _depGraph)
-{
+                              PgmDependenceGraph& _depGraph) {
   // Memoize the information for this function
   cilkFunctions = &_cilkFunctions;
   depGraph = &_depGraph;
@@ -159,37 +154,35 @@ void Cilkifier::DFSVisitInstr(Instruction* I,
   stmtsVisited.insert(I);
 
   // If there is a dependence from root to I, insert Sync and return
-  if (depsOfRoot.find(I) != depsOfRoot.end())
-    { // Insert a sync before I and stop searching along this path.
-      // If I is a Phi instruction, the dependence can only be an SSA dep.
-      // and we need to insert the sync in the predecessor on the appropriate
-      // incoming edge!
-      CallInst* syncI = 0;
-      if (PHINode* phiI = dyn_cast<PHINode>(I))
-        { // check all operands of the Phi and insert before each one
-          for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
-            if (phiI->getIncomingValue(i) == root)
-              syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
-                                   phiI->getIncomingBlock(i)->getTerminator());
-        }
-      else
-        syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
+  if (depsOfRoot.find(I) != depsOfRoot.end()) {
+    // Insert a sync before I and stop searching along this path.
+    // If I is a Phi instruction, the dependence can only be an SSA dep.
+    // and we need to insert the sync in the predecessor on the appropriate
+    // incoming edge!
+    CallInst* syncI = 0;
+    if (PHINode* phiI = dyn_cast<PHINode>(I)) {
+      // check all operands of the Phi and insert before each one
+      for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
+        if (phiI->getIncomingValue(i) == root)
+          syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
+                               phiI->getIncomingBlock(i)->getTerminator());
+    } else
+      syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
 
-      // Remember the sync for each spawn to eliminate redundant ones later
-      spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
+    // Remember the sync for each spawn to eliminate redundant ones later
+    spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
 
-      return;
-    }
+    return;
+  }
 
   // else visit unvisited successors
-  if (BranchInst* brI = dyn_cast<BranchInst>(I))
-    { // visit first instruction in each successor BB
-      for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
-        if (stmtsVisited.find(&brI->getSuccessor(i)->front())
-            == stmtsVisited.end())
-          DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
-    }
-  else
+  if (BranchInst* brI = dyn_cast<BranchInst>(I)) {
+    // visit first instruction in each successor BB
+    for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
+      if (stmtsVisited.find(&brI->getSuccessor(i)->front())
+          == stmtsVisited.end())
+        DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
+  } else
     if (Instruction* nextI = I->getNext())
      if (stmtsVisited.find(nextI) == stmtsVisited.end())
        DFSVisitInstr(nextI, root, depsOfRoot);
@@ -214,44 +207,37 @@ void Cilkifier::visitCallInst(CallInst& CI)
   std::vector<const PHINode*> phiUsers;
   hash_set<const PHINode*> phisSeen;      // ensures we don't visit a phi twice
   for (Value::use_iterator UI=CI.use_begin(), UE=CI.use_end(); UI != UE; ++UI)
-    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI))
-      {
-        if (phisSeen.find(phiUser) == phisSeen.end())
-          {
-            phiUsers.push_back(phiUser);
-            phisSeen.insert(phiUser);
-          }
+    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI)) {
+      if (phisSeen.find(phiUser) == phisSeen.end()) {
+        phiUsers.push_back(phiUser);
+        phisSeen.insert(phiUser);
+      }
       }
     else
       depsOfRoot.insert(cast<Instruction>(*UI));
 
   // Now we've found the non-Phi users and immediate phi users.
   // Recursively walk the phi users and add their non-phi users.
-  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back())
-    {
-      phiUser = phiUsers.back();
-      for (Value::use_const_iterator UI=phiUser->use_begin(),
-             UE=phiUser->use_end(); UI != UE; ++UI)
-        if (const PHINode* pn = dyn_cast<PHINode>(*UI))
-          {
-            if (phisSeen.find(pn) == phisSeen.end())
-              {
-                phiUsers.push_back(pn);
-                phisSeen.insert(pn);
-              }
-          }
-        else
-          depsOfRoot.insert(cast<Instruction>(*UI));
-    }
+  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back()) {
+    phiUser = phiUsers.back();
+    for (Value::use_const_iterator UI=phiUser->use_begin(),
+           UE=phiUser->use_end(); UI != UE; ++UI)
+      if (const PHINode* pn = dyn_cast<PHINode>(*UI)) {
+        if (phisSeen.find(pn) == phisSeen.end()) {
+          phiUsers.push_back(pn);
+          phisSeen.insert(pn);
+        }
+      } else
+        depsOfRoot.insert(cast<Instruction>(*UI));
+  }
 
   // Walk paths of the CFG starting at the call instruction and insert
   // one sync before the first dependence on each path, if any.
-  if (! depsOfRoot.empty())
-    {
-      stmtsVisited.clear();               // start a new DFS for this CallInst
-      assert(CI.getNext() && "Call instruction cannot be a terminator!");
-      DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
-    }
+  if (! depsOfRoot.empty()) {
+    stmtsVisited.clear();                 // start a new DFS for this CallInst
+    assert(CI.getNext() && "Call instruction cannot be a terminator!");
+    DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
+  }
 
   // Now, eliminate all users of the SSA value of the CallInst, i.e.,
   // if the call instruction returns a value, delete the return value
@@ -304,31 +290,28 @@ FindParallelCalls::FindParallelCalls(Function& F,
   // Now we've found all CallInsts reachable from each CallInst.
   // Find those CallInsts that are parallel with at least one other CallInst
   // by counting total inEdges and outEdges.
-  //
   unsigned long totalNumCalls = completed.size();
 
-  if (totalNumCalls == 1)
-    { // Check first for the special case of a single call instruction not
-      // in any loop. It is not parallel, even if it has no dependences
-      // (this is why it is a special case).
-      //
-      // FIXME:
-      // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
-      // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
-      //
-      return;
-    }
+  if (totalNumCalls == 1) {
+    // Check first for the special case of a single call instruction not
+    // in any loop. It is not parallel, even if it has no dependences
+    // (this is why it is a special case).
+    //
+    // FIXME:
+    // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
+    // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
+    return;
+  }
 
   hash_map<CallInst*, unsigned long> numDeps;
   for (hash_map<CallInst*, DependentsSet>::iterator II = dependents.begin(),
-         IE = dependents.end(); II != IE; ++II)
-    {
-      CallInst* fromCI = II->first;
-      numDeps[fromCI] += II->second.size();
-      for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
-           DI != DE; ++DI)
-        numDeps[*DI]++;                 // *DI can be reached from II->first
-    }
+         IE = dependents.end(); II != IE; ++II) {
+    CallInst* fromCI = II->first;
+    numDeps[fromCI] += II->second.size();
+    for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
+         DI != DE; ++DI)
+      numDeps[*DI]++;                   // *DI can be reached from II->first
+  }
 
   for (hash_map<CallInst*, DependentsSet>::iterator
          II = dependents.begin(), IE = dependents.end(); II != IE; ++II)
@@ -347,36 +330,31 @@ void FindParallelCalls::VisitOutEdges(Instruction* I,
   stmtsVisited.insert(I);
 
   if (CallInst* CI = dyn_cast<CallInst>(I))
-
     // FIXME: Ignoring parallelism in a loop. Here we're actually *ignoring*
     // a self-dependence in order to get the count comparison right above.
     // When we include loop parallelism, self-dependences should be included.
-    //
-    if (CI != root)
-
-      { // CallInst root has a path to CallInst I and any calls reachable from I
-        depsOfRoot.insert(CI);
-        if (completed[CI])
-          { // We have already visited I so we know all nodes it can reach!
-            DependentsSet& depsOfI = dependents[CI];
-            depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
-            return;
-          }
+    if (CI != root) {
+      // CallInst root has a path to CallInst I and any calls reachable from I
+      depsOfRoot.insert(CI);
+      if (completed[CI]) {
+        // We have already visited I so we know all nodes it can reach!
+        DependentsSet& depsOfI = dependents[CI];
+        depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
+        return;
+      }
    }
 
   // If we reach here, we need to visit all children of I
   for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(*I);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, root, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, root, depsOfRoot);
+  }
 }
 
 
-void FindParallelCalls::visitCallInst(CallInst& CI)
-{
+void FindParallelCalls::visitCallInst(CallInst& CI) {
   if (completed[&CI])
     return;
   stmtsVisited.clear();                 // clear flags to do a fresh DFS
@@ -384,12 +362,11 @@ void FindParallelCalls::visitCallInst(CallInst& CI)
   // Visit all children of CI using a recursive walk through dep graph
   DependentsSet& depsOfRoot = dependents[&CI];
   for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(CI);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, &CI, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, &CI, depsOfRoot);
+  }
 
   completed[&CI] = true;
 }
@@ -411,8 +388,7 @@ void FindParallelCalls::visitCallInst(CallInst& CI)
 //----------------------------------------------------------------------------
 
 namespace {
-  class Parallelize: public Pass
-  {
+  class Parallelize: public Pass {
   public:
     /// Driver functions to transform a program
     ///
@@ -433,68 +409,50 @@ namespace {
 }
 
 
-static Function* FindMain(Module& M)
-{
-  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (FI->getName() == std::string("main"))
-      return FI;
-  return NULL;
-}
-
-
-bool Parallelize::run(Module& M)
-{
+bool Parallelize::run(Module& M) {
   hash_set<Function*> parallelFunctions;
   hash_set<Function*> safeParallelFunctions;
   hash_set<const GlobalValue*> indirectlyCalled;
 
   // If there is no main (i.e., for an incomplete program), we can do nothing.
   // If there is a main, mark main as a parallel function.
-  //
-  Function* mainFunc = FindMain(M);
+  Function* mainFunc = M.getMainFunction();
   if (!mainFunc)
     return false;
 
   // (1) Find candidate parallel functions and mark them as Cilk functions
-  //
   for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (! FI->isExternal())
-      {
-        Function* F = FI;
-        DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
+    if (! FI->isExternal()) {
+      Function* F = FI;
+      DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
 
-        // All the hard analysis work gets done here!
-        //
-        FindParallelCalls finder(*F,
-                                getAnalysis<PgmDependenceGraph>().getGraph(*F));
-                        /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
+      // All the hard analysis work gets done here!
+      FindParallelCalls finder(*F,
+                              getAnalysis<PgmDependenceGraph>().getGraph(*F));
+                      /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
 
-        // Now we know which call instructions are useful to parallelize.
-        // Remember those callee functions.
-        //
-        for (std::vector<CallInst*>::iterator
-               CII = finder.parallelCalls.begin(),
-               CIE = finder.parallelCalls.end(); CII != CIE; ++CII)
-          {
-            // Check if this is a direct call...
-            if ((*CII)->getCalledFunction() != NULL)
-              { // direct call: if this is to a non-external function,
-                // mark it as a parallelizable function
-                if (! (*CII)->getCalledFunction()->isExternal())
-                  parallelFunctions.insert((*CII)->getCalledFunction());
-              }
-            else
-              { // Indirect call: mark all potential callees as bad
-                std::vector<GlobalValue*> callees =
-                  tdg.getNodeForValue((*CII)->getCalledValue())
-                  .getNode()->getGlobals();
-                indirectlyCalled.insert(callees.begin(), callees.end());
-              }
-          }
+      // Now we know which call instructions are useful to parallelize.
+      // Remember those callee functions.
+      for (std::vector<CallInst*>::iterator
+             CII = finder.parallelCalls.begin(),
+             CIE = finder.parallelCalls.end(); CII != CIE; ++CII) {
+        // Check if this is a direct call...
+        if ((*CII)->getCalledFunction() != NULL) {
+          // direct call: if this is to a non-external function,
+          // mark it as a parallelizable function
+          if (! (*CII)->getCalledFunction()->isExternal())
+            parallelFunctions.insert((*CII)->getCalledFunction());
+        } else {
+          // Indirect call: mark all potential callees as bad
+          std::vector<GlobalValue*> callees =
+            tdg.getNodeForValue((*CII)->getCalledValue())
+            .getNode()->getGlobals();
+          indirectlyCalled.insert(callees.begin(), callees.end());
+        }
+      }
    }
 
   // Remove all indirectly called functions from the list of Cilk functions.
-  //
   for (hash_set<Function*>::iterator PFI = parallelFunctions.begin(),
          PFE = parallelFunctions.end(); PFI != PFE; ++PFI)
     if (indirectlyCalled.count(*PFI) == 0)
@@ -524,15 +482,13 @@ bool Parallelize::run(Module& M)
   // This should identify both functions and calls to such functions
   // to the code generator.
   // (4) Also, insert calls to sync at appropriate points.
-  //
   Cilkifier cilkifier(M);
   for (hash_set<Function*>::iterator CFI = safeParallelFunctions.begin(),
-         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI)
-    {
-      cilkifier.TransformFunc(*CFI, safeParallelFunctions,
-                              getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
-                      /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
-    }
+         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI) {
+    cilkifier.TransformFunc(*CFI, safeParallelFunctions,
+                            getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
+                    /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
+  }
 
   return true;
 }