diff --git a/lib/Analysis/DataStructure/Parallelize.cpp b/lib/Analysis/DataStructure/Parallelize.cpp
index e22c86cb7fe..c2451fda525 100644
--- a/lib/Analysis/DataStructure/Parallelize.cpp
+++ b/lib/Analysis/DataStructure/Parallelize.cpp
@@ -91,10 +91,7 @@ static bool isSync(const CallInst& CI) {
 // Code generation pass that transforms code to identify where Cilk keywords
 // should be inserted.  This relies on `llvm-dis -c' to print out the keywords.
 //---------------------------------------------------------------------------- 
-
-
-class Cilkifier: public InstVisitor<Cilkifier>
-{
+class Cilkifier: public InstVisitor<Cilkifier> {
   Function* DummySyncFunc;
 
   // Data used when transforming each function.
@@ -124,16 +121,14 @@ public:
 };
 
 
-Cilkifier::Cilkifier(Module& M)
-{
+Cilkifier::Cilkifier(Module& M) {
   // create the dummy Sync function and add it to the Module
   DummySyncFunc = M.getOrInsertFunction(DummySyncFuncName, Type::VoidTy, 0);
 }
 
 void Cilkifier::TransformFunc(Function* F,
                               const hash_set<Function*>& _cilkFunctions,
-                              PgmDependenceGraph& _depGraph)
-{
+                              PgmDependenceGraph& _depGraph) {
   // Memoize the information for this function
   cilkFunctions = &_cilkFunctions;
   depGraph = &_depGraph;
@@ -159,37 +154,35 @@ void Cilkifier::DFSVisitInstr(Instruction* I,
   stmtsVisited.insert(I);
 
   // If there is a dependence from root to I, insert Sync and return
-  if (depsOfRoot.find(I) != depsOfRoot.end())
-    { // Insert a sync before I and stop searching along this path.
-      // If I is a Phi instruction, the dependence can only be an SSA dep.
-      // and we need to insert the sync in the predecessor on the appropriate
-      // incoming edge!
-      CallInst* syncI = 0;
-      if (PHINode* phiI = dyn_cast<PHINode>(I))
-        { // check all operands of the Phi and insert before each one
-          for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
-            if (phiI->getIncomingValue(i) == root)
-              syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
-                                   phiI->getIncomingBlock(i)->getTerminator());
-        }
-      else
-        syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
+  if (depsOfRoot.find(I) != depsOfRoot.end()) {
+    // Insert a sync before I and stop searching along this path.
+    // If I is a Phi instruction, the dependence can only be an SSA dep.
+    // and we need to insert the sync in the predecessor on the appropriate
+    // incoming edge!
+    CallInst* syncI = 0;
+    if (PHINode* phiI = dyn_cast<PHINode>(I)) {
+      // check all operands of the Phi and insert before each one
+      for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
+        if (phiI->getIncomingValue(i) == root)
+          syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
+                               phiI->getIncomingBlock(i)->getTerminator());
+    } else
+      syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
 
-      // Remember the sync for each spawn to eliminate redundant ones later
-      spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
+    // Remember the sync for each spawn to eliminate redundant ones later
+    spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
 
-      return;
-    }
+    return;
+  }
 
   // else visit unvisited successors
-  if (BranchInst* brI = dyn_cast<BranchInst>(I))
-    { // visit first instruction in each successor BB
-      for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
-        if (stmtsVisited.find(&brI->getSuccessor(i)->front())
-            == stmtsVisited.end())
-          DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
-    }
-  else
+  if (BranchInst* brI = dyn_cast<BranchInst>(I)) {
+    // visit first instruction in each successor BB
+    for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
+      if (stmtsVisited.find(&brI->getSuccessor(i)->front())
+          == stmtsVisited.end())
+        DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
+  } else
     if (Instruction* nextI = I->getNext())
       if (stmtsVisited.find(nextI) == stmtsVisited.end())
         DFSVisitInstr(nextI, root, depsOfRoot);
@@ -214,44 +207,37 @@ void Cilkifier::visitCallInst(CallInst& CI)
   std::vector<const PHINode*> phiUsers;
   hash_set<const PHINode*> phisSeen;    // ensures we don't visit a phi twice
   for (Value::use_iterator UI=CI.use_begin(), UE=CI.use_end(); UI != UE; ++UI)
-    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI))
-      {
-        if (phisSeen.find(phiUser) == phisSeen.end())
-          {
-            phiUsers.push_back(phiUser);
-            phisSeen.insert(phiUser);
-          }
+    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI)) {
+      if (phisSeen.find(phiUser) == phisSeen.end()) {
+        phiUsers.push_back(phiUser);
+        phisSeen.insert(phiUser);
       }
+    }
     else
       depsOfRoot.insert(cast<Instruction>(*UI));
 
   // Now we've found the non-Phi users and immediate phi users.
   // Recursively walk the phi users and add their non-phi users.
-  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back())
-    {
-      phiUser = phiUsers.back();
-      for (Value::use_const_iterator UI=phiUser->use_begin(),
-             UE=phiUser->use_end(); UI != UE; ++UI)
-        if (const PHINode* pn = dyn_cast<PHINode>(*UI))
-          {
-            if (phisSeen.find(pn) == phisSeen.end())
-              {
-                phiUsers.push_back(pn);
-                phisSeen.insert(pn);
-              }
-          }
-        else
-          depsOfRoot.insert(cast<Instruction>(*UI));
-    }
+  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back()) {
+    phiUser = phiUsers.back();
+    for (Value::use_const_iterator UI=phiUser->use_begin(),
+           UE=phiUser->use_end(); UI != UE; ++UI)
+      if (const PHINode* pn = dyn_cast<PHINode>(*UI)) {
+        if (phisSeen.find(pn) == phisSeen.end()) {
+          phiUsers.push_back(pn);
+          phisSeen.insert(pn);
+        }
+      } else
+        depsOfRoot.insert(cast<Instruction>(*UI));
+  }
 
   // Walk paths of the CFG starting at the call instruction and insert
   // one sync before the first dependence on each path, if any.
-  if (! depsOfRoot.empty())
-    {
-      stmtsVisited.clear();             // start a new DFS for this CallInst
-      assert(CI.getNext() && "Call instruction cannot be a terminator!");
-      DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
-    }
+  if (! depsOfRoot.empty()) {
+    stmtsVisited.clear();             // start a new DFS for this CallInst
+    assert(CI.getNext() && "Call instruction cannot be a terminator!");
+    DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
+  }
 
   // Now, eliminate all users of the SSA value of the CallInst, i.e., 
   // if the call instruction returns a value, delete the return value
@@ -304,31 +290,28 @@ FindParallelCalls::FindParallelCalls(Function& F,
   // Now we've found all CallInsts reachable from each CallInst.
   // Find those CallInsts that are parallel with at least one other CallInst
   // by counting total inEdges and outEdges.
-  // 
   unsigned long totalNumCalls = completed.size();
 
-  if (totalNumCalls == 1)
-    { // Check first for the special case of a single call instruction not
-      // in any loop.  It is not parallel, even if it has no dependences
-      // (this is why it is a special case).
-      //
-      // FIXME:
-      // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
-      // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
-      // 
-      return;
-    }
+  if (totalNumCalls == 1) {
+    // Check first for the special case of a single call instruction not
+    // in any loop.  It is not parallel, even if it has no dependences
+    // (this is why it is a special case).
+    //
+    // FIXME:
+    // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
+    // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
+    return;
+  }
 
   hash_map<CallInst*, unsigned long> numDeps;
   for (hash_map<CallInst*, DependentsSet>::iterator II = dependents.begin(),
-         IE = dependents.end(); II != IE; ++II)
-    {
-      CallInst* fromCI = II->first;
-      numDeps[fromCI] += II->second.size();
-      for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
-           DI != DE; ++DI)
-        numDeps[*DI]++;                 // *DI can be reached from II->first
-    }
+         IE = dependents.end(); II != IE; ++II) {
+    CallInst* fromCI = II->first;
+    numDeps[fromCI] += II->second.size();
+    for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
+         DI != DE; ++DI)
+      numDeps[*DI]++;                 // *DI can be reached from II->first
+  }
 
   for (hash_map<CallInst*, DependentsSet>::iterator
          II = dependents.begin(), IE = dependents.end(); II != IE; ++II)
@@ -347,36 +330,31 @@ void FindParallelCalls::VisitOutEdges(Instruction* I,
   stmtsVisited.insert(I);
 
   if (CallInst* CI = dyn_cast<CallInst>(I))
-
     // FIXME: Ignoring parallelism in a loop.  Here we're actually *ignoring*
     // a self-dependence in order to get the count comparison right above.
     // When we include loop parallelism, self-dependences should be included.
-    // 
-    if (CI != root)
-
-      { // CallInst root has a path to CallInst I and any calls reachable from I
-        depsOfRoot.insert(CI);
-        if (completed[CI])
-          { // We have already visited I so we know all nodes it can reach!
-            DependentsSet& depsOfI = dependents[CI];
-            depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
-            return;
-          }
+    if (CI != root) {
+      // CallInst root has a path to CallInst I and any calls reachable from I
+      depsOfRoot.insert(CI);
+      if (completed[CI]) {
+        // We have already visited I so we know all nodes it can reach!
+        DependentsSet& depsOfI = dependents[CI];
+        depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
+        return;
       }
+    }
 
   // If we reach here, we need to visit all children of I
   for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(*I);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, root, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, root, depsOfRoot);
+  }
 }
 
 
-void FindParallelCalls::visitCallInst(CallInst& CI)
-{
+void FindParallelCalls::visitCallInst(CallInst& CI) {
   if (completed[&CI])
     return;
   stmtsVisited.clear();                      // clear flags to do a fresh DFS
@@ -384,12 +362,11 @@ void FindParallelCalls::visitCallInst(CallInst& CI)
   // Visit all children of CI using a recursive walk through dep graph
   DependentsSet& depsOfRoot = dependents[&CI];
   for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(CI);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, &CI, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, &CI, depsOfRoot);
+  }
 
   completed[&CI] = true;
 }
@@ -411,8 +388,7 @@ void FindParallelCalls::visitCallInst(CallInst& CI)
 //---------------------------------------------------------------------------- 
 
 namespace {
-  class Parallelize: public Pass
-  {
+  class Parallelize: public Pass {
   public:
     /// Driver functions to transform a program
     ///
@@ -433,68 +409,50 @@ namespace {
 }
 
 
-static Function* FindMain(Module& M)
-{
-  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (FI->getName() == std::string("main"))
-      return FI;
-  return NULL;
-}
-
-
-bool Parallelize::run(Module& M)
-{
+bool Parallelize::run(Module& M) {
   hash_set<Function*> parallelFunctions;
   hash_set<Function*> safeParallelFunctions;
   hash_set<const GlobalValue*> indirectlyCalled;
 
   // If there is no main (i.e., for an incomplete program), we can do nothing.
   // If there is a main, mark main as a parallel function.
-  // 
-  Function* mainFunc = FindMain(M);
+  Function* mainFunc = M.getMainFunction();
   if (!mainFunc)
     return false;
 
   // (1) Find candidate parallel functions and mark them as Cilk functions
-  // 
   for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (! FI->isExternal())
-      {
-        Function* F = FI;
-        DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
+    if (! FI->isExternal()) {
+      Function* F = FI;
+      DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
 
-        // All the hard analysis work gets done here!
-        // 
-        FindParallelCalls finder(*F,
-                                getAnalysis<PgmDependenceGraph>().getGraph(*F));
-                        /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
+      // All the hard analysis work gets done here!
+      FindParallelCalls finder(*F,
+                              getAnalysis<PgmDependenceGraph>().getGraph(*F));
+                      /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
 
-        // Now we know which call instructions are useful to parallelize.
-        // Remember those callee functions.
-        // 
-        for (std::vector<CallInst*>::iterator
-               CII = finder.parallelCalls.begin(),
-               CIE = finder.parallelCalls.end(); CII != CIE; ++CII)
-          {
-            // Check if this is a direct call...
-            if ((*CII)->getCalledFunction() != NULL)
-              { // direct call: if this is to a non-external function,
-                // mark it as a parallelizable function
-                if (! (*CII)->getCalledFunction()->isExternal())
-                  parallelFunctions.insert((*CII)->getCalledFunction());
-              }
-            else
-              { // Indirect call: mark all potential callees as bad
-                std::vector<GlobalValue*> callees =
-                  tdg.getNodeForValue((*CII)->getCalledValue())
-                  .getNode()->getGlobals();
-                indirectlyCalled.insert(callees.begin(), callees.end());
-              }
-          }
+      // Now we know which call instructions are useful to parallelize.
+      // Remember those callee functions.
+      for (std::vector<CallInst*>::iterator
+             CII = finder.parallelCalls.begin(),
+             CIE = finder.parallelCalls.end(); CII != CIE; ++CII) {
+        // Check if this is a direct call...
+        if ((*CII)->getCalledFunction() != NULL) {
+          // direct call: if this is to a non-external function,
+          // mark it as a parallelizable function
+          if (! (*CII)->getCalledFunction()->isExternal())
+            parallelFunctions.insert((*CII)->getCalledFunction());
+        } else {
+          // Indirect call: mark all potential callees as bad
+          std::vector<GlobalValue*> callees =
+            tdg.getNodeForValue((*CII)->getCalledValue())
+            .getNode()->getGlobals();
+          indirectlyCalled.insert(callees.begin(), callees.end());
+        }
       }
+    }
 
   // Remove all indirectly called functions from the list of Cilk functions.
-  // 
   for (hash_set<Function*>::iterator PFI = parallelFunctions.begin(),
          PFE = parallelFunctions.end(); PFI != PFE; ++PFI)
     if (indirectlyCalled.count(*PFI) == 0)
@@ -524,15 +482,13 @@ bool Parallelize::run(Module& M)
   //     This should identify both functions and calls to such functions
   //     to the code generator.
   // (4) Also, insert calls to sync at appropriate points.
-  // 
   Cilkifier cilkifier(M);
   for (hash_set<Function*>::iterator CFI = safeParallelFunctions.begin(),
-         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI)
-    {
-      cilkifier.TransformFunc(*CFI, safeParallelFunctions,
-                             getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
-      /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
-    }
+         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI) {
+    cilkifier.TransformFunc(*CFI, safeParallelFunctions,
+                           getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
+    /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
+  }
 
   return true;
 }
diff --git a/lib/Transforms/IPO/Parallelize.cpp b/lib/Transforms/IPO/Parallelize.cpp
index e22c86cb7fe..c2451fda525 100644
--- a/lib/Transforms/IPO/Parallelize.cpp
+++ b/lib/Transforms/IPO/Parallelize.cpp
@@ -91,10 +91,7 @@ static bool isSync(const CallInst& CI) {
 // Code generation pass that transforms code to identify where Cilk keywords
 // should be inserted.  This relies on `llvm-dis -c' to print out the keywords.
 //---------------------------------------------------------------------------- 
-
-
-class Cilkifier: public InstVisitor<Cilkifier>
-{
+class Cilkifier: public InstVisitor<Cilkifier> {
   Function* DummySyncFunc;
 
   // Data used when transforming each function.
@@ -124,16 +121,14 @@ public:
 };
 
 
-Cilkifier::Cilkifier(Module& M)
-{
+Cilkifier::Cilkifier(Module& M) {
   // create the dummy Sync function and add it to the Module
   DummySyncFunc = M.getOrInsertFunction(DummySyncFuncName, Type::VoidTy, 0);
 }
 
 void Cilkifier::TransformFunc(Function* F,
                               const hash_set<Function*>& _cilkFunctions,
-                              PgmDependenceGraph& _depGraph)
-{
+                              PgmDependenceGraph& _depGraph) {
   // Memoize the information for this function
   cilkFunctions = &_cilkFunctions;
   depGraph = &_depGraph;
@@ -159,37 +154,35 @@ void Cilkifier::DFSVisitInstr(Instruction* I,
   stmtsVisited.insert(I);
 
   // If there is a dependence from root to I, insert Sync and return
-  if (depsOfRoot.find(I) != depsOfRoot.end())
-    { // Insert a sync before I and stop searching along this path.
-      // If I is a Phi instruction, the dependence can only be an SSA dep.
-      // and we need to insert the sync in the predecessor on the appropriate
-      // incoming edge!
-      CallInst* syncI = 0;
-      if (PHINode* phiI = dyn_cast<PHINode>(I))
-        { // check all operands of the Phi and insert before each one
-          for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
-            if (phiI->getIncomingValue(i) == root)
-              syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
-                                   phiI->getIncomingBlock(i)->getTerminator());
-        }
-      else
-        syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
+  if (depsOfRoot.find(I) != depsOfRoot.end()) {
+    // Insert a sync before I and stop searching along this path.
+    // If I is a Phi instruction, the dependence can only be an SSA dep.
+    // and we need to insert the sync in the predecessor on the appropriate
+    // incoming edge!
+    CallInst* syncI = 0;
+    if (PHINode* phiI = dyn_cast<PHINode>(I)) {
+      // check all operands of the Phi and insert before each one
+      for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
+        if (phiI->getIncomingValue(i) == root)
+          syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
+                               phiI->getIncomingBlock(i)->getTerminator());
+    } else
+      syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
 
-      // Remember the sync for each spawn to eliminate redundant ones later
-      spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
+    // Remember the sync for each spawn to eliminate redundant ones later
+    spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
 
-      return;
-    }
+    return;
+  }
 
   // else visit unvisited successors
-  if (BranchInst* brI = dyn_cast<BranchInst>(I))
-    { // visit first instruction in each successor BB
-      for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
-        if (stmtsVisited.find(&brI->getSuccessor(i)->front())
-            == stmtsVisited.end())
-          DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
-    }
-  else
+  if (BranchInst* brI = dyn_cast<BranchInst>(I)) {
+    // visit first instruction in each successor BB
+    for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
+      if (stmtsVisited.find(&brI->getSuccessor(i)->front())
+          == stmtsVisited.end())
+        DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
+  } else
     if (Instruction* nextI = I->getNext())
       if (stmtsVisited.find(nextI) == stmtsVisited.end())
         DFSVisitInstr(nextI, root, depsOfRoot);
@@ -214,44 +207,37 @@ void Cilkifier::visitCallInst(CallInst& CI)
   std::vector<const PHINode*> phiUsers;
   hash_set<const PHINode*> phisSeen;    // ensures we don't visit a phi twice
   for (Value::use_iterator UI=CI.use_begin(), UE=CI.use_end(); UI != UE; ++UI)
-    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI))
-      {
-        if (phisSeen.find(phiUser) == phisSeen.end())
-          {
-            phiUsers.push_back(phiUser);
-            phisSeen.insert(phiUser);
-          }
+    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI)) {
+      if (phisSeen.find(phiUser) == phisSeen.end()) {
+        phiUsers.push_back(phiUser);
+        phisSeen.insert(phiUser);
       }
+    }
     else
       depsOfRoot.insert(cast<Instruction>(*UI));
 
   // Now we've found the non-Phi users and immediate phi users.
   // Recursively walk the phi users and add their non-phi users.
-  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back())
-    {
-      phiUser = phiUsers.back();
-      for (Value::use_const_iterator UI=phiUser->use_begin(),
-             UE=phiUser->use_end(); UI != UE; ++UI)
-        if (const PHINode* pn = dyn_cast<PHINode>(*UI))
-          {
-            if (phisSeen.find(pn) == phisSeen.end())
-              {
-                phiUsers.push_back(pn);
-                phisSeen.insert(pn);
-              }
-          }
-        else
-          depsOfRoot.insert(cast<Instruction>(*UI));
-    }
+  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back()) {
+    phiUser = phiUsers.back();
+    for (Value::use_const_iterator UI=phiUser->use_begin(),
+           UE=phiUser->use_end(); UI != UE; ++UI)
+      if (const PHINode* pn = dyn_cast<PHINode>(*UI)) {
+        if (phisSeen.find(pn) == phisSeen.end()) {
+          phiUsers.push_back(pn);
+          phisSeen.insert(pn);
+        }
+      } else
+        depsOfRoot.insert(cast<Instruction>(*UI));
+  }
 
   // Walk paths of the CFG starting at the call instruction and insert
   // one sync before the first dependence on each path, if any.
-  if (! depsOfRoot.empty())
-    {
-      stmtsVisited.clear();             // start a new DFS for this CallInst
-      assert(CI.getNext() && "Call instruction cannot be a terminator!");
-      DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
-    }
+  if (! depsOfRoot.empty()) {
+    stmtsVisited.clear();             // start a new DFS for this CallInst
+    assert(CI.getNext() && "Call instruction cannot be a terminator!");
+    DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
+  }
 
   // Now, eliminate all users of the SSA value of the CallInst, i.e., 
   // if the call instruction returns a value, delete the return value
@@ -304,31 +290,28 @@ FindParallelCalls::FindParallelCalls(Function& F,
   // Now we've found all CallInsts reachable from each CallInst.
   // Find those CallInsts that are parallel with at least one other CallInst
   // by counting total inEdges and outEdges.
-  // 
   unsigned long totalNumCalls = completed.size();
 
-  if (totalNumCalls == 1)
-    { // Check first for the special case of a single call instruction not
-      // in any loop.  It is not parallel, even if it has no dependences
-      // (this is why it is a special case).
-      //
-      // FIXME:
-      // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
-      // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
-      // 
-      return;
-    }
+  if (totalNumCalls == 1) {
+    // Check first for the special case of a single call instruction not
+    // in any loop.  It is not parallel, even if it has no dependences
+    // (this is why it is a special case).
+    //
+    // FIXME:
+    // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
+    // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
+    return;
+  }
 
   hash_map<CallInst*, unsigned long> numDeps;
   for (hash_map<CallInst*, DependentsSet>::iterator II = dependents.begin(),
-         IE = dependents.end(); II != IE; ++II)
-    {
-      CallInst* fromCI = II->first;
-      numDeps[fromCI] += II->second.size();
-      for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
-           DI != DE; ++DI)
-        numDeps[*DI]++;                 // *DI can be reached from II->first
-    }
+         IE = dependents.end(); II != IE; ++II) {
+    CallInst* fromCI = II->first;
+    numDeps[fromCI] += II->second.size();
+    for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
+         DI != DE; ++DI)
+      numDeps[*DI]++;                 // *DI can be reached from II->first
+  }
 
   for (hash_map<CallInst*, DependentsSet>::iterator
          II = dependents.begin(), IE = dependents.end(); II != IE; ++II)
@@ -347,36 +330,31 @@ void FindParallelCalls::VisitOutEdges(Instruction* I,
   stmtsVisited.insert(I);
 
   if (CallInst* CI = dyn_cast<CallInst>(I))
-
     // FIXME: Ignoring parallelism in a loop.  Here we're actually *ignoring*
     // a self-dependence in order to get the count comparison right above.
     // When we include loop parallelism, self-dependences should be included.
-    // 
-    if (CI != root)
-
-      { // CallInst root has a path to CallInst I and any calls reachable from I
-        depsOfRoot.insert(CI);
-        if (completed[CI])
-          { // We have already visited I so we know all nodes it can reach!
-            DependentsSet& depsOfI = dependents[CI];
-            depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
-            return;
-          }
+    if (CI != root) {
+      // CallInst root has a path to CallInst I and any calls reachable from I
+      depsOfRoot.insert(CI);
+      if (completed[CI]) {
+        // We have already visited I so we know all nodes it can reach!
+        DependentsSet& depsOfI = dependents[CI];
+        depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
+        return;
       }
+    }
 
   // If we reach here, we need to visit all children of I
   for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(*I);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, root, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, root, depsOfRoot);
+  }
 }
 
 
-void FindParallelCalls::visitCallInst(CallInst& CI)
-{
+void FindParallelCalls::visitCallInst(CallInst& CI) {
   if (completed[&CI])
     return;
   stmtsVisited.clear();                      // clear flags to do a fresh DFS
@@ -384,12 +362,11 @@ void FindParallelCalls::visitCallInst(CallInst& CI)
   // Visit all children of CI using a recursive walk through dep graph
   DependentsSet& depsOfRoot = dependents[&CI];
   for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(CI);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, &CI, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, &CI, depsOfRoot);
+  }
 
   completed[&CI] = true;
 }
@@ -411,8 +388,7 @@ void FindParallelCalls::visitCallInst(CallInst& CI)
 //---------------------------------------------------------------------------- 
 
 namespace {
-  class Parallelize: public Pass
-  {
+  class Parallelize: public Pass {
   public:
     /// Driver functions to transform a program
     ///
@@ -433,68 +409,50 @@ namespace {
 }
 
 
-static Function* FindMain(Module& M)
-{
-  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (FI->getName() == std::string("main"))
-      return FI;
-  return NULL;
-}
-
-
-bool Parallelize::run(Module& M)
-{
+bool Parallelize::run(Module& M) {
   hash_set<Function*> parallelFunctions;
   hash_set<Function*> safeParallelFunctions;
   hash_set<const GlobalValue*> indirectlyCalled;
 
   // If there is no main (i.e., for an incomplete program), we can do nothing.
   // If there is a main, mark main as a parallel function.
-  // 
-  Function* mainFunc = FindMain(M);
+  Function* mainFunc = M.getMainFunction();
   if (!mainFunc)
     return false;
 
   // (1) Find candidate parallel functions and mark them as Cilk functions
-  // 
   for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (! FI->isExternal())
-      {
-        Function* F = FI;
-        DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
+    if (! FI->isExternal()) {
+      Function* F = FI;
+      DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
 
-        // All the hard analysis work gets done here!
-        // 
-        FindParallelCalls finder(*F,
-                                getAnalysis<PgmDependenceGraph>().getGraph(*F));
-                        /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
+      // All the hard analysis work gets done here!
+      FindParallelCalls finder(*F,
+                              getAnalysis<PgmDependenceGraph>().getGraph(*F));
+                      /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
 
-        // Now we know which call instructions are useful to parallelize.
-        // Remember those callee functions.
-        // 
-        for (std::vector<CallInst*>::iterator
-               CII = finder.parallelCalls.begin(),
-               CIE = finder.parallelCalls.end(); CII != CIE; ++CII)
-          {
-            // Check if this is a direct call...
-            if ((*CII)->getCalledFunction() != NULL)
-              { // direct call: if this is to a non-external function,
-                // mark it as a parallelizable function
-                if (! (*CII)->getCalledFunction()->isExternal())
-                  parallelFunctions.insert((*CII)->getCalledFunction());
-              }
-            else
-              { // Indirect call: mark all potential callees as bad
-                std::vector<GlobalValue*> callees =
-                  tdg.getNodeForValue((*CII)->getCalledValue())
-                  .getNode()->getGlobals();
-                indirectlyCalled.insert(callees.begin(), callees.end());
-              }
-          }
+      // Now we know which call instructions are useful to parallelize.
+      // Remember those callee functions.
+      for (std::vector<CallInst*>::iterator
+             CII = finder.parallelCalls.begin(),
+             CIE = finder.parallelCalls.end(); CII != CIE; ++CII) {
+        // Check if this is a direct call...
+        if ((*CII)->getCalledFunction() != NULL) {
+          // direct call: if this is to a non-external function,
+          // mark it as a parallelizable function
+          if (! (*CII)->getCalledFunction()->isExternal())
+            parallelFunctions.insert((*CII)->getCalledFunction());
+        } else {
+          // Indirect call: mark all potential callees as bad
+          std::vector<GlobalValue*> callees =
+            tdg.getNodeForValue((*CII)->getCalledValue())
+            .getNode()->getGlobals();
+          indirectlyCalled.insert(callees.begin(), callees.end());
+        }
       }
+    }
 
   // Remove all indirectly called functions from the list of Cilk functions.
-  // 
   for (hash_set<Function*>::iterator PFI = parallelFunctions.begin(),
          PFE = parallelFunctions.end(); PFI != PFE; ++PFI)
     if (indirectlyCalled.count(*PFI) == 0)
@@ -524,15 +482,13 @@ bool Parallelize::run(Module& M)
   //     This should identify both functions and calls to such functions
   //     to the code generator.
   // (4) Also, insert calls to sync at appropriate points.
-  // 
   Cilkifier cilkifier(M);
   for (hash_set<Function*>::iterator CFI = safeParallelFunctions.begin(),
-         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI)
-    {
-      cilkifier.TransformFunc(*CFI, safeParallelFunctions,
-                             getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
-      /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
-    }
+         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI) {
+    cilkifier.TransformFunc(*CFI, safeParallelFunctions,
+                           getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
+    /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
+  }
 
   return true;
 }