This change is to fix rdar://12571717 which is about assertion in Reassociate pass.

The assertion is trigged when the Reassociater tries to transform expression ... + 2 * n * 3 + 2 * m + ... into: ... + 2 * (n*3 + m). In the process of the transformation, a helper routine folds the constant 2*3 into 6, confusing optimizer which is trying the to eliminate the common factor 2, and cannot find 2 any more. Review is pending. But I'd like commit first in order to help those who are waiting for this fix. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167740 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-05 17:39:16 +00:00 · 2012-11-12 19:34:11 +00:00 · 2012-11-12 19:34:11 +00:00 · 0a46bf13a3
commit 0a46bf13a3
parent ae692f2bae
3 changed files with 330 additions and 11 deletions
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@ -1,4 +1,4 @@
-//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
+
 //
 //                     The LLVM Compiler Infrastructure
 //
@ -41,6 +41,8 @@
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <deque>
+#include <set>
 using namespace llvm;

 STATISTIC(NumChanged, "Number of insts reassociated");
@ -113,10 +115,148 @@ namespace {
 }

 namespace {
+
+  class Reassociate;
+
+  class isInstDeadFunc {
+  public:
+    bool operator() (Instruction* I) {
+      return isInstructionTriviallyDead(I);
+    }
+  };
+  
+  class RmInstCallBackFunc {
+    Reassociate *reassoc_;
+  public:
+    RmInstCallBackFunc(Reassociate* ra): reassoc_(ra) {}
+    inline void operator() (Instruction*);
+  };
+
+  // The worklist has following traits:
+  //  - it is pretty much a dequeue.
+  //  - has "set" semantic, meaning all elements in the worklist are distinct.
+  //  - efficient in-place element removal (by replacing the element with
+  //    invalid value 0).
+  //
+  class RedoWorklist {
+  public:
+    typedef AssertingVH<Instruction> value_type;
+    typedef std::set<value_type> set_type;
+    typedef std::deque<value_type> deque_type;
+    // caller cannot modify element via iterator, hence constant.
+    typedef deque_type::const_iterator iterator;
+    typedef deque_type::const_iterator const_iterator;
+    typedef deque_type::size_type size_type;
+  
+    RedoWorklist() {}
+  
+    bool empty() const {
+      return deque_.empty();
+    }
+  
+    size_type size() const {
+      return deque_.size();
+    }
+
+    // return true iff X is in the worklist
+    bool found(const value_type &X) {
+      return set_.find(X) != set_.end();
+    }
+  
+    iterator begin() {
+      return deque_.begin();
+    }
+  
+    const_iterator begin() const {
+      return deque_.begin();
+    }
+  
+    iterator end() {
+      return deque_.end();
+    }
+  
+    const_iterator end() const {
+      return deque_.end();
+    }
+  
+    const value_type &back() const {
+      assert(!empty() && "worklist is empty");
+      return deque_.back();
+    }
+  
+    // If element X is already in the worklist, do nothing but return false;
+    // otherwise, append X to the worklist and return true.
+    //
+    bool push_back(const value_type &X) {
+      bool result = set_.insert(X).second;
+      if (result)
+        deque_.push_back(X);
+      return result;
+    }
+  
+    // insert() is the alias of push_back()
+    bool insert(const value_type &X) {
+      return push_back(X);
+    }
+
+    void clear() {
+      set_.clear();
+      deque_.clear();
+    }
+  
+    void pop_back() {
+      assert(!empty() && "worklist is empty");
+      set_.erase(back());
+      deque_.pop_back();
+    }
+    
+    value_type pop_back_val() {
+      value_type Ret = back();
+      pop_back();
+      return Ret;
+    }
+
+    const value_type &front() const {
+      assert(!empty() && "worklist is empty");
+      return deque_.front();
+    }
+
+    void pop_front() {
+      assert(!empty() && "worklist is empty");
+      set_.erase(front());
+      deque_.pop_front();
+    }
+    
+    value_type pop_front_val() {
+      value_type Ret = front();
+      pop_front();
+      return Ret;
+    }
+
+    // Remove an element from the worklist. Return true iff the element was 
+    // in the worklist.
+    bool remove(const value_type& X);
+
+    template <typename pred, typename call_back_func>
+    int inplace_remove(pred p, call_back_func cb);
+
+    template <typename pred, typename call_back_func>
+    int inplace_rremove(pred p, call_back_func cb);
+
+    void append(RedoWorklist&);
+
+  private:
+    set_type set_;
+    deque_type deque_;
+  };
+
  class Reassociate : public FunctionPass {
+    friend class RmInstCallBackFunc;
+
    DenseMap<BasicBlock*, unsigned> RankMap;
    DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
-    SetVector<AssertingVH<Instruction> > RedoInsts;
+    RedoWorklist RedoInsts;
+    RedoWorklist TmpRedoInsts;
    bool MadeChange;
  public:
    static char ID; // Pass identification, replacement for typeid
@ -141,9 +281,12 @@ namespace {
                                SmallVectorImpl<Factor> &Factors);
    Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
                                   SmallVectorImpl<Factor> &Factors);
+    void removeNegFromMulOps(SmallVectorImpl<ValueEntry> &Ops);
    Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
    Value *RemoveFactorFromExpression(Value *V, Value *Factor);
    void EraseInst(Instruction *I);
+    void EraseInstCallBack(Instruction *I);
+    void EraseAllDeadInst();
    void OptimizeInst(Instruction *I);
  };
 }
@ -182,6 +325,75 @@ static bool isUnmovableInstruction(Instruction *I) {
  return false;
 }

+inline void RmInstCallBackFunc::operator() (Instruction* I) {
+  reassoc_->EraseInstCallBack(I);
+}
+
+// Remove an item from the worklist. Return true iff the element was 
+// in the worklist.
+bool RedoWorklist::remove(const value_type& X) {
+  if (set_.erase(X)) {
+    deque_type::iterator I = std::find(deque_.begin(), deque_.end(), X);
+    assert(I != deque_.end() && "Can not find element");
+    deque_.erase(I);
+    return true;
+  }
+  return false;
+}
+
+// Forward go through each element e, calling p(e) to tell if e should be 
+// removed or not; if p(e) = true, then e will be replaced with NULL to 
+// indicate it is removed from the worklist, and functor cb will be 
+// called for further processing on e. The functors should not invalidate
+// the iterator by inserting or deleteing element to and from the worklist.
+// 
+// Returns the number of instruction being deleted.
+template <typename pred, typename call_back_func>
+int RedoWorklist::inplace_remove(pred p, call_back_func cb) {
+  int cnt = 0;
+  for (typename deque_type::iterator iter = deque_.begin(),
+       iter_e = deque_.end(); iter != iter_e; iter++) {
+    value_type &element = *iter;
+    if (p(element) && set_.erase(element)) {
+      Instruction* t = element;
+      element.~value_type();
+      new (&element) value_type(NULL);
+      cb(t);
+      cnt ++;
+    }
+  }
+  return cnt;
+}
+
+// inplace_rremove() is the same as inplace_remove() except that elements 
+// are visited in backward order.
+template <typename pred, typename call_back_func>
+int RedoWorklist::inplace_rremove(pred p, call_back_func cb) {
+  int cnt = 0;
+  for (typename deque_type::reverse_iterator iter = deque_.rbegin(),
+       iter_e = deque_.rend(); iter != iter_e; iter++) {
+    value_type &element = *iter;
+    if (p(element) && set_.erase(element)) {
+      Instruction* t = element;
+      element.~value_type();
+      new (&element) value_type(NULL);
+      cb(t);
+      cnt ++;
+    }
+  }
+  return cnt;
+}
+
+void RedoWorklist::append(RedoWorklist& that) {
+  deque_type &that_deque = that.deque_;
+
+  while (!that_deque.empty()) {
+    push_back(that_deque.front());
+    that_deque.pop_front();
+  }
+  that.clear();
+}
+
 void Reassociate::BuildRankMap(Function &F) {
  unsigned i = 2;

@ -1418,8 +1630,66 @@ Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
  return V;
 }

+// Multiply Ops may have some negation operators. This situation arises
+// when the negation operators have multiple uses, and LinearizeExprTree() has
+// to treat them as leaf operands. Before multiplication optimization begins,
+// get rid of the negations wherever possible.
+void Reassociate::removeNegFromMulOps(SmallVectorImpl<ValueEntry> &Ops) {
+  int32_t NegIdx = -1;
+
+  // loop over all elements except the last one
+  for (int32_t Idx = 0, IdxEnd = Ops.size() - 1; Idx < IdxEnd; Idx++) {
+    ValueEntry &VE = Ops[Idx];
+    if (!BinaryOperator::isNeg(VE.Op))
+      continue;
+    
+    if (NegIdx < 0) {
+      NegIdx = Idx;
+      continue;
+    }
+
+    // Find a pair of negation operators, say -X and -Y, change them to 
+    // X and Y respectively.
+    ValueEntry &VEX = Ops[NegIdx];
+    Value *OpX = cast<BinaryOperator>(VEX.Op)->getOperand(1);
+    VEX.Op = OpX;
+    VEX.Rank = getRank(OpX);
+
+    Value *OpY = cast<BinaryOperator>(VE.Op)->getOperand(1);
+    VE.Op = OpY;
+    VE.Rank = getRank(OpY);
+    NegIdx = -1;
+  }
+
+  if (NegIdx >= 0) {
+    // We have visited odd number of negation operators so far. 
+    // Check if the last element is negation as well. 
+    ValueEntry &Last = Ops.back();
+    Value *LastOp = Last.Op;
+    if (!isa<ConstantInt>(LastOp) && !BinaryOperator::isNeg(LastOp))
+      return;
+
+    ValueEntry& PrevNeg = Ops[NegIdx]; 
+    Value *Op = cast<BinaryOperator>(PrevNeg.Op)->getOperand(1);
+    PrevNeg.Op = Op;
+    PrevNeg.Rank = getRank(Op);
+
+    if (isa<ConstantInt>(LastOp))
+      Last.Op = ConstantExpr::getNeg(cast<Constant>(LastOp));
+    else {
+      LastOp = cast<BinaryOperator>(PrevNeg.Op)->getOperand(1);
+      Last.Op = LastOp;
+      Last.Rank = getRank(LastOp);
+    }
+  }
+}
+
 Value *Reassociate::OptimizeMul(BinaryOperator *I,
                                SmallVectorImpl<ValueEntry> &Ops) {
+
+  // Simplify the operands: (-x)*(-y) -> x*y, and (-x)*c -> x*(-c)
+  removeNegFromMulOps(Ops);
+
  // We can only optimize the multiplies when there is a chain of more than
  // three, such that a balanced tree might require fewer total multiplies.
  if (Ops.size() < 4)
@ -1478,14 +1748,17 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
  return 0;
 }

-/// EraseInst - Zap the given instruction, adding interesting operands to the
-/// work list.
-void Reassociate::EraseInst(Instruction *I) {
+// EraseInstCallBack is a helper function of EraseInst which will be called to 
+// delete an individual instruction, and it is also a callback funciton when 
+// EraseAllDeadInst is called to delete all dead instruciton in the Redo 
+// worklist (RedoInsts). 
+//  
+void Reassociate::EraseInstCallBack(Instruction *I) {
+  DEBUG(dbgs() << "Erase instruction :" << *I << "\n");
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
  SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
  // Erase the dead instruction.
  ValueRankMap.erase(I);
-  RedoInsts.remove(I);
  I->eraseFromParent();
  // Optimize its operands.
  SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
@ -1497,10 +1770,36 @@ void Reassociate::EraseInst(Instruction *I) {
      while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode &&
             Visited.insert(Op))
        Op = Op->use_back();
-      RedoInsts.insert(Op);
+      
+      // The caller may be itearating the RedoInsts. Inserting a new element to 
+      // RedoInsts will invaidate the iterator. Instead, we temporally place the 
+      // new candidate to TmpRedoInsts. It is up to caller to combine 
+      // TmpRedoInsts and RedoInsts together.
+      //
+      if (!RedoInsts.found(Op))
+        TmpRedoInsts.insert(Op);
    }
 }

+/// EraseInst - Zap the given instruction, adding interesting operands to the
+/// work list.
+void Reassociate::EraseInst(Instruction *I) {
+  RedoInsts.remove(I);
+
+  // Since EraseInstCallBack() put new reassociation candidates to TmpRedoInsts
+  // we need to copy the candidates back to RedoInsts.
+  TmpRedoInsts.clear();
+  EraseInstCallBack(I);
+  RedoInsts.append(TmpRedoInsts);
+}
+
+/// EraseAllDeadInst - Remove all dead instructions from the worklist. 
+void Reassociate::EraseAllDeadInst() {
+  TmpRedoInsts.clear();
+  RedoInsts.inplace_rremove(isInstDeadFunc(), RmInstCallBackFunc(this));
+  RedoInsts.append(TmpRedoInsts);
+}
+
 /// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
 /// instructions is not allowed.
 void Reassociate::OptimizeInst(Instruction *I) {
@ -1508,6 +1807,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
  if (!isa<BinaryOperator>(I))
    return;

+  DEBUG(dbgs() << "\n>Opt Instruction: " << *I << '\n');
+
  if (I->getOpcode() == Instruction::Shl &&
      isa<ConstantInt>(I->getOperand(1)))
    // If an operand of this shift is a reassociable multiply, or if the shift
@ -1686,9 +1987,14 @@ bool Reassociate::runOnFunction(Function &F) {
        ++II;
      }

+    DEBUG(dbgs() << "Process instructions in worklist\n");
+    EraseAllDeadInst();
+
    // If this produced extra instructions to optimize, handle them now.
    while (!RedoInsts.empty()) {
-      Instruction *I = RedoInsts.pop_back_val();
+      Instruction *I = RedoInsts.pop_front_val();
+      if (!I)
+        continue;
      if (isInstructionTriviallyDead(I))
        EraseInst(I);
      else
--- a/test/Transforms/Reassociate/mul_neg.ll
+++ b/test/Transforms/Reassociate/mul_neg.ll
@ -0,0 +1,13 @@
+; RUN: opt -S -reassociate < %s | FileCheck %s
+
+; t=-a; retval = t*7|t => t-a; retval => a*-7|t
+define i32 @mulneg(i32 %a) nounwind uwtable ssp {
+entry:
+  %sub = sub nsw i32 0, %a
+  %tmp1 = mul i32 %sub, 7
+  %tmp2 = xor i32 %sub, %tmp1
+  ret i32 %tmp2
+; CHECK: entry
+; CHECK: %tmp1 = mul i32 %a, -7 
+; CHECK: ret
+}
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@ -1,7 +1,7 @@
 ; RUN: opt < %s -reassociate -S | FileCheck %s

 define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
-; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
+; Check that a*a*b+a*a*c is turned into (a*a)*(b+c).
 ; CHECK: @multistep1
  %t0 = mul i64 %a, %b
  %t1 = mul i64 %a, %t0 ; a*(a*b)
@ -9,8 +9,8 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
  %t3 = mul i64 %a, %t2 ; a*(a*c)
  %t4 = add i64 %t1, %t3
 ; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: mul i64 %a, %a
+; CHECK-NEXT: mul i64 %tmp{{.*}}, %tmp{{.*}}
 ; CHECK-NEXT: ret
  ret i64 %t4
 }