diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 33c387cb71c..f40d34f2488 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -111,10 +111,11 @@ namespace { uint32_t nextValueNumber; Expression create_expression(Instruction* I); + Expression create_intrinsic_expression(CallInst *C, uint32_t opcode, + bool IsCommutative); Expression create_cmp_expression(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS); - Expression create_extractvalue_expression(ExtractValueInst* EI); uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } @@ -193,6 +194,29 @@ Expression ValueTable::create_expression(Instruction *I) { return e; } +Expression ValueTable::create_intrinsic_expression(CallInst *C, uint32_t opcode, + bool IsCommutative) { + Expression e; + e.opcode = opcode; + StructType *ST = cast(C->getType()); + assert(ST); + e.type = *ST->element_begin(); + + for (unsigned i = 0, ei = C->getNumArgOperands(); i < ei; ++i) + e.varargs.push_back(lookup_or_add(C->getArgOperand(i))); + if (IsCommutative) { + // Ensure that commutative instructions that only differ by a permutation + // of their operands get the same value number by sorting the operand value + // numbers. Since all commutative instructions have two operands it is more + // efficient to sort by hand rather than using, say, std::sort. + assert(C->getNumArgOperands() == 2 && "Unsupported commutative instruction!"); + if (e.varargs[0] > e.varargs[1]) + std::swap(e.varargs[0], e.varargs[1]); + } + + return e; +} + Expression ValueTable::create_cmp_expression(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { @@ -212,58 +236,6 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode, return e; } -Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { - assert(EI != 0 && "Not an ExtractValueInst?"); - Expression e; - e.type = EI->getType(); - e.opcode = 0; - - IntrinsicInst *I = dyn_cast(EI->getAggregateOperand()); - if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { - // EI might be an extract from one of our recognised intrinsics. If it - // is we'll synthesize a semantically equivalent expression instead on - // an extract value expression. - switch (I->getIntrinsicID()) { - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: - e.opcode = Instruction::Add; - break; - case Intrinsic::ssub_with_overflow: - case Intrinsic::usub_with_overflow: - e.opcode = Instruction::Sub; - break; - case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: - e.opcode = Instruction::Mul; - break; - default: - break; - } - - if (e.opcode != 0) { - // Intrinsic recognized. Grab its args to finish building the expression. - assert(I->getNumArgOperands() == 2 && - "Expect two args for recognised intrinsics."); - e.varargs.push_back(lookup_or_add(I->getArgOperand(0))); - e.varargs.push_back(lookup_or_add(I->getArgOperand(1))); - return e; - } - } - - // Not a recognised intrinsic. Fall back to producing an extract value - // expression. - e.opcode = EI->getOpcode(); - for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end(); - OI != OE; ++OI) - e.varargs.push_back(lookup_or_add(*OI)); - - for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - - return e; -} - //===----------------------------------------------------------------------===// // ValueTable External Functions //===----------------------------------------------------------------------===// @@ -397,8 +369,29 @@ uint32_t ValueTable::lookup_or_add(Value *V) { Instruction* I = cast(V); Expression exp; switch (I->getOpcode()) { - case Instruction::Call: - return lookup_or_add_call(cast(I)); + case Instruction::Call: { + CallInst *C = cast(I); + if (IntrinsicInst *II = dyn_cast(C)) { + switch (II->getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + exp = create_intrinsic_expression(C, Instruction::Add, true); + break; + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + exp = create_intrinsic_expression(C, Instruction::Sub, false); + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + exp = create_intrinsic_expression(C, Instruction::Mul, true); + break; + default: + return lookup_or_add_call(C); + } + } else { + return lookup_or_add_call(C); + } + } break; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -437,10 +430,8 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::ShuffleVector: case Instruction::InsertValue: case Instruction::GetElementPtr: - exp = create_expression(I); - break; case Instruction::ExtractValue: - exp = create_extractvalue_expression(cast(I)); + exp = create_expression(I); break; default: valueNumbering[V] = nextValueNumber; @@ -2189,6 +2180,54 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, return Changed; } +static bool normalOpAfterIntrinsic(Instruction *I, Value *Repl) +{ + switch (I->getOpcode()) { + case Instruction::Add: + if (IntrinsicInst *II = dyn_cast(Repl)) + return II->getIntrinsicID() == Intrinsic::sadd_with_overflow + || II->getIntrinsicID() == Intrinsic::uadd_with_overflow; + return false; + case Instruction::Sub: + if (IntrinsicInst *II = dyn_cast(Repl)) + return II->getIntrinsicID() == Intrinsic::ssub_with_overflow + || II->getIntrinsicID() == Intrinsic::usub_with_overflow; + return false; + case Instruction::Mul: + if (IntrinsicInst *II = dyn_cast(Repl)) + return II->getIntrinsicID() == Intrinsic::smul_with_overflow + || II->getIntrinsicID() == Intrinsic::umul_with_overflow; + return false; + default: + return false; + } +} + +static bool intrinsicAterNormalOp(Instruction *I, Value *Repl) +{ + IntrinsicInst *II = dyn_cast(I); + if (!II) + return false; + + Instruction *RI = dyn_cast(Repl); + if (!RI) + return false; + + switch (RI->getOpcode()) { + case Instruction::Add: + return II->getIntrinsicID() == Intrinsic::sadd_with_overflow + || II->getIntrinsicID() == Intrinsic::uadd_with_overflow; + case Instruction::Sub: + return II->getIntrinsicID() == Intrinsic::ssub_with_overflow + || II->getIntrinsicID() == Intrinsic::usub_with_overflow; + case Instruction::Mul: + return II->getIntrinsicID() == Intrinsic::smul_with_overflow + || II->getIntrinsicID() == Intrinsic::umul_with_overflow; + default: + return false; + } +} + /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I) { @@ -2302,6 +2341,27 @@ bool GVN::processInstruction(Instruction *I) { return false; } + if (normalOpAfterIntrinsic(I, repl)) { + // An intrinsic followed by a normal operation (e.g. sadd_with_overflow + // followed by a sadd): replace the second instruction with an extract. + IntrinsicInst *II = cast(repl); + assert(II); + repl = ExtractValueInst::Create(II, 0, I->getName() + ".repl", I); + } else if (intrinsicAterNormalOp(I, repl)) { + // A normal operation followed by an intrinsic (e.g. sadd followed by a + // sadd_with_overflow). + // Clone the intrinsic, and insert it before the replacing instruction. Then + // replace the (current) instruction with the cloned one. In a subsequent + // run, the original replacement (the non-intrinsic) will be be replaced by + // the new intrinsic. + Instruction *RI = dyn_cast(repl); + assert(RI); + Instruction *newIntrinsic = I->clone(); + newIntrinsic->setName(I->getName() + ".repl"); + newIntrinsic->insertBefore(RI); + repl = newIntrinsic; + } + // Remove it! patchAndReplaceAllUsesWith(I, repl); if (MD && repl->getType()->getScalarType()->isPointerTy()) diff --git a/test/Transforms/GVN/overflow.ll b/test/Transforms/GVN/overflow.ll new file mode 100644 index 00000000000..4d5ac66dc3f --- /dev/null +++ b/test/Transforms/GVN/overflow.ll @@ -0,0 +1,43 @@ +; RUN: opt -S -gvn < %s | FileCheck %s + +define i32 @sadd1(i32 %a, i32 %b) #0 { +; CHECK-LABEL: @sadd1( +entry: + %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %cmp = extractvalue { i32, i1 } %sadd, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + ret i32 42 + +if.end: ; preds = %entry + %sadd3 = add i32 %a, %b + ret i32 %sadd3 +; CHECK-NOT: add i32 %a, %b +; CHECK: %sadd3.repl = extractvalue { i32, i1 } %sadd, 0 +; CHECK: ret i32 %sadd3.repl +} + +define i32 @sadd2(i32 %a, i32 %b) #0 { +entry: + %sadd3 = add i32 %a, %b + %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %cmp = extractvalue { i32, i1 } %sadd, 1 + br i1 %cmp, label %if.then, label %if.end +; CHECK-NOT: %sadd3 = add i32 %a, %b +; CHECK: %sadd.repl = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) +; CHECK-NOT: %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) +; CHECK: %sadd3.repl = extractvalue { i32, i1 } %sadd.repl, 0 + +if.then: ; preds = %entry + %sadd4 = add i32 %sadd3, 1 + ret i32 %sadd4 +; CHECK: %sadd4 = add i32 %sadd3.repl, 1 + +if.end: ; preds = %entry + ret i32 %sadd3 +; CHECK: ret i32 %sadd3.repl +} + + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone