GVN: merge overflow intrinsics with non-overflow instructions.

When an overflow intrinsic is followed by a non-overflow instruction,
replace the latter with an extract. For example:

  %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %sadd3 = add i32 %a, %b

Here the add statement will be replaced by an extract.

When an overflow intrinsic follows a non-overflow instruction, a clone
of the intrinsic is inserted before the normal instruction, which makes
it the same as the previous case. Subsequent runs of GVN can then clean
up the duplicate instructions and insert the extract.

This fixes PR8817.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203553 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Erik Verbruggen 2014-03-11 09:36:48 +00:00
parent d4b9957d1f
commit cf5240642b
2 changed files with 161 additions and 58 deletions

View File

@ -111,10 +111,11 @@ namespace {
uint32_t nextValueNumber;
Expression create_expression(Instruction* I);
Expression create_intrinsic_expression(CallInst *C, uint32_t opcode,
bool IsCommutative);
Expression create_cmp_expression(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS);
Expression create_extractvalue_expression(ExtractValueInst* EI);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
@ -193,6 +194,29 @@ Expression ValueTable::create_expression(Instruction *I) {
return e;
}
Expression ValueTable::create_intrinsic_expression(CallInst *C, uint32_t opcode,
bool IsCommutative) {
Expression e;
e.opcode = opcode;
StructType *ST = cast<StructType>(C->getType());
assert(ST);
e.type = *ST->element_begin();
for (unsigned i = 0, ei = C->getNumArgOperands(); i < ei; ++i)
e.varargs.push_back(lookup_or_add(C->getArgOperand(i)));
if (IsCommutative) {
// Ensure that commutative instructions that only differ by a permutation
// of their operands get the same value number by sorting the operand value
// numbers. Since all commutative instructions have two operands it is more
// efficient to sort by hand rather than using, say, std::sort.
assert(C->getNumArgOperands() == 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
}
return e;
}
Expression ValueTable::create_cmp_expression(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS) {
@ -212,58 +236,6 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode,
return e;
}
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
assert(EI != 0 && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
e.opcode = 0;
IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
// EI might be an extract from one of our recognised intrinsics. If it
// is we'll synthesize a semantically equivalent expression instead on
// an extract value expression.
switch (I->getIntrinsicID()) {
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
e.opcode = Instruction::Add;
break;
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
e.opcode = Instruction::Sub;
break;
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
e.opcode = Instruction::Mul;
break;
default:
break;
}
if (e.opcode != 0) {
// Intrinsic recognized. Grab its args to finish building the expression.
assert(I->getNumArgOperands() == 2 &&
"Expect two args for recognised intrinsics.");
e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
return e;
}
}
// Not a recognised intrinsic. Fall back to producing an extract value
// expression.
e.opcode = EI->getOpcode();
for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
OI != OE; ++OI)
e.varargs.push_back(lookup_or_add(*OI));
for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
II != IE; ++II)
e.varargs.push_back(*II);
return e;
}
//===----------------------------------------------------------------------===//
// ValueTable External Functions
//===----------------------------------------------------------------------===//
@ -397,8 +369,29 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
Instruction* I = cast<Instruction>(V);
Expression exp;
switch (I->getOpcode()) {
case Instruction::Call:
return lookup_or_add_call(cast<CallInst>(I));
case Instruction::Call: {
CallInst *C = cast<CallInst>(I);
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(C)) {
switch (II->getIntrinsicID()) {
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
exp = create_intrinsic_expression(C, Instruction::Add, true);
break;
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
exp = create_intrinsic_expression(C, Instruction::Sub, false);
break;
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
exp = create_intrinsic_expression(C, Instruction::Mul, true);
break;
default:
return lookup_or_add_call(C);
}
} else {
return lookup_or_add_call(C);
}
} break;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@ -437,10 +430,8 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::ShuffleVector:
case Instruction::InsertValue:
case Instruction::GetElementPtr:
exp = create_expression(I);
break;
case Instruction::ExtractValue:
exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
exp = create_expression(I);
break;
default:
valueNumbering[V] = nextValueNumber;
@ -2189,6 +2180,54 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
return Changed;
}
static bool normalOpAfterIntrinsic(Instruction *I, Value *Repl)
{
switch (I->getOpcode()) {
case Instruction::Add:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Repl))
return II->getIntrinsicID() == Intrinsic::sadd_with_overflow
|| II->getIntrinsicID() == Intrinsic::uadd_with_overflow;
return false;
case Instruction::Sub:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Repl))
return II->getIntrinsicID() == Intrinsic::ssub_with_overflow
|| II->getIntrinsicID() == Intrinsic::usub_with_overflow;
return false;
case Instruction::Mul:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Repl))
return II->getIntrinsicID() == Intrinsic::smul_with_overflow
|| II->getIntrinsicID() == Intrinsic::umul_with_overflow;
return false;
default:
return false;
}
}
static bool intrinsicAterNormalOp(Instruction *I, Value *Repl)
{
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
if (!II)
return false;
Instruction *RI = dyn_cast<Instruction>(Repl);
if (!RI)
return false;
switch (RI->getOpcode()) {
case Instruction::Add:
return II->getIntrinsicID() == Intrinsic::sadd_with_overflow
|| II->getIntrinsicID() == Intrinsic::uadd_with_overflow;
case Instruction::Sub:
return II->getIntrinsicID() == Intrinsic::ssub_with_overflow
|| II->getIntrinsicID() == Intrinsic::usub_with_overflow;
case Instruction::Mul:
return II->getIntrinsicID() == Intrinsic::smul_with_overflow
|| II->getIntrinsicID() == Intrinsic::umul_with_overflow;
default:
return false;
}
}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
@ -2302,6 +2341,27 @@ bool GVN::processInstruction(Instruction *I) {
return false;
}
if (normalOpAfterIntrinsic(I, repl)) {
// An intrinsic followed by a normal operation (e.g. sadd_with_overflow
// followed by a sadd): replace the second instruction with an extract.
IntrinsicInst *II = cast<IntrinsicInst>(repl);
assert(II);
repl = ExtractValueInst::Create(II, 0, I->getName() + ".repl", I);
} else if (intrinsicAterNormalOp(I, repl)) {
// A normal operation followed by an intrinsic (e.g. sadd followed by a
// sadd_with_overflow).
// Clone the intrinsic, and insert it before the replacing instruction. Then
// replace the (current) instruction with the cloned one. In a subsequent
// run, the original replacement (the non-intrinsic) will be be replaced by
// the new intrinsic.
Instruction *RI = dyn_cast<Instruction>(repl);
assert(RI);
Instruction *newIntrinsic = I->clone();
newIntrinsic->setName(I->getName() + ".repl");
newIntrinsic->insertBefore(RI);
repl = newIntrinsic;
}
// Remove it!
patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())

View File

@ -0,0 +1,43 @@
; RUN: opt -S -gvn < %s | FileCheck %s
define i32 @sadd1(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @sadd1(
entry:
%sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
%cmp = extractvalue { i32, i1 } %sadd, 1
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
ret i32 42
if.end: ; preds = %entry
%sadd3 = add i32 %a, %b
ret i32 %sadd3
; CHECK-NOT: add i32 %a, %b
; CHECK: %sadd3.repl = extractvalue { i32, i1 } %sadd, 0
; CHECK: ret i32 %sadd3.repl
}
define i32 @sadd2(i32 %a, i32 %b) #0 {
entry:
%sadd3 = add i32 %a, %b
%sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
%cmp = extractvalue { i32, i1 } %sadd, 1
br i1 %cmp, label %if.then, label %if.end
; CHECK-NOT: %sadd3 = add i32 %a, %b
; CHECK: %sadd.repl = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
; CHECK-NOT: %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
; CHECK: %sadd3.repl = extractvalue { i32, i1 } %sadd.repl, 0
if.then: ; preds = %entry
%sadd4 = add i32 %sadd3, 1
ret i32 %sadd4
; CHECK: %sadd4 = add i32 %sadd3.repl, 1
if.end: ; preds = %entry
ret i32 %sadd3
; CHECK: ret i32 %sadd3.repl
}
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone