llvm-6502/lib/Transforms/Utils/BypassSlowDivision.cpp
Preston Gurd 9a2cfffdb6 Bypass Slow Divides
* Only apply divide bypass optimization when not optimizing for size. 
* Fixed bug caused by constant for 0 value of type Int32,
  used dividend type to generate the constant instead.
* For atom x86-64 apply the divide bypass to use 16-bit divides instead of
  64-bit divides when operand values are small enough.
* Added lit tests for 64-bit divide bypass.

Patch by Tyler Nowicki!




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176442 91177308-0d34-0410-b5e6-96231b3b80d8
2013-03-04 18:13:57 +00:00

263 lines
9.8 KiB
C++

//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains an optimization for div and rem on architectures that
// execute short instructions significantly faster than longer instructions.
// For example, on Intel Atom 32-bit divides are slow enough that during
// runtime it is profitable to check the value of the operands, and if they are
// positive and less than 256 use an unsigned 8-bit divide.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "bypass-slow-division"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
namespace {
struct DivOpInfo {
bool SignedOp;
Value *Dividend;
Value *Divisor;
DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
};
struct DivPhiNodes {
PHINode *Quotient;
PHINode *Remainder;
DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
: Quotient(InQuotient), Remainder(InRemainder) {}
};
}
namespace llvm {
template<>
struct DenseMapInfo<DivOpInfo> {
static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
return Val1.SignedOp == Val2.SignedOp &&
Val1.Dividend == Val2.Dividend &&
Val1.Divisor == Val2.Divisor;
}
static DivOpInfo getEmptyKey() {
return DivOpInfo(false, 0, 0);
}
static DivOpInfo getTombstoneKey() {
return DivOpInfo(true, 0, 0);
}
static unsigned getHashValue(const DivOpInfo &Val) {
return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
reinterpret_cast<uintptr_t>(Val.Divisor)) ^
(unsigned)Val.SignedOp;
}
};
typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
}
// insertFastDiv - Substitutes the div/rem instruction with code that checks the
// value of the operands and uses a shorter-faster div/rem instruction when
// possible and the longer-slower div/rem instruction otherwise.
static bool insertFastDiv(Function &F,
Function::iterator &I,
BasicBlock::iterator &J,
IntegerType *BypassType,
bool UseDivOp,
bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
// Get instruction operands
Instruction *Instr = J;
Value *Dividend = Instr->getOperand(0);
Value *Divisor = Instr->getOperand(1);
if (isa<ConstantInt>(Divisor) ||
(isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
// Operations with immediate values should have
// been solved and replaced during compile time.
return false;
}
// Basic Block is split before divide
BasicBlock *MainBB = I;
BasicBlock *SuccessorBB = I->splitBasicBlock(J);
++I; //advance iterator I to successorBB
// Add new basic block for slow divide operation
BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
MainBB->getParent(), SuccessorBB);
SlowBB->moveBefore(SuccessorBB);
IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
Value *SlowQuotientV;
Value *SlowRemainderV;
if (UseSignedOp) {
SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
} else {
SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
}
SlowBuilder.CreateBr(SuccessorBB);
// Add new basic block for fast divide operation
BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
MainBB->getParent(), SuccessorBB);
FastBB->moveBefore(SlowBB);
IRBuilder<> FastBuilder(FastBB, FastBB->begin());
Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
BypassType);
Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
BypassType);
// udiv/urem because optimization only handles positive numbers
Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
ShortDivisorV);
Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
ShortDivisorV);
Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
ShortQuotientV,
Dividend->getType());
Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
ShortRemainderV,
Dividend->getType());
FastBuilder.CreateBr(SuccessorBB);
// Phi nodes for result of div and rem
IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
QuoPhi->addIncoming(SlowQuotientV, SlowBB);
QuoPhi->addIncoming(FastQuotientV, FastBB);
PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
RemPhi->addIncoming(SlowRemainderV, SlowBB);
RemPhi->addIncoming(FastRemainderV, FastBB);
// Replace Instr with appropriate phi node
if (UseDivOp)
Instr->replaceAllUsesWith(QuoPhi);
else
Instr->replaceAllUsesWith(RemPhi);
Instr->eraseFromParent();
// Combine operands into a single value with OR for value testing below
MainBB->getInstList().back().eraseFromParent();
IRBuilder<> MainBuilder(MainBB, MainBB->end());
Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
// BitMask is inverted to check if the operands are
// larger than the bypass type
uint64_t BitMask = ~BypassType->getBitMask();
Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
// Compare operand values and branch
Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
// point iterator J at first instruction of successorBB
J = I->begin();
// Cache phi nodes to be used later in place of other instances
// of div or rem with the same sign, dividend, and divisor
DivOpInfo Key(UseSignedOp, Dividend, Divisor);
DivPhiNodes Value(QuoPhi, RemPhi);
PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
return true;
}
// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
// operands and operation are identical. Otherwise call insertFastDiv to perform
// the optimization and cache the resulting dividend and remainder.
static bool reuseOrInsertFastDiv(Function &F,
Function::iterator &I,
BasicBlock::iterator &J,
IntegerType *BypassType,
bool UseDivOp,
bool UseSignedOp,
DivCacheTy &PerBBDivCache) {
// Get instruction operands
Instruction *Instr = J;
DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
if (CacheI == PerBBDivCache.end()) {
// If previous instance does not exist, insert fast div
return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
PerBBDivCache);
}
// Replace operation value with previously generated phi node
DivPhiNodes &Value = CacheI->second;
if (UseDivOp) {
// Replace all uses of div instruction with quotient phi node
J->replaceAllUsesWith(Value.Quotient);
} else {
// Replace all uses of rem instruction with remainder phi node
J->replaceAllUsesWith(Value.Remainder);
}
// Advance to next operation
++J;
// Remove redundant operation
Instr->eraseFromParent();
return true;
}
// bypassSlowDivision - This optimization identifies DIV instructions that can
// be profitably bypassed and carried out with a shorter, faster divide.
bool llvm::bypassSlowDivision(Function &F,
Function::iterator &I,
const DenseMap<unsigned int, unsigned int> &BypassWidths) {
DivCacheTy DivCache;
bool MadeChange = false;
for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
// Get instruction details
unsigned Opcode = J->getOpcode();
bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
bool UseSignedOp = Opcode == Instruction::SDiv ||
Opcode == Instruction::SRem;
// Only optimize div or rem ops
if (!UseDivOp && !UseRemOp)
continue;
// Skip division on vector types, only optimize integer instructions
if (!J->getType()->isIntegerTy())
continue;
// Get bitwidth of div/rem instruction
IntegerType *T = cast<IntegerType>(J->getType());
unsigned int bitwidth = T->getBitWidth();
// Continue if bitwidth is not bypassed
DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
if (BI == BypassWidths.end())
continue;
// Get type for div/rem instruction with bypass bitwidth
IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
UseSignedOp, DivCache);
}
return MadeChange;
}