[Hexagon] Use constant extenders to fix up hardware loops

Use a loop instruction with a constant extender for a hardware
loop instruction that is too far away from the start of the loop.
This is cheaper than changing the SA register value.

Differential Revision: http://reviews.llvm.org/D9262


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235882 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Brendon Cahoon 2015-04-27 14:16:43 +00:00
parent af3ec2cfd4
commit 2afd045e03
7 changed files with 150 additions and 72 deletions

View File

@ -6,9 +6,8 @@
// License. See LICENSE.TXT for details.
//
// The loop start address in the LOOPn instruction is encoded as a distance
// from the LOOPn instruction itself. If the start address is too far from
// the LOOPn instruction, the loop needs to be set up manually, i.e. via
// direct transfers to SAn and LCn.
// from the LOOPn instruction itself. If the start address is too far from
// the LOOPn instruction, the instruction needs to use a constant extender.
// This pass will identify and convert such LOOPn instructions to a proper
// form.
//===----------------------------------------------------------------------===//
@ -21,12 +20,15 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/PassSupport.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
static cl::opt<unsigned> MaxLoopRange(
"hexagon-loop-range", cl::Hidden, cl::init(200),
cl::desc("Restrict range of loopN instructions (testing only)"));
namespace llvm {
void initializeHexagonFixupHwLoopsPass(PassRegistry&);
}
@ -52,20 +54,15 @@ namespace {
}
private:
/// \brief Maximum distance between the loop instr and the basic block.
/// Just an estimate.
static const unsigned MAX_LOOP_DISTANCE = 200;
/// \brief Check the offset between each loop instruction and
/// the loop basic block to determine if we can use the LOOP instruction
/// or if we need to set the LC/SA registers explicitly.
bool fixupLoopInstrs(MachineFunction &MF);
/// \brief Add the instruction to set the LC and SA registers explicitly.
void convertLoopInstr(MachineFunction &MF,
MachineBasicBlock::iterator &MII,
RegScavenger &RS);
/// \brief Replace loop instruction with the constant extended
/// version if the loop label is too far from the loop instruction.
void useExtLoopInstr(MachineFunction &MF,
MachineBasicBlock::iterator &MII);
};
char HexagonFixupHwLoops::ID = 0;
@ -78,20 +75,18 @@ FunctionPass *llvm::createHexagonFixupHwLoops() {
return new HexagonFixupHwLoops();
}
/// \brief Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr *MI) {
return MI->getOpcode() == Hexagon::J2_loop0r ||
MI->getOpcode() == Hexagon::J2_loop0i;
MI->getOpcode() == Hexagon::J2_loop0i ||
MI->getOpcode() == Hexagon::J2_loop1r ||
MI->getOpcode() == Hexagon::J2_loop1i;
}
bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
bool Changed = fixupLoopInstrs(MF);
return Changed;
return fixupLoopInstrs(MF);
}
/// \brief For Hexagon, if the loop label is to far from the
/// loop instruction then we need to set the LC0 and SA0 registers
/// explicitly instead of using LOOP(start,count). This function
@ -105,41 +100,49 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
// Offset of the current instruction from the start.
unsigned InstOffset = 0;
// Map for each basic block to it's first instruction.
DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
DenseMap<const MachineBasicBlock *, unsigned> BlockToInstOffset;
const HexagonInstrInfo *HII =
static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
// First pass - compute the offset of each basic block.
for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
MBB != MBBe; ++MBB) {
BlockToInstOffset[MBB] = InstOffset;
InstOffset += (MBB->size() * 4);
for (const MachineBasicBlock &MBB : MF) {
if (MBB.getAlignment()) {
// Although we don't know the exact layout of the final code, we need
// to account for alignment padding somehow. This heuristic pads each
// aligned basic block according to the alignment value.
int ByteAlign = (1u << MBB.getAlignment()) - 1;
InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign);
}
BlockToInstOffset[&MBB] = InstOffset;
for (const MachineInstr &MI : MBB)
InstOffset += HII->getSize(&MI);
}
// Second pass - check each loop instruction to see if it needs to
// be converted.
// Second pass - check each loop instruction to see if it needs to be
// converted.
InstOffset = 0;
bool Changed = false;
RegScavenger RS;
// Loop over all the basic blocks.
for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
MBB != MBBe; ++MBB) {
InstOffset = BlockToInstOffset[MBB];
RS.enterBasicBlock(MBB);
for (MachineBasicBlock &MBB : MF) {
InstOffset = BlockToInstOffset[&MBB];
// Loop over all the instructions.
MachineBasicBlock::iterator MIE = MBB->end();
MachineBasicBlock::iterator MII = MBB->begin();
MachineBasicBlock::iterator MII = MBB.begin();
MachineBasicBlock::iterator MIE = MBB.end();
while (MII != MIE) {
InstOffset += HII->getSize(&*MII);
if (MII->isDebugValue()) {
++MII;
continue;
}
if (isHardwareLoop(MII)) {
RS.forward(MII);
assert(MII->getOperand(0).isMBB() &&
"Expect a basic block as loop operand");
int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
unsigned Dist = Sub > 0 ? Sub : -Sub;
if (Dist > MAX_LOOP_DISTANCE) {
// Convert to explicity setting LC0 and SA0.
convertLoopInstr(MF, MII, RS);
MII = MBB->erase(MII);
int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
if ((unsigned)abs(diff) > MaxLoopRange) {
useExtLoopInstr(MF, MII);
MII = MBB.erase(MII);
Changed = true;
} else {
++MII;
@ -147,39 +150,38 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
} else {
++MII;
}
InstOffset += 4;
}
}
return Changed;
}
/// \brief convert a loop instruction to a sequence of instructions that
/// set the LC0 and SA0 register explicitly.
void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
MachineBasicBlock::iterator &MII,
RegScavenger &RS) {
/// \brief Replace loop instructions with the constant extended version.
void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF,
MachineBasicBlock::iterator &MII) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineBasicBlock *MBB = MII->getParent();
DebugLoc DL = MII->getDebugLoc();
unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
// First, set the LC0 with the trip count.
if (MII->getOperand(1).isReg()) {
// Trip count is a register
BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0)
.addReg(MII->getOperand(1).getReg());
} else {
// Trip count is an immediate.
BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch)
.addImm(MII->getOperand(1).getImm());
BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0)
.addReg(Scratch);
MachineInstrBuilder MIB;
unsigned newOp;
switch (MII->getOpcode()) {
case Hexagon::J2_loop0r:
newOp = Hexagon::J2_loop0rext;
break;
case Hexagon::J2_loop0i:
newOp = Hexagon::J2_loop0iext;
break;
case Hexagon::J2_loop1r:
newOp = Hexagon::J2_loop1rext;
break;
case Hexagon::J2_loop1i:
newOp = Hexagon::J2_loop1iext;
break;
default:
llvm_unreachable("Invalid Hardware Loop Instruction.");
}
// Then, set the SA0 with the loop start address.
BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch)
.addMBB(MII->getOperand(0).getMBB());
BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::SA0)
.addReg(Scratch);
MIB = BuildMI(*MBB, MII, DL, TII->get(newOp));
for (unsigned i = 0; i < MII->getNumOperands(); ++i)
MIB.addOperand(MII->getOperand(i));
}

View File

@ -1742,14 +1742,14 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
if (isExtended) // Instruction must be extended.
return true;
unsigned isExtendable = (F >> HexagonII::ExtendablePos)
& HexagonII::ExtendableMask;
unsigned isExtendable =
(F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
if (!isExtendable)
return false;
@ -1785,6 +1785,27 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
return (ImmValue < MinValue || ImmValue > MaxValue);
}
// Return the number of bytes required to encode the instruction.
// Hexagon instructions are fixed length, 4 bytes, unless they
// use a constant extender, which requires another 4 bytes.
// For debug instructions and prolog labels, return 0.
unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
if (MI->isDebugValue() || MI->isPosition())
return 0;
unsigned Size = MI->getDesc().getSize();
if (!Size)
// Assume the default insn size in case it cannot be determined
// for whatever reason.
Size = HEXAGON_INSTR_SIZE;
if (isConstExtended(MI) || isExtended(MI))
Size += HEXAGON_INSTR_SIZE;
return Size;
}
// Returns the opcode to use when converting MI, which is a conditional jump,
// into a conditional instruction which uses the .new value of the predicate.
// We also use branch probabilities to add a hint to the jump.

View File

@ -1,3 +1,4 @@
//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
@ -203,7 +204,8 @@ public:
void immediateExtend(MachineInstr *MI) const;
bool isConstExtended(MachineInstr *MI) const;
bool isConstExtended(const MachineInstr *MI) const;
unsigned getSize(const MachineInstr *MI) const;
int getDotNewPredJumpOp(MachineInstr *MI,
const MachineBranchProbabilityInfo *MBPI) const;
unsigned getAddrMode(const MachineInstr* MI) const;

View File

@ -483,7 +483,9 @@ let PrintMethod = "printJumpTable" in
def jumptablebase : Operand<i32>;
def brtarget : Operand<OtherVT>;
def brtargetExt : Operand<OtherVT>;
def brtargetExt : Operand<OtherVT> {
let PrintMethod = "printExtBrtarget";
}
def calltarget : Operand<i32>;
def bblabel : Operand<i32>;

View File

@ -249,3 +249,17 @@ void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
O << ')';
}
void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
const MCOperand &MO = MI->getOperand(OpNo);
const MCInstrDesc &MII = getMII().get(MI->getOpcode());
assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
"Expecting an extendable operand");
if (MO.isExpr() || isExtended(MII.TSFlags)) {
O << "##";
}
printOperand(MI, OpNo, O);
}

View File

@ -56,6 +56,7 @@ namespace llvm {
void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
const;
void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
void printConstantPool(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const;

View File

@ -0,0 +1,36 @@
; RUN: llc -march=hexagon -hexagon-loop-range=0 < %s | FileCheck %s
; Test that the loop start address operand uses a constant extender
; if the offset is out of range.
; CHECK: loop0(##.LBB
; CHECK: endloop0
@g = external global i32, align 4
define void @test(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
entry:
%cmp6 = icmp slt i32 %n, 1
br i1 %cmp6, label %for.end, label %for.body.preheader
for.body.preheader:
br label %for.body
for.body:
%i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.07
%0 = load i32, i32* %arrayidx, align 4
%1 = load i32, i32* @g, align 4
%mul = mul nsw i32 %1, %0
%arrayidx1 = getelementptr inbounds i32, i32* %a, i32 %i.07
store i32 %mul, i32* %arrayidx1, align 4
%inc = add nuw nsw i32 %i.07, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.end.loopexit, label %for.body
for.end.loopexit:
br label %for.end
for.end:
ret void
}