mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-20 14:29:27 +00:00
[Hexagon] Generate loop1 instruction for nested loops
loop1 is for the outer loop and loop0 is for the inner loop. Differential Revision: http://reviews.llvm.org/D9680 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237266 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0e0929ed98
commit
a036cd4093
@ -159,7 +159,7 @@ namespace {
|
||||
MachineOperand *InitialValue,
|
||||
const MachineOperand *Endvalue,
|
||||
int64_t IVBump) const;
|
||||
|
||||
|
||||
/// \brief Analyze the statements in a loop to determine if the loop
|
||||
/// has a computable trip count and, if so, return a value that represents
|
||||
/// the trip count expression.
|
||||
@ -179,15 +179,16 @@ namespace {
|
||||
|
||||
/// \brief Return true if the instruction is not valid within a hardware
|
||||
/// loop.
|
||||
bool isInvalidLoopOperation(const MachineInstr *MI) const;
|
||||
bool isInvalidLoopOperation(const MachineInstr *MI,
|
||||
bool IsInnerHWLoop) const;
|
||||
|
||||
/// \brief Return true if the loop contains an instruction that inhibits
|
||||
/// using the hardware loop.
|
||||
bool containsInvalidInstruction(MachineLoop *L) const;
|
||||
bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
|
||||
|
||||
/// \brief Given a loop, check if we can convert it to a hardware loop.
|
||||
/// If so, then perform the conversion and return true.
|
||||
bool convertToHardwareLoop(MachineLoop *L);
|
||||
bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
|
||||
|
||||
/// \brief Return true if the instruction is now dead.
|
||||
bool isDead(const MachineInstr *MI,
|
||||
@ -307,18 +308,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
|
||||
INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
|
||||
"Hexagon Hardware Loops", false, false)
|
||||
|
||||
|
||||
/// \brief Returns true if the instruction is a hardware loop instruction.
|
||||
static bool isHardwareLoop(const MachineInstr *MI) {
|
||||
return MI->getOpcode() == Hexagon::J2_loop0r ||
|
||||
MI->getOpcode() == Hexagon::J2_loop0i;
|
||||
}
|
||||
|
||||
FunctionPass *llvm::createHexagonHardwareLoops() {
|
||||
return new HexagonHardwareLoops();
|
||||
}
|
||||
|
||||
|
||||
bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
|
||||
DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
|
||||
|
||||
@ -329,12 +322,12 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
|
||||
MDT = &getAnalysis<MachineDominatorTree>();
|
||||
TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
|
||||
|
||||
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
|
||||
I != E; ++I) {
|
||||
MachineLoop *L = *I;
|
||||
if (!L->getParentLoop())
|
||||
Changed |= convertToHardwareLoop(L);
|
||||
}
|
||||
for (auto &L : *MLI)
|
||||
if (!L->getParentLoop()) {
|
||||
bool L0Used = false;
|
||||
bool L1Used = false;
|
||||
Changed |= convertToHardwareLoop(L, L0Used, L1Used);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
@ -467,27 +460,27 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
|
||||
case Hexagon::C2_cmpeqi:
|
||||
case Hexagon::C2_cmpeq:
|
||||
case Hexagon::C2_cmpeqp:
|
||||
Cmp = Comparison::Kind::EQ;
|
||||
Cmp = Comparison::EQ;
|
||||
break;
|
||||
case Hexagon::C4_cmpneq:
|
||||
case Hexagon::C4_cmpneqi:
|
||||
Cmp = Comparison::Kind::NE;
|
||||
Cmp = Comparison::NE;
|
||||
break;
|
||||
case Hexagon::C4_cmplte:
|
||||
Cmp = Comparison::Kind::LEs;
|
||||
Cmp = Comparison::LEs;
|
||||
break;
|
||||
case Hexagon::C4_cmplteu:
|
||||
Cmp = Comparison::Kind::LEu;
|
||||
Cmp = Comparison::LEu;
|
||||
break;
|
||||
case Hexagon::C2_cmpgtui:
|
||||
case Hexagon::C2_cmpgtu:
|
||||
case Hexagon::C2_cmpgtup:
|
||||
Cmp = Comparison::Kind::GTu;
|
||||
Cmp = Comparison::GTu;
|
||||
break;
|
||||
case Hexagon::C2_cmpgti:
|
||||
case Hexagon::C2_cmpgt:
|
||||
case Hexagon::C2_cmpgtp:
|
||||
Cmp = Comparison::Kind::GTs;
|
||||
Cmp = Comparison::GTs;
|
||||
break;
|
||||
default:
|
||||
return (Comparison::Kind)0;
|
||||
@ -749,7 +742,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
|
||||
MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
|
||||
DebugLoc DL;
|
||||
if (InsertPos != PH->end())
|
||||
InsertPos->getDebugLoc();
|
||||
DL = InsertPos->getDebugLoc();
|
||||
|
||||
// If Start is an immediate and End is a register, the trip count
|
||||
// will be "reg - imm". Hexagon's "subtract immediate" instruction
|
||||
@ -828,7 +821,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
|
||||
const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
|
||||
(RegToImm ? TII->get(Hexagon::A2_subri) :
|
||||
TII->get(Hexagon::A2_addi));
|
||||
if (RegToReg || RegToImm) {
|
||||
if (RegToReg || RegToImm) {
|
||||
unsigned SubR = MRI->createVirtualRegister(IntRC);
|
||||
MachineInstrBuilder SubIB =
|
||||
BuildMI(*PH, InsertPos, DL, SubD, SubR);
|
||||
@ -902,51 +895,50 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
|
||||
return new CountValue(CountValue::CV_Register, CountR, CountSR);
|
||||
}
|
||||
|
||||
|
||||
/// \brief Return true if the operation is invalid within hardware loop.
|
||||
bool HexagonHardwareLoops::isInvalidLoopOperation(
|
||||
const MachineInstr *MI) const {
|
||||
bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
|
||||
bool IsInnerHWLoop) const {
|
||||
|
||||
// Call is not allowed because the callee may use a hardware loop except for
|
||||
// the case when the call never returns.
|
||||
if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
|
||||
return true;
|
||||
|
||||
// do not allow nested hardware loops
|
||||
if (isHardwareLoop(MI))
|
||||
return true;
|
||||
|
||||
// check if the instruction defines a hardware loop register
|
||||
// Check if the instruction defines a hardware loop register.
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
if (!MO.isReg() || !MO.isDef())
|
||||
continue;
|
||||
unsigned R = MO.getReg();
|
||||
if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
|
||||
R == Hexagon::SA0 || R == Hexagon::SA1)
|
||||
if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
|
||||
R == Hexagon::LC1 || R == Hexagon::SA1))
|
||||
return true;
|
||||
if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// \brief - Return true if the loop contains an instruction that inhibits
|
||||
/// the use of the hardware loop function.
|
||||
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
|
||||
/// \brief Return true if the loop contains an instruction that inhibits
|
||||
/// the use of the hardware loop instruction.
|
||||
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
|
||||
bool IsInnerHWLoop) const {
|
||||
const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
|
||||
DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
|
||||
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
|
||||
MachineBasicBlock *MBB = Blocks[i];
|
||||
for (MachineBasicBlock::iterator
|
||||
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
|
||||
const MachineInstr *MI = &*MII;
|
||||
if (isInvalidLoopOperation(MI))
|
||||
if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
|
||||
DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// \brief Returns true if the instruction is dead. This was essentially
|
||||
/// copied from DeadMachineInstructionElim::isDead, but with special cases
|
||||
/// for inline asm, physical registers and instructions with side effects
|
||||
@ -1041,19 +1033,47 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
|
||||
///
|
||||
/// The code makes several assumptions about the representation of the loop
|
||||
/// in llvm.
|
||||
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
|
||||
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
|
||||
bool &RecL0used,
|
||||
bool &RecL1used) {
|
||||
// This is just for sanity.
|
||||
assert(L->getHeader() && "Loop without a header?");
|
||||
|
||||
bool Changed = false;
|
||||
bool L0Used = false;
|
||||
bool L1Used = false;
|
||||
|
||||
// Process nested loops first.
|
||||
for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
|
||||
Changed |= convertToHardwareLoop(*I);
|
||||
for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
|
||||
Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
|
||||
L0Used |= RecL0used;
|
||||
L1Used |= RecL1used;
|
||||
}
|
||||
|
||||
// If a nested loop has been converted, then we can't convert this loop.
|
||||
if (Changed)
|
||||
if (Changed && L0Used && L1Used)
|
||||
return Changed;
|
||||
|
||||
unsigned LOOP_i;
|
||||
unsigned LOOP_r;
|
||||
unsigned ENDLOOP;
|
||||
|
||||
// Flag used to track loopN instruction:
|
||||
// 1 - Hardware loop is being generated for the inner most loop.
|
||||
// 0 - Hardware loop is being generated for the outer loop.
|
||||
unsigned IsInnerHWLoop = 1;
|
||||
|
||||
if (L0Used) {
|
||||
LOOP_i = Hexagon::J2_loop1i;
|
||||
LOOP_r = Hexagon::J2_loop1r;
|
||||
ENDLOOP = Hexagon::ENDLOOP1;
|
||||
IsInnerHWLoop = 0;
|
||||
} else {
|
||||
LOOP_i = Hexagon::J2_loop0i;
|
||||
LOOP_r = Hexagon::J2_loop0r;
|
||||
ENDLOOP = Hexagon::ENDLOOP0;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Stop trying after reaching the limit (if any).
|
||||
int Limit = HWLoopLimit;
|
||||
@ -1065,10 +1085,10 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
|
||||
#endif
|
||||
|
||||
// Does the loop contain any invalid instructions?
|
||||
if (containsInvalidInstruction(L))
|
||||
if (containsInvalidInstruction(L, IsInnerHWLoop))
|
||||
return false;
|
||||
|
||||
MachineBasicBlock *LastMBB = L->getExitingBlock();
|
||||
MachineBasicBlock *LastMBB = getExitingBlock(L);
|
||||
// Don't generate hw loop if the loop has more than one exit.
|
||||
if (!LastMBB)
|
||||
return false;
|
||||
@ -1141,8 +1161,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
|
||||
BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
|
||||
.addReg(TripCount->getReg(), 0, TripCount->getSubReg());
|
||||
// Add the Loop instruction to the beginning of the loop.
|
||||
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
|
||||
.addMBB(LoopStart)
|
||||
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
|
||||
.addReg(CountReg);
|
||||
} else {
|
||||
assert(TripCount->isImm() && "Expecting immediate value for trip count");
|
||||
@ -1150,14 +1169,14 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
|
||||
// if the immediate fits in the instructions. Otherwise, we need to
|
||||
// create a new virtual register.
|
||||
int64_t CountImm = TripCount->getImm();
|
||||
if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) {
|
||||
if (!TII->isValidOffset(LOOP_i, CountImm)) {
|
||||
unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
|
||||
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
|
||||
.addImm(CountImm);
|
||||
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
|
||||
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
|
||||
.addMBB(LoopStart).addReg(CountReg);
|
||||
} else
|
||||
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i))
|
||||
BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
|
||||
.addMBB(LoopStart).addImm(CountImm);
|
||||
}
|
||||
|
||||
@ -1171,8 +1190,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
|
||||
|
||||
// Replace the loop branch with an endloop instruction.
|
||||
DebugLoc LastIDL = LastI->getDebugLoc();
|
||||
BuildMI(*LastMBB, LastI, LastIDL,
|
||||
TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
|
||||
BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
|
||||
|
||||
// The loop ends with either:
|
||||
// - a conditional branch followed by an unconditional branch, or
|
||||
@ -1200,6 +1218,15 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
|
||||
removeIfDead(OldInsts[i]);
|
||||
|
||||
++NumHWLoops;
|
||||
|
||||
// Set RecL1used and RecL0used only after hardware loop has been
|
||||
// successfully generated. Doing it earlier can cause wrong loop instruction
|
||||
// to be used.
|
||||
if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
|
||||
RecL1used = true;
|
||||
else
|
||||
RecL0used = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1533,7 +1560,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
|
||||
if (Header->pred_size() > 2) {
|
||||
// Ensure that the header has only two predecessors: the preheader and
|
||||
// the loop latch. Any additional predecessors of the header should
|
||||
// join at the newly created preheader. Inspect all PHI nodes from the
|
||||
// join at the newly created preheader. Inspect all PHI nodes from the
|
||||
// header and create appropriate corresponding PHI nodes in the preheader.
|
||||
|
||||
for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
|
||||
|
68
test/CodeGen/Hexagon/hwloop-loop1.ll
Normal file
68
test/CodeGen/Hexagon/hwloop-loop1.ll
Normal file
@ -0,0 +1,68 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
|
||||
;
|
||||
; Generate loop1 instruction for double loop sequence.
|
||||
|
||||
; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
|
||||
; CHECK: endloop0
|
||||
; CHECK: loop1(.LBB{{.}}_{{.}}, #100)
|
||||
; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
|
||||
; CHECK: endloop0
|
||||
; CHECK: endloop1
|
||||
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%array = alloca [100 x i32], align 8
|
||||
%doublearray = alloca [100 x [100 x i32]], align 8
|
||||
%0 = bitcast [100 x i32]* %array to i8*
|
||||
call void @llvm.lifetime.start(i64 400, i8* %0) #1
|
||||
%1 = bitcast [100 x [100 x i32]]* %doublearray to i8*
|
||||
call void @llvm.lifetime.start(i64 40000, i8* %1) #1
|
||||
%arrayidx1 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 10, i32 10
|
||||
%arrayidx2.gep = getelementptr [100 x i32], [100 x i32]* %array, i32 0, i32 0
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%2 = phi i32 [ undef, %entry ], [ %.pre, %for.body.for.body_crit_edge ]
|
||||
%sum.031 = phi i32 [ undef, %entry ], [ %add, %for.body.for.body_crit_edge ]
|
||||
%arrayidx2.phi = phi i32* [ %arrayidx2.gep, %entry ], [ %arrayidx2.inc, %for.body.for.body_crit_edge ]
|
||||
%i.030 = phi i32 [ 1, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
|
||||
%add = add nsw i32 %2, %sum.031
|
||||
%exitcond33 = icmp eq i32 %i.030, 100
|
||||
%arrayidx2.inc = getelementptr i32, i32* %arrayidx2.phi, i32 1
|
||||
br i1 %exitcond33, label %for.cond7.preheader.preheader, label %for.body.for.body_crit_edge
|
||||
|
||||
for.cond7.preheader.preheader:
|
||||
br label %for.cond7.preheader
|
||||
|
||||
for.body.for.body_crit_edge:
|
||||
%.pre = load i32, i32* %arrayidx2.inc, align 4
|
||||
%phitmp = add i32 %i.030, 1
|
||||
br label %for.body
|
||||
|
||||
for.cond7.preheader:
|
||||
%i.129 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond7.preheader.preheader ]
|
||||
br label %for.body9
|
||||
|
||||
for.body9:
|
||||
%j.028 = phi i32 [ 0, %for.cond7.preheader ], [ %inc13, %for.body9 ]
|
||||
%arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 %i.129, i32 %j.028
|
||||
store i32 %add, i32* %arrayidx11, align 4
|
||||
%inc13 = add nsw i32 %j.028, 1
|
||||
%exitcond = icmp eq i32 %inc13, 100
|
||||
br i1 %exitcond, label %for.inc15, label %for.body9
|
||||
|
||||
for.inc15:
|
||||
%inc16 = add nsw i32 %i.129, 1
|
||||
%exitcond32 = icmp eq i32 %inc16, 100
|
||||
br i1 %exitcond32, label %for.end17, label %for.cond7.preheader
|
||||
|
||||
for.end17:
|
||||
%3 = load i32, i32* %arrayidx1, align 8
|
||||
call void @llvm.lifetime.end(i64 40000, i8* %1) #1
|
||||
call void @llvm.lifetime.end(i64 400, i8* %0) #1
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
declare void @llvm.lifetime.start(i64, i8* nocapture) #1
|
||||
|
||||
declare void @llvm.lifetime.end(i64, i8* nocapture) #1
|
Loading…
x
Reference in New Issue
Block a user