Fix the Load/Store optimization pass to work with Thumb1.

Patch by Moritz Roth!



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208992 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
James Molloy 2014-05-16 14:14:30 +00:00
parent 2c9f8a715e
commit 40ae57cc0a
5 changed files with 285 additions and 40 deletions

View File

@ -15,9 +15,11 @@
#include "ARM.h" #include "ARM.h"
#include "ARMBaseInstrInfo.h" #include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h" #include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h" #include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h" #include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallPtrSet.h"
@ -66,6 +68,7 @@ namespace {
const TargetInstrInfo *TII; const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI; const TargetRegisterInfo *TRI;
const ARMSubtarget *STI; const ARMSubtarget *STI;
const TargetLowering *TL;
ARMFunctionInfo *AFI; ARMFunctionInfo *AFI;
RegScavenger *RS; RegScavenger *RS;
bool isThumb1, isThumb2; bool isThumb1, isThumb2;
@ -94,7 +97,10 @@ namespace {
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
const MemOpQueue &MemOps, unsigned DefReg, const MemOpQueue &MemOps, unsigned DefReg,
unsigned RangeBegin, unsigned RangeEnd); unsigned RangeBegin, unsigned RangeEnd);
void UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc dl, unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode, int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@ -120,7 +126,6 @@ namespace {
ARMCC::CondCodes Pred, unsigned PredReg, ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps, unsigned Scratch, MemOpQueue &MemOps,
SmallVectorImpl<MachineBasicBlock::iterator> &Merges); SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB, bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI); MachineBasicBlock::iterator &MBBI);
@ -160,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::STMDB; case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB; case ARM_AM::ib: return ARM::STMIB;
} }
case ARM::tLDRi:
// tLDMIA is writeback-only - unless the base register is in the input
// reglist.
++NumLDMGened;
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::tLDMIA;
}
case ARM::tSTRi:
// There is no non-writeback tSTMIA either.
++NumSTMGened;
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::tSTMIA_UPD;
}
case ARM::t2LDRi8: case ARM::t2LDRi8:
case ARM::t2LDRi12: case ARM::t2LDRi12:
++NumLDMGened; ++NumLDMGened;
@ -218,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
case ARM::LDMIA_UPD: case ARM::LDMIA_UPD:
case ARM::STMIA: case ARM::STMIA:
case ARM::STMIA_UPD: case ARM::STMIA_UPD:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
case ARM::tSTMIA_UPD:
case ARM::t2LDMIA_RET: case ARM::t2LDMIA_RET:
case ARM::t2LDMIA: case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD: case ARM::t2LDMIA_UPD:
@ -264,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
} // end namespace ARM_AM } // end namespace ARM_AM
} // end namespace llvm } // end namespace llvm
/// Returns true when \p Opc is ARM::tLDRi, the single Thumb1 opcode that
/// isi32Load() accepts as a 32-bit load.
static bool isT1i32Load(unsigned Opc) {
  const bool IsThumb1WordLoad = (ARM::tLDRi == Opc);
  return IsThumb1WordLoad;
}
static bool isT2i32Load(unsigned Opc) { static bool isT2i32Load(unsigned Opc) {
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
} }
static bool isi32Load(unsigned Opc) { static bool isi32Load(unsigned Opc) {
return Opc == ARM::LDRi12 || isT2i32Load(Opc); return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
}
/// Returns true when \p Opc is ARM::tSTRi, the single Thumb1 opcode that
/// isi32Store() accepts as a 32-bit store.
static bool isT1i32Store(unsigned Opc) {
  const bool IsThumb1WordStore = (ARM::tSTRi == Opc);
  return IsThumb1WordStore;
}
static bool isT2i32Store(unsigned Opc) { static bool isT2i32Store(unsigned Opc) {
@ -277,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) {
} }
static bool isi32Store(unsigned Opc) { static bool isi32Store(unsigned Opc) {
return Opc == ARM::STRi12 || isT2i32Store(Opc); return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
/// Returns the factor that converts an offset counted in 32-bit words into
/// the immediate units of the Thumb1 load/store \p Opc: 1 for the word forms
/// (tLDRi/tSTRi), 2 for the halfword forms and 4 for the byte forms.
/// UpdateBaseRegUses() multiplies its WordOffset by this value before
/// subtracting it from the instruction's immediate operand.
///
/// Any other opcode is a programming error and hits llvm_unreachable.
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRi:
  case ARM::tSTRi:
    return 1;
  default:
    break;
  }
  llvm_unreachable("Unhandled opcode!");
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
///
/// Starting at \p MBBI, every instruction up to the end of \p MBB that reads
/// \p Base is inspected:
///  - tLDRi/tSTRi/tLDRHi/tSTRHi/tLDRBi/tSTRBi: the immediate is reduced by
///    \p WordOffset words (converted to the opcode's units by getImmScale),
///    provided the result is not negative.
///  - tSUBi8/tADDi8: the writeback is folded into the instruction's immediate
///    when the merged value is non-negative and a legal add immediate; the
///    walk then stops because the base has been reset.
///  - anything else, or any case above that cannot be updated: a
///    "tSUBi8 Base, Base, #WordOffset * 4" is inserted in front of the
///    instruction and the walk stops.
/// The walk also stops at the first instruction that kills \p Base. If the
/// end of the block is reached and the block has successors, the SUB is
/// inserted before the last instruction of the block.
///
/// \param MBB        Block being rewritten.
/// \param MBBI       First instruction to inspect.
/// \param dl         Debug location attached to an inserted SUB.
/// \param Base       Base register that received the writeback.
/// \param WordOffset Writeback amount, counted in 32-bit words.
/// \param Pred       Predicate condition code for an inserted SUB.
/// \param PredReg    Predicate register for an inserted SUB.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");

  // Start updating any instructions with immediate offsets. Insert a sub before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    if (MBBI->readsRegister(Base)) {
      unsigned Opc = MBBI->getOpcode();
      bool InsertSub = false;

      if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
          Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
          Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The immediate is expressed in words, halfwords or bytes depending
        // on the opcode; getImmScale converts the word count accordingly.
        int Offset = MO.getImm() - WordOffset * getImmScale(Opc);
        if (Offset >= 0)
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
        // SUB/ADD using this register. Merge it with the update.
        // If the merged offset is too large, insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        int Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        // For tADDi8 the merged value can drop below zero. Such a value must
        // never be written into the immediate operand, so reject it here
        // explicitly instead of relying on the legality hook alone.
        // FIXME: Swap ADD<->SUB when Offset < 0 instead of inserting a SUB.
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

      if (InsertSub) {
        // An instruction above couldn't be updated, so insert a sub.
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
          .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
          .addImm(Pred).addReg(PredReg);
        return;
      }
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // The end of the block was reached. This means register liveness escapes the
  // block, and it's necessary to insert a sub before the last instruction.
  if (MBB.succ_size() > 0)
    // But only insert the SUB if there is actually a successor block.
    // FIXME: Check more carefully if register is live at this point, e.g. by
    // also examining the successor block's register liveness information.
    AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
      .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and /// MergeOps - Create and insert a LDM or STM with Base as base register and
@ -297,14 +423,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false; return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia; ARM_AM::AMSubMode Mode = ARM_AM::ia;
// VFP and Thumb2 do not support IB or DA modes. // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
bool haveIBAndDA = isNotVFP && !isThumb2; bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
if (Offset == 4 && haveIBAndDA) { if (Offset == 4 && haveIBAndDA) {
Mode = ARM_AM::ib; Mode = ARM_AM::ib;
} else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
Mode = ARM_AM::da; Mode = ARM_AM::da;
} else if (Offset == -4 * (int)NumRegs && isNotVFP) { } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg. // VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db; Mode = ARM_AM::db;
} else if (Offset != 0) { } else if (Offset != 0) {
@ -329,31 +456,88 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NewBase == 0) if (NewBase == 0)
return false; return false;
} }
int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
int BaseOpc =
isThumb2 ? ARM::t2ADDri :
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
if (Offset < 0) { if (Offset < 0) {
BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; BaseOpc =
isThumb2 ? ARM::t2SUBri :
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
Offset = - Offset; Offset = - Offset;
} }
int ImmedOffset = isThumb2
? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); if (!TL->isLegalAddImmediate(Offset))
if (ImmedOffset == -1) // FIXME: Try add with register operand?
// FIXME: Try t2ADDri12 or t2SUBri12?
return false; // Probably not worth it then. return false; // Probably not worth it then.
if (isThumb1) {
if (Base != NewBase) {
// Need to insert a MOV to the new base first.
// FIXME: If the immediate fits in 3 bits, use ADD instead.
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
}
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
.addReg(NewBase, getKillRegState(true)).addImm(Offset)
.addImm(Pred).addReg(PredReg);
} else {
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0); .addImm(Pred).addReg(PredReg).addReg(0);
}
Base = NewBase; Base = NewBase;
BaseKill = true; // New base is always killed straight away. BaseKill = true; // New base is always killed straight away.
} }
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD); Opcode == ARM::VLDRD);
// Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
// base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
if (!Opcode) return false; if (!Opcode) return false;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
.addReg(Base, getKillRegState(BaseKill)) bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
.addImm(Pred).addReg(PredReg);
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback. Check for this.
if (Opcode == ARM::tLDRi && isThumb1)
for (unsigned I = 0; I < NumRegs; ++I)
if (Base == Regs[I].first) {
Writeback = false;
break;
}
MachineInstrBuilder MIB;
if (Writeback) {
if (Opcode == ARM::tLDMIA)
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
// The base isn't dead after a merged instruction with writeback. Update
// future uses of the base with the added offset (if possible), or reset
// the base register as necessary.
if (!BaseKill)
UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
// Thumb1: we might need to set base writeback when building the MI.
MIB.addReg(Base, getDefRegState(true))
.addReg(Base, getKillRegState(BaseKill));
} else {
// No writeback, simply build the MachineInstr.
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
MIB.addReg(Base, getKillRegState(BaseKill));
}
MIB.addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i) for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second)); | getKillRegState(Regs[i].second));
@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false; bool CheckCPSRDef = false;
switch (MI->getOpcode()) { switch (MI->getOpcode()) {
default: return false; default: return false;
case ARM::tSUBi8:
case ARM::t2SUBri: case ARM::t2SUBri:
case ARM::SUBri: case ARM::SUBri:
CheckCPSRDef = true; CheckCPSRDef = true;
@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (Bytes == 0 || (Limit && Bytes >= Limit)) if (Bytes == 0 || (Limit && Bytes >= Limit))
return false; return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base && if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base && MI->getOperand(1).getReg() == Base &&
(MI->getOperand(2).getImm()*Scale) == Bytes && (MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred && getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg)) MyPredReg == PredReg))
return false; return false;
@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false; bool CheckCPSRDef = false;
switch (MI->getOpcode()) { switch (MI->getOpcode()) {
default: return false; default: return false;
case ARM::tADDi8:
case ARM::t2ADDri: case ARM::t2ADDri:
case ARM::ADDri: case ARM::ADDri:
CheckCPSRDef = true; CheckCPSRDef = true;
@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
// Make sure the offset fits in 8 bits. // Make sure the offset fits in 8 bits.
return false; return false;
unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base && if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base && MI->getOperand(1).getReg() == Base &&
(MI->getOperand(2).getImm()*Scale) == Bytes && (MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred && getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg)) MyPredReg == PredReg))
return false; return false;
@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
default: return 0; default: return 0;
case ARM::LDRi12: case ARM::LDRi12:
case ARM::STRi12: case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
case ARM::t2LDRi8: case ARM::t2LDRi8:
case ARM::t2LDRi12: case ARM::t2LDRi12:
case ARM::t2STRi8: case ARM::t2STRi8:
@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STMDA: case ARM::STMDA:
case ARM::STMDB: case ARM::STMDB:
case ARM::STMIB: case ARM::STMIB:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
case ARM::tSTMIA_UPD:
case ARM::t2LDMIA: case ARM::t2LDMIA:
case ARM::t2LDMDB: case ARM::t2LDMDB:
case ARM::t2STMIA: case ARM::t2STMIA:
@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator MBBI,
bool &Advance, bool &Advance,
MachineBasicBlock::iterator &I) { MachineBasicBlock::iterator &I) {
// Thumb1 is already using updating loads/stores.
if (isThumb1) return false;
MachineInstr *MI = MBBI; MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg(); unsigned Base = MI->getOperand(0).getReg();
bool BaseKill = MI->getOperand(0).isKill(); bool BaseKill = MI->getOperand(0).isKill();
@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
const TargetInstrInfo *TII, const TargetInstrInfo *TII,
bool &Advance, bool &Advance,
MachineBasicBlock::iterator &I) { MachineBasicBlock::iterator &I) {
// Thumb1 doesn't have updating LDR/STR.
// FIXME: Use LDM/STM with single register instead.
if (isThumb1) return false;
MachineInstr *MI = MBBI; MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(1).getReg(); unsigned Base = MI->getOperand(1).getReg();
bool BaseKill = MI->getOperand(1).isKill(); bool BaseKill = MI->getOperand(1).isKill();
@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
return MI->getOperand(1).isReg(); return MI->getOperand(1).isReg();
case ARM::LDRi12: case ARM::LDRi12:
case ARM::STRi12: case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
case ARM::t2LDRi8: case ARM::t2LDRi8:
case ARM::t2LDRi12: case ARM::t2LDRi12:
case ARM::t2STRi8: case ARM::t2STRi8:
@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
return OffField; return OffField;
// Thumb1 immediate offsets are scaled by 4
if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
return OffField * 4;
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
: ARM_AM::getAM5Offset(OffField) * 4; : ARM_AM::getAM5Offset(OffField) * 4;
if (isAM3) { if (isAM3) {
@ -1417,8 +1623,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// Try to find a free register to use as a new base in case it's needed. // Try to find a free register to use as a new base in case it's needed.
// First advance to the instruction just before the start of the chain. // First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps); AdvanceRS(MBB, MemOps);
// Find a scratch register. // Find a scratch register.
unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass); unsigned Scratch =
RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
// Process the load / store instructions. // Process the load / store instructions.
RS->forward(std::prev(MBBI)); RS->forward(std::prev(MBBI));
@ -1484,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
/// => /// =>
/// ldmfd sp!, {..., pc} /// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
// Thumb1 LDM doesn't allow high registers.
if (isThumb1) return false;
if (MBB.empty()) return false; if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@ -1514,6 +1725,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget(); const TargetMachine &TM = Fn.getTarget();
TL = TM.getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>(); AFI = Fn.getInfo<ARMFunctionInfo>();
TII = TM.getInstrInfo(); TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo(); TRI = TM.getRegisterInfo();
@ -1522,9 +1734,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
isThumb2 = AFI->isThumb2Function(); isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2; isThumb1 = AFI->isThumbFunction() && !isThumb2;
// Don't do anything in this pass with Thumb1 for now.
if (isThumb1) return false;
bool Modified = false; bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) { ++MFI) {
@ -1585,11 +1794,6 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MRI = &Fn.getRegInfo(); MRI = &Fn.getRegInfo();
MF = &Fn; MF = &Fn;
ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
bool isThumb1 = AFI->isThumbFunction() && !AFI->isThumb2Function();
// Don't do anything in this pass with Thumb1 for now.
if (isThumb1) return false;
bool Modified = false; bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) ++MFI)

View File

@ -215,10 +215,6 @@ etc. Almost all Thumb instructions clobber condition code.
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
Add ldmia, stmia support.
//===---------------------------------------------------------------------===//
Thumb load / store address mode offsets are scaled. The values kept in the Thumb load / store address mode offsets are scaled. The values kept in the
instruction operands are pre-scale values. This probably ought to be changed instruction operands are pre-scale values. This probably ought to be changed
to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions. to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s ; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s ; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
@ -45,7 +45,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
; CHECK: sub sp, # ; CHECK: sub sp, #
; CHECK: mov r[[R0:[0-9]+]], sp ; CHECK: mov r[[R0:[0-9]+]], sp
; CHECK: str r{{[0-9+]}}, [r[[R0]] ; CHECK: str r{{[0-9+]}}, [r[[R0]]
; CHECK: str r{{[0-9+]}}, [r[[R0]] ; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]]
; RA_BASIC: stm r[[R0]]!
; CHECK-NOT: ldr r0, [sp ; CHECK-NOT: ldr r0, [sp
; CHECK: mov r[[R1:[0-9]+]], sp ; CHECK: mov r[[R1:[0-9]+]], sp
; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}} ; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}

View File

@ -0,0 +1,42 @@
; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
define i32 @t1() {
; CHECK-LABEL: t1:
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
ret i32 %tmp4
}
define i32 @t2() {
; CHECK-LABEL: t2:
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
define i32 @t3() {
; CHECK-LABEL: t3:
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
declare i32 @f1(i32, i32)
declare i32 @f2(i32, i32, i32)

View File

@ -5,6 +5,7 @@
define i32 @t1() { define i32 @t1() {
; CHECK-LABEL: t1: ; CHECK-LABEL: t1:
; CHECK: push {r7, lr} ; CHECK: push {r7, lr}
; CHECK: ldrd
; CHECK: pop {r7, pc} ; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1] %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
@ -27,6 +28,7 @@ define i32 @t2() {
define i32 @t3() { define i32 @t3() {
; CHECK-LABEL: t3: ; CHECK-LABEL: t3:
; CHECK: push {r7, lr} ; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc} ; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]