mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-12 13:30:51 +00:00
Fix the Load/Store optimization pass to work with Thumb1.
Patch by Moritz Roth! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208992 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2c9f8a715e
commit
40ae57cc0a
@ -15,9 +15,11 @@
|
|||||||
#include "ARM.h"
|
#include "ARM.h"
|
||||||
#include "ARMBaseInstrInfo.h"
|
#include "ARMBaseInstrInfo.h"
|
||||||
#include "ARMBaseRegisterInfo.h"
|
#include "ARMBaseRegisterInfo.h"
|
||||||
|
#include "ARMISelLowering.h"
|
||||||
#include "ARMMachineFunctionInfo.h"
|
#include "ARMMachineFunctionInfo.h"
|
||||||
#include "ARMSubtarget.h"
|
#include "ARMSubtarget.h"
|
||||||
#include "MCTargetDesc/ARMAddressingModes.h"
|
#include "MCTargetDesc/ARMAddressingModes.h"
|
||||||
|
#include "Thumb1RegisterInfo.h"
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
#include "llvm/ADT/STLExtras.h"
|
#include "llvm/ADT/STLExtras.h"
|
||||||
#include "llvm/ADT/SmallPtrSet.h"
|
#include "llvm/ADT/SmallPtrSet.h"
|
||||||
@ -66,6 +68,7 @@ namespace {
|
|||||||
const TargetInstrInfo *TII;
|
const TargetInstrInfo *TII;
|
||||||
const TargetRegisterInfo *TRI;
|
const TargetRegisterInfo *TRI;
|
||||||
const ARMSubtarget *STI;
|
const ARMSubtarget *STI;
|
||||||
|
const TargetLowering *TL;
|
||||||
ARMFunctionInfo *AFI;
|
ARMFunctionInfo *AFI;
|
||||||
RegScavenger *RS;
|
RegScavenger *RS;
|
||||||
bool isThumb1, isThumb2;
|
bool isThumb1, isThumb2;
|
||||||
@ -94,7 +97,10 @@ namespace {
|
|||||||
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
|
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
|
||||||
const MemOpQueue &MemOps, unsigned DefReg,
|
const MemOpQueue &MemOps, unsigned DefReg,
|
||||||
unsigned RangeBegin, unsigned RangeEnd);
|
unsigned RangeBegin, unsigned RangeEnd);
|
||||||
|
void UpdateBaseRegUses(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MBBI,
|
||||||
|
DebugLoc dl, unsigned Base, unsigned WordOffset,
|
||||||
|
ARMCC::CondCodes Pred, unsigned PredReg);
|
||||||
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||||
int Offset, unsigned Base, bool BaseKill, int Opcode,
|
int Offset, unsigned Base, bool BaseKill, int Opcode,
|
||||||
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
|
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
|
||||||
@ -120,7 +126,6 @@ namespace {
|
|||||||
ARMCC::CondCodes Pred, unsigned PredReg,
|
ARMCC::CondCodes Pred, unsigned PredReg,
|
||||||
unsigned Scratch, MemOpQueue &MemOps,
|
unsigned Scratch, MemOpQueue &MemOps,
|
||||||
SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
|
SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
|
||||||
|
|
||||||
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
|
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
|
||||||
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
|
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator &MBBI);
|
MachineBasicBlock::iterator &MBBI);
|
||||||
@ -160,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
|
|||||||
case ARM_AM::db: return ARM::STMDB;
|
case ARM_AM::db: return ARM::STMDB;
|
||||||
case ARM_AM::ib: return ARM::STMIB;
|
case ARM_AM::ib: return ARM::STMIB;
|
||||||
}
|
}
|
||||||
|
case ARM::tLDRi:
|
||||||
|
// tLDMIA is writeback-only - unless the base register is in the input
|
||||||
|
// reglist.
|
||||||
|
++NumLDMGened;
|
||||||
|
switch (Mode) {
|
||||||
|
default: llvm_unreachable("Unhandled submode!");
|
||||||
|
case ARM_AM::ia: return ARM::tLDMIA;
|
||||||
|
}
|
||||||
|
case ARM::tSTRi:
|
||||||
|
// There is no non-writeback tSTMIA either.
|
||||||
|
++NumSTMGened;
|
||||||
|
switch (Mode) {
|
||||||
|
default: llvm_unreachable("Unhandled submode!");
|
||||||
|
case ARM_AM::ia: return ARM::tSTMIA_UPD;
|
||||||
|
}
|
||||||
case ARM::t2LDRi8:
|
case ARM::t2LDRi8:
|
||||||
case ARM::t2LDRi12:
|
case ARM::t2LDRi12:
|
||||||
++NumLDMGened;
|
++NumLDMGened;
|
||||||
@ -218,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
|
|||||||
case ARM::LDMIA_UPD:
|
case ARM::LDMIA_UPD:
|
||||||
case ARM::STMIA:
|
case ARM::STMIA:
|
||||||
case ARM::STMIA_UPD:
|
case ARM::STMIA_UPD:
|
||||||
|
case ARM::tLDMIA:
|
||||||
|
case ARM::tLDMIA_UPD:
|
||||||
|
case ARM::tSTMIA_UPD:
|
||||||
case ARM::t2LDMIA_RET:
|
case ARM::t2LDMIA_RET:
|
||||||
case ARM::t2LDMIA:
|
case ARM::t2LDMIA:
|
||||||
case ARM::t2LDMIA_UPD:
|
case ARM::t2LDMIA_UPD:
|
||||||
@ -264,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
|
|||||||
} // end namespace ARM_AM
|
} // end namespace ARM_AM
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
|
||||||
|
static bool isT1i32Load(unsigned Opc) {
|
||||||
|
return Opc == ARM::tLDRi;
|
||||||
|
}
|
||||||
|
|
||||||
static bool isT2i32Load(unsigned Opc) {
|
static bool isT2i32Load(unsigned Opc) {
|
||||||
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
|
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isi32Load(unsigned Opc) {
|
static bool isi32Load(unsigned Opc) {
|
||||||
return Opc == ARM::LDRi12 || isT2i32Load(Opc);
|
return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isT1i32Store(unsigned Opc) {
|
||||||
|
return Opc == ARM::tSTRi;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isT2i32Store(unsigned Opc) {
|
static bool isT2i32Store(unsigned Opc) {
|
||||||
@ -277,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool isi32Store(unsigned Opc) {
|
static bool isi32Store(unsigned Opc) {
|
||||||
return Opc == ARM::STRi12 || isT2i32Store(Opc);
|
return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned getImmScale(unsigned Opc) {
|
||||||
|
switch (Opc) {
|
||||||
|
default: llvm_unreachable("Unhandled opcode!");
|
||||||
|
case ARM::tLDRi:
|
||||||
|
case ARM::tSTRi:
|
||||||
|
return 1;
|
||||||
|
case ARM::tLDRHi:
|
||||||
|
case ARM::tSTRHi:
|
||||||
|
return 2;
|
||||||
|
case ARM::tLDRBi:
|
||||||
|
case ARM::tSTRBi:
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update future uses of the base register with the offset introduced
|
||||||
|
/// due to writeback. This function only works on Thumb1.
|
||||||
|
void
|
||||||
|
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MBBI,
|
||||||
|
DebugLoc dl, unsigned Base,
|
||||||
|
unsigned WordOffset,
|
||||||
|
ARMCC::CondCodes Pred, unsigned PredReg) {
|
||||||
|
assert(isThumb1 && "Can only update base register uses for Thumb1!");
|
||||||
|
|
||||||
|
// Start updating any instructions with immediate offsets. Insert a sub before
|
||||||
|
// the first non-updateable instruction (if any).
|
||||||
|
for (; MBBI != MBB.end(); ++MBBI) {
|
||||||
|
if (MBBI->readsRegister(Base)) {
|
||||||
|
unsigned Opc = MBBI->getOpcode();
|
||||||
|
int Offset;
|
||||||
|
bool InsertSub = false;
|
||||||
|
|
||||||
|
if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
|
||||||
|
Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
|
||||||
|
Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
|
||||||
|
// Loads and stores with immediate offsets can be updated, but only if
|
||||||
|
// the new offset isn't negative.
|
||||||
|
// The MachineOperand containing the offset immediate is the last one
|
||||||
|
// before predicates.
|
||||||
|
MachineOperand &MO =
|
||||||
|
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
|
||||||
|
// The offsets are scaled by 1, 2 or 4 depending on the Opcode
|
||||||
|
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
|
||||||
|
if (Offset >= 0)
|
||||||
|
MO.setImm(Offset);
|
||||||
|
else
|
||||||
|
InsertSub = true;
|
||||||
|
|
||||||
|
} else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
|
||||||
|
// SUB/ADD using this register. Merge it with the update.
|
||||||
|
// If the merged offset is too large, insert a new sub instead.
|
||||||
|
MachineOperand &MO =
|
||||||
|
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
|
||||||
|
Offset = (Opc == ARM::tSUBi8) ?
|
||||||
|
MO.getImm() + WordOffset * 4 :
|
||||||
|
MO.getImm() - WordOffset * 4 ;
|
||||||
|
if (TL->isLegalAddImmediate(Offset)) {
|
||||||
|
MO.setImm(Offset);
|
||||||
|
// The base register has now been reset, so exit early.
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
InsertSub = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Can't update the instruction.
|
||||||
|
InsertSub = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (InsertSub) {
|
||||||
|
// An instruction above couldn't be updated, so insert a sub.
|
||||||
|
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
|
||||||
|
.addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
|
||||||
|
.addImm(Pred).addReg(PredReg);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (MBBI->killsRegister(Base))
|
||||||
|
// Register got killed. Stop updating.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The end of the block was reached. This means register liveness escapes the
|
||||||
|
// block, and it's necessary to insert a sub before the last instruction.
|
||||||
|
if (MBB.succ_size() > 0)
|
||||||
|
// But only insert the SUB if there is actually a successor block.
|
||||||
|
// FIXME: Check more carefully if register is live at this point, e.g. by
|
||||||
|
// also examining the successor block's register liveness information.
|
||||||
|
AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
|
||||||
|
.addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
|
||||||
|
.addImm(Pred).addReg(PredReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// MergeOps - Create and insert a LDM or STM with Base as base register and
|
/// MergeOps - Create and insert a LDM or STM with Base as base register and
|
||||||
@ -297,14 +423,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
ARM_AM::AMSubMode Mode = ARM_AM::ia;
|
ARM_AM::AMSubMode Mode = ARM_AM::ia;
|
||||||
// VFP and Thumb2 do not support IB or DA modes.
|
// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
|
||||||
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
|
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
|
||||||
bool haveIBAndDA = isNotVFP && !isThumb2;
|
bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
|
||||||
|
|
||||||
if (Offset == 4 && haveIBAndDA) {
|
if (Offset == 4 && haveIBAndDA) {
|
||||||
Mode = ARM_AM::ib;
|
Mode = ARM_AM::ib;
|
||||||
} else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
|
} else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
|
||||||
Mode = ARM_AM::da;
|
Mode = ARM_AM::da;
|
||||||
} else if (Offset == -4 * (int)NumRegs && isNotVFP) {
|
} else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
|
||||||
// VLDM/VSTM do not support DB mode without also updating the base reg.
|
// VLDM/VSTM do not support DB mode without also updating the base reg.
|
||||||
Mode = ARM_AM::db;
|
Mode = ARM_AM::db;
|
||||||
} else if (Offset != 0) {
|
} else if (Offset != 0) {
|
||||||
@ -329,31 +456,88 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
|||||||
if (NewBase == 0)
|
if (NewBase == 0)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
|
|
||||||
|
int BaseOpc =
|
||||||
|
isThumb2 ? ARM::t2ADDri :
|
||||||
|
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
|
||||||
|
|
||||||
if (Offset < 0) {
|
if (Offset < 0) {
|
||||||
BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
|
BaseOpc =
|
||||||
|
isThumb2 ? ARM::t2SUBri :
|
||||||
|
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
|
||||||
Offset = - Offset;
|
Offset = - Offset;
|
||||||
}
|
}
|
||||||
int ImmedOffset = isThumb2
|
|
||||||
? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
|
|
||||||
if (ImmedOffset == -1)
|
|
||||||
// FIXME: Try t2ADDri12 or t2SUBri12?
|
|
||||||
return false; // Probably not worth it then.
|
|
||||||
|
|
||||||
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
|
if (!TL->isLegalAddImmediate(Offset))
|
||||||
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
|
// FIXME: Try add with register operand?
|
||||||
.addImm(Pred).addReg(PredReg).addReg(0);
|
return false; // Probably not worth it then.
|
||||||
|
|
||||||
|
if (isThumb1) {
|
||||||
|
if (Base != NewBase) {
|
||||||
|
// Need to insert a MOV to the new base first.
|
||||||
|
// FIXME: If the immediate fits in 3 bits, use ADD instead.
|
||||||
|
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
|
||||||
|
.addReg(Base, getKillRegState(BaseKill))
|
||||||
|
.addImm(Pred).addReg(PredReg);
|
||||||
|
}
|
||||||
|
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
|
||||||
|
.addReg(NewBase, getKillRegState(true)).addImm(Offset)
|
||||||
|
.addImm(Pred).addReg(PredReg);
|
||||||
|
} else {
|
||||||
|
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
|
||||||
|
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
|
||||||
|
.addImm(Pred).addReg(PredReg).addReg(0);
|
||||||
|
}
|
||||||
|
|
||||||
Base = NewBase;
|
Base = NewBase;
|
||||||
BaseKill = true; // New base is always killed straight away.
|
BaseKill = true; // New base is always killed straight away.
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
|
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
|
||||||
Opcode == ARM::VLDRD);
|
Opcode == ARM::VLDRD);
|
||||||
|
|
||||||
|
// Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
|
||||||
|
// base register writeback.
|
||||||
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
|
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
|
||||||
if (!Opcode) return false;
|
if (!Opcode) return false;
|
||||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
|
|
||||||
.addReg(Base, getKillRegState(BaseKill))
|
bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
|
||||||
.addImm(Pred).addReg(PredReg);
|
|
||||||
|
// Exception: If the base register is in the input reglist, Thumb1 LDM is
|
||||||
|
// non-writeback. Check for this.
|
||||||
|
if (Opcode == ARM::tLDRi && isThumb1)
|
||||||
|
for (unsigned I = 0; I < NumRegs; ++I)
|
||||||
|
if (Base == Regs[I].first) {
|
||||||
|
Writeback = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
MachineInstrBuilder MIB;
|
||||||
|
|
||||||
|
if (Writeback) {
|
||||||
|
if (Opcode == ARM::tLDMIA)
|
||||||
|
// Update tLDMIA with writeback if necessary.
|
||||||
|
Opcode = ARM::tLDMIA_UPD;
|
||||||
|
|
||||||
|
// The base isn't dead after a merged instruction with writeback. Update
|
||||||
|
// future uses of the base with the added offset (if possible), or reset
|
||||||
|
// the base register as necessary.
|
||||||
|
if (!BaseKill)
|
||||||
|
UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
|
||||||
|
|
||||||
|
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
|
||||||
|
|
||||||
|
// Thumb1: we might need to set base writeback when building the MI.
|
||||||
|
MIB.addReg(Base, getDefRegState(true))
|
||||||
|
.addReg(Base, getKillRegState(BaseKill));
|
||||||
|
} else {
|
||||||
|
// No writeback, simply build the MachineInstr.
|
||||||
|
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
|
||||||
|
MIB.addReg(Base, getKillRegState(BaseKill));
|
||||||
|
}
|
||||||
|
|
||||||
|
MIB.addImm(Pred).addReg(PredReg);
|
||||||
|
|
||||||
for (unsigned i = 0; i != NumRegs; ++i)
|
for (unsigned i = 0; i != NumRegs; ++i)
|
||||||
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
|
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
|
||||||
| getKillRegState(Regs[i].second));
|
| getKillRegState(Regs[i].second));
|
||||||
@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
|
|||||||
bool CheckCPSRDef = false;
|
bool CheckCPSRDef = false;
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default: return false;
|
default: return false;
|
||||||
|
case ARM::tSUBi8:
|
||||||
case ARM::t2SUBri:
|
case ARM::t2SUBri:
|
||||||
case ARM::SUBri:
|
case ARM::SUBri:
|
||||||
CheckCPSRDef = true;
|
CheckCPSRDef = true;
|
||||||
@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
|
|||||||
if (Bytes == 0 || (Limit && Bytes >= Limit))
|
if (Bytes == 0 || (Limit && Bytes >= Limit))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
|
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
|
||||||
|
MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
|
||||||
if (!(MI->getOperand(0).getReg() == Base &&
|
if (!(MI->getOperand(0).getReg() == Base &&
|
||||||
MI->getOperand(1).getReg() == Base &&
|
MI->getOperand(1).getReg() == Base &&
|
||||||
(MI->getOperand(2).getImm()*Scale) == Bytes &&
|
(MI->getOperand(2).getImm() * Scale) == Bytes &&
|
||||||
getInstrPredicate(MI, MyPredReg) == Pred &&
|
getInstrPredicate(MI, MyPredReg) == Pred &&
|
||||||
MyPredReg == PredReg))
|
MyPredReg == PredReg))
|
||||||
return false;
|
return false;
|
||||||
@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
|
|||||||
bool CheckCPSRDef = false;
|
bool CheckCPSRDef = false;
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default: return false;
|
default: return false;
|
||||||
|
case ARM::tADDi8:
|
||||||
case ARM::t2ADDri:
|
case ARM::t2ADDri:
|
||||||
case ARM::ADDri:
|
case ARM::ADDri:
|
||||||
CheckCPSRDef = true;
|
CheckCPSRDef = true;
|
||||||
@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
|
|||||||
// Make sure the offset fits in 8 bits.
|
// Make sure the offset fits in 8 bits.
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
|
unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
|
||||||
|
MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
|
||||||
if (!(MI->getOperand(0).getReg() == Base &&
|
if (!(MI->getOperand(0).getReg() == Base &&
|
||||||
MI->getOperand(1).getReg() == Base &&
|
MI->getOperand(1).getReg() == Base &&
|
||||||
(MI->getOperand(2).getImm()*Scale) == Bytes &&
|
(MI->getOperand(2).getImm() * Scale) == Bytes &&
|
||||||
getInstrPredicate(MI, MyPredReg) == Pred &&
|
getInstrPredicate(MI, MyPredReg) == Pred &&
|
||||||
MyPredReg == PredReg))
|
MyPredReg == PredReg))
|
||||||
return false;
|
return false;
|
||||||
@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
|
|||||||
default: return 0;
|
default: return 0;
|
||||||
case ARM::LDRi12:
|
case ARM::LDRi12:
|
||||||
case ARM::STRi12:
|
case ARM::STRi12:
|
||||||
|
case ARM::tLDRi:
|
||||||
|
case ARM::tSTRi:
|
||||||
case ARM::t2LDRi8:
|
case ARM::t2LDRi8:
|
||||||
case ARM::t2LDRi12:
|
case ARM::t2LDRi12:
|
||||||
case ARM::t2STRi8:
|
case ARM::t2STRi8:
|
||||||
@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
|
|||||||
case ARM::STMDA:
|
case ARM::STMDA:
|
||||||
case ARM::STMDB:
|
case ARM::STMDB:
|
||||||
case ARM::STMIB:
|
case ARM::STMIB:
|
||||||
|
case ARM::tLDMIA:
|
||||||
|
case ARM::tLDMIA_UPD:
|
||||||
|
case ARM::tSTMIA_UPD:
|
||||||
case ARM::t2LDMIA:
|
case ARM::t2LDMIA:
|
||||||
case ARM::t2LDMDB:
|
case ARM::t2LDMDB:
|
||||||
case ARM::t2STMIA:
|
case ARM::t2STMIA:
|
||||||
@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
|
|||||||
MachineBasicBlock::iterator MBBI,
|
MachineBasicBlock::iterator MBBI,
|
||||||
bool &Advance,
|
bool &Advance,
|
||||||
MachineBasicBlock::iterator &I) {
|
MachineBasicBlock::iterator &I) {
|
||||||
|
// Thumb1 is already using updating loads/stores.
|
||||||
|
if (isThumb1) return false;
|
||||||
|
|
||||||
MachineInstr *MI = MBBI;
|
MachineInstr *MI = MBBI;
|
||||||
unsigned Base = MI->getOperand(0).getReg();
|
unsigned Base = MI->getOperand(0).getReg();
|
||||||
bool BaseKill = MI->getOperand(0).isKill();
|
bool BaseKill = MI->getOperand(0).isKill();
|
||||||
@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
|
|||||||
const TargetInstrInfo *TII,
|
const TargetInstrInfo *TII,
|
||||||
bool &Advance,
|
bool &Advance,
|
||||||
MachineBasicBlock::iterator &I) {
|
MachineBasicBlock::iterator &I) {
|
||||||
|
// Thumb1 doesn't have updating LDR/STR.
|
||||||
|
// FIXME: Use LDM/STM with single register instead.
|
||||||
|
if (isThumb1) return false;
|
||||||
|
|
||||||
MachineInstr *MI = MBBI;
|
MachineInstr *MI = MBBI;
|
||||||
unsigned Base = MI->getOperand(1).getReg();
|
unsigned Base = MI->getOperand(1).getReg();
|
||||||
bool BaseKill = MI->getOperand(1).isKill();
|
bool BaseKill = MI->getOperand(1).isKill();
|
||||||
@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
|
|||||||
return MI->getOperand(1).isReg();
|
return MI->getOperand(1).isReg();
|
||||||
case ARM::LDRi12:
|
case ARM::LDRi12:
|
||||||
case ARM::STRi12:
|
case ARM::STRi12:
|
||||||
|
case ARM::tLDRi:
|
||||||
|
case ARM::tSTRi:
|
||||||
case ARM::t2LDRi8:
|
case ARM::t2LDRi8:
|
||||||
case ARM::t2LDRi12:
|
case ARM::t2LDRi12:
|
||||||
case ARM::t2STRi8:
|
case ARM::t2STRi8:
|
||||||
@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
|
|||||||
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
|
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
|
||||||
return OffField;
|
return OffField;
|
||||||
|
|
||||||
|
// Thumb1 immediate offsets are scaled by 4
|
||||||
|
if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
|
||||||
|
return OffField * 4;
|
||||||
|
|
||||||
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
|
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
|
||||||
: ARM_AM::getAM5Offset(OffField) * 4;
|
: ARM_AM::getAM5Offset(OffField) * 4;
|
||||||
if (isAM3) {
|
if (isAM3) {
|
||||||
@ -1417,8 +1623,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
|
|||||||
// Try to find a free register to use as a new base in case it's needed.
|
// Try to find a free register to use as a new base in case it's needed.
|
||||||
// First advance to the instruction just before the start of the chain.
|
// First advance to the instruction just before the start of the chain.
|
||||||
AdvanceRS(MBB, MemOps);
|
AdvanceRS(MBB, MemOps);
|
||||||
|
|
||||||
// Find a scratch register.
|
// Find a scratch register.
|
||||||
unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
|
unsigned Scratch =
|
||||||
|
RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
|
||||||
|
|
||||||
// Process the load / store instructions.
|
// Process the load / store instructions.
|
||||||
RS->forward(std::prev(MBBI));
|
RS->forward(std::prev(MBBI));
|
||||||
|
|
||||||
@ -1484,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
|
|||||||
/// =>
|
/// =>
|
||||||
/// ldmfd sp!, {..., pc}
|
/// ldmfd sp!, {..., pc}
|
||||||
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
|
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
|
||||||
|
// Thumb1 LDM doesn't allow high registers.
|
||||||
|
if (isThumb1) return false;
|
||||||
if (MBB.empty()) return false;
|
if (MBB.empty()) return false;
|
||||||
|
|
||||||
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
|
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
|
||||||
@ -1514,6 +1725,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
|
|||||||
|
|
||||||
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
||||||
const TargetMachine &TM = Fn.getTarget();
|
const TargetMachine &TM = Fn.getTarget();
|
||||||
|
TL = TM.getTargetLowering();
|
||||||
AFI = Fn.getInfo<ARMFunctionInfo>();
|
AFI = Fn.getInfo<ARMFunctionInfo>();
|
||||||
TII = TM.getInstrInfo();
|
TII = TM.getInstrInfo();
|
||||||
TRI = TM.getRegisterInfo();
|
TRI = TM.getRegisterInfo();
|
||||||
@ -1522,9 +1734,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
|||||||
isThumb2 = AFI->isThumb2Function();
|
isThumb2 = AFI->isThumb2Function();
|
||||||
isThumb1 = AFI->isThumbFunction() && !isThumb2;
|
isThumb1 = AFI->isThumbFunction() && !isThumb2;
|
||||||
|
|
||||||
// Don't do anything in this pass with Thumb1 for now.
|
|
||||||
if (isThumb1) return false;
|
|
||||||
|
|
||||||
bool Modified = false;
|
bool Modified = false;
|
||||||
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
||||||
++MFI) {
|
++MFI) {
|
||||||
@ -1585,11 +1794,6 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
|||||||
MRI = &Fn.getRegInfo();
|
MRI = &Fn.getRegInfo();
|
||||||
MF = &Fn;
|
MF = &Fn;
|
||||||
|
|
||||||
ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
|
|
||||||
bool isThumb1 = AFI->isThumbFunction() && !AFI->isThumb2Function();
|
|
||||||
// Don't do anything in this pass with Thumb1 for now.
|
|
||||||
if (isThumb1) return false;
|
|
||||||
|
|
||||||
bool Modified = false;
|
bool Modified = false;
|
||||||
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
||||||
++MFI)
|
++MFI)
|
||||||
|
@ -215,10 +215,6 @@ etc. Almost all Thumb instructions clobber condition code.
|
|||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
Add ldmia, stmia support.
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
Thumb load / store address mode offsets are scaled. The values kept in the
|
Thumb load / store address mode offsets are scaled. The values kept in the
|
||||||
instruction operands are pre-scale values. This probably ought to be changed
|
instruction operands are pre-scale values. This probably ought to be changed
|
||||||
to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
|
to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s
|
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY
|
||||||
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s
|
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC
|
||||||
|
|
||||||
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
|
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
|
||||||
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
|
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
|
||||||
@ -45,7 +45,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
|
|||||||
; CHECK: sub sp, #
|
; CHECK: sub sp, #
|
||||||
; CHECK: mov r[[R0:[0-9]+]], sp
|
; CHECK: mov r[[R0:[0-9]+]], sp
|
||||||
; CHECK: str r{{[0-9+]}}, [r[[R0]]
|
; CHECK: str r{{[0-9+]}}, [r[[R0]]
|
||||||
; CHECK: str r{{[0-9+]}}, [r[[R0]]
|
; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]]
|
||||||
|
; RA_BASIC: stm r[[R0]]!
|
||||||
; CHECK-NOT: ldr r0, [sp
|
; CHECK-NOT: ldr r0, [sp
|
||||||
; CHECK: mov r[[R1:[0-9]+]], sp
|
; CHECK: mov r[[R1:[0-9]+]], sp
|
||||||
; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}
|
; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}
|
||||||
|
42
test/CodeGen/Thumb/thumb-ldm.ll
Normal file
42
test/CodeGen/Thumb/thumb-ldm.ll
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
|
||||||
|
|
||||||
|
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
|
||||||
|
|
||||||
|
define i32 @t1() {
|
||||||
|
; CHECK-LABEL: t1:
|
||||||
|
; CHECK: push {r7, lr}
|
||||||
|
; CHECK: ldm
|
||||||
|
; CHECK: pop {r7, pc}
|
||||||
|
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
|
||||||
|
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
|
||||||
|
%tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
|
||||||
|
ret i32 %tmp4
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @t2() {
|
||||||
|
; CHECK-LABEL: t2:
|
||||||
|
; CHECK: push {r7, lr}
|
||||||
|
; CHECK: ldm
|
||||||
|
; CHECK: pop {r7, pc}
|
||||||
|
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
|
||||||
|
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
|
||||||
|
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
|
||||||
|
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
|
||||||
|
ret i32 %tmp6
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @t3() {
|
||||||
|
; CHECK-LABEL: t3:
|
||||||
|
; CHECK: push {r7, lr}
|
||||||
|
; CHECK: ldm
|
||||||
|
; CHECK: pop {r7, pc}
|
||||||
|
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
|
||||||
|
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
|
||||||
|
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
|
||||||
|
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
|
||||||
|
ret i32 %tmp6
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @f1(i32, i32)
|
||||||
|
|
||||||
|
declare i32 @f2(i32, i32, i32)
|
@ -5,6 +5,7 @@
|
|||||||
define i32 @t1() {
|
define i32 @t1() {
|
||||||
; CHECK-LABEL: t1:
|
; CHECK-LABEL: t1:
|
||||||
; CHECK: push {r7, lr}
|
; CHECK: push {r7, lr}
|
||||||
|
; CHECK: ldrd
|
||||||
; CHECK: pop {r7, pc}
|
; CHECK: pop {r7, pc}
|
||||||
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
|
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
|
||||||
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
|
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
|
||||||
@ -27,6 +28,7 @@ define i32 @t2() {
|
|||||||
define i32 @t3() {
|
define i32 @t3() {
|
||||||
; CHECK-LABEL: t3:
|
; CHECK-LABEL: t3:
|
||||||
; CHECK: push {r7, lr}
|
; CHECK: push {r7, lr}
|
||||||
|
; CHECK: ldm
|
||||||
; CHECK: pop {r7, pc}
|
; CHECK: pop {r7, pc}
|
||||||
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
|
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
|
||||||
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
|
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
|
||||||
|
Loading…
Reference in New Issue
Block a user