mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-27 14:34:58 +00:00
[Thumb] Make load/store optimizer less conservative.
If it's safe to clobber the condition flags, we can do a few extra things: it's then possible to reset the base register writeback using a SUBS, so we can try to merge even if the base register isn't dead after the merged instruction. This is effectively a (heavily bug-fixed) rewrite of r208992. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218386 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
43c6b6be8f
commit
8c4e64af8a
@ -97,6 +97,10 @@ namespace {
|
||||
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
|
||||
const MemOpQueue &MemOps, unsigned DefReg,
|
||||
unsigned RangeBegin, unsigned RangeEnd);
|
||||
void UpdateBaseRegUses(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
DebugLoc dl, unsigned Base, unsigned WordOffset,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg);
|
||||
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||
int Offset, unsigned Base, bool BaseKill, int Opcode,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
|
||||
@ -140,6 +144,46 @@ namespace {
|
||||
char ARMLoadStoreOpt::ID = 0;
|
||||
}
|
||||
|
||||
static bool definesCPSR(const MachineInstr *MI) {
|
||||
for (const auto &MO : MI->operands()) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
|
||||
// If the instruction has live CPSR def, then it's not safe to fold it
|
||||
// into load / store.
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int getMemoryOpOffset(const MachineInstr *MI) {
|
||||
int Opcode = MI->getOpcode();
|
||||
bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
|
||||
unsigned NumOperands = MI->getDesc().getNumOperands();
|
||||
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
|
||||
|
||||
if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
|
||||
Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
|
||||
Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
|
||||
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
|
||||
return OffField;
|
||||
|
||||
// Thumb1 immediate offsets are scaled by 4
|
||||
if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
|
||||
return OffField * 4;
|
||||
|
||||
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
|
||||
: ARM_AM::getAM5Offset(OffField) * 4;
|
||||
ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
|
||||
: ARM_AM::getAM5Op(OffField);
|
||||
|
||||
if (Op == ARM_AM::sub)
|
||||
return -Offset;
|
||||
|
||||
return Offset;
|
||||
}
|
||||
|
||||
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
|
||||
switch (Opcode) {
|
||||
default: llvm_unreachable("Unhandled opcode!");
|
||||
@ -307,6 +351,120 @@ static bool isi32Store(unsigned Opc) {
|
||||
return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
|
||||
}
|
||||
|
||||
static unsigned getImmScale(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default: llvm_unreachable("Unhandled opcode!");
|
||||
case ARM::tLDRi:
|
||||
case ARM::tSTRi:
|
||||
return 1;
|
||||
case ARM::tLDRHi:
|
||||
case ARM::tSTRHi:
|
||||
return 2;
|
||||
case ARM::tLDRBi:
|
||||
case ARM::tSTRBi:
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
/// Update future uses of the base register with the offset introduced
|
||||
/// due to writeback. This function only works on Thumb1.
|
||||
void
|
||||
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
DebugLoc dl, unsigned Base,
|
||||
unsigned WordOffset,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg) {
|
||||
assert(isThumb1 && "Can only update base register uses for Thumb1!");
|
||||
// Start updating any instructions with immediate offsets. Insert a SUB before
|
||||
// the first non-updateable instruction (if any).
|
||||
for (; MBBI != MBB.end(); ++MBBI) {
|
||||
bool InsertSub = false;
|
||||
unsigned Opc = MBBI->getOpcode();
|
||||
|
||||
if (MBBI->readsRegister(Base)) {
|
||||
int Offset;
|
||||
bool IsLoad =
|
||||
Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
|
||||
bool IsStore =
|
||||
Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
|
||||
|
||||
if (IsLoad || IsStore) {
|
||||
// Loads and stores with immediate offsets can be updated, but only if
|
||||
// the new offset isn't negative.
|
||||
// The MachineOperand containing the offset immediate is the last one
|
||||
// before predicates.
|
||||
MachineOperand &MO =
|
||||
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
|
||||
// The offsets are scaled by 1, 2 or 4 depending on the Opcode.
|
||||
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
|
||||
|
||||
// If storing the base register, it needs to be reset first.
|
||||
unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
|
||||
|
||||
if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
|
||||
MO.setImm(Offset);
|
||||
else
|
||||
InsertSub = true;
|
||||
|
||||
} else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
|
||||
!definesCPSR(MBBI)) {
|
||||
// SUBS/ADDS using this register, with a dead def of the CPSR.
|
||||
// Merge it with the update; if the merged offset is too large,
|
||||
// insert a new sub instead.
|
||||
MachineOperand &MO =
|
||||
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
|
||||
Offset = (Opc == ARM::tSUBi8) ?
|
||||
MO.getImm() + WordOffset * 4 :
|
||||
MO.getImm() - WordOffset * 4 ;
|
||||
if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
|
||||
// FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
|
||||
// Offset == 0.
|
||||
MO.setImm(Offset);
|
||||
// The base register has now been reset, so exit early.
|
||||
return;
|
||||
} else {
|
||||
InsertSub = true;
|
||||
}
|
||||
|
||||
} else {
|
||||
// Can't update the instruction.
|
||||
InsertSub = true;
|
||||
}
|
||||
|
||||
} else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
|
||||
// Since SUBS sets the condition flags, we can't place the base reset
|
||||
// after an instruction that has a live CPSR def.
|
||||
// The base register might also contain an argument for a function call.
|
||||
InsertSub = true;
|
||||
}
|
||||
|
||||
if (InsertSub) {
|
||||
// An instruction above couldn't be updated, so insert a sub.
|
||||
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
|
||||
.addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
return;
|
||||
}
|
||||
|
||||
if (MBBI->killsRegister(Base))
|
||||
// Register got killed. Stop updating.
|
||||
return;
|
||||
}
|
||||
|
||||
// End of block was reached.
|
||||
if (MBB.succ_size() > 0) {
|
||||
// FIXME: Because of a bug, live registers are sometimes missing from
|
||||
// the successor blocks' live-in sets. This means we can't trust that
|
||||
// information and *always* have to reset at the end of a block.
|
||||
// See PR21029.
|
||||
if (MBBI != MBB.end()) --MBBI;
|
||||
AddDefaultT1CC(
|
||||
BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
|
||||
.addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
}
|
||||
}
|
||||
|
||||
/// MergeOps - Create and insert a LDM or STM with Base as base register and
|
||||
/// registers in Regs as the register operands that would be loaded / stored.
|
||||
/// It returns true if the transformation is done.
|
||||
@ -329,6 +487,22 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
(MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
|
||||
MachineBasicBlock::LQR_Dead);
|
||||
|
||||
bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
|
||||
|
||||
// Exception: If the base register is in the input reglist, Thumb1 LDM is
|
||||
// non-writeback.
|
||||
// It's also not possible to merge an STR of the base register in Thumb1.
|
||||
if (isThumb1)
|
||||
for (unsigned I = 0; I < NumRegs; ++I)
|
||||
if (Base == Regs[I].first) {
|
||||
if (Opcode == ARM::tLDRi) {
|
||||
Writeback = false;
|
||||
break;
|
||||
} else if (Opcode == ARM::tSTRi) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ARM_AM::AMSubMode Mode = ARM_AM::ia;
|
||||
// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
|
||||
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
|
||||
@ -421,24 +595,16 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
|
||||
if (!Opcode) return false;
|
||||
|
||||
bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
|
||||
|
||||
// Exception: If the base register is in the input reglist, Thumb1 LDM is
|
||||
// non-writeback. Check for this.
|
||||
if (Opcode == ARM::tLDMIA && isThumb1)
|
||||
for (unsigned I = 0; I < NumRegs; ++I)
|
||||
if (Base == Regs[I].first) {
|
||||
Writeback = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// If the merged instruction has writeback and the base register is not killed
|
||||
// it's not safe to do the merge on Thumb1. This is because resetting the base
|
||||
// register writeback by inserting a SUBS sets the condition flags.
|
||||
// FIXME: Try something clever here to see if resetting the base register can
|
||||
// be avoided, e.g. by updating a later ADD/SUB of the base register with the
|
||||
// writeback.
|
||||
if (isThumb1 && Writeback && !BaseKill) return false;
|
||||
// Check if a Thumb1 LDM/STM merge is safe. This is the case if:
|
||||
// - There is no writeback (LDM of base register),
|
||||
// - the base register is killed by the merged instruction,
|
||||
// - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
|
||||
// to reset the base register.
|
||||
// Otherwise, don't merge.
|
||||
// It's safe to return here since the code to materialize a new base register
|
||||
// above is also conditional on SafeToClobberCPSR.
|
||||
if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
|
||||
return false;
|
||||
|
||||
MachineInstrBuilder MIB;
|
||||
|
||||
@ -452,6 +618,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
// Thumb1: we might need to set base writeback when building the MI.
|
||||
MIB.addReg(Base, getDefRegState(true))
|
||||
.addReg(Base, getKillRegState(BaseKill));
|
||||
|
||||
// The base isn't dead after a merged instruction with writeback.
|
||||
// Insert a sub instruction after the newly formed instruction to reset.
|
||||
if (!BaseKill)
|
||||
UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
|
||||
|
||||
} else {
|
||||
// No writeback, simply build the MachineInstr.
|
||||
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
|
||||
@ -622,6 +794,11 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
|
||||
memOps[i].MBBI = Merges.back();
|
||||
memOps[i].Position = insertPos;
|
||||
}
|
||||
|
||||
// Update memOps offsets, since they may have been modified by MergeOps.
|
||||
for (auto &MemOp : memOps) {
|
||||
MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
|
||||
}
|
||||
}
|
||||
|
||||
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
|
||||
@ -704,20 +881,6 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
|
||||
Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
|
||||
}
|
||||
|
||||
/// Scans every operand of \p MI and reports whether one of them is a
/// non-dead register definition of ARM::CPSR.
static bool definesCPSR(MachineInstr *MI) {
  const unsigned OperandCount = MI->getNumOperands();
  for (unsigned Idx = 0; Idx < OperandCount; ++Idx) {
    const MachineOperand &Operand = MI->getOperand(Idx);
    // Only register operands can be a CPSR def.
    if (Operand.isReg() && Operand.isDef() &&
        Operand.getReg() == ARM::CPSR && !Operand.isDead()) {
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
    }
  }

  return false;
}
|
||||
|
||||
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
|
||||
unsigned Bytes, unsigned Limit,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg) {
|
||||
@ -1255,34 +1418,6 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
|
||||
RS->forward(std::prev(Loc));
|
||||
}
|
||||
|
||||
static int getMemoryOpOffset(const MachineInstr *MI) {
|
||||
int Opcode = MI->getOpcode();
|
||||
bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
|
||||
unsigned NumOperands = MI->getDesc().getNumOperands();
|
||||
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
|
||||
|
||||
if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
|
||||
Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
|
||||
Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
|
||||
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
|
||||
return OffField;
|
||||
|
||||
// Thumb1 immediate offsets are scaled by 4
|
||||
if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
|
||||
return OffField * 4;
|
||||
|
||||
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
|
||||
: ARM_AM::getAM5Offset(OffField) * 4;
|
||||
if (isAM3) {
|
||||
if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
|
||||
Offset = -Offset;
|
||||
} else {
|
||||
if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
|
||||
Offset = -Offset;
|
||||
}
|
||||
return Offset;
|
||||
}
|
||||
|
||||
static void InsertLDR_STR(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
int Offset, bool isDef,
|
||||
|
@ -4,8 +4,8 @@ define void @foo(i32* %A) #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: push {r7, lr}
|
||||
; CHECK: ldr
|
||||
; CHECK-NEXT: ldr
|
||||
; CHECK: ldm
|
||||
; CHECK-NEXT: subs
|
||||
; CHECK-NEXT: bl
|
||||
%0 = load i32* %A, align 4
|
||||
%arrayidx1 = getelementptr inbounds i32* %A, i32 1
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY
|
||||
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC
|
||||
|
||||
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
|
||||
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
|
||||
@ -45,7 +45,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
|
||||
; CHECK: sub sp, #
|
||||
; CHECK: mov r[[R0:[0-9]+]], sp
|
||||
; CHECK: str r{{[0-9+]}}, [r[[R0]]
|
||||
; CHECK: str r{{[0-9+]}}, [r[[R0]]
|
||||
; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]]
|
||||
; RA_BASIC: stm r[[R0]]!
|
||||
; CHECK-NOT: ldr r0, [sp
|
||||
; CHECK: mov r[[R1:[0-9]+]], sp
|
||||
; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}
|
||||
|
@ -1,5 +1,4 @@
|
||||
; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
@d = external global [64 x i32]
|
||||
@s = external global [64 x i32]
|
||||
|
||||
@ -7,8 +6,12 @@
|
||||
define void @t1() #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: t1:
|
||||
; CHECK-NOT: ldm
|
||||
; CHECK-NOT: stm
|
||||
; CHECK: ldr r[[LB:[0-9]]],
|
||||
; CHECK-NEXT: ldm r[[LB]]!,
|
||||
; CHECK-NEXT: ldr r[[SB:[0-9]]],
|
||||
; CHECK-NEXT: stm r[[SB]]!,
|
||||
; CHECK-NEXT: ldrb {{.*}}, [r[[LB]]]
|
||||
; CHECK-NEXT: strb {{.*}}, [r[[SB]]]
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
@ -17,8 +20,14 @@ entry:
|
||||
define void @t2() #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: t2:
|
||||
; CHECK-NOT: ldm
|
||||
; CHECK-NOT: stm
|
||||
; CHECK: ldr r[[LB:[0-9]]],
|
||||
; CHECK-NEXT: ldm r[[LB]]!,
|
||||
; CHECK-NEXT: ldr r[[SB:[0-9]]],
|
||||
; CHECK-NEXT: stm r[[SB]]!,
|
||||
; CHECK-NEXT: ldrh {{.*}}, [r[[LB]]]
|
||||
; CHECK-NEXT: ldrb {{.*}}, [r[[LB]], #2]
|
||||
; CHECK-NEXT: strb {{.*}}, [r[[SB]], #2]
|
||||
; CHECK-NEXT: strh {{.*}}, [r[[SB]]]
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user