Enable pre-regalloc load / store multiple pass for Thumb2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82893 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2009-09-27 09:46:04 +00:00
parent 9217f6b2e4
commit e298ab26b1
4 changed files with 114 additions and 67 deletions

View File

@ -485,10 +485,12 @@ defm t2LDRSB : T2I_ld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1 in { let mayLoad = 1 in {
// Load doubleword // Load doubleword
def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst), (ins t2addrmode_imm8s4:$addr), def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2),
IIC_iLoadi, "ldrd", " $dst, $addr", []>; (ins t2addrmode_imm8s4:$addr),
def t2LDRDpci : T2Ii8s4<(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi, IIC_iLoadi, "ldrd", " $dst1, $addr", []>;
"ldrd", " $dst, $addr", []>; def t2LDRDpci : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2),
(ins i32imm:$addr), IIC_iLoadi,
"ldrd", " $dst1, $addr", []>;
} }
// zextload i1 -> zextload i8 // zextload i1 -> zextload i8
@ -597,8 +599,9 @@ defm t2STRH : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword // Store doubleword
let mayLoad = 1 in let mayLoad = 1 in
def t2STRDi8 : T2Ii8s4<(outs), (ins GPR:$src, t2addrmode_imm8s4:$addr), def t2STRDi8 : T2Ii8s4<(outs),
IIC_iStorer, "strd", " $src, $addr", []>; (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
IIC_iStorer, "strd", " $src1, $addr", []>;
// Indexed stores // Indexed stores
def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb), def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb),

View File

@ -683,7 +683,7 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::t2LDRi12: case ARM::t2LDRi12:
case ARM::t2STRi8: case ARM::t2STRi8:
case ARM::t2STRi12: case ARM::t2STRi12:
return true; return MI->getOperand(1).isReg();
} }
return false; return false;
} }
@ -737,37 +737,43 @@ static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, MachineBasicBlock::iterator &MBBI,
int OffImm, bool isDef, int OffImm, bool isDef,
DebugLoc dl, unsigned NewOpc, DebugLoc dl, unsigned NewOpc,
unsigned Reg, bool RegDeadKill, unsigned Reg, bool RegDeadKill, bool RegUndef,
unsigned BaseReg, bool BaseKill, unsigned BaseReg, bool BaseKill, bool BaseUndef,
unsigned OffReg, bool OffKill, unsigned OffReg, bool OffKill, bool OffUndef,
ARMCC::CondCodes Pred, unsigned PredReg, ARMCC::CondCodes Pred, unsigned PredReg,
const TargetInstrInfo *TII) { const TargetInstrInfo *TII, bool isT2) {
unsigned Offset; int Offset = OffImm;
if (OffImm < 0) if (!isT2) {
Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); if (OffImm < 0)
else Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); else
if (isDef) Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) }
if (isDef) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
.addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
.addReg(BaseReg, getKillRegState(BaseKill)) .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
.addReg(OffReg, getKillRegState(OffKill)) if (!isT2)
.addImm(Offset) MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
.addImm(Pred).addReg(PredReg); MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
else } else {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
.addReg(Reg, getKillRegState(RegDeadKill)) TII->get(NewOpc))
.addReg(BaseReg, getKillRegState(BaseKill)) .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
.addReg(OffReg, getKillRegState(OffKill)) .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
.addImm(Offset) if (!isT2)
.addImm(Pred).addReg(PredReg); MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
}
} }
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) { MachineBasicBlock::iterator &MBBI) {
MachineInstr *MI = &*MBBI; MachineInstr *MI = &*MBBI;
unsigned Opcode = MI->getOpcode(); unsigned Opcode = MI->getOpcode();
if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
unsigned EvenReg = MI->getOperand(0).getReg(); unsigned EvenReg = MI->getOperand(0).getReg();
unsigned OddReg = MI->getOperand(1).getReg(); unsigned OddReg = MI->getOperand(1).getReg();
unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
@ -775,17 +781,21 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
return false; return false;
bool isLd = Opcode == ARM::LDRD; bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ? bool EvenDeadKill = isLd ?
MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
bool EvenUndef = MI->getOperand(0).isUndef();
bool OddDeadKill = isLd ? bool OddDeadKill = isLd ?
MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
bool OddUndef = MI->getOperand(1).isUndef();
const MachineOperand &BaseOp = MI->getOperand(2); const MachineOperand &BaseOp = MI->getOperand(2);
unsigned BaseReg = BaseOp.getReg(); unsigned BaseReg = BaseOp.getReg();
bool BaseKill = BaseOp.isKill(); bool BaseKill = BaseOp.isKill();
const MachineOperand &OffOp = MI->getOperand(3); bool BaseUndef = BaseOp.isUndef();
unsigned OffReg = OffOp.getReg(); unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
bool OffKill = OffOp.isKill(); bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
int OffImm = getMemoryOpOffset(MI); int OffImm = getMemoryOpOffset(MI);
unsigned PredReg = 0; unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
@ -793,27 +803,35 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a // Ascending register numbers and no offset. It's safe to change it to a
// ldm or stm. // ldm or stm.
unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM; unsigned NewOpc = (isLd)
? (isT2 ? ARM::t2LDM : ARM::LDM)
: (isT2 ? ARM::t2STM : ARM::STM);
if (isLd) { if (isLd) {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
.addReg(BaseReg, getKillRegState(BaseKill)) .addReg(BaseReg, getKillRegState(BaseKill))
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addImm(Pred).addReg(PredReg) .addImm(Pred).addReg(PredReg)
.addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
.addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); .addReg(OddReg, getDefRegState(isLd)| getDeadRegState(OddDeadKill));
++NumLDRD2LDM; ++NumLDRD2LDM;
} else { } else {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
.addReg(BaseReg, getKillRegState(BaseKill)) .addReg(BaseReg, getKillRegState(BaseKill))
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addImm(Pred).addReg(PredReg) .addImm(Pred).addReg(PredReg)
.addReg(EvenReg, getKillRegState(EvenDeadKill)) .addReg(EvenReg,
.addReg(OddReg, getKillRegState(OddDeadKill)); getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
.addReg(OddReg,
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM; ++NumSTRD2STM;
} }
} else { } else {
// Split into two instructions. // Split into two instructions.
unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR; assert((!isT2 || !OffReg) &&
"Thumb2 ldrd / strd does not encode offset register!");
unsigned NewOpc = (isLd)
? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
: (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
DebugLoc dl = MBBI->getDebugLoc(); DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and base register is killed, it may have been // If this is a load and base register is killed, it may have been
// re-defed by the load, make sure the first load does not clobber it. // re-defed by the load, make sure the first load does not clobber it.
@ -823,17 +841,23 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
(OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
assert(!TRI->regsOverlap(OddReg, BaseReg) && assert(!TRI->regsOverlap(OddReg, BaseReg) &&
(!OffReg || !TRI->regsOverlap(OddReg, OffReg))); (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
BaseReg, false, OffReg, false, Pred, PredReg, TII); OddReg, OddDeadKill, false,
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, BaseReg, false, BaseUndef, OffReg, false, OffUndef,
BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII); Pred, PredReg, TII, isT2);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else { } else {
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, BaseReg, false, OffReg, false, EvenReg, EvenDeadKill, EvenUndef,
Pred, PredReg, TII); BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill, OddReg, OddDeadKill, OddUndef,
Pred, PredReg, TII); BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} }
if (isLd) if (isLd)
++NumLDRD2LDR; ++NumLDRD2LDR;
@ -1083,7 +1107,7 @@ namespace {
bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg, unsigned &NewOpc, unsigned &EvenReg,
unsigned &OddReg, unsigned &BaseReg, unsigned &OddReg, unsigned &BaseReg,
unsigned &OffReg, unsigned &Offset, unsigned &OffReg, int &Offset,
unsigned &PredReg, ARMCC::CondCodes &Pred, unsigned &PredReg, ARMCC::CondCodes &Pred,
bool &isT2); bool &isT2);
bool RescheduleOps(MachineBasicBlock *MBB, bool RescheduleOps(MachineBasicBlock *MBB,
@ -1163,7 +1187,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl, DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg, unsigned &NewOpc, unsigned &EvenReg,
unsigned &OddReg, unsigned &BaseReg, unsigned &OddReg, unsigned &BaseReg,
unsigned &OffReg, unsigned &Offset, unsigned &OffReg, int &Offset,
unsigned &PredReg, unsigned &PredReg,
ARMCC::CondCodes &Pred, ARMCC::CondCodes &Pred,
bool &isT2) { bool &isT2) {
@ -1206,19 +1230,28 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
// Then make sure the immediate offset fits. // Then make sure the immediate offset fits.
int OffImm = getMemoryOpOffset(Op0); int OffImm = getMemoryOpOffset(Op0);
ARM_AM::AddrOpc AddSub = ARM_AM::add; if (isT2) {
if (OffImm < 0) { if (OffImm < 0) {
AddSub = ARM_AM::sub; if (OffImm < -255)
OffImm = - OffImm; // Can't fall back to t2LDRi8 / t2STRi8.
} return false;
int Limit = (1 << 8) * Scale; } else {
if (OffImm >= Limit || (OffImm & (Scale-1))) int Limit = (1 << 8) * Scale;
return false; if (OffImm >= Limit || (OffImm & (Scale-1)))
return false;
if (isT2) }
Offset = OffImm; Offset = OffImm;
else } else {
ARM_AM::AddrOpc AddSub = ARM_AM::add;
if (OffImm < 0) {
AddSub = ARM_AM::sub;
OffImm = - OffImm;
}
int Limit = (1 << 8) * Scale;
if (OffImm >= Limit || (OffImm & (Scale-1)))
return false;
Offset = ARM_AM::getAM3Opc(AddSub, OffImm); Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
}
EvenReg = Op0->getOperand(0).getReg(); EvenReg = Op0->getOperand(0).getReg();
OddReg = Op1->getOperand(0).getReg(); OddReg = Op1->getOperand(0).getReg();
if (EvenReg == OddReg) if (EvenReg == OddReg)
@ -1316,7 +1349,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
ARMCC::CondCodes Pred = ARMCC::AL; ARMCC::CondCodes Pred = ARMCC::AL;
bool isT2 = false; bool isT2 = false;
unsigned NewOpc = 0; unsigned NewOpc = 0;
unsigned Offset = 0; int Offset = 0;
DebugLoc dl; DebugLoc dl;
if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
EvenReg, OddReg, BaseReg, OffReg, EvenReg, OddReg, BaseReg, OffReg,

View File

@ -95,20 +95,19 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
if (Subtarget.hasNEON()) if (Subtarget.hasNEON())
PM.add(createNEONPreAllocPass()); PM.add(createNEONPreAllocPass());
// FIXME: temporarily disabling load / store optimization pass for Thumb mode. // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb()) if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true)); PM.add(createARMLoadStoreOptimizationPass(true));
return true; return true;
} }
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) { CodeGenOpt::Level OptLevel) {
// FIXME: temporarily disabling load / store optimization pass for Thumb1 mode. // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) {
PM.add(createARMLoadStoreOptimizationPass()); PM.add(createARMLoadStoreOptimizationPass());
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createIfConverterPass()); PM.add(createIfConverterPass());
}
if (Subtarget.isThumb2()) { if (Subtarget.isThumb2()) {
PM.add(createThumb2ITBlockPass()); PM.add(createThumb2ITBlockPass());

View File

@ -0,0 +1,12 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
@b = external global i64*
; Test function @t: reads the i64 pointer stored in the external global @b,
; loads the i64 it points to, multiplies that value by the argument %a, and
; returns the product.
define i64 @t(i64 %a) nounwind readonly {
entry:
; The generated code must contain a doubleword load whose printed form is
; exactly the text matched below (presumably produced from the i64 load of %0;
; confirm against the llc output).
;CHECK: ldrd r2, [r2]
; Fetch the pointer currently held in @b.
%0 = load i64** @b, align 4
; Load the 64-bit value through that pointer; note the 4-byte alignment.
%1 = load i64* %0, align 4
; 64-bit multiply of the loaded value by the incoming argument.
%2 = mul i64 %1, %a
ret i64 %2
}