Sorry, several patches in one.

TargetInstrInfo:
Change produceSameValue() to take MachineRegisterInfo as an optional argument.
When in SSA form, targets can use it to make more aggressive equality analysis.

Machine LICM:
1. Eliminate isLoadFromConstantMemory, use MI.isInvariantLoad instead.
2. Fix a bug which prevent CSE of instructions which are not re-materializable.
3. Use improved form of produceSameValue.

ARM:
1. Teach ARM produceSameValue to look pass some PIC labels.
2. Look for operands from different loads of different constant pool entries
   which have same values.
3. Re-implement PIC GA materialization using movw + movt. Combine the pair with
   a "add pc" or "ldr [pc]" to form pseudo instructions. This makes it possible
   to re-materialize the instruction, allow machine LICM to hoist the set of
   instructions out of the loop and make it possible to CSE them. It's a bit
   hacky, but it significantly improve code quality.
4. Some minor bug fixes as well.

With the fixes, using movw + movt to materialize GAs significantly outperform the
load from constantpool method. 186.crafty and 255.vortex improved > 20%, 254.gap
and 176.gcc ~10%.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123905 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2011-01-20 08:34:58 +00:00
parent 59315d1d54
commit 9fe2009956
16 changed files with 321 additions and 224 deletions

View File

@ -228,9 +228,12 @@ public:
/// produceSameValue - Return true if two machine instructions would produce
/// identical values. By default, this is only true when the two instructions
/// are deemed identical except for defs.
/// are deemed identical except for defs. If this function is called when the
/// IR is still in SSA form, the caller can pass the MachineRegisterInfo for
/// aggressive checks.
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const = 0;
const MachineInstr *MI1,
const MachineRegisterInfo *MRI = 0) const = 0;
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
@ -677,7 +680,8 @@ public:
virtual MachineInstr *duplicate(MachineInstr *Orig,
MachineFunction &MF) const;
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const;
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
virtual bool isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const;

View File

@ -208,10 +208,6 @@ namespace {
/// specified instruction.
void UpdateRegPressure(const MachineInstr *MI);
/// isLoadFromConstantMemory - Return true if the given instruction is a
/// load from constant memory.
bool isLoadFromConstantMemory(MachineInstr *MI);
/// ExtractHoistableLoad - Unfold a load from the given machineinstr if
/// the load itself could be hoisted. Return the unfolded and hoistable
/// load, or null if the load couldn't be unfolded or if it wouldn't
@ -773,25 +769,6 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) {
return false;
}
/// isLoadFromConstantMemory - Return true if the given instruction is a
/// load from constant memory. Machine LICM will hoist these even if they are
/// not re-materializable.
bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
if (!MI->getDesc().mayLoad()) return false;
if (!MI->hasOneMemOperand()) return false;
MachineMemOperand *MMO = *MI->memoperands_begin();
if (MMO->isVolatile()) return false;
if (!MMO->getValue()) return false;
const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
if (PSV) {
MachineFunction &MF = *MI->getParent()->getParent();
return PSV->isConstant(MF.getFrameInfo());
} else {
return AA->pointsToConstantMemory(AliasAnalysis::Location(MMO->getValue(),
MMO->getSize(),
MMO->getTBAAInfo()));
}
}
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
/// and an use in the current loop, return true if the target considered
@ -995,7 +972,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// High register pressure situation, only hoist if the instruction is going to
// be remat'ed.
if (!TII->isTriviallyReMaterializable(&MI, AA) &&
!isLoadFromConstantMemory(&MI))
!MI.isInvariantLoad(AA))
return false;
}
@ -1021,7 +998,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
if (!isLoadFromConstantMemory(MI))
if (!MI->isInvariantLoad(AA))
return 0;
// Next determine the register class for a temporary register.
@ -1072,20 +1049,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
const MachineInstr *MI = &*I;
// FIXME: For now, only hoist re-materilizable instructions. LICM will
// increase register pressure. We want to make sure it doesn't increase
// spilling.
if (TII->isTriviallyReMaterializable(MI, AA)) {
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
CI = CSEMap.find(Opcode);
if (CI != CSEMap.end())
CI->second.push_back(MI);
else {
std::vector<const MachineInstr*> CSEMIs;
CSEMIs.push_back(MI);
CSEMap.insert(std::make_pair(Opcode, CSEMIs));
}
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
CI = CSEMap.find(Opcode);
if (CI != CSEMap.end())
CI->second.push_back(MI);
else {
std::vector<const MachineInstr*> CSEMIs;
CSEMIs.push_back(MI);
CSEMap.insert(std::make_pair(Opcode, CSEMIs));
}
}
}
@ -1095,7 +1067,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
if (TII->produceSameValue(MI, PrevMI))
if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
return PrevMI;
}
return 0;

View File

@ -166,8 +166,10 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MBB.insert(I, MI);
}
bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const {
bool
TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const {
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

View File

@ -568,7 +568,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 4;
case ARM::MOVi32imm:
case ARM::t2MOVi32imm:
case ARM::MOV_pic_ga:
return 8;
case ARM::CONSTPOOL_ENTRY:
// If this machine instr is a constant pool entry, its size is recorded as
@ -1053,12 +1052,16 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
}
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const {
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const {
int Opcode = MI0->getOpcode();
if (Opcode == ARM::t2LDRpci ||
if (Opcode == ARM::LDRi12 ||
Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
Opcode == ARM::tLDRpci ||
Opcode == ARM::tLDRpci_pic) {
Opcode == ARM::tLDRpci_pic ||
Opcode == ARM::MOV_pic_ga_add_pc ||
Opcode == ARM::t2MOV_pic_ga_add_pc) {
if (MI1->getOpcode() != Opcode)
return false;
if (MI0->getNumOperands() != MI1->getNumOperands())
@ -1066,9 +1069,17 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
const MachineOperand &MO0 = MI0->getOperand(1);
const MachineOperand &MO1 = MI1->getOperand(1);
if (Opcode == ARM::LDRi12 && (!MO0.isCPI() || !MO1.isCPI()))
return false;
if (MO0.getOffset() != MO1.getOffset())
return false;
if (Opcode == ARM::MOV_pic_ga_add_pc ||
Opcode == ARM::t2MOV_pic_ga_add_pc)
// Ignore the PC labels.
return MO0.getGlobal() == MO1.getGlobal();
const MachineFunction *MF = MI0->getParent()->getParent();
const MachineConstantPool *MCP = MF->getConstantPool();
int CPI0 = MO0.getIndex();
@ -1080,6 +1091,37 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
ARMConstantPoolValue *ACPV1 =
static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
return ACPV0->hasSameValue(ACPV1);
} else if (Opcode == ARM::PICLDR) {
if (MI1->getOpcode() != Opcode)
return false;
if (MI0->getNumOperands() != MI1->getNumOperands())
return false;
unsigned Addr0 = MI0->getOperand(1).getReg();
unsigned Addr1 = MI1->getOperand(1).getReg();
if (Addr0 != Addr1) {
if (!MRI ||
!TargetRegisterInfo::isVirtualRegister(Addr0) ||
!TargetRegisterInfo::isVirtualRegister(Addr1))
return false;
// This assumes SSA form.
MachineInstr *Def0 = MRI->getVRegDef(Addr0);
MachineInstr *Def1 = MRI->getVRegDef(Addr1);
// Check if the loaded value, e.g. a constantpool of a global address, are
// the same.
if (!produceSameValue(Def0, Def1, MRI))
return false;
}
for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
// %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
const MachineOperand &MO0 = MI0->getOperand(i);
const MachineOperand &MO1 = MI1->getOperand(i);
if (!MO0.isIdenticalTo(MO1))
return false;
}
return true;
}
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);

View File

@ -298,7 +298,8 @@ public:
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const;
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should

View File

@ -38,6 +38,7 @@ namespace {
const ARMBaseInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@ -48,12 +49,16 @@ namespace {
private:
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool ExpandMBB(MachineBasicBlock &MBB);
void ExpandVLD(MachineBasicBlock::iterator &MBBI);
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool IsExt, unsigned NumRegs);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
char ARMExpandPseudo::ID = 0;
}
@ -612,21 +617,85 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MI.eraseFromParent();
}
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
MachineInstrBuilder LO16, HI16;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
if (!STI->hasV6T2Ops() &&
(Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
// Expand into a movi + orr.
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
bool ModifiedOp = true;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
unsigned ImmVal = (unsigned)MO.getImm();
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
(*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
(*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
LO16.addImm(Pred).addReg(PredReg).addReg(0);
HI16.addImm(Pred).addReg(PredReg).addReg(0);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
return;
}
unsigned LO16Opc = 0;
unsigned HI16Opc = 0;
if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
LO16Opc = ARM::t2MOVi16;
HI16Opc = ARM::t2MOVTi16;
} else {
LO16Opc = ARM::MOVi16;
HI16Opc = ARM::MOVTi16;
}
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
if (MO.isImm()) {
unsigned Imm = MO.getImm();
unsigned Lo16 = Imm & 0xffff;
unsigned Hi16 = (Imm >> 16) & 0xffff;
LO16 = LO16.addImm(Lo16);
HI16 = HI16.addImm(Hi16);
} else {
const GlobalValue *GV = MO.getGlobal();
unsigned TF = MO.getTargetFlags();
LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
}
(*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
(*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
}
bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default:
ModifiedOp = false;
break;
return false;
case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
@ -636,7 +705,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
// pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
// for us. Otherwise, expand to nothing.
if (RI.hasBasePointer(MF)) {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int32_t NumBytes = AFI->getFramePtrSpillOffset();
unsigned FramePtr = RI.getFrameRegister(MF);
assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
@ -670,7 +738,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
MI.eraseFromParent();
break;
return true;
}
case ARM::MOVsrl_flag:
@ -678,26 +746,26 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
// These are just fancy MOVs insructions.
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
.addReg(0)
.addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
: ARM_AM::asr), 1)))
.addReg(ARM::CPSR, RegState::Define);
.addOperand(MI.getOperand(1))
.addReg(0)
.addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
: ARM_AM::asr), 1)))
.addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
break;
return true;
}
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
.addReg(0);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
break;
return true;
}
case ARM::TPsoft: {
MachineInstrBuilder MIB =
@ -708,7 +776,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
(*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
break;
return true;
}
case ARM::t2LDRHpci:
case ARM::t2LDRBpci:
@ -733,13 +801,14 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(NewLdOpc), DstReg)
.addReg(ARM::PC)
.addOperand(MI.getOperand(1)));
.addReg(ARM::PC)
.addOperand(MI.getOperand(1)));
(*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
break;
return true;
}
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
@ -748,7 +817,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(NewLdOpc), DstReg);
TII->get(NewLdOpc), DstReg);
if (Opcode == ARM::t2LDRpci_pic) MIB1.addReg(ARM::PC);
MIB1.addOperand(MI.getOperand(1));
AddDefaultPred(MIB1);
@ -760,102 +829,55 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addOperand(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
break;
return true;
}
case ARM::MOV_pic_ga_add_pc:
case ARM::MOV_pic_ga_ldr:
case ARM::t2MOV_pic_ga_add_pc: {
// Expand into movw + movw + add pc / ldr [pc]
unsigned LabelId = AFI->createPICLabelUId();
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
const MachineOperand &MO1 = MI.getOperand(1);
const GlobalValue *GV = MO1.getGlobal();
unsigned TF = MO1.getTargetFlags();
bool isARM = Opcode != ARM::t2MOV_pic_ga_add_pc;
unsigned LO16Opc = isARM ? ARM::MOVi16_pic_ga : ARM::t2MOVi16_pic_ga;
unsigned HI16Opc = isARM ? ARM::MOVTi16_pic_ga : ARM::t2MOVTi16_pic_ga;
unsigned PICAddOpc = isARM
? (Opcode == ARM::MOV_pic_ga_ldr ? ARM::PICLDR : ARM::PICADD)
: ARM::tPICADD;
MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(LO16Opc), DstReg)
.addGlobalAddress(GV, MO1.getOffset(),
TF | ARMII::MO_LO16_NONLAZY_PIC)
.addImm(LabelId);
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
.addReg(DstReg)
.addGlobalAddress(GV, MO1.getOffset(),
TF | ARMII::MO_HI16_NONLAZY_PIC)
.addImm(LabelId);
MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(PICAddOpc))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg).addImm(LabelId);
if (isARM) {
AddDefaultPred(MIB2);
if (Opcode == ARM::MOV_pic_ga_ldr)
(*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
case ARM::MOVi32imm:
case ARM::MOVCCi32imm:
case ARM::MOV_pic_ga:
case ARM::t2MOVi32imm:
case ARM::t2MOVCCi32imm:
case ARM::t2MOV_pic_ga: {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
bool isPIC_GA = (Opcode == ARM::t2MOV_pic_ga || Opcode == ARM::MOV_pic_ga);
const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
MachineInstrBuilder LO16, HI16;
if (!STI->hasV6T2Ops() &&
(Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
// Expand into a movi + orr.
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
unsigned ImmVal = (unsigned)MO.getImm();
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
(*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
(*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
LO16.addImm(Pred).addReg(PredReg).addReg(0);
HI16.addImm(Pred).addReg(PredReg).addReg(0);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
break;
}
unsigned LO16Opc = 0;
unsigned HI16Opc = 0;
if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
LO16Opc = ARM::t2MOVi16;
HI16Opc = ARM::t2MOVTi16;
} else if (Opcode == ARM::MOV_pic_ga) {
LO16Opc = ARM::MOVi16_pic_ga;
HI16Opc = ARM::MOVTi16_pic_ga;
} else if (Opcode == ARM::t2MOV_pic_ga) {
LO16Opc = ARM::t2MOVi16_pic_ga;
HI16Opc = ARM::t2MOVTi16_pic_ga;
} else {
LO16Opc = ARM::MOVi16;
HI16Opc = ARM::MOVTi16;
}
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
if (MO.isImm()) {
unsigned Imm = MO.getImm();
unsigned Lo16 = Imm & 0xffff;
unsigned Hi16 = (Imm >> 16) & 0xffff;
LO16 = LO16.addImm(Lo16);
HI16 = HI16.addImm(Hi16);
} else if (isPIC_GA) {
unsigned LabelId = MI.getOperand(2).getImm();
const GlobalValue *GV = MO.getGlobal();
unsigned TF = MO.getTargetFlags();
LO16 = LO16.addGlobalAddress(GV, MO.getOffset(),
TF | ARMII::MO_LO16_NONLAZY_PIC)
.addImm(LabelId);
HI16 = HI16.addGlobalAddress(GV, MO.getOffset(),
TF | ARMII::MO_HI16_NONLAZY_PIC)
.addImm(LabelId);
} else {
const GlobalValue *GV = MO.getGlobal();
unsigned TF = MO.getTargetFlags();
LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
}
(*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
(*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
if (!isPIC_GA) {
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
}
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
break;
}
ExpandMOV32BitImm(MBB, MBBI);
return true;
case ARM::VMOVQQ: {
unsigned DstReg = MI.getOperand(0).getReg();
@ -869,18 +891,18 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstrBuilder Even =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
.addReg(EvenDst,
RegState::Define | getDeadRegState(DstIsDead))
.addReg(EvenSrc, getKillRegState(SrcIsKill)));
.addReg(EvenDst,
RegState::Define | getDeadRegState(DstIsDead))
.addReg(EvenSrc, getKillRegState(SrcIsKill)));
MachineInstrBuilder Odd =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
.addReg(OddDst,
RegState::Define | getDeadRegState(DstIsDead))
.addReg(OddSrc, getKillRegState(SrcIsKill)));
.addReg(OddDst,
RegState::Define | getDeadRegState(DstIsDead))
.addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
break;
return true;
}
case ARM::VLDMQIA:
@ -911,7 +933,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
break;
return true;
}
case ARM::VSTMQIA:
@ -943,7 +965,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
break;
return true;
}
case ARM::VDUPfqf:
case ARM::VDUPfdf:{
@ -954,7 +976,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
unsigned SrcReg = MI.getOperand(1).getReg();
unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
// The lane is [0,1] for the containing DReg superregister.
// Copy the dst/src register operands.
MIB.addOperand(MI.getOperand(OpIdx++));
@ -968,7 +990,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
break;
return true;
}
case ARM::VLD1q8Pseudo:
@ -1044,7 +1066,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VLD4DUPd16Pseudo_UPD:
case ARM::VLD4DUPd32Pseudo_UPD:
ExpandVLD(MBBI);
break;
return true;
case ARM::VST1q8Pseudo:
case ARM::VST1q16Pseudo:
@ -1095,7 +1117,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VST4q16oddPseudo_UPD:
case ARM::VST4q32oddPseudo_UPD:
ExpandVST(MBBI);
break;
return true;
case ARM::VLD1LNq8Pseudo:
case ARM::VLD1LNq16Pseudo:
@ -1170,18 +1192,26 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VST4LNq16Pseudo_UPD:
case ARM::VST4LNq32Pseudo_UPD:
ExpandLaneOp(MBBI);
break;
return true;
case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); break;
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); break;
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); break;
case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break;
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break;
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break;
}
case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
}
if (ModifiedOp)
Modified = true;
return false;
}
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
Modified |= ExpandMI(MBB, MBBI);
MBBI = NMBBI;
}
@ -1192,6 +1222,7 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
TRI = MF.getTarget().getRegisterInfo();
STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
AFI = MF.getInfo<ARMFunctionInfo>();
bool Modified = false;
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;

View File

@ -880,8 +880,8 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
Offset = N.getOperand(0);
SDValue N1 = N.getOperand(1);
Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
MVT::i32);
Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
MVT::i32);
return true;
}

View File

@ -2003,13 +2003,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
// FIXME: Not a constant pool!
unsigned PICLabelIndex = AFI->createPICLabelUId();
SDValue PICLabel = DAG.getConstant(PICLabelIndex, MVT::i32);
SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT),
PICLabel);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, 0);

View File

@ -70,7 +70,7 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntBinOp>;
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@ -3408,11 +3408,22 @@ def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set GPR:$dst, (arm_i32imm:$src))]>,
Requires<[IsARM]>;
let isReMaterializable = 1 in
def MOV_pic_ga : PseudoInst<(outs GPR:$dst),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVix2,
[(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr, imm:$id))]>,
Requires<[IsARM, UseMovt]>;
// Pseudo instruction that combines movw + movt + add pc.
// It also makes it possible to rematerialize the instructions.
// FIXME: Remove this when we can do generalized remat and when machine licm
// can properly the instructions.
let isReMaterializable = 1 in {
def MOV_pic_ga_add_pc : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
IIC_iMOVix2addpc,
[(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsARM, UseMovt]>;
let AddedComplexity = 10 in
def MOV_pic_ga_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
IIC_iMOVix2ld,
[(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
Requires<[IsARM, UseMovt]>;
} // isReMaterializable
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,

View File

@ -3245,12 +3245,15 @@ def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set rGPR:$dst, (i32 imm:$src))]>,
Requires<[IsThumb, HasV6T2]>;
// Materialize GA with movw + movt.
// Pseudo instruction that combines movw + movt + add pc.
// It also makes it possible to rematerialize the instructions.
// FIXME: Remove this when we can do generalized remat and when machine licm
// can properly the instructions.
let isReMaterializable = 1 in
def t2MOV_pic_ga : PseudoInst<(outs rGPR:$dst),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVix2,
[(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr, imm:$id))]>,
Requires<[IsThumb2, UseMovt]>;
def t2MOV_pic_ga_add_pc : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
IIC_iMOVix2,
[(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
Requires<[IsThumb2, UseMovt]>;
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@ -3266,9 +3269,9 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// be expanded into two instructions late to allow if-conversion and
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
IIC_iLoadiALU,
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
[(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;

View File

@ -260,7 +260,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
// restricted GPR register class. Many Thumb2 instructions allow the full
// register range for operands, but have undefined behaviours when PC
// or SP (R13 or R15) are used. The ARM ARM refers to these operands
// or SP (R13 or R15) are used. The ARM ISA refers to these operands
// via the BadReg() pseudo-code description.
def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
R7, R8, R9, R10, R11, R12, LR]> {

View File

@ -38,6 +38,8 @@ def IIC_iMOVr : InstrItinClass;
def IIC_iMOVsi : InstrItinClass;
def IIC_iMOVsr : InstrItinClass;
def IIC_iMOVix2 : InstrItinClass;
def IIC_iMOVix2addpc : InstrItinClass;
def IIC_iMOVix2ld : InstrItinClass;
def IIC_iMVNi : InstrItinClass;
def IIC_iMVNr : InstrItinClass;
def IIC_iMVNsi : InstrItinClass;

View File

@ -71,6 +71,12 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LSPipe]>], [5]>,
//
// Move instructions, conditional
InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,

View File

@ -50,6 +50,16 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_ALU0, A9_ALU1]>,
InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_ALU0, A9_ALU1]>,
InstrStage<1, [A9_ALU0, A9_ALU1]>,
InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_ALU0, A9_ALU1]>,
InstrStage<1, [A9_ALU0, A9_ALU1]>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU], 0>,
InstrStage<1, [A9_LSUnit]>], [5]>,
//
// MVN instructions
InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,

View File

@ -64,6 +64,12 @@ def ARMV6Itineraries : ProcessorItineraries<
InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
InstrStage<1, [V6_Pipe]>], [2]>,
InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
InstrStage<1, [V6_Pipe]>,
InstrStage<1, [V6_Pipe]>], [3]>,
InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
InstrStage<1, [V6_Pipe]>,
InstrStage<1, [V6_Pipe]>], [5]>,
//
// Move instructions, conditional
InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,

View File

@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=THUMB
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=ARM
; rdar://7353541
; rdar://7354376
@ -9,19 +10,30 @@
define void @t(i32* nocapture %vals, i32 %c) nounwind {
entry:
; CHECK: t:
; ARM: t:
; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0
; ARM-NOT: ldr r{{[0-9]+}}, LCPI0_1
; ARM: LPC0_0:
; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]]
; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
; THUMB: t:
%0 = icmp eq i32 %c, 0 ; <i1> [#uses=1]
br i1 %0, label %return, label %bb.nph
bb.nph: ; preds = %entry
; CHECK: BB#1
; CHECK: ldr.n r2, LCPI0_0
; CHECK: add r2, pc
; CHECK: ldr r{{[0-9]+}}, [r2]
; CHECK: LBB0_2
; CHECK: LCPI0_0:
; CHECK-NOT: LCPI0_1:
; CHECK: .section
; ARM: LCPI0_0:
; ARM-NOT: LCPI0_1:
; ARM: .section
; THUMB: BB#1
; THUMB: ldr.n r2, LCPI0_0
; THUMB: add r2, pc
; THUMB: ldr r{{[0-9]+}}, [r2]
; THUMB: LBB0_2
; THUMB: LCPI0_0:
; THUMB-NOT: LCPI0_1:
; THUMB: .section
%.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
br label %bb