mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
[X86] Tune LEA usage for Silvermont
According to Intel Software Optimization Manual on Silvermont in some cases LEA is better to be replaced with ADD instructions: "The rule of thumb for ADDs and LEAs is that it is justified to use LEA with a valid index and/or displacement for non-destructive destination purposes (especially useful for stack offset cases), or to use a SCALE. Otherwise, ADD(s) are preferable." Differential Revision: http://reviews.llvm.org/D3826 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209198 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6f242c93ec
commit
0d0bab5168
@ -166,6 +166,8 @@ def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
|
||||
"Call register indirect">;
|
||||
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
|
||||
"LEA instruction needs inputs at AG stage">;
|
||||
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
|
||||
"LEA instruction with certain arguments is slow">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 processors supported.
|
||||
@ -226,6 +228,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
|
||||
FeaturePCLMUL, FeatureAES,
|
||||
FeatureCallRegIndirect,
|
||||
FeaturePRFCHW,
|
||||
FeatureSlowLEA,
|
||||
FeatureSlowBTMem, FeatureFastUAMem]>;
|
||||
// "Arrandale" along with corei3 and corei5
|
||||
def : ProcessorModel<"corei7", SandyBridgeModel,
|
||||
|
@ -57,6 +57,11 @@ namespace {
|
||||
void processInstruction(MachineBasicBlock::iterator& I,
|
||||
MachineFunction::iterator MFI);
|
||||
|
||||
/// \brief Given a LEA instruction which is unprofitable
|
||||
/// on Silvermont try to replace it with an equivalent ADD instruction
|
||||
void processInstructionForSLM(MachineBasicBlock::iterator& I,
|
||||
MachineFunction::iterator MFI);
|
||||
|
||||
/// \brief Determine if an instruction references a machine register
|
||||
/// and, if so, whether it reads or writes the register.
|
||||
RegUsageState usesRegister(MachineOperand& p,
|
||||
@ -86,7 +91,7 @@ namespace {
|
||||
private:
|
||||
MachineFunction *MF;
|
||||
const TargetMachine *TM;
|
||||
const TargetInstrInfo *TII; // Machine instruction info.
|
||||
const X86InstrInfo *TII; // Machine instruction info.
|
||||
|
||||
};
|
||||
char FixupLEAPass::ID = 0;
|
||||
@ -98,7 +103,7 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
|
||||
MachineInstr* MI = MBBI;
|
||||
MachineInstr* NewMI;
|
||||
switch (MI->getOpcode()) {
|
||||
case X86::MOV32rr:
|
||||
case X86::MOV32rr:
|
||||
case X86::MOV64rr: {
|
||||
const MachineOperand& Src = MI->getOperand(1);
|
||||
const MachineOperand& Dest = MI->getOperand(0);
|
||||
@ -146,7 +151,7 @@ FunctionPass *llvm::createX86FixupLEAs() {
|
||||
bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
|
||||
MF = &Func;
|
||||
TM = &MF->getTarget();
|
||||
TII = TM->getInstrInfo();
|
||||
TII = static_cast<const X86InstrInfo*>(TM->getInstrInfo());
|
||||
|
||||
DEBUG(dbgs() << "Start X86FixupLEAs\n";);
|
||||
// Process all basic blocks.
|
||||
@ -243,9 +248,9 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p,
|
||||
MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI);
|
||||
if (NewMI) {
|
||||
++NumLEAs;
|
||||
DEBUG(dbgs() << "Candidate to replace:"; MBI->dump(););
|
||||
DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
|
||||
// now to replace with an equivalent LEA...
|
||||
DEBUG(dbgs() << "Replaced by: "; NewMI->dump(););
|
||||
DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
|
||||
MFI->erase(MBI);
|
||||
MachineBasicBlock::iterator J =
|
||||
static_cast<MachineBasicBlock::iterator> (NewMI);
|
||||
@ -254,10 +259,80 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p,
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace an unprofitable LEA (on Silvermont) with equivalent ADDs.
///
/// Per the Intel optimization manual, on Silvermont an LEA without a scaled
/// index or displacement is slower than plain ADDs. This rewrites
///   lea dst, [src1 + src2 + imm]   (scale <= 1, no segment)
/// into up to two ADDs when dst aliases one of the sources:
///   add dst, otherSrc ; add dst, imm
/// Requires that EFLAGS is dead at this point, since ADD clobbers it.
///
/// \param I   iterator at the candidate instruction; updated to point at the
///            last replacement instruction when a rewrite happens.
/// \param MFI basic block containing the instruction.
void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
                                            MachineFunction::iterator MFI) {
  MachineInstr *MI = I;
  const int opcode = MI->getOpcode();
  if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r &&
      opcode != X86::LEA64_32r)
    return;
  // Operand layout of LEA: 0=dst, 1=base, 2=scale, 3=index, 4=disp, 5=segment.
  // Bail out on segment overrides, non-immediate displacements, or live EFLAGS
  // (ADD writes flags, LEA does not).
  if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() ||
      !TII->isSafeToClobberEFLAGS(*MFI, I))
    return;
  const unsigned DstR = MI->getOperand(0).getReg();
  const unsigned SrcR1 = MI->getOperand(1).getReg();
  const unsigned SrcR2 = MI->getOperand(3).getReg();
  // The destructive ADD form requires the destination to alias a source.
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  // A real scale means LEA is the profitable choice; keep it.
  if (MI->getOperand(2).getImm() > 1)
    return;
  int addrr_opcode, addri_opcode;
  switch (opcode) {
  case X86::LEA16r:
    addrr_opcode = X86::ADD16rr;
    addri_opcode = X86::ADD16ri;
    break;
  case X86::LEA32r:
    addrr_opcode = X86::ADD32rr;
    addri_opcode = X86::ADD32ri;
    break;
  case X86::LEA64_32r:
  case X86::LEA64r:
    addrr_opcode = X86::ADD64rr;
    addri_opcode = X86::ADD64ri32;
    break;
  default:
    assert(false && "Unexpected LEA instruction");
    // In NDEBUG builds the assert compiles away; returning here avoids
    // falling through with addrr_opcode/addri_opcode uninitialized (UB).
    return;
  }
  DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  const MachineOperand &Dst = MI->getOperand(0);
  // Make ADD instruction for two registers writing to LEA's destination.
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MachineOperand &Src1 = MI->getOperand(SrcR1 == DstR ? 1 : 3);
    const MachineOperand &Src2 = MI->getOperand(SrcR1 == DstR ? 3 : 1);
    NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addrr_opcode))
                .addOperand(Dst)
                .addOperand(Src1)
                .addOperand(Src2);
    MFI->insert(I, NewMI);
    DEBUG(NewMI->dump(););
  }
  // Make ADD instruction for the immediate displacement, if any.
  if (MI->getOperand(4).getImm() != 0) {
    const MachineOperand &SrcR = MI->getOperand(SrcR1 == DstR ? 1 : 3);
    NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addri_opcode))
                .addOperand(Dst)
                .addOperand(SrcR)
                .addImm(MI->getOperand(4).getImm());
    MFI->insert(I, NewMI);
    DEBUG(NewMI->dump(););
  }
  // If we emitted anything, drop the original LEA and resume iteration at
  // the last instruction we inserted.
  if (NewMI) {
    MFI->erase(I);
    I = static_cast<MachineBasicBlock::iterator>(NewMI);
  }
}
|
||||
|
||||
/// Walk one basic block and fix up LEA-related instructions.
///
/// On Silvermont (SLM) the slow-LEA rewrite (LEA -> ADDs) is applied;
/// on other targets the address-generation-stall fixup runs instead.
/// NOTE(review): the scraped diff retained both the pre-change loop and its
/// replacement; this is the post-change version — the duplicated removed
/// loop would have processed every instruction twice.
///
/// \return false — this pass reports no IR change via this hook.
bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
                                     MachineFunction::iterator MFI) {
  for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
    if (TM->getSubtarget<X86Subtarget>().isSLM())
      processInstructionForSLM(I, MFI);
    else
      processInstruction(I, MFI);
  }
  return false;
}
|
||||
|
@ -1728,12 +1728,8 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction that
|
||||
/// would clobber the EFLAGS condition register. Note the result may be
|
||||
/// conservative. If it cannot definitely determine the safety after visiting
|
||||
/// a few instructions in each direction it assumes it's not safe.
|
||||
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) {
|
||||
bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const {
|
||||
MachineBasicBlock::iterator E = MBB.end();
|
||||
|
||||
// For compile time consideration, if we are not able to determine the
|
||||
|
@ -359,6 +359,13 @@ public:
|
||||
/// instruction that defines the specified register class.
|
||||
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
|
||||
|
||||
/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction that
|
||||
/// would clobber the EFLAGS condition register. Note the result may be
|
||||
/// conservative. If it cannot definitely determine the safety after visiting
|
||||
/// a few instructions in each direction it assumes it's not safe.
|
||||
bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const;
|
||||
|
||||
static bool isX86_64ExtendedReg(const MachineOperand &MO) {
|
||||
if (!MO.isReg()) return false;
|
||||
return X86II::isX86_64ExtendedReg(MO.getReg());
|
||||
|
@ -282,6 +282,7 @@ void X86Subtarget::initializeEnvironment() {
|
||||
PadShortFunctions = false;
|
||||
CallRegIndirect = false;
|
||||
LEAUsesAG = false;
|
||||
SlowLEA = false;
|
||||
stackAlignment = 4;
|
||||
// FIXME: this is a known good value for Yonah. How about others?
|
||||
MaxInlineSizeThreshold = 128;
|
||||
|
@ -178,6 +178,9 @@ protected:
|
||||
/// address generation (AG) time.
|
||||
bool LEAUsesAG;
|
||||
|
||||
/// SlowLEA - True if the LEA instruction with certain arguments is slow
|
||||
bool SlowLEA;
|
||||
|
||||
/// Processor has AVX-512 PreFetch Instructions
|
||||
bool HasPFI;
|
||||
|
||||
@ -315,11 +318,13 @@ public:
|
||||
bool padShortFunctions() const { return PadShortFunctions; }
|
||||
bool callRegIndirect() const { return CallRegIndirect; }
|
||||
bool LEAusesAG() const { return LEAUsesAG; }
|
||||
bool slowLEA() const { return SlowLEA; }
|
||||
bool hasCDI() const { return HasCDI; }
|
||||
bool hasPFI() const { return HasPFI; }
|
||||
bool hasERI() const { return HasERI; }
|
||||
|
||||
bool isAtom() const { return X86ProcFamily == IntelAtom; }
|
||||
bool isSLM() const { return X86ProcFamily == IntelSLM; }
|
||||
|
||||
const Triple &getTargetTriple() const { return TargetTriple; }
|
||||
|
||||
|
@ -226,7 +226,8 @@ bool X86PassConfig::addPreEmitPass() {
|
||||
ShouldPrint = true;
|
||||
}
|
||||
if (getOptLevel() != CodeGenOpt::None &&
|
||||
getX86Subtarget().LEAusesAG()){
|
||||
(getX86Subtarget().LEAusesAG() ||
|
||||
getX86Subtarget().slowLEA())){
|
||||
addPass(createX86FixupLEAs());
|
||||
ShouldPrint = true;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user