llvm-6502/lib/Target/ARM/ARMTargetMachine.cpp

205 lines
7.3 KiB
C++
Raw Normal View History

//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
#include "ARMTargetMachine.h"
#include "ARMMCAsmInfo.h"
#include "ARMFrameLowering.h"
#include "ARM.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
Making use of VFP / NEON floating point multiply-accumulate / subtraction is difficult on current ARM implementations for a few reasons. 1. Even though a single vmla has latency that is one cycle shorter than a pair of vmul + vadd, a RAW hazard during the first (4? on Cortex-a8) can cause additional pipeline stall. So it's frequently better to single codegen vmul + vadd. 2. A vmla folowed by a vmul, vmadd, or vsub causes the second fp instruction to stall for 4 cycles. We need to schedule them apart. 3. A vmla followed vmla is a special case. Obvious issuing back to back RAW vmla + vmla is very bad. But this isn't ideal either: vmul vadd vmla Instead, we want to expand the second vmla: vmla vmul vadd Even with the 4 cycle vmul stall, the second sequence is still 2 cycles faster. Up to now, isel simply avoid codegen'ing fp vmla / vmls. This works well enough but it isn't the optimial solution. This patch attempts to make it possible to use vmla / vmls in cases where it is profitable. A. Add missing isel predicates which cause vmla to be codegen'ed. B. Make sure the fmul in (fadd (fmul)) has a single use. We don't want to compute a fmul and a fmla. C. Add additional isel checks for vmla, avoid cases where vmla is feeding into fp instructions (except for the #3 exceptional case). D. Add ARM hazard recognizer to model the vmla / vmls hazards. E. Add a special pre-regalloc case to expand vmla / vmls when it's likely the vmla / vmls will trigger one of the special hazards. Work in progress, only A+B are enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120960 91177308-0d34-0410-b5e6-96231b3b80d8
2010-12-05 22:04:16 +00:00
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
return new ARMMCAsmInfoDarwin();
return new ARMELFMCAsmInfo();
}
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll,
bool NoExecStack) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
if (TheTriple.isOSWindows()) {
llvm_unreachable("ARM does not support Windows COFF format");
return NULL;
}
return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
// Register the target asm info.
RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
// Register the MC Code Emitter
TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
// Register the object streamer.
TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
}
/// TargetMachine ctor - Create an ARM architecture model.
///
Reapply TargetRegistry refactoring commits. --- Reverse-merging r75799 into '.': U test/Analysis/PointerTracking U include/llvm/Target/TargetMachineRegistry.h U include/llvm/Target/TargetMachine.h U include/llvm/Target/TargetRegistry.h U include/llvm/Target/TargetSelect.h U tools/lto/LTOCodeGenerator.cpp U tools/lto/LTOModule.cpp U tools/llc/llc.cpp U lib/Target/PowerPC/PPCTargetMachine.h U lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp U lib/Target/PowerPC/PPCTargetMachine.cpp U lib/Target/PowerPC/PPC.h U lib/Target/ARM/ARMTargetMachine.cpp U lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp U lib/Target/ARM/ARMTargetMachine.h U lib/Target/ARM/ARM.h U lib/Target/XCore/XCoreTargetMachine.cpp U lib/Target/XCore/XCoreTargetMachine.h U lib/Target/PIC16/PIC16TargetMachine.cpp U lib/Target/PIC16/PIC16TargetMachine.h U lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp U lib/Target/Alpha/AlphaTargetMachine.cpp U lib/Target/Alpha/AlphaTargetMachine.h U lib/Target/X86/X86TargetMachine.h U lib/Target/X86/X86.h U lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h U lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp U lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h U lib/Target/X86/X86TargetMachine.cpp U lib/Target/MSP430/MSP430TargetMachine.cpp U lib/Target/MSP430/MSP430TargetMachine.h U lib/Target/CppBackend/CPPTargetMachine.h U lib/Target/CppBackend/CPPBackend.cpp U lib/Target/CBackend/CTargetMachine.h U lib/Target/CBackend/CBackend.cpp U lib/Target/TargetMachine.cpp U lib/Target/IA64/IA64TargetMachine.cpp U lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp U lib/Target/IA64/IA64TargetMachine.h U lib/Target/IA64/IA64.h U lib/Target/MSIL/MSILWriter.cpp U lib/Target/CellSPU/SPUTargetMachine.h U lib/Target/CellSPU/SPU.h U lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp U lib/Target/CellSPU/SPUTargetMachine.cpp U lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp U lib/Target/Mips/MipsTargetMachine.cpp U lib/Target/Mips/MipsTargetMachine.h U lib/Target/Mips/Mips.h U lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp U lib/Target/Sparc/SparcTargetMachine.cpp U lib/Target/Sparc/SparcTargetMachine.h U lib/ExecutionEngine/JIT/TargetSelect.cpp U lib/Support/TargetRegistry.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@75820 91177308-0d34-0410-b5e6-96231b3b80d8
2009-07-15 20:24:03 +00:00
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
const std::string &TT,
const std::string &CPU,
const std::string &FS,
bool isThumb)
: LLVMTargetMachine(T, TT),
Subtarget(TT, CPU, FS, isThumb),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
DefRelocModel = getRelocationModel();
// Default to soft float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Soft;
}
ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &CPU,
Reapply TargetRegistry refactoring commits. --- Reverse-merging r75799 into '.': U test/Analysis/PointerTracking U include/llvm/Target/TargetMachineRegistry.h U include/llvm/Target/TargetMachine.h U include/llvm/Target/TargetRegistry.h U include/llvm/Target/TargetSelect.h U tools/lto/LTOCodeGenerator.cpp U tools/lto/LTOModule.cpp U tools/llc/llc.cpp U lib/Target/PowerPC/PPCTargetMachine.h U lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp U lib/Target/PowerPC/PPCTargetMachine.cpp U lib/Target/PowerPC/PPC.h U lib/Target/ARM/ARMTargetMachine.cpp U lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp U lib/Target/ARM/ARMTargetMachine.h U lib/Target/ARM/ARM.h U lib/Target/XCore/XCoreTargetMachine.cpp U lib/Target/XCore/XCoreTargetMachine.h U lib/Target/PIC16/PIC16TargetMachine.cpp U lib/Target/PIC16/PIC16TargetMachine.h U lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp U lib/Target/Alpha/AlphaTargetMachine.cpp U lib/Target/Alpha/AlphaTargetMachine.h U lib/Target/X86/X86TargetMachine.h U lib/Target/X86/X86.h U lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h U lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp U lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h U lib/Target/X86/X86TargetMachine.cpp U lib/Target/MSP430/MSP430TargetMachine.cpp U lib/Target/MSP430/MSP430TargetMachine.h U lib/Target/CppBackend/CPPTargetMachine.h U lib/Target/CppBackend/CPPBackend.cpp U lib/Target/CBackend/CTargetMachine.h U lib/Target/CBackend/CBackend.cpp U lib/Target/TargetMachine.cpp U lib/Target/IA64/IA64TargetMachine.cpp U lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp U lib/Target/IA64/IA64TargetMachine.h U lib/Target/IA64/IA64.h U lib/Target/MSIL/MSILWriter.cpp U lib/Target/CellSPU/SPUTargetMachine.h U lib/Target/CellSPU/SPU.h U lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp U lib/Target/CellSPU/SPUTargetMachine.cpp U lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp U lib/Target/Mips/MipsTargetMachine.cpp U lib/Target/Mips/MipsTargetMachine.h U lib/Target/Mips/Mips.h U lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp U lib/Target/Sparc/SparcTargetMachine.cpp U lib/Target/Sparc/SparcTargetMachine.h U lib/ExecutionEngine/JIT/TargetSelect.cpp U lib/Support/TargetRegistry.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@75820 91177308-0d34-0410-b5e6-96231b3b80d8
2009-07-15 20:24:03 +00:00
const std::string &FS)
: ARMBaseTargetMachine(T, TT, CPU, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"v128:64:128-v64:64:64-n32")),
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
const std::string &CPU,
Reapply TargetRegistry refactoring commits. --- Reverse-merging r75799 into '.': U test/Analysis/PointerTracking U include/llvm/Target/TargetMachineRegistry.h U include/llvm/Target/TargetMachine.h U include/llvm/Target/TargetRegistry.h U include/llvm/Target/TargetSelect.h U tools/lto/LTOCodeGenerator.cpp U tools/lto/LTOModule.cpp U tools/llc/llc.cpp U lib/Target/PowerPC/PPCTargetMachine.h U lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp U lib/Target/PowerPC/PPCTargetMachine.cpp U lib/Target/PowerPC/PPC.h U lib/Target/ARM/ARMTargetMachine.cpp U lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp U lib/Target/ARM/ARMTargetMachine.h U lib/Target/ARM/ARM.h U lib/Target/XCore/XCoreTargetMachine.cpp U lib/Target/XCore/XCoreTargetMachine.h U lib/Target/PIC16/PIC16TargetMachine.cpp U lib/Target/PIC16/PIC16TargetMachine.h U lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp U lib/Target/Alpha/AlphaTargetMachine.cpp U lib/Target/Alpha/AlphaTargetMachine.h U lib/Target/X86/X86TargetMachine.h U lib/Target/X86/X86.h U lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h U lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp U lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h U lib/Target/X86/X86TargetMachine.cpp U lib/Target/MSP430/MSP430TargetMachine.cpp U lib/Target/MSP430/MSP430TargetMachine.h U lib/Target/CppBackend/CPPTargetMachine.h U lib/Target/CppBackend/CPPBackend.cpp U lib/Target/CBackend/CTargetMachine.h U lib/Target/CBackend/CBackend.cpp U lib/Target/TargetMachine.cpp U lib/Target/IA64/IA64TargetMachine.cpp U lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp U lib/Target/IA64/IA64TargetMachine.h U lib/Target/IA64/IA64.h U lib/Target/MSIL/MSILWriter.cpp U lib/Target/CellSPU/SPUTargetMachine.h U lib/Target/CellSPU/SPU.h U lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp U lib/Target/CellSPU/SPUTargetMachine.cpp U lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp U lib/Target/Mips/MipsTargetMachine.cpp U lib/Target/Mips/MipsTargetMachine.h U lib/Target/Mips/Mips.h U lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp U lib/Target/Sparc/SparcTargetMachine.cpp U lib/Target/Sparc/SparcTargetMachine.h U lib/ExecutionEngine/JIT/TargetSelect.cpp U lib/Support/TargetRegistry.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@75820 91177308-0d34-0410-b5e6-96231b3b80d8
2009-07-15 20:24:03 +00:00
const std::string &FS)
: ARMBaseTargetMachine(T, TT, CPU, FS, true),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"i16:16:32-i8:8:32-i1:8:32-"
"v128:32:128-v64:32:64-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-"
"v128:64:128-v64:64:64-a:0:32-n32")),
ELFWriterInfo(*this),
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
if (OptLevel != CodeGenOpt::None)
PM.add(createARMGlobalMergePass(getTargetLowering()));
return false;
}
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createARMISelDag(*this, OptLevel));
return false;
}
bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
This patch combines several changes from Evan Cheng for rdar://8659675. Making use of VFP / NEON floating point multiply-accumulate / subtraction is difficult on current ARM implementations for a few reasons. 1. Even though a single vmla has latency that is one cycle shorter than a pair of vmul + vadd, a RAW hazard during the first (4? on Cortex-a8) can cause additional pipeline stall. So it's frequently better to single codegen vmul + vadd. 2. A vmla folowed by a vmul, vmadd, or vsub causes the second fp instruction to stall for 4 cycles. We need to schedule them apart. 3. A vmla followed vmla is a special case. Obvious issuing back to back RAW vmla + vmla is very bad. But this isn't ideal either: vmul vadd vmla Instead, we want to expand the second vmla: vmla vmul vadd Even with the 4 cycle vmul stall, the second sequence is still 2 cycles faster. Up to now, isel simply avoid codegen'ing fp vmla / vmls. This works well enough but it isn't the optimial solution. This patch attempts to make it possible to use vmla / vmls in cases where it is profitable. A. Add missing isel predicates which cause vmla to be codegen'ed. B. Make sure the fmul in (fadd (fmul)) has a single use. We don't want to compute a fmul and a fmla. C. Add additional isel checks for vmla, avoid cases where vmla is feeding into fp instructions (except for the #3 exceptional case). D. Add ARM hazard recognizer to model the vmla / vmls hazards. E. Add a special pre-regalloc case to expand vmla / vmls when it's likely the vmla / vmls will trigger one of the special hazards. Enable these fp vmlx codegen changes for Cortex-A9. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129775 91177308-0d34-0410-b5e6-96231b3b80d8
2011-04-19 18:11:57 +00:00
if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
Making use of VFP / NEON floating point multiply-accumulate / subtraction is difficult on current ARM implementations for a few reasons. 1. Even though a single vmla has latency that is one cycle shorter than a pair of vmul + vadd, a RAW hazard during the first (4? on Cortex-a8) can cause additional pipeline stall. So it's frequently better to single codegen vmul + vadd. 2. A vmla folowed by a vmul, vmadd, or vsub causes the second fp instruction to stall for 4 cycles. We need to schedule them apart. 3. A vmla followed vmla is a special case. Obvious issuing back to back RAW vmla + vmla is very bad. But this isn't ideal either: vmul vadd vmla Instead, we want to expand the second vmla: vmla vmul vadd Even with the 4 cycle vmul stall, the second sequence is still 2 cycles faster. Up to now, isel simply avoid codegen'ing fp vmla / vmls. This works well enough but it isn't the optimial solution. This patch attempts to make it possible to use vmla / vmls in cases where it is profitable. A. Add missing isel predicates which cause vmla to be codegen'ed. B. Make sure the fmul in (fadd (fmul)) has a single use. We don't want to compute a fmul and a fmla. C. Add additional isel checks for vmla, avoid cases where vmla is feeding into fp instructions (except for the #3 exceptional case). D. Add ARM hazard recognizer to model the vmla / vmls hazards. E. Add a special pre-regalloc case to expand vmla / vmls when it's likely the vmla / vmls will trigger one of the special hazards. Work in progress, only A+B are enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120960 91177308-0d34-0410-b5e6-96231b3b80d8
2010-12-05 22:04:16 +00:00
PM.add(createMLxExpansionPass());
return true;
}
bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None) {
if (!Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
if (Subtarget.hasNEON())
PM.add(createNEONMoveFixPass());
}
// Expand some pseudo instructions into multiple instructions to allow
// proper scheduling.
PM.add(createARMExpandPseudoPass());
if (OptLevel != CodeGenOpt::None) {
if (!Subtarget.isThumb1Only())
PM.add(createIfConverterPass());
}
if (Subtarget.isThumb2())
PM.add(createThumb2ITBlockPass());
return true;
}
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
PM.add(createThumb2SizeReductionPass());
PM.add(createARMConstantIslandPass());
return true;
}
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
// FIXME: Move this to TargetJITInfo!
if (DefRelocModel == Reloc::Default)
setRelocationModel(Reloc::Static);
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
return false;
}