mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
48575f6ea7
difficult on current ARM implementations for a few reasons. 1. Even though a single vmla has a latency that is one cycle shorter than a pair of vmul + vadd, a RAW hazard during the first few cycles (4? on Cortex-A8) can cause an additional pipeline stall. So it's frequently better to simply codegen vmul + vadd. 2. A vmla followed by a vmul, vadd, or vsub causes the second fp instruction to stall for 4 cycles. We need to schedule them apart. 3. A vmla followed by a vmla is a special case. Obviously, issuing back-to-back RAW vmla + vmla is very bad. But this isn't ideal either: vmul vadd vmla Instead, we want to expand the second vmla: vmla vmul vadd Even with the 4 cycle vmul stall, the second sequence is still 2 cycles faster. Up to now, isel simply avoided codegen'ing fp vmla / vmls. This works well enough but it isn't the optimal solution. This patch attempts to make it possible to use vmla / vmls in cases where it is profitable. A. Add missing isel predicates which cause vmla to be codegen'ed. B. Make sure the fmul in (fadd (fmul)) has a single use. We don't want to compute both a fmul and a fmla. C. Add additional isel checks for vmla, to avoid cases where a vmla is feeding into fp instructions (except for the exceptional case #3). D. Add an ARM hazard recognizer to model the vmla / vmls hazards. E. Add a special pre-regalloc case to expand vmla / vmls when it's likely the vmla / vmls will trigger one of the special hazards. Work in progress, only A+B are enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120960 91177308-0d34-0410-b5e6-96231b3b80d8
203 lines
7.1 KiB
C++
203 lines
7.1 KiB
C++
//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "ARMTargetMachine.h"
|
|
#include "ARMMCAsmInfo.h"
|
|
#include "ARMFrameInfo.h"
|
|
#include "ARM.h"
|
|
#include "llvm/PassManager.h"
|
|
#include "llvm/CodeGen/Passes.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/FormattedStream.h"
|
|
#include "llvm/Target/TargetOptions.h"
|
|
#include "llvm/Target/TargetRegistry.h"
|
|
using namespace llvm;
|
|
|
|
// Hidden boolean command-line flag "expand-fp-mlx", off by default. When it is
// set, addPreRegAlloc() additionally adds the MLx expansion pass, provided
// that optimization is enabled and the subtarget reports VFP2 support.
static cl::opt<bool>ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden);
|
|
|
|
/// createMCAsmInfo - Allocate the assembler-info object that matches the OS
/// component of the triple string \p TT: the Darwin flavour when the OS is
/// Triple::Darwin, the ELF flavour for every other OS. \p T is not consulted.
/// The returned object is freshly allocated with new.
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
  Triple TargetTriple(TT);

  // Darwin is the only OS that gets special treatment here.
  if (TargetTriple.getOS() == Triple::Darwin)
    return new ARMMCAsmInfoDarwin();

  return new ARMELFMCAsmInfo();
}
|
|
|
|
// This is duplicated code. Refactor this.
|
|
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
|
|
MCContext &Ctx, TargetAsmBackend &TAB,
|
|
raw_ostream &OS,
|
|
MCCodeEmitter *Emitter,
|
|
bool RelaxAll) {
|
|
switch (Triple(TT).getOS()) {
|
|
case Triple::Darwin:
|
|
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
|
|
case Triple::MinGW32:
|
|
case Triple::MinGW64:
|
|
case Triple::Cygwin:
|
|
case Triple::Win32:
|
|
llvm_unreachable("ARM does not support Windows COFF format");
|
|
return NULL;
|
|
default:
|
|
return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
|
|
}
|
|
}
|
|
|
|
extern "C" void LLVMInitializeARMTarget() {
|
|
// Register the target.
|
|
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
|
|
RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
|
|
|
|
// Register the target asm info.
|
|
RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
|
|
RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
|
|
|
|
// Register the MC Code Emitter
|
|
TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
|
|
TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
|
|
|
|
// Register the asm backend.
|
|
TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
|
|
TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
|
|
|
|
// Register the object streamer.
|
|
TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
|
|
TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
|
|
|
|
}
|
|
|
|
/// TargetMachine ctor - Create an ARM architecture model.
|
|
///
|
|
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
|
|
const std::string &TT,
|
|
const std::string &FS,
|
|
bool isThumb)
|
|
: LLVMTargetMachine(T, TT),
|
|
Subtarget(TT, FS, isThumb),
|
|
JITInfo(),
|
|
InstrItins(Subtarget.getInstrItineraryData())
|
|
{
|
|
DefRelocModel = getRelocationModel();
|
|
}
|
|
|
|
/// ARMTargetMachine ctor - Build the ARM-mode target machine for triple \p TT
/// and feature string \p FS. The data layout string is chosen according to
/// whether the subtarget reports the APCS ABI. Reports a fatal error when the
/// subtarget has no ARM ops.
ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
                                   const std::string &FS)
    : ARMBaseTargetMachine(T, TT, FS, false),
      InstrInfo(Subtarget),
      DataLayout(Subtarget.isAPCS_ABI()
                     ? std::string("e-p:32:32-"
                                   "f64:32:64-i64:32:64-"
                                   "v128:32:128-v64:32:64-n32")
                     : std::string("e-p:32:32-"
                                   "f64:64:64-i64:64:64-"
                                   "v128:64:128-v64:64:64-n32")),
      ELFWriterInfo(*this),
      TLInfo(*this),
      TSInfo(*this),
      FrameInfo(Subtarget) {
  // Nothing more to do when the CPU has ARM ops.
  if (Subtarget.hasARMOps())
    return;

  report_fatal_error("CPU: '" + Subtarget.getCPUString() +
                     "' does not support ARM mode execution!");
}
|
|
|
|
/// ThumbTargetMachine ctor - Build the Thumb-mode target machine for triple
/// \p TT and feature string \p FS. The instruction info and frame info
/// objects are heap-allocated, with the concrete class picked by whether the
/// subtarget has Thumb2. The data layout string is chosen according to whether
/// the subtarget reports the APCS ABI.
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
                                       const std::string &FS)
    : ARMBaseTargetMachine(T, TT, FS, true),
      InstrInfo(Subtarget.hasThumb2()
                    ? static_cast<ARMBaseInstrInfo *>(
                          new Thumb2InstrInfo(Subtarget))
                    : static_cast<ARMBaseInstrInfo *>(
                          new Thumb1InstrInfo(Subtarget))),
      DataLayout(Subtarget.isAPCS_ABI()
                     ? std::string("e-p:32:32-"
                                   "f64:32:64-i64:32:64-"
                                   "i16:16:32-i8:8:32-i1:8:32-"
                                   "v128:32:128-v64:32:64-"
                                   "a:0:32-n32")
                     : std::string("e-p:32:32-"
                                   "f64:64:64-i64:64:64-"
                                   "i16:16:32-i8:8:32-i1:8:32-"
                                   "v128:64:128-v64:64:64-"
                                   "a:0:32-n32")),
      ELFWriterInfo(*this),
      TLInfo(*this),
      TSInfo(*this),
      FrameInfo(Subtarget.hasThumb2()
                    ? new ARMFrameInfo(Subtarget)
                    : static_cast<ARMFrameInfo *>(
                          new Thumb1FrameInfo(Subtarget))) {
  // All of the work happens in the initializer list.
}
|
|
|
|
// Pass Pipeline Configuration
|
|
/// addPreISel - Add the passes that run ahead of instruction selection. The
/// ARM global merge pass is added whenever \p OptLevel is not
/// CodeGenOpt::None. Always returns false.
bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
                                      CodeGenOpt::Level OptLevel) {
  // Nothing is added when optimization is disabled.
  if (OptLevel == CodeGenOpt::None)
    return false;

  PM.add(createARMGlobalMergePass(getTargetLowering()));
  return false;
}
|
|
|
|
/// addInstSelector - Add the ARM DAG instruction selector, built for this
/// target machine at the requested \p OptLevel, to \p PM. Always returns
/// false.
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel) {
  PM.add(createARMISelDag(*this, OptLevel));

  return false;
}
|
|
|
|
/// addPreRegAlloc - Add the passes that run before register allocation. Both
/// passes added here are optimizations, so none is added when \p OptLevel is
/// CodeGenOpt::None. Always returns true.
bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel) {
  if (OptLevel == CodeGenOpt::None)
    return true;

  // FIXME: temporarily disabling load / store optimization pass for Thumb1.
  // NOTE(review): the 'true' argument presumably selects the
  // pre-register-allocation variant of the pass -- confirm against its
  // declaration.
  if (!Subtarget.isThumb1Only())
    PM.add(createARMLoadStoreOptimizationPass(true));

  // MLx expansion is opt-in via the ExpandMLx flag and needs VFP2.
  if (ExpandMLx && Subtarget.hasVFP2())
    PM.add(createMLxExpansionPass());

  return true;
}
|
|
|
|
/// addPreSched2 - Add the passes that run before the second scheduling pass.
/// The pseudo-instruction expander is always added; the load / store
/// optimizer, NEON move fix and if-converter are added only when optimizing;
/// the IT-block pass is added for Thumb2 at any optimization level. Always
/// returns true.
bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
                                        CodeGenOpt::Level OptLevel) {
  const bool Optimizing = OptLevel != CodeGenOpt::None;

  // FIXME: temporarily disabling load / store optimization pass for Thumb1.
  if (Optimizing && !Subtarget.isThumb1Only())
    PM.add(createARMLoadStoreOptimizationPass());

  if (Optimizing && Subtarget.hasNEON())
    PM.add(createNEONMoveFixPass());

  // Break certain pseudo instructions into several real instructions so that
  // they can be scheduled properly.
  PM.add(createARMExpandPseudoPass());

  if (Optimizing && !Subtarget.isThumb1Only())
    PM.add(createIfConverterPass());

  if (Subtarget.isThumb2())
    PM.add(createThumb2ITBlockPass());

  return true;
}
|
|
|
|
/// addPreEmitPass - Add the passes that run just before code emission. The
/// Thumb2 size reduction pass is added for Thumb2 subtargets that do not
/// prefer 32-bit Thumb instructions; the constant island pass is always
/// added. \p OptLevel is not consulted. Always returns true.
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel) {
  if (Subtarget.isThumb2()) {
    if (!Subtarget.prefers32BitThumb())
      PM.add(createThumb2SizeReductionPass());
  }

  PM.add(createARMConstantIslandPass());

  return true;
}
|
|
|
|
/// addCodeEmitter - Add the ARM JIT machine code emitter, writing through
/// \p JCE, to \p PM. If the relocation model recorded at construction time was
/// Reloc::Default, the model is switched to Reloc::Static first. \p OptLevel
/// is not consulted. Always returns false.
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel,
                                          JITCodeEmitter &JCE) {
  // FIXME: Move this to TargetJITInfo!
  if (DefRelocModel == Reloc::Default) {
    setRelocationModel(Reloc::Static);
  }

  // The ARM machine code emitter pass.
  PM.add(createARMJITCodeEmitterPass(*this, JCE));

  return false;
}
|