llvm-6502/lib/Target/PowerPC/PPCSubtarget.cpp
Hal Finkel 827307b95f Use PPC reciprocal estimates with Newton iteration in fast-math mode
When unsafe FP math operations are enabled, we can use the fre[s] and
frsqrte[s] instructions, which generate reciprocal (sqrt) estimates, together
with some Newton iteration, in order to quickly generate floating-point
division and sqrt results. All of these instructions are separately optional,
and so each has its own feature flag (except for the Altivec instructions,
which are covered under the existing Altivec flag). Doing this is not only
faster than using the IEEE-compliant fdiv/fsqrt instructions, but allows these
computations to be pipelined with other computations in order to hide their
overall latency.

I've also added a couple of missing fnmsub patterns which turned out to be
missing (but are necessary for good code generation of the Newton iterations).
Altivec needs a similar fix, but that will probably be more complicated because
fneg is expanded for Altivec's v4f32.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178617 91177308-0d34-0410-b5e6-96231b3b80d8
2013-04-03 04:01:11 +00:00

160 lines
5.2 KiB
C++

//===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPC specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "PPCGenSubtargetInfo.inc"
using namespace llvm;
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
: PPCGenSubtargetInfo(TT, CPU, FS)
, StackAlignment(16)
, DarwinDirective(PPC::DIR_NONE)
, HasMFOCRF(false)
, Has64BitSupport(false)
, Use64BitRegs(false)
, IsPPC64(is64Bit)
, HasAltivec(false)
, HasQPX(false)
, HasFSQRT(false)
, HasFRE(false)
, HasFRES(false)
, HasFRSQRTE(false)
, HasFRSQRTES(false)
, HasRecipPrec(false)
, HasSTFIWX(false)
, HasLFIWAX(false)
, HasFPRND(false)
, HasFPCVT(false)
, HasISEL(false)
, HasPOPCNTD(false)
, HasLDBRX(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
, TargetTriple(TT) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
#if (defined(__APPLE__) || defined(__linux__)) && \
(defined(__ppc__) || defined(__powerpc__))
if (CPUName == "generic")
CPUName = sys::getHostCPUName();
#endif
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
// Make sure 64-bit features are available when CPUname is generic
std::string FullFS = FS;
// If we are generating code for ppc64, verify that options make sense.
if (is64Bit) {
Has64BitSupport = true;
// Silently force 64-bit register use on ppc64.
Use64BitRegs = true;
if (!FullFS.empty())
FullFS = "+64bit," + FullFS;
else
FullFS = "+64bit";
}
// Parse features string.
ParseSubtargetFeatures(CPUName, FullFS);
// If the user requested use of 64-bit regs, but the cpu selected doesn't
// support it, ignore.
if (use64BitRegs() && !has64BitSupport())
Use64BitRegs = false;
// Set up darwin-specific properties.
if (isDarwin())
HasLazyResolverStubs = true;
// QPX requires a 32-byte aligned stack. Note that we need to do this if
// we're compiling for a BG/Q system regardless of whether or not QPX
// is enabled because external functions will assume this alignment.
if (hasQPX() || isBGQ())
StackAlignment = 32;
}
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
void PPCSubtarget::SetJITMode() {
// JIT mode doesn't want lazy resolver stubs, it knows exactly where
// everything is. This matters for PPC64, which codegens in PIC mode without
// stubs.
HasLazyResolverStubs = false;
// Calls to external functions need to use indirect calls
IsJITCodeModel = true;
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const {
// We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
return false;
// If symbol visibility is hidden, the extra load is not needed if
// the symbol is definitely defined in the current translation unit.
bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
return false;
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage() || isDecl;
}
bool PPCSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
// FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here,
// but we can't because we can't reassign the cr registers. There is a
// dependence between the cr register and the RLWINM instruction used
// to extract its value which the anti-dependency breaker can't currently
// see. Maybe we should make a late-expanded pseudo to encode this dependency.
// (the relevant code is in PPCDAGToDAGISel::SelectSETCC)
Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
if (isPPC64())
CriticalPathRCs.push_back(&PPC::G8RCRegClass);
else
CriticalPathRCs.push_back(&PPC::GPRCRegClass);
CriticalPathRCs.push_back(&PPC::F8RCRegClass);
CriticalPathRCs.push_back(&PPC::VRRCRegClass);
return OptLevel >= CodeGenOpt::Default;
}