mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-20 11:32:33 +00:00
R600: Rework subtarget info and remove AMDILDevice classes
This should simplify the subtarget definitions and make it easier to add new ones. Reviewed-by: Vincent Lejeune <vljn@ovi.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183566 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c1dcb8d654
commit
3ff0abfaab
@ -11,23 +11,28 @@
|
||||
#ifndef AMDGPU_H
|
||||
#define AMDGPU_H
|
||||
|
||||
#include "AMDILDeviceInfo.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class FunctionPass;
|
||||
class AMDGPUInstrPrinter;
|
||||
class AMDGPUTargetMachine;
|
||||
class FunctionPass;
|
||||
class MCAsmInfo;
|
||||
class raw_ostream;
|
||||
class Target;
|
||||
class TargetMachine;
|
||||
|
||||
// R600 Passes
|
||||
FunctionPass* createR600TextureIntrinsicsReplacer();
|
||||
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
|
||||
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
|
||||
FunctionPass *createR600TextureIntrinsicsReplacer();
|
||||
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
||||
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
|
||||
FunctionPass *createR600Packetizer(TargetMachine &tm);
|
||||
FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
|
||||
FunctionPass *createAMDGPUCFGPreparationPass(TargetMachine &tm);
|
||||
FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);
|
||||
|
||||
// SI Passes
|
||||
FunctionPass *createSIAnnotateControlFlowPass();
|
||||
@ -38,7 +43,10 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
|
||||
// Passes common to R600 and SI
|
||||
Pass *createAMDGPUStructurizeCFGPass();
|
||||
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
||||
FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
|
||||
FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
|
||||
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
|
||||
|
||||
extern Target TheAMDGPUTarget;
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
@ -51,4 +59,41 @@ namespace ShaderType {
|
||||
};
|
||||
}
|
||||
|
||||
/// OpenCL uses address spaces to differentiate between
|
||||
/// various memory regions on the hardware. On the CPU
|
||||
/// all of the address spaces point to the same memory,
|
||||
/// however on the GPU, each address space points to
|
||||
/// a separate piece of memory that is unique from other
|
||||
/// memory locations.
|
||||
namespace AMDGPUAS {
|
||||
enum AddressSpaces {
|
||||
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
|
||||
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
|
||||
CONSTANT_ADDRESS = 2, ///< Address space for constant memory
|
||||
LOCAL_ADDRESS = 3, ///< Address space for local memory.
|
||||
REGION_ADDRESS = 4, ///< Address space for region memory.
|
||||
ADDRESS_NONE = 5, ///< Address space for unknown memory.
|
||||
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
|
||||
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
|
||||
CONSTANT_BUFFER_0 = 8,
|
||||
CONSTANT_BUFFER_1 = 9,
|
||||
CONSTANT_BUFFER_2 = 10,
|
||||
CONSTANT_BUFFER_3 = 11,
|
||||
CONSTANT_BUFFER_4 = 12,
|
||||
CONSTANT_BUFFER_5 = 13,
|
||||
CONSTANT_BUFFER_6 = 14,
|
||||
CONSTANT_BUFFER_7 = 15,
|
||||
CONSTANT_BUFFER_8 = 16,
|
||||
CONSTANT_BUFFER_9 = 17,
|
||||
CONSTANT_BUFFER_10 = 18,
|
||||
CONSTANT_BUFFER_11 = 19,
|
||||
CONSTANT_BUFFER_12 = 20,
|
||||
CONSTANT_BUFFER_13 = 21,
|
||||
CONSTANT_BUFFER_14 = 22,
|
||||
CONSTANT_BUFFER_15 = 23,
|
||||
LAST_ADDRESS = 24
|
||||
};
|
||||
|
||||
} // namespace AMDGPUAS
|
||||
|
||||
#endif // AMDGPU_H
|
||||
|
@ -14,56 +14,29 @@ include "AMDILBase.td"
|
||||
// Subtarget Features
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Debugging Features
|
||||
|
||||
def FeatureDumpCode : SubtargetFeature <"DumpCode",
|
||||
"DumpCode",
|
||||
"true",
|
||||
"Dump MachineInstrs in the CodeEmitter">;
|
||||
|
||||
// Target features
|
||||
|
||||
def FeatureFP64 : SubtargetFeature<"fp64",
|
||||
"CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
|
||||
"FP64",
|
||||
"true",
|
||||
"Enable 64bit double precision operations">;
|
||||
def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
|
||||
"CapsOverride[AMDGPUDeviceInfo::ByteStores]",
|
||||
"true",
|
||||
"Enable byte addressable stores">;
|
||||
def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
|
||||
"CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
|
||||
"true",
|
||||
"Enable duplicate barrier detection(HD5XXX or later).">;
|
||||
def FeatureImages : SubtargetFeature<"images",
|
||||
"CapsOverride[AMDGPUDeviceInfo::Images]",
|
||||
"true",
|
||||
"Enable image functions">;
|
||||
def FeatureMultiUAV : SubtargetFeature<"multi_uav",
|
||||
"CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
|
||||
"true",
|
||||
"Generate multiple UAV code(HD5XXX family or later)">;
|
||||
def FeatureMacroDB : SubtargetFeature<"macrodb",
|
||||
"CapsOverride[AMDGPUDeviceInfo::MacroDB]",
|
||||
"true",
|
||||
"Use internal macrodb, instead of macrodb in driver">;
|
||||
def FeatureNoAlias : SubtargetFeature<"noalias",
|
||||
"CapsOverride[AMDGPUDeviceInfo::NoAlias]",
|
||||
"true",
|
||||
"assert that all kernel argument pointers are not aliased">;
|
||||
def FeatureNoInline : SubtargetFeature<"no-inline",
|
||||
"CapsOverride[AMDGPUDeviceInfo::NoInline]",
|
||||
"true",
|
||||
"specify whether to not inline functions">;
|
||||
|
||||
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
|
||||
"Is64bit",
|
||||
"false",
|
||||
"true",
|
||||
"Specify if 64bit addressing should be used.">;
|
||||
|
||||
def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
|
||||
"Is32on64bit",
|
||||
"false",
|
||||
"Specify if 64bit sized pointers with 32bit addressing should be used.">;
|
||||
def FeatureDebug : SubtargetFeature<"debug",
|
||||
"CapsOverride[AMDGPUDeviceInfo::Debug]",
|
||||
"true",
|
||||
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
|
||||
def FeatureDumpCode : SubtargetFeature <"DumpCode",
|
||||
"DumpCode",
|
||||
"true",
|
||||
"Dump MachineInstrs in the CodeEmitter">;
|
||||
|
||||
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
|
||||
"R600ALUInst",
|
||||
@ -75,6 +48,11 @@ def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
|
||||
"true",
|
||||
"Specify use of dedicated vertex cache.">;
|
||||
|
||||
def FeatureCaymanISA : SubtargetFeature<"caymanISA",
|
||||
"CaymanISA",
|
||||
"true",
|
||||
"Use Cayman ISA">;
|
||||
|
||||
class SubtargetFeatureFetchLimit <string Value> :
|
||||
SubtargetFeature <"fetch"#Value,
|
||||
"TexVTXClauseSize",
|
||||
@ -84,6 +62,26 @@ class SubtargetFeatureFetchLimit <string Value> :
|
||||
def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
|
||||
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
|
||||
|
||||
class SubtargetFeatureGeneration <string Value,
|
||||
list<SubtargetFeature> Implies> :
|
||||
SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
|
||||
Value#" GPU generation", Implies>;
|
||||
|
||||
def FeatureR600 : SubtargetFeatureGeneration<"R600",
|
||||
[FeatureR600ALUInst, FeatureFetchLimit8]>;
|
||||
|
||||
def FeatureR700 : SubtargetFeatureGeneration<"R700",
|
||||
[FeatureFetchLimit16]>;
|
||||
|
||||
def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
|
||||
[FeatureFetchLimit16]>;
|
||||
|
||||
def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
|
||||
[FeatureFetchLimit16]>;
|
||||
|
||||
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
|
||||
[Feature64BitPtr, FeatureFP64]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {
|
||||
|
@ -63,7 +63,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
ELF::SHT_PROGBITS, 0,
|
||||
SectionKind::getReadOnly());
|
||||
OutStreamer.SwitchSection(ConfigSection);
|
||||
if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
EmitProgramInfoSI(MF);
|
||||
} else {
|
||||
EmitProgramInfoR600(MF);
|
||||
@ -105,7 +105,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
|
||||
}
|
||||
|
||||
unsigned RsrcReg;
|
||||
if (STM.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX) {
|
||||
if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
|
||||
// Evergreen / Northern Islands
|
||||
switch (MFI->ShaderType) {
|
||||
default: // Fall through
|
||||
|
@ -47,6 +47,6 @@ def CC_SI_Kernel : CallingConv<[
|
||||
def CC_AMDGPU : CallingConv<[
|
||||
CCIf<"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
|
||||
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_SI_Kernel>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"#
|
||||
"->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>>
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
|
||||
".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
|
||||
]>;
|
||||
|
@ -16,7 +16,6 @@
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
@ -13,6 +13,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -25,8 +26,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
|
||||
AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
|
||||
InstrItins = getInstrItineraryForCPU(CPU);
|
||||
|
||||
memset(CapsOverride, 0, sizeof(*CapsOverride)
|
||||
* AMDGPUDeviceInfo::MaxNumberCapabilities);
|
||||
// Default card
|
||||
StringRef GPU = CPU;
|
||||
Is64bit = false;
|
||||
@ -35,21 +34,13 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
|
||||
DefaultSize[2] = 1;
|
||||
HasVertexCache = false;
|
||||
TexVTXClauseSize = 0;
|
||||
Gen = AMDGPUSubtarget::R600;
|
||||
FP64 = false;
|
||||
CaymanISA = false;
|
||||
ParseSubtargetFeatures(GPU, FS);
|
||||
DevName = GPU;
|
||||
Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
|
||||
}
|
||||
|
||||
AMDGPUSubtarget::~AMDGPUSubtarget() {
|
||||
delete Device;
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
|
||||
assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
|
||||
"Caps index is out of bounds!");
|
||||
return CapsOverride[caps];
|
||||
}
|
||||
bool
|
||||
AMDGPUSubtarget::is64bit() const {
|
||||
return Is64bit;
|
||||
@ -62,6 +53,18 @@ short
|
||||
AMDGPUSubtarget::getTexVTXClauseSize() const {
|
||||
return TexVTXClauseSize;
|
||||
}
|
||||
enum AMDGPUSubtarget::Generation
|
||||
AMDGPUSubtarget::getGeneration() const {
|
||||
return Gen;
|
||||
}
|
||||
bool
|
||||
AMDGPUSubtarget::hasHWFP64() const {
|
||||
return FP64;
|
||||
}
|
||||
bool
|
||||
AMDGPUSubtarget::hasCaymanISA() const {
|
||||
return CaymanISA;
|
||||
}
|
||||
bool
|
||||
AMDGPUSubtarget::isTargetELF() const {
|
||||
return false;
|
||||
@ -77,21 +80,28 @@ AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
|
||||
|
||||
std::string
|
||||
AMDGPUSubtarget::getDataLayout() const {
|
||||
if (!Device) {
|
||||
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
|
||||
}
|
||||
return Device->getDataLayout();
|
||||
std::string DataLayout = std::string(
|
||||
"e"
|
||||
"-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
|
||||
"-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
"-n32:64"
|
||||
);
|
||||
|
||||
if (hasHWFP64()) {
|
||||
DataLayout.append("-f64:64:64");
|
||||
}
|
||||
|
||||
if (is64bit()) {
|
||||
DataLayout.append("-p:64:64:64");
|
||||
} else {
|
||||
DataLayout.append("-p:32:32:32");
|
||||
}
|
||||
|
||||
return DataLayout;
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDGPUSubtarget::getDeviceName() const {
|
||||
return DevName;
|
||||
}
|
||||
const AMDGPUDevice *
|
||||
AMDGPUSubtarget::device() const {
|
||||
return Device;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#ifndef AMDGPUSUBTARGET_H
|
||||
#define AMDGPUSUBTARGET_H
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
@ -27,9 +27,16 @@
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
|
||||
public:
|
||||
enum Generation {
|
||||
R600 = 0,
|
||||
R700,
|
||||
EVERGREEN,
|
||||
NORTHERN_ISLANDS,
|
||||
SOUTHERN_ISLANDS
|
||||
};
|
||||
|
||||
private:
|
||||
bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
|
||||
const AMDGPUDevice *Device;
|
||||
size_t DefaultSize[3];
|
||||
std::string DevName;
|
||||
bool Is64bit;
|
||||
@ -38,24 +45,27 @@ private:
|
||||
bool R600ALUInst;
|
||||
bool HasVertexCache;
|
||||
short TexVTXClauseSize;
|
||||
enum Generation Gen;
|
||||
bool FP64;
|
||||
bool CaymanISA;
|
||||
|
||||
InstrItineraryData InstrItins;
|
||||
|
||||
public:
|
||||
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
|
||||
virtual ~AMDGPUSubtarget();
|
||||
|
||||
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
|
||||
virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
|
||||
|
||||
bool isOverride(AMDGPUDeviceInfo::Caps) const;
|
||||
bool is64bit() const;
|
||||
bool hasVertexCache() const;
|
||||
short getTexVTXClauseSize() const;
|
||||
enum Generation getGeneration() const;
|
||||
bool hasHWFP64() const;
|
||||
bool hasCaymanISA() const;
|
||||
|
||||
// Helper functions to simplify if statements
|
||||
bool isTargetELF() const;
|
||||
const AMDGPUDevice* device() const;
|
||||
std::string getDataLayout() const;
|
||||
std::string getDeviceName() const;
|
||||
virtual size_t getDefaultSize(uint32_t dim) const;
|
||||
|
@ -58,12 +58,12 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
|
||||
LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
|
||||
Subtarget(TT, CPU, FS),
|
||||
Layout(Subtarget.getDataLayout()),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
||||
Subtarget.device()->getStackAlignment(), 0),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp, 16 // Stack Alignment
|
||||
, 0),
|
||||
IntrinsicInfo(this),
|
||||
InstrItins(&Subtarget.getInstrItineraryData()) {
|
||||
// TLInfo uses InstrInfo so it must be initialized after.
|
||||
if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
InstrInfo.reset(new R600InstrInfo(*this));
|
||||
TLInfo.reset(new R600TargetLowering(*this));
|
||||
} else {
|
||||
@ -82,7 +82,7 @@ public:
|
||||
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
enablePass(&MachineSchedulerID);
|
||||
MachineSchedRegistry::setDefault(createR600MachineScheduler);
|
||||
}
|
||||
@ -108,7 +108,7 @@ TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
bool
|
||||
AMDGPUPassConfig::addPreISel() {
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
addPass(createAMDGPUStructurizeCFGPass());
|
||||
addPass(createSIAnnotateControlFlowPass());
|
||||
} else {
|
||||
@ -121,7 +121,7 @@ bool AMDGPUPassConfig::addInstSelector() {
|
||||
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
|
||||
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
// The callbacks this pass uses are not implemented yet on SI.
|
||||
addPass(createAMDGPUIndirectAddressingPass(*TM));
|
||||
}
|
||||
@ -131,8 +131,8 @@ bool AMDGPUPassConfig::addInstSelector() {
|
||||
bool AMDGPUPassConfig::addPreRegAlloc() {
|
||||
addPass(createAMDGPUConvertToISAPass(*TM));
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
addPass(createR600VectorRegMerger(*TM));
|
||||
}
|
||||
return false;
|
||||
@ -141,7 +141,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
|
||||
bool AMDGPUPassConfig::addPostRegAlloc() {
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
|
||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
addPass(createSIInsertWaits(*TM));
|
||||
}
|
||||
return false;
|
||||
@ -155,7 +155,7 @@ bool AMDGPUPassConfig::addPreSched2() {
|
||||
|
||||
bool AMDGPUPassConfig::addPreEmitPass() {
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
addPass(createAMDGPUCFGPreparationPass(*TM));
|
||||
addPass(createAMDGPUCFGStructurizerPass(*TM));
|
||||
addPass(createR600EmitClauseMarkers(*TM));
|
||||
|
@ -1,121 +0,0 @@
|
||||
//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
/// This file contains the entry points for global functions defined in the LLVM
|
||||
/// AMDGPU back-end.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDIL_H
|
||||
#define AMDIL_H
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
#define ARENA_SEGMENT_RESERVED_UAVS 12
|
||||
#define DEFAULT_ARENA_UAV_ID 8
|
||||
#define DEFAULT_RAW_UAV_ID 7
|
||||
#define GLOBAL_RETURN_RAW_UAV_ID 11
|
||||
#define HW_MAX_NUM_CB 8
|
||||
#define MAX_NUM_UNIQUE_UAVS 8
|
||||
#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
|
||||
#define OPENCL_MAX_READ_IMAGES 128
|
||||
#define OPENCL_MAX_WRITE_IMAGES 8
|
||||
#define OPENCL_MAX_SAMPLERS 16
|
||||
|
||||
// The next two values can never be zero, as zero is the ID that is
|
||||
// used to assert against.
|
||||
#define DEFAULT_LDS_ID 1
|
||||
#define DEFAULT_GDS_ID 1
|
||||
#define DEFAULT_SCRATCH_ID 1
|
||||
#define DEFAULT_VEC_SLOTS 8
|
||||
|
||||
#define OCL_DEVICE_RV710 0x0001
|
||||
#define OCL_DEVICE_RV730 0x0002
|
||||
#define OCL_DEVICE_RV770 0x0004
|
||||
#define OCL_DEVICE_CEDAR 0x0008
|
||||
#define OCL_DEVICE_REDWOOD 0x0010
|
||||
#define OCL_DEVICE_JUNIPER 0x0020
|
||||
#define OCL_DEVICE_CYPRESS 0x0040
|
||||
#define OCL_DEVICE_CAICOS 0x0080
|
||||
#define OCL_DEVICE_TURKS 0x0100
|
||||
#define OCL_DEVICE_BARTS 0x0200
|
||||
#define OCL_DEVICE_CAYMAN 0x0400
|
||||
#define OCL_DEVICE_ALL 0x3FFF
|
||||
|
||||
/// The number of function ID's that are reserved for
|
||||
/// internal compiler usage.
|
||||
const unsigned int RESERVED_FUNCS = 1024;
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUInstrPrinter;
|
||||
class FunctionPass;
|
||||
class MCAsmInfo;
|
||||
class raw_ostream;
|
||||
class Target;
|
||||
class TargetMachine;
|
||||
|
||||
// Instruction selection passes.
|
||||
FunctionPass*
|
||||
createAMDGPUISelDag(TargetMachine &TM);
|
||||
FunctionPass*
|
||||
createAMDGPUPeepholeOpt(TargetMachine &TM);
|
||||
|
||||
// Pre emit passes.
|
||||
FunctionPass*
|
||||
createAMDGPUCFGPreparationPass(TargetMachine &TM);
|
||||
FunctionPass*
|
||||
createAMDGPUCFGStructurizerPass(TargetMachine &TM);
|
||||
|
||||
extern Target TheAMDGPUTarget;
|
||||
} // end namespace llvm;
|
||||
|
||||
// Include device information enumerations
|
||||
#include "AMDILDeviceInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
/// OpenCL uses address spaces to differentiate between
|
||||
/// various memory regions on the hardware. On the CPU
|
||||
/// all of the address spaces point to the same memory,
|
||||
/// however on the GPU, each address space points to
|
||||
/// a separate piece of memory that is unique from other
|
||||
/// memory locations.
|
||||
namespace AMDGPUAS {
|
||||
enum AddressSpaces {
|
||||
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
|
||||
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
|
||||
CONSTANT_ADDRESS = 2, ///< Address space for constant memory
|
||||
LOCAL_ADDRESS = 3, ///< Address space for local memory.
|
||||
REGION_ADDRESS = 4, ///< Address space for region memory.
|
||||
ADDRESS_NONE = 5, ///< Address space for unknown memory.
|
||||
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
|
||||
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
|
||||
CONSTANT_BUFFER_0 = 8,
|
||||
CONSTANT_BUFFER_1 = 9,
|
||||
CONSTANT_BUFFER_2 = 10,
|
||||
CONSTANT_BUFFER_3 = 11,
|
||||
CONSTANT_BUFFER_4 = 12,
|
||||
CONSTANT_BUFFER_5 = 13,
|
||||
CONSTANT_BUFFER_6 = 14,
|
||||
CONSTANT_BUFFER_7 = 15,
|
||||
CONSTANT_BUFFER_8 = 16,
|
||||
CONSTANT_BUFFER_9 = 17,
|
||||
CONSTANT_BUFFER_10 = 18,
|
||||
CONSTANT_BUFFER_11 = 19,
|
||||
CONSTANT_BUFFER_12 = 20,
|
||||
CONSTANT_BUFFER_13 = 21,
|
||||
CONSTANT_BUFFER_14 = 22,
|
||||
CONSTANT_BUFFER_15 = 23,
|
||||
LAST_ADDRESS = 24
|
||||
};
|
||||
|
||||
} // namespace AMDGPUAS
|
||||
|
||||
} // end namespace llvm
|
||||
#endif // AMDIL_H
|
@ -1,115 +0,0 @@
|
||||
//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// \file
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDIL7XXDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILDevice.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
|
||||
setCaps();
|
||||
std::string name = mSTM->getDeviceName();
|
||||
if (name == "rv710") {
|
||||
DeviceFlag = OCL_DEVICE_RV710;
|
||||
} else if (name == "rv730") {
|
||||
DeviceFlag = OCL_DEVICE_RV730;
|
||||
} else {
|
||||
DeviceFlag = OCL_DEVICE_RV770;
|
||||
}
|
||||
}
|
||||
|
||||
AMDGPU7XXDevice::~AMDGPU7XXDevice() {
|
||||
}
|
||||
|
||||
void AMDGPU7XXDevice::setCaps() {
|
||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
||||
}
|
||||
|
||||
size_t AMDGPU7XXDevice::getMaxLDSSize() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_700;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDGPU7XXDevice::getWavefrontSize() const {
|
||||
return AMDGPUDevice::HalfWavefrontSize;
|
||||
}
|
||||
|
||||
uint32_t AMDGPU7XXDevice::getGeneration() const {
|
||||
return AMDGPUDeviceInfo::HD4XXX;
|
||||
}
|
||||
|
||||
uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
|
||||
switch (DeviceID) {
|
||||
default:
|
||||
assert(0 && "ID type passed in is unknown!");
|
||||
break;
|
||||
case GLOBAL_ID:
|
||||
case CONSTANT_ID:
|
||||
case RAW_UAV_ID:
|
||||
case ARENA_UAV_ID:
|
||||
break;
|
||||
case LDS_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return DEFAULT_LDS_ID;
|
||||
}
|
||||
break;
|
||||
case SCRATCH_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
|
||||
return DEFAULT_SCRATCH_ID;
|
||||
}
|
||||
break;
|
||||
case GDS_ID:
|
||||
assert(0 && "GDS UAV ID is not supported on this chip");
|
||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
||||
return DEFAULT_GDS_ID;
|
||||
}
|
||||
break;
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDGPU770Device::~AMDGPU770Device() {
|
||||
}
|
||||
|
||||
void AMDGPU770Device::setCaps() {
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::FMA);
|
||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
||||
}
|
||||
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
|
||||
mHWBits.reset(AMDGPUDeviceInfo::LongOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::LongOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
||||
}
|
||||
|
||||
size_t AMDGPU770Device::getWavefrontSize() const {
|
||||
return AMDGPUDevice::WavefrontSize;
|
||||
}
|
||||
|
||||
AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
|
||||
}
|
||||
|
||||
AMDGPU710Device::~AMDGPU710Device() {
|
||||
}
|
||||
|
||||
size_t AMDGPU710Device::getWavefrontSize() const {
|
||||
return AMDGPUDevice::QuarterWavefrontSize;
|
||||
}
|
@ -1,72 +0,0 @@
|
||||
//==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ----------*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
/// \brief Interface for the subtarget data classes.
|
||||
///
|
||||
/// This file will define the interface that each generation needs to
|
||||
/// implement in order to correctly answer queries on the capabilities of the
|
||||
/// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef AMDIL7XXDEVICEIMPL_H
|
||||
#define AMDIL7XXDEVICEIMPL_H
|
||||
#include "AMDILDevice.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 7XX generation of devices and their respective sub classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
|
||||
///
|
||||
/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
|
||||
/// support the minimal features that are required to be considered OpenCL 1.0
|
||||
/// compliant and nothing more.
|
||||
class AMDGPU7XXDevice : public AMDGPUDevice {
|
||||
public:
|
||||
AMDGPU7XXDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPU7XXDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual size_t getWavefrontSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
virtual uint32_t getResourceID(uint32_t DeviceID) const;
|
||||
virtual uint32_t getMaxNumUAVs() const;
|
||||
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
};
|
||||
|
||||
/// \brief The AMDGPU770Device class represents the RV770 chip and its
|
||||
/// derivative cards.
|
||||
///
|
||||
/// The difference between this device and the base class is that this device
|
||||
/// adds support for double precision and has a larger wavefront size.
|
||||
class AMDGPU770Device : public AMDGPU7XXDevice {
|
||||
public:
|
||||
AMDGPU770Device(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPU770Device();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
};
|
||||
|
||||
/// \brief The AMDGPU710Device class derives from the 7XX base class.
|
||||
///
|
||||
/// This class is a smaller derivative, so we need to override some of the
|
||||
/// functions in order to correctly specify this information.
|
||||
class AMDGPU710Device : public AMDGPU7XXDevice {
|
||||
public:
|
||||
AMDGPU710Device(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPU710Device();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
#endif // AMDILDEVICEIMPL_H
|
@ -11,8 +11,8 @@
|
||||
#define DEBUGME 0
|
||||
#define DEBUG_TYPE "structcfg"
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/ADT/SCCIterator.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
@ -28,9 +28,12 @@
|
||||
#include "llvm/CodeGen/MachinePostDominators.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEFAULT_VEC_SLOTS 8
|
||||
|
||||
// TODO: move-begin.
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1,132 +0,0 @@
|
||||
//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// \file
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
// Default implementation for all of the classes.
|
||||
AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) {
|
||||
mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
|
||||
mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
|
||||
setCaps();
|
||||
DeviceFlag = OCL_DEVICE_ALL;
|
||||
}
|
||||
|
||||
AMDGPUDevice::~AMDGPUDevice() {
|
||||
mHWBits.clear();
|
||||
mSWBits.clear();
|
||||
}
|
||||
|
||||
size_t AMDGPUDevice::getMaxGDSSize() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AMDGPUDevice::getDeviceFlag() const {
|
||||
return DeviceFlag;
|
||||
}
|
||||
|
||||
size_t AMDGPUDevice::getMaxNumCBs() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
|
||||
return HW_MAX_NUM_CB;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDGPUDevice::getMaxCBSize() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
|
||||
return MAX_CB_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDGPUDevice::getMaxScratchSize() const {
|
||||
return 65536;
|
||||
}
|
||||
|
||||
uint32_t AMDGPUDevice::getStackAlignment() const {
|
||||
return 16;
|
||||
}
|
||||
|
||||
void AMDGPUDevice::setCaps() {
|
||||
mSWBits.set(AMDGPUDeviceInfo::HalfOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::ByteOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::ShortOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::NoInline);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::MacroDB);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
|
||||
} else {
|
||||
mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
|
||||
} else {
|
||||
mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
|
||||
}
|
||||
mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::LongOps);
|
||||
}
|
||||
|
||||
/// Classify how the capability \p Caps is provided by this device.
///
/// \returns Hardware when the bit is set in mHWBits, Software when it is set
/// only in mSWBits, and Unsupported when it is set in neither.  Having the
/// bit set in both vectors is a programming error and trips the assertion.
AMDGPUDeviceInfo::ExecutionMode
AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const {
  if (!mHWBits[Caps])
    return mSWBits[Caps] ? AMDGPUDeviceInfo::Software
                         : AMDGPUDeviceInfo::Unsupported;

  assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
  return AMDGPUDeviceInfo::Hardware;
}
|
||||
|
||||
/// \returns true when \p Mode is provided either in hardware or in software,
/// i.e. its execution mode is anything other than Unsupported.
bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const {
  const AMDGPUDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
  return How != AMDGPUDeviceInfo::Unsupported;
}
|
||||
|
||||
/// \returns true when the execution mode of \p Mode is Hardware.
bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const {
  const AMDGPUDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
  return How == AMDGPUDeviceInfo::Hardware;
}
|
||||
|
||||
/// \returns true when the execution mode of \p Mode is Software.
bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const {
  const AMDGPUDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
  return How == AMDGPUDeviceInfo::Software;
}
|
||||
|
||||
std::string
|
||||
AMDGPUDevice::getDataLayout() const {
|
||||
std::string DataLayout = std::string(
|
||||
"e"
|
||||
"-p:32:32:32"
|
||||
"-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
|
||||
"-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
"-n32:64"
|
||||
);
|
||||
|
||||
if (usesHardware(AMDGPUDeviceInfo::DoubleOps)) {
|
||||
DataLayout.append("-f64:64:64");
|
||||
}
|
||||
|
||||
return DataLayout;
|
||||
}
|
@ -1,117 +0,0 @@
|
||||
//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief Interface for the subtarget data classes.
|
||||
//
|
||||
/// This file will define the interface that each generation needs to
|
||||
/// implement in order to correctly answer queries on the capabilities of the
|
||||
/// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef AMDILDEVICEIMPL_H
|
||||
#define AMDILDEVICEIMPL_H
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
class MCStreamer;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Interface for data that is specific to a single device
|
||||
//===----------------------------------------------------------------------===//
|
||||
class AMDGPUDevice {
|
||||
public:
|
||||
AMDGPUDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPUDevice();
|
||||
|
||||
// Enum values for the various memory types.
|
||||
enum {
|
||||
RAW_UAV_ID = 0,
|
||||
ARENA_UAV_ID = 1,
|
||||
LDS_ID = 2,
|
||||
GDS_ID = 3,
|
||||
SCRATCH_ID = 4,
|
||||
CONSTANT_ID = 5,
|
||||
GLOBAL_ID = 6,
|
||||
MAX_IDS = 7
|
||||
} IO_TYPE_IDS;
|
||||
|
||||
/// \returns The max LDS size that the hardware supports. Size is in
|
||||
/// bytes.
|
||||
virtual size_t getMaxLDSSize() const = 0;
|
||||
|
||||
/// \returns The max GDS size that the hardware supports if the GDS is
|
||||
/// supported by the hardware. Size is in bytes.
|
||||
virtual size_t getMaxGDSSize() const;
|
||||
|
||||
/// \returns The max number of hardware constant address spaces that
|
||||
/// are supported by this device.
|
||||
virtual size_t getMaxNumCBs() const;
|
||||
|
||||
/// \returns The max number of bytes a single hardware constant buffer
|
||||
/// can support. Size is in bytes.
|
||||
virtual size_t getMaxCBSize() const;
|
||||
|
||||
/// \returns The max number of bytes allowed by the hardware scratch
|
||||
/// buffer. Size is in bytes.
|
||||
virtual size_t getMaxScratchSize() const;
|
||||
|
||||
/// \brief Get the flag that corresponds to the device.
|
||||
virtual uint32_t getDeviceFlag() const;
|
||||
|
||||
/// \returns The number of work-items that exist in a single hardware
|
||||
/// wavefront.
|
||||
virtual size_t getWavefrontSize() const = 0;
|
||||
|
||||
/// \brief Get the generational name of this specific device.
|
||||
virtual uint32_t getGeneration() const = 0;
|
||||
|
||||
/// \brief Get the stack alignment of this specific device.
|
||||
virtual uint32_t getStackAlignment() const;
|
||||
|
||||
/// \brief Get the resource ID for this specific device.
|
||||
virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
|
||||
|
||||
/// \brief Get the max number of UAV's for this device.
|
||||
virtual uint32_t getMaxNumUAVs() const = 0;
|
||||
|
||||
|
||||
// API utilizing more detailed capabilities of each family of
|
||||
// cards. If a capability is supported, then either usesHardware or
|
||||
// usesSoftware returned true. If usesHardware returned true, then
|
||||
// usesSoftware must return false for the same capability. Hardware
|
||||
// execution means that the feature is done natively by the hardware
|
||||
// and is not emulated by the softare. Software execution means
|
||||
// that the feature could be done in the hardware, but there is
|
||||
// software that emulates it with possibly using the hardware for
|
||||
// support since the hardware does not fully comply with OpenCL
|
||||
// specs.
|
||||
|
||||
bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
|
||||
bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
|
||||
bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
|
||||
virtual std::string getDataLayout() const;
|
||||
static const unsigned int MAX_LDS_SIZE_700 = 16384;
|
||||
static const unsigned int MAX_LDS_SIZE_800 = 32768;
|
||||
static const unsigned int WavefrontSize = 64;
|
||||
static const unsigned int HalfWavefrontSize = 32;
|
||||
static const unsigned int QuarterWavefrontSize = 16;
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
BitVector mHWBits;
|
||||
llvm::BitVector mSWBits;
|
||||
AMDGPUSubtarget *mSTM;
|
||||
uint32_t DeviceFlag;
|
||||
private:
|
||||
AMDGPUDeviceInfo::ExecutionMode
|
||||
getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
#endif // AMDILDEVICEIMPL_H
|
@ -1,97 +0,0 @@
|
||||
//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief Function that creates DeviceInfo from a device name and other information.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILDevices.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
namespace llvm {
|
||||
namespace AMDGPUDeviceInfo {
|
||||
|
||||
/// In DEBUG builds, assert that neither 64-bit pointer mode was requested for
/// a device that does not support it.  Does nothing in other builds.
static void checkNo64BitPointers(bool is64bit, bool is64on32bit) {
#if DEBUG
  assert(!is64bit && "This device does not support 64bit pointers!");
  assert(!is64on32bit && "This device does not support 64bit"
                         " on 32bit pointers!");
#else
  (void)is64bit;
  (void)is64on32bit;
#endif
}

/// Create the device object that matches \p deviceName.
///
/// \param deviceName  Name of the device.  A name whose third character is
///                    '7' selects a 7XX device, refined by the fourth
///                    character ('1' -> 710, '7' -> 770, anything else ->
///                    generic 7XX).  Otherwise the whole name is compared
///                    against the known device names.
/// \param ptr         Subtarget handed to the device constructor.
/// \param is64bit     Only checked by DEBUG-build assertions.
/// \param is64on32bit Only checked by DEBUG-build assertions.
/// \returns A newly allocated device; the caller owns it.  Unknown names
///          (including names too short to classify) yield a generic 7XX
///          device.
AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
                                AMDGPUSubtarget *ptr,
                                bool is64bit, bool is64on32bit) {
  // Only inspect the third/fourth character when the name is long enough;
  // indexing past the terminator of a shorter name is out of bounds.
  if (deviceName.size() >= 3 && deviceName[2] == '7') {
    // c_str() guarantees a readable terminator at index size().
    switch (deviceName.c_str()[3]) {
    case '1':
      return new AMDGPU710Device(ptr);
    case '7':
      return new AMDGPU770Device(ptr);
    default:
      return new AMDGPU7XXDevice(ptr);
    }
  }

  // Southern Islands devices are the only named devices created without the
  // 64-bit pointer checks.
  if (deviceName == "SI" ||
      deviceName == "tahiti" || deviceName == "pitcairn" ||
      deviceName == "verde" || deviceName == "oland" ||
      deviceName == "hainan")
    return new AMDGPUSIDevice(ptr);

  // Every remaining device, known or not, lacks 64-bit pointer support.
  checkNo64BitPointers(is64bit, is64on32bit);

  if (deviceName == "cypress")
    return new AMDGPUCypressDevice(ptr);
  if (deviceName == "juniper")
    return new AMDGPUEvergreenDevice(ptr);
  if (deviceName == "redwood" || deviceName == "sumo")
    return new AMDGPURedwoodDevice(ptr);
  if (deviceName == "cedar")
    return new AMDGPUCedarDevice(ptr);
  if (deviceName == "barts" || deviceName == "turks" ||
      deviceName == "caicos")
    return new AMDGPUNIDevice(ptr);
  if (deviceName == "cayman")
    return new AMDGPUCaymanDevice(ptr);

  // Unknown name: fall back to the generic 7XX device.
  return new AMDGPU7XXDevice(ptr);
}
|
||||
} // End namespace AMDGPUDeviceInfo
|
||||
} // End namespace llvm
|
@ -1,89 +0,0 @@
|
||||
//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// \file
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#ifndef AMDILDEVICEINFO_H
|
||||
#define AMDILDEVICEINFO_H
|
||||
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
class AMDGPUDevice;
class AMDGPUSubtarget;
namespace AMDGPUDeviceInfo {

/// Each capability can be executed using a hardware instruction, emulated
/// with a sequence of software instructions, or not supported at all.
enum ExecutionMode {
  Unsupported = 0, ///< Unsupported feature on the card (default value).
  /// This is the execution mode that is set if the feature is emulated in
  /// software.
  Software,
  /// This execution mode is set if the feature exists natively in hardware.
  Hardware
};

/// Capability indices.  Every value must stay below MaxNumberCapabilities.
enum Caps {
  HalfOps        = 0x1,  ///< Half float is supported or not.
  DoubleOps      = 0x2,  ///< Double is supported or not.
  ByteOps        = 0x3,  ///< Byte (char) is supported or not.
  ShortOps       = 0x4,  ///< Short is supported or not.
  LongOps        = 0x5,  ///< Long is supported or not.
  Images         = 0x6,  ///< Images are supported or not.
  ByteStores     = 0x7,  ///< ByteStores available (!HD4XXX).
  ConstantMem    = 0x8,  ///< Constant/CB memory.
  LocalMem       = 0x9,  ///< Local/LDS memory.
  PrivateMem     = 0xA,  ///< Scratch/Private/Stack memory.
  RegionMem      = 0xB,  ///< OCL GDS Memory Extension.
  FMA            = 0xC,  ///< Use HW FMA or SW FMA.
  ArenaSegment   = 0xD,  ///< Use for Arena UAV per pointer 12-1023.
  MultiUAV       = 0xE,  ///< Use for UAV per Pointer 0-7.
  Reserved0      = 0xF,  ///< Reserved flag.
  NoAlias        = 0x10, ///< Cached loads.
  Signed24BitOps = 0x11, ///< Peephole Optimization.
  /// Debug mode implies that no hardware features or optimizations are
  /// performed and that all memory accesses go through a single UAV (Arena on
  /// HD5XXX/HD6XXX and Raw on HD4XXX).
  Debug          = 0x12,
  CachedMem      = 0x13, ///< Cached mem is available or not.
  BarrierDetect  = 0x14, ///< Detect duplicate barriers.
  Reserved1      = 0x15, ///< Reserved flag.
  ByteLDSOps     = 0x16, ///< Flag to specify if byte LDS ops are available.
  ArenaVectors   = 0x17, ///< Flag to specify if vector loads from arena work.
  TmrReg         = 0x18, ///< Flag to specify if Tmr register is supported.
  NoInline       = 0x19, ///< Flag to specify that no inlining should occur.
  MacroDB        = 0x1A, ///< Flag to specify that backend handles macrodb.
  HW64BitDivMod  = 0x1B, ///< Flag for backend to generate 64bit div/mod.
  ArenaUAV       = 0x1C, ///< Flag to specify that arena uav is supported.
  PrivateUAV     = 0x1D, ///< Flag to specify that private memory uses uav's.
  /// If more capabilities are required, then this number needs to be
  /// increased.  All capabilities must come before this number.
  MaxNumberCapabilities = 0x20
};

/// These have to be in order with the older generations having the lower
/// number enumerations.
enum Generation {
  HD3XXX = 0, ///< 6XX based devices.
  HD4XXX,     ///< 7XX based devices.
  HD5XXX,     ///< Evergreen based devices.
  HD6XXX,     ///< NI/Evergreen+ based devices.
  HD7XXX,     ///< Southern Islands based devices.
  HDTEST,     ///< Experimental feature testing device.
  HDNUMGEN
};

/// Create the device object matching \p name for the subtarget \p ptr.
AMDGPUDevice*
getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
                  bool is64bit = false, bool is64on32bit = false);

} // namespace AMDGPUDeviceInfo
} // namespace llvm
|
||||
#endif // AMDILDEVICEINFO_H
|
@ -1,19 +0,0 @@
|
||||
//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// \file
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#ifndef AMDIL_DEVICES_H
|
||||
#define AMDIL_DEVICES_H
|
||||
// Include all of the device specific header files
|
||||
#include "AMDIL7XXDevice.h"
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILNIDevice.h"
|
||||
#include "AMDILSIDevice.h"
|
||||
|
||||
#endif // AMDIL_DEVICES_H
|
@ -1,169 +0,0 @@
|
||||
//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// \file
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// Construct an Evergreen device for the subtarget \p ST.
///
/// Applies the Evergreen capability set on top of the base one, then derives
/// the device flag from the subtarget's device name; any name other than
/// "cedar", "redwood" or "cypress" is flagged as Juniper.
AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
    : AMDGPUDevice(ST) {
  setCaps();

  const std::string DevName = ST->getDeviceName();
  DeviceFlag = OCL_DEVICE_JUNIPER;
  if (DevName == "cedar")
    DeviceFlag = OCL_DEVICE_CEDAR;
  else if (DevName == "redwood")
    DeviceFlag = OCL_DEVICE_REDWOOD;
  else if (DevName == "cypress")
    DeviceFlag = OCL_DEVICE_CYPRESS;
}
|
||||
|
||||
/// Nothing to release beyond what the base class handles.
AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {}
|
||||
|
||||
size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_800;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
||||
return MAX_LDS_SIZE_800;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
/// \returns The fixed UAV count reported for Evergreen devices: 12.
uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const { return 12; }
|
||||
|
||||
uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
|
||||
switch(id) {
|
||||
default:
|
||||
assert(0 && "ID type passed in is unknown!");
|
||||
break;
|
||||
case CONSTANT_ID:
|
||||
case RAW_UAV_ID:
|
||||
return GLOBAL_RETURN_RAW_UAV_ID;
|
||||
case GLOBAL_ID:
|
||||
case ARENA_UAV_ID:
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
case LDS_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return DEFAULT_LDS_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
case GDS_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
||||
return DEFAULT_GDS_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
case SCRATCH_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
|
||||
return DEFAULT_SCRATCH_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
|
||||
return AMDGPUDevice::WavefrontSize;
|
||||
}
|
||||
|
||||
uint32_t AMDGPUEvergreenDevice::getGeneration() const {
|
||||
return AMDGPUDeviceInfo::HD5XXX;
|
||||
}
|
||||
|
||||
void AMDGPUEvergreenDevice::setCaps() {
|
||||
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
|
||||
mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
|
||||
mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
|
||||
mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
|
||||
mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::ByteStores);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
||||
mSWBits.set(AMDGPUDeviceInfo::RegionMem);
|
||||
} else {
|
||||
mHWBits.set(AMDGPUDeviceInfo::LocalMem);
|
||||
mHWBits.set(AMDGPUDeviceInfo::RegionMem);
|
||||
}
|
||||
mHWBits.set(AMDGPUDeviceInfo::Images);
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::NoAlias);
|
||||
}
|
||||
mHWBits.set(AMDGPUDeviceInfo::CachedMem);
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
|
||||
}
|
||||
mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
|
||||
mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
|
||||
mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
|
||||
mHWBits.set(AMDGPUDeviceInfo::LongOps);
|
||||
mSWBits.reset(AMDGPUDeviceInfo::LongOps);
|
||||
mHWBits.set(AMDGPUDeviceInfo::TmrReg);
|
||||
}
|
||||
|
||||
/// Construct a Cypress device: an Evergreen device with the Cypress
/// capability adjustments applied on top.
AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
    : AMDGPUEvergreenDevice(ST) {
  setCaps();
}
|
||||
|
||||
/// Nothing to release beyond what the base classes handle.
AMDGPUCypressDevice::~AMDGPUCypressDevice() {}
|
||||
|
||||
void AMDGPUCypressDevice::setCaps() {
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
||||
mHWBits.set(AMDGPUDeviceInfo::FMA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Construct a Cedar device: an Evergreen device with the Cedar capability
/// adjustments applied on top.
AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
    : AMDGPUEvergreenDevice(ST) {
  setCaps();
}
|
||||
|
||||
/// Nothing to release beyond what the base classes handle.
AMDGPUCedarDevice::~AMDGPUCedarDevice() {}
|
||||
|
||||
/// Record FMA as a software capability on Cedar.
void AMDGPUCedarDevice::setCaps() { mSWBits.set(AMDGPUDeviceInfo::FMA); }
|
||||
|
||||
size_t AMDGPUCedarDevice::getWavefrontSize() const {
|
||||
return AMDGPUDevice::QuarterWavefrontSize;
|
||||
}
|
||||
|
||||
/// Construct a Redwood device: an Evergreen device with the Redwood
/// capability adjustments applied on top.
AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
    : AMDGPUEvergreenDevice(ST) {
  setCaps();
}
|
||||
|
||||
/// Nothing to release beyond what the base classes handle.
AMDGPURedwoodDevice::~AMDGPURedwoodDevice() {}
|
||||
|
||||
/// Record FMA as a software capability on Redwood.
void AMDGPURedwoodDevice::setCaps() { mSWBits.set(AMDGPUDeviceInfo::FMA); }
|
||||
|
||||
size_t AMDGPURedwoodDevice::getWavefrontSize() const {
|
||||
return AMDGPUDevice::HalfWavefrontSize;
|
||||
}
|
@ -1,93 +0,0 @@
|
||||
//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief Interface for the subtarget data classes.
|
||||
///
|
||||
/// This file will define the interface that each generation needs to
|
||||
/// implement in order to correctly answer queries on the capabilities of the
|
||||
/// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef AMDILEVERGREENDEVICE_H
|
||||
#define AMDILEVERGREENDEVICE_H
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILDevice.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Evergreen generation of devices and their respective sub classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
|
||||
/// series of cards.
|
||||
///
|
||||
/// This class contains information required to differentiate
|
||||
/// the Evergreen device from the generic AMDGPUDevice. This device represents
|
||||
/// the capabilities of the 'Juniper' cards, also known as the HD57XX.
|
||||
class AMDGPUEvergreenDevice : public AMDGPUDevice {
|
||||
public:
|
||||
AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPUEvergreenDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual size_t getMaxGDSSize() const;
|
||||
virtual size_t getWavefrontSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
virtual uint32_t getMaxNumUAVs() const;
|
||||
virtual uint32_t getResourceID(uint32_t) const;
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
};
|
||||
|
||||
/// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it has
|
||||
/// support for double precision operations. This device is used to represent
|
||||
/// both the Cypress and Hemlock cards, which are commercially known as HD58XX
|
||||
/// and HD59XX cards.
|
||||
class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
AMDGPUCypressDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPUCypressDevice();
|
||||
private:
|
||||
virtual void setCaps();
|
||||
};
|
||||
|
||||
|
||||
/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
|
||||
/// devices.
|
||||
///
|
||||
/// This class differs from the base AMDGPUEvergreenDevice in that the
|
||||
/// device is a ~quarter of the 'Juniper'. These are commercially known as the
|
||||
/// HD54XX and HD53XX series of cards.
|
||||
class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
AMDGPUCedarDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPUCedarDevice();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
};
|
||||
|
||||
/// \brief The AMDGPURedwoodDevice is the class that represents all of the 'Redwood' based
|
||||
/// devices.
|
||||
///
|
||||
/// This class differs from the base class, in that these devices are
|
||||
/// considered about half of a 'Juniper' device. These are commercially known as
|
||||
/// the HD55XX and HD56XX series of cards.
|
||||
class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPURedwoodDevice();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
#endif // AMDILEVERGREENDEVICE_H
|
@ -14,7 +14,6 @@
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPUISelLowering.h" // For AMDGPUISD
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDILDevices.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "SIISelLowering.h"
|
||||
#include "llvm/ADT/ValueMap.h"
|
||||
@ -168,7 +167,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
default: break;
|
||||
case ISD::BUILD_VECTOR: {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
break;
|
||||
}
|
||||
// BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
|
||||
@ -198,7 +197,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
case ISD::BUILD_PAIR: {
|
||||
SDValue RC, SubReg0, SubReg1;
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
break;
|
||||
}
|
||||
if (N->getValueType(0) == MVT::i128) {
|
||||
@ -223,7 +222,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
// XXX: Custom immediate lowering not implemented yet. Instead we use
|
||||
// pseudo instructions defined in SIInstructions.td
|
||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
break;
|
||||
}
|
||||
const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
||||
@ -318,7 +317,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
// Fold operands of selected node
|
||||
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
||||
const R600InstrInfo *TII =
|
||||
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
||||
if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
|
||||
@ -746,7 +745,7 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
|
||||
|
||||
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
|
||||
|
||||
if (Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX) {
|
||||
if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include "AMDGPUISelLowering.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILDevices.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
@ -156,21 +155,19 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
|
||||
setOperationAction(ISD::SELECT_CC, VT, Expand);
|
||||
|
||||
}
|
||||
if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
|
||||
setOperationAction(ISD::MULHU, MVT::i64, Expand);
|
||||
setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::MULHS, MVT::i64, Expand);
|
||||
setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::SREM, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::Constant , MVT::i64 , Legal);
|
||||
setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
|
||||
}
|
||||
if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
|
||||
setOperationAction(ISD::MULHU, MVT::i64, Expand);
|
||||
setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::MULHS, MVT::i64, Expand);
|
||||
setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::SREM, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::Constant , MVT::i64 , Legal);
|
||||
setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
|
||||
if (STM.hasHWFP64()) {
|
||||
// we support loading/storing v2f64 but not operations on the type
|
||||
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
|
||||
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
|
||||
|
@ -10,63 +10,6 @@
|
||||
// This file describes the AMDIL instructions in TableGen format.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDIL Instruction Predicate Definitions
|
||||
// Predicate that is set to true if the hardware supports double precision
|
||||
// divide
|
||||
def HasHWDDiv : Predicate<"Subtarget.device()"
|
||||
"->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
|
||||
"Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
|
||||
|
||||
// Predicate that is set to true if the hardware supports double, but not double
|
||||
// precision divide in hardware
|
||||
def HasSWDDiv : Predicate<"Subtarget.device()"
|
||||
"->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
|
||||
"Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
|
||||
|
||||
// Predicate that is set to true if the hardware support 24bit signed
|
||||
// math ops. Otherwise a software expansion to 32bit math ops is used instead.
|
||||
def HasHWSign24Bit : Predicate<"Subtarget.device()"
|
||||
"->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
|
||||
|
||||
// Predicate that is set to true if 64bit operations are supported or not
|
||||
def HasHW64Bit : Predicate<"Subtarget.device()"
|
||||
"->usesHardware(AMDGPUDeviceInfo::LongOps)">;
|
||||
def HasSW64Bit : Predicate<"Subtarget.device()"
|
||||
"->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
|
||||
|
||||
// Predicate that is set to true if the timer register is supported
|
||||
def HasTmrRegister : Predicate<"Subtarget.device()"
|
||||
"->isSupported(AMDGPUDeviceInfo::TmrReg)">;
|
||||
// Predicate that is true if we are at least evergreen series
|
||||
def HasDeviceIDInst : Predicate<"Subtarget.device()"
|
||||
"->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
|
||||
|
||||
// Predicate that is true if we have region address space.
|
||||
def hasRegionAS : Predicate<"Subtarget.device()"
|
||||
"->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
|
||||
|
||||
// Predicate that is true if we don't have region address space.
|
||||
def noRegionAS : Predicate<"!Subtarget.device()"
|
||||
"->isSupported(AMDGPUDeviceInfo::RegionMem)">;
|
||||
|
||||
|
||||
// Predicate that is set to true if 64bit Mul is supported in the IL or not
|
||||
def HasHW64Mul : Predicate<"Subtarget.calVersion()"
|
||||
">= CAL_VERSION_SC_139"
|
||||
"&& Subtarget.device()"
|
||||
"->getGeneration() >="
|
||||
"AMDGPUDeviceInfo::HD5XXX">;
|
||||
def HasSW64Mul : Predicate<"Subtarget.calVersion()"
|
||||
"< CAL_VERSION_SC_139">;
|
||||
// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not
|
||||
def HasHW64DivMod : Predicate<"Subtarget.device()"
|
||||
"->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
|
||||
def HasSW64DivMod : Predicate<"Subtarget.device()"
|
||||
"->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
|
||||
|
||||
// Predicate that is set to true if 64bit pointers are used.
|
||||
def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
|
||||
def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Custom Operands
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
@ -14,7 +14,6 @@
|
||||
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
|
@ -1,65 +0,0 @@
|
||||
//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// \file
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILNIDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Constructs a Northern Islands device on top of the Evergreen base class and
// picks DeviceFlag from the subtarget's device name: "caicos", "turks" and
// "cayman" each map to their own OCL_DEVICE_* flag; any other name falls back
// to OCL_DEVICE_BARTS.
AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUEvergreenDevice(ST) {
|
||||
std::string name = ST->getDeviceName();
|
||||
if (name == "caicos") {
|
||||
DeviceFlag = OCL_DEVICE_CAICOS;
|
||||
} else if (name == "turks") {
|
||||
DeviceFlag = OCL_DEVICE_TURKS;
|
||||
} else if (name == "cayman") {
|
||||
DeviceFlag = OCL_DEVICE_CAYMAN;
|
||||
} else {
|
||||
DeviceFlag = OCL_DEVICE_BARTS;
|
||||
}
|
||||
}
|
||||
// Destructor; the body is empty.
AMDGPUNIDevice::~AMDGPUNIDevice() {
|
||||
}
|
||||
|
||||
// Returns MAX_LDS_SIZE_900 when usesHardware(AMDGPUDeviceInfo::LocalMem)
// reports true, and 0 otherwise.
size_t
|
||||
AMDGPUNIDevice::getMaxLDSSize() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_900;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the generation identifier for this device class; the value is
// always AMDGPUDeviceInfo::HD6XXX.
uint32_t
|
||||
AMDGPUNIDevice::getGeneration() const {
|
||||
return AMDGPUDeviceInfo::HD6XXX;
|
||||
}
|
||||
|
||||
|
||||
// Constructs a Cayman device: runs the AMDGPUNIDevice constructor first, then
// applies the Cayman capability bits via setCaps().
// NOTE: setCaps() is declared virtual in AMDILNIDevice.h; a virtual call made
// from a constructor binds to this class's version, not to an override in a
// further-derived class.
AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUNIDevice(ST) {
|
||||
setCaps();
|
||||
}
|
||||
|
||||
// Destructor; the body is empty.
AMDGPUCaymanDevice::~AMDGPUCaymanDevice() {
|
||||
}
|
||||
|
||||
// Updates the hardware (mHWBits) and software (mSWBits) capability bit sets
// for Cayman:
//  - DoubleOps and FMA are set in mHWBits only when the subtarget reports an
//    override for DoubleOps (mSTM->isOverride).
//  - Signed24BitOps is unconditionally set in mHWBits and cleared in mSWBits.
//  - ArenaSegment is unconditionally set in mSWBits.
void
|
||||
AMDGPUCaymanDevice::setCaps() {
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
||||
mHWBits.set(AMDGPUDeviceInfo::FMA);
|
||||
}
|
||||
mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
|
||||
mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
|
||||
}
|
||||
|
@ -1,57 +0,0 @@
|
||||
//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
/// \brief Interface for the subtarget data classes.
|
||||
///
|
||||
/// This file will define the interface that each generation needs to
|
||||
/// implement in order to correctly answer queries on the capabilities of the
|
||||
/// specific hardware.
|
||||
//===---------------------------------------------------------------------===//
|
||||
#ifndef AMDILNIDEVICE_H
|
||||
#define AMDILNIDEVICE_H
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget;
|
||||
//===---------------------------------------------------------------------===//
|
||||
// NI generation of devices and their respective sub classes
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
/// \brief The AMDGPUNIDevice is the base class for all Northern Islands series of
|
||||
/// cards.
|
||||
///
|
||||
/// It is very similar to the AMDGPUEvergreenDevice, with the major
|
||||
/// exception being differences in wavefront size and hardware capabilities. The
|
||||
/// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
||||
/// integer operations
|
||||
class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
// Takes the owning subtarget; the pointer is forwarded to the
// AMDGPUEvergreenDevice base constructor.
AMDGPUNIDevice(AMDGPUSubtarget*);
|
||||
virtual ~AMDGPUNIDevice();
|
||||
// Maximum LDS size reported for this device class.
virtual size_t getMaxLDSSize() const;
|
||||
// Generation identifier reported for this device class.
virtual uint32_t getGeneration() const;
|
||||
};
|
||||
|
||||
/// Just as the AMDGPUCypressDevice is the double capable version of the
|
||||
/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version
|
||||
/// of the AMDGPUNIDevice. The other major difference is that the Cayman Device
|
||||
/// has 4 wide ALUs, whereas the rest of the NI family is 5 wide.
|
||||
class AMDGPUCaymanDevice: public AMDGPUNIDevice {
|
||||
public:
|
||||
// Takes the owning subtarget; the pointer is forwarded to the
// AMDGPUNIDevice base constructor.
AMDGPUCaymanDevice(AMDGPUSubtarget*);
|
||||
virtual ~AMDGPUCaymanDevice();
|
||||
private:
|
||||
// Private hook that adjusts the capability bits for Cayman.
virtual void setCaps();
|
||||
};
|
||||
|
||||
static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
|
||||
} // namespace llvm
|
||||
#endif // AMDILNIDEVICE_H
|
@ -1,48 +0,0 @@
|
||||
//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// \file
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILSIDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILNIDevice.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Constructs a Southern Islands device; all work is delegated to the
// AMDGPUEvergreenDevice base constructor and the body is empty.
AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUEvergreenDevice(ST) {
|
||||
}
|
||||
// Destructor; the body is empty.
AMDGPUSIDevice::~AMDGPUSIDevice() {
|
||||
}
|
||||
|
||||
// Returns MAX_LDS_SIZE_900 when usesHardware(AMDGPUDeviceInfo::LocalMem)
// reports true, and 0 otherwise. MAX_LDS_SIZE_900 is the constant defined in
// AMDILNIDevice.h, which this file includes.
size_t
|
||||
AMDGPUSIDevice::getMaxLDSSize() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_900;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the generation identifier for this device class; the value is
// always AMDGPUDeviceInfo::HD7XXX.
uint32_t
|
||||
AMDGPUSIDevice::getGeneration() const {
|
||||
return AMDGPUDeviceInfo::HD7XXX;
|
||||
}
|
||||
|
||||
// Returns the data layout string for Southern Islands as a std::string. The
// adjacent string literals below are concatenated by the compiler into one
// '-'-separated specification. In LLVM's data layout syntax "e" selects
// little-endian, "p:64:64:64" describes 64-bit pointers, the "i"/"f"/"v"
// entries give size:abi:preferred alignments for integer, float and vector
// types, and "n32:64" lists the native integer widths.
std::string
|
||||
AMDGPUSIDevice::getDataLayout() const {
|
||||
return std::string(
|
||||
"e"
|
||||
"-p:64:64:64"
|
||||
"-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128"
|
||||
"-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
|
||||
"-v2048:2048:2048"
|
||||
"-n32:64"
|
||||
);
|
||||
}
|
@ -1,39 +0,0 @@
|
||||
//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief Interface for the subtarget data classes.
|
||||
///
|
||||
/// This file will define the interface that each generation needs to
|
||||
/// implement in order to correctly answer queries on the capabilities of the
|
||||
/// specific hardware.
|
||||
//===---------------------------------------------------------------------===//
|
||||
#ifndef AMDILSIDEVICE_H
|
||||
#define AMDILSIDEVICE_H
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SI generation of devices and their respective sub classes
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
/// \brief The AMDGPUSIDevice is the base class for all Southern Islands series
|
||||
/// of cards.
|
||||
class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
// Takes the owning subtarget; the pointer is forwarded to the
// AMDGPUEvergreenDevice base constructor.
AMDGPUSIDevice(AMDGPUSubtarget*);
|
||||
virtual ~AMDGPUSIDevice();
|
||||
// Maximum LDS size reported for this device class.
virtual size_t getMaxLDSSize() const;
|
||||
// Generation identifier reported for this device class.
virtual uint32_t getGeneration() const;
|
||||
// Data layout string used for this device class.
virtual std::string getDataLayout() const;
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
#endif // AMDILSIDEVICE_H
|
@ -12,16 +12,10 @@ tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
|
||||
add_public_tablegen_target(AMDGPUCommonTableGen)
|
||||
|
||||
add_llvm_target(R600CodeGen
|
||||
AMDIL7XXDevice.cpp
|
||||
AMDILCFGStructurizer.cpp
|
||||
AMDILDevice.cpp
|
||||
AMDILDeviceInfo.cpp
|
||||
AMDILEvergreenDevice.cpp
|
||||
AMDILIntrinsicInfo.cpp
|
||||
AMDILISelDAGToDAG.cpp
|
||||
AMDILISelLowering.cpp
|
||||
AMDILNIDevice.cpp
|
||||
AMDILSIDevice.cpp
|
||||
AMDGPUAsmPrinter.cpp
|
||||
AMDGPUFrameLowering.cpp
|
||||
AMDGPUIndirectAddressing.cpp
|
||||
|
@ -10,41 +10,41 @@
|
||||
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
|
||||
: Processor<Name, itin, Features>;
|
||||
def : Proc<"", R600_VLIW5_Itin,
|
||||
[FeatureR600ALUInst, FeatureVertexCache, FeatureFetchLimit8]>;
|
||||
[FeatureR600, FeatureVertexCache]>;
|
||||
def : Proc<"r600", R600_VLIW5_Itin,
|
||||
[FeatureR600ALUInst , FeatureVertexCache, FeatureFetchLimit8]>;
|
||||
[FeatureR600 , FeatureVertexCache]>;
|
||||
def : Proc<"rs880", R600_VLIW5_Itin,
|
||||
[FeatureR600ALUInst, FeatureFetchLimit8]>;
|
||||
[FeatureR600]>;
|
||||
def : Proc<"rv670", R600_VLIW5_Itin,
|
||||
[FeatureR600ALUInst, FeatureFP64, FeatureVertexCache, FeatureFetchLimit8]>;
|
||||
[FeatureR600, FeatureFP64, FeatureVertexCache]>;
|
||||
def : Proc<"rv710", R600_VLIW5_Itin,
|
||||
[FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureR700, FeatureVertexCache]>;
|
||||
def : Proc<"rv730", R600_VLIW5_Itin,
|
||||
[FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureR700, FeatureVertexCache]>;
|
||||
def : Proc<"rv770", R600_VLIW5_Itin,
|
||||
[FeatureFP64, FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureR700, FeatureFP64, FeatureVertexCache]>;
|
||||
def : Proc<"cedar", R600_VLIW5_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureEvergreen, FeatureVertexCache]>;
|
||||
def : Proc<"redwood", R600_VLIW5_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureEvergreen, FeatureVertexCache]>;
|
||||
def : Proc<"sumo", R600_VLIW5_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureFetchLimit16]>;
|
||||
[FeatureEvergreen]>;
|
||||
def : Proc<"juniper", R600_VLIW5_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureEvergreen, FeatureVertexCache]>;
|
||||
def : Proc<"cypress", R600_VLIW5_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureEvergreen, FeatureFP64, FeatureVertexCache]>;
|
||||
def : Proc<"barts", R600_VLIW5_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureNorthernIslands, FeatureVertexCache]>;
|
||||
def : Proc<"turks", R600_VLIW5_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache, FeatureFetchLimit16]>;
|
||||
[FeatureNorthernIslands, FeatureVertexCache]>;
|
||||
def : Proc<"caicos", R600_VLIW5_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureFetchLimit16]>;
|
||||
[FeatureNorthernIslands]>;
|
||||
def : Proc<"cayman", R600_VLIW4_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureFP64, FeatureFetchLimit16]>;
|
||||
[FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
|
||||
|
||||
def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"oland", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"hainan", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"SI", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"tahiti", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>;
|
||||
|
@ -65,7 +65,7 @@ private:
|
||||
|
||||
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
|
||||
unsigned Opcode = 0;
|
||||
bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX);
|
||||
bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
|
||||
switch (CFI) {
|
||||
case CF_TC:
|
||||
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
|
||||
@ -98,7 +98,7 @@ private:
|
||||
Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
|
||||
break;
|
||||
case CF_END:
|
||||
if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) {
|
||||
if (ST.hasCaymanISA()) {
|
||||
Opcode = AMDGPU::CF_END_CM;
|
||||
break;
|
||||
}
|
||||
@ -301,17 +301,19 @@ private:
|
||||
}
|
||||
|
||||
unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
|
||||
switch (ST.device()->getGeneration()) {
|
||||
case AMDGPUDeviceInfo::HD4XXX:
|
||||
switch (ST.getGeneration()) {
|
||||
case AMDGPUSubtarget::R600:
|
||||
case AMDGPUSubtarget::R700:
|
||||
if (hasPush)
|
||||
StackSubEntry += 2;
|
||||
break;
|
||||
case AMDGPUDeviceInfo::HD5XXX:
|
||||
case AMDGPUSubtarget::EVERGREEN:
|
||||
if (hasPush)
|
||||
StackSubEntry ++;
|
||||
case AMDGPUDeviceInfo::HD6XXX:
|
||||
case AMDGPUSubtarget::NORTHERN_ISLANDS:
|
||||
StackSubEntry += 2;
|
||||
break;
|
||||
default: llvm_unreachable("Not a VLIW4/VLIW5 GPU");
|
||||
}
|
||||
return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
|
||||
}
|
||||
|
@ -941,7 +941,7 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
|
||||
assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
|
||||
unsigned Opcode;
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::R700)
|
||||
Opcode = AMDGPU::DOT4_r600;
|
||||
else
|
||||
Opcode = AMDGPU::DOT4_eg;
|
||||
|
@ -16,7 +16,6 @@
|
||||
#define R600INSTRUCTIONINFO_H_
|
||||
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDIL.h"
|
||||
#include "R600Defines.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
#include <map>
|
||||
|
@ -551,26 +551,21 @@ def load_param : LoadParamFrag<load>;
|
||||
def load_param_zexti8 : LoadParamFrag<zextloadi8>;
|
||||
def load_param_zexti16 : LoadParamFrag<zextloadi16>;
|
||||
|
||||
def isR600 : Predicate<"Subtarget.device()"
|
||||
"->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
|
||||
def isR700 : Predicate<"Subtarget.device()"
|
||||
"->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
|
||||
"Subtarget.device()->getDeviceFlag()"
|
||||
">= OCL_DEVICE_RV710">;
|
||||
def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">;
|
||||
def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">;
|
||||
def isEG : Predicate<
|
||||
"Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
|
||||
"Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
|
||||
"Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
|
||||
"Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
|
||||
"Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
|
||||
"!Subtarget.hasCaymanISA()">;
|
||||
|
||||
def isCayman : Predicate<"Subtarget.device()"
|
||||
"->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
|
||||
def isEGorCayman : Predicate<"Subtarget.device()"
|
||||
"->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
|
||||
"|| Subtarget.device()->getGeneration() =="
|
||||
"AMDGPUDeviceInfo::HD6XXX">;
|
||||
def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
|
||||
def isEGorCayman : Predicate<"Subtarget.getGeneration() == "
|
||||
"AMDGPUSubtarget::EVERGREEN"
|
||||
"|| Subtarget.getGeneration() =="
|
||||
"AMDGPUSubtarget::NORTHERN_ISLANDS">;
|
||||
|
||||
def isR600toCayman : Predicate<
|
||||
"Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
|
||||
"Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// R600 SDNodes
|
||||
|
@ -38,6 +38,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
|
||||
|
||||
const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
|
||||
InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
|
||||
|
||||
}
|
||||
|
||||
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
|
||||
|
@ -14,7 +14,6 @@
|
||||
|
||||
#include "SIISelLowering.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
|
@ -22,8 +22,8 @@ def InterpSlot : Operand<i32> {
|
||||
let PrintMethod = "printInterpSlot";
|
||||
}
|
||||
|
||||
def isSI : Predicate<"Subtarget.device()"
|
||||
"->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
|
||||
def isSI : Predicate<"Subtarget.getGeneration() "
|
||||
"== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
|
||||
|
||||
let Predicates = [isSI] in {
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user