Add R600 backend

A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169915 91177308-0d34-0410-b5e6-96231b3b80d8
Tom Stellard
2012-12-11 21:25:42 +00:00
parent 57ac1f458a
commit f98f2ce29e
146 changed files with 20232 additions and 1 deletion

@@ -472,3 +472,4 @@ include "llvm/IntrinsicsXCore.td"
include "llvm/IntrinsicsHexagon.td"
include "llvm/IntrinsicsNVVM.td"
include "llvm/IntrinsicsMips.td"
include "llvm/IntrinsicsR600.td"

@@ -0,0 +1,36 @@
//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the R600-specific intrinsics.
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "r600" in {
class R600ReadPreloadRegisterIntrinsic<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
}
defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_global_size">;
defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_local_size">;
defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tgid">;
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tidig">;
} // End TargetPrefix = "r600"
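
// For reference (a sketch, not part of the patch): these preload registers
// carry the OpenCL dispatch geometry, and a front end combines them with
// the usual index math: global id = group id * group size + id within the
// group. A host-side C++ mirror of that arithmetic, with illustrative names:
#include <cstdint>

struct PreloadRegs {
  uint32_t TgidX;      // llvm.r600.read.tgid.x
  uint32_t LocalSizeX; // llvm.r600.read.local.size.x
  uint32_t TidigX;     // llvm.r600.read.tidig.x
};

uint32_t globalIdX(const PreloadRegs &R) {
  return R.TgidX * R.LocalSizeX + R.TidigX; // get_global_id(0), sans offset
}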

@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
+subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the

lib/Target/R600/AMDGPU.h (new file, 48 lines)

@@ -0,0 +1,48 @@
//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_H
#define AMDGPU_H
#include "AMDGPUTargetMachine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class FunctionPass;
class AMDGPUTargetMachine;
// R600 Passes
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRLivenessPass(TargetMachine &tm);
// Passes common to R600 and SI
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
} // End namespace llvm
namespace ShaderType {
enum Type {
PIXEL = 0,
VERTEX = 1,
GEOMETRY = 2,
COMPUTE = 3
};
}
#endif // AMDGPU_H

lib/Target/R600/AMDGPU.td (new file, 40 lines)

@@ -0,0 +1,40 @@
//===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen -*-----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
// Include AMDIL TD files
include "AMDILBase.td"
def AMDGPUInstrInfo : InstrInfo {
let guessInstructionProperties = 1;
}
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
def AMDGPUAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
int Variant = 0;
bit isMCAsmWriter = 1;
}
def AMDGPU : Target {
// Pull in Instruction Info:
let InstructionSet = AMDGPUInstrInfo;
let AssemblyWriters = [AMDGPUAsmWriter];
}
// Include AMDGPU TD files
include "R600Schedule.td"
include "SISchedule.td"
include "Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPUInstructions.td"

@@ -0,0 +1,138 @@
//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// The AMDGPUAsmPrinter is used to emit both assembly strings and binary
/// code. When passed an MCAsmStreamer it prints assembly, and when passed
/// an MCObjectStreamer it outputs binary code.
//
//===----------------------------------------------------------------------===//
//
#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
MCStreamer &Streamer) {
return new AMDGPUAsmPrinter(tm, Streamer);
}
extern "C" void LLVMInitializeR600AsmPrinter() {
TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
}
/// We need to override this function so we can avoid
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
if (STM.dumpCode()) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
MF.dump();
#endif
}
SetupMachineFunction(MF);
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
EmitProgramInfo(MF);
}
EmitFunctionBody();
return false;
}
void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
const SIRegisterInfo * RI =
static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
MachineOperand & MO = MI.getOperand(op_idx);
unsigned maxUsed;
unsigned width = 0;
bool isSGPR = false;
unsigned reg;
unsigned hwReg;
if (!MO.isReg()) {
continue;
}
reg = MO.getReg();
if (reg == AMDGPU::VCC) {
VCCUsed = true;
continue;
}
switch (reg) {
default: break;
case AMDGPU::EXEC:
case AMDGPU::SI_LITERAL_CONSTANT:
case AMDGPU::SREG_LIT_0:
case AMDGPU::M0:
continue;
}
if (AMDGPU::SReg_32RegClass.contains(reg)) {
isSGPR = true;
width = 1;
} else if (AMDGPU::VReg_32RegClass.contains(reg)) {
isSGPR = false;
width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(reg)) {
isSGPR = true;
width = 2;
} else if (AMDGPU::VReg_64RegClass.contains(reg)) {
isSGPR = false;
width = 2;
} else if (AMDGPU::SReg_128RegClass.contains(reg)) {
isSGPR = true;
width = 4;
} else if (AMDGPU::VReg_128RegClass.contains(reg)) {
isSGPR = false;
width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(reg)) {
isSGPR = true;
width = 8;
} else {
assert(!"Unknown register class");
}
hwReg = RI->getEncodingValue(reg);
maxUsed = hwReg + width - 1;
if (isSGPR) {
MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
} else {
MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
}
}
}
}
if (VCCUsed) {
MaxSGPR += 2;
}
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
}
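
// For reference (a sketch, not part of the patch): EmitProgramInfo() streams
// out three consecutive 32-bit values in the order shown above. A consumer
// could read them back as below; the struct name and the assumption that the
// blob matches host endianness are illustrative, not defined by this patch.
#include <cstdint>
#include <cstring>

struct ProgramInfoBlob {
  uint32_t NumSGPRs;       // MaxSGPR + 1 (MaxSGPR includes 2 extra if VCC used)
  uint32_t NumVGPRs;       // MaxVGPR + 1
  uint32_t SPIPSInputAddr; // value for the SPI_PS_INPUT_ADDR register
};

ProgramInfoBlob parseProgramInfo(const uint8_t *Data) {
  ProgramInfoBlob PI;
  std::memcpy(&PI, Data, sizeof(PI)); // assumes matching endianness
  return PI;
}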

@@ -0,0 +1,44 @@
//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief AMDGPU Assembly printer class.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_ASMPRINTER_H
#define AMDGPU_ASMPRINTER_H
#include "llvm/CodeGen/AsmPrinter.h"
namespace llvm {
class AMDGPUAsmPrinter : public AsmPrinter {
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
return "AMDGPU Assembly Printer";
}
/// \brief Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.
void EmitProgramInfo(MachineFunction &MF);
/// Implemented in AMDGPUMCInstLower.cpp
virtual void EmitInstruction(const MachineInstr *MI);
};
} // End namespace llvm
#endif //AMDGPU_ASMPRINTER_H

@@ -0,0 +1,49 @@
//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief CodeEmitter interface for R600 and SI codegen.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUCODEEMITTER_H
#define AMDGPUCODEEMITTER_H
namespace llvm {
class AMDGPUCodeEmitter {
public:
uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const { return 0; }
virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
unsigned OpNo) const {
return 0;
}
virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
unsigned OpNo) const {
return 0;
}
virtual uint64_t VOPPostEncode(const MachineInstr &MI,
uint64_t Value) const {
return Value;
}
virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
unsigned OpNo) const {
return 0;
}
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
const {
return 0;
}
};
} // End namespace llvm
#endif // AMDGPUCODEEMITTER_H

@@ -0,0 +1,62 @@
//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass lowers AMDIL machine instructions to the appropriate
/// hardware instructions.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
namespace {
class AMDGPUConvertToISAPass : public MachineFunctionPass {
private:
static char ID;
TargetMachine &TM;
public:
AMDGPUConvertToISAPass(TargetMachine &tm) :
MachineFunctionPass(ID), TM(tm) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
};
} // End anonymous namespace
char AMDGPUConvertToISAPass::ID = 0;
FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
return new AMDGPUConvertToISAPass(tm);
}
bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUInstrInfo * TII =
static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
}
}
return false;
}

@@ -0,0 +1,417 @@
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUISelLowering.h"
#include "AMDILIntrinsicInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
using namespace llvm;
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Initialize target lowering borrowed from AMDIL
InitAMDILLowering();
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FEXP2, MVT::f32, Legal);
setOperationAction(ISD::FPOW, MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Legal);
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Expand);
}
//===---------------------------------------------------------------------===//
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
SDValue AMDGPUTargetLowering::LowerFormalArguments(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
InVals.push_back(SDValue());
}
return Chain;
}
SDValue AMDGPUTargetLowering::LowerReturn(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc DL, SelectionDAG &DAG) const {
return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
}
//===---------------------------------------------------------------------===//
// Target specific lowering
//===---------------------------------------------------------------------===//
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
const {
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
assert(0 && "Custom lowering code for this"
"instruction is not implemented yet!");
break;
// AMDIL DAG lowering
case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
// AMDGPU DAG lowering
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
}
return Op;
}
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
switch (IntrinsicID) {
default: return Op;
case AMDGPUIntrinsic::AMDIL_abs:
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDIL_exp:
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDIL_fraction:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDIL_mad:
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case AMDGPUIntrinsic::AMDIL_max:
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imax:
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umax:
return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDIL_min:
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imin:
return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umin:
return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDIL_round_nearest:
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
}
}
///IABS(a) = SMAX(sub(0, a), a)
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
Op.getOperand(1));
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
}
/// Linear Interpolation
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(1.0f, MVT::f32),
Op.getOperand(1));
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
Op.getOperand(3));
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
Op.getOperand(2),
OneSubAC);
}
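
// Scalar reference for the lowering above (a sketch, not part of the patch):
// one subtract and multiply plus a mad, matching the FSUB/FMUL/MAD chain
// built here.
static float lrpRef(float A, float B, float C) {
  return A * B + (1.0f - A) * C; // mad(a, b, (1 - a) * c)
}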
/// \brief Generate Min/Max node
SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue True = Op.getOperand(2);
SDValue False = Op.getOperand(3);
SDValue CC = Op.getOperand(4);
if (VT != MVT::f32 ||
!((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
return SDValue();
}
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
case ISD::SETOEQ:
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
case ISD::SETUEQ:
case ISD::SETEQ:
case ISD::SETFALSE:
case ISD::SETFALSE2:
case ISD::SETTRUE:
case ISD::SETTRUE2:
case ISD::SETUO:
case ISD::SETO:
assert(0 && "Operation should already be optimised !");
case ISD::SETULE:
case ISD::SETULT:
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
if (LHS == True)
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
else
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETUGE:
case ISD::SETOGE:
case ISD::SETUGT:
case ISD::SETOGT: {
if (LHS == True)
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
else
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
}
case ISD::SETCC_INVALID:
assert(0 && "Invalid setcc condcode !");
}
return Op;
}
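
// In scalar terms, the shapes this combine recognizes (a sketch, not part of
// the patch; the hardware NaN semantics of FMIN/FMAX are not modeled here):
static float fminViaSelect(float A, float B) { return A < B ? A : B; } // FMIN
static float fmaxViaSelect(float A, float B) { return A < B ? B : A; } // FMAX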
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue Num = Op.getOperand(0);
SDValue Den = Op.getOperand(1);
SmallVector<SDValue, 8> Results;
// RCP = URECIP(Den) = 2^32 / Den + e
// e is rounding error.
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
// RCP_LO = umulo(RCP, Den)
SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
// RCP_HI = mulhu(RCP, Den)
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
// NEG_RCP_LO = -RCP_LO
SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
RCP_LO);
// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
NEG_RCP_LO, RCP_LO,
ISD::SETEQ);
// Calculate the rounding error from the URECIP instruction
// E = mulhu(ABS_RCP_LO, RCP)
SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
// RCP_A_E = RCP + E
SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
// RCP_S_E = RCP - E
SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
RCP_A_E, RCP_S_E,
ISD::SETEQ);
// Quotient = mulhu(Tmp0, Num)
SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
// Num_S_Remainder = Quotient * Den
SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
// Remainder = Num - Num_S_Remainder
SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
ISD::SETGE);
// Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
DAG.getConstant(0, VT),
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
ISD::SETGE);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
Remainder_GE_Zero);
// Calculate Division result:
// Quotient_A_One = Quotient + 1
SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
DAG.getConstant(1, VT));
// Quotient_S_One = Quotient - 1
SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
DAG.getConstant(1, VT));
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
Quotient, Quotient_A_One, ISD::SETEQ);
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
Quotient_S_One, Div, ISD::SETEQ);
// Calculate Rem result:
// Remainder_S_Den = Remainder - Den
SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
// Remainder_A_Den = Remainder + Den
SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
Remainder, Remainder_S_Den, ISD::SETEQ);
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
Remainder_A_Den, Rem, ISD::SETEQ);
SDValue Ops[2];
Ops[0] = Div;
Ops[1] = Rem;
return DAG.getMergeValues(Ops, 2, DL);
}
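
// Host-side walk-through of the node sequence above, with each DAG node
// replaced by 32-bit integer arithmetic (a sketch for following the
// algorithm, not a validated divider). URECIP is modeled as the truncated
// value of 2^32 / Den; the hardware result may differ by a rounding error e,
// which the Tmp0 correction step absorbs.
#include <cassert>
#include <cstdint>

static uint32_t mulhu(uint32_t A, uint32_t B) {
  return static_cast<uint32_t>((static_cast<uint64_t>(A) * B) >> 32);
}

static void udivremSketch(uint32_t Num, uint32_t Den,
                          uint32_t &Div, uint32_t &Rem) {
  assert(Den != 0 && "division by zero is not modeled");
  uint32_t RCP = static_cast<uint32_t>(UINT64_C(0x100000000) / Den);
  uint32_t RCP_LO = RCP * Den;              // low 32 bits of the product
  uint32_t RCP_HI = mulhu(RCP, Den);        // high 32 bits of the product
  uint32_t ABS_RCP_LO = (RCP_HI == 0) ? 0u - RCP_LO : RCP_LO;
  uint32_t E = mulhu(ABS_RCP_LO, RCP);      // rounding-error estimate
  uint32_t Tmp0 = (RCP_HI == 0) ? RCP + E : RCP - E;
  uint32_t Quotient = mulhu(Tmp0, Num);
  uint32_t Remainder = Num - Quotient * Den;
  // SETGE in the DAG is a signed compare, hence the int32_t casts.
  bool GE_Den = static_cast<int32_t>(Remainder) >= static_cast<int32_t>(Den);
  bool GE_Zero = static_cast<int32_t>(Remainder) >= 0;
  if (!GE_Zero) {          // quotient estimate was one too high
    Div = Quotient - 1;
    Rem = Remainder + Den;
  } else if (GE_Den) {     // quotient estimate was one too low
    Div = Quotient + 1;
    Rem = Remainder - Den;
  } else {
    Div = Quotient;
    Rem = Remainder;
  }
}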
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->isExactlyValue(1.0);
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
return C->isAllOnesValue();
}
return false;
}
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->getValueAPF().isZero();
}
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
return C->isNullValue();
}
return false;
}
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned VirtualRegister;
if (!MRI.isLiveIn(Reg)) {
VirtualRegister = MRI.createVirtualRegister(RC);
MRI.addLiveIn(Reg, VirtualRegister);
} else {
VirtualRegister = MRI.getLiveInVirtReg(Reg);
}
return DAG.getRegister(VirtualRegister, VT);
}
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
// AMDIL DAG nodes
NODE_NAME_CASE(MAD);
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(DIV_INF);
NODE_NAME_CASE(RET_FLAG);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(INTERP)
NODE_NAME_CASE(INTERP_P0)
NODE_NAME_CASE(EXPORT)
}
}

@@ -0,0 +1,144 @@
//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUISELLOWERING_H
#define AMDGPUISELLOWERING_H
#include "llvm/Target/TargetLowering.h"
namespace llvm {
class MachineRegisterInfo;
class AMDGPUTargetLowering : public TargetLowering {
private:
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
protected:
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
/// \returns a RegisterSDNode representing Reg.
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
public:
AMDGPUTargetLowering(TargetMachine &TM);
virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc DL, SelectionDAG &DAG) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
virtual const char* getTargetNodeName(unsigned Opcode) const;
// Functions defined in AMDILISelLowering.cpp
public:
/// \brief Determine which of the bits specified in \p Mask are known to be
/// either zero or one and return them in the \p KnownZero and \p KnownOne
/// bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I, unsigned Intrinsic) const;
/// We want to mark f32/f64 floating point values as legal.
bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
/// We don't want to shrink f64/f32 constants.
bool ShouldShrinkFPConstant(EVT VT) const;
private:
void InitAMDILLowering();
SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
};
namespace AMDGPUISD {
enum {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
MAD, // 32-bit fused multiply-add instruction
CALL, // Function call based on a single integer
UMUL, // 32-bit unsigned multiplication
DIV_INF, // Divide with infinity returned on zero divisor
RET_FLAG,
BRANCH_COND,
// End AMDIL ISD Opcodes
BITALIGN,
DWORDADDR,
FRACT,
FMAX,
SMAX,
UMAX,
FMIN,
SMIN,
UMIN,
URECIP,
INTERP,
INTERP_P0,
EXPORT,
LAST_AMDGPU_ISD_NUMBER
};
} // End namespace AMDGPUISD
namespace SIISD {
enum {
SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
VCC_AND,
VCC_BITCAST
};
} // End namespace SIISD
} // End namespace llvm
#endif // AMDGPUISELLOWERING_H

@@ -0,0 +1,257 @@
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDIL.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenInstrInfo.inc"
using namespace llvm;
AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
: AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
}
bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
// TODO: Implement this function
return false;
}
unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
// TODO: Implement this function
return 0;
}
unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
// TODO: Implement this function
return 0;
}
bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const {
// TODO: Implement this function
return false;
}
unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
// TODO: Implement this function
return 0;
}
unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
// TODO: Implement this function
return 0;
}
bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const {
// TODO: Implement this function
return false;
}
MachineInstr *
AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const {
// TODO: Implement this function
return NULL;
}
bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const {
while (iter != MBB.end()) {
switch (iter->getOpcode()) {
default:
break;
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32:
case AMDGPU::BRANCH:
return true;
};
++iter;
}
return false;
}
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator tmp = MBB->end();
if (!MBB->size()) {
return MBB->end();
}
while (--tmp) {
if (tmp->getOpcode() == AMDGPU::ENDLOOP
|| tmp->getOpcode() == AMDGPU::ENDIF
|| tmp->getOpcode() == AMDGPU::ELSE) {
if (tmp == MBB->begin()) {
return tmp;
} else {
continue;
}
} else {
return ++tmp;
}
}
return MBB->end();
}
void
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
assert(!"Not Implemented");
}
void
AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
assert(!"Not Implemented");
}
MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// TODO: Implement this function
return 0;
}
MachineInstr*
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
// TODO: Implement this function
return 0;
}
bool
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const {
// TODO: Implement this function
return false;
}
bool
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad,
bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
// TODO: Implement this function
return false;
}
bool
AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
SmallVectorImpl<SDNode*> &NewNodes) const {
// TODO: Implement this function
return false;
}
unsigned
AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex) const {
// TODO: Implement this function
return 0;
}
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
assert(Offset2 > Offset1
&& "Second offset should be larger than first offset!");
// If we have fewer than 16 loads in a row, and the offsets are within 16
// bytes, then schedule together.
// TODO: Make the loads schedule near if they fit in a cache line
return (NumLoads < 16 && (Offset2 - Offset1) < 16);
}
bool
AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
const {
// TODO: Implement this function
return true;
}
void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
// TODO: Implement this function
}
bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
// TODO: Implement this function
return false;
}
bool
AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2)
const {
// TODO: Implement this function
return false;
}
bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// TODO: Implement this function
return false;
}
bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
// TODO: Implement this function
return MI->getDesc().isPredicable();
}
bool
AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
// TODO: Implement this function
return true;
}
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
const AMDGPURegisterInfo & RI = getRegisterInfo();
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
MachineOperand &MO = MI.getOperand(i);
// Convert dst regclass to one that is supported by the ISA
if (MO.isReg() && MO.isDef()) {
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
assert(newRegClass);
MRI.setRegClass(MO.getReg(), newRegClass);
}
}
}
}

@@ -0,0 +1,149 @@
//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Contains the definition of a TargetInstrInfo class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUINSTRUCTIONINFO_H
#define AMDGPUINSTRUCTIONINFO_H
#include "AMDGPURegisterInfo.h"
#include "AMDGPUInstrInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <map>
#define GET_INSTRINFO_HEADER
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
namespace llvm {
class AMDGPUTargetMachine;
class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
const AMDGPURegisterInfo RI;
TargetMachine &TM;
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const;
public:
explicit AMDGPUInstrInfo(TargetMachine &tm);
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DstReg, unsigned &SubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
bool hasLoadFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const;
unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
bool hasStoreFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const;
MachineInstr *
convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const = 0;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const;
public:
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
SmallVectorImpl<SDNode *> &NewNodes) const;
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
bool isPredicated(const MachineInstr *MI) const;
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
bool isPredicable(MachineInstr *MI) const;
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
// Helper functions that check the opcode for status information
bool isLoadInst(llvm::MachineInstr *MI) const;
bool isExtLoadInst(llvm::MachineInstr *MI) const;
bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
bool isSExtLoadInst(llvm::MachineInstr *MI) const;
bool isZExtLoadInst(llvm::MachineInstr *MI) const;
bool isAExtLoadInst(llvm::MachineInstr *MI) const;
bool isStoreInst(llvm::MachineInstr *MI) const;
bool isTruncStoreInst(llvm::MachineInstr *MI) const;
virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const = 0;
virtual unsigned getIEQOpcode() const = 0;
virtual bool isMov(unsigned opcode) const = 0;
/// \brief Convert the AMDIL MachineInstr to a supported ISA
/// MachineInstr
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const;
};
} // End llvm namespace
#endif // AMDGPUINSTRUCTIONINFO_H

@@ -0,0 +1,74 @@
//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//===----------------------------------------------------------------------===//
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
// out = ((a << 32) | b) >> c
//
// Can be used to optimize rotl:
// rotl(a, b) = bitalign(a, a, 32 - b)
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
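
// Scalar C++ model of BITALIGN (a sketch, not part of the patch): funnel-shift
// the 64-bit concatenation (a:b) right by c and keep the low 32 bits. Masking
// the shift amount with `& 31` is an assumption here, not stated by the node.
#include <cstdint>

static uint32_t bitalign(uint32_t A, uint32_t B, uint32_t C) {
  uint64_t AB = (static_cast<uint64_t>(A) << 32) | B;
  return static_cast<uint32_t>(AB >> (C & 31));
}

// The rotate mapping noted above: rotl(a, b) == bitalign(a, a, 32 - b).
static uint32_t rotlViaBitalign(uint32_t A, uint32_t B) {
  return bitalign(A, A, (32 - B) & 31);
}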
// The argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
// out = max(a, b) a and b are floats
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = max(a, b) a and b are unsigned ints
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = min(a, b) a and b are floats
def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = min(a, b) a and b are signed ints
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// out = min(a, b) a and b are unsigned ints
def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
// urecip - This operation is a helper for integer division; it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;

@@ -0,0 +1,190 @@
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
field bits<16> AMDILOp = 0;
field bits<3> Gen = 0;
let Namespace = "AMDGPU";
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asm;
let Pattern = pattern;
let Itinerary = NullALU;
let TSFlags{42-40} = Gen;
let TSFlags{63-48} = AMDILOp;
}
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
: AMDGPUInst<outs, ins, asm, pattern> {
field bits<32> Inst = 0xffffffff;
}
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def COND_EQ : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOEQ: case ISD::SETUEQ:
case ISD::SETEQ: return true;}}}]
>;
def COND_NE : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETONE: case ISD::SETUNE:
case ISD::SETNE: return true;}}}]
>;
def COND_GT : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOGT: case ISD::SETUGT:
case ISD::SETGT: return true;}}}]
>;
def COND_GE : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOGE: case ISD::SETUGE:
case ISD::SETGE: return true;}}}]
>;
def COND_LT : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOLT: case ISD::SETULT:
case ISD::SETLT: return true;}}}]
>;
def COND_LE : PatLeaf <
(cond),
[{switch(N->get()){{default: return false;
case ISD::SETOLE: case ISD::SETULE:
case ISD::SETLE: return true;}}}]
>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
}
def CONST : Constants;
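
// The Constants values above are IEEE-754 single-precision bit patterns kept
// as ints so TableGen patterns can materialize them as literals. A quick
// host-side check of the encodings (a sketch, not part of the patch):
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint32_t Bits[] = {0x40490fdb, 0x40c90fdb, 0x3e22f983};
  for (uint32_t B : Bits) {
    float F;
    std::memcpy(&F, &B, sizeof(F));
    std::printf("0x%08x = %.8f\n", B, F); // pi, 2*pi, 1/(2*pi)
  }
  return 0;
}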
def FP_ZERO : PatLeaf <
(fpimm),
[{return N->getValueAPF().isZero();}]
>;
def FP_ONE : PatLeaf <
(fpimm),
[{return N->isExactlyValue(1.0);}]
>;
let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
[(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"FABS $dst, $src0",
[(set rc:$dst, (fabs rc:$src0))]
>;
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"FNEG $dst, $src0",
[(set rc:$dst, (fneg rc:$src0))]
>;
def SHADER_TYPE : AMDGPUShaderInst <
(outs),
(ins i32imm:$type),
"SHADER_TYPE $type",
[(int_AMDGPU_shader_type imm:$type)]
>;
} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
RegisterClass rc> : Pat <
(fpow rc:$src0, rc:$src1),
(exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
>;
/* Other helper patterns */
/* --------------------- */
/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type,
RegisterClass vec_class, int sub_idx,
SubRegIndex sub_reg>: Pat<
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
(EXTRACT_SUBREG vec_class:$src, sub_reg)
>;
/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
RegisterClass elem_class, RegisterClass vec_class,
int sub_idx, SubRegIndex sub_reg> : Pat <
(vec_type (vector_insert (vec_type vec_class:$vec),
(elem_type elem_class:$elem), sub_idx)),
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
>;
// Vector Build pattern
class Vector_Build <ValueType vecType, RegisterClass vectorClass,
ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
(vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
elemClass:$z, sel_z), elemClass:$w, sel_w)
>;
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
(dt (bitconvert (st rc:$src0))),
(dt rc:$src0)
>;
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
(vt (AMDGPUdwordaddr (vt rc:$addr))),
(vt rc:$addr)
>;
include "R600Instructions.td"
include "SIInstrInfo.td"

@@ -0,0 +1,62 @@
//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines intrinsics that are used by all hw codegen targets.
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
}
let TargetPrefix = "TGSI", isTarget = 1 in {
def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
}
include "SIIntrinsics.td"

@@ -0,0 +1,77 @@
//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Constants.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
AMDGPUMCInstLower::AMDGPUMCInstLower() { }
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
switch (MO.getType()) {
default:
llvm_unreachable("unknown operand type");
case MachineOperand::MO_FPImmediate: {
const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
"Only floating point immediates are supported at the moment.");
MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
break;
}
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
break;
case MachineOperand::MO_Register:
MCOp = MCOperand::CreateReg(MO.getReg());
break;
}
OutMI.addOperand(MCOp);
}
}
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
AMDGPUMCInstLower MCInstLowering;
if (MI->isBundle()) {
const MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator I = MI;
++I;
while (I != MBB->end() && I->isInsideBundle()) {
MCInst MCBundleInst;
const MachineInstr *BundledInst = I;
MCInstLowering.lower(BundledInst, MCBundleInst);
OutStreamer.EmitInstruction(MCBundleInst);
++I;
}
} else {
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
}
}

@@ -0,0 +1,31 @@
//===- AMDGPUMCInstLower.h - MachineInstr Lowering Interface ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_MCINSTLOWER_H
#define AMDGPU_MCINSTLOWER_H
namespace llvm {
class MCInst;
class MachineInstr;
class AMDGPUMCInstLower {
public:
AMDGPUMCInstLower();
/// \brief Lower a MachineInstr to an MCInst
void lower(const MachineInstr *MI, MCInst &OutMI) const;
};
} // End namespace llvm
#endif //AMDGPU_MCINSTLOWER_H

@@ -0,0 +1,51 @@
//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
//
//===----------------------------------------------------------------------===//
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
using namespace llvm;
AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
const TargetInstrInfo &tii)
: AMDGPUGenRegisterInfo(0),
TM(tm),
TII(tii)
{ }
//===----------------------------------------------------------------------===//
// Function handling callbacks - Functions are a seldom-used feature of GPUs,
// so they are not supported at this time.
//===----------------------------------------------------------------------===//
const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
return &CalleeSavedReg;
}
void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj,
RegScavenger *RS) const {
assert(!"Subroutines not supported yet");
}
unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
assert(!"Subroutines not supported yet");
return 0;
}
#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"

View File

@@ -0,0 +1,63 @@
//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
/// targets.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUREGISTERINFO_H
#define AMDGPUREGISTERINFO_H
#include "llvm/ADT/BitVector.h"
#include "llvm/Target/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
namespace llvm {
class AMDGPUTargetMachine;
class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
TargetMachine &TM;
const TargetInstrInfo &TII;
static const uint16_t CalleeSavedReg;
AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
virtual BitVector getReservedRegs(const MachineFunction &MF) const {
assert(!"Unimplemented"); return BitVector();
}
/// \param RC is an AMDIL reg class.
///
/// \returns The ISA reg class that is equivalent to \p RC.
virtual const TargetRegisterClass * getISARegClass(
const TargetRegisterClass * RC) const {
assert(!"Unimplemented"); return NULL;
}
virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
assert(!"Unimplemented"); return NULL;
}
const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
RegScavenger *RS) const;
unsigned getFrameRegister(const MachineFunction &MF) const;
};
} // End namespace llvm
#endif // AMDGPUREGISTERINFO_H

View File

@@ -0,0 +1,22 @@
//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Tablegen register definitions common to all hw codegen targets.
//
//===----------------------------------------------------------------------===//
let Namespace = "AMDGPU" in {
def sel_x : SubRegIndex;
def sel_y : SubRegIndex;
def sel_z : SubRegIndex;
def sel_w : SubRegIndex;
}
include "R600RegisterInfo.td"
include "SIRegisterInfo.td"

View File

@@ -0,0 +1,87 @@
//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
using namespace llvm;
#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
InstrItins = getInstrItineraryForCPU(CPU);
memset(CapsOverride, 0, sizeof(*CapsOverride)
* AMDGPUDeviceInfo::MaxNumberCapabilities);
// Default card
StringRef GPU = CPU;
Is64bit = false;
DefaultSize[0] = 64;
DefaultSize[1] = 1;
DefaultSize[2] = 1;
ParseSubtargetFeatures(GPU, FS);
DevName = GPU;
Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
}
AMDGPUSubtarget::~AMDGPUSubtarget() {
delete Device;
}
bool
AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
"Caps index is out of bounds!");
return CapsOverride[caps];
}
bool
AMDGPUSubtarget::is64bit() const {
return Is64bit;
}
bool
AMDGPUSubtarget::isTargetELF() const {
return false;
}
size_t
AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
// DefaultSize has three entries (x, y, z); out-of-range dims default to 1.
if (dim > 2) {
return 1;
} else {
return DefaultSize[dim];
}
}
std::string
AMDGPUSubtarget::getDataLayout() const {
if (!Device) {
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
"-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
}
return Device->getDataLayout();
}
std::string
AMDGPUSubtarget::getDeviceName() const {
return DevName;
}
const AMDGPUDevice *
AMDGPUSubtarget::device() const {
return Device;
}

View File

@@ -0,0 +1,65 @@
//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDGPU --*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUSUBTARGET_H
#define AMDGPUSUBTARGET_H
#include "AMDILDevice.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
#define MAX_CB_SIZE (1 << 16)
namespace llvm {
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
private:
bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
const AMDGPUDevice *Device;
size_t DefaultSize[3];
std::string DevName;
bool Is64bit;
bool Is32on64bit;
bool DumpCode;
bool R600ALUInst;
InstrItineraryData InstrItins;
public:
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
virtual ~AMDGPUSubtarget();
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
bool isOverride(AMDGPUDeviceInfo::Caps) const;
bool is64bit() const;
// Helper functions to simplify if statements
bool isTargetELF() const;
const AMDGPUDevice* device() const;
std::string getDataLayout() const;
std::string getDeviceName() const;
virtual size_t getDefaultSize(uint32_t dim) const;
bool dumpCode() const { return DumpCode; }
bool r600ALUEncoding() const { return R600ALUInst; }
};
} // End namespace llvm
#endif // AMDGPUSUBTARGET_H
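A minimal usage sketch (illustrative only, not part of this commit; the target triple string is an assumption): constructing the subtarget directly and querying it, much as the target machine below does.

#include "AMDGPUSubtarget.h"

void querySubtargetExample() {
  llvm::AMDGPUSubtarget ST("r600--", "redwood", "");
  size_t WorkGroupX = ST.getDefaultSize(0); // 64, from the constructor defaults
  std::string DL = ST.getDataLayout();      // forwarded to the resolved device
  (void)WorkGroupX; (void)DL;
}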

View File

@@ -0,0 +1,141 @@
//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include <llvm/CodeGen/Passes.h>
using namespace llvm;
extern "C" void LLVMInitializeR600Target() {
// Register the target
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
}
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OptLevel
)
:
LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
Subtarget(TT, CPU, FS),
Layout(Subtarget.getDataLayout()),
FrameLowering(TargetFrameLowering::StackGrowsUp,
Subtarget.device()->getStackAlignment(), 0),
IntrinsicInfo(this),
InstrItins(&Subtarget.getInstrItineraryData()) {
// TLInfo uses InstrInfo so it must be initialized after.
if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
InstrInfo = new R600InstrInfo(*this);
TLInfo = new R600TargetLowering(*this);
} else {
InstrInfo = new SIInstrInfo(*this);
TLInfo = new SITargetLowering(*this);
}
}
AMDGPUTargetMachine::~AMDGPUTargetMachine() {
}
namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();
}
virtual bool addPreISel();
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
virtual bool addPostRegAlloc();
virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
} // End of anonymous namespace
TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
return new AMDGPUPassConfig(this, PM);
}
bool
AMDGPUPassConfig::addPreISel() {
return false;
}
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUPeepholeOpt(*TM));
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
return false;
}
bool AMDGPUPassConfig::addPreRegAlloc() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
addPass(createSIAssignInterpRegsPass(*TM));
}
addPass(createAMDGPUConvertToISAPass(*TM));
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
addPass(createSIFixSGPRLivenessPass(*TM));
}
return false;
}
bool AMDGPUPassConfig::addPostRegAlloc() {
return false;
}
bool AMDGPUPassConfig::addPreSched2() {
addPass(&IfConverterID);
return false;
}
bool AMDGPUPassConfig::addPreEmitPass() {
addPass(createAMDGPUCFGPreparationPass(*TM));
addPass(createAMDGPUCFGStructurizerPass(*TM));
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
addPass(createR600ExpandSpecialInstrsPass(*TM));
addPass(&FinalizeMachineBundlesID);
} else {
addPass(createSILowerLiteralConstantsPass(*TM));
addPass(createSILowerControlFlowPass(*TM));
}
return false;
}
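Every R600-versus-SI decision in this file keys on the device generation. As an illustrative sketch (not part of the commit; the helper name is hypothetical), the recurring test could be factored into a single predicate:

#include "AMDGPUSubtarget.h"

// True for R600-class devices (7XX through NI); false for SI-class devices.
static bool isR600Class(const llvm::AMDGPUSubtarget &ST) {
  return ST.device()->getGeneration() <= llvm::AMDGPUDeviceInfo::HD6XXX;
}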

View File

@@ -0,0 +1,70 @@
//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_TARGET_MACHINE_H
#define AMDGPU_TARGET_MACHINE_H
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILFrameLowering.h"
#include "AMDILIntrinsicInfo.h"
#include "R600ISelLowering.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/DataLayout.h"
namespace llvm {
MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
class AMDGPUTargetMachine : public LLVMTargetMachine {
AMDGPUSubtarget Subtarget;
const DataLayout Layout;
AMDGPUFrameLowering FrameLowering;
AMDGPUIntrinsicInfo IntrinsicInfo;
const AMDGPUInstrInfo * InstrInfo;
AMDGPUTargetLowering * TLInfo;
const InstrItineraryData* InstrItins;
public:
AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS,
TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
virtual const AMDGPUFrameLowering* getFrameLowering() const {
return &FrameLowering;
}
virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
return &IntrinsicInfo;
}
virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
virtual const AMDGPURegisterInfo *getRegisterInfo() const {
return &InstrInfo->getRegisterInfo();
}
virtual AMDGPUTargetLowering * getTargetLowering() const {
return TLInfo;
}
virtual const InstrItineraryData* getInstrItineraryData() const {
return InstrItins;
}
virtual const DataLayout* getDataLayout() const { return &Layout; }
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
} // End namespace llvm
#endif // AMDGPU_TARGET_MACHINE_H

106
lib/Target/R600/AMDIL.h Normal file
View File

@@ -0,0 +1,106 @@
//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// This file contains the entry points for global functions defined in the LLVM
/// AMDGPU back-end.
//
//===----------------------------------------------------------------------===//
#ifndef AMDIL_H
#define AMDIL_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
#define ARENA_SEGMENT_RESERVED_UAVS 12
#define DEFAULT_ARENA_UAV_ID 8
#define DEFAULT_RAW_UAV_ID 7
#define GLOBAL_RETURN_RAW_UAV_ID 11
#define HW_MAX_NUM_CB 8
#define MAX_NUM_UNIQUE_UAVS 8
#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
#define OPENCL_MAX_READ_IMAGES 128
#define OPENCL_MAX_WRITE_IMAGES 8
#define OPENCL_MAX_SAMPLERS 16
// The following IDs can never be zero, as zero is the ID that is
// used to assert against.
#define DEFAULT_LDS_ID 1
#define DEFAULT_GDS_ID 1
#define DEFAULT_SCRATCH_ID 1
#define DEFAULT_VEC_SLOTS 8
#define OCL_DEVICE_RV710 0x0001
#define OCL_DEVICE_RV730 0x0002
#define OCL_DEVICE_RV770 0x0004
#define OCL_DEVICE_CEDAR 0x0008
#define OCL_DEVICE_REDWOOD 0x0010
#define OCL_DEVICE_JUNIPER 0x0020
#define OCL_DEVICE_CYPRESS 0x0040
#define OCL_DEVICE_CAICOS 0x0080
#define OCL_DEVICE_TURKS 0x0100
#define OCL_DEVICE_BARTS 0x0200
#define OCL_DEVICE_CAYMAN 0x0400
#define OCL_DEVICE_ALL 0x3FFF
/// The number of function IDs that are reserved for
/// internal compiler usage.
const unsigned int RESERVED_FUNCS = 1024;
namespace llvm {
class AMDGPUInstrPrinter;
class FunctionPass;
class MCAsmInfo;
class raw_ostream;
class Target;
class TargetMachine;
// Instruction selection passes.
FunctionPass*
createAMDGPUISelDag(TargetMachine &TM);
FunctionPass*
createAMDGPUPeepholeOpt(TargetMachine &TM);
// Pre emit passes.
FunctionPass*
createAMDGPUCFGPreparationPass(TargetMachine &TM);
FunctionPass*
createAMDGPUCFGStructurizerPass(TargetMachine &TM);
extern Target TheAMDGPUTarget;
} // end namespace llvm
// Include device information enumerations
#include "AMDILDeviceInfo.h"
namespace llvm {
/// OpenCL uses address spaces to differentiate between
/// various memory regions on the hardware. On the CPU
/// all of the address spaces point to the same memory;
/// on the GPU, however, each address space points to
/// a separate piece of memory that is distinct from
/// other memory locations.
namespace AMDGPUAS {
enum AddressSpaces {
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
LOCAL_ADDRESS = 3, ///< Address space for local memory.
REGION_ADDRESS = 4, ///< Address space for region memory.
ADDRESS_NONE = 5, ///< Address space for unknown memory.
PARAM_D_ADDRESS = 6, ///< Address space for directly addressable parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirectly addressable parameter memory (VTX1)
USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
LAST_ADDRESS = 9
};
} // namespace AMDGPUAS
} // end namespace llvm
#endif // AMDIL_H
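As a sketch of how the AMDGPUAS enum is consumed (illustrative; the helper name is hypothetical), an IR pointer's address space is compared against one of these values, just as the instruction selector later in this commit does:

#include "AMDIL.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Value.h"

static bool isGlobalPointer(const llvm::Value *V) {
  // Only pointer-typed values carry an address space.
  if (const llvm::PointerType *PT =
          llvm::dyn_cast<llvm::PointerType>(V->getType()))
    return PT->getAddressSpace() == llvm::AMDGPUAS::GLOBAL_ADDRESS;
  return false;
}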

View File

@@ -0,0 +1,115 @@
//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
// \file
//==-----------------------------------------------------------------------===//
#include "AMDIL7XXDevice.h"
#include "AMDGPUSubtarget.h"
#include "AMDILDevice.h"
using namespace llvm;
AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
setCaps();
std::string name = mSTM->getDeviceName();
if (name == "rv710") {
DeviceFlag = OCL_DEVICE_RV710;
} else if (name == "rv730") {
DeviceFlag = OCL_DEVICE_RV730;
} else {
DeviceFlag = OCL_DEVICE_RV770;
}
}
AMDGPU7XXDevice::~AMDGPU7XXDevice() {
}
void AMDGPU7XXDevice::setCaps() {
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
}
size_t AMDGPU7XXDevice::getMaxLDSSize() const {
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_700;
}
return 0;
}
size_t AMDGPU7XXDevice::getWavefrontSize() const {
return AMDGPUDevice::HalfWavefrontSize;
}
uint32_t AMDGPU7XXDevice::getGeneration() const {
return AMDGPUDeviceInfo::HD4XXX;
}
uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
switch (DeviceID) {
default:
assert(0 && "ID type passed in is unknown!");
break;
case GLOBAL_ID:
case CONSTANT_ID:
case RAW_UAV_ID:
case ARENA_UAV_ID:
break;
case LDS_ID:
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return DEFAULT_LDS_ID;
}
break;
case SCRATCH_ID:
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
return DEFAULT_SCRATCH_ID;
}
break;
case GDS_ID:
assert(0 && "GDS UAV ID is not supported on this chip");
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
return DEFAULT_GDS_ID;
}
break;
};
return 0;
}
uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
return 1;
}
AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
setCaps();
}
AMDGPU770Device::~AMDGPU770Device() {
}
void AMDGPU770Device::setCaps() {
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
mSWBits.set(AMDGPUDeviceInfo::FMA);
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
}
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
mHWBits.reset(AMDGPUDeviceInfo::LongOps);
mSWBits.set(AMDGPUDeviceInfo::LongOps);
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
}
size_t AMDGPU770Device::getWavefrontSize() const {
return AMDGPUDevice::WavefrontSize;
}
AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
}
AMDGPU710Device::~AMDGPU710Device() {
}
size_t AMDGPU710Device::getWavefrontSize() const {
return AMDGPUDevice::QuarterWavefrontSize;
}

View File

@@ -0,0 +1,72 @@
//==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
/// \file
/// \brief Interface for the subtarget data classes.
///
/// This file will define the interface that each generation needs to
/// implement in order to correctly answer queries on the capabilities of the
/// specific hardware.
//===----------------------------------------------------------------------===//
#ifndef AMDIL7XXDEVICEIMPL_H
#define AMDIL7XXDEVICEIMPL_H
#include "AMDILDevice.h"
namespace llvm {
class AMDGPUSubtarget;
//===----------------------------------------------------------------------===//
// 7XX generation of devices and their respective sub classes
//===----------------------------------------------------------------------===//
/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
///
/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
/// support the minimal features that are required to be considered OpenCL 1.0
/// compliant and nothing more.
class AMDGPU7XXDevice : public AMDGPUDevice {
public:
AMDGPU7XXDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPU7XXDevice();
virtual size_t getMaxLDSSize() const;
virtual size_t getWavefrontSize() const;
virtual uint32_t getGeneration() const;
virtual uint32_t getResourceID(uint32_t DeviceID) const;
virtual uint32_t getMaxNumUAVs() const;
protected:
virtual void setCaps();
};
/// \brief The AMDGPU770Device class represents the RV770 chip and its
/// derivative cards.
///
/// The difference between this device and the base class is that this device
/// adds support for double precision and has a larger wavefront size.
class AMDGPU770Device : public AMDGPU7XXDevice {
public:
AMDGPU770Device(AMDGPUSubtarget *ST);
virtual ~AMDGPU770Device();
virtual size_t getWavefrontSize() const;
private:
virtual void setCaps();
};
/// \brief The AMDGPU710Device class derives from the 7XX base class.
///
/// This class is a smaller derivative, so we need to overload some of the
/// functions in order to correctly specify this information.
class AMDGPU710Device : public AMDGPU7XXDevice {
public:
AMDGPU710Device(AMDGPUSubtarget *ST);
virtual ~AMDGPU710Device();
virtual size_t getWavefrontSize() const;
};
} // namespace llvm
#endif // AMDIL7XXDEVICEIMPL_H

View File

@@ -0,0 +1,85 @@
//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Target-independent interfaces which we are implementing
//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
// Dummy Instruction itineraries for pseudo instructions
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;
//===----------------------------------------------------------------------===//
// AMDIL Subtarget features.
//===----------------------------------------------------------------------===//
def FeatureFP64 : SubtargetFeature<"fp64",
"CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
"true",
"Enable 64bit double precision operations">;
def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
"CapsOverride[AMDGPUDeviceInfo::ByteStores]",
"true",
"Enable byte addressable stores">;
def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
"CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
"true",
"Enable duplicate barrier detection(HD5XXX or later).">;
def FeatureImages : SubtargetFeature<"images",
"CapsOverride[AMDGPUDeviceInfo::Images]",
"true",
"Enable image functions">;
def FeatureMultiUAV : SubtargetFeature<"multi_uav",
"CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
"true",
"Generate multiple UAV code(HD5XXX family or later)">;
def FeatureMacroDB : SubtargetFeature<"macrodb",
"CapsOverride[AMDGPUDeviceInfo::MacroDB]",
"true",
"Use internal macrodb, instead of macrodb in driver">;
def FeatureNoAlias : SubtargetFeature<"noalias",
"CapsOverride[AMDGPUDeviceInfo::NoAlias]",
"true",
"assert that all kernel argument pointers are not aliased">;
def FeatureNoInline : SubtargetFeature<"no-inline",
"CapsOverride[AMDGPUDeviceInfo::NoInline]",
"true",
"specify whether to not inline functions">;
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
"Is64bit",
"false",
"Specify if 64bit addressing should be used.">;
def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
"Is32on64bit",
"false",
"Specify if 64bit sized pointers with 32bit addressing should be used.">;
def FeatureDebug : SubtargetFeature<"debug",
"CapsOverride[AMDGPUDeviceInfo::Debug]",
"true",
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
def FeatureDumpCode : SubtargetFeature <"DumpCode",
"DumpCode",
"true",
"Dump MachineInstrs in the CodeEmitter">;
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
"R600ALUInst",
"false",
"Older version of ALU instructions encoding.">;
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
include "AMDILRegisterInfo.td"
include "AMDILInstrInfo.td"

File diff suppressed because it is too large

View File

@@ -0,0 +1,124 @@
//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//==-----------------------------------------------------------------------===//
#include "AMDILDevice.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
// Default implementation for all of the classes.
AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) {
mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
setCaps();
DeviceFlag = OCL_DEVICE_ALL;
}
AMDGPUDevice::~AMDGPUDevice() {
mHWBits.clear();
mSWBits.clear();
}
size_t AMDGPUDevice::getMaxGDSSize() const {
return 0;
}
uint32_t
AMDGPUDevice::getDeviceFlag() const {
return DeviceFlag;
}
size_t AMDGPUDevice::getMaxNumCBs() const {
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
return HW_MAX_NUM_CB;
}
return 0;
}
size_t AMDGPUDevice::getMaxCBSize() const {
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
return MAX_CB_SIZE;
}
return 0;
}
size_t AMDGPUDevice::getMaxScratchSize() const {
return 65536;
}
uint32_t AMDGPUDevice::getStackAlignment() const {
return 16;
}
void AMDGPUDevice::setCaps() {
mSWBits.set(AMDGPUDeviceInfo::HalfOps);
mSWBits.set(AMDGPUDeviceInfo::ByteOps);
mSWBits.set(AMDGPUDeviceInfo::ShortOps);
mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
mSWBits.set(AMDGPUDeviceInfo::NoInline);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
mSWBits.set(AMDGPUDeviceInfo::MacroDB);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
} else {
mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
} else {
mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
}
mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
mSWBits.set(AMDGPUDeviceInfo::LongOps);
}
AMDGPUDeviceInfo::ExecutionMode
AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const {
if (mHWBits[Caps]) {
assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
return AMDGPUDeviceInfo::Hardware;
}
if (mSWBits[Caps]) {
assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
return AMDGPUDeviceInfo::Software;
}
return AMDGPUDeviceInfo::Unsupported;
}
bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const {
return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
}
bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const {
return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
}
bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const {
return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
}
std::string
AMDGPUDevice::getDataLayout() const {
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
"-n8:16:32:64");
}

View File

@@ -0,0 +1,117 @@
//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface for the subtarget data classes.
//
/// This file will define the interface that each generation needs to
/// implement in order to correctly answer queries on the capabilities of the
/// specific hardware.
//===----------------------------------------------------------------------===//
#ifndef AMDILDEVICEIMPL_H
#define AMDILDEVICEIMPL_H
#include "AMDIL.h"
#include "llvm/ADT/BitVector.h"
namespace llvm {
class AMDGPUSubtarget;
class MCStreamer;
//===----------------------------------------------------------------------===//
// Interface for data that is specific to a single device
//===----------------------------------------------------------------------===//
class AMDGPUDevice {
public:
AMDGPUDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPUDevice();
// Enum values for the various memory types.
enum {
RAW_UAV_ID = 0,
ARENA_UAV_ID = 1,
LDS_ID = 2,
GDS_ID = 3,
SCRATCH_ID = 4,
CONSTANT_ID = 5,
GLOBAL_ID = 6,
MAX_IDS = 7
} IO_TYPE_IDS;
/// \returns The max LDS size that the hardware supports. Size is in
/// bytes.
virtual size_t getMaxLDSSize() const = 0;
/// \returns The max GDS size that the hardware supports if the GDS is
/// supported by the hardware. Size is in bytes.
virtual size_t getMaxGDSSize() const;
/// \returns The max number of hardware constant address spaces that
/// are supported by this device.
virtual size_t getMaxNumCBs() const;
/// \returns The max number of bytes a single hardware constant buffer
/// can support. Size is in bytes.
virtual size_t getMaxCBSize() const;
/// \returns The max number of bytes allowed by the hardware scratch
/// buffer. Size is in bytes.
virtual size_t getMaxScratchSize() const;
/// \brief Get the flag that corresponds to the device.
virtual uint32_t getDeviceFlag() const;
/// \returns The number of work-items that exist in a single hardware
/// wavefront.
virtual size_t getWavefrontSize() const = 0;
/// \brief Get the generational name of this specific device.
virtual uint32_t getGeneration() const = 0;
/// \brief Get the stack alignment of this specific device.
virtual uint32_t getStackAlignment() const;
/// \brief Get the resource ID for this specific device.
virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
/// \brief Get the max number of UAVs for this device.
virtual uint32_t getMaxNumUAVs() const = 0;
// API utilizing more detailed capabilities of each family of
// cards. If a capability is supported, then either usesHardware or
// usesSoftware returns true. If usesHardware returns true, then
// usesSoftware must return false for the same capability. Hardware
// execution means that the feature is done natively by the hardware
// and is not emulated in software. Software execution means
// that the feature could be done in the hardware, but there is
// software that emulates it, possibly using the hardware for
// support, since the hardware does not fully comply with the OpenCL
// spec.
bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
virtual std::string getDataLayout() const;
static const unsigned int MAX_LDS_SIZE_700 = 16384;
static const unsigned int MAX_LDS_SIZE_800 = 32768;
static const unsigned int WavefrontSize = 64;
static const unsigned int HalfWavefrontSize = 32;
static const unsigned int QuarterWavefrontSize = 16;
protected:
virtual void setCaps();
llvm::BitVector mHWBits;
llvm::BitVector mSWBits;
AMDGPUSubtarget *mSTM;
uint32_t DeviceFlag;
private:
AMDGPUDeviceInfo::ExecutionMode
getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
};
} // namespace llvm
#endif // AMDILDEVICEIMPL_H
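An illustrative use of the capability API documented above (not part of the commit; the helper name is hypothetical). It mirrors how the getResourceID implementations in this commit pick between native and emulated resources:

#include "AMDILDevice.h"

static uint32_t pickLDSResourceID(const llvm::AMDGPUDevice &Dev) {
  // For a supported capability, exactly one of usesHardware/usesSoftware holds.
  if (Dev.usesHardware(llvm::AMDGPUDeviceInfo::LocalMem))
    return DEFAULT_LDS_ID;       // native LDS hardware
  return DEFAULT_ARENA_UAV_ID;   // emulated through the arena UAV
}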

View File

@@ -0,0 +1,94 @@
//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Function that creates DeviceInfo from a device name and other information.
//
//==-----------------------------------------------------------------------===//
#include "AMDILDevices.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
namespace llvm {
namespace AMDGPUDeviceInfo {
AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
AMDGPUSubtarget *ptr,
bool is64bit, bool is64on32bit) {
if (deviceName.c_str()[2] == '7') {
switch (deviceName.c_str()[3]) {
case '1':
return new AMDGPU710Device(ptr);
case '7':
return new AMDGPU770Device(ptr);
default:
return new AMDGPU7XXDevice(ptr);
}
} else if (deviceName == "cypress") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUCypressDevice(ptr);
} else if (deviceName == "juniper") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUEvergreenDevice(ptr);
} else if (deviceName == "redwood") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPURedwoodDevice(ptr);
} else if (deviceName == "cedar") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUCedarDevice(ptr);
} else if (deviceName == "barts" || deviceName == "turks") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUNIDevice(ptr);
} else if (deviceName == "cayman") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUCaymanDevice(ptr);
} else if (deviceName == "caicos") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPUNIDevice(ptr);
} else if (deviceName == "SI") {
return new AMDGPUSIDevice(ptr);
} else {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
" on 32bit pointers!");
#endif
return new AMDGPU7XXDevice(ptr);
}
}
} // End namespace AMDGPUDeviceInfo
} // End namespace llvm

View File

@@ -0,0 +1,88 @@
//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//==-----------------------------------------------------------------------===//
#ifndef AMDILDEVICEINFO_H
#define AMDILDEVICEINFO_H
#include <string>
namespace llvm {
class AMDGPUDevice;
class AMDGPUSubtarget;
namespace AMDGPUDeviceInfo {
/// Each Capabilities can be executed using a hardware instruction,
/// emulated with a sequence of software instructions, or not
/// supported at all.
enum ExecutionMode {
Unsupported = 0, ///< Unsupported feature on the card (default value).
/// This is the execution mode that is set if the feature is emulated in
/// software.
Software,
/// This execution mode is set if the feature exists natively in hardware
Hardware
};
enum Caps {
HalfOps = 0x1, ///< Half float is supported or not.
DoubleOps = 0x2, ///< Double is supported or not.
ByteOps = 0x3, ///< Byte (char) is supported or not.
ShortOps = 0x4, ///< Short is supported or not.
LongOps = 0x5, ///< Long is supported or not.
Images = 0x6, ///< Images are supported or not.
ByteStores = 0x7, ///< ByteStores available(!HD4XXX).
ConstantMem = 0x8, ///< Constant/CB memory.
LocalMem = 0x9, ///< Local/LDS memory.
PrivateMem = 0xA, ///< Scratch/Private/Stack memory.
RegionMem = 0xB, ///< OCL GDS Memory Extension.
FMA = 0xC, ///< Use HW FMA or SW FMA.
ArenaSegment = 0xD, ///< Use for Arena UAV per pointer 12-1023.
MultiUAV = 0xE, ///< Use for UAV per Pointer 0-7.
Reserved0 = 0xF, ///< ReservedFlag
NoAlias = 0x10, ///< Cached loads.
Signed24BitOps = 0x11, ///< Peephole Optimization.
/// Debug mode implies that no hardware features or optimizations
/// are performed and that all memory accesses go through a single
/// UAV (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
Debug = 0x12,
CachedMem = 0x13, ///< Cached mem is available or not.
BarrierDetect = 0x14, ///< Detect duplicate barriers.
Reserved1 = 0x15, ///< Reserved flag
ByteLDSOps = 0x16, ///< Flag to specify if byte LDS ops are available.
ArenaVectors = 0x17, ///< Flag to specify if vector loads from arena work.
TmrReg = 0x18, ///< Flag to specify if Tmr register is supported.
NoInline = 0x19, ///< Flag to specify that no inlining should occur.
MacroDB = 0x1A, ///< Flag to specify that backend handles macrodb.
HW64BitDivMod = 0x1B, ///< Flag for backend to generate 64bit div/mod.
ArenaUAV = 0x1C, ///< Flag to specify that arena uav is supported.
PrivateUAV = 0x1D, ///< Flag to specify that private memory uses uav's.
/// If more capabilities are required, then
/// this number needs to be increased.
/// All capabilities must come before this
/// number.
MaxNumberCapabilities = 0x20
};
/// These have to be kept in order, with the older generations
/// having the lower enumeration values.
enum Generation {
HD4XXX = 0, ///< 7XX based devices.
HD5XXX, ///< Evergreen based devices.
HD6XXX, ///< NI/Evergreen+ based devices.
HD7XXX, ///< Southern Islands based devices.
HDTEST, ///< Experimental feature testing device.
HDNUMGEN
};
AMDGPUDevice*
getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
bool is64bit = false, bool is64on32bit = false);
} // namespace AMDGPUDeviceInfo
} // namespace llvm
#endif // AMDILDEVICEINFO_H
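Illustrative only (the subtarget pointer ST is assumed to exist): resolving a device by name through this factory and branching on its generation, as the target machine and pass configuration in this commit do:

llvm::AMDGPUDevice *Dev =
    llvm::AMDGPUDeviceInfo::getDeviceFromName("cayman", ST);
if (Dev->getGeneration() <= llvm::AMDGPUDeviceInfo::HD6XXX) {
  // R600-class lowering (7XX/Evergreen/NI).
} else {
  // SI-class lowering (HD7XXX and later).
}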

View File

@@ -0,0 +1,19 @@
//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//==-----------------------------------------------------------------------===//
#ifndef AMDIL_DEVICES_H
#define AMDIL_DEVICES_H
// Include all of the device specific header files
#include "AMDIL7XXDevice.h"
#include "AMDILDevice.h"
#include "AMDILEvergreenDevice.h"
#include "AMDILNIDevice.h"
#include "AMDILSIDevice.h"
#endif // AMDIL_DEVICES_H

View File

@@ -0,0 +1,169 @@
//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//==-----------------------------------------------------------------------===//
#include "AMDILEvergreenDevice.h"
using namespace llvm;
AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
: AMDGPUDevice(ST) {
setCaps();
std::string name = ST->getDeviceName();
if (name == "cedar") {
DeviceFlag = OCL_DEVICE_CEDAR;
} else if (name == "redwood") {
DeviceFlag = OCL_DEVICE_REDWOOD;
} else if (name == "cypress") {
DeviceFlag = OCL_DEVICE_CYPRESS;
} else {
DeviceFlag = OCL_DEVICE_JUNIPER;
}
}
AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
}
size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_800;
} else {
return 0;
}
}
size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
return MAX_LDS_SIZE_800;
} else {
return 0;
}
}
uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
return 12;
}
uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
switch(id) {
default:
assert(0 && "ID type passed in is unknown!");
break;
case CONSTANT_ID:
case RAW_UAV_ID:
return GLOBAL_RETURN_RAW_UAV_ID;
case GLOBAL_ID:
case ARENA_UAV_ID:
return DEFAULT_ARENA_UAV_ID;
case LDS_ID:
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return DEFAULT_LDS_ID;
} else {
return DEFAULT_ARENA_UAV_ID;
}
case GDS_ID:
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
return DEFAULT_GDS_ID;
} else {
return DEFAULT_ARENA_UAV_ID;
}
case SCRATCH_ID:
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
return DEFAULT_SCRATCH_ID;
} else {
return DEFAULT_ARENA_UAV_ID;
}
};
return 0;
}
size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
return AMDGPUDevice::WavefrontSize;
}
uint32_t AMDGPUEvergreenDevice::getGeneration() const {
return AMDGPUDeviceInfo::HD5XXX;
}
void AMDGPUEvergreenDevice::setCaps() {
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
mHWBits.set(AMDGPUDeviceInfo::ByteStores);
}
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
mSWBits.set(AMDGPUDeviceInfo::RegionMem);
} else {
mHWBits.set(AMDGPUDeviceInfo::LocalMem);
mHWBits.set(AMDGPUDeviceInfo::RegionMem);
}
mHWBits.set(AMDGPUDeviceInfo::Images);
if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
mHWBits.set(AMDGPUDeviceInfo::NoAlias);
}
mHWBits.set(AMDGPUDeviceInfo::CachedMem);
if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
}
mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
mHWBits.set(AMDGPUDeviceInfo::LongOps);
mSWBits.reset(AMDGPUDeviceInfo::LongOps);
mHWBits.set(AMDGPUDeviceInfo::TmrReg);
}
AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST) {
setCaps();
}
AMDGPUCypressDevice::~AMDGPUCypressDevice() {
}
void AMDGPUCypressDevice::setCaps() {
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
mHWBits.set(AMDGPUDeviceInfo::FMA);
}
}
AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST) {
setCaps();
}
AMDGPUCedarDevice::~AMDGPUCedarDevice() {
}
void AMDGPUCedarDevice::setCaps() {
mSWBits.set(AMDGPUDeviceInfo::FMA);
}
size_t AMDGPUCedarDevice::getWavefrontSize() const {
return AMDGPUDevice::QuarterWavefrontSize;
}
AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST) {
setCaps();
}
AMDGPURedwoodDevice::~AMDGPURedwoodDevice() {
}
void AMDGPURedwoodDevice::setCaps() {
mSWBits.set(AMDGPUDeviceInfo::FMA);
}
size_t AMDGPURedwoodDevice::getWavefrontSize() const {
return AMDGPUDevice::HalfWavefrontSize;
}

View File

@@ -0,0 +1,93 @@
//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface for the subtarget data classes.
///
/// This file will define the interface that each generation needs to
/// implement in order to correctly answer queries on the capabilities of the
/// specific hardware.
//===----------------------------------------------------------------------===//
#ifndef AMDILEVERGREENDEVICE_H
#define AMDILEVERGREENDEVICE_H
#include "AMDILDevice.h"
#include "AMDGPUSubtarget.h"
namespace llvm {
class AMDGPUSubtarget;
//===----------------------------------------------------------------------===//
// Evergreen generation of devices and their respective sub classes
//===----------------------------------------------------------------------===//
/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
/// series of cards.
///
/// This class contains information required to differentiate
/// the Evergreen device from the generic AMDGPUDevice. This device represents
/// the capabilities of the 'Juniper' cards, also known as the HD57XX.
class AMDGPUEvergreenDevice : public AMDGPUDevice {
public:
AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPUEvergreenDevice();
virtual size_t getMaxLDSSize() const;
virtual size_t getMaxGDSSize() const;
virtual size_t getWavefrontSize() const;
virtual uint32_t getGeneration() const;
virtual uint32_t getMaxNumUAVs() const;
virtual uint32_t getResourceID(uint32_t) const;
protected:
virtual void setCaps();
};
/// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it has
/// support for double precision operations. This device is used to represent
/// both the Cypress and Hemlock cards, which are commercially known as HD58XX
/// and HD59XX cards.
class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
public:
AMDGPUCypressDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPUCypressDevice();
private:
virtual void setCaps();
};
/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
/// devices.
///
/// This class differs from the base AMDGPUEvergreenDevice in that the
/// device is roughly a quarter of a 'Juniper'. These are commercially known as the
/// HD54XX and HD53XX series of cards.
class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
public:
AMDGPUCedarDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPUCedarDevice();
virtual size_t getWavefrontSize() const;
private:
virtual void setCaps();
};
/// \brief The AMDGPURedwoodDevice is the class that represents all of the 'Redwood' based
/// devices.
///
/// This class differs from the base class, in that these devices are
/// considered about half of a 'Juniper' device. These are commercially known as
/// the HD55XX and HD56XX series of cards.
class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
public:
AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
virtual ~AMDGPURedwoodDevice();
virtual size_t getWavefrontSize() const;
private:
virtual void setCaps();
};
} // namespace llvm
#endif // AMDILEVERGREENDEVICE_H

View File

@@ -0,0 +1,47 @@
//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface to describe the layout of a stack frame on an AMDGPU target
/// machine.
//
//===----------------------------------------------------------------------===//
#include "AMDILFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
int LAO, unsigned TransAl)
: TargetFrameLowering(D, StackAl, LAO, TransAl) {
}
AMDGPUFrameLowering::~AMDGPUFrameLowering() {
}
int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getObjectOffset(FI);
}
const TargetFrameLowering::SpillSlot *
AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
NumEntries = 0;
return 0;
}
void
AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
}
void
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
}
bool
AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
return false;
}

View File

@@ -0,0 +1,40 @@
//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface to describe the layout of a stack frame on an AMDGPU target
/// machine.
//
//===----------------------------------------------------------------------===//
#ifndef AMDILFRAME_LOWERING_H
#define AMDILFRAME_LOWERING_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
/// \brief Information about the stack frame layout on the AMDGPU targets.
///
/// It holds the direction of the stack growth, the known stack alignment on
/// entry to each function, and the offset to the locals area.
/// See TargetFrameInfo for more comments.
class AMDGPUFrameLowering : public TargetFrameLowering {
public:
AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
unsigned TransAl = 1);
virtual ~AMDGPUFrameLowering();
virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
virtual bool hasFP(const MachineFunction &MF) const;
};
} // namespace llvm
#endif // AMDILFRAME_LOWERING_H

View File

@@ -0,0 +1,485 @@
//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
#include "R600InstrInfo.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
#include <list>
#include <queue>
using namespace llvm;
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget &Subtarget;
public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
SDNode *Select(SDNode *N);
virtual const char *getPassName() const;
private:
inline SDValue getSmallIPtrImm(unsigned Imm);
// Complex pattern selectors
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
static bool checkType(const Value *ptr, unsigned int addrspace);
static const Value *getBasePointerValue(const Value *V);
static bool isGlobalStore(const StoreSDNode *N);
static bool isPrivateStore(const StoreSDNode *N);
static bool isLocalStore(const StoreSDNode *N);
static bool isRegionStore(const StoreSDNode *N);
static bool isCPLoad(const LoadSDNode *N);
static bool isConstantLoad(const LoadSDNode *N, int cbID);
static bool isGlobalLoad(const LoadSDNode *N);
static bool isParamLoad(const LoadSDNode *N);
static bool isPrivateLoad(const LoadSDNode *N);
static bool isLocalLoad(const LoadSDNode *N);
static bool isRegionLoad(const LoadSDNode *N);
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
return new AMDGPUDAGToDAGISel(TM);
}
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
: SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
bool AMDGPUDAGToDAGISel::SelectADDRParam(
SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
R2 = CurDAG->getTargetConstant(0, MVT::i32);
} else {
R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i32);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i32);
}
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
return false;
}
return SelectADDRParam(Addr, R1, R2);
}
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
return false;
}
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
R2 = CurDAG->getTargetConstant(0, MVT::i64);
} else {
R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i64);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i64);
}
return true;
}
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
return NULL; // Already selected.
}
switch (Opc) {
default: break;
case ISD::FrameIndex: {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
unsigned int FI = FIN->getIndex();
EVT OpVT = N->getValueType(0);
unsigned int NewOpc = AMDGPU::COPY;
SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
}
break;
}
case ISD::ConstantFP:
case ISD::Constant: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
// XXX: Custom immediate lowering not implemented yet. Instead we use
// pseudo instructions defined in SIInstructions.td
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
break;
}
const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
uint64_t ImmValue = 0;
unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
if (N->getOpcode() == ISD::ConstantFP) {
// XXX: 64-bit Immediates not supported yet
assert(N->getValueType(0) != MVT::f64);
ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
APFloat Value = C->getValueAPF();
float FloatValue = Value.convertToFloat();
if (FloatValue == 0.0) {
ImmReg = AMDGPU::ZERO;
} else if (FloatValue == 0.5) {
ImmReg = AMDGPU::HALF;
} else if (FloatValue == 1.0) {
ImmReg = AMDGPU::ONE;
} else {
ImmValue = Value.bitcastToAPInt().getZExtValue();
}
} else {
// XXX: 64-bit Immediates not supported yet
assert(N->getValueType(0) != MVT::i64);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
if (C->getZExtValue() == 0) {
ImmReg = AMDGPU::ZERO;
} else if (C->getZExtValue() == 1) {
ImmReg = AMDGPU::ONE_INT;
} else {
ImmValue = C->getZExtValue();
}
}
for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
Use != SDNode::use_end(); Use = Next) {
Next = llvm::next(Use);
std::vector<SDValue> Ops;
for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
Ops.push_back(Use->getOperand(i));
}
if (!Use->isMachineOpcode()) {
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
// We can only use literal constants (e.g. AMDGPU::ZERO,
// AMDGPU::ONE, etc) in machine opcodes.
continue;
}
} else {
if (!TII->isALUInstr(Use->getMachineOpcode())) {
continue;
}
int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
assert(ImmIdx != -1);
// subtract one from ImmIdx, because the DST operand is usually index
// 0 for MachineInstrs, but we have no DST in the Ops vector.
ImmIdx--;
// Check that we aren't already using an immediate.
// XXX: It's possible for an instruction to have more than one
// immediate operand, but this is not supported yet.
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
assert(C);
if (C->getZExtValue() != 0) {
// This instruction is already using an immediate.
continue;
}
// Set the immediate value
Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
}
}
// Set the immediate register
Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
}
break;
}
}
return SelectCode(N);
}
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
if (!ptr) {
return false;
}
Type *ptrType = ptr->getType();
return cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
}
const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
if (!V) {
return NULL;
}
const Value *ret = NULL;
ValueMap<const Value *, bool> ValueBitMap;
std::queue<const Value *, std::list<const Value *> > ValueQueue;
ValueQueue.push(V);
while (!ValueQueue.empty()) {
V = ValueQueue.front();
if (ValueBitMap.find(V) == ValueBitMap.end()) {
ValueBitMap[V] = true;
if (isa<Argument>(V) && isa<PointerType>(V->getType())) {
ret = V;
break;
} else if (isa<GlobalVariable>(V)) {
ret = V;
break;
} else if (isa<Constant>(V)) {
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
ValueQueue.push(CE->getOperand(0));
}
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
ret = AI;
break;
} else if (const Instruction *I = dyn_cast<Instruction>(V)) {
uint32_t numOps = I->getNumOperands();
for (uint32_t x = 0; x < numOps; ++x) {
ValueQueue.push(I->getOperand(x));
}
} else {
assert(!"Found a Value that we didn't know how to handle!");
}
}
ValueQueue.pop();
}
return ret;
}
bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
}
bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
return true;
}
MachineMemOperand *MMO = N->getMemOperand();
if (!MMO || !MMO->getValue()) {
return false;
}
const Value *V = MMO->getValue();
const Value *BV = getBasePointerValue(V);
if (isa<GlobalValue>(V) || (BV && isa<GlobalValue>(BV))) {
return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
}
return false;
}
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
MachineMemOperand *MMO = N->getMemOperand();
if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
if (MMO) {
const Value *V = MMO->getValue();
const PseudoSourceValue *PSV = dyn_cast_or_null<PseudoSourceValue>(V);
if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
return true;
}
}
}
return false;
}
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
// Check to make sure we are not a constant pool load or a constant load
// that is marked as a private load
if (isCPLoad(N) || isConstantLoad(N, -1)) {
return false;
}
}
if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
&& !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
return true;
}
return false;
}
const char *AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP
///==== AMDGPU Functions ====///
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
SDValue& Offset) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
return false;
}
if (Addr.getOpcode() == ISD::ADD) {
bool Match = false;
// Find the base ptr and the offset
for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
SDValue Arg = Addr.getOperand(i);
ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
// This arg isn't a constant so it must be the base PTR.
if (!OffsetNode) {
Base = Addr.getOperand(i);
continue;
}
// Check if the constant argument fits in 8-bits. The offset is in bytes
// so we need to convert it to dwords.
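// e.g. a byte offset of 1020 becomes dword offset 255 (1020 >> 2), the
// largest value that still fits in 8 bits.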
if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
Match = true;
Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
MVT::i32);
}
}
return Match;
}
// Default case, no offset
Base = Addr;
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) {
ConstantSDNode * IMMOffset;
if (Addr.getOpcode() == ISD::ADD
&& (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
return true;
// If the pointer address is constant, we can move it to the offset field.
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
CurDAG->getEntryNode().getDebugLoc(),
AMDGPU::ZERO, MVT::i32);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
return true;
}
// Default case, no offset
Base = Addr;
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
SDValue& Offset) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress ||
Addr.getOpcode() != ISD::ADD) {
return false;
}
Base = Addr.getOperand(0);
Offset = Addr.getOperand(1);
return true;
}


@@ -0,0 +1,652 @@
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering() {
int types[] = {
(int)MVT::i8,
(int)MVT::i16,
(int)MVT::i32,
(int)MVT::f32,
(int)MVT::f64,
(int)MVT::i64,
(int)MVT::v2i8,
(int)MVT::v4i8,
(int)MVT::v2i16,
(int)MVT::v4i16,
(int)MVT::v4f32,
(int)MVT::v4i32,
(int)MVT::v2f32,
(int)MVT::v2i32,
(int)MVT::v2f64,
(int)MVT::v2i64
};
int IntTypes[] = {
(int)MVT::i8,
(int)MVT::i16,
(int)MVT::i32,
(int)MVT::i64
};
int FloatTypes[] = {
(int)MVT::f32,
(int)MVT::f64
};
int VectorTypes[] = {
(int)MVT::v2i8,
(int)MVT::v4i8,
(int)MVT::v2i16,
(int)MVT::v4i16,
(int)MVT::v4f32,
(int)MVT::v4i32,
(int)MVT::v2f32,
(int)MVT::v2i32,
(int)MVT::v2f64,
(int)MVT::v2i64
};
size_t NumTypes = sizeof(types) / sizeof(*types);
size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
// These are the current register classes that are
// supported
for (unsigned int x = 0; x < NumTypes; ++x) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
//FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
// We cannot sextinreg, expand to shifts
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::SUBE, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::ADDE, VT, Expand);
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::BRCOND, VT, Custom);
setOperationAction(ISD::BR_JT, VT, Expand);
setOperationAction(ISD::BRIND, VT, Expand);
// TODO: Implement custom UREM/SREM routines
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
if (VT != MVT::i64 && VT != MVT::v2i64) {
setOperationAction(ISD::SDIV, VT, Custom);
}
}
for (unsigned int x = 0; x < NumFloatTypes; ++x) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
// IL does not have these operations for floating point types
setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
setOperationAction(ISD::SETOLT, VT, Expand);
setOperationAction(ISD::SETOGE, VT, Expand);
setOperationAction(ISD::SETOGT, VT, Expand);
setOperationAction(ISD::SETOLE, VT, Expand);
setOperationAction(ISD::SETULT, VT, Expand);
setOperationAction(ISD::SETUGE, VT, Expand);
setOperationAction(ISD::SETUGT, VT, Expand);
setOperationAction(ISD::SETULE, VT, Expand);
}
for (unsigned int x = 0; x < NumIntTypes; ++x) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
// GPU also does not have divrem function for signed or unsigned
setOperationAction(ISD::SDIVREM, VT, Expand);
// GPU does not have [S|U]MUL_LOHI functions as a single instruction
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// GPU doesn't have a rotl, rotr, or byteswap instruction
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
// GPU doesn't have any counting operators
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
}
for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
// setOperationAction(ISD::VSETCC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
setOperationAction(ISD::MULHU, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SREM, MVT::v2i64, Expand);
setOperationAction(ISD::Constant , MVT::i64 , Legal);
setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
}
if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
// we support loading/storing v2f64 but not operations on the type
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
// We want to expand vector conversions into their scalar
// counterparts.
setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
}
// TODO: Fix the UDIV24 algorithm so it works for these
// types correctly. This needs vector comparisons
// for this to work correctly.
setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
setOperationAction(ISD::SUBC, MVT::Other, Expand);
setOperationAction(ISD::ADDE, MVT::Other, Expand);
setOperationAction(ISD::ADDC, MVT::Other, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
// Use the default implementation.
setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
setOperationAction(ISD::Constant , MVT::i32 , Legal);
setSchedulingPreference(Sched::RegPressure);
setPow2DivIsCheap(false);
setPrefLoopAlignment(16);
setSelectIsExpensive(true);
setJumpIsExpensive(true);
maxStoresPerMemcpy = 4096;
maxStoresPerMemmove = 4096;
maxStoresPerMemset = 4096;
}
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I, unsigned Intrinsic) const {
return false;
}
// The backend supports 32-bit and 64-bit floating point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
}
bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
}
// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.
void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
APInt KnownZero2;
APInt KnownOne2;
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
switch (Op.getOpcode()) {
default: break;
case ISD::SELECT_CC:
DAG.ComputeMaskedBits(
Op.getOperand(1),
KnownZero,
KnownOne,
Depth + 1
);
DAG.ComputeMaskedBits(
Op.getOperand(0),
KnownZero2,
KnownOne2,
Depth + 1
);
assert((KnownZero & KnownOne) == 0
&& "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0
&& "Bits known to be one AND zero?");
// Only known if known in both the LHS and RHS
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
};
}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
EVT OVT = Op.getValueType();
SDValue DST;
if (OVT.getScalarType() == MVT::i64) {
DST = LowerSDIV64(Op, DAG);
} else if (OVT.getScalarType() == MVT::i32) {
DST = LowerSDIV32(Op, DAG);
} else if (OVT.getScalarType() == MVT::i16
|| OVT.getScalarType() == MVT::i8) {
DST = LowerSDIV24(Op, DAG);
} else {
DST = SDValue(Op.getNode(), 0);
}
return DST;
}
SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
EVT OVT = Op.getValueType();
SDValue DST;
if (OVT.getScalarType() == MVT::i64) {
DST = LowerSREM64(Op, DAG);
} else if (OVT.getScalarType() == MVT::i32) {
DST = LowerSREM32(Op, DAG);
} else if (OVT.getScalarType() == MVT::i16) {
DST = LowerSREM16(Op, DAG);
} else if (OVT.getScalarType() == MVT::i8) {
DST = LowerSREM8(Op, DAG);
} else {
DST = SDValue(Op.getNode(), 0);
}
return DST;
}
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
SDValue Data = Op.getOperand(0);
VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
DebugLoc DL = Op.getDebugLoc();
EVT DVT = Data.getValueType();
EVT BVT = BaseType->getVT();
unsigned baseBits = BVT.getScalarType().getSizeInBits();
unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
unsigned shiftBits = srcBits - baseBits;
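// e.g. sign-extending from i8 within an i32: shiftBits = 32 - 8 = 24, so
// (x << 24) followed by an arithmetic >> 24 replicates bit 7 across the
// upper bits.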
if (srcBits < 32) {
// If the op is less than 32 bits, then it needs to extend to 32bits
// so it can properly keep the upper bits valid.
EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
shiftBits = 32 - baseBits;
DVT = IVT;
}
SDValue Shift = DAG.getConstant(shiftBits, DVT);
// Shift left by 'Shift' bits.
Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
// Signed shift Right by 'Shift' bits.
Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
if (srcBits < 32) {
// Once the sign extension is done, the op needs to be converted to
// its original type.
Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
}
return Data;
}
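// Builds the 32- or 64-bit integer EVT (scalar or vector) matching the
// requested scalar size and element count; e.g. genIntType(32, 4) yields
// v4i32 and genIntType(64, 1) yields i64.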
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
int iSize = (size * numEle);
int vEle = (iSize >> ((size == 64) ? 6 : 5));
if (!vEle) {
vEle = 1;
}
if (size == 64) {
if (vEle == 1) {
return EVT(MVT::i64);
} else {
return EVT(MVT::getVectorVT(MVT::i64, vEle));
}
} else {
if (vEle == 1) {
return EVT(MVT::i32);
} else {
return EVT(MVT::getVectorVT(MVT::i32, vEle));
}
}
}
SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Jump = Op.getOperand(2);
SDValue Result;
Result = DAG.getNode(
AMDGPUISD::BRANCH_COND,
Op.getDebugLoc(),
Op.getValueType(),
Chain, Jump, Cond);
return Result;
}
SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
MVT INTTY;
MVT FLTTY;
if (!OVT.isVector()) {
INTTY = MVT::i32;
FLTTY = MVT::f32;
} else if (OVT.getVectorNumElements() == 2) {
INTTY = MVT::v2i32;
FLTTY = MVT::v2f32;
} else if (OVT.getVectorNumElements() == 4) {
INTTY = MVT::v4i32;
FLTTY = MVT::v4f32;
}
unsigned bitsize = OVT.getScalarType().getSizeInBits();
// char|short jq = ia ^ ib;
SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
// jq = jq >> (bitsize - 2)
jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
// jq = jq | 0x1
jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
// jq = (int)jq
jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
// int ia = (int)LHS;
SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
// int ib = (int)RHS;
SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
// float fa = (float)ia;
SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
// float fb = (float)ib;
SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
// float fq = native_divide(fa, fb);
SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
// fq = trunc(fq);
fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
// float fqneg = -fq;
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
// float fr = mad(fqneg, fb, fa);
SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
// fr = fabs(fr);
fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
// fb = fabs(fb);
fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
// int cv = fr >= fb;
SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
// jq = (cv ? jq : 0);
jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
DAG.getConstant(0, OVT));
// dst = iq + jq;
iq = DAG.getSExtOrTrunc(iq, DL, OVT);
iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
return iq;
}
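// For reference, the DAG sequence built above corresponds to the scalar
// routine below. This is an illustrative sketch, not part of the backend
// (the name sdiv24_reference is ours): it assumes both operands fit in
// 24 bits, so the single-precision divide is accurate enough that at most
// one correction step is needed. The DAG code shifts by (bitsize - 2) of
// the narrow source type; with 32-bit ints that becomes a shift by 30.
#include <math.h>
static int sdiv24_reference(int ia, int ib) {
int jq = ((ia ^ ib) >> 30) | 1;          // -1 if the signs differ, else +1
float fa = (float)ia;
float fb = (float)ib;
float fq = truncf(fa / fb);              // truncated approximate quotient
float fr = fabsf(fa - fq * fb);          // |remainder| of that quotient
int iq = (int)fq;
return iq + (fr >= fabsf(fb) ? jq : 0);  // fix up an off-by-one quotient
}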
SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
// The LowerSDIV32 function generates code equivalent to the following IL:
// mov r0, LHS
// mov r1, RHS
// ilt r10, r0, 0
// ilt r11, r1, 0
// iadd r0, r0, r10
// iadd r1, r1, r11
// ixor r0, r0, r10
// ixor r1, r1, r11
// udiv r0, r0, r1
// ixor r10, r10, r11
// iadd r0, r0, r10
// ixor DST, r0, r10
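// Note: r10 and r11 are 0 or all-ones sign masks; (x + mask) ^ mask is a
// branchless conditional negate, so both operands are made non-negative
// before the udiv, and the final iadd/ixor pair restores the quotient's
// sign (negative iff exactly one input was negative).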
// mov r0, LHS
SDValue r0 = LHS;
// mov r1, RHS
SDValue r1 = RHS;
// ilt r10, r0, 0
SDValue r10 = DAG.getSelectCC(DL,
r0, DAG.getConstant(0, OVT),
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
ISD::SETLT);
// ilt r11, r1, 0
SDValue r11 = DAG.getSelectCC(DL,
r1, DAG.getConstant(0, OVT),
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
ISD::SETLT);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// iadd r1, r1, r11
r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
// ixor r0, r0, r10
r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// ixor r1, r1, r11
r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// udiv r0, r0, r1
r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
// ixor r10, r10, r11
r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// ixor DST, r0, r10
SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
return DST;
}
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
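// 64-bit signed division is not custom lowered yet; return the node
// unchanged.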
return SDValue(Op.getNode(), 0);
}
SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
MVT INTTY = MVT::i32;
if (OVT == MVT::v2i8) {
INTTY = MVT::v2i32;
} else if (OVT == MVT::v4i8) {
INTTY = MVT::v4i32;
}
SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
return LHS;
}
SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
MVT INTTY = MVT::i32;
if (OVT == MVT::v2i16) {
INTTY = MVT::v2i32;
} else if (OVT == MVT::v4i16) {
INTTY = MVT::v4i32;
}
SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
return LHS;
}
SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT OVT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
// The LowerSREM32 function generates code equivalent to the following IL:
// mov r0, LHS
// mov r1, RHS
// ilt r10, r0, 0
// ilt r11, r1, 0
// iadd r0, r0, r10
// iadd r1, r1, r11
// ixor r0, r0, r10
// ixor r1, r1, r11
// udiv r20, r0, r1
// umul r20, r20, r1
// sub r0, r0, r20
// iadd r0, r0, r10
// ixor DST, r0, r10
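// As in LowerSDIV32, r10 and r11 are sign masks used for branchless
// conditional negation; only r10 (the dividend's sign) is applied at the
// end, so the remainder takes the sign of the dividend, as in C.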
// mov r0, LHS
SDValue r0 = LHS;
// mov r1, RHS
SDValue r1 = RHS;
// ilt r10, r0, 0
SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
// ilt r11, r1, 0
SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// iadd r1, r1, r11
r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
// ixor r0, r0, r10
r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// ixor r1, r1, r11
r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// udiv r20, r0, r1
SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
// umul r20, r20, r1
r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
// sub r0, r0, r20
r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// ixor DST, r0, r10
SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
return DST;
}
SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
return SDValue(Op.getNode(), 0);
}


@@ -0,0 +1,273 @@
//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file describes the AMDIL instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
// AMDIL Instruction Predicate Definitions
// Predicate that is set to true if the hardware supports double precision
// divide
def HasHWDDiv : Predicate<"Subtarget.device()"
"->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
"Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
// Predicate that is set to true if the hardware supports doubles, but not
// double precision divide, in hardware
def HasSWDDiv : Predicate<"Subtarget.device()"
"->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
"Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
// Predicate that is set to true if the hardware supports 24-bit signed
// math ops. Otherwise a software expansion to 32-bit math ops is used instead.
def HasHWSign24Bit : Predicate<"Subtarget.device()"
"->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
// Predicates that are set to true if 64-bit operations are supported in
// hardware or in software, respectively
def HasHW64Bit : Predicate<"Subtarget.device()"
"->usesHardware(AMDGPUDeviceInfo::LongOps)">;
def HasSW64Bit : Predicate<"Subtarget.device()"
"->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
// Predicate that is set to true if the timer register is supported
def HasTmrRegister : Predicate<"Subtarget.device()"
"->isSupported(AMDGPUDeviceInfo::TmrReg)">;
// Predicate that is true if the device is at least the Evergreen series
def HasDeviceIDInst : Predicate<"Subtarget.device()"
"->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
// Predicate that is true if we have region address space.
def hasRegionAS : Predicate<"Subtarget.device()"
"->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
// Predicate that is false if we don't have region address space.
def noRegionAS : Predicate<"!Subtarget.device()"
"->isSupported(AMDGPUDeviceInfo::RegionMem)">;
// Predicates indicating whether 64-bit multiply is supported natively in the IL
def HasHW64Mul : Predicate<"Subtarget.calVersion()"
">= CAL_VERSION_SC_139"
"&& Subtarget.device()"
"->getGeneration() >="
"AMDGPUDeviceInfo::HD5XXX">;
def HasSW64Mul : Predicate<"Subtarget.calVersion()"
"< CAL_VERSION_SC_139">;
// Predicates indicating whether 64-bit div/mod is supported natively in the IL
def HasHW64DivMod : Predicate<"Subtarget.device()"
"->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
def HasSW64DivMod : Predicate<"Subtarget.device()"
"->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
// Predicate that is set to true if 64-bit pointers are used.
def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;
//===--------------------------------------------------------------------===//
// Custom Selection DAG Type Profiles
//===--------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Generic Profile Types
//===----------------------------------------------------------------------===//
def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
]>;
def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
SDTCisEltOfVec<1, 0>
]>;
//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Branch instruction where second and third are basic blocks
def SDTIL_BRCond : SDTypeProfile<0, 2, [
SDTCisVT<0, OtherVT>
]>;
//===--------------------------------------------------------------------===//
// Custom Selection DAG Nodes
//===--------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
//===--------------------------------------------------------------------===//
// Instructions
//===--------------------------------------------------------------------===//
// Floating point math functions
def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
//===----------------------------------------------------------------------===//
// Integer functions
//===----------------------------------------------------------------------===//
def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
//===--------------------------------------------------------------------===//
// Custom Pattern DAG Nodes
//===--------------------------------------------------------------------===//
def global_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
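// Instruction selection patterns can then match the fragment directly,
// e.g. (global_store GPRI32:$val, ADDR:$ptr) to pick a global-memory
// store (illustrative only; the real patterns live in the per-generation
// instruction files).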
//===----------------------------------------------------------------------===//
// Load pattern fragments
//===----------------------------------------------------------------------===//
// Global address space loads
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
// Constant address space loads
def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
//===----------------------------------------------------------------------===//
// Complex addressing mode patterns
//===----------------------------------------------------------------------===//
def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
//===----------------------------------------------------------------------===//
// Instruction format classes
//===----------------------------------------------------------------------===//
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
let Namespace = "AMDGPU";
dag OutOperandList = outs;
dag InOperandList = ins;
let Pattern = pattern;
let AsmString = !strconcat(asmstr, "\n");
let isPseudo = 1;
let Itinerary = NullALU;
bit hasIEEEFlag = 0;
bit hasZeroOpFlag = 0;
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
//===--------------------------------------------------------------------===//
// Multiclass Instruction formats
//===--------------------------------------------------------------------===//
// Multiclass that handles branch instructions
multiclass BranchConditional<SDNode Op> {
def _i32 : ILFormat<(outs),
(ins brtarget:$target, GPRI32:$src0),
"; i32 Pseudo branch instruction",
[(Op bb:$target, GPRI32:$src0)]>;
def _f32 : ILFormat<(outs),
(ins brtarget:$target, GPRF32:$src0),
"; f32 Pseudo branch instruction",
[(Op bb:$target, GPRF32:$src0)]>;
}
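// For example, "defm BRANCH_COND : BranchConditional<IL_brcond>" below
// expands to BRANCH_COND_i32 and BRANCH_COND_f32 pseudo instructions.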
// Only scalar types should generate flow control
multiclass BranchInstr<string name> {
def _i32 : ILFormat<(outs), (ins GPRI32:$src),
!strconcat(name, " $src"), []>;
def _f32 : ILFormat<(outs), (ins GPRF32:$src),
!strconcat(name, " $src"), []>;
}
// Only scalar types should generate flow control
multiclass BranchInstr2<string name> {
def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
!strconcat(name, " $src0, $src1"), []>;
def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
!strconcat(name, " $src0, $src1"), []>;
}
//===--------------------------------------------------------------------===//
// Intrinsics support
//===--------------------------------------------------------------------===//
include "AMDILIntrinsics.td"
//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom inserter for branches and returns; this will eventually become a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
defm BRANCH_COND : BranchConditional<IL_brcond>;
}
//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
let isTerminator=1 in {
def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
!strconcat("SWITCH", " $src"), []>;
def CASE : ILFormat< (outs), (ins GPRI32:$src),
!strconcat("CASE", " $src"), []>;
def BREAK : ILFormat< (outs), (ins),
"BREAK", []>;
def CONTINUE : ILFormat< (outs), (ins),
"CONTINUE", []>;
def DEFAULT : ILFormat< (outs), (ins),
"DEFAULT", []>;
def ELSE : ILFormat< (outs), (ins),
"ELSE", []>;
def ENDSWITCH : ILFormat< (outs), (ins),
"ENDSWITCH", []>;
def ENDMAIN : ILFormat< (outs), (ins),
"ENDMAIN", []>;
def END : ILFormat< (outs), (ins),
"END", []>;
def ENDFUNC : ILFormat< (outs), (ins),
"ENDFUNC", []>;
def ENDIF : ILFormat< (outs), (ins),
"ENDIF", []>;
def WHILELOOP : ILFormat< (outs), (ins),
"WHILE", []>;
def ENDLOOP : ILFormat< (outs), (ins),
"ENDLOOP", []>;
def FUNC : ILFormat< (outs), (ins),
"FUNC", []>;
def RETDYN : ILFormat< (outs), (ins),
"RET_DYN", []>;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
defm IFC : BranchInstr2<"IFC">;
defm BREAKC : BranchInstr2<"BREAKC">;
defm CONTINUEC : BranchInstr2<"CONTINUEC">;
}


@@ -0,0 +1,79 @@
//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief AMDGPU Implementation of the IntrinsicInfo class.
//
//===-----------------------------------------------------------------------===//
#include "AMDILIntrinsicInfo.h"
#include "AMDIL.h"
#include "AMDGPUSubtarget.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
using namespace llvm;
#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
#include "AMDGPUGenIntrinsics.inc"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
: TargetIntrinsicInfo() {
}
std::string
AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
unsigned int numTys) const {
static const char* const names[] = {
#define GET_INTRINSIC_NAME_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_NAME_TABLE
};
if (IntrID < Intrinsic::num_intrinsics) {
return "";
}
assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
&& "Invalid intrinsic ID");
std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
return Result;
}
unsigned int
AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
#define GET_FUNCTION_RECOGNIZER
#include "AMDGPUGenIntrinsics.inc"
#undef GET_FUNCTION_RECOGNIZER
AMDGPUIntrinsic::ID IntrinsicID
= (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
return IntrinsicID;
}
return 0;
}
bool
AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
// Overload Table
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
Function*
AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned numTys) const {
assert(!"Not implemented");
return NULL; // Keep release builds (where assert compiles away) well-formed.
}


@@ -0,0 +1,49 @@
//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
//
//===-----------------------------------------------------------------------===//
#ifndef AMDIL_INTRINSICS_H
#define AMDIL_INTRINSICS_H
#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
namespace llvm {
class TargetMachine;
namespace AMDGPUIntrinsic {
enum ID {
last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
#define GET_INTRINSIC_ENUM_VALUES
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_ENUM_VALUES
, num_AMDGPU_intrinsics
};
} // end namespace AMDGPUIntrinsic
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
public:
AMDGPUIntrinsicInfo(TargetMachine *tm);
std::string getName(unsigned int IntrId, Type **Tys = 0,
unsigned int numTys = 0) const;
unsigned int lookupName(const char *Name, unsigned int Len) const;
bool isOverloaded(unsigned int IID) const;
Function *getDeclaration(Module *M, unsigned int ID,
Type **Tys = 0,
unsigned int numTys = 0) const;
};
} // end namespace llvm
#endif // AMDIL_INTRINSICS_H


@@ -0,0 +1,242 @@
//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file defines all of the AMDIL-specific intrinsics
//
//===---------------------------------------------------------------===//
//===--------------------------------------------------------------------===//
// Intrinsic classes
// Generic helper classes for target-specific intrinsics, grouped by arity
// and operand types, used by the intrinsic definitions below.
//===--------------------------------------------------------------------===//
let TargetPrefix = "AMDIL", isTarget = 1 in {
class VoidIntLong :
Intrinsic<[llvm_i64_ty], [], []>;
class VoidIntInt :
Intrinsic<[llvm_i32_ty], [], []>;
class VoidIntBool :
Intrinsic<[llvm_i32_ty], [], []>;
class UnaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class UnaryIntFloat :
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class ConvertIntFTOI :
Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
class ConvertIntITOF :
Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
class UnaryIntNoRetInt :
Intrinsic<[], [llvm_anyint_ty], []>;
class UnaryIntNoRetFloat :
Intrinsic<[], [llvm_anyfloat_ty], []>;
class BinaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class BinaryIntFloat :
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class BinaryIntNoRetInt :
Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
class BinaryIntNoRetFloat :
Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
class TernaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class TernaryIntFloat :
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class QuaternaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class UnaryAtomicInt :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class BinaryAtomicInt :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class TernaryAtomicInt :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
class UnaryAtomicIntNoRet :
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class BinaryAtomicIntNoRet :
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class TernaryAtomicIntNoRet :
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
}
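// As an example of how these classes are used: int_AMDIL_mad_i32 below
// pairs TernaryIntInt with GCCBuiltin<"__amdil_imad">, yielding an
// overloaded three-operand integer intrinsic that Clang maps to from the
// __amdil_imad builtin.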
let TargetPrefix = "AMDIL", isTarget = 1 in {
def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
TernaryIntInt;
def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
TernaryIntInt;
def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
UnaryIntInt;
def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
UnaryIntInt;
def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
UnaryIntInt;
def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
UnaryIntInt;
def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
UnaryIntInt;
def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
TernaryIntInt;
def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
TernaryIntInt;
def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
QuaternaryIntInt;
def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
TernaryIntInt;
def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
BinaryIntInt;
def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
TernaryIntInt;
def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
TernaryIntInt;
def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
TernaryIntFloat;
def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
BinaryIntInt;
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
BinaryIntInt;
def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
BinaryIntInt;
def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
BinaryIntInt;
def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
BinaryIntInt;
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
BinaryIntInt;
def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
TernaryIntInt;
def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
TernaryIntInt;
def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
BinaryIntInt;
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
BinaryIntInt;
def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
BinaryIntInt;
def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
BinaryIntInt;
def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
BinaryIntFloat;
def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
BinaryIntInt;
def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
BinaryIntInt;
def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
BinaryIntFloat;
def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
TernaryIntInt;
def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
TernaryIntInt;
def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
TernaryIntInt;
def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
UnaryIntFloat;
def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
TernaryIntFloat;
def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
UnaryIntFloat;
def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
UnaryIntFloat;
def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
UnaryIntFloat;
def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
UnaryIntFloat;
def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
UnaryIntFloat;
def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
UnaryIntFloat;
def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
UnaryIntFloat;
def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
UnaryIntFloat;
def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
UnaryIntFloat;
def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
UnaryIntFloat;
def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
UnaryIntFloat;
def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
UnaryIntFloat;
def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
UnaryIntFloat;
def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
UnaryIntFloat;
def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
UnaryIntFloat;
def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
UnaryIntFloat;
def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
UnaryIntFloat;
def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
UnaryIntFloat;
def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
UnaryIntFloat;
def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
UnaryIntFloat;
def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
TernaryIntFloat;
def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
UnaryIntFloat;
def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
UnaryIntFloat;
def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
UnaryIntFloat;
def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
TernaryIntFloat;
def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_i32_ty], []>;
def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
ConvertIntITOF;
def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
ConvertIntFTOI;
def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
ConvertIntITOF;
def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
ConvertIntITOF;
def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
ConvertIntITOF;
def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
ConvertIntITOF;
def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
llvm_v2f32_ty, llvm_float_ty], []>;
def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
llvm_v2f32_ty], []>;
def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], []>;
def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], []>;
}


@@ -0,0 +1,65 @@
//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//==-----------------------------------------------------------------------===//
#include "AMDILNIDevice.h"
#include "AMDILEvergreenDevice.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST) {
std::string name = ST->getDeviceName();
if (name == "caicos") {
DeviceFlag = OCL_DEVICE_CAICOS;
} else if (name == "turks") {
DeviceFlag = OCL_DEVICE_TURKS;
} else if (name == "cayman") {
DeviceFlag = OCL_DEVICE_CAYMAN;
} else {
DeviceFlag = OCL_DEVICE_BARTS;
}
}
AMDGPUNIDevice::~AMDGPUNIDevice() {
}
size_t
AMDGPUNIDevice::getMaxLDSSize() const {
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_900;
} else {
return 0;
}
}
uint32_t
AMDGPUNIDevice::getGeneration() const {
return AMDGPUDeviceInfo::HD6XXX;
}
AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
: AMDGPUNIDevice(ST) {
setCaps();
}
AMDGPUCaymanDevice::~AMDGPUCaymanDevice() {
}
void
AMDGPUCaymanDevice::setCaps() {
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
mHWBits.set(AMDGPUDeviceInfo::FMA);
}
mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
}


@@ -0,0 +1,57 @@
//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
/// \file
/// \brief Interface for the subtarget data classes.
///
/// This file will define the interface that each generation needs to
/// implement in order to correctly answer queries on the capabilities of the
/// specific hardware.
//===---------------------------------------------------------------------===//
#ifndef AMDILNIDEVICE_H
#define AMDILNIDEVICE_H
#include "AMDILEvergreenDevice.h"
#include "AMDGPUSubtarget.h"
namespace llvm {
class AMDGPUSubtarget;
//===---------------------------------------------------------------------===//
// NI generation of devices and their respective sub classes
//===---------------------------------------------------------------------===//
/// \brief The AMDGPUNIDevice is the base class for all Northern Islands series
/// of cards.
///
/// It is very similar to the AMDGPUEvergreenDevice, the major exceptions being
/// differences in wavefront size and hardware capabilities. The NI devices all
/// use 64-wide wavefronts and also add support for signed 24-bit integer
/// operations.
class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
public:
AMDGPUNIDevice(AMDGPUSubtarget*);
virtual ~AMDGPUNIDevice();
virtual size_t getMaxLDSSize() const;
virtual uint32_t getGeneration() const;
};
/// Just as the AMDGPUCypressDevice is the double-capable version of the
/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double-capable version
/// of the AMDGPUNIDevice. The other major difference is that the Cayman device
/// has 4-wide ALUs, whereas the rest of the NI family has 5-wide ALUs.
class AMDGPUCaymanDevice: public AMDGPUNIDevice {
public:
AMDGPUCaymanDevice(AMDGPUSubtarget*);
virtual ~AMDGPUCaymanDevice();
private:
virtual void setCaps();
};
static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
} // namespace llvm
#endif // AMDILNIDEVICE_H

File diff suppressed because it is too large


@@ -0,0 +1,107 @@
//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Declarations that describe the AMDIL register file
//
//===----------------------------------------------------------------------===//
class AMDILReg<bits<16> num, string n> : Register<n> {
field bits<16> Value;
let Value = num;
let Namespace = "AMDGPU";
}
// We will start with 8 registers for each class before expanding to more
// Since the swizzle is added based on the register class, we can leave it
// off here and just specify different registers for different register classes
def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
// All registers between 1000 and 1024 are reserved and cannot be used
// unless they are given a definition in this section.
// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local IDs
// r1020 is used to hold the frame index for local arrays
// r1019 is used to hold the dynamic stack allocation pointer
// r1018 is used as a temporary register for handwritten code
// r1017 is used as a temporary register for handwritten code
// r1016 is used as a temporary register for load/store code
// r1015 is used as a temporary register for data segment offset
// r1014 is used as a temporary register for store code
// r1013 is used as the section data pointer register
// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
// r1009 is used as the frame pointer register
// r999 is used as the mem register.
// r998 is used as the return address register.
//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
let AltOrders = [(add (sequence "R%u", 1, 20))];
let AltOrderSelect = [{
return 1;
}];
}
def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
let AltOrders = [(add (sequence "R%u", 1, 20))];
let AltOrderSelect = [{
return 1;
}];
}
def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
let AltOrders = [(add (sequence "R%u", 1, 20))];
let AltOrderSelect = [{
return 1;
}];
}
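// Note: AltOrderSelect above unconditionally returns 1, which selects the
// single alternative order (r1-r20). The allocator therefore prefers the
// plain GPRs and keeps the special-purpose registers (SP, T1-T5, SDP, DFP,
// MEM, RA, and the r10xx temporaries) out of normal allocation.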

View File

@@ -0,0 +1,45 @@
//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//==-----------------------------------------------------------------------===//
#include "AMDILSIDevice.h"
#include "AMDILEvergreenDevice.h"
#include "AMDILNIDevice.h"
#include "AMDGPUSubtarget.h"
using namespace llvm;
AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
: AMDGPUEvergreenDevice(ST) {
}
AMDGPUSIDevice::~AMDGPUSIDevice() {
}
size_t
AMDGPUSIDevice::getMaxLDSSize() const {
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_900;
} else {
return 0;
}
}
uint32_t
AMDGPUSIDevice::getGeneration() const {
return AMDGPUDeviceInfo::HD7XXX;
}
std::string
AMDGPUSIDevice::getDataLayout() const {
return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
"-n8:16:32:64");
}
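// In LLVM data layout syntax: "e" selects little-endian, "p:64:64:64" gives
// 64-bit pointers with 64-bit ABI and preferred alignment, and "n8:16:32:64"
// lists the integer widths the target supports natively.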

View File

@@ -0,0 +1,39 @@
//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface for the subtarget data classes.
///
/// This file will define the interface that each generation needs to
/// implement in order to correctly answer queries on the capabilities of the
/// specific hardware.
//===---------------------------------------------------------------------===//
#ifndef AMDILSIDEVICE_H
#define AMDILSIDEVICE_H
#include "AMDILEvergreenDevice.h"
namespace llvm {
class AMDGPUSubtarget;
//===---------------------------------------------------------------------===//
// SI generation of devices and their respective sub classes
//===---------------------------------------------------------------------===//
/// \brief The AMDGPUSIDevice is the base class for all Southern Island series
/// of cards.
class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
public:
AMDGPUSIDevice(AMDGPUSubtarget*);
virtual ~AMDGPUSIDevice();
virtual size_t getMaxLDSSize() const;
virtual uint32_t getGeneration() const;
virtual std::string getDataLayout() const;
};
} // namespace llvm
#endif // AMDILSIDEVICE_H

View File

@@ -0,0 +1,54 @@
set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
add_public_tablegen_target(AMDGPUCommonTableGen)
add_llvm_target(R600CodeGen
AMDIL7XXDevice.cpp
AMDILCFGStructurizer.cpp
AMDILDevice.cpp
AMDILDeviceInfo.cpp
AMDILEvergreenDevice.cpp
AMDILFrameLowering.cpp
AMDILIntrinsicInfo.cpp
AMDILISelDAGToDAG.cpp
AMDILISelLowering.cpp
AMDILNIDevice.cpp
AMDILPeepholeOptimizer.cpp
AMDILSIDevice.cpp
AMDGPUAsmPrinter.cpp
AMDGPUMCInstLower.cpp
AMDGPUSubtarget.cpp
AMDGPUTargetMachine.cpp
AMDGPUISelLowering.cpp
AMDGPUConvertToISA.cpp
AMDGPUInstrInfo.cpp
AMDGPURegisterInfo.cpp
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
R600MachineFunctionInfo.cpp
R600RegisterInfo.cpp
SIAssignInterpRegs.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
SILowerLiteralConstants.cpp
SILowerControlFlow.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
SIFixSGPRLiveness.cpp
)
add_dependencies(LLVMR600CodeGen intrinsics_gen)
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)

View File

@@ -0,0 +1,132 @@
//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
// \file
//===----------------------------------------------------------------------===//
#include "AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
printInstruction(MI, OS);
printAnnotation(OS, Annot);
}
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
switch (Op.getReg()) {
// This is the default predicate state, so we don't need to print it.
case AMDGPU::PRED_SEL_OFF: break;
default: O << getRegisterName(Op.getReg()); break;
}
} else if (Op.isImm()) {
O << Op.getImm();
} else if (Op.isFPImm()) {
O << Op.getFPImm();
} else {
assert(!"unknown operand type in printOperand");
}
}
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printOperand(MI, OpNo, O);
O << ", ";
printOperand(MI, OpNo + 1, O);
}
void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
raw_ostream &O, StringRef Asm) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm());
if (Op.getImm() == 1) {
O << Asm;
}
}
void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printIfSet(MI, OpNo, O, "|");
}
void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printIfSet(MI, OpNo, O, "_SAT");
}
void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
union Literal {
float f;
int32_t i;
} L;
L.i = MI->getOperand(OpNo).getImm();
O << L.i << "(" << L.f << ")";
}
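// The union above reinterprets the literal's 32 bits both ways: an immediate
// of 0x3f800000 is 1065353216 as an integer and 1.0f as a float, and both
// readings are printed.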
void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printIfSet(MI, OpNo, O, " *");
}
void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printIfSet(MI, OpNo, O, "-");
}
void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
switch (MI->getOperand(OpNo).getImm()) {
default: break;
case 1:
O << " * 2.0";
break;
case 2:
O << " * 4.0";
break;
case 3:
O << " / 2.0";
break;
}
}
void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.getImm() != 0) {
O << " + " << Op.getImm();
}
}
void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printIfSet(MI, OpNo, O, "ExecMask,");
}
void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printIfSet(MI, OpNo, O, "Pred,");
}
void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.getImm() == 0) {
O << " (MASKED)";
}
}
#include "AMDGPUGenAsmWriter.inc"

View File

@@ -0,0 +1,52 @@
//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
//===----------------------------------------------------------------------===//
#ifndef AMDGPUINSTPRINTER_H
#define AMDGPUINSTPRINTER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
class AMDGPUInstPrinter : public MCInstPrinter {
public:
AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // End namespace llvm
#endif // AMDGPUINSTPRINTER_H

View File

@@ -0,0 +1,7 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMR600AsmPrinter
AMDGPUInstPrinter.cpp
)
add_dependencies(LLVMR600AsmPrinter AMDGPUCommonTableGen)

View File

@@ -0,0 +1,24 @@
;===- ./lib/Target/R600/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = R600AsmPrinter
parent = R600
required_libraries = MC Support
add_to_library_groups = R600

View File

@@ -0,0 +1,15 @@
##===- lib/Target/R600/InstPrinter/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMR600AsmPrinter
# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@@ -0,0 +1,32 @@
;===- ./lib/Target/R600/LLVMBuild.txt --------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[common]
subdirectories = InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = R600
parent = Target
has_asmprinter = 1
[component_1]
type = Library
name = R600CodeGen
parent = R600
required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC R600AsmPrinter R600Desc R600Info
add_to_library_groups = R600

View File

@@ -0,0 +1,82 @@
//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
public:
AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
//XXX: Implement if necessary.
}
virtual void RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) {
assert(!"Not implemented");
}
virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
};
class AMDGPUAsmBackend : public MCAsmBackend {
public:
AMDGPUAsmBackend(const Target &T)
: MCAsmBackend() {}
virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
virtual unsigned getNumFixupKinds() const { return 0; }
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const { assert(!"Not implemented"); }
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCInstFragment *DF,
const MCAsmLayout &Layout) const {
return false;
}
virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
assert(!"Not implemented");
}
virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
};
} //End anonymous namespace
void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
Asm.writeSectionData(I, Layout);
}
}
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
StringRef CPU) {
return new AMDGPUAsmBackend(T);
}
AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
raw_ostream &OS) const {
return new AMDGPUMCObjectWriter(OS);
}

View File

@@ -0,0 +1,85 @@
//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
#include "AMDGPUMCAsmInfo.h"
using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
HasSingleParameterDotFile = false;
WeakDefDirective = 0;
//===------------------------------------------------------------------===//
HasSubsectionsViaSymbols = true;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = true;
MaxInstLength = 16;
PCSymbol = "$";
SeparatorString = "\n";
CommentColumn = 40;
CommentString = ";";
LabelSuffix = ":";
GlobalPrefix = "@";
PrivateGlobalPrefix = ";.";
LinkerPrivateGlobalPrefix = "!";
InlineAsmStart = ";#ASMSTART";
InlineAsmEnd = ";#ASMEND";
AssemblerDialect = 0;
AllowQuotesInName = false;
AllowNameToStartWithDigit = false;
AllowPeriodsInName = false;
//===--- Data Emission Directives -------------------------------------===//
ZeroDirective = ".zero";
AsciiDirective = ".ascii\t";
AscizDirective = ".asciz\t";
Data8bitsDirective = ".byte\t";
Data16bitsDirective = ".short\t";
Data32bitsDirective = ".long\t";
Data64bitsDirective = ".quad\t";
GPRel32Directive = 0;
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
HasMicrosoftFastStdCallMangling = false;
//===--- Alignment Information ----------------------------------------===//
AlignDirective = ".align\t";
AlignmentIsInBytes = true;
TextAlignFillValue = 0;
//===--- Global Variable Emission Directives --------------------------===//
GlobalDirective = ".global";
ExternDirective = ".extern";
HasSetDirective = false;
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
HasSymbolResolver = false;
WeakRefDirective = ".weakref\t";
LinkOnceDirective = 0;
//===--- Dwarf Emission Directives -----------------------------------===//
HasLEB128 = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::None;
DwarfUsesInlineInfoSection = false;
DwarfSectionOffsetDirective = ".offset";
}
const char*
AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const {
return 0;
}
const MCSection*
AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
return 0;
}

View File

@@ -0,0 +1,30 @@
//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUMCASMINFO_H
#define AMDGPUMCASMINFO_H
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
class StringRef;
class AMDGPUMCAsmInfo : public MCAsmInfo {
public:
explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
const char* getDataASDirective(unsigned int Size, unsigned int AS) const;
const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
};
} // namespace llvm
#endif // AMDGPUMCASMINFO_H

View File

@@ -0,0 +1,60 @@
//===-- AMDGPUMCCodeEmitter.h - AMDGPU Code Emitter interface ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief CodeEmitter interface for R600 and SI codegen.
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPUCODEEMITTER_H
#define AMDGPUCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
class MCInst;
class MCOperand;
class AMDGPUMCCodeEmitter : public MCCodeEmitter {
public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups) const;
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
return Value;
}
virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
};
} // End namespace llvm
#endif // AMDGPUCODEEMITTER_H

View File

@@ -0,0 +1,113 @@
//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This file provides AMDGPU specific target descriptions.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
#include "AMDGPUMCAsmInfo.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "AMDGPUGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "AMDGPUGenSubtargetInfo.inc"
#define GET_REGINFO_MC_DESC
#include "AMDGPUGenRegisterInfo.inc"
using namespace llvm;
static MCInstrInfo *createAMDGPUMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitAMDGPUMCInstrInfo(X);
return X;
}
static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitAMDGPUMCRegisterInfo(X, 0);
return X;
}
static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
MCSubtargetInfo * X = new MCSubtargetInfo();
InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
return X;
}
static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
} else {
return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
}
}
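// Note: of the processors in Processors.td, only SI sets Feature64BitPtr, so
// that feature bit doubles as the SI-versus-R600 discriminator here.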
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
raw_ostream &_OS,
MCCodeEmitter *_Emitter,
bool RelaxAll,
bool NoExecStack) {
return createPureStreamer(Ctx, MAB, _OS, _Emitter);
}
extern "C" void LLVMInitializeR600TargetMC() {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
}

View File

@@ -0,0 +1,55 @@
//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Provides AMDGPU specific target descriptions.
//
//===----------------------------------------------------------------------===//
//
#ifndef AMDGPUMCTARGETDESC_H
#define AMDGPUMCTARGETDESC_H
#include "llvm/ADT/StringRef.h"
namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
extern Target TheAMDGPUTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
StringRef CPU);
} // End llvm namespace
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#endif // AMDGPUMCTARGETDESC_H

View File

@@ -0,0 +1,10 @@
add_llvm_library(LLVMR600Desc
AMDGPUAsmBackend.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUMCAsmInfo.cpp
R600MCCodeEmitter.cpp
SIMCCodeEmitter.cpp
)
add_dependencies(LLVMR600Desc AMDGPUCommonTableGen)

View File

@@ -0,0 +1,23 @@
;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = R600Desc
parent = R600
required_libraries = R600AsmPrinter R600Info MC
add_to_library_groups = R600

View File

@@ -0,0 +1,16 @@
##===- lib/Target/R600/MCTargetDesc/Makefile ---------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMR600Desc
# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@@ -0,0 +1,575 @@
//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This code emitter outputs bytecode that is understood by the r600g driver
/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
/// but it still needs to be run through a finalizer in order to be executed
/// by the GPU.
///
/// [1] http://www.mesa3d.org/
//
//===----------------------------------------------------------------------===//
#include "R600Defines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
#include <stdio.h>
#define SRC_BYTE_COUNT 11
#define DST_BYTE_COUNT 5
using namespace llvm;
namespace {
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
const MCSubtargetInfo &STI;
MCContext &Ctx;
public:
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
const MCSubtargetInfo &sti, MCContext &ctx)
: MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
/// \brief Encode the instruction and write it to the OS.
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
/// \returns the encoding for an MCOperand.
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const;
private:
void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
raw_ostream &OS) const;
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
void EmitByte(unsigned int byte, raw_ostream &OS) const;
void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
void Emit(uint32_t value, raw_ostream &OS) const;
void Emit(uint64_t value, raw_ostream &OS) const;
unsigned getHWRegChan(unsigned reg) const;
unsigned getHWReg(unsigned regNo) const;
bool isFCOp(unsigned opcode) const;
bool isTexOp(unsigned opcode) const;
bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
};
} // End anonymous namespace
enum RegElement {
ELEMENT_X = 0,
ELEMENT_Y,
ELEMENT_Z,
ELEMENT_W
};
enum InstrTypes {
INSTR_ALU = 0,
INSTR_TEX,
INSTR_FC,
INSTR_NATIVE,
INSTR_VTX,
INSTR_EXPORT
};
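// Each record in the byte stream handed to r600g starts with one of these
// tag bytes, followed by a type-specific payload; e.g. an INSTR_NATIVE
// record is the tag byte followed by the raw 64-bit instruction word (see
// EncodeInstruction below).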
enum FCInstr {
FC_IF_PREDICATE = 0,
FC_ELSE,
FC_ENDIF,
FC_BGNLOOP,
FC_ENDLOOP,
FC_BREAK_PREDICATE,
FC_CONTINUE
};
enum TextureTypes {
TEXTURE_1D = 1,
TEXTURE_2D,
TEXTURE_3D,
TEXTURE_CUBE,
TEXTURE_RECT,
TEXTURE_SHADOW1D,
TEXTURE_SHADOW2D,
TEXTURE_SHADOWRECT,
TEXTURE_1D_ARRAY,
TEXTURE_2D_ARRAY,
TEXTURE_SHADOW1D_ARRAY,
TEXTURE_SHADOW2D_ARRAY
};
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
}
void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
if (isTexOp(MI.getOpcode())) {
EmitTexInstr(MI, Fixups, OS);
} else if (isFCOp(MI.getOpcode())){
EmitFCInstr(MI, OS);
} else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::BUNDLE ||
MI.getOpcode() == AMDGPU::KILL) {
return;
} else {
switch(MI.getOpcode()) {
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
EmitByte(INSTR_NATIVE, OS);
Emit(inst, OS);
break;
}
case AMDGPU::CONSTANT_LOAD_eg:
case AMDGPU::VTX_READ_PARAM_8_eg:
case AMDGPU::VTX_READ_PARAM_16_eg:
case AMDGPU::VTX_READ_PARAM_32_eg:
case AMDGPU::VTX_READ_GLOBAL_8_eg:
case AMDGPU::VTX_READ_GLOBAL_32_eg:
case AMDGPU::VTX_READ_GLOBAL_128_eg: {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
EmitByte(INSTR_VTX, OS);
Emit(InstWord01, OS);
Emit(InstWord2, OS);
break;
}
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz:
case AMDGPU::EG_ExportBuf:
case AMDGPU::R600_ExportBuf: {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
EmitByte(INSTR_EXPORT, OS);
Emit(Inst, OS);
break;
}
default:
EmitALUInstr(MI, Fixups, OS);
break;
}
}
}
void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const {
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
unsigned NumOperands = MI.getNumOperands();
// Emit instruction type
EmitByte(INSTR_ALU, OS);
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
// Older ALUs have a different encoding for instructions with one or two
// src parameters.
if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
!(MCDesc.TSFlags & R600_InstFlag::OP3)) {
uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
InstWord01 &= ~(0x3FFULL << 39);
InstWord01 |= ISAOpCode << 1;
}
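// In other words, the 10-bit opcode field is moved from bits 39-48 up to
// bits 40-49 to match the older R600-style ALU encoding.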
unsigned SrcIdx = 0;
for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
continue;
}
EmitSrcISA(MI, OpIdx, InstWord01, OS);
SrcIdx++;
}
// Emit zeros for unused sources
for ( ; SrcIdx < 3; SrcIdx++) {
EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
}
Emit(InstWord01, OS);
return;
}
void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
raw_ostream &OS) const {
const MCOperand &MO = MI.getOperand(OpIdx);
union {
float f;
uint32_t i;
} Value;
Value.i = 0;
// Emit the source select (2 bytes). For GPRs, this is the register index.
// For other potential instruction operands (e.g. constant registers), the
// value of the source select is defined in the r600isa docs.
if (MO.isReg()) {
unsigned reg = MO.getReg();
EmitTwoBytes(getHWReg(reg), OS);
if (reg == AMDGPU::ALU_LITERAL_X) {
unsigned ImmOpIndex = MI.getNumOperands() - 1;
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
if (ImmOp.isFPImm()) {
Value.f = ImmOp.getFPImm();
} else {
assert(ImmOp.isImm());
Value.i = ImmOp.getImm();
}
}
} else {
// XXX: Handle other operand types.
EmitTwoBytes(0, OS);
}
// Emit the source channel (1 byte)
if (MO.isReg()) {
EmitByte(getHWRegChan(MO.getReg()), OS);
} else {
EmitByte(0, OS);
}
// XXX: Emit isNegated (1 byte)
if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
&& (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
(MO.isReg() &&
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
EmitByte(1, OS);
} else {
EmitByte(0, OS);
}
// Emit isAbsolute (1 byte)
if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
EmitByte(1, OS);
} else {
EmitByte(0, OS);
}
// XXX: Emit relative addressing mode (1 byte)
EmitByte(0, OS);
// Emit kc_bank; this will be adjusted later by r600_asm.
EmitByte(0, OS);
// Emit the literal value, if applicable (4 bytes).
Emit(Value.i, OS);
}
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
uint64_t &Value, raw_ostream &OS) const {
const MCOperand &MO = MI.getOperand(OpIdx);
union {
float f;
uint32_t i;
} InlineConstant;
InlineConstant.i = 0;
// Emit the source select (2 bytes). For GPRs, this is the register index.
// For other potential instruction operands (e.g. constant registers), the
// value of the source select is defined in the r600isa docs.
if (MO.isReg()) {
unsigned Reg = MO.getReg();
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
EmitByte(1, OS);
} else {
EmitByte(0, OS);
}
if (Reg == AMDGPU::ALU_LITERAL_X) {
unsigned ImmOpIndex = MI.getNumOperands() - 1;
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
if (ImmOp.isFPImm()) {
InlineConstant.f = ImmOp.getFPImm();
} else {
assert(ImmOp.isImm());
InlineConstant.i = ImmOp.getImm();
}
}
}
// Emit the literal value, if applicable (4 bytes).
Emit(InlineConstant.i, OS);
}
void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const {
unsigned Opcode = MI.getOpcode();
bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
unsigned OpOffset = hasOffsets ? 3 : 0;
int64_t Resource = MI.getOperand(OpOffset + 2).getImm();
int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
unsigned srcSelect[4] = {0, 1, 2, 3};
// Emit instruction type
EmitByte(INSTR_TEX, OS);
// Emit instruction
EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
// Emit resource id
EmitByte(Resource, OS);
// Emit source register
EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
// XXX: Emit src isRelativeAddress
EmitByte(0, OS);
// Emit destination register
EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
// XXX: Emit dst isRelativeAddress
EmitByte(0, OS);
// XXX: Emit dst select
EmitByte(0, OS); // X
EmitByte(1, OS); // Y
EmitByte(2, OS); // Z
EmitByte(3, OS); // W
// XXX: Emit lod bias
EmitByte(0, OS);
// XXX: Emit coord types
unsigned coordType[4] = {1, 1, 1, 1};
if (TextureType == TEXTURE_RECT
|| TextureType == TEXTURE_SHADOWRECT) {
coordType[ELEMENT_X] = 0;
coordType[ELEMENT_Y] = 0;
}
if (TextureType == TEXTURE_1D_ARRAY
|| TextureType == TEXTURE_SHADOW1D_ARRAY) {
if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
coordType[ELEMENT_Y] = 0;
} else {
coordType[ELEMENT_Z] = 0;
srcSelect[ELEMENT_Z] = ELEMENT_Y;
}
} else if (TextureType == TEXTURE_2D_ARRAY
|| TextureType == TEXTURE_SHADOW2D_ARRAY) {
coordType[ELEMENT_Z] = 0;
}
for (unsigned i = 0; i < 4; i++) {
EmitByte(coordType[i], OS);
}
// XXX: Emit offsets
if (hasOffsets)
for (unsigned i = 2; i < 5; i++)
EmitByte(MI.getOperand(i).getImm()<<1, OS);
else
EmitNullBytes(3, OS);
// Emit sampler id
EmitByte(Sampler, OS);
// XXX: Emit source select
if ((TextureType == TEXTURE_SHADOW1D
|| TextureType == TEXTURE_SHADOW2D
|| TextureType == TEXTURE_SHADOWRECT
|| TextureType == TEXTURE_SHADOW1D_ARRAY)
&& Opcode != AMDGPU::TEX_SAMPLE_C_L
&& Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
srcSelect[ELEMENT_W] = ELEMENT_Z;
}
for (unsigned i = 0; i < 4; i++) {
EmitByte(srcSelect[i], OS);
}
}
void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
// Emit instruction type
EmitByte(INSTR_FC, OS);
// Emit SRC
unsigned NumOperands = MI.getNumOperands();
if (NumOperands > 0) {
assert(NumOperands == 1);
EmitSrc(MI, 0, OS);
} else {
EmitNullBytes(SRC_BYTE_COUNT, OS);
}
// Emit FC Instruction
enum FCInstr instr;
switch (MI.getOpcode()) {
case AMDGPU::PREDICATED_BREAK:
instr = FC_BREAK_PREDICATE;
break;
case AMDGPU::CONTINUE:
instr = FC_CONTINUE;
break;
case AMDGPU::IF_PREDICATE_SET:
instr = FC_IF_PREDICATE;
break;
case AMDGPU::ELSE:
instr = FC_ELSE;
break;
case AMDGPU::ENDIF:
instr = FC_ENDIF;
break;
case AMDGPU::ENDLOOP:
instr = FC_ENDLOOP;
break;
case AMDGPU::WHILELOOP:
instr = FC_BGNLOOP;
break;
default:
abort();
break;
}
EmitByte(instr, OS);
}
void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
raw_ostream &OS) const {
for (unsigned int i = 0; i < ByteCount; i++) {
EmitByte(0, OS);
}
}
void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
OS.write((uint8_t) Byte & 0xff);
}
void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
raw_ostream &OS) const {
OS.write((uint8_t) (Bytes & 0xff));
OS.write((uint8_t) ((Bytes >> 8) & 0xff));
}
void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
for (unsigned i = 0; i < 4; i++) {
OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
}
}
void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
for (unsigned i = 0; i < 8; i++) {
EmitByte((Value >> (8 * i)) & 0xff, OS);
}
}
unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
}
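// The TableGen-assigned encoding packs the channel above the register index:
// EncodingValue = (Chan << HW_CHAN_SHIFT) | HWReg. For example (illustrative
// value), an encoding of 0x205 denotes channel 1 (Y) of hardware register 5.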
uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixup) const {
if (MO.isReg()) {
if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
return MRI.getEncodingValue(MO.getReg());
} else {
return getHWReg(MO.getReg());
}
} else if (MO.isImm()) {
return MO.getImm();
} else {
assert(0);
return 0;
}
}
//===----------------------------------------------------------------------===//
// Encoding helper functions
//===----------------------------------------------------------------------===//
bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
switch(opcode) {
default: return false;
case AMDGPU::PREDICATED_BREAK:
case AMDGPU::CONTINUE:
case AMDGPU::IF_PREDICATE_SET:
case AMDGPU::ELSE:
case AMDGPU::ENDIF:
case AMDGPU::ENDLOOP:
case AMDGPU::WHILELOOP:
return true;
}
}
bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
switch(opcode) {
default: return false;
case AMDGPU::TEX_LD:
case AMDGPU::TEX_GET_TEXTURE_RESINFO:
case AMDGPU::TEX_SAMPLE:
case AMDGPU::TEX_SAMPLE_C:
case AMDGPU::TEX_SAMPLE_L:
case AMDGPU::TEX_SAMPLE_C_L:
case AMDGPU::TEX_SAMPLE_LB:
case AMDGPU::TEX_SAMPLE_C_LB:
case AMDGPU::TEX_SAMPLE_G:
case AMDGPU::TEX_SAMPLE_C_G:
case AMDGPU::TEX_GET_GRADIENTS_H:
case AMDGPU::TEX_GET_GRADIENTS_V:
case AMDGPU::TEX_SET_GRADIENTS_H:
case AMDGPU::TEX_SET_GRADIENTS_V:
return true;
}
}
bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
unsigned Flag) const {
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
if (FlagIndex == 0) {
return false;
}
assert(MI.getOperand(FlagIndex).isImm());
return !!((MI.getOperand(FlagIndex).getImm() >>
(NUM_MO_FLAGS * Operand)) & Flag);
}
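// A sketch of the packing isFlagSet decodes (NUM_MO_FLAGS and the MO_FLAG_*
// bits come from R600Defines.h): the flag immediate stores NUM_MO_FLAGS bits
// per source operand, so testing MO_FLAG_ABS on operand 1 inspects bit
// (NUM_MO_FLAGS * 1) + 2 = 9:
//
//   uint64_t Flags = MI.getOperand(FlagIndex).getImm();
//   bool Src1Abs = (Flags >> (NUM_MO_FLAGS * 1)) & MO_FLAG_ABS;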
#include "AMDGPUGenMCCodeEmitter.inc"

View File

@@ -0,0 +1,292 @@
//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief The SI code emitter produces machine code that can be executed
/// directly on the GPU device.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
#define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))
#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
// These must be kept in sync with SIInstructions.td and also the
// InstrEncodingInfo array in SIInstrInfo.cpp.
//
// NOTE: This enum is only used to identify the encoding type within LLVM,
// the actual encoding type that is part of the instruction format is different
namespace SIInstrEncodingType {
enum Encoding {
EXP = 0,
LDS = 1,
MIMG = 2,
MTBUF = 3,
MUBUF = 4,
SMRD = 5,
SOP1 = 6,
SOP2 = 7,
SOPC = 8,
SOPK = 9,
SOPP = 10,
VINTRP = 11,
VOP1 = 12,
VOP2 = 13,
VOP3 = 14,
VOPC = 15
};
}
using namespace llvm;
namespace {
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
const MCSubtargetInfo &STI;
MCContext &Ctx;
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
const MCSubtargetInfo &sti, MCContext &ctx)
: MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
~SIMCCodeEmitter() { }
/// \brief Encode the instruction and write it to the OS.
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
/// \returns the encoding for an MCOperand.
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const;
public:
/// \brief Encode a sequence of registers with the correct alignment.
unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
/// \brief Encoding for when 2 consecutive registers are used
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const;
/// \brief Encoding for when 4 consecutive registers are used
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const;
/// \brief Encoding for SMRD indexed loads
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const;
/// \brief Post-Encoder method for VOP instructions
virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
private:
/// \returns the SIInstrEncodingType for this instruction.
unsigned getEncodingType(const MCInst &MI) const;
/// \brief Get the size in bytes of this instruction's encoding.
unsigned getEncodingBytes(const MCInst &MI) const;
/// \returns the hardware encoding for a register
unsigned getRegBinaryCode(unsigned reg) const;
/// \brief Generated function that returns the hardware encoding for
/// a register
unsigned getHWRegNum(unsigned reg) const;
};
} // End anonymous namespace
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
}
void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
unsigned bytes = getEncodingBytes(MI);
for (unsigned i = 0; i < bytes; i++) {
OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
}
}
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
return getRegBinaryCode(MO.getReg());
} else if (MO.isImm()) {
return MO.getImm();
} else if (MO.isFPImm()) {
// XXX: Not all instructions can use inline literals
// XXX: We should make sure this is a 32-bit constant
union {
float F;
uint32_t I;
} Imm;
Imm.F = MO.getFPImm();
return Imm.I;
} else {
llvm_unreachable("Encoding of this operand type is not supported yet.");
}
return 0;
}
//===----------------------------------------------------------------------===//
// Custom Operand Encodings
//===----------------------------------------------------------------------===//
unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
unsigned shift) const {
unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
return regCode >> shift;
}
unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
unsigned OpNo ,
SmallVectorImpl<MCFixup> &Fixup) const {
return GPRAlign(MI, OpNo, 1);
}
unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const {
return GPRAlign(MI, OpNo, 2);
}
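// Consecutive-register operands are assumed to be naturally aligned, so the
// encoding stores the first register's code divided by the group size:
// shift by 1 for pairs, by 2 for quads.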
#define SMRD_OFFSET_MASK 0xff
#define SMRD_IMM_SHIFT 8
#define SMRD_SBASE_MASK 0x3f
#define SMRD_SBASE_SHIFT 9
/// This function is responsible for encoding the offset
/// and the base ptr for SMRD instructions. It should return a bit string in
/// this format:
///
/// OFFSET = bits{7-0}
/// IMM = bits{8}
/// SBASE = bits{14-9}
///
uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const {
uint32_t Encoding;
const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
//XXX: Use this function for SMRD loads with register offsets
assert(OffsetOp.isImm());
Encoding =
(getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
| (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
| ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
;
return Encoding;
}
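// Worked example with assumed (not real-instruction) values: an immediate
// offset of 0x10 and an SBASE pair encoding of 0x4 yield
//   (0x10 & SMRD_OFFSET_MASK) | (1 << SMRD_IMM_SHIFT)
//     | ((0x4 & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT) == 0x0910.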
//===----------------------------------------------------------------------===//
// Post Encoder Callbacks
//===----------------------------------------------------------------------===//
uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
unsigned encodingType = getEncodingType(MI);
unsigned numSrcOps;
unsigned vgprBitOffset;
if (encodingType == SIInstrEncodingType::VOP3) {
numSrcOps = 3;
vgprBitOffset = 32;
} else {
numSrcOps = 1;
vgprBitOffset = 0;
}
// Add one to skip over the destination reg operand.
for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
const MCOperand &MO = MI.getOperand(opIdx);
if (MO.isReg()) {
unsigned reg = MI.getOperand(opIdx).getReg();
if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
}
} else if (MO.isFPImm()) {
union {
float f;
uint32_t i;
} Imm;
// XXX: Not all instructions can use inline literals
// XXX: We should make sure this is a 32-bit constant
Imm.f = MO.getFPImm();
Value |= ((uint64_t)Imm.i) << 32;
}
}
return Value;
}
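// From VGPR_BIT above: source operand 1 maps to bit 8, operand 2 to bit 17,
// and operand 3 to bit 26; for VOP3 encodings these bits are shifted into
// the upper dword (offset 32) of the 64-bit instruction word.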
//===----------------------------------------------------------------------===//
// Encoding helper functions
//===----------------------------------------------------------------------===//
unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
}
unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
// These instructions aren't real instructions with an encoding type, so
// we need to manually specify their size.
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_LOAD_LITERAL_I32:
case AMDGPU::SI_LOAD_LITERAL_F32:
return 4;
}
unsigned encoding_type = getEncodingType(MI);
switch (encoding_type) {
case SIInstrEncodingType::EXP:
case SIInstrEncodingType::LDS:
case SIInstrEncodingType::MUBUF:
case SIInstrEncodingType::MTBUF:
case SIInstrEncodingType::MIMG:
case SIInstrEncodingType::VOP3:
return 8;
default:
return 4;
}
}
unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
switch (reg) {
case AMDGPU::M0: return 124;
case AMDGPU::SREG_LIT_0: return 128;
case AMDGPU::SI_LITERAL_CONSTANT: return 255;
default: return MRI.getEncodingValue(reg);
}
}

23
lib/Target/R600/Makefile Normal file
View File

@@ -0,0 +1,23 @@
##===- lib/Target/R600/Makefile ---------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../..
LIBRARYNAME = LLVMR600CodeGen
TARGET = AMDGPU
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \
AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
AMDGPUGenAsmWriter.inc
DIRS = InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common

View File

@@ -0,0 +1,29 @@
//===-- Processors.td - AMDIL processor definitions -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AMDIL processors supported.
//
//===----------------------------------------------------------------------===//
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
: Processor<Name, itin, Features>;
def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
def : Proc<"rv710", R600_EG_Itin, []>;
def : Proc<"rv730", R600_EG_Itin, []>;
def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;

View File

@@ -0,0 +1,79 @@
//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
#ifndef R600DEFINES_H_
#define R600DEFINES_H_
#include "llvm/MC/MCRegisterInfo.h"
// Operand Flags
#define MO_FLAG_CLAMP (1 << 0)
#define MO_FLAG_NEG (1 << 1)
#define MO_FLAG_ABS (1 << 2)
#define MO_FLAG_MASK (1 << 3)
#define MO_FLAG_PUSH (1 << 4)
#define MO_FLAG_NOT_LAST (1 << 5)
#define MO_FLAG_LAST (1 << 6)
#define NUM_MO_FLAGS 7
/// \brief Helper for getting the operand index for the instruction flags
/// operand.
#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
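// For example, TSFlags with bits 7-8 holding the value 2 places the flag
// operand at index 2; an extracted index of 0 means the instruction carries
// no flag operand (see R600MCCodeEmitter::isFlagSet).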
namespace R600_InstFlag {
enum TIF {
TRANS_ONLY = (1 << 0),
TEX = (1 << 1),
REDUCTION = (1 << 2),
FC = (1 << 3),
TRIG = (1 << 4),
OP3 = (1 << 5),
VECTOR = (1 << 6),
//FlagOperand bits 7, 8
NATIVE_OPERANDS = (1 << 9),
OP1 = (1 << 10),
OP2 = (1 << 11)
};
}
#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
/// \brief Defines for extracting register information from register encoding
#define HW_REG_MASK 0x1ff
#define HW_CHAN_SHIFT 9
namespace R600Operands {
enum Ops {
DST,
UPDATE_EXEC_MASK,
UPDATE_PREDICATE,
WRITE,
OMOD,
DST_REL,
CLAMP,
SRC0,
SRC0_NEG,
SRC0_REL,
SRC0_ABS,
SRC1,
SRC1_NEG,
SRC1_REL,
SRC1_ABS,
SRC2,
SRC2_NEG,
SRC2_REL,
LAST,
PRED_SEL,
IMM,
COUNT
};
}
#endif // R600DEFINES_H_

View File

@@ -0,0 +1,333 @@
//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Vector, Reduction, and Cube instructions need to fill the entire instruction
/// group to work correctly. This pass expands these individual instructions
/// into several instructions that will completely fill the instruction group.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
private:
static char ID;
const R600InstrInfo *TII;
bool ExpandInputPerspective(MachineInstr& MI);
bool ExpandInputConstant(MachineInstr& MI);
public:
R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "R600 Expand special instructions pass";
}
};
} // End anonymous namespace
char R600ExpandSpecialInstrsPass::ID = 0;
FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
return new R600ExpandSpecialInstrsPass(TM);
}
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
if (MI.getOpcode() != AMDGPU::input_perspective)
return false;
MachineBasicBlock::iterator I = &MI;
unsigned DstReg = MI.getOperand(0).getReg();
R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
->getInfo<R600MachineFunctionInfo>();
unsigned IJIndexBase;
// In Evergreen ISA doc section 8.3.2:
// We need to interpolate XY and ZW in two different instruction groups.
// An INTERP_* must occupy all 4 slots of an instruction group.
// Output of INTERP_XY is written in X,Y slots
// Output of INTERP_ZW is written in Z,W slots
//
// Thus interpolation requires the following sequences :
//
// AnyGPR.x = INTERP_ZW; (Write Masked Out)
// AnyGPR.y = INTERP_ZW; (Write Masked Out)
// DstGPR.z = INTERP_ZW;
// DstGPR.w = INTERP_ZW; (End of first IG)
// DstGPR.x = INTERP_XY;
// DstGPR.y = INTERP_XY;
// AnyGPR.z = INTERP_XY; (Write Masked Out)
// AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
//
switch (MI.getOperand(1).getImm()) {
case 0:
IJIndexBase = MFI->GetIJPerspectiveIndex();
break;
case 1:
IJIndexBase = MFI->GetIJLinearIndex();
break;
default:
assert(0 && "Unknow ij index");
}
for (unsigned i = 0; i < 8; i++) {
unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
2 * IJIndexBase + ((i + 1) % 2));
unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
MI.getOperand(2).getImm());
unsigned Sel = AMDGPU::sel_x;
switch (i % 4) {
case 0: Sel = AMDGPU::sel_x; break;
case 1: Sel = AMDGPU::sel_y; break;
case 2: Sel = AMDGPU::sel_z; break;
case 3: Sel = AMDGPU::sel_w; break;
default: break;
}
unsigned Res = TRI.getSubReg(DstReg, Sel);
unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY;
MachineBasicBlock &MBB = *(MI.getParent());
MachineInstr *NewMI =
TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg);
if (!(i > 1 && i < 6)) {
TII->addFlag(NewMI, 0, MO_FLAG_MASK);
}
if (i % 4 != 3)
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
return true;
}
bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
if (MI.getOpcode() != AMDGPU::input_constant)
return false;
MachineBasicBlock::iterator I = &MI;
unsigned DstReg = MI.getOperand(0).getReg();
for (unsigned i = 0; i < 4; i++) {
unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
MI.getOperand(1).getImm());
unsigned Sel = AMDGPU::sel_x;
switch (i % 4) {
case 0: Sel = AMDGPU::sel_x; break;
case 1: Sel = AMDGPU::sel_y; break;
case 2: Sel = AMDGPU::sel_z; break;
case 3: Sel = AMDGPU::sel_w; break;
default: break;
}
unsigned Res = TRI.getSubReg(DstReg, Sel);
MachineBasicBlock &MBB = *(MI.getParent());
MachineInstr *NewMI = TII->buildDefaultInstruction(
MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg);
if (i % 4 != 3)
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
}
MI.eraseFromParent();
return true;
}
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
MachineBasicBlock::iterator I = MBB.begin();
while (I != MBB.end()) {
MachineInstr &MI = *I;
I = llvm::next(I);
switch (MI.getOpcode()) {
default: break;
// Expand PRED_X to one of the PRED_SET instructions.
case AMDGPU::PRED_X: {
uint64_t Flags = MI.getOperand(3).getImm();
// The native opcode used by PRED_X is stored as an immediate in the
// third operand.
MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
MI.getOperand(2).getImm(), // opcode
MI.getOperand(0).getReg(), // dst
MI.getOperand(1).getReg(), // src0
AMDGPU::ZERO); // src1
TII->addFlag(PredSet, 0, MO_FLAG_MASK);
if (Flags & MO_FLAG_PUSH) {
TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
} else {
TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1);
}
MI.eraseFromParent();
continue;
}
case AMDGPU::BREAK: {
MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
AMDGPU::PRED_SETE_INT,
AMDGPU::PREDICATE_BIT,
AMDGPU::ZERO,
AMDGPU::ZERO);
TII->addFlag(PredSet, 0, MO_FLAG_MASK);
TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
BuildMI(MBB, I, MBB.findDebugLoc(I),
TII->get(AMDGPU::PREDICATED_BREAK))
.addReg(AMDGPU::PREDICATE_BIT);
MI.eraseFromParent();
continue;
}
}
if (ExpandInputPerspective(MI))
continue;
if (ExpandInputConstant(MI))
continue;
bool IsReduction = TII->isReductionOp(MI.getOpcode());
bool IsVector = TII->isVector(MI);
bool IsCube = TII->isCubeOp(MI.getOpcode());
if (!IsReduction && !IsVector && !IsCube) {
continue;
}
// Expand the instruction
//
// Reduction instructions:
// T0_X = DP4 T1_XYZW, T2_XYZW
// becomes:
// T0_X = DP4 T1_X, T2_X
// T0_Y (write masked) = DP4 T1_Y, T2_Y
// T0_Z (write masked) = DP4 T1_Z, T2_Z
// T0_W (write masked) = DP4 T1_W, T2_W
//
// Vector instructions:
// T0_X = MULLO_INT T1_X, T2_X
// becomes:
// T0_X = MULLO_INT T1_X, T2_X
// T0_Y (write masked) = MULLO_INT T1_X, T2_X
// T0_Z (write masked) = MULLO_INT T1_X, T2_X
// T0_W (write masked) = MULLO_INT T1_X, T2_X
//
// Cube instructions:
// T0_XYZW = CUBE T1_XYZW
// becomes:
// T0_X = CUBE T1_Z, T1_Y
// T0_Y = CUBE T1_Z, T1_X
// T0_Z = CUBE T1_X, T1_Z
// T0_W = CUBE T1_Y, T1_Z
for (unsigned Chan = 0; Chan < 4; Chan++) {
unsigned DstReg = MI.getOperand(
TII->getOperandIdx(MI, R600Operands::DST)).getReg();
unsigned Src0 = MI.getOperand(
TII->getOperandIdx(MI, R600Operands::SRC0)).getReg();
unsigned Src1 = 0;
// Determine the correct source registers
if (!IsCube) {
int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1);
if (Src1Idx != -1) {
Src1 = MI.getOperand(Src1Idx).getReg();
}
}
if (IsReduction) {
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
Src0 = TRI.getSubReg(Src0, SubRegIndex);
Src1 = TRI.getSubReg(Src1, SubRegIndex);
} else if (IsCube) {
static const int CubeSrcSwz[] = {2, 2, 0, 1};
unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
}
// Determine the correct destination registers.
bool Mask = false;
bool NotLast = true;
if (IsCube) {
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
} else {
// Mask the write if the original instruction does not write to
// the current Channel.
Mask = (Chan != TRI.getHWRegChan(DstReg));
unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
}
// Set the IsLast bit
NotLast = (Chan != 3);
// Add the new instruction
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
case AMDGPU::CUBE_r600_pseudo:
Opcode = AMDGPU::CUBE_r600_real;
break;
case AMDGPU::CUBE_eg_pseudo:
Opcode = AMDGPU::CUBE_eg_real;
break;
case AMDGPU::DOT4_r600_pseudo:
Opcode = AMDGPU::DOT4_r600_real;
break;
case AMDGPU::DOT4_eg_pseudo:
Opcode = AMDGPU::DOT4_eg_real;
break;
default:
break;
}
MachineInstr *NewMI =
TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
NewMI->setIsInsideBundle(Chan != 0);
if (Mask) {
TII->addFlag(NewMI, 0, MO_FLAG_MASK);
}
if (NotLast) {
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
}
}
MI.eraseFromParent();
}
}
return false;
}


@@ -0,0 +1,905 @@
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Argument.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
setOperationAction(ISD::MUL, MVT::i64, Expand);
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
computeRegisterProperties();
setOperationAction(ISD::FADD, MVT::v4f32, Expand);
setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
setOperationAction(ISD::ADD, MVT::v4i32, Expand);
setOperationAction(ISD::AND, MVT::v4i32, Expand);
setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
setOperationAction(ISD::UREM, MVT::v4i32, Expand);
setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::FSUB, MVT::f32, Expand);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
setOperationAction(ISD::FPOW, MVT::f32, Custom);
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setTargetDAGCombine(ISD::FP_ROUND);
setSchedulingPreference(Sched::VLIW);
}
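// A note on the action choices above: marking the v4f32/v4i32 arithmetic as
// Expand makes the legalizer unroll each vector node into four 32-bit
// operations on the scalar register classes registered above, while the
// Custom entries are routed to R600TargetLowering::LowerOperation below.
// As a sketch, an IR-level
//   %r = fadd <4 x float> %a, %b
// is legalized into four scalar FADD nodes, one per channel.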
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineFunction * MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::SHADER_TYPE: break;
case AMDGPU::CLAMP_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
AMDGPU::MOV,
MI->getOperand(0).getReg(),
MI->getOperand(1).getReg());
TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
break;
}
case AMDGPU::FABS_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
AMDGPU::MOV,
MI->getOperand(0).getReg(),
MI->getOperand(1).getReg());
TII->addFlag(NewMI, 0, MO_FLAG_ABS);
break;
}
case AMDGPU::FNEG_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
AMDGPU::MOV,
MI->getOperand(0).getReg(),
MI->getOperand(1).getReg());
TII->addFlag(NewMI, 0, MO_FLAG_NEG);
break;
}
case AMDGPU::R600_LOAD_CONST: {
int64_t RegIndex = MI->getOperand(1).getImm();
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
.addOperand(MI->getOperand(0))
.addReg(ConstantReg);
break;
}
case AMDGPU::MASK_WRITE: {
unsigned maskedRegister = MI->getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
TII->addFlag(defInstr, 0, MO_FLAG_MASK);
break;
}
case AMDGPU::MOV_IMM_F32:
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
MI->getOperand(1).getFPImm()->getValueAPF()
.bitcastToAPInt().getZExtValue());
break;
case AMDGPU::MOV_IMM_I32:
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
MI->getOperand(1).getImm());
break;
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(EOP); // Set End of program bit
break;
}
case AMDGPU::RESERVE_REG: {
R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
int64_t ReservedIndex = MI->getOperand(0).getImm();
unsigned ReservedReg =
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
MFI->ReservedRegs.push_back(ReservedReg);
unsigned SuperReg =
AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
MFI->ReservedRegs.push_back(SuperReg);
break;
}
case AMDGPU::TXD: {
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6))
.addReg(T0, RegState::Implicit)
.addReg(T1, RegState::Implicit);
break;
}
case AMDGPU::TXD_SHADOW: {
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6))
.addReg(T0, RegState::Implicit)
.addReg(T1, RegState::Implicit);
break;
}
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(0);
break;
case AMDGPU::BRANCH_COND_f32: {
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_NOT_ZERO)
.addImm(0); // Flags
TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
case AMDGPU::BRANCH_COND_i32: {
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_NOT_ZERO_INT)
.addImm(0); // Flags
TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
case AMDGPU::input_perspective: {
R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
// XXX Be more fine-grained about register reservation
for (unsigned i = 0; i < 4; i ++) {
unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
MFI->ReservedRegs.push_back(ReservedReg);
}
switch (MI->getOperand(1).getImm()) {
case 0: // Perspective
MFI->HasPerspectiveInterpolation = true;
break;
case 1: // Linear
MFI->HasLinearInterpolation = true;
break;
default:
assert(0 && "Unknown ij index");
}
return BB;
}
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz: {
bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN);
if (!EOP)
return BB;
unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6))
.addImm(CfInst)
.addImm(1);
break;
}
}
MI->eraseFromParent();
return BB;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;
static SDValue
InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
SDValue Scalar, SDValue Chain) {
if (!ExportMap[Slot]) {
SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
DL, MVT::v4f32,
DAG.getUNDEF(MVT::v4f32),
Scalar,
DAG.getConstant(Channel, MVT::i32));
unsigned Mask = 1 << Channel;
const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
DAG.getConstant(Mask, MVT::i32)};
SDValue Res = DAG.getNode(
AMDGPUISD::EXPORT,
DL,
MVT::Other,
Ops, 6);
ExportMap[Slot] = Res.getNode();
return Res;
}
SDNode *ExportInstruction = ExportMap[Slot];
SDValue PreviousVector = ExportInstruction->getOperand(1);
SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
DL, MVT::v4f32,
PreviousVector,
Scalar,
DAG.getConstant(Channel, MVT::i32));
unsigned Mask = cast<ConstantSDNode>(ExportInstruction->getOperand(5))
->getZExtValue();
Mask |= (1 << Channel);
const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
DAG.getConstant(Inst, MVT::i32),
DAG.getConstant(Type, MVT::i32),
DAG.getConstant(Slot, MVT::i32),
DAG.getConstant(Mask, MVT::i32)};
DAG.UpdateNodeOperands(ExportInstruction,
Ops, 6);
return Chain;
}
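// A sketch of how the ExportMap above accumulates channels: the first scalar
// exported to a slot materializes an AMDGPUISD::EXPORT node with a write mask
// of (1 << Channel); each later scalar for the same slot inserts its value
// into the node's vector operand and ORs its channel bit into the mask in
// place, returning the original Chain so no extra chain dependency is built.
// For example (u = undef):
//   store_pixel_color a -> reg 0  =>  EXPORT slot 0, <a,u,u,u>, mask 0x1
//   store_pixel_color b -> reg 1  =>  same node,     <a,b,u,u>, mask 0x3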
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
case AMDGPUIntrinsic::AMDGPU_store_output: {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
if (!MRI.isLiveOut(Reg)) {
MRI.addLiveOut(Reg);
}
return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
}
case AMDGPUIntrinsic::R600_store_pixel_color: {
MachineFunction &MF = DAG.getMachineFunction();
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
SDNode **OutputsMap = MFI->Outputs;
return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
Chain);
}
case AMDGPUIntrinsic::R600_store_stream_output : {
MachineFunction &MF = DAG.getMachineFunction();
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
SDNode **OutputsMap = MFI->StreamOutputs[BufIndex];
unsigned Inst;
switch (BufIndex) {
// STREAM3
case 3:
Inst = 4;
break;
// STREAM2
case 2:
Inst = 3;
break;
// STREAM1
case 1:
Inst = 2;
break;
// STREAM0
case 0:
Inst = 1;
break;
default:
assert(0 && "Wrong buffer id for stream outputs !");
}
return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
Chain);
}
// default for switch(IntrinsicID)
default: break;
}
// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
switch(IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case AMDGPUIntrinsic::R600_load_input: {
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
}
case AMDGPUIntrinsic::R600_load_input_perspective: {
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (slot < 0)
return DAG.getUNDEF(MVT::f32);
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP,
DL, MVT::v4f32,
DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
}
case AMDGPUIntrinsic::R600_load_input_linear: {
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (slot < 0)
return DAG.getUNDEF(MVT::f32);
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP,
DL, MVT::v4f32,
DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
}
case AMDGPUIntrinsic::R600_load_input_constant: {
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (slot < 0)
return DAG.getUNDEF(MVT::f32);
SDValue FullVector = DAG.getNode(
AMDGPUISD::INTERP_P0,
DL, MVT::v4f32,
DAG.getConstant(slot / 4, MVT::i32));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
}
case r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
case r600_read_ngroups_y:
return LowerImplicitParameter(DAG, VT, DL, 1);
case r600_read_ngroups_z:
return LowerImplicitParameter(DAG, VT, DL, 2);
case r600_read_global_size_x:
return LowerImplicitParameter(DAG, VT, DL, 3);
case r600_read_global_size_y:
return LowerImplicitParameter(DAG, VT, DL, 4);
case r600_read_global_size_z:
return LowerImplicitParameter(DAG, VT, DL, 5);
case r600_read_local_size_x:
return LowerImplicitParameter(DAG, VT, DL, 6);
case r600_read_local_size_y:
return LowerImplicitParameter(DAG, VT, DL, 7);
case r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
case r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
case r600_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Y, VT);
case r600_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Z, VT);
case r600_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_X, VT);
case r600_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Y, VT);
case r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
}
} // end switch(Op.getOpcode())
return SDValue();
}
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default: return;
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
}
}
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(
ISD::SETCC,
Op.getDebugLoc(),
MVT::i1,
Op, DAG.getConstantFP(0.0f, MVT::f32),
DAG.getCondCode(ISD::SETNE)
);
}
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue CC = Op.getOperand(1);
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue JumpT = Op.getOperand(4);
SDValue CmpValue;
SDValue Result;
if (LHS.getValueType() == MVT::i32) {
CmpValue = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::i32,
LHS, RHS,
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
CC);
} else if (LHS.getValueType() == MVT::f32) {
CmpValue = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::f32,
LHS, RHS,
DAG.getConstantFP(1.0f, MVT::f32),
DAG.getConstantFP(0.0f, MVT::f32),
CC);
} else {
assert(0 && "Not valid type for br_cc");
}
Result = DAG.getNode(
AMDGPUISD::BRANCH_COND,
CmpValue.getDebugLoc(),
MVT::Other, Chain,
JumpT, CmpValue);
return Result;
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
DebugLoc DL,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16 bits for implicit parameters.
assert(isInt<16>(ByteOffset));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getConstant(ByteOffset, MVT::i32), // PTR
MachinePointerInfo(ConstantPointerNull::get(PtrType)),
false, false, false, 0);
}
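// For reference, the nine implicit kernel parameters and the DwordOffset that
// each intrinsic passes to this function (see the INTRINSIC_WO_CHAIN cases
// above):
//   dwords 0-2: ngroups.{x,y,z}
//   dwords 3-5: global_size.{x,y,z}
//   dwords 6-8: local_size.{x,y,z}
// So, for example, r600_read_local_size_y loads 4 bytes from PARAM_I_ADDRESS
// at byte offset 7 * 4 = 28.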
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
Op.getOperand(0),
Op.getOperand(0),
DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(32, MVT::i32),
Op.getOperand(1)));
}
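// A sketch of why this works, assuming the usual AMD BITALIGN semantics
// (a funnel shift right on the concatenation of the two sources):
//   bitalign(a, b, c) = (uint32_t)((((uint64_t)a << 32) | b) >> c)
// With both sources equal to x, shifting right by (32 - s) yields
//   bitalign(x, x, 32 - s) == rotl(x, s)
// which is exactly the node built above.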
bool R600TargetLowering::isZero(SDValue Op) const {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
return Cst->isNullValue();
} else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CstFP->isZero();
} else {
return false;
}
}
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue True = Op.getOperand(2);
SDValue False = Op.getOperand(3);
SDValue CC = Op.getOperand(4);
SDValue Temp;
// LHS and RHS are guaranteed to be the same value type
EVT CompareVT = LHS.getValueType();
// Check if we can lower this to a native operation.
// Try to lower to a CND* instruction:
// CND* instructions require RHS to be zero. Some SELECT_CC nodes that
// can be lowered to CND* instructions can also be lowered to SET*
// instructions. CND* instructions are cheaper, because they don't
// require additional instructions to convert their result to the correct
// value type, so this check should be first.
if (isZero(LHS) || isZero(RHS)) {
SDValue Cond = (isZero(LHS) ? RHS : LHS);
SDValue Zero = (isZero(LHS) ? LHS : RHS);
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
if (CompareVT != VT) {
// Bitcast True / False to the correct types. This will end up being
// a nop, but it allows us to define only a single pattern in the
// .TD files for each CND* instruction rather than having to have
// one pattern for integer True/False and one for fp True/False
True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
}
if (isZero(LHS)) {
CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
}
switch (CCOpcode) {
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
case ISD::SETULE:
case ISD::SETULT:
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT:
CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Temp = True;
True = False;
False = Temp;
break;
default:
break;
}
SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
Cond, Zero,
True, False,
DAG.getCondCode(CCOpcode));
return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
}
// Try to lower to a SET* instruction:
// We need all the operands of SELECT_CC to have the same value type, so if
// necessary we need to change True and False to be the same type as LHS and
// RHS, and then convert the result of the select_cc back to the correct type.
// Move hardware True/False values to the correct operand.
if (isHWTrueValue(False) && isHWFalseValue(True)) {
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
std::swap(False, True);
CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
}
if (isHWTrueValue(True) && isHWFalseValue(False)) {
if (CompareVT != VT) {
if (VT == MVT::f32 && CompareVT == MVT::i32) {
SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
LHS, RHS,
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
CC);
// Convert integer values of true (-1) and false (0) to fp values of
// true (1.0f) and false (0.0f).
SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
DAG.getConstant(1, MVT::i32));
return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
} else if (VT == MVT::i32 && CompareVT == MVT::f32) {
SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
LHS, RHS,
DAG.getConstantFP(1.0f, MVT::f32),
DAG.getConstantFP(0.0f, MVT::f32),
CC);
// Convert fp values of true (1.0f) and false (0.0f) to integer values
// of true (-1) and false (0).
SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
} else {
// I don't think there will be any other type pairings.
assert(!"Unhandled operand type parings in SELECT_CC");
}
} else {
// This SELECT_CC is already legal.
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
}
}
// Possible Min/Max pattern
SDValue MinMax = LowerMinMax(Op, DAG);
if (MinMax.getNode()) {
return MinMax;
}
// If we make it this far, it means we have no native instructions to handle
// this SELECT_CC, so we must lower it.
SDValue HWTrue, HWFalse;
if (CompareVT == MVT::f32) {
HWTrue = DAG.getConstantFP(1.0f, CompareVT);
HWFalse = DAG.getConstantFP(0.0f, CompareVT);
} else if (CompareVT == MVT::i32) {
HWTrue = DAG.getConstant(-1, CompareVT);
HWFalse = DAG.getConstant(0, CompareVT);
} else {
assert(!"Unhandled value type in LowerSELECT_CC");
}
// Lower this unsupported SELECT_CC into a combination of two supported
// SELECT_CC operations.
SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
LHS, RHS, HWTrue, HWFalse, CC);
return DAG.getNode(ISD::SELECT_CC, DL, VT,
Cond, HWFalse,
True, False,
DAG.getCondCode(ISD::SETNE));
}
SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SELECT_CC,
Op.getDebugLoc(),
Op.getValueType(),
Op.getOperand(0),
DAG.getConstant(0, MVT::i32),
Op.getOperand(1),
Op.getOperand(2),
DAG.getCondCode(ISD::SETNE));
}
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
DebugLoc DL = Op.getDebugLoc();
assert(Op.getValueType() == MVT::i32);
if (LHS.getValueType() == MVT::i32) {
Cond = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::i32,
LHS, RHS,
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
CC);
} else if (LHS.getValueType() == MVT::f32) {
Cond = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::f32,
LHS, RHS,
DAG.getConstantFP(1.0f, MVT::f32),
DAG.getConstantFP(0.0f, MVT::f32),
CC);
Cond = DAG.getNode(
ISD::FP_TO_SINT,
DL,
MVT::i32,
Cond);
} else {
assert(0 && "Not valid type for set_cc");
}
Cond = DAG.getNode(
ISD::AND,
DL,
MVT::i32,
DAG.getConstant(1, MVT::i32),
Cond);
return Cond;
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
SDValue Chain = Op.getOperand(0);
SDValue Value = Op.getOperand(1);
SDValue Ptr = Op.getOperand(2);
if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
// Convert pointer from byte address to dword address.
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Ptr, DAG.getConstant(2, MVT::i32)));
if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
assert(!"Truncated and indexed stores not supported yet");
} else {
Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
}
return Chain;
}
return SDValue();
}
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
}
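// The lowering above uses the identity pow(x, y) = exp2(y * log2(x)), turning
// FPOW into FLOG2, FMUL, and FEXP2 nodes the backend can select directly.
// Note the identity only holds for x > 0; this lowering does not implement
// the full libm pow() special-case behavior.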
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned ParamOffsetBytes = 36;
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
EVT VT = Ins[i].VT;
Type *ArgType = FuncArg->getType();
unsigned ArgSizeInBits = ArgType->isPointerTy() ?
32 : ArgType->getPrimitiveSizeInBits();
unsigned ArgBytes = ArgSizeInBits >> 3;
EVT ArgVT;
if (ArgSizeInBits < VT.getSizeInBits()) {
assert(!ArgType->isFloatTy() &&
"Extending floating point arguments not supported yet");
ArgVT = MVT::getIntegerVT(ArgSizeInBits);
} else {
ArgVT = VT;
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
DAG.getConstant(ParamOffsetBytes, MVT::i32),
MachinePointerInfo(new Argument(PtrTy)),
ArgVT, false, false, ArgBytes);
InVals.push_back(Arg);
ParamOffsetBytes += ArgBytes;
}
return Chain;
}
EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
if (!VT.isVector()) return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
case ISD::FP_ROUND: {
SDValue Arg = N->getOperand(0);
if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
Arg.getOperand(0));
}
break;
}
}
return SDValue();
}


@@ -0,0 +1,72 @@
//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//
#ifndef R600ISELLOWERING_H
#define R600ISELLOWERING_H
#include "AMDGPUISelLowering.h"
namespace llvm {
class R600InstrInfo;
class R600TargetLowering : public AMDGPUTargetLowering {
public:
R600TargetLowering(TargetMachine &TM);
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock * BB) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
void ReplaceNodeResults(SDNode * N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
virtual SDValue LowerFormalArguments(
SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual EVT getSetCCResultType(EVT VT) const;
private:
const R600InstrInfo * TII;
/// Each OpenCL kernel has nine implicit parameters that are stored in the
/// first nine dwords of a Vertex Buffer. These implicit parameters are
/// lowered to load instructions which retrieve the values from the Vertex
/// Buffer.
SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
DebugLoc DL, unsigned DwordOffset) const;
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
/// \brief Lower ROTL opcode to BITALIGN
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
bool isZero(SDValue Op) const;
};
} // End namespace llvm
#endif // R600ISELLOWERING_H


@@ -0,0 +1,663 @@
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//
#include "R600InstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"
using namespace llvm;
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
RI(tm, *this)
{ }
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
return RI;
}
bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}
bool R600InstrInfo::isVector(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}
void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
&& AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
for (unsigned I = 0; I < 4; I++) {
unsigned SubRegIndex = RI.getSubRegFromChannel(I);
buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
RI.getSubReg(DestReg, SubRegIndex),
RI.getSubReg(SrcReg, SubRegIndex))
.addReg(DestReg,
RegState::Define | RegState::Implicit);
}
} else {
// vec4-to-vec4 copies were handled above; mixed vec4/scalar copies are
// not supported.
assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
&& !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
DestReg, SrcReg);
NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0))
.setIsKill(KillSrc);
}
}
MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
unsigned DstReg, int64_t Imm) const {
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define)
.addReg(AMDGPU::ALU_LITERAL_X)
.addImm(Imm)
.addReg(0); // PREDICATE_BIT
return MI;
}
unsigned R600InstrInfo::getIEQOpcode() const {
return AMDGPU::SETE_INT;
}
bool R600InstrInfo::isMov(unsigned Opcode) const {
switch(Opcode) {
default: return false;
case AMDGPU::MOV:
case AMDGPU::MOV_IMM_F32:
case AMDGPU::MOV_IMM_I32:
return true;
}
}
// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
switch (Opcode) {
default: return false;
case AMDGPU::RETURN:
case AMDGPU::RESERVE_REG:
return true;
}
}
bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
switch(Opcode) {
default: return false;
case AMDGPU::DOT4_r600_pseudo:
case AMDGPU::DOT4_eg_pseudo:
return true;
}
}
bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
switch(Opcode) {
default: return false;
case AMDGPU::CUBE_r600_pseudo:
case AMDGPU::CUBE_r600_real:
case AMDGPU::CUBE_eg_pseudo:
case AMDGPU::CUBE_eg_real:
return true;
}
}
bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
unsigned TargetFlags = get(Opcode).TSFlags;
return ((TargetFlags & R600_InstFlag::OP1) |
(TargetFlags & R600_InstFlag::OP2) |
(TargetFlags & R600_InstFlag::OP3));
}
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
const InstrItineraryData *II = TM->getInstrItineraryData();
return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}
static bool
isPredicateSetter(unsigned Opcode) {
switch (Opcode) {
case AMDGPU::PRED_X:
return true;
default:
return false;
}
}
static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
while (I != MBB.begin()) {
--I;
MachineInstr *MI = I;
if (isPredicateSetter(MI->getOpcode()))
return MI;
}
return NULL;
}
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
// Most of the following comes from the ARM implementation of AnalyzeBranch
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
return false;
--I;
while (I->isDebugValue()) {
if (I == MBB.begin())
return false;
--I;
}
if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
return false;
}
// Get the last instruction in the block.
MachineInstr *LastInst = I;
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() ||
static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
if (LastOpc == AMDGPU::JUMP) {
if (!isPredicated(LastInst)) {
TBB = LastInst->getOperand(0).getMBB();
return false;
} else {
MachineInstr *predSet = I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
return false;
}
}
return true; // Can't handle indirect branch.
}
// Get the instruction before it if it is a terminator.
MachineInstr *SecondLastInst = I;
unsigned SecondLastOpc = SecondLastInst->getOpcode();
// If the block ends with a predicated JUMP followed by an unpredicated
// JUMP, handle it.
if (SecondLastOpc == AMDGPU::JUMP &&
isPredicated(SecondLastInst) &&
LastOpc == AMDGPU::JUMP &&
!isPredicated(LastInst)) {
MachineInstr *predSet = --I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
TBB = SecondLastInst->getOperand(0).getMBB();
FBB = LastInst->getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
return false;
}
// Otherwise, can't handle this.
return true;
}
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
const MachineInstr *MI = op.getParent();
switch (MI->getDesc().OpInfo->RegClass) {
default: // FIXME: fallthrough??
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
}
}
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
if (FBB == 0) {
if (Cond.empty()) {
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
return 1;
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
assert(PredSet && "No previous predicate !");
addFlag(PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
return 1;
}
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
assert(PredSet && "No previous predicate !");
addFlag(PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
return 2;
}
}
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
// Note: we leave PRED* instructions in place.
// They may be needed when predicating instructions.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) {
return 0;
}
--I;
switch (I->getOpcode()) {
default:
return 0;
case AMDGPU::JUMP:
if (isPredicated(I)) {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
}
I->eraseFromParent();
break;
}
I = MBB.end();
if (I == MBB.begin()) {
return 1;
}
--I;
switch (I->getOpcode()) {
// FIXME: only one case??
default:
return 1;
case AMDGPU::JUMP:
if (isPredicated(I)) {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
}
I->eraseFromParent();
break;
}
return 2;
}
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
int idx = MI->findFirstPredOperandIdx();
if (idx < 0)
return false;
unsigned Reg = MI->getOperand(idx).getReg();
switch (Reg) {
default: return false;
case AMDGPU::PRED_SEL_ONE:
case AMDGPU::PRED_SEL_ZERO:
case AMDGPU::PREDICATE_BIT:
return true;
}
}
bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
// XXX: KILL* instructions can be predicated, but they must be the last
// instruction in a clause, so this means any instructions after them cannot
// be predicated. Until we have proper support for instruction clauses in the
// backend, we will mark KILL* instructions as unpredicable.
if (MI->getOpcode() == AMDGPU::KILLGT) {
return false;
} else {
return AMDGPUInstrInfo::isPredicable(MI);
}
}
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
unsigned ExtraPredCycles,
const BranchProbability &Probability) const{
return true;
}
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles,
unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles,
unsigned ExtraFCycles,
const BranchProbability &Probability) const {
return true;
}
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
const BranchProbability &Probability)
const {
return true;
}
bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const {
return false;
}
bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MachineOperand &MO = Cond[1];
switch (MO.getImm()) {
case OPCODE_IS_ZERO_INT:
MO.setImm(OPCODE_IS_NOT_ZERO_INT);
break;
case OPCODE_IS_NOT_ZERO_INT:
MO.setImm(OPCODE_IS_ZERO_INT);
break;
case OPCODE_IS_ZERO:
MO.setImm(OPCODE_IS_NOT_ZERO);
break;
case OPCODE_IS_NOT_ZERO:
MO.setImm(OPCODE_IS_ZERO);
break;
default:
return true;
}
MachineOperand &MO2 = Cond[2];
switch (MO2.getReg()) {
case AMDGPU::PRED_SEL_ZERO:
MO2.setReg(AMDGPU::PRED_SEL_ONE);
break;
case AMDGPU::PRED_SEL_ONE:
MO2.setReg(AMDGPU::PRED_SEL_ZERO);
break;
default:
return true;
}
return false;
}
bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
return isPredicateSetter(MI->getOpcode());
}
bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const {
return false;
}
bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
int PIdx = MI->findFirstPredOperandIdx();
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
return true;
}
return false;
}
unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
if (PredCost)
*PredCost = 2;
return 2;
}
MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned Opcode,
unsigned DstReg,
unsigned Src0Reg,
unsigned Src1Reg) const {
MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
DstReg); // $dst
if (Src1Reg) {
MIB.addImm(0) // $update_exec_mask
.addImm(0); // $update_predicate
}
MIB.addImm(1) // $write
.addImm(0) // $omod
.addImm(0) // $dst_rel
.addImm(0) // $dst_clamp
.addReg(Src0Reg) // $src0
.addImm(0) // $src0_neg
.addImm(0) // $src0_rel
.addImm(0); // $src0_abs
if (Src1Reg) {
MIB.addReg(Src1Reg) // $src1
.addImm(0) // $src1_neg
.addImm(0) // $src1_rel
.addImm(0); // $src1_abs
}
// XXX: The r600g finalizer expects this to be 1; once we've moved the
// scheduling to the backend, we can change the default to 0.
MIB.addImm(1) // $last
.addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
.addImm(0); // $literal
return MIB;
}
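// Typical use: supply only the opcode and registers, and every modifier
// operand ($update_exec_mask, $write, $omod, the neg/abs/rel flags,
// $pred_sel, $literal) is pre-seeded with its default; callers then adjust
// individual fields via addFlag()/setImmOperand(). buildMovImm below is the
// canonical example:
//   MachineInstr *Mov = buildDefaultInstruction(MBB, I, AMDGPU::MOV, DstReg,
//                                               AMDGPU::ALU_LITERAL_X);
//   setImmOperand(Mov, R600Operands::IMM, Imm);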
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
unsigned DstReg,
uint64_t Imm) const {
MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
AMDGPU::ALU_LITERAL_X);
setImmOperand(MovImm, R600Operands::IMM, Imm);
return MovImm;
}
int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
R600Operands::Ops Op) const {
return getOperandIdx(MI.getOpcode(), Op);
}
int R600InstrInfo::getOperandIdx(unsigned Opcode,
R600Operands::Ops Op) const {
const static int OpTable[3][R600Operands::COUNT] = {
// W C S S S S S S S S
// R O D L S R R R S R R R S R R L P
// D U I M R A R C C C C C C C R C C A R I
// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
// T M P E D L P 0 N R A 1 N R A 2 N R T D M
{0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
{0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
};
unsigned TargetFlags = get(Opcode).TSFlags;
unsigned OpTableIdx;
if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
switch (Op) {
case R600Operands::DST: return 0;
case R600Operands::SRC0: return 1;
case R600Operands::SRC1: return 2;
case R600Operands::SRC2: return 3;
default:
assert(!"Unknown operand type for instruction");
return -1;
}
}
if (TargetFlags & R600_InstFlag::OP1) {
OpTableIdx = 0;
} else if (TargetFlags & R600_InstFlag::OP2) {
OpTableIdx = 1;
} else {
assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined "
"for this instruction");
OpTableIdx = 2;
}
return OpTable[OpTableIdx][Op];
}
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
int64_t Imm) const {
int Idx = getOperandIdx(*MI, Op);
assert(Idx != -1 && "Operand not supported for this instruction.");
assert(MI->getOperand(Idx).isImm());
MI->getOperand(Idx).setImm(Imm);
}
//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//
bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
unsigned Flag) const {
unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
int FlagIndex = 0;
if (Flag != 0) {
// If we pass something other than the default value of Flag to this
// function, it means we want to set a flag on an instruction
// that uses native encoding.
assert(HAS_NATIVE_OPERANDS(TargetFlags));
bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
switch (Flag) {
case MO_FLAG_CLAMP:
FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP);
break;
case MO_FLAG_MASK:
FlagIndex = getOperandIdx(*MI, R600Operands::WRITE);
break;
case MO_FLAG_NOT_LAST:
case MO_FLAG_LAST:
FlagIndex = getOperandIdx(*MI, R600Operands::LAST);
break;
case MO_FLAG_NEG:
switch (SrcIdx) {
case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break;
case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break;
case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break;
}
break;
case MO_FLAG_ABS:
assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
"instructions.");
switch (SrcIdx) {
case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break;
case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break;
}
break;
default:
FlagIndex = -1;
break;
}
assert(FlagIndex != -1 && "Flag not supported for this instruction");
} else {
FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
assert(FlagIndex != 0 &&
"Instruction flags not supported for this instruction");
}
MachineOperand &FlagOp = MI->getOperand(FlagIndex);
assert(FlagOp.isImm());
return FlagOp;
}
void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
unsigned Flag) const {
unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
if (Flag == 0) {
return;
}
if (HAS_NATIVE_OPERANDS(TargetFlags)) {
MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
if (Flag == MO_FLAG_NOT_LAST) {
clearFlag(MI, Operand, MO_FLAG_LAST);
} else if (Flag == MO_FLAG_MASK) {
clearFlag(MI, Operand, Flag);
} else {
FlagOp.setImm(1);
}
} else {
MachineOperand &FlagOp = getFlagOp(MI, Operand);
FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
}
}
void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
unsigned Flag) const {
unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
if (HAS_NATIVE_OPERANDS(TargetFlags)) {
MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
FlagOp.setImm(0);
} else {
MachineOperand &FlagOp = getFlagOp(MI);
unsigned InstFlags = FlagOp.getImm();
InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
FlagOp.setImm(InstFlags);
}
}
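// A sketch of the flag packing used by addFlag/clearFlag for instructions
// without native flag operands, assuming NUM_MO_FLAGS (from R600Defines.h) is
// the per-operand bit width: operand i owns bits
// [i * NUM_MO_FLAGS, (i + 1) * NUM_MO_FLAGS) of the single flag immediate, so
// setting a flag on operand 1 is
//   FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * 1)));
// and clearFlag masks the same bits off again.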


@@ -0,0 +1,169 @@
//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface definition for R600InstrInfo
//
//===----------------------------------------------------------------------===//
#ifndef R600INSTRUCTIONINFO_H_
#define R600INSTRUCTIONINFO_H_
#include "AMDIL.h"
#include "AMDGPUInstrInfo.h"
#include "R600Defines.h"
#include "R600RegisterInfo.h"
#include <map>
namespace llvm {
class AMDGPUTargetMachine;
class DFAPacketizer;
class ScheduleDAG;
class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
class R600InstrInfo : public AMDGPUInstrInfo {
private:
const R600RegisterInfo RI;
int getBranchInstr(const MachineOperand &op) const;
public:
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
const R600RegisterInfo &getRegisterInfo() const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
bool isTrig(const MachineInstr &MI) const;
bool isPlaceHolderOpcode(unsigned opcode) const;
bool isReductionOp(unsigned opcode) const;
bool isCubeOp(unsigned opcode) const;
/// \returns true if this \p Opcode represents an ALU instruction.
bool isALUInstr(unsigned Opcode) const;
/// \brief Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const;
virtual unsigned getIEQOpcode() const;
virtual bool isMov(unsigned Opcode) const;
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
bool isPredicated(const MachineInstr *MI) const;
bool isPredicable(MachineInstr *MI) const;
bool
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
const BranchProbability &Probability) const;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
unsigned ExtraPredCycles,
const BranchProbability &Probability) const;
bool
isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles, unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles, unsigned ExtraFCycles,
const BranchProbability &Probability) const;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const;
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const;
bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
unsigned int getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost = 0) const;
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const { return 1; }
/// You can use this function to avoid manually specifying each instruction
/// modifier operand when building a new instruction.
///
/// \returns a MachineInstr with all the instruction modifiers initialized
/// to their default values.
MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned Opcode,
unsigned DstReg,
unsigned Src0Reg,
unsigned Src1Reg = 0) const;
MachineInstr *buildMovImm(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
unsigned DstReg,
uint64_t Imm) const;
/// \brief Get the index of Op in the MachineInstr.
///
/// \returns -1 if the Instruction does not contain the specified \p Op.
int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
/// \brief Get the index of \p Op for the given Opcode.
///
/// \returns -1 if the Instruction does not contain the specified \p Op.
int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
/// \brief Helper function for setting instruction flag values.
void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
/// \returns true if this instruction has an operand for storing target flags.
bool hasFlagOperand(const MachineInstr &MI) const;
/// \brief Add one of the MO_FLAG* flags to the specified \p Operand.
void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
/// \brief Determine if the specified \p Flag is set on this \p Operand.
bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
/// \param SrcIdx The register source to set the flag on (e.g src0, src1, src2)
/// \param Flag The flag being set.
///
/// \returns the operand containing the flags for this instruction.
MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0,
unsigned Flag = 0) const;
/// \brief Clear the specified flag on the instruction.
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
};
} // End llvm namespace
#endif // R600INSTRINFO_H_
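As a rough sketch of how the helpers above fit together, the following emits a move of an immediate in the way the buildMovImm declaration suggests: build a default MOV whose source is the ALU_LITERAL_X register, then store the literal in the IMM slot. The opcode name AMDGPU::MOV and the use of ALU_LITERAL_X and R600Operands::IMM are assumptions drawn from these declarations, not a verified transcript of buildMovImm.

// Sketch only: emit "DstReg = MOV literal(42)" in MBB before I, assuming
// AMDGPU::MOV reads its literal through the ALU_LITERAL_X source register.
#include "R600InstrInfo.h"
using namespace llvm;

static void emitMovImm42(const R600InstrInfo &TII, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I, unsigned DstReg) {
  // buildDefaultInstruction fills every modifier operand (neg, abs, ...)
  // with its default value, so only the opcode and registers are named.
  MachineInstr *MI = TII.buildDefaultInstruction(MBB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  // The literal value itself lives in the instruction's IMM operand.
  TII.setImmOperand(MI, R600Operands::IMM, 42);
}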

File diff suppressed because it is too large

View File

@@ -0,0 +1,32 @@
//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "R600", isTarget = 1 in {
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_R600_load_input_perspective :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_constant :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_load_input_linear :
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
def int_R600_store_stream_output :
Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_color :
Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_depth :
Intrinsic<[], [llvm_float_ty], []>;
def int_R600_store_pixel_stencil :
Intrinsic<[], [llvm_float_ty], []>;
def int_R600_store_pixel_dummy :
Intrinsic<[], [], []>;
}
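For context on how these definitions surface to a frontend: TableGen turns each int_R600_* record into a member of the Intrinsic:: enum, which can be materialized with Intrinsic::getDeclaration. A minimal, hedged sketch follows; the enum spelling Intrinsic::R600_load_input and the 3.2-era header paths are assumptions, not confirmed by this diff.

// Sketch: emit a call to llvm.R600.load.input for input slot 0.
#include "llvm/IRBuilder.h"   // moved to llvm/IR/IRBuilder.h in later releases
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
using namespace llvm;

Value *emitLoadInput0(Module &M, IRBuilder<> &Builder) {
  // getDeclaration adds the intrinsic prototype to the module if needed.
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::R600_load_input);
  return Builder.CreateCall(F, Builder.getInt32(0), "input0");
}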

View File

@@ -0,0 +1,34 @@
//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
#include "R600MachineFunctionInfo.h"
using namespace llvm;
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo(),
HasLinearInterpolation(false),
HasPerspectiveInterpolation(false) {
memset(Outputs, 0, sizeof(Outputs));
memset(StreamOutputs, 0, sizeof(StreamOutputs));
}
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const {
assert(HasPerspectiveInterpolation);
return 0;
}
unsigned R600MachineFunctionInfo::GetIJLinearIndex() const {
assert(HasLinearInterpolation);
if (HasPerspectiveInterpolation)
return 1;
else
return 0;
}

View File

@@ -0,0 +1,39 @@
//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
//===----------------------------------------------------------------------===//
#ifndef R600MACHINEFUNCTIONINFO_H
#define R600MACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include <vector>
namespace llvm {
class R600MachineFunctionInfo : public MachineFunctionInfo {
public:
R600MachineFunctionInfo(const MachineFunction &MF);
std::vector<unsigned> ReservedRegs;
SDNode *Outputs[16];
SDNode *StreamOutputs[64][4];
bool HasLinearInterpolation;
bool HasPerspectiveInterpolation;
unsigned GetIJLinearIndex() const;
unsigned GetIJPerspectiveIndex() const;
};
} // End llvm namespace
#endif //R600MACHINEFUNCTIONINFO_H

View File

@@ -0,0 +1,89 @@
//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "R600RegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
using namespace llvm;
R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
const TargetInstrInfo &tii)
: AMDGPURegisterInfo(tm, tii),
TM(tm),
TII(tii)
{ }
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
Reserved.set(AMDGPU::ONE);
Reserved.set(AMDGPU::ONE_INT);
Reserved.set(AMDGPU::NEG_HALF);
Reserved.set(AMDGPU::NEG_ONE);
Reserved.set(AMDGPU::PV_X);
Reserved.set(AMDGPU::ALU_LITERAL_X);
Reserved.set(AMDGPU::PREDICATE_BIT);
Reserved.set(AMDGPU::PRED_SEL_OFF);
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
Reserved.set(*I);
}
for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
E = MFI->ReservedRegs.end(); I != E; ++I) {
Reserved.set(*I);
}
return Reserved;
}
const TargetRegisterClass *
R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
switch (rc->getID()) {
case AMDGPU::GPRF32RegClassID:
case AMDGPU::GPRI32RegClassID:
return &AMDGPU::R600_Reg32RegClass;
default: return rc;
}
}
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
default:
case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
}
}
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
switch (Channel) {
default: assert(!"Invalid channel index"); return 0;
case 0: return AMDGPU::sel_x;
case 1: return AMDGPU::sel_y;
case 2: return AMDGPU::sel_z;
case 3: return AMDGPU::sel_w;
}
}
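A small worked example of how the two channel helpers compose: given the encoding scheme above, a register such as T5_Z decodes to channel 2, and the matching sub-register index recovers the scalar register from its 128-bit parent. The register names are purely illustrative; getSubReg is the stock accessor inherited from MCRegisterInfo.

// Sketch: round-trip between a channel register and its 128-bit parent.
#include "R600RegisterInfo.h"
using namespace llvm;

unsigned channelRegOf(const R600RegisterInfo &TRI, unsigned Reg128,
                      unsigned Reg32) {
  unsigned Chan = TRI.getHWRegChan(Reg32);          // e.g. T5_Z -> 2
  unsigned SubIdx = TRI.getSubRegFromChannel(Chan); // 2 -> AMDGPU::sel_z
  return TRI.getSubReg(Reg128, SubIdx);             // (T5_XYZW, sel_z) -> T5_Z
}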

View File

@@ -0,0 +1,55 @@
//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface definition for R600RegisterInfo
//
//===----------------------------------------------------------------------===//
#ifndef R600REGISTERINFO_H_
#define R600REGISTERINFO_H_
#include "AMDGPUTargetMachine.h"
#include "AMDGPURegisterInfo.h"
namespace llvm {
class R600TargetMachine;
class TargetInstrInfo;
struct R600RegisterInfo : public AMDGPURegisterInfo {
AMDGPUTargetMachine &TM;
const TargetInstrInfo &TII;
R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
/// \param RC is an AMDIL reg class.
///
/// \returns the R600 reg class that is equivalent to \p RC.
virtual const TargetRegisterClass *getISARegClass(
const TargetRegisterClass *RC) const;
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
unsigned getSubRegFromChannel(unsigned Channel) const;
};
} // End namespace llvm
#endif // R600REGISTERINFO_H_

View File

@@ -0,0 +1,107 @@
class R600Reg <string name, bits<16> encoding> : Register<name> {
let Namespace = "AMDGPU";
let HWEncoding = encoding;
}
class R600RegWithChan <string name, bits<9> sel, string chan> :
Register <name> {
field bits<2> chan_encoding = !if(!eq(chan, "X"), 0,
!if(!eq(chan, "Y"), 1,
!if(!eq(chan, "Z"), 2,
!if(!eq(chan, "W"), 3, 0))));
let HWEncoding{8-0} = sel;
let HWEncoding{10-9} = chan_encoding;
let Namespace = "AMDGPU";
}
class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
let HWEncoding = encoding;
}
foreach Index = 0-127 in {
foreach Chan = [ "X", "Y", "Z", "W" ] in {
// 32-bit Temporary Registers
def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
// 32-bit Constant Registers (There are more than 128, but this is
// the number that is currently supported.)
def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
[!cast<Register>("T"#Index#"_X"),
!cast<Register>("T"#Index#"_Y"),
!cast<Register>("T"#Index#"_Z"),
!cast<Register>("T"#Index#"_W")],
Index>;
}
// Array Base Register holding input in FS
foreach Index = 448-464 in {
def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
}
// Special Registers
def ZERO : R600Reg<"0.0", 248>;
def ONE : R600Reg<"1.0", 249>;
def NEG_ONE : R600Reg<"-1.0", 249>;
def ONE_INT : R600Reg<"1", 250>;
def HALF : R600Reg<"0.5", 252>;
def NEG_HALF : R600Reg<"-0.5", 252>;
def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
def PV_X : R600Reg<"pv.x", 254>;
def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "ArrayBase%u", 448, 464))>;
def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (interleave
(interleave (sequence "C%u_X", 0, 127),
(sequence "C%u_Z", 0, 127)),
(interleave (sequence "C%u_Y", 0, 127),
(sequence "C%u_W", 0, 127))))>;
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_X", 0, 127))>;
def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_Y", 0, 127))>;
def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_Z", 0, 127))>;
def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_W", 0, 127))>;
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (interleave
(interleave R600_TReg32_X, R600_TReg32_Z),
(interleave R600_TReg32_Y, R600_TReg32_W)))>;
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
R600_CReg32,
R600_ArrayBase,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
PREDICATE_BIT)>;
def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
(add (sequence "T%u_XYZW", 0, 127))> {
let CopyCost = -1;
}

View File

@@ -0,0 +1,36 @@
//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 has a VLIW architecture. On pre-Cayman cards there are 5 instruction
// slots: ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. On Cayman cards, the TRANS
// slot has been removed.
//
//===----------------------------------------------------------------------===//
def ALU_X : FuncUnit;
def ALU_Y : FuncUnit;
def ALU_Z : FuncUnit;
def ALU_W : FuncUnit;
def TRANS : FuncUnit;
def AnyALU : InstrItinClass;
def VecALU : InstrItinClass;
def TransALU : InstrItinClass;
def R600_EG_Itin : ProcessorItineraries <
[ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
[],
[
InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
]
>;
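To make the slot model concrete, the itinerary data generated from these records can be queried per scheduling class. A hedged sketch using the stock InstrItineraryData accessors, assuming a non-empty itinerary; it only checks that a class such as TransALU is confined to a single functional unit.

// Sketch: test whether a scheduling class may issue to exactly one unit,
// as TransALU above does (its only stage names the TRANS slot).
#include "llvm/MC/MCInstrItineraries.h"
using namespace llvm;

bool issuesOnSingleUnit(const InstrItineraryData &Itins, unsigned SchedClass) {
  const InstrStage *IS = Itins.beginStage(SchedClass);
  unsigned Units = IS->getUnits();        // bitmask of permitted FuncUnits
  return Units != 0 && (Units & (Units - 1)) == 0;  // exactly one bit set
}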

View File

@@ -0,0 +1,152 @@
//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass maps the pseudo interpolation registers to the correct physical
/// registers.
//
/// Prior to executing a fragment shader, the GPU loads interpolation
/// parameters into physical registers. The specific physical register that each
/// interpolation parameter ends up in depends on the type of the interpolation
/// parameter as well as how many interpolation parameters are used by the
/// shader.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDIL.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
class SIAssignInterpRegsPass : public MachineFunctionPass {
private:
static char ID;
TargetMachine &TM;
void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
unsigned physReg, unsigned virtReg);
public:
SIAssignInterpRegsPass(TargetMachine &tm) :
MachineFunctionPass(ID), TM(tm) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "SI Assign interpolation registers"; }
};
} // End anonymous namespace
char SIAssignInterpRegsPass::ID = 0;
#define INTERP_VALUES 16
#define REQUIRED_VALUE_MAX_INDEX 7
struct InterpInfo {
bool Enabled;
unsigned Regs[3];
unsigned RegCount;
};
FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
return new SIAssignInterpRegsPass(tm);
}
bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) {
struct InterpInfo InterpUse[INTERP_VALUES] = {
{false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
{false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
{false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
{false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
{false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
{false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
{false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
{false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
{false, {AMDGPU::POS_X_FLOAT}, 1},
{false, {AMDGPU::POS_Y_FLOAT}, 1},
{false, {AMDGPU::POS_Z_FLOAT}, 1},
{false, {AMDGPU::POS_W_FLOAT}, 1},
{false, {AMDGPU::FRONT_FACE}, 1},
{false, {AMDGPU::ANCILLARY}, 1},
{false, {AMDGPU::SAMPLE_COVERAGE}, 1},
{false, {AMDGPU::POS_FIXED_PT}, 1}
};
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
// This pass is only needed for pixel shaders.
if (MFI->ShaderType != ShaderType::PIXEL) {
return false;
}
MachineRegisterInfo &MRI = MF.getRegInfo();
bool ForceEnable = true;
// First pass, mark the interpolation values that are used.
for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
RegIdx++) {
InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
!MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
if (InterpUse[InterpIdx].Enabled &&
InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
ForceEnable = false;
}
}
}
// At least one interpolation mode must be enabled or else the GPU will hang.
if (ForceEnable) {
InterpUse[0].Enabled = true;
}
unsigned UsedVgprs = 0;
// Second pass, replace with VGPRs.
for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
if (!InterpUse[InterpIdx].Enabled) {
continue;
}
MFI->SPIPSInputAddr |= (1 << InterpIdx);
for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
RegIdx++, UsedVgprs++) {
unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
addLiveIn(&MF, MRI, NewReg, VirtReg);
}
}
return false;
}
void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
MachineRegisterInfo & MRI,
unsigned physReg, unsigned virtReg) {
const TargetInstrInfo * TII = TM.getInstrInfo();
if (!MRI.isLiveIn(physReg)) {
MRI.addLiveIn(physReg, virtReg);
MF->front().addLiveIn(physReg);
BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
TII->get(TargetOpcode::COPY), virtReg)
.addReg(physReg);
} else {
MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
}
}
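A worked example of the bookkeeping in the second pass: a pixel shader that reads PERSP_CENTER (table index 1, two registers) and POS_X_FLOAT (index 8, one register) ends up with bits 1 and 8 set in SPIPSInputAddr and consumes VGPR0 through VGPR2. The mask arithmetic, as a standalone sketch:

// Sketch: the enable mask accumulated by the pass, bit i <=> InterpUse[i].
#include <cassert>
#include <cstdint>

int main() {
  uint32_t SPIPSInputAddr = 0;
  SPIPSInputAddr |= 1u << 1;  // PERSP_CENTER: I and J land in VGPR0/VGPR1
  SPIPSInputAddr |= 1u << 8;  // POS_X_FLOAT: lands in VGPR2
  assert(SPIPSInputAddr == 0x102);
  return 0;
}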

View File

@@ -0,0 +1,179 @@
//===-- SIFixSGPRLiveness.cpp - SGPR liveness adjustment ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// SGPRs are not affected by control flow. This pass adjusts SGPR liveness so
/// that the register allocator can still correctly allocate them.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
namespace {
class SIFixSGPRLiveness : public MachineFunctionPass {
private:
static char ID;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
MachineDominatorTree *MD;
MachinePostDominatorTree *MPD;
bool isSGPR(const TargetRegisterClass *RegClass) {
return RegClass == &AMDGPU::SReg_1RegClass ||
RegClass == &AMDGPU::SReg_32RegClass ||
RegClass == &AMDGPU::SReg_64RegClass ||
RegClass == &AMDGPU::SReg_128RegClass ||
RegClass == &AMDGPU::SReg_256RegClass;
}
void addKill(MachineBasicBlock::iterator I, unsigned Reg);
MachineBasicBlock *handleUses(unsigned VirtReg, MachineBasicBlock *Begin);
void handlePreds(MachineBasicBlock *Begin, MachineBasicBlock *End,
unsigned VirtReg);
bool handleVirtReg(unsigned VirtReg);
public:
SIFixSGPRLiveness(TargetMachine &tm);
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
return "SI fix SGPR liveness pass";
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
};
} // end anonymous namespace
char SIFixSGPRLiveness::ID = 0;
SIFixSGPRLiveness::SIFixSGPRLiveness(TargetMachine &tm):
MachineFunctionPass(ID),
TII(tm.getInstrInfo()) {
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
}
void SIFixSGPRLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
void SIFixSGPRLiveness::addKill(MachineBasicBlock::iterator I, unsigned Reg) {
MachineBasicBlock *MBB = I->getParent();
BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL)).addReg(Reg);
}
// Find the common post dominator of all uses
MachineBasicBlock *SIFixSGPRLiveness::handleUses(unsigned VirtReg,
MachineBasicBlock *Begin) {
MachineBasicBlock *LastUse = Begin, *End = Begin;
bool EndUsesReg = true;
MachineRegisterInfo::use_iterator i, e;
for (i = MRI->use_begin(VirtReg), e = MRI->use_end(); i != e; ++i) {
MachineBasicBlock *MBB = i->getParent();
if (LastUse == MBB)
continue;
LastUse = MBB;
MBB = MPD->findNearestCommonDominator(End, MBB);
if (MBB == LastUse)
EndUsesReg = true;
else if (MBB != End)
EndUsesReg = false;
End = MBB;
}
return EndUsesReg ? Begin : End;
}
// Handle predecessors separately; only add KILLs to dominated ones
void SIFixSGPRLiveness::handlePreds(MachineBasicBlock *Begin,
MachineBasicBlock *End,
unsigned VirtReg) {
MachineBasicBlock::pred_iterator i, e;
for (i = End->pred_begin(), e = End->pred_end(); i != e; ++i) {
if (MD->dominates(End, *i))
continue; // ignore loops
if (MD->dominates(*i, Begin))
continue; // too far up, abort search
if (MD->dominates(Begin, *i)) {
// Found the end of the live range
addKill((*i)->getFirstTerminator(), VirtReg);
continue;
}
handlePreds(Begin, *i, VirtReg);
}
}
bool SIFixSGPRLiveness::handleVirtReg(unsigned VirtReg) {
MachineInstr *Def = MRI->getVRegDef(VirtReg);
if (!Def || MRI->use_empty(VirtReg))
return false; // No definition or not used
MachineBasicBlock *Begin = Def->getParent();
MachineBasicBlock *End = handleUses(VirtReg, Begin);
if (Begin == End)
return false; // Defined and only used in the same block
if (MD->dominates(Begin, End)) {
// The live range dominates the end node, so just kill it here
addKill(End->getFirstNonPHI(), VirtReg);
} else {
// Only some predecessors are dominated; handle them separately
handlePreds(Begin, End, VirtReg);
}
return true;
}
bool SIFixSGPRLiveness::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
MRI = &MF.getRegInfo();
MD = &getAnalysis<MachineDominatorTree>();
MPD = &getAnalysis<MachinePostDominatorTree>();
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i);
const TargetRegisterClass *RegClass = MRI->getRegClass(VirtReg);
if (!isSGPR(RegClass))
continue;
Changes |= handleVirtReg(VirtReg);
}
return Changes;
}
FunctionPass *llvm::createSIFixSGPRLivenessPass(TargetMachine &tm) {
return new SIFixSGPRLiveness(tm);
}

View File

@@ -0,0 +1,442 @@
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for SI
//
//===----------------------------------------------------------------------===//
#include "SIISelLowering.h"
#include "AMDIL.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) {
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
computeRegisterProperties();
setOperationAction(ISD::AND, MVT::i1, Custom);
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We need to custom lower loads from the USER_SGPR address space, so we can
// add the SGPRs as livein registers.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
}
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
MachineBasicBlock::iterator I = MI;
if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
AppendS_WAITCNT(MI, *BB, llvm::next(I));
return BB;
}
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
case AMDGPU::CLAMP_SI:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
// VSRC1-2 are unused, but we still need to fill all the
// operand slots, so we just reuse the VSRC0 operand
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(1))
.addImm(0) // ABS
.addImm(1) // CLAMP
.addImm(0) // OMOD
.addImm(0); // NEG
MI->eraseFromParent();
break;
case AMDGPU::FABS_SI:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
// VSRC1-2 are unused, but we still need to fill all the
// operand slots, so we just reuse the VSRC0 operand
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(1))
.addImm(1) // ABS
.addImm(0) // CLAMP
.addImm(0) // OMOD
.addImm(0); // NEG
MI->eraseFromParent();
break;
case AMDGPU::FNEG_SI:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
// VSRC1-2 are unused, but we still need to fill all the
// operand slots, so we just reuse the VSRC0 operand
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(1))
.addImm(0) // ABS
.addImm(0) // CLAMP
.addImm(0) // OMOD
.addImm(1); // NEG
MI->eraseFromParent();
break;
case AMDGPU::SHADER_TYPE:
BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
MI->getOperand(0).getImm();
MI->eraseFromParent();
break;
case AMDGPU::SI_INTERP:
LowerSI_INTERP(MI, *BB, I, MRI);
break;
case AMDGPU::SI_INTERP_CONST:
LowerSI_INTERP_CONST(MI, *BB, I, MRI);
break;
case AMDGPU::SI_KIL:
LowerSI_KIL(MI, *BB, I, MRI);
break;
case AMDGPU::SI_WQM:
LowerSI_WQM(MI, *BB, I, MRI);
break;
case AMDGPU::SI_V_CNDLT:
LowerSI_V_CNDLT(MI, *BB, I, MRI);
break;
}
return BB;
}
void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I) const {
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
.addImm(0);
}
void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
.addReg(AMDGPU::EXEC);
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
MachineOperand dst = MI->getOperand(0);
MachineOperand iReg = MI->getOperand(1);
MachineOperand jReg = MI->getOperand(2);
MachineOperand attr_chan = MI->getOperand(3);
MachineOperand attr = MI->getOperand(4);
MachineOperand params = MI->getOperand(5);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
.addOperand(params);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
.addOperand(iReg)
.addOperand(attr_chan)
.addOperand(attr)
.addReg(M0);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
.addOperand(dst)
.addReg(tmp)
.addOperand(jReg)
.addOperand(attr_chan)
.addOperand(attr)
.addReg(M0);
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
MachineBasicBlock &BB, MachineBasicBlock::iterator I,
MachineRegisterInfo &MRI) const {
MachineOperand dst = MI->getOperand(0);
MachineOperand attr_chan = MI->getOperand(1);
MachineOperand attr = MI->getOperand(2);
MachineOperand params = MI->getOperand(3);
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
.addOperand(params);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
.addOperand(dst)
.addOperand(attr_chan)
.addOperand(attr)
.addReg(M0);
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
// Clear this pixel from the exec mask if the operand is negative
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
AMDGPU::VCC)
.addReg(AMDGPU::SREG_LIT_0)
.addOperand(MI->getOperand(0));
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
BuildMI(BB, I, BB.findDebugLoc(I),
TII->get(AMDGPU::V_CMP_GT_F32_e32),
VCC)
.addReg(AMDGPU::SREG_LIT_0)
.addOperand(MI->getOperand(1));
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(2))
.addReg(VCC);
MI->eraseFromParent();
}
EVT SITargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
switch (IntrinsicID) {
case AMDGPUIntrinsic::SI_vs_load_buffer_index:
return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
AMDGPU::VGPR0, VT);
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
break;
}
}
return SDValue();
}
/// \brief Lower i1 operations on the VCC register.
///
/// In the VALU context, VCC is a one-bit register, but in the
/// SALU context VCC is a 64-bit register (one bit per thread). Since only
/// the SALU can perform operations on the VCC register, we need to promote
/// the operand types from i1 to i64 in order for tablegen to be able to match
/// this operation to the correct SALU instruction. We do this promotion by
/// wrapping the operands in VCC_BITCAST nodes.
///
SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
SelectionDAG &DAG,
unsigned VCCNode) const {
DebugLoc DL = Op.getDebugLoc();
SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
Op.getOperand(0)),
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
Op.getOperand(1)));
return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
}
SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue CC = Op.getOperand(1);
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue JumpT = Op.getOperand(4);
SDValue CmpValue;
SDValue Result;
CmpValue = DAG.getNode(
ISD::SETCC,
Op.getDebugLoc(),
MVT::i1,
LHS, RHS,
CC);
Result = DAG.getNode(
AMDGPUISD::BRANCH_COND,
CmpValue.getDebugLoc(),
MVT::Other, Chain,
JumpT, CmpValue);
return Result;
}
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
assert(Ptr);
unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
// We only need to lower USER_SGPR address space loads
if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
return SDValue();
}
// Loads from the USER_SGPR address space can only have constant value
// pointers.
ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
assert(BasePtr);
unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
const TargetRegisterClass * dstClass;
switch (TypeDwordWidth) {
default:
assert(!"USER_SGPR value size not implemented");
return SDValue();
case 1:
dstClass = &AMDGPU::SReg_32RegClass;
break;
case 2:
dstClass = &AMDGPU::SReg_64RegClass;
break;
}
uint64_t Index = BasePtr->getZExtValue();
assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
unsigned SGPRIndex = Index / TypeDwordWidth;
unsigned Reg = dstClass->getRegister(SGPRIndex);
DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
VT));
return SDValue();
}
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue True = Op.getOperand(2);
SDValue False = Op.getOperand(3);
SDValue CC = Op.getOperand(4);
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
// Possible Min/Max pattern
SDValue MinMax = LowerMinMax(Op, DAG);
if (MinMax.getNode()) {
return MinMax;
}
SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
DebugLoc DL = N->getDebugLoc();
EVT VT = N->getValueType(0);
switch (N->getOpcode()) {
default: break;
case ISD::SELECT_CC: {
ConstantSDNode *True, *False;
// i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
&& (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
&& True->isAllOnesValue()
&& False->isNullValue()
&& VT == MVT::i1) {
return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
N->getOperand(1), N->getOperand(4));
}
break;
}
case ISD::SETCC: {
SDValue Arg0 = N->getOperand(0);
SDValue Arg1 = N->getOperand(1);
SDValue CC = N->getOperand(2);
ConstantSDNode * C = NULL;
ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
// i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
if (VT == MVT::i1
&& Arg0.getOpcode() == ISD::SIGN_EXTEND
&& Arg0.getOperand(0).getValueType() == MVT::i1
&& (C = dyn_cast<ConstantSDNode>(Arg1))
&& C->isNullValue()
&& CCOp == ISD::SETNE) {
return SimplifySetCC(VT, Arg0.getOperand(0),
DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
}
break;
}
}
return SDValue();
}
#define NODE_NAME_CASE(node) case SIISD::node: return #node;
const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
NODE_NAME_CASE(VCC_AND)
NODE_NAME_CASE(VCC_BITCAST)
}
}

View File

@@ -0,0 +1,62 @@
//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//
#ifndef SIISELLOWERING_H
#define SIISELLOWERING_H
#include "AMDGPUISelLowering.h"
#include "SIInstrInfo.h"
namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering {
const SIInstrInfo * TII;
/// Memory reads and writes are synchronized using the S_WAITCNT instruction.
/// This function takes the most conservative approach and inserts an
/// S_WAITCNT instruction after every read and write.
void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I) const;
void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, unsigned Opcode) const;
void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
unsigned VCCNode) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
public:
SITargetLowering(TargetMachine &tm);
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const;
virtual EVT getSetCCResultType(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual const char* getTargetNodeName(unsigned Opcode) const;
};
} // End namespace llvm
#endif //SIISELLOWERING_H

View File

@@ -0,0 +1,146 @@
//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SI Instruction format definitions.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//
class VOP3b_2IN <bits<9> op, string opName, RegisterClass dstClass,
RegisterClass src0Class, RegisterClass src1Class,
list<dag> pattern>
: VOP3b <op, (outs dstClass:$vdst),
(ins src0Class:$src0, src1Class:$src1, InstFlag:$src2, InstFlag:$sdst,
InstFlag:$omod, InstFlag:$neg),
opName, pattern
>;
class VOP3_1_32 <bits<9> op, string opName, list<dag> pattern>
: VOP3b_2IN <op, opName, SReg_1, AllReg_32, VReg_32, pattern>;
class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
: VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
: VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, VReg_64:$src1, VReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
: SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
: SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
: SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
: SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
: SOP2 <op, (outs SReg_1:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern> :
VOP1 <
op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
>;
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
def _e64 : VOP3_64 <
{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern> :
VOP2 <
op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
>;
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
def _e64 : VOP3_64 <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
: SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
: SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern> :
VOPC <
op, (ins arc:$src0, vrc:$src1), opName, pattern
>;
multiclass VOPC_32 <bits<9> op, string opName, list<dag> pattern> {
def _e32 : VOPC_Helper <
{op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
VReg_32, AllReg_32, opName, pattern
>;
def _e64 : VOP3_1_32 <
op,
opName, pattern
>;
}
multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
def _e64 : VOP3_64 <
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
opName, []
>;
}
class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
: SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
: SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;

View File

@@ -0,0 +1,89 @@
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//
#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;
SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
RI(tm, *this)
{ }
const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
return RI;
}
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
// If we are trying to copy to or from SCC, there is a bug somewhere else in
// the backend. While it may be theoretically possible to do this, it should
// never be necessary.
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
assert(AMDGPU::SReg_32RegClass.contains(DestReg));
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
}
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const {
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
MachineInstrBuilder(MI).addImm(Imm);
return MI;
}
bool SIInstrInfo::isMov(unsigned Opcode) const {
switch(Opcode) {
default: return false;
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_IMM_F32:
case AMDGPU::V_MOV_IMM_I32:
case AMDGPU::S_MOV_IMM_I32:
return true;
}
}
bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return RC != &AMDGPU::EXECRegRegClass;
}

View File

@@ -0,0 +1,62 @@
//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//
#ifndef SIINSTRINFO_H
#define SIINSTRINFO_H
#include "AMDGPUInstrInfo.h"
#include "SIRegisterInfo.h"
namespace llvm {
class SIInstrInfo : public AMDGPUInstrInfo {
private:
const SIRegisterInfo RI;
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
const SIRegisterInfo &getRegisterInfo() const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
/// \returns the encoding type of this instruction.
unsigned getEncodingType(const MachineInstr &MI) const;
/// \returns the size of this instruction's encoding in bytes.
unsigned getEncodingBytes(const MachineInstr &MI) const;
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const;
virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
virtual bool isMov(unsigned Opcode) const;
virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
};
} // End namespace llvm
namespace SIInstrFlags {
enum Flags {
// First 4 bits are the instruction encoding
NEED_WAIT = 1 << 4
};
}
#endif //SIINSTRINFO_H
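For reference, the NEED_WAIT bit defined above is the one InstSI encodes in TSFlags{4}, and the custom inserter in SIISelLowering consults it before appending an S_WAITCNT. A minimal sketch of that check; the helper name needsWaitcnt is hypothetical.

// Sketch: consult the NEED_WAIT flag that InstSI packs into TSFlags{4}.
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static bool needsWaitcnt(const SIInstrInfo &TII, const MachineInstr &MI) {
  return (TII.get(MI.getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) != 0;
}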

View File

@@ -0,0 +1,589 @@
//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SI DAG Profiles
//===----------------------------------------------------------------------===//
def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
]>;
//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//
// and operation on 64-bit wide vcc
def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
[SDNPCommutative, SDNPAssociative]
>;
// Special bitcast node for sharing VCC register between VALU and SALU
def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
>;
// and operation on 64-bit wide vcc
def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
[SDNPCommutative, SDNPAssociative]
>;
// Special bitcast node for sharing VCC register between VALU and SALU
def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
>;
class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
AMDGPUInst<outs, ins, asm, pattern> {
field bits<4> EncodingType = 0;
field bits<1> NeedWait = 0;
let TSFlags{3-0} = EncodingType;
let TSFlags{4} = NeedWait;
}
class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
field bits<32> Inst;
}
class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
field bits<64> Inst;
}
class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
let EncoderMethod = "encodeOperand";
let MIOperandInfo = opInfo;
}
def IMM16bit : ImmLeaf <
i16,
[{return isInt<16>(Imm);}]
>;
def IMM8bit : ImmLeaf <
i32,
[{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
>;
def IMM12bit : ImmLeaf <
i16,
[{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
>;
def IMM32bitIn64bit : ImmLeaf <
i64,
[{return isInt<32>(Imm);}]
>;
class GPR4Align <RegisterClass rc> : Operand <vAny> {
let EncoderMethod = "GPR4AlignEncode";
let MIOperandInfo = (ops rc:$reg);
}
class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
let EncoderMethod = "GPR2AlignEncode";
let MIOperandInfo = (ops rc:$reg);
}
def SMRDmemrr : Operand<iPTR> {
let MIOperandInfo = (ops SReg_64, SReg_32);
let EncoderMethod = "GPR2AlignEncode";
}
def SMRDmemri : Operand<iPTR> {
let MIOperandInfo = (ops SReg_64, i32imm);
let EncoderMethod = "SMRDmemriEncode";
}
def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
let Uses = [EXEC] in {
def EXP : Enc64<
(outs),
(ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
"EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
[] > {
bits<4> EN;
bits<6> TGT;
bits<1> COMPR;
bits<1> DONE;
bits<1> VM;
bits<8> VSRC0;
bits<8> VSRC1;
bits<8> VSRC2;
bits<8> VSRC3;
let Inst{3-0} = EN;
let Inst{9-4} = TGT;
let Inst{10} = COMPR;
let Inst{11} = DONE;
let Inst{12} = VM;
let Inst{31-26} = 0x3e;
let Inst{39-32} = VSRC0;
let Inst{47-40} = VSRC1;
let Inst{55-48} = VSRC2;
let Inst{63-56} = VSRC3;
let EncodingType = 0; //SIInstrEncodingType::EXP
let NeedWait = 1;
let usesCustomInserter = 1;
}
class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64 <outs, ins, asm, pattern> {
bits<8> VDATA;
bits<4> DMASK;
bits<1> UNORM;
bits<1> GLC;
bits<1> DA;
bits<1> R128;
bits<1> TFE;
bits<1> LWE;
bits<1> SLC;
bits<8> VADDR;
bits<5> SRSRC;
bits<5> SSAMP;
let Inst{11-8} = DMASK;
let Inst{12} = UNORM;
let Inst{13} = GLC;
let Inst{14} = DA;
let Inst{15} = R128;
let Inst{16} = TFE;
let Inst{17} = LWE;
let Inst{24-18} = op;
let Inst{25} = SLC;
let Inst{31-26} = 0x3c;
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
let Inst{52-48} = SRSRC;
let Inst{57-53} = SSAMP;
let EncodingType = 2; //SIInstrEncodingType::MIMG
let NeedWait = 1;
let usesCustomInserter = 1;
}
class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64<outs, ins, asm, pattern> {
bits<8> VDATA;
bits<12> OFFSET;
bits<1> OFFEN;
bits<1> IDXEN;
bits<1> GLC;
bits<1> ADDR64;
bits<4> DFMT;
bits<3> NFMT;
bits<8> VADDR;
bits<5> SRSRC;
bits<1> SLC;
bits<1> TFE;
bits<8> SOFFSET;
let Inst{11-0} = OFFSET;
let Inst{12} = OFFEN;
let Inst{13} = IDXEN;
let Inst{14} = GLC;
let Inst{15} = ADDR64;
let Inst{18-16} = op;
let Inst{22-19} = DFMT;
let Inst{25-23} = NFMT;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
let Inst{52-48} = SRSRC;
let Inst{54} = SLC;
let Inst{55} = TFE;
let Inst{63-56} = SOFFSET;
let EncodingType = 3; //SIInstrEncodingType::MTBUF
let NeedWait = 1;
let usesCustomInserter = 1;
let neverHasSideEffects = 1;
}
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64<outs, ins, asm, pattern> {
bits<8> VDATA;
bits<12> OFFSET;
bits<1> OFFEN;
bits<1> IDXEN;
bits<1> GLC;
bits<1> ADDR64;
bits<1> LDS;
bits<8> VADDR;
bits<5> SRSRC;
bits<1> SLC;
bits<1> TFE;
bits<8> SOFFSET;
let Inst{11-0} = OFFSET;
let Inst{12} = OFFEN;
let Inst{13} = IDXEN;
let Inst{14} = GLC;
let Inst{15} = ADDR64;
let Inst{16} = LDS;
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
let Inst{39-32} = VADDR;
let Inst{47-40} = VDATA;
let Inst{52-48} = SRSRC;
let Inst{54} = SLC;
let Inst{55} = TFE;
let Inst{63-56} = SOFFSET;
let EncodingType = 4; //SIInstrEncodingType::MUBUF
let NeedWait = 1;
let usesCustomInserter = 1;
let neverHasSideEffects = 1;
}
} // End Uses = [EXEC]
class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32<outs, ins, asm, pattern> {
bits<7> SDST;
bits<15> PTR;
bits<8> OFFSET = PTR{7-0};
bits<1> IMM = PTR{8};
bits<6> SBASE = PTR{14-9};
let Inst{7-0} = OFFSET;
let Inst{8} = IMM;
let Inst{14-9} = SBASE;
let Inst{21-15} = SDST;
let Inst{26-22} = op;
let Inst{31-27} = 0x18; //encoding
let EncodingType = 5; //SIInstrEncodingType::SMRD
let NeedWait = 1;
let usesCustomInserter = 1;
}
class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32<outs, ins, asm, pattern> {
bits<7> SDST;
bits<8> SSRC0;
let Inst{7-0} = SSRC0;
let Inst{15-8} = op;
let Inst{22-16} = SDST;
let Inst{31-23} = 0x17d; //encoding;
let EncodingType = 6; //SIInstrEncodingType::SOP1
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins, asm, pattern> {
bits<7> SDST;
bits<8> SSRC0;
bits<8> SSRC1;
let Inst{7-0} = SSRC0;
let Inst{15-8} = SSRC1;
let Inst{22-16} = SDST;
let Inst{29-23} = op;
let Inst{31-30} = 0x2; // encoding
let EncodingType = 7; // SIInstrEncodingType::SOP2
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32<outs, ins, asm, pattern> {
bits<8> SSRC0;
bits<8> SSRC1;
let Inst{7-0} = SSRC0;
let Inst{15-8} = SSRC1;
let Inst{22-16} = op;
let Inst{31-23} = 0x17e;
let EncodingType = 8; // SIInstrEncodingType::SOPC
let DisableEncoding = "$dst";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins , asm, pattern> {
bits <7> SDST;
bits <16> SIMM16;
let Inst{15-0} = SIMM16;
let Inst{22-16} = SDST;
let Inst{27-23} = op;
let Inst{31-28} = 0xb; //encoding
let EncodingType = 9; // SIInstrEncodingType::SOPK
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
(outs),
ins,
asm,
pattern > {
bits <16> SIMM16;
let Inst{15-0} = SIMM16;
let Inst{22-16} = op;
let Inst{31-23} = 0x17f; // encoding
let EncodingType = 10; // SIInstrEncodingType::SOPP
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
let Uses = [EXEC] in {
class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<8> VSRC;
bits<2> ATTRCHAN;
bits<6> ATTR;
let Inst{7-0} = VSRC;
let Inst{9-8} = ATTRCHAN;
let Inst{15-10} = ATTR;
let Inst{17-16} = op;
let Inst{25-18} = VDST;
let Inst{31-26} = 0x32; // encoding
let EncodingType = 11; // SIInstrEncodingType::VINTRP
let neverHasSideEffects = 1;
let mayLoad = 1;
let mayStore = 0;
}
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<9> SRC0;
let Inst{8-0} = SRC0;
let Inst{16-9} = op;
let Inst{24-17} = VDST;
let Inst{31-25} = 0x3f; //encoding
let EncodingType = 12; // SIInstrEncodingType::VOP1
let PostEncoderMethod = "VOPPostEncode";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc32 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<9> SRC0;
bits<8> VSRC1;
let Inst{8-0} = SRC0;
let Inst{16-9} = VSRC1;
let Inst{24-17} = VDST;
let Inst{30-25} = op;
let Inst{31} = 0x0; //encoding
let EncodingType = 13; // SIInstrEncodingType::VOP2
let PostEncoderMethod = "VOPPostEncode";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<9> SRC0;
bits<9> SRC1;
bits<9> SRC2;
bits<3> ABS;
bits<1> CLAMP;
bits<2> OMOD;
bits<3> NEG;
let Inst{7-0} = VDST;
let Inst{10-8} = ABS;
let Inst{11} = CLAMP;
let Inst{25-17} = op;
let Inst{31-26} = 0x34; //encoding
let Inst{40-32} = SRC0;
let Inst{49-41} = SRC1;
let Inst{58-50} = SRC2;
let Inst{60-59} = OMOD;
let Inst{63-61} = NEG;
let EncodingType = 14; // SIInstrEncodingType::VOP3
let PostEncoderMethod = "VOPPostEncode";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
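// VOP3b: VOP3 variant where the ABS/CLAMP bits are replaced by a second,
// scalar destination (SDST), e.g. for instructions producing a carry-out.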
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64 <outs, ins, asm, pattern> {
bits<8> VDST;
bits<9> SRC0;
bits<9> SRC1;
bits<9> SRC2;
bits<7> SDST;
bits<2> OMOD;
bits<3> NEG;
let Inst{7-0} = VDST;
let Inst{14-8} = SDST;
let Inst{25-17} = op;
let Inst{31-26} = 0x34; // encoding
let Inst{40-32} = SRC0;
let Inst{49-41} = SRC1;
let Inst{58-50} = SRC2;
let Inst{60-59} = OMOD;
let Inst{63-61} = NEG;
let EncodingType = 14; // SIInstrEncodingType::VOP3
let PostEncoderMethod = "VOPPostEncode";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
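// VOPC: vector compare. Each lane writes its result bit to VCC, so the $dst
// operand is fixed to VCCReg and excluded from the encoding.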
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
bits<9> SRC0;
bits<8> VSRC1;
let Inst{8-0} = SRC0;
let Inst{16-9} = VSRC1;
let Inst{24-17} = op;
let Inst{31-25} = 0x3e; // encoding
let EncodingType = 15; // SIInstrEncodingType::VOPC
let PostEncoderMethod = "VOPPostEncode";
let DisableEncoding = "$dst";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
} // End Uses = [EXEC]
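// Helpers for the memory instruction formats: MIMG (image loads), MUBUF and
// MTBUF (untyped and typed buffer accesses) and SMRD (scalar memory reads,
// used for constant loads).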
class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
op,
(outs VReg_128:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
asm,
[]> {
let mayLoad = 1;
let mayStore = 0;
}
class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
op,
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
i1imm:$tfe, SReg_32:$soffset),
asm,
[]> {
let mayLoad = 1;
let mayStore = 0;
}
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
asm,
[]> {
let mayLoad = 1;
let mayStore = 0;
}
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
(ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
asm,
[]> {
let mayStore = 1;
let mayLoad = 0;
}
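// Each SMRD instruction comes in two flavors: _IMM takes the offset as an
// immediate (ADDR_Offset8), _SGPR reads it from a scalar register.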
multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
ValueType vt> {
def _IMM : SMRD <
op,
(outs dstClass:$dst),
(ins SMRDmemri:$src0),
asm,
[(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
>;
def _SGPR : SMRD <
op,
(outs dstClass:$dst),
(ins SMRDmemrr:$src0),
asm,
[(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
>;
}
multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
}
include "SIInstrFormats.td"
include "SIInstructions.td"

File diff suppressed because it is too large

View File

@@ -0,0 +1,42 @@
//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// SI Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
/* XXX: We may need a separate intrinsic here for loading integer values */
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]>;
def int_SI_wqm : Intrinsic <[], [], []>;
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
def int_SI_sample_bias : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
def int_SI_sample_lod : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
/* Interpolation Intrinsics */
def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
def int_SI_fs_interp_linear_center : Interp;
def int_SI_fs_interp_linear_centroid : Interp;
def int_SI_fs_interp_persp_center : Interp;
def int_SI_fs_interp_persp_centroid : Interp;
def int_SI_fs_interp_constant : Interp;
def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
}

View File

@@ -0,0 +1,191 @@
//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass lowers the pseudo control flow instructions
/// (SI_IF_NZ, ELSE, ENDIF) to predicated instructions.
///
/// All control flow (except loops) is handled using predicated instructions and
/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
/// by writing to the 64-bit EXEC register (each bit corresponds to a
/// single vector ALU). Typically, for predicates, a vector ALU will write
/// to its bit of the VCC register (like EXEC, VCC is 64 bits, one for each
/// Vector ALU) and then the Scalar ALU will AND the VCC register with
/// EXEC to update the predicates.
///
/// For example:
/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
/// SI_IF_NZ %VCC
/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
/// ELSE
/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
/// ENDIF
///
/// becomes:
///
/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
/// S_CBRANCH_EXECZ label0 // This instruction is an
/// // optimization which allows us to
/// // branch if all the bits of
/// // EXEC are zero.
/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the THEN block of the branch
///
/// label0:
/// %SGPR0 = S_OR_SAVEEXEC_B64 %SGPR0 // Restore the exec mask for the ELSE block
/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
/// S_CBRANCH_EXECZ label1 // Use our branch optimization
/// // instruction again.
/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
/// label1:
/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
class SILowerControlFlowPass : public MachineFunctionPass {
private:
static char ID;
const TargetInstrInfo *TII;
std::vector<unsigned> PredicateStack;
std::vector<unsigned> UnusedRegisters;
unsigned allocReg();
void freeReg(unsigned Reg);
public:
SILowerControlFlowPass(TargetMachine &tm) :
MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "SI Lower control flow instructions";
}
};
} // End anonymous namespace
char SILowerControlFlowPass::ID = 0;
FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm);
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
// Find all the unused registers that can be used for the predicate stack.
for (TargetRegisterClass::iterator I = AMDGPU::SReg_64RegClass.begin(),
S = AMDGPU::SReg_64RegClass.end();
I != S; ++I) {
unsigned Reg = *I;
if (!MF.getRegInfo().isPhysRegUsed(Reg)) {
UnusedRegisters.insert(UnusedRegisters.begin(), Reg);
}
}
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
I != MBB.end(); I = Next) {
Next = llvm::next(I);
MachineInstr &MI = *I;
unsigned Reg;
switch (MI.getOpcode()) {
default: break;
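// SI_IF_NZ: save EXEC and restrict it to the lanes where the condition
// (VCC) is set. After the XOR, Reg holds exactly the lanes that still
// need to run the ELSE block.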
case AMDGPU::SI_IF_NZ:
Reg = allocReg();
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
Reg)
.addOperand(MI.getOperand(0)); // VCC
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
Reg)
.addReg(Reg)
.addReg(AMDGPU::EXEC);
MI.eraseFromParent();
PredicateStack.push_back(Reg);
break;
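// ELSE: S_OR_SAVEEXEC re-enables the saved lanes and the XOR then
// restricts EXEC to the lanes that did not execute the THEN block.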
case AMDGPU::ELSE:
Reg = PredicateStack.back();
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
Reg)
.addReg(Reg);
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
AMDGPU::EXEC)
.addReg(Reg)
.addReg(AMDGPU::EXEC);
MI.eraseFromParent();
break;
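// ENDIF: pop the saved mask and OR it back into EXEC so every lane that
// was active before the branch runs again.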
case AMDGPU::ENDIF:
Reg = PredicateStack.back();
PredicateStack.pop_back();
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_B64),
AMDGPU::EXEC)
.addReg(AMDGPU::EXEC)
.addReg(Reg);
freeReg(Reg);
if (MF.getInfo<SIMachineFunctionInfo>()->ShaderType == ShaderType::PIXEL &&
PredicateStack.empty()) {
// If the exec mask is non-zero, skip the next two instructions
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addImm(3)
.addReg(AMDGPU::EXEC);
// Exec mask is zero: Export to NULL target...
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::EXP))
.addImm(0)
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
.addImm(0)
.addImm(1)
.addImm(1)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0);
// ... and terminate wavefront
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
}
MI.eraseFromParent();
break;
}
}
}
return true;
}
unsigned SILowerControlFlowPass::allocReg() {
assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");
unsigned Reg = UnusedRegisters.back();
UnusedRegisters.pop_back();
return Reg;
}
void SILowerControlFlowPass::freeReg(unsigned Reg) {
UnusedRegisters.push_back(Reg);
}

View File

@@ -0,0 +1,108 @@
//===-- SILowerLiteralConstants.cpp - Lower instrs using literal constants-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass performs the following transformation on instructions with
/// literal constants:
///
/// %VGPR0 = V_MOV_IMM_I32 1
///
/// becomes:
///
/// BUNDLE
/// * %VGPR0 = V_MOV_B32_e32 SI_LITERAL_CONSTANT
/// * SI_LOAD_LITERAL_I32 1
///
/// The resulting sequence matches exactly how the hardware handles immediate
/// operands, so this transformation greatly simplifies the code generator.
///
/// Only the *_MOV_IMM_* instructions support immediate operands at the
/// moment, but when support for immediate operands is added to other
/// instructions, they will be lowered here as well.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
using namespace llvm;
namespace {
class SILowerLiteralConstantsPass : public MachineFunctionPass {
private:
static char ID;
const TargetInstrInfo *TII;
public:
SILowerLiteralConstantsPass(TargetMachine &tm) :
MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const {
return "SI Lower literal constants pass";
}
};
} // End anonymous namespace
char SILowerLiteralConstantsPass::ID = 0;
FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
return new SILowerLiteralConstantsPass(tm);
}
bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
I != MBB.end(); I = Next) {
Next = llvm::next(I);
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
default: break;
case AMDGPU::S_MOV_IMM_I32:
case AMDGPU::S_MOV_IMM_I64:
case AMDGPU::V_MOV_IMM_F32:
case AMDGPU::V_MOV_IMM_I32: {
unsigned MovOpcode;
unsigned LoadLiteralOpcode;
MachineOperand LiteralOp = MI.getOperand(1);
if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
MovOpcode = AMDGPU::V_MOV_B32_e32;
} else {
MovOpcode = AMDGPU::S_MOV_B32;
}
if (LiteralOp.isImm()) {
LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
} else {
LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
}
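// Emit the mov reading the literal-constant slot followed by the literal
// value itself, then bundle the pair so later passes cannot separate them.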
MachineInstr *First =
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
MI.getOperand(0).getReg())
.addReg(AMDGPU::SI_LITERAL_CONSTANT);
MachineInstr *Last =
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
.addOperand(MI.getOperand(1));
Last->setIsInsideBundle();
llvm::finalizeBundle(MBB, First, Last);
MI.eraseFromParent();
break;
}
}
}
}
return false;
}

View File

@@ -0,0 +1,20 @@
//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo(),
SPIPSInputAddr(0),
ShaderType(0)
{ }

View File

@@ -0,0 +1,34 @@
//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//
#ifndef SIMACHINEFUNCTIONINFO_H_
#define SIMACHINEFUNCTIONINFO_H_
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public MachineFunctionInfo {
public:
SIMachineFunctionInfo(const MachineFunction &MF);
unsigned SPIPSInputAddr;
unsigned ShaderType;
};
} // End namespace llvm
#endif // SIMACHINEFUNCTIONINFO_H_

View File

@@ -0,0 +1,48 @@
//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "SIRegisterInfo.h"
#include "AMDGPUTargetMachine.h"
using namespace llvm;
SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
const TargetInstrInfo &tii)
: AMDGPURegisterInfo(tm, tii),
TM(tm),
TII(tii)
{ }
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
return Reserved;
}
const TargetRegisterClass *
SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
switch (rc->getID()) {
case AMDGPU::GPRF32RegClassID:
return &AMDGPU::VReg_32RegClass;
default: return rc;
}
}
const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
default:
case MVT::i32: return &AMDGPU::VReg_32RegClass;
}
}

Some files were not shown because too many files have changed in this diff