mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-23 01:24:30 +00:00
AMDGPU: Add core backend files for R600/SI codegen v6
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160270 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
35
lib/Target/AMDGPU/AMDGPU.h
Normal file
35
lib/Target/AMDGPU/AMDGPU.h
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDGPU_H
|
||||||
|
#define AMDGPU_H
|
||||||
|
|
||||||
|
#include "AMDGPUTargetMachine.h"
|
||||||
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class FunctionPass;
|
||||||
|
class AMDGPUTargetMachine;
|
||||||
|
|
||||||
|
// R600 Passes
|
||||||
|
FunctionPass* createR600KernelParametersPass(const TargetData* TD);
|
||||||
|
FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
|
||||||
|
|
||||||
|
// SI Passes
|
||||||
|
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
||||||
|
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||||
|
|
||||||
|
// Passes common to R600 and SI
|
||||||
|
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDGPU_H
|
21
lib/Target/AMDGPU/AMDGPU.td
Normal file
21
lib/Target/AMDGPU/AMDGPU.td
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
//===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen -*-----------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Include AMDIL TD files
|
||||||
|
include "AMDILBase.td"
|
||||||
|
include "AMDILVersion.td"
|
||||||
|
|
||||||
|
// Include AMDGPU TD files
|
||||||
|
include "R600Schedule.td"
|
||||||
|
include "SISchedule.td"
|
||||||
|
include "Processors.td"
|
||||||
|
include "AMDGPUInstrInfo.td"
|
||||||
|
include "AMDGPUIntrinsics.td"
|
||||||
|
include "AMDGPURegisterInfo.td"
|
||||||
|
include "AMDGPUInstructions.td"
|
63
lib/Target/AMDGPU/AMDGPUConvertToISA.cpp
Normal file
63
lib/Target/AMDGPU/AMDGPUConvertToISA.cpp
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This pass lowers AMDIL machine instructions to the appropriate hardware
|
||||||
|
// instructions.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDGPUInstrInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class AMDGPUConvertToISAPass : public MachineFunctionPass {
|
||||||
|
|
||||||
|
private:
|
||||||
|
static char ID;
|
||||||
|
TargetMachine &TM;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AMDGPUConvertToISAPass(TargetMachine &tm) :
|
||||||
|
MachineFunctionPass(ID), TM(tm) { }
|
||||||
|
|
||||||
|
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||||
|
|
||||||
|
virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End anonymous namespace
|
||||||
|
|
||||||
|
char AMDGPUConvertToISAPass::ID = 0;
|
||||||
|
|
||||||
|
FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
|
||||||
|
return new AMDGPUConvertToISAPass(tm);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Call AMDGPUInstrInfo::convertToISA() on every instruction of every basic
/// block in \p MF, passing along the debug location found at that instruction.
///
/// \returns false unconditionally.
// NOTE(review): convertToISA() may rewrite register classes, so reporting
// "not modified" looks questionable -- confirm against the pass manager
// contract before relying on the return value.
bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
{
  // The target machine only exposes the generic instruction info; this pass
  // needs the AMDGPU-specific interface.
  const AMDGPUInstrInfo *InstrInfo =
      static_cast<const AMDGPUInstrInfo *>(TM.getInstrInfo());

  for (MachineFunction::iterator Block = MF.begin(), BlockEnd = MF.end();
       Block != BlockEnd; ++Block) {
    for (MachineBasicBlock::iterator Inst = Block->begin(),
                                     InstEnd = Block->end();
         Inst != InstEnd; ++Inst) {
      InstrInfo->convertToISA(*Inst, MF, Block->findDebugLoc(Inst));
    }
  }

  return false;
}
|
393
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Normal file
393
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Normal file
@ -0,0 +1,393 @@
|
|||||||
|
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This is the parent TargetLowering class for hardware code gen targets.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPUISelLowering.h"
|
||||||
|
#include "AMDILIntrinsicInfo.h"
|
||||||
|
#include "AMDGPUUtil.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Set up the operation actions shared by all AMDGPU hardware targets on top
/// of what AMDILTargetLowering configures.
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM)
    : AMDILTargetLowering(TM)
{
  // Operations dispatched through AMDGPUTargetLowering::LowerOperation().
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC,          MVT::f32,   Custom);
  setOperationAction(ISD::SELECT_CC,          MVT::i32,   Custom);
  setOperationAction(ISD::UDIVREM,            MVT::i32,   Custom);

  // Library functions.  These default to Expand, but we have instructions
  // for them.
  setOperationAction(ISD::FCEIL, MVT::f32, Legal);
  setOperationAction(ISD::FEXP2, MVT::f32, Legal);
  setOperationAction(ISD::FRINT, MVT::f32, Legal);

  // Plain 32-bit unsigned divide and remainder are expanded.
  // NOTE(review): presumably so they end up in the custom UDIVREM lowering --
  // confirm against the legalizer.
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
}
|
||||||
|
|
||||||
|
/// Entry point for custom lowering: route the opcodes handled by this class
/// to their helpers and defer everything else to AMDILTargetLowering.
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
    const
{
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::UDIVREM:
    return LowerUDIVREM(Op, DAG);
  default:
    break;
  }

  // Not one of ours: let the parent class decide.
  return AMDILTargetLowering::LowerOperation(Op, DAG);
}
|
||||||
|
|
||||||
|
/// Replace a chain-less intrinsic call with an equivalent DAG node.
///
/// Operand 0 of \p Op holds the intrinsic ID; the intrinsic's own arguments
/// start at operand 1.  Intrinsics not listed here are returned unchanged.
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const
{
  const unsigned IntrinsicID =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  const DebugLoc Loc = Op.getDebugLoc();
  const EVT ResultVT = Op.getValueType();

  switch (IntrinsicID) {
  // Intrinsics expanded into several nodes by dedicated helpers.
  case AMDGPUIntrinsic::AMDIL_abs:
    return LowerIntrinsicIABS(Op, DAG);
  case AMDGPUIntrinsic::AMDGPU_lrp:
    return LowerIntrinsicLRP(Op, DAG);

  // One-argument intrinsics.
  case AMDGPUIntrinsic::AMDIL_exp:
    return DAG.getNode(ISD::FEXP2, Loc, ResultVT, Op.getOperand(1));
  case AMDGPUIntrinsic::AMDIL_fabs:
    return DAG.getNode(ISD::FABS, Loc, ResultVT, Op.getOperand(1));
  case AMDGPUIntrinsic::AMDIL_fraction:
    return DAG.getNode(AMDGPUISD::FRACT, Loc, ResultVT, Op.getOperand(1));
  case AMDGPUIntrinsic::AMDIL_round_nearest:
    return DAG.getNode(ISD::FRINT, Loc, ResultVT, Op.getOperand(1));
  case AMDGPUIntrinsic::AMDIL_round_posinf:
    return DAG.getNode(ISD::FCEIL, Loc, ResultVT, Op.getOperand(1));

  // Two-argument maximum: float, signed and unsigned flavours.
  case AMDGPUIntrinsic::AMDIL_max:
    return DAG.getNode(AMDGPUISD::FMAX, Loc, ResultVT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDGPU_imax:
    return DAG.getNode(AMDGPUISD::SMAX, Loc, ResultVT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDGPU_umax:
    return DAG.getNode(AMDGPUISD::UMAX, Loc, ResultVT, Op.getOperand(1),
                       Op.getOperand(2));

  // Two-argument minimum: float, signed and unsigned flavours.
  case AMDGPUIntrinsic::AMDIL_min:
    return DAG.getNode(AMDGPUISD::FMIN, Loc, ResultVT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDGPU_imin:
    return DAG.getNode(AMDGPUISD::SMIN, Loc, ResultVT, Op.getOperand(1),
                       Op.getOperand(2));
  case AMDGPUIntrinsic::AMDGPU_umin:
    return DAG.getNode(AMDGPUISD::UMIN, Loc, ResultVT, Op.getOperand(1),
                       Op.getOperand(2));

  // Three-argument multiply-add.
  case AMDGPUIntrinsic::AMDIL_mad:
    return DAG.getNode(AMDILISD::MAD, Loc, ResultVT, Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));

  default:
    break;
  }

  // Anything else is left untouched.
  return Op;
}
|
||||||
|
|
||||||
|
///IABS(a) = SMAX(sub(0, a), a)
|
||||||
|
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
|
||||||
|
SelectionDAG &DAG) const
|
||||||
|
{
|
||||||
|
|
||||||
|
DebugLoc DL = Op.getDebugLoc();
|
||||||
|
EVT VT = Op.getValueType();
|
||||||
|
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
|
||||||
|
Op.getOperand(1));
|
||||||
|
|
||||||
|
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Linear Interpolation
|
||||||
|
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
|
||||||
|
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
|
||||||
|
SelectionDAG &DAG) const
|
||||||
|
{
|
||||||
|
DebugLoc DL = Op.getDebugLoc();
|
||||||
|
EVT VT = Op.getValueType();
|
||||||
|
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
|
||||||
|
DAG.getConstantFP(1.0f, MVT::f32),
|
||||||
|
Op.getOperand(1));
|
||||||
|
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
|
||||||
|
Op.getOperand(3));
|
||||||
|
return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
|
||||||
|
Op.getOperand(2),
|
||||||
|
OneSubAC);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Custom lowering for ISD::SELECT_CC.
///
/// Operands of \p Op: 0 = LHS, 1 = RHS, 2 = value if true, 3 = value if
/// false, 4 = condition code.  The function
///   1. converts LHS/RHS to the result type when the compared type differs
///      (only the i32 <-> f32 pairings are handled),
///   2. returns a SELECT_CC directly when the selected values are already the
///      hardware TRUE/FALSE constants (see isHWTrueValue / isHWFalseValue),
///   3. otherwise builds a SELECT_CC producing hardware TRUE/FALSE and feeds
///      it as the condition of a plain SELECT on the original values.
SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op,
                                             SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.
  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      // Pick the int-to-float conversion matching the signedness of the
      // comparison.
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      // NOTE(review): with assertions disabled execution continues with
      // ConversionOp still set to ISD::DELETED_NODE.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value we
  // can handle this with a native instruction (SET* instructions).
  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value
  // (the swapped case), we could also use a native instruction, but we would
  // need to swap true and false and change the conditional.  This is not
  // implemented; such nodes take the generic path below.

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation.  (The code below does this but we don't have the
  // instruction selection patterns to do this yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        SDValue Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // operations: a SELECT_CC yielding the hardware TRUE/FALSE value, and a
  // SELECT on that value.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Custom lowering for ISD::UDIVREM (operand 0 = numerator, operand 1 =
/// denominator).
///
/// The quotient is estimated from the hardware reciprocal (URECIP), the
/// estimate is refined using the reciprocal's rounding error, and the final
/// quotient and remainder are corrected by one step in either direction.
///
/// \returns a MERGE_VALUES node whose result 0 is the quotient and whose
/// result 1 is the remainder, matching the two results of UDIVREM.  (The
/// previous implementation passed the address of a single SDValue to
/// SelectionDAG::ReplaceAllUsesWith(SDNode *, const SDValue *), which indexes
/// that pointer by result number and therefore read past the object for
/// result 1.)
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
                                           SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);

  // RCP = URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);

  // RCP_LO = umulo(RCP, Den)
  // NOTE(review): UMULO is built here with a single result type; confirm that
  // this is what the instruction selection patterns expect.
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);

  // RCP_HI = mulhu (RCP, Den)
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                   RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                       NEG_RCP_LO, RCP_LO,
                                       ISD::SETEQ);

  // Calculate the rounding error from the URECIP instruction
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);

  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                 RCP_A_E, RCP_S_E,
                                 ISD::SETEQ);

  // Quotient = mulhu(Tmp0, Num)
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);

  // NOTE(review): both comparisons below use the signed SETGE condition even
  // though the operation is an unsigned divide; verify the behaviour for
  // operands with the top bit set.

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                             DAG.getConstant(-1, VT),
                                             DAG.getConstant(0, VT),
                                             ISD::SETGE);

  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
                                              DAG.getConstant(0, VT),
                                              DAG.getConstant(-1, VT),
                                              DAG.getConstant(0, VT),
                                              ISD::SETGE);

  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                             Remainder_GE_Zero);

  // Calculate Division result:

  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                       DAG.getConstant(1, VT));

  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                       DAG.getConstant(1, VT));

  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                Quotient, Quotient_A_One, ISD::SETEQ);

  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                        Quotient_S_One, Div, ISD::SETEQ);

  // Calculate Rem result:

  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);

  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);

  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                Remainder, Remainder_S_Den, ISD::SETEQ);

  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                        Remainder_A_Den, Rem, ISD::SETEQ);

  // Hand both results back to the legalizer as one multi-result value.
  SDValue Ops[2] = { Div, Rem };
  return DAG.getMergeValues(Ops, 2, DL);
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Helper functions
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
/// \returns true if \p Op is the constant the hardware uses for TRUE: the
/// floating point constant 1.0 or an integer constant with all bits set.
/// Non-constant values yield false.
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
{
  if (ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Op))
    return FPConst->isExactlyValue(1.0);

  ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(Op);
  return IntConst && IntConst->isAllOnesValue();
}
|
||||||
|
|
||||||
|
/// \returns true if \p Op is the constant the hardware uses for FALSE: a
/// floating point zero (as reported by APFloat::isZero()) or the integer
/// constant 0.  Non-constant values yield false.
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
{
  if (ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Op))
    return FPConst->getValueAPF().isZero();

  ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(Op);
  return IntConst && IntConst->isNullValue();
}
|
||||||
|
|
||||||
|
/// Forward to AMDGPU::utilAddLiveIn(), using the register of operand 0 of
/// \p MI as the destination for \p reg.
void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
    MachineFunction * MF, MachineRegisterInfo & MRI,
    const TargetInstrInfo * TII, unsigned reg) const
{
  const unsigned DstReg = MI->getOperand(0).getReg();
  AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, DstReg);
}
|
||||||
|
|
||||||
|
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
|
||||||
|
|
||||||
|
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
||||||
|
{
|
||||||
|
switch (Opcode) {
|
||||||
|
default: return AMDILTargetLowering::getTargetNodeName(Opcode);
|
||||||
|
|
||||||
|
NODE_NAME_CASE(FRACT)
|
||||||
|
NODE_NAME_CASE(FMAX)
|
||||||
|
NODE_NAME_CASE(SMAX)
|
||||||
|
NODE_NAME_CASE(UMAX)
|
||||||
|
NODE_NAME_CASE(FMIN)
|
||||||
|
NODE_NAME_CASE(SMIN)
|
||||||
|
NODE_NAME_CASE(UMIN)
|
||||||
|
NODE_NAME_CASE(URECIP)
|
||||||
|
}
|
||||||
|
}
|
77
lib/Target/AMDGPU/AMDGPUISelLowering.h
Normal file
77
lib/Target/AMDGPU/AMDGPUISelLowering.h
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the interface definition of the TargetLowering class
|
||||||
|
// that is common to all AMD GPUs.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDGPUISELLOWERING_H
|
||||||
|
#define AMDGPUISELLOWERING_H
|
||||||
|
|
||||||
|
#include "AMDILISelLowering.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class AMDGPUTargetLowering : public AMDILTargetLowering
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
/// addLiveIn - This functions adds reg to the live in list of the entry block
|
||||||
|
/// and emits a copy from reg to MI.getOperand(0).
|
||||||
|
///
|
||||||
|
// Some registers are loaded with values before the program
|
||||||
|
/// begins to execute. The loading of these values is modeled with pseudo
|
||||||
|
/// instructions which are lowered using this function.
|
||||||
|
void addLiveIn(MachineInstr * MI, MachineFunction * MF,
|
||||||
|
MachineRegisterInfo & MRI, const TargetInstrInfo * TII,
|
||||||
|
unsigned reg) const;
|
||||||
|
|
||||||
|
bool isHWTrueValue(SDValue Op) const;
|
||||||
|
bool isHWFalseValue(SDValue Op) const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AMDGPUTargetLowering(TargetMachine &TM);
|
||||||
|
|
||||||
|
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
virtual const char* getTargetNodeName(unsigned Opcode) const;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace AMDGPUISD
|
||||||
|
{
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
|
||||||
|
BITALIGN,
|
||||||
|
FRACT,
|
||||||
|
FMAX,
|
||||||
|
SMAX,
|
||||||
|
UMAX,
|
||||||
|
FMIN,
|
||||||
|
SMIN,
|
||||||
|
UMIN,
|
||||||
|
URECIP,
|
||||||
|
LAST_AMDGPU_ISD_NUMBER
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // End namespace AMDGPUISD
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDGPUISELLOWERING_H
|
46
lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
Normal file
46
lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the implementation of the TargetInstrInfo class that is
|
||||||
|
// common to all AMD GPUs.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPUInstrInfo.h"
|
||||||
|
#include "AMDGPURegisterInfo.h"
|
||||||
|
#include "AMDGPUTargetMachine.h"
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Nothing to set up beyond the AMDILInstrInfo base, which receives \p tm.
AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm)
    : AMDILInstrInfo(tm)
{
}
|
||||||
|
|
||||||
|
/// For every virtual register defined by \p MI, replace its register class
/// with the one returned by AMDGPURegisterInfo::getISARegClass().
///
/// \p DL is not used by this base implementation.
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
    DebugLoc DL) const
{
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  const AMDGPURegisterInfo &TargetRI = getRegisterInfo();

  for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
    MachineOperand &Operand = MI.getOperand(OpIdx);

    // Only register definitions are of interest.
    if (!Operand.isReg() || !Operand.isDef())
      continue;

    // Only virtual registers are re-classed.
    const unsigned Reg = Operand.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    // Convert dst regclass to one that is supported by the ISA
    const TargetRegisterClass *CurrentClass = RegInfo.getRegClass(Reg);
    const TargetRegisterClass *ISAClass = TargetRI.getISARegClass(CurrentClass);

    assert(ISAClass);

    RegInfo.setRegClass(Reg, ISAClass);
  }
}
|
46
lib/Target/AMDGPU/AMDGPUInstrInfo.h
Normal file
46
lib/Target/AMDGPU/AMDGPUInstrInfo.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the definition of a TargetInstrInfo class that is common
|
||||||
|
// to all AMD GPUs.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDGPUINSTRUCTIONINFO_H_
|
||||||
|
#define AMDGPUINSTRUCTIONINFO_H_
|
||||||
|
|
||||||
|
#include "AMDGPURegisterInfo.h"
|
||||||
|
#include "AMDILInstrInfo.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class AMDGPUTargetMachine;
|
||||||
|
class MachineFunction;
|
||||||
|
class MachineInstr;
|
||||||
|
class MachineInstrBuilder;
|
||||||
|
|
||||||
|
class AMDGPUInstrInfo : public AMDILInstrInfo {
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
|
||||||
|
|
||||||
|
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
|
||||||
|
|
||||||
|
/// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
|
||||||
|
/// MachineInstr
|
||||||
|
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
|
||||||
|
DebugLoc DL) const;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End llvm namespace
|
||||||
|
|
||||||
|
#endif // AMDGPUINSTRUCTIONINFO_H_
|
69
lib/Target/AMDGPU/AMDGPUInstrInfo.td
Normal file
69
lib/Target/AMDGPU/AMDGPUInstrInfo.td
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains DAG node definitions for the AMDGPU target.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AMDGPU DAG Profiles
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Type profile with one result and three operands: the result and operands
// 1 and 2 share a single integer type, and operand 3 is an integer as well.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3,
  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>]
>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AMDGPU DAG Nodes
|
||||||
|
//
|
||||||
|
|
||||||
|
// bitalign - Shift the 64-bit concatenation of a and b right by c.
// out = (((a << 32) | b) >> c)
//
// Can be used to optimize rotl:
// rotl(a, b) = bitalign(a, a, 32 - b)
def AMDGPUbitalign
  : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
|
||||||
|
|
||||||
|
// fract - Fractional part of a floating point value.
// out = a - floor(a)
def AMDGPUfract
  : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
|
||||||
|
|
||||||
|
// fmax - Maximum of two floats.
// out = max(a, b)
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
|
||||||
|
|
||||||
|
// smax - Maximum of two signed integers.
// out = max(a, b)
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
|
||||||
|
|
||||||
|
// umax - Maximum of two unsigned integers.
// out = max(a, b)
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
|
||||||
|
|
||||||
|
// fmin - Minimum of two floats.
// out = min(a, b)
def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
|
||||||
|
|
||||||
|
// smin - Minimum of two signed integers.
// out = min(a, b)
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
|
||||||
|
|
||||||
|
// umin - Minimum of two unsigned integers.
// out = min(a, b)
def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
|
||||||
|
|
||||||
|
// urecip - Helper for integer division: yields 1 / a as a fractional
// unsigned integer.
//   out = (2^32 / a) + e
// where e is the rounding error.
def AMDGPUurecip
  : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
|
123
lib/Target/AMDGPU/AMDGPUInstructions.td
Normal file
123
lib/Target/AMDGPU/AMDGPUInstructions.td
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains instruction defs that are common to all hw codegen
|
||||||
|
// targets.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Common base class for AMDGPU instruction definitions. It forwards the
// operand lists, assembly string and selection pattern to Instruction and
// packs two extra fields into TSFlags.
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern>
    : Instruction {

  // Extra per-instruction fields; both default to zero.
  field bits<16> AMDILOp = 0;
  field bits<3> Gen = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // Gen is stored in TSFlags bits 42-40 and AMDILOp in bits 63-48.
  let TSFlags{42-40} = Gen;
  let TSFlags{63-48} = AMDILOp;
}
|
||||||
|
|
||||||
|
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
|
||||||
|
: AMDGPUInst<outs, ins, asm, pattern> {
|
||||||
|
|
||||||
|
field bits<32> Inst = 0xffffffff;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Frequently used constants, stored as the bit pattern of a single-precision
// IEEE-754 float.
class Constants {
  int TWO_PI = 0x40c90fdb;     // 2 * pi       (~6.2831855)
  int PI = 0x40490fdb;         // pi           (~3.1415927)
  int TWO_PI_INV = 0x3e22f983; // 1 / (2 * pi) (~0.1591549)
}

// Record through which the constants above are referenced.
def CONST : Constants;
|
||||||
|
|
||||||
|
def FP_ZERO : PatLeaf <
|
||||||
|
(fpimm),
|
||||||
|
[{return N->getValueAPF().isZero();}]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def FP_ONE : PatLeaf <
|
||||||
|
(fpimm),
|
||||||
|
[{return N->isExactlyValue(1.0);}]
|
||||||
|
>;
|
||||||
|
|
||||||
|
let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
|
||||||
|
|
||||||
|
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
|
||||||
|
(outs rc:$dst),
|
||||||
|
(ins rc:$src0),
|
||||||
|
"CLAMP $dst, $src0",
|
||||||
|
[(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
class FABS <RegisterClass rc> : AMDGPUShaderInst <
|
||||||
|
(outs rc:$dst),
|
||||||
|
(ins rc:$src0),
|
||||||
|
"FABS $dst, $src0",
|
||||||
|
[(set rc:$dst, (fabs rc:$src0))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
|
||||||
|
(outs rc:$dst),
|
||||||
|
(ins rc:$src0),
|
||||||
|
"FNEG $dst, $src0",
|
||||||
|
[(set rc:$dst, (fneg rc:$src0))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
|
||||||
|
|
||||||
|
/* Generic helper patterns for intrinsics */
|
||||||
|
/* -------------------------------------- */
|
||||||
|
|
||||||
|
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
|
||||||
|
RegisterClass rc> : Pat <
|
||||||
|
(int_AMDGPU_pow rc:$src0, rc:$src1),
|
||||||
|
(exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
|
||||||
|
>;
|
||||||
|
|
||||||
|
/* Other helper patterns */
|
||||||
|
/* --------------------- */
|
||||||
|
|
||||||
|
/* Extract element pattern */
|
||||||
|
class Extract_Element <ValueType sub_type, ValueType vec_type,
|
||||||
|
RegisterClass vec_class, int sub_idx,
|
||||||
|
SubRegIndex sub_reg>: Pat<
|
||||||
|
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
|
||||||
|
(EXTRACT_SUBREG vec_class:$src, sub_reg)
|
||||||
|
>;
|
||||||
|
|
||||||
|
/* Insert element pattern */
|
||||||
|
class Insert_Element <ValueType elem_type, ValueType vec_type,
|
||||||
|
RegisterClass elem_class, RegisterClass vec_class,
|
||||||
|
int sub_idx, SubRegIndex sub_reg> : Pat <
|
||||||
|
|
||||||
|
(vec_type (vector_insert (vec_type vec_class:$vec),
|
||||||
|
(elem_type elem_class:$elem), sub_idx)),
|
||||||
|
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
|
||||||
|
>;
|
||||||
|
|
||||||
|
// Vector Build pattern
|
||||||
|
class Vector_Build <ValueType vecType, RegisterClass elemClass> : Pat <
|
||||||
|
(IL_vbuild elemClass:$src),
|
||||||
|
(INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x)
|
||||||
|
>;
|
||||||
|
|
||||||
|
// bitconvert pattern
|
||||||
|
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
|
||||||
|
(dt (bitconvert (st rc:$src0))),
|
||||||
|
(dt rc:$src0)
|
||||||
|
>;
|
||||||
|
|
||||||
|
include "R600Instructions.td"
|
||||||
|
|
||||||
|
include "SIInstrInfo.td"
|
||||||
|
|
64
lib/Target/AMDGPU/AMDGPUIntrinsics.td
Normal file
64
lib/Target/AMDGPU/AMDGPUIntrinsics.td
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines intrinsics that are used by all hw codegen targets.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
let TargetPrefix = "AMDGPU", isTarget = 1 in {
|
||||||
|
|
||||||
|
def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
|
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
|
||||||
|
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
|
||||||
|
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let TargetPrefix = "TGSI", isTarget = 1 in {
|
||||||
|
|
||||||
|
def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
include "SIIntrinsics.td"
|
24
lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
Normal file
24
lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Parent TargetRegisterInfo class common to all hw codegen targets.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPURegisterInfo.h"
|
||||||
|
#include "AMDGPUTargetMachine.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Passes both arguments on to the AMDILRegisterInfo base class and keeps a
// reference to each of them in the TM and TII members.
AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm,
                                       const TargetInstrInfo &tii)
    : AMDILRegisterInfo(tm, tii), TM(tm), TII(tii) {}
|
42
lib/Target/AMDGPU/AMDGPURegisterInfo.h
Normal file
42
lib/Target/AMDGPU/AMDGPURegisterInfo.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the TargetRegisterInfo interface that is implemented
|
||||||
|
// by all hw codegen targets.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDGPUREGISTERINFO_H_
|
||||||
|
#define AMDGPUREGISTERINFO_H_
|
||||||
|
|
||||||
|
#include "AMDILRegisterInfo.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class AMDGPUTargetMachine;
|
||||||
|
class TargetInstrInfo;
|
||||||
|
|
||||||
|
struct AMDGPURegisterInfo : public AMDILRegisterInfo
|
||||||
|
{
|
||||||
|
AMDGPUTargetMachine &TM;
|
||||||
|
const TargetInstrInfo &TII;
|
||||||
|
|
||||||
|
AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
|
||||||
|
|
||||||
|
virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
|
||||||
|
|
||||||
|
/// getISARegClass - rc is an AMDIL reg class. This function returns the
|
||||||
|
/// ISA reg class that is equivalent to the given AMDIL reg class.
|
||||||
|
virtual const TargetRegisterClass *
|
||||||
|
getISARegClass(const TargetRegisterClass * rc) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDGPUREGISTERINFO_H_
|
22
lib/Target/AMDGPU/AMDGPURegisterInfo.td
Normal file
22
lib/Target/AMDGPU/AMDGPURegisterInfo.td
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Tablegen register definitions common to all hw codegen targets.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
let Namespace = "AMDGPU" in {
|
||||||
|
def sel_x : SubRegIndex;
|
||||||
|
def sel_y : SubRegIndex;
|
||||||
|
def sel_z : SubRegIndex;
|
||||||
|
def sel_w : SubRegIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
include "R600RegisterInfo.td"
|
||||||
|
include "SIRegisterInfo.td"
|
36
lib/Target/AMDGPU/AMDGPUSubtarget.h
Normal file
36
lib/Target/AMDGPU/AMDGPUSubtarget.h
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDGPU --*- C++ -*-====//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file declares the AMDGPU specific subclass of TargetSubtarget.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Include guard. The name deliberately has no leading underscore: identifiers
// that start with an underscore followed by an uppercase letter are reserved
// for the implementation.
#ifndef AMDGPUSUBTARGET_H_
#define AMDGPUSUBTARGET_H_
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class AMDGPUSubtarget : public AMDILSubtarget
|
||||||
|
{
|
||||||
|
InstrItineraryData InstrItins;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
|
||||||
|
AMDILSubtarget(TT, CPU, FS)
|
||||||
|
{
|
||||||
|
InstrItins = getInstrItineraryForCPU(CPU);
|
||||||
|
}
|
||||||
|
|
||||||
|
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDGPUSUBTARGET_H_
|
162
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Normal file
162
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The AMDGPU target machine contains all of the hardware specific information
|
||||||
|
// needed to emit code for R600 and SI GPUs.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPUTargetMachine.h"
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "R600ISelLowering.h"
|
||||||
|
#include "R600InstrInfo.h"
|
||||||
|
#include "SIISelLowering.h"
|
||||||
|
#include "SIInstrInfo.h"
|
||||||
|
#include "llvm/Analysis/Passes.h"
|
||||||
|
#include "llvm/Analysis/Verifier.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||||
|
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||||
|
#include "llvm/CodeGen/Passes.h"
|
||||||
|
#include "llvm/MC/MCAsmInfo.h"
|
||||||
|
#include "llvm/PassManager.h"
|
||||||
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
#include "llvm/Support/raw_os_ostream.h"
|
||||||
|
#include "llvm/Transforms/IPO.h"
|
||||||
|
#include "llvm/Transforms/Scalar.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||||
|
// Register the target
|
||||||
|
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the target machine: initializes the subtarget-derived members and
// then picks the R600 or SI instruction-info / lowering implementation based
// on the device generation reported by the subtarget.
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Reloc::Model RM, CodeModel::Model CM,
                                         CodeGenOpt::Level OptLevel)
    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
      Subtarget(TT, CPU, FS),
      DataLayout(Subtarget.getDataLayout()),
      FrameLowering(TargetFrameLowering::StackGrowsUp,
                    Subtarget.device()->getStackAlignment(), 0),
      IntrinsicInfo(this),
      InstrItins(&Subtarget.getInstrItineraryData()),
      mDump(false) {
  const bool UseR600 =
      Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX;

  // In both branches InstrInfo is created first: TLInfo uses InstrInfo, so it
  // must be initialized after it.
  if (!UseR600) {
    InstrInfo = new SIInstrInfo(*this);
    TLInfo = new SITargetLowering(*this);
    return;
  }

  InstrInfo = new R600InstrInfo(*this);
  TLInfo = new R600TargetLowering(*this);
}
|
||||||
|
|
||||||
|
// Frees the target lowering and instruction info objects that the constructor
// allocates with new; without this they are leaked every time a target
// machine is destroyed. TLInfo is released first because it is created after
// (and uses) InstrInfo.
AMDGPUTargetMachine::~AMDGPUTargetMachine() {
  delete TLInfo;
  delete InstrInfo;
}
|
||||||
|
|
||||||
|
// Populates PM with the code generation passes followed by the R600 or SI
// code emitter writing to Out. Returns false on success; an unsupported
// device aborts the process.
bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                              formatted_raw_ostream &Out,
                                              CodeGenFileType FileType,
                                              bool DisableVerify,
                                              AnalysisID StartAfter,
                                              AnalysisID StopAfter) {
  // XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
  // only using it to access addPassesToGenerateCode()
  bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
                                                     DisableVerify);
  assert(fail);
  // The assert is the only reader of 'fail'; this keeps builds with NDEBUG
  // defined free of an unused-variable warning.
  (void)fail;

  const AMDILSubtarget &STM = getSubtarget<AMDILSubtarget>();
  std::string gpu = STM.getDeviceName();
  if (gpu == "SI") {
    PM.add(createSICodeEmitterPass(Out));
  } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
    PM.add(createR600CodeEmitterPass(Out));
  } else {
    // Neither code emitter applies to this device.
    abort();
    return true;
  }
  PM.add(createGCInfoDeleter());

  return false;
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
class AMDGPUPassConfig : public TargetPassConfig {
|
||||||
|
public:
|
||||||
|
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
|
||||||
|
: TargetPassConfig(TM, PM) {}
|
||||||
|
|
||||||
|
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
|
||||||
|
return getTM<AMDGPUTargetMachine>();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool addPreISel();
|
||||||
|
virtual bool addInstSelector();
|
||||||
|
virtual bool addPreRegAlloc();
|
||||||
|
virtual bool addPostRegAlloc();
|
||||||
|
virtual bool addPreSched2();
|
||||||
|
virtual bool addPreEmitPass();
|
||||||
|
};
|
||||||
|
} // End of anonymous namespace
|
||||||
|
|
||||||
|
// Returns a newly allocated AMDGPUPassConfig bound to this target machine and
// to the given pass manager.
TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
  AMDGPUPassConfig *Config = new AMDGPUPassConfig(this, PM);
  return Config;
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
AMDGPUPassConfig::addPreISel()
|
||||||
|
{
|
||||||
|
const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
|
||||||
|
if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
|
||||||
|
addPass(createR600KernelParametersPass(
|
||||||
|
getAMDGPUTargetMachine().getTargetData()));
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDGPUPassConfig::addInstSelector() {
|
||||||
|
addPass(createAMDILPeepholeOpt(*TM));
|
||||||
|
addPass(createAMDILISelDag(getAMDGPUTargetMachine()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDGPUPassConfig::addPreRegAlloc() {
|
||||||
|
const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
|
||||||
|
|
||||||
|
if (ST.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
|
||||||
|
addPass(createSIAssignInterpRegsPass(*TM));
|
||||||
|
}
|
||||||
|
addPass(createAMDGPUConvertToISAPass(*TM));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No passes are added after register allocation.
bool AMDGPUPassConfig::addPostRegAlloc()
{
  return false;
}
|
||||||
|
|
||||||
|
// No passes are added at the pre-sched2 stage.
bool AMDGPUPassConfig::addPreSched2()
{
  return false;
}
|
||||||
|
|
||||||
|
bool AMDGPUPassConfig::addPreEmitPass() {
|
||||||
|
addPass(createAMDILCFGPreparationPass(*TM));
|
||||||
|
addPass(createAMDILCFGStructurizerPass(*TM));
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
76
lib/Target/AMDGPU/AMDGPUTargetMachine.h
Normal file
76
lib/Target/AMDGPU/AMDGPUTargetMachine.h
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The AMDGPU TargetMachine interface definition for hw codegen targets.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDGPU_TARGET_MACHINE_H
|
||||||
|
#define AMDGPU_TARGET_MACHINE_H
|
||||||
|
|
||||||
|
#include "AMDGPUInstrInfo.h"
|
||||||
|
#include "AMDGPUSubtarget.h"
|
||||||
|
#include "AMDILFrameLowering.h"
|
||||||
|
#include "AMDILIntrinsicInfo.h"
|
||||||
|
#include "R600ISelLowering.h"
|
||||||
|
#include "llvm/ADT/OwningPtr.h"
|
||||||
|
#include "llvm/Target/TargetData.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
|
||||||
|
|
||||||
|
class AMDGPUTargetMachine : public LLVMTargetMachine {
|
||||||
|
|
||||||
|
AMDGPUSubtarget Subtarget;
|
||||||
|
const TargetData DataLayout;
|
||||||
|
AMDILFrameLowering FrameLowering;
|
||||||
|
AMDILIntrinsicInfo IntrinsicInfo;
|
||||||
|
const AMDGPUInstrInfo * InstrInfo;
|
||||||
|
AMDGPUTargetLowering * TLInfo;
|
||||||
|
const InstrItineraryData* InstrItins;
|
||||||
|
bool mDump;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
|
||||||
|
StringRef CPU,
|
||||||
|
TargetOptions Options,
|
||||||
|
Reloc::Model RM, CodeModel::Model CM,
|
||||||
|
CodeGenOpt::Level OL);
|
||||||
|
~AMDGPUTargetMachine();
|
||||||
|
virtual const AMDILFrameLowering* getFrameLowering() const {
|
||||||
|
return &FrameLowering;
|
||||||
|
}
|
||||||
|
virtual const AMDILIntrinsicInfo* getIntrinsicInfo() const {
|
||||||
|
return &IntrinsicInfo;
|
||||||
|
}
|
||||||
|
virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
|
||||||
|
virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
|
||||||
|
virtual const AMDGPURegisterInfo *getRegisterInfo() const {
|
||||||
|
return &InstrInfo->getRegisterInfo();
|
||||||
|
}
|
||||||
|
virtual AMDGPUTargetLowering * getTargetLowering() const {
|
||||||
|
return TLInfo;
|
||||||
|
}
|
||||||
|
virtual const InstrItineraryData* getInstrItineraryData() const {
|
||||||
|
return InstrItins;
|
||||||
|
}
|
||||||
|
virtual const TargetData* getTargetData() const { return &DataLayout; }
|
||||||
|
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
|
||||||
|
virtual bool addPassesToEmitFile(PassManagerBase &PM,
|
||||||
|
formatted_raw_ostream &Out,
|
||||||
|
CodeGenFileType FileType,
|
||||||
|
bool DisableVerify,
|
||||||
|
AnalysisID StartAfter = 0,
|
||||||
|
AnalysisID StopAfter = 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDGPU_TARGET_MACHINE_H
|
139
lib/Target/AMDGPU/AMDGPUUtil.cpp
Normal file
139
lib/Target/AMDGPU/AMDGPUUtil.cpp
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Common utility functions used by hw codegen targets
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPUUtil.h"
|
||||||
|
#include "AMDGPURegisterInfo.h"
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/Target/TargetInstrInfo.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
#include "llvm/Target/TargetRegisterInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Some instructions act as place holders to emulate operations that the GPU
|
||||||
|
// hardware does automatically. This function can be used to check if
|
||||||
|
// an opcode falls into this category.
|
||||||
|
bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
|
||||||
|
{
|
||||||
|
switch (opcode) {
|
||||||
|
default: return false;
|
||||||
|
case AMDGPU::RETURN:
|
||||||
|
case AMDGPU::LOAD_INPUT:
|
||||||
|
case AMDGPU::LAST:
|
||||||
|
case AMDGPU::MASK_WRITE:
|
||||||
|
case AMDGPU::RESERVE_REG:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when opcode is one of the COS, MULLIT / MUL_LIT, EXP_IEEE or
// LOG opcodes listed below, and false for every other opcode.
bool AMDGPU::isTransOp(unsigned opcode) {
  switch (opcode) {
  case AMDGPU::COS_r600:
  case AMDGPU::COS_eg:
  case AMDGPU::MULLIT:
  case AMDGPU::MUL_LIT_r600:
  case AMDGPU::MUL_LIT_eg:
  case AMDGPU::EXP_IEEE_r600:
  case AMDGPU::EXP_IEEE_eg:
  case AMDGPU::LOG_CLAMPED_r600:
  case AMDGPU::LOG_IEEE_r600:
  case AMDGPU::LOG_CLAMPED_eg:
  case AMDGPU::LOG_IEEE_eg:
    return true;
  default:
    break;
  }
  return false;
}
|
||||||
|
|
||||||
|
// Returns true when opcode is one of the TEX_* opcodes listed below, and
// false for every other opcode.
bool AMDGPU::isTexOp(unsigned opcode) {
  switch (opcode) {
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
    return true;
  default:
    break;
  }
  return false;
}
|
||||||
|
|
||||||
|
// Returns true when opcode is DOT4_r600 or DOT4_eg, and false otherwise.
bool AMDGPU::isReductionOp(unsigned opcode) {
  return opcode == AMDGPU::DOT4_r600 || opcode == AMDGPU::DOT4_eg;
}
|
||||||
|
|
||||||
|
// Returns true when opcode is CUBE_r600 or CUBE_eg, and false otherwise.
bool AMDGPU::isCubeOp(unsigned opcode) {
  return opcode == AMDGPU::CUBE_r600 || opcode == AMDGPU::CUBE_eg;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true when opcode is one of the BREAK / CONTINUE / IF / ELSE /
// ENDIF / ENDLOOP / WHILELOOP opcodes listed below, and false otherwise.
bool AMDGPU::isFCOp(unsigned opcode) {
  switch (opcode) {
  case AMDGPU::BREAK_LOGICALZ_f32:
  case AMDGPU::BREAK_LOGICALNZ_i32:
  case AMDGPU::BREAK_LOGICALZ_i32:
  case AMDGPU::BREAK_LOGICALNZ_f32:
  case AMDGPU::CONTINUE_LOGICALNZ_f32:
  case AMDGPU::IF_LOGICALNZ_i32:
  case AMDGPU::IF_LOGICALZ_f32:
  case AMDGPU::ELSE:
  case AMDGPU::ENDIF:
  case AMDGPU::ENDLOOP:
  case AMDGPU::IF_LOGICALNZ_f32:
  case AMDGPU::WHILELOOP:
    return true;
  default:
    break;
  }
  return false;
}
|
||||||
|
|
||||||
|
// Makes physReg available through virtReg. When physReg is not yet a
// function live-in, it is registered as one (paired with virtReg), added to
// the first basic block's live-ins, and a COPY from physReg to virtReg is
// inserted at the start of that block. When physReg is already a live-in,
// every use of virtReg is replaced with the virtual register already
// associated with physReg.
void AMDGPU::utilAddLiveIn(MachineFunction * MF,
                           MachineRegisterInfo & MRI,
                           const TargetInstrInfo * TII,
                           unsigned physReg, unsigned virtReg) {
  if (MRI.isLiveIn(physReg)) {
    MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
    return;
  }

  MRI.addLiveIn(physReg, virtReg);

  MachineBasicBlock &Entry = MF->front();
  Entry.addLiveIn(physReg);
  BuildMI(Entry, Entry.begin(), DebugLoc(),
          TII->get(TargetOpcode::COPY), virtReg)
      .addReg(physReg);
}
|
46
lib/Target/AMDGPU/AMDGPUUtil.h
Normal file
46
lib/Target/AMDGPU/AMDGPUUtil.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Declarations for utility functions common to all hw codegen targets.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDGPU_UTIL_H
|
||||||
|
#define AMDGPU_UTIL_H
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class MachineFunction;
|
||||||
|
class MachineRegisterInfo;
|
||||||
|
class TargetInstrInfo;
|
||||||
|
|
||||||
|
namespace AMDGPU {
|
||||||
|
|
||||||
|
bool isPlaceHolderOpcode(unsigned opcode);
|
||||||
|
|
||||||
|
bool isTransOp(unsigned opcode);
|
||||||
|
bool isTexOp(unsigned opcode);
|
||||||
|
bool isReductionOp(unsigned opcode);
|
||||||
|
bool isCubeOp(unsigned opcode);
|
||||||
|
bool isFCOp(unsigned opcode);
|
||||||
|
|
||||||
|
// XXX: Move these to AMDGPUInstrInfo.h
|
||||||
|
#define MO_FLAG_CLAMP (1 << 0)
|
||||||
|
#define MO_FLAG_NEG (1 << 1)
|
||||||
|
#define MO_FLAG_ABS (1 << 2)
|
||||||
|
#define MO_FLAG_MASK (1 << 3)
|
||||||
|
|
||||||
|
void utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
|
||||||
|
const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
|
||||||
|
|
||||||
|
} // End namespace AMDGPU
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDGPU_UTIL_H
|
251
lib/Target/AMDGPU/AMDIL.h
Normal file
251
lib/Target/AMDGPU/AMDIL.h
Normal file
@ -0,0 +1,251 @@
|
|||||||
|
//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the entry points for global functions defined in the LLVM
|
||||||
|
// AMDIL back-end.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDIL_H_
|
||||||
|
#define AMDIL_H_
|
||||||
|
|
||||||
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
|
#define AMDIL_MAJOR_VERSION 2
|
||||||
|
#define AMDIL_MINOR_VERSION 0
|
||||||
|
#define AMDIL_REVISION_NUMBER 74
|
||||||
|
#define ARENA_SEGMENT_RESERVED_UAVS 12
|
||||||
|
#define DEFAULT_ARENA_UAV_ID 8
|
||||||
|
#define DEFAULT_RAW_UAV_ID 7
|
||||||
|
#define GLOBAL_RETURN_RAW_UAV_ID 11
|
||||||
|
#define HW_MAX_NUM_CB 8
|
||||||
|
#define MAX_NUM_UNIQUE_UAVS 8
|
||||||
|
#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
|
||||||
|
#define OPENCL_MAX_READ_IMAGES 128
|
||||||
|
#define OPENCL_MAX_WRITE_IMAGES 8
|
||||||
|
#define OPENCL_MAX_SAMPLERS 16
|
||||||
|
|
||||||
|
// The following ID values can never be zero, as zero is the ID that is
|
||||||
|
// used to assert against.
|
||||||
|
#define DEFAULT_LDS_ID 1
|
||||||
|
#define DEFAULT_GDS_ID 1
|
||||||
|
#define DEFAULT_SCRATCH_ID 1
|
||||||
|
#define DEFAULT_VEC_SLOTS 8
|
||||||
|
|
||||||
|
// SC->CAL version matchings.
|
||||||
|
#define CAL_VERSION_SC_150 1700
|
||||||
|
#define CAL_VERSION_SC_149 1700
|
||||||
|
#define CAL_VERSION_SC_148 1525
|
||||||
|
#define CAL_VERSION_SC_147 1525
|
||||||
|
#define CAL_VERSION_SC_146 1525
|
||||||
|
#define CAL_VERSION_SC_145 1451
|
||||||
|
#define CAL_VERSION_SC_144 1451
|
||||||
|
#define CAL_VERSION_SC_143 1441
|
||||||
|
#define CAL_VERSION_SC_142 1441
|
||||||
|
#define CAL_VERSION_SC_141 1420
|
||||||
|
#define CAL_VERSION_SC_140 1400
|
||||||
|
#define CAL_VERSION_SC_139 1387
|
||||||
|
#define CAL_VERSION_SC_138 1387
|
||||||
|
#define CAL_APPEND_BUFFER_SUPPORT 1340
|
||||||
|
#define CAL_VERSION_SC_137 1331
|
||||||
|
#define CAL_VERSION_SC_136 982
|
||||||
|
#define CAL_VERSION_SC_135 950
|
||||||
|
#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
|
||||||
|
|
||||||
|
#define OCL_DEVICE_RV710 0x0001
|
||||||
|
#define OCL_DEVICE_RV730 0x0002
|
||||||
|
#define OCL_DEVICE_RV770 0x0004
|
||||||
|
#define OCL_DEVICE_CEDAR 0x0008
|
||||||
|
#define OCL_DEVICE_REDWOOD 0x0010
|
||||||
|
#define OCL_DEVICE_JUNIPER 0x0020
|
||||||
|
#define OCL_DEVICE_CYPRESS 0x0040
|
||||||
|
#define OCL_DEVICE_CAICOS 0x0080
|
||||||
|
#define OCL_DEVICE_TURKS 0x0100
|
||||||
|
#define OCL_DEVICE_BARTS 0x0200
|
||||||
|
#define OCL_DEVICE_CAYMAN 0x0400
|
||||||
|
#define OCL_DEVICE_ALL 0x3FFF
|
||||||
|
|
||||||
|
/// The number of function ID's that are reserved for
|
||||||
|
/// internal compiler usage.
|
||||||
|
const unsigned int RESERVED_FUNCS = 1024;
|
||||||
|
|
||||||
|
#define AMDIL_OPT_LEVEL_DECL
|
||||||
|
#define AMDIL_OPT_LEVEL_VAR
|
||||||
|
#define AMDIL_OPT_LEVEL_VAR_NO_COMMA
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class AMDILInstrPrinter;
|
||||||
|
class FunctionPass;
|
||||||
|
class MCAsmInfo;
|
||||||
|
class raw_ostream;
|
||||||
|
class Target;
|
||||||
|
class TargetMachine;
|
||||||
|
|
||||||
|
/// Instruction selection passes.
|
||||||
|
FunctionPass*
|
||||||
|
createAMDILISelDag(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||||
|
FunctionPass*
|
||||||
|
createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||||
|
|
||||||
|
/// Pre emit passes.
|
||||||
|
FunctionPass*
|
||||||
|
createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||||
|
FunctionPass*
|
||||||
|
createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
|
||||||
|
|
||||||
|
extern Target TheAMDILTarget;
|
||||||
|
extern Target TheAMDGPUTarget;
|
||||||
|
} // end namespace llvm;
|
||||||
|
|
||||||
|
#define GET_REGINFO_ENUM
|
||||||
|
#include "AMDGPUGenRegisterInfo.inc"
|
||||||
|
#define GET_INSTRINFO_ENUM
|
||||||
|
#include "AMDGPUGenInstrInfo.inc"
|
||||||
|
|
||||||
|
/// Include device information enumerations
|
||||||
|
#include "AMDILDeviceInfo.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
/// OpenCL uses address spaces to differentiate between
|
||||||
|
/// various memory regions on the hardware. On the CPU
|
||||||
|
/// all of the address spaces point to the same memory,
|
||||||
|
/// however on the GPU, each address space points to
|
||||||
|
/// a separate piece of memory that is unique from other
|
||||||
|
/// memory locations.
|
||||||
|
namespace AMDILAS {
|
||||||
|
enum AddressSpaces {
|
||||||
|
PRIVATE_ADDRESS = 0, // Address space for private memory.
|
||||||
|
GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
|
||||||
|
CONSTANT_ADDRESS = 2, // Address space for constant memory.
|
||||||
|
LOCAL_ADDRESS = 3, // Address space for local memory.
|
||||||
|
REGION_ADDRESS = 4, // Address space for region memory.
|
||||||
|
ADDRESS_NONE = 5, // Address space for unknown memory.
|
||||||
|
PARAM_D_ADDRESS = 6, // Address space for direct addressable parameter memory (CONST0)
|
||||||
|
PARAM_I_ADDRESS = 7, // Address space for indirect addressable parameter memory (VTX1)
|
||||||
|
USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
|
||||||
|
LAST_ADDRESS = 9
|
||||||
|
};
|
||||||
|
|
||||||
|
// This union/struct combination is an easy way to read out the
|
||||||
|
// exact bits that are needed.
|
||||||
|
typedef union ResourceRec {
|
||||||
|
struct {
|
||||||
|
#ifdef __BIG_ENDIAN__
|
||||||
|
unsigned short isImage : 1; // Reserved for future use/llvm.
|
||||||
|
unsigned short ResourceID : 10; // Flag to specify the resource ID for
|
||||||
|
// the op.
|
||||||
|
unsigned short HardwareInst : 1; // Flag to specify that this instruction
|
||||||
|
// is a hardware instruction.
|
||||||
|
unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
|
||||||
|
// conflict.
|
||||||
|
unsigned short ByteStore : 1; // Flag to specify if the op is a byte
|
||||||
|
// store op.
|
||||||
|
unsigned short PointerPath : 1; // Flag to specify if the op is on the
|
||||||
|
// pointer path.
|
||||||
|
unsigned short CacheableRead : 1; // Flag to specify if the read is
|
||||||
|
// cacheable.
|
||||||
|
#else
|
||||||
|
unsigned short CacheableRead : 1; // Flag to specify if the read is
|
||||||
|
// cacheable.
|
||||||
|
unsigned short PointerPath : 1; // Flag to specify if the op is on the
|
||||||
|
// pointer path.
|
||||||
|
unsigned short ByteStore : 1; // Flag to specify if the op is byte
|
||||||
|
// store op.
|
||||||
|
unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
|
||||||
|
// a conflict.
|
||||||
|
unsigned short HardwareInst : 1; // Flag to specify that this instruction
|
||||||
|
// is a hardware instruction.
|
||||||
|
unsigned short ResourceID : 10; // Flag to specify the resource ID for
|
||||||
|
// the op.
|
||||||
|
unsigned short isImage : 1; // Reserved for future use.
|
||||||
|
#endif
|
||||||
|
} bits;
|
||||||
|
unsigned short u16all;
|
||||||
|
} InstrResEnc;
|
||||||
|
|
||||||
|
} // namespace AMDILAS
|
||||||
|
|
||||||
|
// Enums corresponding to AMDIL condition codes for IL. These
|
||||||
|
// values must be kept in sync with the ones in the .td file.
|
||||||
|
namespace AMDILCC {
|
||||||
|
enum CondCodes {
|
||||||
|
// AMDIL specific condition codes. These correspond to the IL_CC_*
|
||||||
|
// in AMDILInstrInfo.td and must be kept in the same order.
|
||||||
|
IL_CC_D_EQ = 0, // DEQ instruction.
|
||||||
|
IL_CC_D_GE = 1, // DGE instruction.
|
||||||
|
IL_CC_D_LT = 2, // DLT instruction.
|
||||||
|
IL_CC_D_NE = 3, // DNE instruction.
|
||||||
|
IL_CC_F_EQ = 4, // EQ instruction.
|
||||||
|
IL_CC_F_GE = 5, // GE instruction.
|
||||||
|
IL_CC_F_LT = 6, // LT instruction.
|
||||||
|
IL_CC_F_NE = 7, // NE instruction.
|
||||||
|
IL_CC_I_EQ = 8, // IEQ instruction.
|
||||||
|
IL_CC_I_GE = 9, // IGE instruction.
|
||||||
|
IL_CC_I_LT = 10, // ILT instruction.
|
||||||
|
IL_CC_I_NE = 11, // INE instruction.
|
||||||
|
IL_CC_U_GE = 12, // UGE instruction.
|
||||||
|
IL_CC_U_LT = 13, // ULT instruction.
|
||||||
|
// Pseudo IL Comparison instructions here.
|
||||||
|
IL_CC_F_GT = 14, // GT instruction.
|
||||||
|
IL_CC_U_GT = 15,
|
||||||
|
IL_CC_I_GT = 16,
|
||||||
|
IL_CC_D_GT = 17,
|
||||||
|
IL_CC_F_LE = 18, // LE instruction
|
||||||
|
IL_CC_U_LE = 19,
|
||||||
|
IL_CC_I_LE = 20,
|
||||||
|
IL_CC_D_LE = 21,
|
||||||
|
IL_CC_F_UNE = 22,
|
||||||
|
IL_CC_F_UEQ = 23,
|
||||||
|
IL_CC_F_ULT = 24,
|
||||||
|
IL_CC_F_UGT = 25,
|
||||||
|
IL_CC_F_ULE = 26,
|
||||||
|
IL_CC_F_UGE = 27,
|
||||||
|
IL_CC_F_ONE = 28,
|
||||||
|
IL_CC_F_OEQ = 29,
|
||||||
|
IL_CC_F_OLT = 30,
|
||||||
|
IL_CC_F_OGT = 31,
|
||||||
|
IL_CC_F_OLE = 32,
|
||||||
|
IL_CC_F_OGE = 33,
|
||||||
|
IL_CC_D_UNE = 34,
|
||||||
|
IL_CC_D_UEQ = 35,
|
||||||
|
IL_CC_D_ULT = 36,
|
||||||
|
IL_CC_D_UGT = 37,
|
||||||
|
IL_CC_D_ULE = 38,
|
||||||
|
IL_CC_D_UGE = 39,
|
||||||
|
IL_CC_D_ONE = 40,
|
||||||
|
IL_CC_D_OEQ = 41,
|
||||||
|
IL_CC_D_OLT = 42,
|
||||||
|
IL_CC_D_OGT = 43,
|
||||||
|
IL_CC_D_OLE = 44,
|
||||||
|
IL_CC_D_OGE = 45,
|
||||||
|
IL_CC_U_EQ = 46,
|
||||||
|
IL_CC_U_NE = 47,
|
||||||
|
IL_CC_F_O = 48,
|
||||||
|
IL_CC_D_O = 49,
|
||||||
|
IL_CC_F_UO = 50,
|
||||||
|
IL_CC_D_UO = 51,
|
||||||
|
IL_CC_L_LE = 52,
|
||||||
|
IL_CC_L_GE = 53,
|
||||||
|
IL_CC_L_EQ = 54,
|
||||||
|
IL_CC_L_NE = 55,
|
||||||
|
IL_CC_L_LT = 56,
|
||||||
|
IL_CC_L_GT = 57,
|
||||||
|
IL_CC_UL_LE = 58,
|
||||||
|
IL_CC_UL_GE = 59,
|
||||||
|
IL_CC_UL_EQ = 60,
|
||||||
|
IL_CC_UL_NE = 61,
|
||||||
|
IL_CC_UL_LT = 62,
|
||||||
|
IL_CC_UL_GT = 63,
|
||||||
|
COND_ERROR = 64
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace AMDILCC
|
||||||
|
} // end namespace llvm
|
||||||
|
#endif // AMDIL_H_
|
128
lib/Target/AMDGPU/AMDIL7XXDevice.cpp
Normal file
128
lib/Target/AMDGPU/AMDIL7XXDevice.cpp
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
#include "AMDIL7XXDevice.h"
|
||||||
|
#include "AMDILDevice.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Builds a generic 7XX device: applies the 7XX capability bits and then
// derives mDeviceFlag from the subtarget's device name. "rv710" and "rv730"
// get their own flags; every other name is treated as RV770.
AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST) {
  setCaps();

  const std::string DeviceName = mSTM->getDeviceName();
  if (DeviceName == "rv710")
    mDeviceFlag = OCL_DEVICE_RV710;
  else if (DeviceName == "rv730")
    mDeviceFlag = OCL_DEVICE_RV730;
  else
    mDeviceFlag = OCL_DEVICE_RV770;
}
|
||||||
|
|
||||||
|
// Nothing to release at this level.
AMDIL7XXDevice::~AMDIL7XXDevice() {}
|
||||||
|
|
||||||
|
void AMDIL7XXDevice::setCaps()
|
||||||
|
{
|
||||||
|
mSWBits.set(AMDILDeviceInfo::LocalMem);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t AMDIL7XXDevice::getMaxLDSSize() const
|
||||||
|
{
|
||||||
|
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||||
|
return MAX_LDS_SIZE_700;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t AMDIL7XXDevice::getWavefrontSize() const
|
||||||
|
{
|
||||||
|
return AMDILDevice::HalfWavefrontSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t AMDIL7XXDevice::getGeneration() const
|
||||||
|
{
|
||||||
|
return AMDILDeviceInfo::HD4XXX;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
|
||||||
|
{
|
||||||
|
switch (DeviceID) {
|
||||||
|
default:
|
||||||
|
assert(0 && "ID type passed in is unknown!");
|
||||||
|
break;
|
||||||
|
case GLOBAL_ID:
|
||||||
|
case CONSTANT_ID:
|
||||||
|
case RAW_UAV_ID:
|
||||||
|
case ARENA_UAV_ID:
|
||||||
|
break;
|
||||||
|
case LDS_ID:
|
||||||
|
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||||
|
return DEFAULT_LDS_ID;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SCRATCH_ID:
|
||||||
|
if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
|
||||||
|
return DEFAULT_SCRATCH_ID;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case GDS_ID:
|
||||||
|
assert(0 && "GDS UAV ID is not supported on this chip");
|
||||||
|
if (usesHardware(AMDILDeviceInfo::RegionMem)) {
|
||||||
|
return DEFAULT_GDS_ID;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the 7XX base device first, then applies the RV770-specific
// capability bits through this class's own setCaps().
AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST) {
  setCaps();
}
|
||||||
|
|
||||||
|
// Nothing to release at this level.
AMDIL770Device::~AMDIL770Device() {}
|
||||||
|
|
||||||
|
void AMDIL770Device::setCaps()
|
||||||
|
{
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::FMA);
|
||||||
|
mHWBits.set(AMDILDeviceInfo::DoubleOps);
|
||||||
|
}
|
||||||
|
mSWBits.set(AMDILDeviceInfo::BarrierDetect);
|
||||||
|
mHWBits.reset(AMDILDeviceInfo::LongOps);
|
||||||
|
mSWBits.set(AMDILDeviceInfo::LongOps);
|
||||||
|
mSWBits.set(AMDILDeviceInfo::LocalMem);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t AMDIL770Device::getWavefrontSize() const
|
||||||
|
{
|
||||||
|
return AMDILDevice::WavefrontSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// All setup is done by the 7XX base constructor.
AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST) {}
|
||||||
|
|
||||||
|
// Nothing to release at this level.
AMDIL710Device::~AMDIL710Device() {}
|
||||||
|
|
||||||
|
size_t AMDIL710Device::getWavefrontSize() const
|
||||||
|
{
|
||||||
|
return AMDILDevice::QuarterWavefrontSize;
|
||||||
|
}
|
71
lib/Target/AMDGPU/AMDIL7XXDevice.h
Normal file
71
lib/Target/AMDGPU/AMDIL7XXDevice.h
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
//==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ----------*- C++ -*--===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface for the subtarget data classes.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// This file will define the interface that each generation needs to
|
||||||
|
// implement in order to correctly answer queries on the capabilities of the
|
||||||
|
// specific hardware.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
#ifndef _AMDIL7XXDEVICEIMPL_H_
|
||||||
|
#define _AMDIL7XXDEVICEIMPL_H_
|
||||||
|
#include "AMDILDevice.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class AMDILSubtarget;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// 7XX generation of devices and their respective sub classes
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
|
||||||
|
// devices are derived from this class. The AMDIL7XX device will only
|
||||||
|
// support the minimal features that are required to be considered OpenCL 1.0
|
||||||
|
// compliant and nothing more.
|
||||||
|
class AMDIL7XXDevice : public AMDILDevice {
|
||||||
|
public:
|
||||||
|
AMDIL7XXDevice(AMDILSubtarget *ST);
|
||||||
|
virtual ~AMDIL7XXDevice();
|
||||||
|
virtual size_t getMaxLDSSize() const;
|
||||||
|
virtual size_t getWavefrontSize() const;
|
||||||
|
virtual uint32_t getGeneration() const;
|
||||||
|
virtual uint32_t getResourceID(uint32_t DeviceID) const;
|
||||||
|
virtual uint32_t getMaxNumUAVs() const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual void setCaps();
|
||||||
|
}; // AMDIL7XXDevice
|
||||||
|
|
||||||
|
// The AMDIL770Device class represents the RV770 chip and its
|
||||||
|
// derivative cards. The difference between this device and the base
|
||||||
|
// class is that this device adds support for double precision
|
||||||
|
// and has a larger wavefront size.
|
||||||
|
class AMDIL770Device : public AMDIL7XXDevice {
|
||||||
|
public:
|
||||||
|
AMDIL770Device(AMDILSubtarget *ST);
|
||||||
|
virtual ~AMDIL770Device();
|
||||||
|
virtual size_t getWavefrontSize() const;
|
||||||
|
private:
|
||||||
|
virtual void setCaps();
|
||||||
|
}; // AMDIL770Device
|
||||||
|
|
||||||
|
// The AMDIL710Device class derives from the 7XX base class, but this
|
||||||
|
// class is a smaller derivative, so we need to overload some of the
|
||||||
|
// functions in order to correctly specify this information.
|
||||||
|
class AMDIL710Device : public AMDIL7XXDevice {
|
||||||
|
public:
|
||||||
|
AMDIL710Device(AMDILSubtarget *ST);
|
||||||
|
virtual ~AMDIL710Device();
|
||||||
|
virtual size_t getWavefrontSize() const;
|
||||||
|
}; // AMDIL710Device
|
||||||
|
|
||||||
|
} // namespace llvm
|
||||||
|
#endif // _AMDIL7XXDEVICEIMPL_H_
|
93
lib/Target/AMDGPU/AMDILAlgorithms.tpp
Normal file
93
lib/Target/AMDGPU/AMDILAlgorithms.tpp
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file provides templates algorithms that extend the STL algorithms, but
|
||||||
|
// are useful for the AMDIL backend
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// A template function that loops through the iterators and passes the second
// argument along with each element to the function, i.e. it calls
// F(*First, Second) for every position in [First, Last). Whatever F returns
// is ignored and the iterator always moves forward. This is based on the
// for_each STL function, but allows a reference to the second argument.
// The function object is taken by value and that copy is returned.
template<class InputIterator, class Function, typename Arg>
Function binaryForEach(InputIterator First, InputIterator Last, Function F,
                       Arg &Second)
{
  for ( ; First != Last; ++First) {
    F(*First, Second);
  }
  return F;
}
|
||||||
|
|
||||||
|
// Variant of binaryForEach whose callback decides whether to advance:
// F(*First, Second) returning true means the same position is visited again,
// returning false moves on to the next position.
// NOTE(review): presumably "true" signals that F removed the current element;
// confirm against the callers.
template<class InputIterator, class Function, typename Arg>
Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
                           Arg &Second)
{
  // Advance only when F returns false. The previous "--First" followed by the
  // loop's "++First" visited the same position as well, but it decremented the
  // iterator before the start of the range when F returned true for the first
  // element and required a bidirectional iterator for no benefit.
  while (First != Last) {
    if (!F(*First, Second)) {
      ++First;
    }
  }
  return F;
}
|
||||||
|
|
||||||
|
// Two-level form of binaryForEach: each element of [First, Last) must itself
// provide begin()/end(), and F is applied together with Second to every
// inner element via binaryForEach. F is handed to each inner walk by value,
// and the outer copy is what gets returned.
template<class InputIterator, class Function, typename Arg>
Function binaryNestedForEach(InputIterator First, InputIterator Last,
                             Function F, Arg &Second)
{
  while (First != Last) {
    binaryForEach(First->begin(), First->end(), F, Second);
    ++First;
  }
  return F;
}
|
||||||
|
// Two-level form of safeBinaryForEach: each element of [First, Last) must
// itself provide begin()/end(), and every inner range is walked with
// safeBinaryForEach using F and Second. F is handed to each inner walk by
// value, and the outer copy is what gets returned.
template<class InputIterator, class Function, typename Arg>
Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
                                 Function F, Arg &Second)
{
  while (First != Last) {
    safeBinaryForEach(First->begin(), First->end(), F, Second);
    ++First;
  }
  return F;
}
|
||||||
|
|
||||||
|
// Unlike the STL, the 'safe' versions of these functions pass a pointer to
// the iterator itself to the function. This allows the function to handle
// situations such as invalidated iterators.
// F is called as F(&First) for each position; its result is ignored and the
// iterator is incremented after every call, so any adjustment F makes through
// the pointer is followed by that increment.
template<class InputIterator, class Function>
Function safeForEach(InputIterator First, InputIterator Last, Function F)
{
  while (First != Last) {
    F(&First);
    ++First;
  }
  return F;
}
|
||||||
|
|
||||||
|
// Two-level form of the 'safe' iteration: each element of [First, Last) must
// provide begin()/end() convertible to SecondIterator. For every inner
// position F is called with a pointer to the inner iterator; the iterator is
// incremented here only when F returns false, so a true result leaves the
// iterator wherever F put it. The inner end iterator is read once per outer
// element. S is never read; it only lets SecondIterator be deduced.
template<class InputIterator, class SecondIterator, class Function>
Function safeNestedForEach(InputIterator First, InputIterator Last,
                           SecondIterator S, Function F)
{
  while (First != Last) {
    SecondIterator Cur = First->begin();
    SecondIterator Stop = First->end();
    while (Cur != Stop) {
      if (!F(&Cur)) {
        ++Cur;
      }
    }
    ++First;
  }
  return F;
}
|
113
lib/Target/AMDGPU/AMDILBase.td
Normal file
113
lib/Target/AMDGPU/AMDILBase.td
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
//===- AMDILBase.td - AMDIL Target Machine ---------*- tablegen -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Target-independent interfaces which we are implementing
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
include "llvm/Target/Target.td"
|
||||||
|
|
||||||
|
// Dummy Instruction itineraries for pseudo instructions
|
||||||
|
def ALU_NULL : FuncUnit;
|
||||||
|
def NullALU : InstrItinClass;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AMDIL Subtarget features.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def FeatureFP64 : SubtargetFeature<"fp64",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::DoubleOps]",
|
||||||
|
"true",
|
||||||
|
"Enable 64bit double precision operations">;
|
||||||
|
def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::ByteStores]",
|
||||||
|
"true",
|
||||||
|
"Enable byte addressable stores">;
|
||||||
|
def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::BarrierDetect]",
|
||||||
|
"true",
|
||||||
|
"Enable duplicate barrier detection(HD5XXX or later).">;
|
||||||
|
def FeatureImages : SubtargetFeature<"images",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::Images]",
|
||||||
|
"true",
|
||||||
|
"Enable image functions">;
|
||||||
|
def FeatureMultiUAV : SubtargetFeature<"multi_uav",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::MultiUAV]",
|
||||||
|
"true",
|
||||||
|
"Generate multiple UAV code(HD5XXX family or later)">;
|
||||||
|
def FeatureMacroDB : SubtargetFeature<"macrodb",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::MacroDB]",
|
||||||
|
"true",
|
||||||
|
"Use internal macrodb, instead of macrodb in driver">;
|
||||||
|
def FeatureNoAlias : SubtargetFeature<"noalias",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::NoAlias]",
|
||||||
|
"true",
|
||||||
|
"assert that all kernel argument pointers are not aliased">;
|
||||||
|
def FeatureNoInline : SubtargetFeature<"no-inline",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::NoInline]",
|
||||||
|
"true",
|
||||||
|
"specify whether to not inline functions">;
|
||||||
|
|
||||||
|
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
|
||||||
|
"mIs64bit",
|
||||||
|
"false",
|
||||||
|
"Specify if 64bit addressing should be used.">;
|
||||||
|
|
||||||
|
def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
|
||||||
|
"mIs32on64bit",
|
||||||
|
"false",
|
||||||
|
"Specify if 64bit sized pointers with 32bit addressing should be used.">;
|
||||||
|
def FeatureDebug : SubtargetFeature<"debug",
|
||||||
|
"CapsOverride[AMDILDeviceInfo::Debug]",
|
||||||
|
"true",
|
||||||
|
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
|
||||||
|
def FeatureDumpCode : SubtargetFeature <"DumpCode",
|
||||||
|
"mDumpCode",
|
||||||
|
"true",
|
||||||
|
"Dump MachineInstrs in the CodeEmitter">;
|
||||||
|
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Register File, Calling Conv, Instruction Descriptions
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
||||||
|
include "AMDILRegisterInfo.td"
|
||||||
|
include "AMDILCallingConv.td"
|
||||||
|
include "AMDILInstrInfo.td"
|
||||||
|
|
||||||
|
def AMDILInstrInfo : InstrInfo {}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AMDIL processors supported.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//include "Processors.td"
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Declare the target which we are implementing
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def AMDILAsmWriter : AsmWriter {
|
||||||
|
string AsmWriterClassName = "AsmPrinter";
|
||||||
|
int Variant = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
def AMDILAsmParser : AsmParser {
|
||||||
|
string AsmParserClassName = "AsmParser";
|
||||||
|
int Variant = 0;
|
||||||
|
|
||||||
|
string CommentDelimiter = ";";
|
||||||
|
|
||||||
|
string RegisterPrefix = "r";
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def AMDIL : Target {
|
||||||
|
// Pull in Instruction Info:
|
||||||
|
let InstructionSet = AMDILInstrInfo;
|
||||||
|
let AssemblyWriters = [AMDILAsmWriter];
|
||||||
|
let AssemblyParsers = [AMDILAsmParser];
|
||||||
|
}
|
3236
lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
Normal file
3236
lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
Normal file
File diff suppressed because it is too large
Load Diff
42
lib/Target/AMDGPU/AMDILCallingConv.td
Normal file
42
lib/Target/AMDGPU/AMDILCallingConv.td
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
//===- AMDILCallingConv.td - Calling Conventions AMDIL -----*- tablegen -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This describes the calling conventions for the AMDIL architectures.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Return Value Calling Conventions
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// AMDIL 32-bit C return-value convention.
|
||||||
|
def RetCC_AMDIL32 : CallingConv<[
|
||||||
|
// Since IL has no return values, all values can be emulated on the stack
|
||||||
|
// The stack can then be mapped to a number of sequential virtual registers
|
||||||
|
// in IL
|
||||||
|
|
||||||
|
// Integer and FP scalar values get put on the stack at 16-byte alignment
|
||||||
|
// but with a size of 4 bytes
|
||||||
|
CCIfType<[i32, f32], CCAssignToReg<
|
||||||
|
[
|
||||||
|
R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
|
||||||
|
]> >, CCAssignToStack<16, 16>]>;
|
||||||
|
|
||||||
|
// AMDIL 32-bit C Calling convention.
|
||||||
|
def CC_AMDIL32 : CallingConv<[
|
||||||
|
// Since IL has parameter values, all values can be emulated on the stack
|
||||||
|
// The stack can then be mapped to a number of sequential virtual registers
|
||||||
|
// in IL
|
||||||
|
// Integer and FP scalar values get put on the stack at 16-byte alignment
|
||||||
|
// but with a size of 4 bytes
|
||||||
|
// Integer and FP scalar values get put on the stack at 16-byte alignment
|
||||||
|
// but with a size of 4 bytes
|
||||||
|
CCIfType<[i32, f32], CCAssignToReg<
|
||||||
|
[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
|
||||||
|
]> >, CCAssignToStack<16, 16>]>;
|
48
lib/Target/AMDGPU/AMDILCodeEmitter.h
Normal file
48
lib/Target/AMDGPU/AMDILCodeEmitter.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// CodeEmitter interface for R600 and SI codegen.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDILCODEEMITTER_H
|
||||||
|
#define AMDILCODEEMITTER_H
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class AMDILCodeEmitter {
|
||||||
|
public:
|
||||||
|
uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
|
||||||
|
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
|
||||||
|
const MachineOperand &MO) const { return 0; }
|
||||||
|
virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
|
||||||
|
unsigned OpNo) const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
|
||||||
|
unsigned OpNo) const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
virtual uint64_t VOPPostEncode(const MachineInstr &MI,
|
||||||
|
uint64_t Value) const {
|
||||||
|
return Value;
|
||||||
|
}
|
||||||
|
virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
|
||||||
|
unsigned OpNo) const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
|
||||||
|
const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDILCODEEMITTER_H
|
137
lib/Target/AMDGPU/AMDILDevice.cpp
Normal file
137
lib/Target/AMDGPU/AMDILDevice.cpp
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
#include "AMDILDevice.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
// Default implementation for all of the classes.
|
||||||
|
// Builds the base device state: both capability bit sets are sized to
// MaxNumberCapabilities, the default capabilities are applied and the device
// flag is set to OCL_DEVICE_ALL.
AMDILDevice::AMDILDevice(AMDILSubtarget *ST)
  : mSTM(ST) {
  const unsigned NumCaps = AMDILDeviceInfo::MaxNumberCapabilities;
  mHWBits.resize(NumCaps);
  mSWBits.resize(NumCaps);
  // setCaps() is virtual, but a call made from inside this constructor
  // always resolves to AMDILDevice::setCaps().
  setCaps();
  mDeviceFlag = OCL_DEVICE_ALL;
}
|
||||||
|
|
||||||
|
// Empties both capability bit sets before the object goes away.
AMDILDevice::~AMDILDevice() {
  mSWBits.clear();
  mHWBits.clear();
}
|
||||||
|
|
||||||
|
// Default GDS size, in bytes: the base device reports 0.
size_t AMDILDevice::getMaxGDSSize() const {
  return 0;
}
|
||||||
|
|
||||||
|
// Returns the flag stored in mDeviceFlag.
uint32_t AMDILDevice::getDeviceFlag() const {
  return mDeviceFlag;
}
|
||||||
|
|
||||||
|
// Number of hardware constant buffers: HW_MAX_NUM_CB when constant memory is
// handled in hardware, otherwise 0.
size_t AMDILDevice::getMaxNumCBs() const {
  if (!usesHardware(AMDILDeviceInfo::ConstantMem))
    return 0;

  return HW_MAX_NUM_CB;
}
|
||||||
|
|
||||||
|
// Size of one hardware constant buffer: MAX_CB_SIZE when constant memory is
// handled in hardware, otherwise 0.
size_t AMDILDevice::getMaxCBSize() const {
  if (!usesHardware(AMDILDeviceInfo::ConstantMem))
    return 0;

  return MAX_CB_SIZE;
}
|
||||||
|
|
||||||
|
// Default scratch buffer limit, in bytes.
size_t AMDILDevice::getMaxScratchSize() const {
  return 65536;
}
|
||||||
|
|
||||||
|
// Default stack alignment reported by the base device.
uint32_t AMDILDevice::getStackAlignment() const {
  return 16;
}
|
||||||
|
|
||||||
|
void AMDILDevice::setCaps()
|
||||||
|
{
|
||||||
|
mSWBits.set(AMDILDeviceInfo::HalfOps);
|
||||||
|
mSWBits.set(AMDILDeviceInfo::ByteOps);
|
||||||
|
mSWBits.set(AMDILDeviceInfo::ShortOps);
|
||||||
|
mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::NoInline);
|
||||||
|
}
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::MacroDB);
|
||||||
|
}
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::ConstantMem);
|
||||||
|
} else {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::ConstantMem);
|
||||||
|
}
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::PrivateMem);
|
||||||
|
} else {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::PrivateMem);
|
||||||
|
}
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::BarrierDetect);
|
||||||
|
}
|
||||||
|
mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
|
||||||
|
mSWBits.set(AMDILDeviceInfo::LongOps);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Maps a capability to its execution mode: Hardware if its bit is set in
// mHWBits, Software if it is set in mSWBits, Unsupported if neither is set.
// A capability must never be set in both bit sets.
AMDILDeviceInfo::ExecutionMode
AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const {
  const bool InHardware = mHWBits[Caps];
  const bool InSoftware = mSWBits[Caps];
  assert(!(InHardware && InSoftware) && "Cannot set both SW and HW caps");

  if (InHardware)
    return AMDILDeviceInfo::Hardware;

  return InSoftware ? AMDILDeviceInfo::Software
                    : AMDILDeviceInfo::Unsupported;
}
|
||||||
|
|
||||||
|
// True when the capability has any execution mode other than Unsupported.
bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const {
  const AMDILDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
  return How != AMDILDeviceInfo::Unsupported;
}
|
||||||
|
|
||||||
|
// True when the capability's execution mode is Hardware.
bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const {
  const AMDILDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
  return How == AMDILDeviceInfo::Hardware;
}
|
||||||
|
|
||||||
|
// True when the capability's execution mode is Software.
bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const {
  const AMDILDeviceInfo::ExecutionMode How = getExecutionMode(Mode);
  return How == AMDILDeviceInfo::Software;
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
AMDILDevice::getDataLayout() const
|
||||||
|
{
|
||||||
|
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
||||||
|
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||||
|
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||||
|
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||||
|
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||||
|
"-n8:16:32:64");
|
||||||
|
}
|
116
lib/Target/AMDGPU/AMDILDevice.h
Normal file
116
lib/Target/AMDGPU/AMDILDevice.h
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface for the subtarget data classes.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// This file will define the interface that each generation needs to
|
||||||
|
// implement in order to correctly answer queries on the capabilities of the
|
||||||
|
// specific hardware.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
#ifndef _AMDILDEVICEIMPL_H_
|
||||||
|
#define _AMDILDEVICEIMPL_H_
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "llvm/ADT/BitVector.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class AMDILSubtarget;
|
||||||
|
class MCStreamer;
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Interface for data that is specific to a single device
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
class AMDILDevice {
|
||||||
|
public:
|
||||||
|
AMDILDevice(AMDILSubtarget *ST);
|
||||||
|
virtual ~AMDILDevice();
|
||||||
|
|
||||||
|
// Enum values for the various memory types.
|
||||||
|
enum {
|
||||||
|
RAW_UAV_ID = 0,
|
||||||
|
ARENA_UAV_ID = 1,
|
||||||
|
LDS_ID = 2,
|
||||||
|
GDS_ID = 3,
|
||||||
|
SCRATCH_ID = 4,
|
||||||
|
CONSTANT_ID = 5,
|
||||||
|
GLOBAL_ID = 6,
|
||||||
|
MAX_IDS = 7
|
||||||
|
} IO_TYPE_IDS;
|
||||||
|
|
||||||
|
// Returns the max LDS size that the hardware supports. Size is in
|
||||||
|
// bytes.
|
||||||
|
virtual size_t getMaxLDSSize() const = 0;
|
||||||
|
|
||||||
|
// Returns the max GDS size that the hardware supports if the GDS is
|
||||||
|
// supported by the hardware. Size is in bytes.
|
||||||
|
virtual size_t getMaxGDSSize() const;
|
||||||
|
|
||||||
|
// Returns the max number of hardware constant address spaces that
|
||||||
|
// are supported by this device.
|
||||||
|
virtual size_t getMaxNumCBs() const;
|
||||||
|
|
||||||
|
// Returns the max number of bytes a single hardware constant buffer
|
||||||
|
// can support. Size is in bytes.
|
||||||
|
virtual size_t getMaxCBSize() const;
|
||||||
|
|
||||||
|
// Returns the max number of bytes allowed by the hardware scratch
|
||||||
|
// buffer. Size is in bytes.
|
||||||
|
virtual size_t getMaxScratchSize() const;
|
||||||
|
|
||||||
|
// Get the flag that corresponds to the device.
|
||||||
|
virtual uint32_t getDeviceFlag() const;
|
||||||
|
|
||||||
|
// Returns the number of work-items that exist in a single hardware
|
||||||
|
// wavefront.
|
||||||
|
virtual size_t getWavefrontSize() const = 0;
|
||||||
|
|
||||||
|
// Get the generational name of this specific device.
|
||||||
|
virtual uint32_t getGeneration() const = 0;
|
||||||
|
|
||||||
|
// Get the stack alignment of this specific device.
|
||||||
|
virtual uint32_t getStackAlignment() const;
|
||||||
|
|
||||||
|
// Get the resource ID for this specific device.
|
||||||
|
virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
|
||||||
|
|
||||||
|
// Get the max number of UAV's for this device.
|
||||||
|
virtual uint32_t getMaxNumUAVs() const = 0;
|
||||||
|
|
||||||
|
|
||||||
|
// API utilizing more detailed capabilities of each family of
|
||||||
|
// cards. If a capability is supported, then either usesHardware or
|
||||||
|
// usesSoftware returned true. If usesHardware returned true, then
|
||||||
|
// usesSoftware must return false for the same capability. Hardware
|
||||||
|
// execution means that the feature is done natively by the hardware
|
||||||
|
// and is not emulated by the softare. Software execution means
|
||||||
|
// that the feature could be done in the hardware, but there is
|
||||||
|
// software that emulates it with possibly using the hardware for
|
||||||
|
// support since the hardware does not fully comply with OpenCL
|
||||||
|
// specs.
|
||||||
|
bool isSupported(AMDILDeviceInfo::Caps Mode) const;
|
||||||
|
bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
|
||||||
|
bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
|
||||||
|
virtual std::string getDataLayout() const;
|
||||||
|
static const unsigned int MAX_LDS_SIZE_700 = 16384;
|
||||||
|
static const unsigned int MAX_LDS_SIZE_800 = 32768;
|
||||||
|
static const unsigned int WavefrontSize = 64;
|
||||||
|
static const unsigned int HalfWavefrontSize = 32;
|
||||||
|
static const unsigned int QuarterWavefrontSize = 16;
|
||||||
|
protected:
|
||||||
|
virtual void setCaps();
|
||||||
|
llvm::BitVector mHWBits;
|
||||||
|
llvm::BitVector mSWBits;
|
||||||
|
AMDILSubtarget *mSTM;
|
||||||
|
uint32_t mDeviceFlag;
|
||||||
|
private:
|
||||||
|
AMDILDeviceInfo::ExecutionMode
|
||||||
|
getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
|
||||||
|
}; // AMDILDevice
|
||||||
|
|
||||||
|
} // namespace llvm
|
||||||
|
#endif // _AMDILDEVICEIMPL_H_
|
93
lib/Target/AMDGPU/AMDILDeviceInfo.cpp
Normal file
93
lib/Target/AMDGPU/AMDILDeviceInfo.cpp
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Function that creates DeviceInfo from a device name and other information.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
#include "AMDILDevices.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
namespace llvm {
|
||||||
|
namespace AMDILDeviceInfo {
|
||||||
|
AMDILDevice*
|
||||||
|
getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
|
||||||
|
{
|
||||||
|
if (deviceName.c_str()[2] == '7') {
|
||||||
|
switch (deviceName.c_str()[3]) {
|
||||||
|
case '1':
|
||||||
|
return new AMDIL710Device(ptr);
|
||||||
|
case '7':
|
||||||
|
return new AMDIL770Device(ptr);
|
||||||
|
default:
|
||||||
|
return new AMDIL7XXDevice(ptr);
|
||||||
|
};
|
||||||
|
} else if (deviceName == "cypress") {
|
||||||
|
#if DEBUG
|
||||||
|
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||||
|
assert(!is64on32bit && "This device does not support 64bit"
|
||||||
|
" on 32bit pointers!");
|
||||||
|
#endif
|
||||||
|
return new AMDILCypressDevice(ptr);
|
||||||
|
} else if (deviceName == "juniper") {
|
||||||
|
#if DEBUG
|
||||||
|
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||||
|
assert(!is64on32bit && "This device does not support 64bit"
|
||||||
|
" on 32bit pointers!");
|
||||||
|
#endif
|
||||||
|
return new AMDILEvergreenDevice(ptr);
|
||||||
|
} else if (deviceName == "redwood") {
|
||||||
|
#if DEBUG
|
||||||
|
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||||
|
assert(!is64on32bit && "This device does not support 64bit"
|
||||||
|
" on 32bit pointers!");
|
||||||
|
#endif
|
||||||
|
return new AMDILRedwoodDevice(ptr);
|
||||||
|
} else if (deviceName == "cedar") {
|
||||||
|
#if DEBUG
|
||||||
|
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||||
|
assert(!is64on32bit && "This device does not support 64bit"
|
||||||
|
" on 32bit pointers!");
|
||||||
|
#endif
|
||||||
|
return new AMDILCedarDevice(ptr);
|
||||||
|
} else if (deviceName == "barts"
|
||||||
|
|| deviceName == "turks") {
|
||||||
|
#if DEBUG
|
||||||
|
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||||
|
assert(!is64on32bit && "This device does not support 64bit"
|
||||||
|
" on 32bit pointers!");
|
||||||
|
#endif
|
||||||
|
return new AMDILNIDevice(ptr);
|
||||||
|
} else if (deviceName == "cayman") {
|
||||||
|
#if DEBUG
|
||||||
|
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||||
|
assert(!is64on32bit && "This device does not support 64bit"
|
||||||
|
" on 32bit pointers!");
|
||||||
|
#endif
|
||||||
|
return new AMDILCaymanDevice(ptr);
|
||||||
|
} else if (deviceName == "caicos") {
|
||||||
|
#if DEBUG
|
||||||
|
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||||
|
assert(!is64on32bit && "This device does not support 64bit"
|
||||||
|
" on 32bit pointers!");
|
||||||
|
#endif
|
||||||
|
return new AMDILNIDevice(ptr);
|
||||||
|
} else if (deviceName == "SI") {
|
||||||
|
return new AMDILSIDevice(ptr);
|
||||||
|
} else {
|
||||||
|
#if DEBUG
|
||||||
|
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||||
|
assert(!is64on32bit && "This device does not support 64bit"
|
||||||
|
" on 32bit pointers!");
|
||||||
|
#endif
|
||||||
|
return new AMDIL7XXDevice(ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // End namespace AMDILDeviceInfo
|
||||||
|
} // End namespace llvm
|
89
lib/Target/AMDGPU/AMDILDeviceInfo.h
Normal file
89
lib/Target/AMDGPU/AMDILDeviceInfo.h
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
#ifndef _AMDILDEVICEINFO_H_
|
||||||
|
#define _AMDILDEVICEINFO_H_
|
||||||
|
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace llvm {

class AMDILDevice;
class AMDILSubtarget;

namespace AMDILDeviceInfo {

// How a capability is executed: by a hardware instruction, by a sequence of
// software instructions that emulate it, or not at all.
enum ExecutionMode {
  // Feature is not supported on the card (default value).
  Unsupported = 0,
  // Feature is emulated in software.
  Software,
  // Feature exists natively in hardware.
  Hardware
};

// Any changes to this needs to have a corresponding update to the
// twiki page GPUMetadataABI
enum Caps {
  HalfOps        = 0x1,  // Half float is supported or not.
  DoubleOps      = 0x2,  // Double is supported or not.
  ByteOps        = 0x3,  // Byte(char) is supported or not.
  ShortOps       = 0x4,  // Short is supported or not.
  LongOps        = 0x5,  // Long is supported or not.
  Images         = 0x6,  // Images are supported or not.
  ByteStores     = 0x7,  // ByteStores available(!HD4XXX).
  ConstantMem    = 0x8,  // Constant/CB memory.
  LocalMem       = 0x9,  // Local/LDS memory.
  PrivateMem     = 0xA,  // Scratch/Private/Stack memory.
  RegionMem      = 0xB,  // OCL GDS Memory Extension.
  FMA            = 0xC,  // Use HW FMA or SW FMA.
  ArenaSegment   = 0xD,  // Use for Arena UAV per pointer 12-1023.
  MultiUAV       = 0xE,  // Use for UAV per Pointer 0-7.
  Reserved0      = 0xF,  // ReservedFlag
  NoAlias        = 0x10, // Cached loads.
  Signed24BitOps = 0x11, // Peephole Optimization.
  // Debug mode implies that no hardware features or optimizations
  // are performed and that all memory accesses go through a single
  // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
  Debug          = 0x12, // Debug mode is enabled.
  CachedMem      = 0x13, // Cached mem is available or not.
  BarrierDetect  = 0x14, // Detect duplicate barriers.
  Reserved1      = 0x15, // Reserved flag
  ByteLDSOps     = 0x16, // Flag to specify if byte LDS ops are available.
  ArenaVectors   = 0x17, // Flag to specify if vector loads from arena work.
  TmrReg         = 0x18, // Flag to specify if Tmr register is supported.
  NoInline       = 0x19, // Flag to specify that no inlining should occur.
  MacroDB        = 0x1A, // Flag to specify that backend handles macrodb.
  HW64BitDivMod  = 0x1B, // Flag for backend to generate 64bit div/mod.
  ArenaUAV       = 0x1C, // Flag to specify that arena uav is supported.
  PrivateUAV     = 0x1D, // Flag to specify that private memory uses uav's.
  // If more capabilities are required, then this number needs to be
  // increased. All capabilities must come before this number.
  MaxNumberCapabilities = 0x20
};

// These have to be in order with the older generations
// having the lower number enumerations.
enum Generation {
  HD4XXX = 0, // 7XX based devices.
  HD5XXX,     // Evergreen based devices.
  HD6XXX,     // NI/Evergreen+ based devices.
  HD7XXX,
  HDTEST,     // Experimental feature testing device.
  HDNUMGEN
};

// Creates the device object for the device called `name`; both pointer-size
// flags default to false.
AMDILDevice*
getDeviceFromName(const std::string &name, AMDILSubtarget *ptr,
                  bool is64bit = false, bool is64on32bit = false);

} // namespace AMDILDeviceInfo
} // namespace llvm
|
||||||
|
#endif // _AMDILDEVICEINFO_H_
|
19
lib/Target/AMDGPU/AMDILDevices.h
Normal file
19
lib/Target/AMDGPU/AMDILDevices.h
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
#ifndef __AMDIL_DEVICES_H_
|
||||||
|
#define __AMDIL_DEVICES_H_
|
||||||
|
// Include all of the device specific header files
|
||||||
|
// This file is for Internal use only!
|
||||||
|
#include "AMDIL7XXDevice.h"
|
||||||
|
#include "AMDILDevice.h"
|
||||||
|
#include "AMDILEvergreenDevice.h"
|
||||||
|
#include "AMDILNIDevice.h"
|
||||||
|
#include "AMDILSIDevice.h"
|
||||||
|
|
||||||
|
#endif // __AMDIL_DEVICES_H_
|
522
lib/Target/AMDGPU/AMDILEnumeratedTypes.td
Normal file
522
lib/Target/AMDGPU/AMDILEnumeratedTypes.td
Normal file
@ -0,0 +1,522 @@
|
|||||||
|
//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
// ILEnumeratedTypes.td - The IL Enumerated Types
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Section 5.1 IL Shader
|
||||||
|
// An IL shader kind; Value carries its 8-bit number.
class ILShader<bits<8> encoding> {
  bits<8> Value = encoding;
}
// Table 5-1
def IL_SHADER_PIXEL   : ILShader<0>;
def IL_SHADER_COMPUTE : ILShader<1>;
|
||||||
|
|
||||||
|
// Section 5.2 IL RegType
|
||||||
|
// An IL register type; Value carries its 6-bit number.
class ILRegType<bits<6> encoding> {
  bits<6> Value = encoding;
}
// Table 5-2
def IL_REGTYPE_TEMP      : ILRegType<0>;
def IL_REGTYPE_WINCOORD  : ILRegType<1>;
def IL_REGTYPE_CONST_BUF : ILRegType<2>;
def IL_REGTYPE_LITERAL   : ILRegType<3>;
def IL_REGTYPE_ITEMP     : ILRegType<4>;
def IL_REGTYPE_GLOBAL    : ILRegType<5>;
|
||||||
|
|
||||||
|
// Section 5.3 IL Component Select
|
||||||
|
// An IL component selector: a 3-bit number plus the text associated with it.
class ILComponentSelect<bits<3> encoding, string name> {
  bits<3> Value = encoding;
  string Text = name;
}
// Table 5-3
def IL_COMPSEL_X : ILComponentSelect<0, "x">;
def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
def IL_COMPSEL_W : ILComponentSelect<3, "w">;
def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
|
||||||
|
|
||||||
|
// Section 5.4 IL Mod Dst Comp
|
||||||
|
// An IL destination component modifier: a 2-bit number plus its text.
// The four WRITE_* records share the value 1 and differ only in their text.
class ILModDstComp<bits<2> encoding, string name> {
  bits<2> Value = encoding;
  string Text = name;
}
// Table 5-4
def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
def IL_MODCOMP_0       : ILModDstComp<2, "0">;
def IL_MODCOMP_1       : ILModDstComp<3, "1">;
|
||||||
|
|
||||||
|
// Section 5.5 IL Import Usage
|
||||||
|
// An IL import usage: a 1-bit number plus its text.
class ILImportUsage<bits<1> encoding, string name> {
  bits<1> Value = encoding;
  string Text = name;
}
// Table 5-5
def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
|
||||||
|
|
||||||
|
// Section 5.6 IL Shift Scale
|
||||||
|
// An IL shift scale: a 4-bit number plus the suffix text associated with it.
class ILShiftScale<bits<4> encoding, string suffix> {
  bits<4> Value = encoding;
  string Text = suffix;
}

// Table 5-6
def IL_SHIFT_NONE : ILShiftScale<0, "">;
def IL_SHIFT_X2   : ILShiftScale<1, "_x2">;
def IL_SHIFT_X4   : ILShiftScale<2, "_x4">;
def IL_SHIFT_X8   : ILShiftScale<3, "_x8">;
def IL_SHIFT_D2   : ILShiftScale<4, "_d2">;
def IL_SHIFT_D4   : ILShiftScale<5, "_d4">;
def IL_SHIFT_D8   : ILShiftScale<6, "_d8">;
|
||||||
|
|
||||||
|
// Section 5.7 IL Divide Component
|
||||||
|
// An IL divide component: a 3-bit number plus its text.
class ILDivComp<bits<3> encoding, string suffix> {
  bits<3> Value = encoding;
  string Text = suffix;
}

// Table 5-7
def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
def IL_DIVCOMP_Y    : ILDivComp<1, "_divcomp(y)">;
def IL_DIVCOMP_Z    : ILDivComp<2, "_divcomp(z)">;
def IL_DIVCOMP_W    : ILDivComp<3, "_divcomp(w)">;
|
||||||
|
//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
|
||||||
|
|
||||||
|
// Section 5.8 IL Relational Op
|
||||||
|
// An IL relational operator: a 3-bit number plus its text.
class ILRelOp<bits<3> encoding, string suffix> {
  bits<3> Value = encoding;
  string Text = suffix;
}

// Table 5-8
def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
|
||||||
|
|
||||||
|
// Section 5.9 IL Zero Op
|
||||||
|
// An IL zero-op behavior: a 3-bit number plus its text.
class ILZeroOp<bits<3> encoding, string suffix> {
  bits<3> Value = encoding;
  string Text = suffix;
}

// Table 5-9
def IL_ZEROOP_FLTMAX       : ILZeroOp<0, "_zeroop(fltmax)">;
def IL_ZEROOP_0            : ILZeroOp<1, "_zeroop(zero)">;
def IL_ZEROOP_INFINITY     : ILZeroOp<2, "_zeroop(infinity)">;
def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
|
||||||
|
|
||||||
|
// Section 5.10 IL Cmp Value
|
||||||
|
// An IL compare value: a 3-bit number plus the literal text it stands for.
class ILCmpValue<bits<3> encoding, string literal> {
  bits<3> Value = encoding;
  string Text = literal;
}

// Table 5-10
def IL_CMPVAL_0_0     : ILCmpValue<0, "0.0">;
def IL_CMPVAL_0_5     : ILCmpValue<1, "0.5">;
def IL_CMPVAL_1_0     : ILCmpValue<2, "1.0">;
def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
|
||||||
|
|
||||||
|
// Section 5.11 IL Addressing
|
||||||
|
// An IL addressing mode; Value carries its 3-bit number.
class ILAddressing<bits<3> encoding> {
  bits<3> Value = encoding;
}

// Table 5-11
def IL_ADDR_ABSOLUTE     : ILAddressing<0>;
def IL_ADDR_RELATIVE     : ILAddressing<1>;
def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
|
||||||
|
|
||||||
|
// Section 5.11 IL Element Format
|
||||||
|
// An IL element format; Value carries its 5-bit number.
class ILElementFormat<bits<5> encoding> {
  bits<5> Value = encoding;
}

// Table 5-11
def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
def IL_ELEMENTFORMAT_SNORM   : ILElementFormat<1>;
def IL_ELEMENTFORMAT_UNORM   : ILElementFormat<2>;
def IL_ELEMENTFORMAT_SINT    : ILElementFormat<3>;
def IL_ELEMENTFORMAT_UINT    : ILElementFormat<4>;
def IL_ELEMENTFORMAT_FLOAT   : ILElementFormat<5>;
def IL_ELEMENTFORMAT_SRGB    : ILElementFormat<6>;
def IL_ELEMENTFORMAT_MIXED   : ILElementFormat<7>;
def IL_ELEMENTFORMAT_Last    : ILElementFormat<8>;
|
||||||
|
|
||||||
|
// Section 5.12 IL Op Code
|
||||||
|
// An IL opcode: a 16-bit number (default -1) plus the text associated
// with it.
class ILOpCode<bits<16> opcode = -1, string mnemonic> {
  bits<16> Value = opcode;
  string Text = mnemonic;
}
|
||||||
|
|
||||||
|
// Table 5-12
|
||||||
|
def IL_DCL_CONST_BUFFER : ILOpCode<0, "dcl_cb">;
|
||||||
|
def IL_DCL_INDEXED_TEMP_ARRAY : ILOpCode<1, "dcl_index_temp_array">;
|
||||||
|
def IL_DCL_INPUT : ILOpCode<2, "dcl_input">;
|
||||||
|
def IL_DCL_LITERAL : ILOpCode<3, "dcl_literal">;
|
||||||
|
def IL_DCL_OUTPUT : ILOpCode<4, "dcl_output">;
|
||||||
|
def IL_DCL_RESOURCE : ILOpCode<5, "dcl_resource">;
|
||||||
|
def IL_OP_ABS : ILOpCode<6, "abs">;
|
||||||
|
def IL_OP_ADD : ILOpCode<7, "add">;
|
||||||
|
def IL_OP_AND : ILOpCode<8, "iand">;
|
||||||
|
def IL_OP_BREAK : ILOpCode<9, "break">;
|
||||||
|
def IL_OP_BREAK_LOGICALNZ : ILOpCode<10, "break_logicalnz">;
|
||||||
|
def IL_OP_BREAK_LOGICALZ : ILOpCode<11, "break_logicalz">;
|
||||||
|
def IL_OP_BREAKC : ILOpCode<12, "breakc">;
|
||||||
|
def IL_OP_CALL : ILOpCode<13, "call">;
|
||||||
|
def IL_OP_CALL_LOGICALNZ : ILOpCode<14, "call_logicalnz">;
|
||||||
|
def IL_OP_CALL_LOGICALZ : ILOpCode<15, "call_logicalz">;
|
||||||
|
def IL_OP_CASE : ILOpCode<16, "case">;
|
||||||
|
def IL_OP_CLG : ILOpCode<17, "clg">;
|
||||||
|
def IL_OP_CMOV : ILOpCode<18, "cmov">;
|
||||||
|
def IL_OP_CMOV_LOGICAL : ILOpCode<19, "cmov_logical">;
|
||||||
|
def IL_OP_CMP : ILOpCode<20, "cmp">;
|
||||||
|
def IL_OP_CONTINUE : ILOpCode<21, "continue">;
|
||||||
|
def IL_OP_CONTINUE_LOGICALNZ : ILOpCode<22, "continue_logicalnz">;
|
||||||
|
def IL_OP_CONTINUE_LOGICALZ : ILOpCode<23, "continue_logicalz">;
|
||||||
|
def IL_OP_CONTINUEC : ILOpCode<24, "continuec">;
|
||||||
|
def IL_OP_COS : ILOpCode<25, "cos">;
|
||||||
|
def IL_OP_COS_VEC : ILOpCode<26, "cos_vec">;
|
||||||
|
def IL_OP_D_2_F : ILOpCode<27, "d2f">;
|
||||||
|
def IL_OP_D_ADD : ILOpCode<28, "dadd">;
|
||||||
|
def IL_OP_D_EQ : ILOpCode<29, "deq">;
|
||||||
|
def IL_OP_D_FRC : ILOpCode<30, "dfrac">;
|
||||||
|
def IL_OP_D_FREXP : ILOpCode<31, "dfrexp">;
|
||||||
|
def IL_OP_D_GE : ILOpCode<32, "dge">;
|
||||||
|
def IL_OP_D_LDEXP : ILOpCode<33, "dldexp">;
|
||||||
|
def IL_OP_D_LT : ILOpCode<34, "dlt">;
|
||||||
|
def IL_OP_D_MAD : ILOpCode<35, "dmad">;
|
||||||
|
def IL_OP_D_MUL : ILOpCode<36, "dmul">;
|
||||||
|
def IL_OP_D_NE : ILOpCode<37, "dne">;
|
||||||
|
def IL_OP_DEFAULT : ILOpCode<38, "default">;
|
||||||
|
def IL_OP_DISCARD_LOGICALNZ : ILOpCode<39, "discard_logicalnz">;
|
||||||
|
def IL_OP_DISCARD_LOGICALZ : ILOpCode<40, "discard_logicalz">;
|
||||||
|
def IL_OP_DIV : ILOpCode<41, "div_zeroop(infinity)">;
|
||||||
|
def IL_OP_DP2 : ILOpCode<42, "dp2">;
|
||||||
|
def IL_OP_DP3 : ILOpCode<43, "dp3">;
|
||||||
|
def IL_OP_DP4 : ILOpCode<44, "dp4">;
|
||||||
|
def IL_OP_ELSE : ILOpCode<45, "else">;
|
||||||
|
def IL_OP_END : ILOpCode<46, "end">;
|
||||||
|
def IL_OP_ENDFUNC : ILOpCode<47, "endfunc">;
|
||||||
|
def IL_OP_ENDIF : ILOpCode<48, "endif">;
|
||||||
|
def IL_OP_ENDLOOP : ILOpCode<49, "endloop">;
|
||||||
|
def IL_OP_ENDMAIN : ILOpCode<50, "endmain">;
|
||||||
|
def IL_OP_ENDSWITCH : ILOpCode<51, "endswitch">;
|
||||||
|
def IL_OP_EQ : ILOpCode<52, "eq">;
|
||||||
|
def IL_OP_EXP : ILOpCode<53, "exp">;
|
||||||
|
def IL_OP_EXP_VEC : ILOpCode<54, "exp_vec">;
|
||||||
|
def IL_OP_F_2_D : ILOpCode<55, "f2d">;
|
||||||
|
def IL_OP_FLR : ILOpCode<56, "flr">;
|
||||||
|
def IL_OP_FRC : ILOpCode<57, "frc">;
|
||||||
|
def IL_OP_FTOI : ILOpCode<58, "ftoi">;
|
||||||
|
def IL_OP_FTOU : ILOpCode<59, "ftou">;
|
||||||
|
def IL_OP_FUNC : ILOpCode<60, "func">;
|
||||||
|
def IL_OP_GE : ILOpCode<61, "ge">;
|
||||||
|
def IL_OP_I_ADD : ILOpCode<62, "iadd">;
|
||||||
|
def IL_OP_I_EQ : ILOpCode<63, "ieq">;
|
||||||
|
def IL_OP_I_GE : ILOpCode<64, "ige">;
|
||||||
|
def IL_OP_I_LT : ILOpCode<65, "ilt">;
|
||||||
|
def IL_OP_I_MAD : ILOpCode<66, "imad">;
|
||||||
|
def IL_OP_I_MAX : ILOpCode<67, "imax">;
|
||||||
|
def IL_OP_I_MIN : ILOpCode<68, "imin">;
|
||||||
|
def IL_OP_I_MUL : ILOpCode<69, "imul">;
|
||||||
|
def IL_OP_I_MUL_HIGH : ILOpCode<70, "imul_high">;
|
||||||
|
def IL_OP_I_NE : ILOpCode<71, "ine">;
|
||||||
|
def IL_OP_I_NEGATE : ILOpCode<72, "inegate">;
|
||||||
|
def IL_OP_I_NOT : ILOpCode<73, "inot">;
|
||||||
|
def IL_OP_I_OR : ILOpCode<74, "ior">;
|
||||||
|
def IL_OP_I_SHL : ILOpCode<75, "ishl">;
|
||||||
|
def IL_OP_I_SHR : ILOpCode<76, "ishr">;
|
||||||
|
def IL_OP_I_XOR : ILOpCode<77, "ixor">;
|
||||||
|
def IL_OP_IF_LOGICALNZ : ILOpCode<78, "if_logicalnz">;
|
||||||
|
def IL_OP_IF_LOGICALZ : ILOpCode<79, "if_logicalz">;
|
||||||
|
def IL_OP_IFC : ILOpCode<80, "ifc">;
|
||||||
|
def IL_OP_ITOF : ILOpCode<81, "itof">;
|
||||||
|
def IL_OP_LN : ILOpCode<82, "ln">;
|
||||||
|
def IL_OP_LOG : ILOpCode<83, "log">;
|
||||||
|
def IL_OP_LOG_VEC : ILOpCode<84, "log_vec">;
|
||||||
|
def IL_OP_LOOP : ILOpCode<85, "loop">;
|
||||||
|
def IL_OP_LT : ILOpCode<86, "lt">;
|
||||||
|
def IL_OP_MAD : ILOpCode<87, "mad_ieee">;
|
||||||
|
def IL_OP_MAX : ILOpCode<88, "max_ieee">;
|
||||||
|
def IL_OP_MIN : ILOpCode<89, "min_ieee">;
|
||||||
|
def IL_OP_MOD : ILOpCode<90, "mod_ieee">;
|
||||||
|
def IL_OP_MOV : ILOpCode<91, "mov">;
|
||||||
|
def IL_OP_MUL_IEEE : ILOpCode<92, "mul_ieee">;
|
||||||
|
def IL_OP_NE : ILOpCode<93, "ne">;
|
||||||
|
def IL_OP_NRM : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
|
||||||
|
def IL_OP_POW : ILOpCode<95, "pow">;
|
||||||
|
def IL_OP_RCP : ILOpCode<96, "rcp">;
|
||||||
|
def IL_OP_RET : ILOpCode<97, "ret">;
|
||||||
|
def IL_OP_RET_DYN : ILOpCode<98, "ret_dyn">;
|
||||||
|
def IL_OP_RET_LOGICALNZ : ILOpCode<99, "ret_logicalnz">;
|
||||||
|
def IL_OP_RET_LOGICALZ : ILOpCode<100, "ret_logicalz">;
|
||||||
|
def IL_OP_RND : ILOpCode<101, "rnd">;
|
||||||
|
def IL_OP_ROUND_NEAR : ILOpCode<102, "round_nearest">;
|
||||||
|
def IL_OP_ROUND_NEG_INF : ILOpCode<103, "round_neginf">;
|
||||||
|
def IL_OP_ROUND_POS_INF : ILOpCode<104, "round_plusinf">;
|
||||||
|
def IL_OP_ROUND_ZERO : ILOpCode<105, "round_z">;
|
||||||
|
def IL_OP_RSQ : ILOpCode<106, "rsq">;
|
||||||
|
def IL_OP_RSQ_VEC : ILOpCode<107, "rsq_vec">;
|
||||||
|
def IL_OP_SAMPLE : ILOpCode<108, "sample">;
|
||||||
|
def IL_OP_SAMPLE_L : ILOpCode<109, "sample_l">;
|
||||||
|
def IL_OP_SET : ILOpCode<110, "set">;
|
||||||
|
def IL_OP_SGN : ILOpCode<111, "sgn">;
|
||||||
|
def IL_OP_SIN : ILOpCode<112, "sin">;
|
||||||
|
def IL_OP_SIN_VEC : ILOpCode<113, "sin_vec">;
|
||||||
|
def IL_OP_SUB : ILOpCode<114, "sub">;
|
||||||
|
def IL_OP_SWITCH : ILOpCode<115, "switch">;
|
||||||
|
def IL_OP_TRC : ILOpCode<116, "trc">;
|
||||||
|
def IL_OP_U_DIV : ILOpCode<117, "udiv">;
|
||||||
|
def IL_OP_U_GE : ILOpCode<118, "uge">;
|
||||||
|
def IL_OP_U_LT : ILOpCode<119, "ult">;
|
||||||
|
def IL_OP_U_MAD : ILOpCode<120, "umad">;
|
||||||
|
def IL_OP_U_MAX : ILOpCode<121, "umax">;
|
||||||
|
def IL_OP_U_MIN : ILOpCode<122, "umin">;
|
||||||
|
def IL_OP_U_MOD : ILOpCode<123, "umod">;
|
||||||
|
def IL_OP_U_MUL : ILOpCode<124, "umul">;
|
||||||
|
def IL_OP_U_MUL_HIGH : ILOpCode<125, "umul_high">;
|
||||||
|
def IL_OP_U_SHR : ILOpCode<126, "ushr">;
|
||||||
|
def IL_OP_UTOF : ILOpCode<127, "utof">;
|
||||||
|
def IL_OP_WHILE : ILOpCode<128, "whileloop">;
|
||||||
|
// SC IL instructions that are not in CAL IL
|
||||||
|
def IL_OP_ACOS : ILOpCode<129, "acos">;
|
||||||
|
def IL_OP_ASIN : ILOpCode<130, "asin">;
|
||||||
|
def IL_OP_EXN : ILOpCode<131, "exn">;
|
||||||
|
def IL_OP_UBIT_REVERSE : ILOpCode<132, "ubit_reverse">;
|
||||||
|
def IL_OP_UBIT_EXTRACT : ILOpCode<133, "ubit_extract">;
|
||||||
|
def IL_OP_IBIT_EXTRACT : ILOpCode<134, "ibit_extract">;
|
||||||
|
def IL_OP_SQRT : ILOpCode<135, "sqrt">;
|
||||||
|
def IL_OP_SQRT_VEC : ILOpCode<136, "sqrt_vec">;
|
||||||
|
def IL_OP_ATAN : ILOpCode<137, "atan">;
|
||||||
|
// NOTE(review): opcode value 137 is also assigned to IL_OP_ATAN directly
// above; confirm whether the two are meant to share a value or whether
// IL_OP_TAN needs its own unique number.
def IL_OP_TAN : ILOpCode<137, "tan">;
|
||||||
|
def IL_OP_D_DIV : ILOpCode<138, "ddiv">;
|
||||||
|
def IL_OP_F_NEG : ILOpCode<139, "mov">;
|
||||||
|
def IL_OP_GT : ILOpCode<140, "gt">;
|
||||||
|
def IL_OP_LE : ILOpCode<141, "lt">;
|
||||||
|
def IL_OP_DIST : ILOpCode<142, "dist">;
|
||||||
|
def IL_OP_LEN : ILOpCode<143, "len">;
|
||||||
|
def IL_OP_MACRO : ILOpCode<144, "mcall">;
|
||||||
|
def IL_OP_INTR : ILOpCode<145, "call">;
|
||||||
|
def IL_OP_I_FFB_HI : ILOpCode<146, "ffb_hi">;
|
||||||
|
def IL_OP_I_FFB_LO : ILOpCode<147, "ffb_lo">;
|
||||||
|
def IL_OP_BARRIER : ILOpCode<148, "fence_threads_memory_lds">;
|
||||||
|
def IL_OP_BARRIER_LOCAL : ILOpCode<149, "fence_threads_lds">;
|
||||||
|
def IL_OP_BARRIER_GLOBAL : ILOpCode<150, "fence_threads_memory">;
|
||||||
|
def IL_OP_FENCE : ILOpCode<151, "fence_lds_memory">;
|
||||||
|
def IL_OP_FENCE_READ_ONLY : ILOpCode<152, "fence_lds_mem_read_only">;
|
||||||
|
def IL_OP_FENCE_WRITE_ONLY : ILOpCode<153, "fence_lds_mem_write_only">;
|
||||||
|
def IL_PSEUDO_INST : ILOpCode<154, ";Pseudo Op">;
|
||||||
|
def IL_OP_UNPACK_0 : ILOpCode<155, "unpack0">;
|
||||||
|
def IL_OP_UNPACK_1 : ILOpCode<156, "unpack1">;
|
||||||
|
def IL_OP_UNPACK_2 : ILOpCode<157, "unpack2">;
|
||||||
|
def IL_OP_UNPACK_3 : ILOpCode<158, "unpack3">;
|
||||||
|
def IL_OP_PI_REDUCE : ILOpCode<159, "pireduce">;
|
||||||
|
def IL_OP_IBIT_COUNT : ILOpCode<160, "icbits">;
|
||||||
|
def IL_OP_I_FFB_SGN : ILOpCode<161, "ffb_shi">;
|
||||||
|
def IL_OP_F2U4 : ILOpCode<162, "f_2_u4">;
|
||||||
|
def IL_OP_BIT_ALIGN : ILOpCode<163, "bitalign">;
|
||||||
|
def IL_OP_BYTE_ALIGN : ILOpCode<164, "bytealign">;
|
||||||
|
def IL_OP_U4_LERP : ILOpCode<165, "u4lerp">;
|
||||||
|
def IL_OP_SAD : ILOpCode<166, "sad">;
|
||||||
|
def IL_OP_SAD_HI : ILOpCode<167, "sadhi">;
|
||||||
|
def IL_OP_SAD4 : ILOpCode<168, "sad4">;
|
||||||
|
def IL_OP_UBIT_INSERT : ILOpCode<169, "ubit_insert">;
|
||||||
|
def IL_OP_I_CARRY : ILOpCode<170, "icarry">;
|
||||||
|
def IL_OP_I_BORROW : ILOpCode<171, "iborrow">;
|
||||||
|
def IL_OP_U_MAD24 : ILOpCode<172, "umad24">;
|
||||||
|
def IL_OP_U_MUL24 : ILOpCode<173, "umul24">;
|
||||||
|
def IL_OP_I_MAD24 : ILOpCode<174, "imad24">;
|
||||||
|
def IL_OP_I_MUL24 : ILOpCode<175, "imul24">;
|
||||||
|
def IL_OP_CLAMP : ILOpCode<176, "clamp">;
|
||||||
|
def IL_OP_LERP : ILOpCode<177, "lrp">;
|
||||||
|
def IL_OP_FMA : ILOpCode<178, "fma">;
|
||||||
|
def IL_OP_D_MIN : ILOpCode<179, "dmin">;
|
||||||
|
def IL_OP_D_MAX : ILOpCode<180, "dmax">;
|
||||||
|
def IL_OP_D_SQRT : ILOpCode<181, "dsqrt">;
|
||||||
|
def IL_OP_DP2_ADD : ILOpCode<182, "dp2add">;
|
||||||
|
def IL_OP_F16_TO_F32 : ILOpCode<183, "f162f">;
|
||||||
|
def IL_OP_F32_TO_F16 : ILOpCode<184, "f2f16">;
|
||||||
|
def IL_REG_LOCAL_ID_FLAT : ILOpCode<185, "vTidInGrpFlat">;
|
||||||
|
def IL_REG_LOCAL_ID : ILOpCode<186, "vTidInGrp">;
|
||||||
|
// Flattened global (absolute) thread id register. The text previously read
// "vAbsTidFlag"; the sibling flattened registers in this table all end in
// "Flat" (vTidInGrpFlat, vThreadGrpIDFlat), so the trailing 'g' was a typo.
def IL_REG_GLOBAL_ID_FLAT : ILOpCode<187, "vAbsTidFlat">;
|
||||||
|
def IL_REG_GLOBAL_ID : ILOpCode<188, "vAbsTid">;
|
||||||
|
def IL_REG_GROUP_ID_FLAT : ILOpCode<189, "vThreadGrpIDFlat">;
|
||||||
|
def IL_REG_GROUP_ID : ILOpCode<190, "vThreadGrpID">;
|
||||||
|
def IL_OP_D_RCP : ILOpCode<191, "drcp_zeroop(infinity)">;
|
||||||
|
def IL_OP_D_RSQ : ILOpCode<192, "drsq_zeroop(infinity)">;
|
||||||
|
def IL_OP_D_MOV : ILOpCode<193, "dmov">;
|
||||||
|
def IL_OP_D_MOVC : ILOpCode<194, "dmovc">;
|
||||||
|
def IL_OP_NOP : ILOpCode<195, "nop">;
|
||||||
|
def IL_OP_UAV_ADD : ILOpCode<196, "uav_add">;
|
||||||
|
def IL_OP_UAV_AND : ILOpCode<197, "uav_and">;
|
||||||
|
def IL_OP_UAV_MAX : ILOpCode<198, "uav_max">;
|
||||||
|
def IL_OP_UAV_MIN : ILOpCode<199, "uav_min">;
|
||||||
|
def IL_OP_UAV_OR : ILOpCode<200, "uav_or">;
|
||||||
|
def IL_OP_UAV_RSUB : ILOpCode<201, "uav_rsub">;
|
||||||
|
def IL_OP_UAV_SUB : ILOpCode<202, "uav_sub">;
|
||||||
|
def IL_OP_UAV_UMAX : ILOpCode<203, "uav_umax">;
|
||||||
|
def IL_OP_UAV_UMIN : ILOpCode<204, "uav_umin">;
|
||||||
|
def IL_OP_UAV_XOR : ILOpCode<205, "uav_xor">;
|
||||||
|
def IL_OP_UAV_INC : ILOpCode<206, "uav_uinc">;
|
||||||
|
def IL_OP_UAV_DEC : ILOpCode<207, "uav_udec">;
|
||||||
|
def IL_OP_UAV_CMP : ILOpCode<208, "uav_cmp">;
|
||||||
|
def IL_OP_UAV_READ_ADD : ILOpCode<209, "uav_read_add">;
|
||||||
|
def IL_OP_UAV_READ_AND : ILOpCode<210, "uav_read_and">;
|
||||||
|
def IL_OP_UAV_READ_MAX : ILOpCode<211, "uav_read_max">;
|
||||||
|
def IL_OP_UAV_READ_MIN : ILOpCode<212, "uav_read_min">;
|
||||||
|
def IL_OP_UAV_READ_OR : ILOpCode<213, "uav_read_or">;
|
||||||
|
def IL_OP_UAV_READ_RSUB : ILOpCode<214, "uav_read_rsub">;
|
||||||
|
def IL_OP_UAV_READ_SUB : ILOpCode<215, "uav_read_sub">;
|
||||||
|
def IL_OP_UAV_READ_UMAX : ILOpCode<216, "uav_read_umax">;
|
||||||
|
def IL_OP_UAV_READ_UMIN : ILOpCode<217, "uav_read_umin">;
|
||||||
|
def IL_OP_UAV_READ_XOR : ILOpCode<218, "uav_read_xor">;
|
||||||
|
def IL_OP_UAV_READ_INC : ILOpCode<219, "uav_read_uinc">;
|
||||||
|
def IL_OP_UAV_READ_DEC : ILOpCode<220, "uav_read_udec">;
|
||||||
|
def IL_OP_UAV_READ_XCHG : ILOpCode<221, "uav_read_xchg">;
|
||||||
|
def IL_OP_UAV_READ_CMPXCHG : ILOpCode<222, "uav_read_cmp_xchg">;
|
||||||
|
def IL_OP_LDS_ADD : ILOpCode<223, "lds_add">;
|
||||||
|
def IL_OP_LDS_AND : ILOpCode<224, "lds_and">;
|
||||||
|
def IL_OP_LDS_MAX : ILOpCode<225, "lds_max">;
|
||||||
|
def IL_OP_LDS_MIN : ILOpCode<226, "lds_min">;
|
||||||
|
def IL_OP_LDS_OR : ILOpCode<227, "lds_or">;
|
||||||
|
def IL_OP_LDS_RSUB : ILOpCode<228, "lds_rsub">;
|
||||||
|
def IL_OP_LDS_SUB : ILOpCode<229, "lds_sub">;
|
||||||
|
def IL_OP_LDS_UMAX : ILOpCode<230, "lds_umax">;
|
||||||
|
def IL_OP_LDS_UMIN : ILOpCode<231, "lds_umin">;
|
||||||
|
def IL_OP_LDS_XOR : ILOpCode<232, "lds_xor">;
|
||||||
|
def IL_OP_LDS_INC : ILOpCode<233, "lds_inc">;
|
||||||
|
def IL_OP_LDS_DEC : ILOpCode<234, "lds_dec">;
|
||||||
|
def IL_OP_LDS_CMP : ILOpCode<235, "lds_cmp">;
|
||||||
|
def IL_OP_LDS_READ_ADD : ILOpCode<236, "lds_read_add">;
|
||||||
|
def IL_OP_LDS_READ_AND : ILOpCode<237, "lds_read_and">;
|
||||||
|
def IL_OP_LDS_READ_MAX : ILOpCode<238, "lds_read_max">;
|
||||||
|
def IL_OP_LDS_READ_MIN : ILOpCode<239, "lds_read_min">;
|
||||||
|
def IL_OP_LDS_READ_OR : ILOpCode<240, "lds_read_or">;
|
||||||
|
def IL_OP_LDS_READ_RSUB : ILOpCode<241, "lds_read_rsub">;
|
||||||
|
def IL_OP_LDS_READ_SUB : ILOpCode<242, "lds_read_sub">;
|
||||||
|
def IL_OP_LDS_READ_UMAX : ILOpCode<243, "lds_read_umax">;
|
||||||
|
def IL_OP_LDS_READ_UMIN : ILOpCode<244, "lds_read_umin">;
|
||||||
|
def IL_OP_LDS_READ_XOR : ILOpCode<245, "lds_read_xor">;
|
||||||
|
def IL_OP_LDS_READ_INC : ILOpCode<246, "lds_read_inc">;
|
||||||
|
def IL_OP_LDS_READ_DEC : ILOpCode<247, "lds_read_dec">;
|
||||||
|
def IL_OP_LDS_READ_XCHG : ILOpCode<248, "lds_read_xchg">;
|
||||||
|
def IL_OP_LDS_READ_CMPXCHG : ILOpCode<249, "lds_read_cmp_xchg">;
|
||||||
|
def IL_OP_GDS_ADD : ILOpCode<250, "gds_add">;
|
||||||
|
def IL_OP_GDS_AND : ILOpCode<251, "gds_and">;
|
||||||
|
def IL_OP_GDS_MAX : ILOpCode<252, "gds_max">;
|
||||||
|
def IL_OP_GDS_MIN : ILOpCode<253, "gds_min">;
|
||||||
|
def IL_OP_GDS_OR : ILOpCode<254, "gds_or">;
|
||||||
|
def IL_OP_GDS_RSUB : ILOpCode<255, "gds_rsub">;
|
||||||
|
def IL_OP_GDS_SUB : ILOpCode<256, "gds_sub">;
|
||||||
|
def IL_OP_GDS_UMAX : ILOpCode<257, "gds_umax">;
|
||||||
|
def IL_OP_GDS_UMIN : ILOpCode<258, "gds_umin">;
|
||||||
|
def IL_OP_GDS_MSKOR : ILOpCode<259, "gds_mskor">;
|
||||||
|
def IL_OP_GDS_XOR : ILOpCode<260, "gds_xor">;
|
||||||
|
def IL_OP_GDS_INC : ILOpCode<261, "gds_inc">;
|
||||||
|
def IL_OP_GDS_DEC : ILOpCode<262, "gds_dec">;
|
||||||
|
def IL_OP_GDS_CMP : ILOpCode<263, "gds_cmp">;
|
||||||
|
def IL_OP_GDS_READ_ADD : ILOpCode<264, "gds_read_add">;
|
||||||
|
def IL_OP_GDS_READ_AND : ILOpCode<265, "gds_read_and">;
|
||||||
|
def IL_OP_GDS_READ_MAX : ILOpCode<266, "gds_read_max">;
|
||||||
|
def IL_OP_GDS_READ_MIN : ILOpCode<267, "gds_read_min">;
|
||||||
|
def IL_OP_GDS_READ_OR : ILOpCode<268, "gds_read_or">;
|
||||||
|
def IL_OP_GDS_READ_RSUB : ILOpCode<269, "gds_read_rsub">;
|
||||||
|
def IL_OP_GDS_READ_SUB : ILOpCode<270, "gds_read_sub">;
|
||||||
|
def IL_OP_GDS_READ_UMAX : ILOpCode<271, "gds_read_umax">;
|
||||||
|
def IL_OP_GDS_READ_UMIN : ILOpCode<272, "gds_read_umin">;
|
||||||
|
def IL_OP_GDS_READ_MSKOR : ILOpCode<273, "gds_read_mskor">;
|
||||||
|
def IL_OP_GDS_READ_XOR : ILOpCode<274, "gds_read_xor">;
|
||||||
|
def IL_OP_GDS_READ_INC : ILOpCode<275, "gds_read_inc">;
|
||||||
|
def IL_OP_GDS_READ_DEC : ILOpCode<276, "gds_read_dec">;
|
||||||
|
def IL_OP_GDS_READ_XCHG : ILOpCode<277, "gds_read_xchg">;
|
||||||
|
def IL_OP_GDS_READ_CMPXCHG : ILOpCode<278, "gds_read_cmp_xchg">;
|
||||||
|
def IL_OP_APPEND_BUF_ALLOC : ILOpCode<279, "append_buf_alloc">;
|
||||||
|
def IL_OP_APPEND_BUF_CONSUME : ILOpCode<280, "append_buf_consume">;
|
||||||
|
def IL_OP_I64_ADD : ILOpCode<281, "i64add">;
|
||||||
|
def IL_OP_I64_MAX : ILOpCode<282, "i64max">;
|
||||||
|
def IL_OP_U64_MAX : ILOpCode<283, "u64max">;
|
||||||
|
def IL_OP_I64_MIN : ILOpCode<284, "i64min">;
|
||||||
|
def IL_OP_U64_MIN : ILOpCode<285, "u64min">;
|
||||||
|
def IL_OP_I64_NEGATE : ILOpCode<286, "i64negate">;
|
||||||
|
def IL_OP_I64_SHL : ILOpCode<287, "i64shl">;
|
||||||
|
def IL_OP_I64_SHR : ILOpCode<288, "i64shr">;
|
||||||
|
def IL_OP_U64_SHR : ILOpCode<289, "u64shr">;
|
||||||
|
def IL_OP_I64_EQ : ILOpCode<290, "i64eq">;
|
||||||
|
def IL_OP_I64_GE : ILOpCode<291, "i64ge">;
|
||||||
|
def IL_OP_U64_GE : ILOpCode<292, "u64ge">;
|
||||||
|
def IL_OP_I64_LT : ILOpCode<293, "i64lt">;
|
||||||
|
def IL_OP_U64_LT : ILOpCode<294, "u64lt">;
|
||||||
|
def IL_OP_I64_NE : ILOpCode<295, "i64ne">;
|
||||||
|
def IL_OP_U_MULHI24 : ILOpCode<296, "umul24_high">;
|
||||||
|
def IL_OP_I_MULHI24 : ILOpCode<297, "imul24_high">;
|
||||||
|
def IL_OP_GDS_LOAD : ILOpCode<298, "gds_load">;
|
||||||
|
def IL_OP_GDS_STORE : ILOpCode<299, "gds_store">;
|
||||||
|
def IL_OP_LDS_LOAD : ILOpCode<300, "lds_load">;
|
||||||
|
def IL_OP_LDS_LOAD_VEC : ILOpCode<301, "lds_load_vec">;
|
||||||
|
def IL_OP_LDS_LOAD_BYTE : ILOpCode<302, "lds_load_byte">;
|
||||||
|
def IL_OP_LDS_LOAD_UBYTE : ILOpCode<303, "lds_load_ubyte">;
|
||||||
|
def IL_OP_LDS_LOAD_SHORT : ILOpCode<304, "lds_load_short">;
|
||||||
|
def IL_OP_LDS_LOAD_USHORT : ILOpCode<305, "lds_load_ushort">;
|
||||||
|
def IL_OP_LDS_STORE : ILOpCode<306, "lds_store">;
|
||||||
|
def IL_OP_LDS_STORE_VEC : ILOpCode<307, "lds_store_vec">;
|
||||||
|
def IL_OP_LDS_STORE_BYTE : ILOpCode<308, "lds_store_byte">;
|
||||||
|
def IL_OP_LDS_STORE_SHORT : ILOpCode<309, "lds_store_short">;
|
||||||
|
def IL_OP_RAW_UAV_LOAD : ILOpCode<310, "uav_raw_load">;
|
||||||
|
def IL_OP_RAW_UAV_STORE : ILOpCode<311, "uav_raw_store">;
|
||||||
|
def IL_OP_ARENA_UAV_LOAD : ILOpCode<312, "uav_arena_load">;
|
||||||
|
def IL_OP_ARENA_UAV_STORE : ILOpCode<313, "uav_arena_store">;
|
||||||
|
def IL_OP_LDS_MSKOR : ILOpCode<314, "lds_mskor">;
|
||||||
|
def IL_OP_LDS_READ_MSKOR : ILOpCode<315, "lds_read_mskor">;
|
||||||
|
def IL_OP_UAV_BYTE_LOAD : ILOpCode<316, "uav_byte_load">;
|
||||||
|
def IL_OP_UAV_UBYTE_LOAD : ILOpCode<317, "uav_ubyte_load">;
|
||||||
|
def IL_OP_UAV_SHORT_LOAD : ILOpCode<318, "uav_short_load">;
|
||||||
|
def IL_OP_UAV_USHORT_LOAD : ILOpCode<319, "uav_ushort_load">;
|
||||||
|
def IL_OP_UAV_BYTE_STORE : ILOpCode<320, "uav_byte_store">;
|
||||||
|
// NOTE(review): opcode value 320 is also assigned to IL_OP_UAV_BYTE_STORE
// directly above; confirm whether this def should carry a distinct value.
def IL_OP_UAV_SHORT_STORE : ILOpCode<320, "uav_short_store">;
|
||||||
|
def IL_OP_UAV_STORE : ILOpCode<321, "uav_store">;
|
||||||
|
def IL_OP_UAV_LOAD : ILOpCode<322, "uav_load">;
|
||||||
|
def IL_OP_MUL : ILOpCode<323, "mul">;
|
||||||
|
def IL_OP_DIV_INF : ILOpCode<324, "div_zeroop(infinity)">;
|
||||||
|
def IL_OP_DIV_FLTMAX : ILOpCode<325, "div_zeroop(fltmax)">;
|
||||||
|
def IL_OP_DIV_ZERO : ILOpCode<326, "div_zeroop(zero)">;
|
||||||
|
def IL_OP_DIV_INFELSEMAX : ILOpCode<327, "div_zeroop(inf_else_max)">;
|
||||||
|
def IL_OP_FTOI_FLR : ILOpCode<328, "ftoi_flr">;
|
||||||
|
def IL_OP_FTOI_RPI : ILOpCode<329, "ftoi_rpi">;
|
||||||
|
def IL_OP_F32_TO_F16_NEAR : ILOpCode<330, "f2f16_near">;
|
||||||
|
def IL_OP_F32_TO_F16_NEG_INF : ILOpCode<331, "f2f16_neg_inf">;
|
||||||
|
def IL_OP_F32_TO_F16_PLUS_INF : ILOpCode<332, "f2f16_plus_inf">;
|
||||||
|
def IL_OP_I64_MUL : ILOpCode<333, "i64mul">;
|
||||||
|
def IL_OP_U64_MUL : ILOpCode<334, "u64mul">;
|
||||||
|
def IL_OP_CU_ID : ILOpCode<355, "cu_id">;
|
||||||
|
def IL_OP_WAVE_ID : ILOpCode<356, "wave_id">;
|
||||||
|
def IL_OP_I64_SUB : ILOpCode<357, "i64sub">;
|
||||||
|
def IL_OP_I64_DIV : ILOpCode<358, "i64div">;
|
||||||
|
def IL_OP_U64_DIV : ILOpCode<359, "u64div">;
|
||||||
|
def IL_OP_I64_MOD : ILOpCode<360, "i64mod">;
|
||||||
|
def IL_OP_U64_MOD : ILOpCode<361, "u64mod">;
|
||||||
|
def IL_DCL_GWS_THREAD_COUNT : ILOpCode<362, "dcl_gws_thread_count">;
|
||||||
|
def IL_DCL_SEMAPHORE : ILOpCode<363, "dcl_semaphore">;
|
||||||
|
def IL_OP_SEMAPHORE_INIT : ILOpCode<364, "init_semaphore">;
|
||||||
|
def IL_OP_SEMAPHORE_WAIT : ILOpCode<365, "semaphore_wait">;
|
||||||
|
def IL_OP_SEMAPHORE_SIGNAL : ILOpCode<366, "semaphore_signal">;
|
||||||
|
def IL_OP_BARRIER_REGION : ILOpCode<377, "fence_threads_gds">;
|
||||||
|
def IL_OP_BFI : ILOpCode<394, "bfi">;
|
||||||
|
def IL_OP_BFM : ILOpCode<395, "bfm">;
|
||||||
|
def IL_DBG_STRING : ILOpCode<396, "dbg_string">;
|
||||||
|
def IL_DBG_LINE : ILOpCode<397, "dbg_line">;
|
||||||
|
def IL_DBG_TEMPLOC : ILOpCode<398, "dbg_temploc">;
|
183
lib/Target/AMDGPU/AMDILEvergreenDevice.cpp
Normal file
183
lib/Target/AMDGPU/AMDILEvergreenDevice.cpp
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
#include "AMDILEvergreenDevice.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Constructs the base Evergreen device: installs the Evergreen capability
// bits via setCaps() and then picks the device flag from the subtarget's
// device name. Any name other than "cedar", "redwood" or "cypress" is
// tagged as Juniper.
AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
  : AMDILDevice(ST) {
  setCaps();

  std::string deviceName = ST->getDeviceName();
  if (deviceName == "cedar")
    mDeviceFlag = OCL_DEVICE_CEDAR;
  else if (deviceName == "redwood")
    mDeviceFlag = OCL_DEVICE_REDWOOD;
  else if (deviceName == "cypress")
    mDeviceFlag = OCL_DEVICE_CYPRESS;
  else
    mDeviceFlag = OCL_DEVICE_JUNIPER;
}
|
||||||
|
|
||||||
|
// Destructor; the body is intentionally empty.
AMDILEvergreenDevice::~AMDILEvergreenDevice() {}
|
||||||
|
|
||||||
|
size_t AMDILEvergreenDevice::getMaxLDSSize() const {
|
||||||
|
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||||
|
return MAX_LDS_SIZE_800;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
size_t AMDILEvergreenDevice::getMaxGDSSize() const {
|
||||||
|
if (usesHardware(AMDILDeviceInfo::RegionMem)) {
|
||||||
|
return MAX_LDS_SIZE_800;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Reports the UAV limit for this device class, which is the fixed value 12.
uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const { return 12; }
|
||||||
|
|
||||||
|
uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const {
|
||||||
|
switch(id) {
|
||||||
|
default:
|
||||||
|
assert(0 && "ID type passed in is unknown!");
|
||||||
|
break;
|
||||||
|
case CONSTANT_ID:
|
||||||
|
case RAW_UAV_ID:
|
||||||
|
if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) {
|
||||||
|
return GLOBAL_RETURN_RAW_UAV_ID;
|
||||||
|
} else {
|
||||||
|
return DEFAULT_RAW_UAV_ID;
|
||||||
|
}
|
||||||
|
case GLOBAL_ID:
|
||||||
|
case ARENA_UAV_ID:
|
||||||
|
return DEFAULT_ARENA_UAV_ID;
|
||||||
|
case LDS_ID:
|
||||||
|
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||||
|
return DEFAULT_LDS_ID;
|
||||||
|
} else {
|
||||||
|
return DEFAULT_ARENA_UAV_ID;
|
||||||
|
}
|
||||||
|
case GDS_ID:
|
||||||
|
if (usesHardware(AMDILDeviceInfo::RegionMem)) {
|
||||||
|
return DEFAULT_GDS_ID;
|
||||||
|
} else {
|
||||||
|
return DEFAULT_ARENA_UAV_ID;
|
||||||
|
}
|
||||||
|
case SCRATCH_ID:
|
||||||
|
if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
|
||||||
|
return DEFAULT_SCRATCH_ID;
|
||||||
|
} else {
|
||||||
|
return DEFAULT_ARENA_UAV_ID;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t AMDILEvergreenDevice::getWavefrontSize() const {
|
||||||
|
return AMDILDevice::WavefrontSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t AMDILEvergreenDevice::getGeneration() const {
|
||||||
|
return AMDILDeviceInfo::HD5XXX;
|
||||||
|
}
|
||||||
|
|
||||||
|
void AMDILEvergreenDevice::setCaps() {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::ArenaSegment);
|
||||||
|
mHWBits.set(AMDILDeviceInfo::ArenaUAV);
|
||||||
|
if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
|
||||||
|
mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
|
||||||
|
}
|
||||||
|
mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::ByteStores);
|
||||||
|
}
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::LocalMem);
|
||||||
|
mSWBits.set(AMDILDeviceInfo::RegionMem);
|
||||||
|
} else {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::LocalMem);
|
||||||
|
mHWBits.set(AMDILDeviceInfo::RegionMem);
|
||||||
|
}
|
||||||
|
mHWBits.set(AMDILDeviceInfo::Images);
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::NoAlias);
|
||||||
|
}
|
||||||
|
if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::CachedMem);
|
||||||
|
}
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::MultiUAV);
|
||||||
|
}
|
||||||
|
if (mSTM->calVersion() > CAL_VERSION_SC_136) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
|
||||||
|
mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
|
||||||
|
mHWBits.set(AMDILDeviceInfo::ArenaVectors);
|
||||||
|
} else {
|
||||||
|
mSWBits.set(AMDILDeviceInfo::ArenaVectors);
|
||||||
|
}
|
||||||
|
if (mSTM->calVersion() > CAL_VERSION_SC_137) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::LongOps);
|
||||||
|
mSWBits.reset(AMDILDeviceInfo::LongOps);
|
||||||
|
}
|
||||||
|
mHWBits.set(AMDILDeviceInfo::TmrReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds on the Evergreen base construction, then applies the
// Cypress-specific capability bits.
AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
  : AMDILEvergreenDevice(ST) {
  setCaps();
}
|
||||||
|
|
||||||
|
// Destructor; the body is intentionally empty.
AMDILCypressDevice::~AMDILCypressDevice() {}
|
||||||
|
|
||||||
|
void AMDILCypressDevice::setCaps() {
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::DoubleOps);
|
||||||
|
mHWBits.set(AMDILDeviceInfo::FMA);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Builds on the Evergreen base construction, then applies the
// Cedar-specific capability bits.
AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
  : AMDILEvergreenDevice(ST) {
  setCaps();
}
|
||||||
|
|
||||||
|
// Destructor; the body is intentionally empty.
AMDILCedarDevice::~AMDILCedarDevice() {}
|
||||||
|
|
||||||
|
// Cedar-specific capabilities: FMA is recorded in the SW bit set.
void AMDILCedarDevice::setCaps() { mSWBits.set(AMDILDeviceInfo::FMA); }
|
||||||
|
|
||||||
|
size_t AMDILCedarDevice::getWavefrontSize() const {
|
||||||
|
return AMDILDevice::QuarterWavefrontSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds on the Evergreen base construction, then applies the
// Redwood-specific capability bits.
AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
  : AMDILEvergreenDevice(ST) {
  setCaps();
}
|
||||||
|
|
||||||
|
// Destructor; the body is intentionally empty.
AMDILRedwoodDevice::~AMDILRedwoodDevice() {}
|
||||||
|
|
||||||
|
// Redwood-specific capabilities: FMA is recorded in the SW bit set.
void AMDILRedwoodDevice::setCaps() { mSWBits.set(AMDILDeviceInfo::FMA); }
|
||||||
|
|
||||||
|
size_t AMDILRedwoodDevice::getWavefrontSize() const {
|
||||||
|
return AMDILDevice::HalfWavefrontSize;
|
||||||
|
}
|
87
lib/Target/AMDGPU/AMDILEvergreenDevice.h
Normal file
87
lib/Target/AMDGPU/AMDILEvergreenDevice.h
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface for the subtarget data classes.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// This file will define the interface that each generation needs to
|
||||||
|
// implement in order to correctly answer queries on the capabilities of the
|
||||||
|
// specific hardware.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
#ifndef _AMDILEVERGREENDEVICE_H_
|
||||||
|
#define _AMDILEVERGREENDEVICE_H_
|
||||||
|
#include "AMDILDevice.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class AMDILSubtarget;
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Evergreen generation of devices and their respective sub classes
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
||||||
|
// The AMDILEvergreenDevice is the base device class for all of the Evergreen
|
||||||
|
// series of cards. This class contains information required to differentiate
|
||||||
|
// the Evergreen device from the generic AMDILDevice. This device represents
|
||||||
|
// the capabilities of the 'Juniper' cards, also known as the HD57XX.
|
||||||
|
class AMDILEvergreenDevice : public AMDILDevice {
|
||||||
|
public:
|
||||||
|
AMDILEvergreenDevice(AMDILSubtarget *ST);
|
||||||
|
virtual ~AMDILEvergreenDevice();
|
||||||
|
virtual size_t getMaxLDSSize() const;
|
||||||
|
virtual size_t getMaxGDSSize() const;
|
||||||
|
virtual size_t getWavefrontSize() const;
|
||||||
|
virtual uint32_t getGeneration() const;
|
||||||
|
virtual uint32_t getMaxNumUAVs() const;
|
||||||
|
virtual uint32_t getResourceID(uint32_t) const;
|
||||||
|
protected:
|
||||||
|
virtual void setCaps();
|
||||||
|
}; // AMDILEvergreenDevice
|
||||||
|
|
||||||
|
// The AMDILCypressDevice is similar to the AMDILEvergreenDevice, except it has
|
||||||
|
// support for double precision operations. This device is used to represent
|
||||||
|
// both the Cypress and Hemlock cards, which are commercially known as HD58XX
|
||||||
|
// and HD59XX cards.
|
||||||
|
class AMDILCypressDevice : public AMDILEvergreenDevice {
|
||||||
|
public:
|
||||||
|
AMDILCypressDevice(AMDILSubtarget *ST);
|
||||||
|
virtual ~AMDILCypressDevice();
|
||||||
|
private:
|
||||||
|
virtual void setCaps();
|
||||||
|
}; // AMDILCypressDevice
|
||||||
|
|
||||||
|
|
||||||
|
// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
|
||||||
|
// devices. This class differs from the base AMDILEvergreenDevice in that the
|
||||||
|
// device is a ~quarter of the 'Juniper'. These are commercially known as the
|
||||||
|
// HD54XX and HD53XX series of cards.
|
||||||
|
class AMDILCedarDevice : public AMDILEvergreenDevice {
|
||||||
|
public:
|
||||||
|
AMDILCedarDevice(AMDILSubtarget *ST);
|
||||||
|
virtual ~AMDILCedarDevice();
|
||||||
|
virtual size_t getWavefrontSize() const;
|
||||||
|
private:
|
||||||
|
virtual void setCaps();
|
||||||
|
}; // AMDILCedarDevice
|
||||||
|
|
||||||
|
// The AMDILRedwoodDevice is the class that represents all of the 'Redwood' based
|
||||||
|
// devices. This class differs from the base class, in that these devices are
|
||||||
|
// considered about half of a 'Juniper' device. These are commercially known as
|
||||||
|
// the HD55XX and HD56XX series of cards.
|
||||||
|
class AMDILRedwoodDevice : public AMDILEvergreenDevice {
|
||||||
|
public:
|
||||||
|
AMDILRedwoodDevice(AMDILSubtarget *ST);
|
||||||
|
virtual ~AMDILRedwoodDevice();
|
||||||
|
virtual size_t getWavefrontSize() const;
|
||||||
|
private:
|
||||||
|
virtual void setCaps();
|
||||||
|
}; // AMDILRedwoodDevice
|
||||||
|
|
||||||
|
} // namespace llvm
|
||||||
|
#endif // _AMDILEVERGREENDEVICE_H_
|
175
lib/Target/AMDGPU/AMDILFormats.td
Normal file
175
lib/Target/AMDGPU/AMDILFormats.td
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
//==- AMDILFormats.td - AMDIL Instruction Formats ----*- tablegen -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
include "AMDILTokenDesc.td"
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// The parent IL instruction class that inherits the Instruction class. This
|
||||||
|
// class sets the corresponding namespace, the out and input dag lists the
|
||||||
|
// pattern to match to and the string to print out for the assembly printer.
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
    : Instruction {
  let Namespace = "AMDGPU";

  // Operand lists and the IL opcode record this instruction maps to.
  dag OutOperandList = outs;
  dag InOperandList = ins;
  ILOpCode operation = op;

  // Selection pattern and assembly text; a newline is appended to the
  // caller-supplied string.
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");

  let isPseudo = 1;
  let Itinerary = NullALU;

  // Per-instruction flags, cleared by default.
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
}
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Class that has one input parameter and one output parameter.
|
||||||
|
// The basic pattern for this class is "Opcode Dst, Src0" and
|
||||||
|
// handles the unary math operators.
|
||||||
|
// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
|
||||||
|
// if the addressing is register relative for input and output register 0.
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
class OneInOneOut<ILOpCode op, dag outs, dag ins,
                  string asmstr, list<dag> pattern>
    : ILFormat<op, outs, ins, asmstr, pattern> {
  // Destination register tokens, including the register-relative pair.
  ILDst dst_reg;
  ILDstMod dst_mod;
  ILRelAddr dst_rel;
  ILSrc dst_reg_rel;
  ILSrcMod dst_reg_rel_mod;

  // Source operand 0 tokens, including the register-relative pair.
  ILSrc src0_reg;
  ILSrcMod src0_mod;
  ILRelAddr src0_rel;
  ILSrc src0_reg_rel;
  ILSrcMod src0_reg_rel_mod;
}
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// This class is similar to the UnaryOp class, however, there is no
|
||||||
|
// result value to assign.
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
                   string asmstr, list<dag> pattern>
    : ILFormat<op, outs, ins, asmstr, pattern> {
  // Source operand 0 tokens only; this format declares no destination
  // fields.
  ILSrc src0_reg;
  ILSrcMod src0_mod;
  ILRelAddr src0_rel;
  ILSrc src0_reg_rel;
  ILSrcMod src0_reg_rel_mod;
}
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Set of classes that have two input parameters and one output parameter.
|
||||||
|
// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
|
||||||
|
// handles the binary math operators and comparison operations.
|
||||||
|
// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
|
||||||
|
// if the addressing is register relative for input register 1.
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
class TwoInOneOut<ILOpCode op, dag outs, dag ins,
                  string asmstr, list<dag> pattern>
    : OneInOneOut<op, outs, ins, asmstr, pattern> {
  // Source operand 1 tokens, added on top of the dst/src0 fields inherited
  // from OneInOneOut.
  ILSrc src1_reg;
  ILSrcMod src1_mod;
  ILRelAddr src1_rel;
  ILSrc src1_reg_rel;
  ILSrcMod src1_reg_rel_mod;
}
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Similar to the UnaryOpNoRet class, but takes as arguments two input
|
||||||
|
// operands. Used mainly for barrier instructions on PC platform.
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
                    string asmstr, list<dag> pattern>
    : UnaryOpNoRet<op, outs, ins, asmstr, pattern> {
  // Source operand 1 tokens, added on top of the src0 fields inherited
  // from UnaryOpNoRet.
  ILSrc src1_reg;
  ILSrcMod src1_mod;
  ILRelAddr src1_rel;
  ILSrc src1_reg_rel;
  ILSrcMod src1_reg_rel_mod;
}
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Set of classes that have three input parameters and one output parameter.
|
||||||
|
// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
|
||||||
|
// handles the mad and conditional mov instruction.
|
||||||
|
// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
|
||||||
|
// if the addressing is register relative.
|
||||||
|
// This class is the parent class of TernaryOp
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
                    string asmstr, list<dag> pattern>
    : TwoInOneOut<op, outs, ins, asmstr, pattern> {
  // Source operand 2 tokens, added on top of the dst/src0/src1 fields
  // inherited from TwoInOneOut.
  ILSrc src2_reg;
  ILSrcMod src2_mod;
  ILRelAddr src2_rel;
  ILSrc src2_reg_rel;
  ILSrcMod src2_reg_rel_mod;
}
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Intrinsic classes
|
||||||
|
// Generic versions of the above classes but for Target specific intrinsics
|
||||||
|
// instead of SDNode patterns.
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Shorthand Intrinsic classes shared by the AMDIL intrinsic definitions.
// Naming scheme: <arity><kind>[NoRet]; the NoRet variants have an empty
// result list. Every class in this block gets TargetPrefix "AMDIL" and
// isTarget = 1 from the enclosing 'let'.
let TargetPrefix = "AMDIL", isTarget = 1 in {
  // No operands, fixed-width integer result.
  class VoidIntLong :
    Intrinsic<[llvm_i64_ty], [], []>;
  class VoidIntInt :
    Intrinsic<[llvm_i32_ty], [], []>;
  class VoidIntBool :
    Intrinsic<[llvm_i32_ty], [], []>;

  // One operand whose type matches the (overloaded) result type.
  class UnaryIntInt :
    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  class UnaryIntFloat :
    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

  // Conversions between overloaded integer and floating-point types.
  class ConvertIntFTOI :
    Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
  class ConvertIntITOF :
    Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;

  // One operand, no result; no memory attribute is specified.
  class UnaryIntNoRetInt :
    Intrinsic<[], [llvm_anyint_ty], []>;
  class UnaryIntNoRetFloat :
    Intrinsic<[], [llvm_anyfloat_ty], []>;

  // Two operands.
  class BinaryIntInt :
    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
              [IntrNoMem]>;
  class BinaryIntFloat :
    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
              [IntrNoMem]>;
  class BinaryIntNoRetInt :
    Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
  class BinaryIntNoRetFloat :
    Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;

  // Three and four operands.
  class TernaryIntInt :
    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
              LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
  class TernaryIntFloat :
    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
              LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
  class QuaternaryIntInt :
    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
              LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
              [IntrNoMem]>;

  // Atomics: a pointer operand followed by one to three i32 operands.
  // All of them are marked IntrReadWriteArgMem.
  class UnaryAtomicInt :
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
              [IntrReadWriteArgMem]>;
  class BinaryAtomicInt :
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrReadWriteArgMem]>;
  // The attribute list was missing here, unlike every sibling atomic class
  // (including TernaryAtomicIntNoRet); added for consistency.
  class TernaryAtomicInt :
    Intrinsic<[llvm_i32_ty],
              [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrReadWriteArgMem]>;
  class UnaryAtomicIntNoRet :
    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
  class BinaryAtomicIntNoRet :
    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrReadWriteArgMem]>;
  class TernaryAtomicIntNoRet :
    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrReadWriteArgMem]>;
}
|
53
lib/Target/AMDGPU/AMDILFrameLowering.cpp
Normal file
53
lib/Target/AMDGPU/AMDILFrameLowering.cpp
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface to describe a layout of a stack frame on an AMDIL target machine
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
#include "AMDILFrameLowering.h"
|
||||||
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
/// Build the AMDIL frame-lowering object. All four arguments are handed to
/// the TargetFrameLowering base class unchanged; no other state is set up.
AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
                                       int LAO, unsigned TransAl)
  : TargetFrameLowering(D, StackAl, LAO, TransAl) {}
|
||||||
|
|
||||||
|
/// Nothing to release: the class adds no members of its own.
AMDILFrameLowering::~AMDILFrameLowering() {}
|
||||||
|
|
||||||
|
/// getFrameIndexOffset - Returns the displacement from the frame register to
|
||||||
|
/// the stack frame of the specified index.
|
||||||
|
int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
|
||||||
|
int FI) const {
|
||||||
|
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
|
return MFI->getObjectOffset(FI);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report an empty spill-slot table: \p NumEntries is set to zero and a
/// null table pointer is returned.
const TargetFrameLowering::SpillSlot *
AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  const SpillSlot *NoSlots = 0;
  NumEntries = 0;
  return NoSlots;
}
|
||||||
|
void
|
||||||
|
AMDILFrameLowering::emitPrologue(MachineFunction &MF) const
|
||||||
|
{
|
||||||
|
}
|
||||||
|
void
|
||||||
|
AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
|
||||||
|
{
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
AMDILFrameLowering::hasFP(const MachineFunction &MF) const
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
46
lib/Target/AMDGPU/AMDILFrameLowering.h
Normal file
46
lib/Target/AMDGPU/AMDILFrameLowering.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface to describe a layout of a stack frame on an AMDIL target machine
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
#ifndef _AMDILFRAME_LOWERING_H_
|
||||||
|
#define _AMDILFRAME_LOWERING_H_
|
||||||
|
|
||||||
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
|
#include "llvm/Target/TargetFrameLowering.h"
|
||||||
|
|
||||||
|
/// Information about the stack frame layout on the AMDIL targets. It holds
|
||||||
|
/// the direction of the stack growth, the known stack alignment on entry to
|
||||||
|
/// each function, and the offset to the locals area.
|
||||||
|
/// See TargetFrameLowering for more comments.
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
/// Frame lowering for the AMDIL targets: a TargetFrameLowering subclass that
/// declares a constructor, a destructor and five virtual hooks.
class AMDILFrameLowering : public TargetFrameLowering {
public:
  /// \p TransAl defaults to 1 when omitted.
  AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
                     TransAl = 1);
  virtual ~AMDILFrameLowering();
  /// Offset associated with frame index \p FI in \p MF.
  virtual int getFrameIndexOffset(const MachineFunction &MF,
                                  int FI) const;
  /// Spill-slot table; the number of entries is written to \p NumEntries.
  virtual const SpillSlot *
  getCalleeSavedSpillSlots(unsigned &NumEntries) const;
  virtual void emitPrologue(MachineFunction &MF) const;
  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
  virtual bool hasFP(const MachineFunction &MF) const;
}; // class AMDILFrameLowering
|
||||||
|
} // namespace llvm
|
||||||
|
#endif // _AMDILFRAME_LOWERING_H_
|
393
lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
Normal file
393
lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
Normal file
@ -0,0 +1,393 @@
|
|||||||
|
//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines an instruction selector for the AMDIL target.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
#include "AMDGPUISelLowering.h" // For AMDGPUISD
|
||||||
|
#include "AMDILDevices.h"
|
||||||
|
#include "AMDILUtilityFunctions.h"
|
||||||
|
#include "llvm/ADT/ValueMap.h"
|
||||||
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||||
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||||
|
#include "llvm/Support/Compiler.h"
|
||||||
|
#include <list>
|
||||||
|
#include <queue>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Instruction Selector Implementation
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
|
||||||
|
// for SelectionDAG operations.
|
||||||
|
//
|
||||||
|
namespace {
/// SelectionDAG instruction selector for AMDIL. The hand-written Select()
/// entry point and the address/memory-space helpers are declared here; the
/// TableGen-generated matcher is textually included at the end of the class.
class AMDILDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a reference to the AMDIL Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDILSubtarget &Subtarget;
public:
  // AMDIL_OPT_LEVEL_DECL is a macro defined outside this file.
  AMDILDAGToDAGISel(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
  virtual ~AMDILDAGToDAGISel();

  /// Select machine code for \p N.
  SDNode *Select(SDNode *N);
  virtual const char *getPassName() const;

private:
  /// Wrap \p Imm in an i32 target constant.
  inline SDValue getSmallIPtrImm(unsigned Imm);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  /// True when \p ptr is non-null and its pointer type is in \p addrspace.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  /// Search from \p V for a pointer argument, global variable or alloca.
  static const Value *getBasePointerValue(const Value *V);

  // Address-space classification of stores.
  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  // Address-space classification of loads.
  static bool isCPLoad(const LoadSDNode *N);
  static bool isConstantLoad(const LoadSDNode *N, int cbID);
  static bool isGlobalLoad(const LoadSDNode *N);
  static bool isPrivateLoad(const LoadSDNode *N);
  static bool isLocalLoad(const LoadSDNode *N);
  static bool isRegionLoad(const LoadSDNode *N);

  // Base + offset address selectors.
  bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
  bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace
|
||||||
|
|
||||||
|
// createAMDILISelDag - This pass converts a legalized DAG into a AMDIL-specific
|
||||||
|
// DAG, ready for instruction scheduling.
|
||||||
|
//
|
||||||
|
FunctionPass *llvm::createAMDILISelDag(TargetMachine &TM
|
||||||
|
AMDIL_OPT_LEVEL_DECL) {
|
||||||
|
return new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forward the target machine (and optimization-level macro argument) to
/// SelectionDAGISel and cache the AMDIL subtarget reference.
AMDILDAGToDAGISel::AMDILDAGToDAGISel(TargetMachine &TM
                                     AMDIL_OPT_LEVEL_DECL)
  : SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR),
    Subtarget(TM.getSubtarget<AMDILSubtarget>()) {}
|
||||||
|
|
||||||
|
/// The selector owns nothing; the only member is a reference.
AMDILDAGToDAGISel::~AMDILDAGToDAGISel() {}
|
||||||
|
|
||||||
|
/// Materialize \p Imm as an i32 target constant in the current DAG.
SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
  SDValue ImmNode = CurDAG->getTargetConstant(Imm, MVT::i32);
  return ImmNode;
}
|
||||||
|
|
||||||
|
/// Split \p Addr into two operands (i32 flavour). Always succeeds.
///   - ISD::ADD:        R1/R2 are the two operands of the add.
///   - ISD::FrameIndex: R1 is the matching target frame index, R2 is 0.
///   - anything else:   R1 is \p Addr itself, R2 is 0.
bool AMDILDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue& R1, SDValue& R2) {
  const unsigned Opcode = Addr.getOpcode();

  if (Opcode == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
    return true;
  }

  // Only attempt the cast for FrameIndex nodes.
  FrameIndexSDNode *FIN = NULL;
  if (Opcode == ISD::FrameIndex)
    FIN = dyn_cast<FrameIndexSDNode>(Addr);

  if (FIN)
    R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
  else
    R1 = Addr;
  R2 = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
|
||||||
|
|
||||||
|
/// Reject target external symbols and target global addresses; every other
/// address is delegated to SelectADDRParam.
bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
  switch (Addr.getOpcode()) {
  case ISD::TargetExternalSymbol:
  case ISD::TargetGlobalAddress:
    return false;
  default:
    return SelectADDRParam(Addr, R1, R2);
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// 64-bit counterpart of SelectADDR: target external symbols and target
/// global addresses are rejected; otherwise \p Addr is split into R1/R2
/// with i64 frame indices and an i64 zero as the fallback second operand.
bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
  switch (Addr.getOpcode()) {
  case ISD::TargetExternalSymbol:
  case ISD::TargetGlobalAddress:
    return false;

  case ISD::ADD:
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
    return true;

  case ISD::FrameIndex:
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
      return true;
    }
    break;

  default:
    break;
  }

  // Plain address: use it as-is with a zero second operand.
  R1 = Addr;
  R2 = CurDAG->getTargetConstant(0, MVT::i64);
  return true;
}
|
||||||
|
|
||||||
|
/// Main selection hook. Nodes that already carry a machine opcode yield
/// NULL; FrameIndex nodes are morphed into an AMDGPU::COPY of the i32 target
/// frame index; everything else goes to the generated SelectCode().
SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
  if (N->isMachineOpcode())
    return NULL;   // Already selected.

  if (N->getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
      SDValue TFI = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY, N->getValueType(0), TFI);
    }
  }

  return SelectCode(N);
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
|
||||||
|
if (!ptr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Type *ptrType = ptr->getType();
|
||||||
|
return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Breadth-first search through the operands reachable from \p V for the
/// first value that is a pointer-typed Argument, a GlobalVariable or an
/// AllocaInst. Returns NULL when \p V is NULL or no such value is found.
///
/// Constant expressions contribute only their first operand; other
/// instructions contribute all operands; remaining value kinds are ignored.
const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V)
{
  if (!V)
    return NULL;

  ValueMap<const Value *, bool> Visited;
  std::queue<const Value *, std::list<const Value *> > Worklist;
  Worklist.push(V);

  while (!Worklist.empty()) {
    const Value *Cur = Worklist.front();
    Worklist.pop();

    // Each value is examined at most once.
    if (Visited.find(Cur) != Visited.end())
      continue;
    Visited[Cur] = true;

    if (isa<Argument>(Cur) && isa<PointerType>(Cur->getType()))
      return Cur;
    if (isa<GlobalVariable>(Cur))
      return Cur;

    if (isa<Constant>(Cur)) {
      if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Cur))
        Worklist.push(CE->getOperand(0));
      continue;
    }

    // Allocas must be tested before the generic Instruction case.
    if (isa<AllocaInst>(Cur))
      return Cur;

    if (const Instruction *I = dyn_cast<Instruction>(Cur)) {
      const uint32_t NumOps = I->getNumOperands();
      for (uint32_t Idx = 0; Idx < NumOps; ++Idx)
        Worklist.push(I->getOperand(Idx));
    }
    // Any other kind of Value is silently skipped.
  }

  return NULL;
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
|
||||||
|
return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
|
||||||
|
return (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
|
||||||
|
&& !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
|
||||||
|
&& !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
|
||||||
|
return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
|
||||||
|
return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
|
||||||
|
if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
MachineMemOperand *MMO = N->getMemOperand();
|
||||||
|
const Value *V = MMO->getValue();
|
||||||
|
const Value *BV = getBasePointerValue(V);
|
||||||
|
if (MMO
|
||||||
|
&& MMO->getValue()
|
||||||
|
&& ((V && dyn_cast<GlobalValue>(V))
|
||||||
|
|| (BV && dyn_cast<GlobalValue>(
|
||||||
|
getBasePointerValue(MMO->getValue()))))) {
|
||||||
|
return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
|
||||||
|
return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
|
||||||
|
return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
|
||||||
|
return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
|
||||||
|
MachineMemOperand *MMO = N->getMemOperand();
|
||||||
|
if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
|
||||||
|
if (MMO) {
|
||||||
|
const Value *V = MMO->getValue();
|
||||||
|
const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
|
||||||
|
if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
|
||||||
|
if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
|
||||||
|
// Check to make sure we are not a constant pool load or a constant load
|
||||||
|
// that is marked as a private load
|
||||||
|
if (isCPLoad(N) || isConstantLoad(N, -1)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
|
||||||
|
&& !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
|
||||||
|
&& !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
|
||||||
|
&& !checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
|
||||||
|
&& !checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
|
||||||
|
&& !checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Human-readable name of this pass.
const char *AMDILDAGToDAGISel::getPassName() const {
  const char *const Name = "AMDIL DAG->DAG Pattern Instruction Selection";
  return Name;
}
|
||||||
|
|
||||||
|
#ifdef DEBUGTMP
|
||||||
|
#undef INT64_C
|
||||||
|
#endif
|
||||||
|
#undef DEBUGTMP
|
||||||
|
|
||||||
|
///==== AMDGPU Functions ====///
|
||||||
|
|
||||||
|
/// Match an address as base + small dword offset.
///
/// Target external symbols and target global addresses never match. For an
/// ISD::ADD, each non-constant operand becomes \p Base and each constant
/// operand whose value, shifted right by two (bytes to dwords), passes
/// isInt<8> becomes \p Offset; the match succeeds only if such a constant
/// was found. Any other address matches with itself as base and offset 0.
bool AMDILDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
                                             SDValue& Offset) {
  const unsigned Opcode = Addr.getOpcode();
  if (Opcode == ISD::TargetExternalSymbol ||
      Opcode == ISD::TargetGlobalAddress)
    return false;

  if (Opcode != ISD::ADD) {
    // Default case, no offset
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  // Walk the operands of the add to find the base ptr and the offset.
  bool FoundOffset = false;
  for (unsigned Idx = 0, End = Addr.getNumOperands(); Idx != End; ++Idx) {
    SDValue Operand = Addr.getOperand(Idx);
    if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Operand)) {
      // The constant is a byte offset; the encoding wants dwords.
      if (isInt<8>(Const->getZExtValue() >> 2)) {
        FoundOffset = true;
        Offset = CurDAG->getTargetConstant(Const->getZExtValue() >> 2,
                                           MVT::i32);
      }
    } else {
      // Not a constant, so it must be the base pointer.
      Base = Operand;
    }
  }
  return FoundOffset;
}
|
||||||
|
|
||||||
|
/// Match an address for a VTX_READ as base + immediate offset. Always
/// succeeds; three shapes are distinguished:
///   - (add X, C) with C passing isInt<16>: base X, offset C.
///   - a constant C passing isInt<16>: base is a copy from AMDGPU::ZERO,
///     offset C.
///   - anything else: base is the address itself, offset 0.
bool AMDILDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                           SDValue &Offset)
{
  if (Addr.getOpcode() == ISD::ADD) {
    ConstantSDNode *AddImm = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (AddImm && isInt<16>(AddImm->getZExtValue())) {
      Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(AddImm->getZExtValue(), MVT::i32);
      return true;
    }
  }

  // If the pointer address is constant, we can move it to the offset field.
  ConstantSDNode *AddrImm = dyn_cast<ConstantSDNode>(Addr);
  if (AddrImm && isInt<16>(AddrImm->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  CurDAG->getEntryNode().getDebugLoc(),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(AddrImm->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
|
||||||
|
|
||||||
|
/// Match a register + register address: succeeds only for ISD::ADD nodes,
/// returning the two operands of the add as \p Base and \p Offset.
///
/// The original filled in \p Base and \p Offset and then returned false
/// unconditionally, so the selector could never match. A successful split
/// now reports true.
///
/// The explicit TargetExternalSymbol / TargetGlobalAddress tests of the
/// original are subsumed by the "not an ADD" test and have been dropped.
bool AMDILDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
                                      SDValue& Offset) {
  if (Addr.getOpcode() != ISD::ADD) {
    return false;
  }

  Base = Addr.getOperand(0);
  Offset = Addr.getOperand(1);

  return true;
}
|
1850
lib/Target/AMDGPU/AMDILISelLowering.cpp
Normal file
1850
lib/Target/AMDGPU/AMDILISelLowering.cpp
Normal file
File diff suppressed because it is too large
Load Diff
203
lib/Target/AMDGPU/AMDILISelLowering.h
Normal file
203
lib/Target/AMDGPU/AMDILISelLowering.h
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
//===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines the interfaces that AMDIL uses to lower LLVM code into a
|
||||||
|
// selection DAG.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDIL_ISELLOWERING_H_
|
||||||
|
#define AMDIL_ISELLOWERING_H_
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "llvm/CodeGen/CallingConvLower.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/SelectionDAG.h"
|
||||||
|
#include "llvm/Target/TargetLowering.h"
|
||||||
|
|
||||||
|
namespace llvm
|
||||||
|
{
|
||||||
|
// AMDIL-specific SelectionDAG node opcodes. Numbering starts at
// ISD::BUILTIN_OP_END and each following enumerator takes the next value,
// so the order of the entries is significant.
namespace AMDILISD
{
  enum
  {
    FIRST_NUMBER = ISD::BUILTIN_OP_END,
    CMOVLOG,     // 32bit FP Conditional move logical instruction
    MAD,         // 32bit Fused Multiply Add instruction
    VBUILD,      // scalar to vector mov instruction
    CALL,        // Function call based on a single integer
    SELECT_CC,   // Select the correct conditional instruction
    UMUL,        // 32bit unsigned multiplication
    DIV_INF,     // Divide with infinity returned on zero divisor
    CMP,
    // NOTE(review): presumably integer condition codes (GT/LT/GE/LE/EQ/NE);
    // inferred from the names only.
    IL_CC_I_GT,
    IL_CC_I_LT,
    IL_CC_I_GE,
    IL_CC_I_LE,
    IL_CC_I_EQ,
    IL_CC_I_NE,
    RET_FLAG,
    BRANCH_COND,
    LAST_ISD_NUMBER  // last enumerator; add new opcodes above this line
  };
} // AMDILISD
|
||||||
|
|
||||||
|
class MachineBasicBlock;
|
||||||
|
class MachineInstr;
|
||||||
|
class DebugLoc;
|
||||||
|
class TargetInstrInfo;
|
||||||
|
|
||||||
|
/// TargetLowering subclass for AMDIL. The public section declares the hooks
/// exposed to the rest of the code generator; the private section declares
/// calling-convention helpers and the per-operation Lower* routines.
class AMDILTargetLowering : public TargetLowering
{
public:
  AMDILTargetLowering(TargetMachine &TM);

  virtual SDValue
    LowerOperation(SDValue Op, SelectionDAG &DAG) const;

  /// computeMaskedBitsForTargetNode - Determine which of
  /// the bits specified
  /// in Mask are known to be either zero or one and return them in
  /// the
  /// KnownZero/KnownOne bitsets.
  virtual void
    computeMaskedBitsForTargetNode(
        const SDValue Op,
        APInt &KnownZero,
        APInt &KnownOne,
        const SelectionDAG &DAG,
        unsigned Depth = 0
        ) const;

  virtual bool
    getTgtMemIntrinsic(IntrinsicInfo &Info,
                       const CallInst &I, unsigned Intrinsic) const;
  virtual const char*
    getTargetNodeName(
        unsigned Opcode
        ) const;
  // We want to mark f32/f64 floating point values as
  // legal
  bool
    isFPImmLegal(const APFloat &Imm, EVT VT) const;
  // We don't want to shrink f64/f32 constants because
  // they both take up the same amount of space and
  // we don't want to use a f2d instruction.
  bool ShouldShrinkFPConstant(EVT VT) const;

  /// getFunctionAlignment - Return the Log2 alignment of this
  /// function.
  virtual unsigned int
    getFunctionAlignment(const Function *F) const;

private:
  // Calling-convention helpers.
  CCAssignFn*
    CCAssignFnForNode(unsigned int CC) const;

  SDValue LowerCallResult(SDValue Chain,
                          SDValue InFlag,
                          CallingConv::ID CallConv,
                          bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          DebugLoc dl,
                          SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals) const;

  SDValue LowerMemArgument(SDValue Chain,
                           CallingConv::ID CallConv,
                           const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                           DebugLoc dl, SelectionDAG &DAG,
                           const CCValAssign &VA, MachineFrameInfo *MFI,
                           unsigned i) const;

  SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
                           SDValue Arg,
                           DebugLoc dl, SelectionDAG &DAG,
                           const CCValAssign &VA,
                           ISD::ArgFlagsTy Flags) const;

  virtual SDValue
    LowerFormalArguments(SDValue Chain,
                         CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         DebugLoc dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const;

  virtual SDValue
    LowerCall(CallLoweringInfo &CLI,
              SmallVectorImpl<SDValue> &InVals) const;

  virtual SDValue
    LowerReturn(SDValue Chain,
                CallingConv::ID CallConv, bool isVarArg,
                const SmallVectorImpl<ISD::OutputArg> &Outs,
                const SmallVectorImpl<SDValue> &OutVals,
                DebugLoc dl, SelectionDAG &DAG) const;

  // Per-operation lowering routines; each takes the node to lower and the
  // DAG it belongs to.
  SDValue
    LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

  // Signed remainder, with width-specific variants.
  SDValue
    LowerSREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue
    LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
  SDValue
    LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
  SDValue
    LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
  SDValue
    LowerSREM64(SDValue Op, SelectionDAG &DAG) const;

  // Signed division, with width-specific variants.
  SDValue
    LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue
    LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
  SDValue
    LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
  SDValue
    LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerSELECT(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerSETCC(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

  // Defaults: size = 32, numEle = 1.
  EVT
    genIntType(uint32_t size = 32, uint32_t numEle = 1) const;

  SDValue
    LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;

  SDValue
    LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue
    LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

}; // AMDILTargetLowering
|
||||||
|
} // end namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDIL_ISELLOWERING_H_
|
508
lib/Target/AMDGPU/AMDILInstrInfo.cpp
Normal file
508
lib/Target/AMDGPU/AMDILInstrInfo.cpp
Normal file
@ -0,0 +1,508 @@
|
|||||||
|
//===- AMDILInstrInfo.cpp - AMDIL Instruction Information -------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the AMDIL implementation of the TargetInstrInfo class.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDILInstrInfo.h"
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "AMDILISelLowering.h"
|
||||||
|
#include "AMDILUtilityFunctions.h"
|
||||||
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||||
|
#include "llvm/Instructions.h"
|
||||||
|
|
||||||
|
#define GET_INSTRINFO_CTOR
|
||||||
|
#include "AMDGPUGenInstrInfo.inc"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Construct the instruction info; the embedded register info object (RI) is
// built from the target machine and a reference back to this object.
AMDILInstrInfo::AMDILInstrInfo(TargetMachine &tm)
  : AMDILGenInstrInfo(), RI(tm, *this) {}
|
||||||
|
|
||||||
|
// getRegisterInfo - Accessor for the register info object owned by this
// instruction info instance.
const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const
{
  return RI;
}
|
||||||
|
|
||||||
|
bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||||
|
unsigned &SrcReg, unsigned &DstReg,
|
||||||
|
unsigned &SubIdx) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
|
||||||
|
int &FrameIndex) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
||||||
|
int &FrameIndex) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
|
||||||
|
const MachineMemOperand *&MMO,
|
||||||
|
int &FrameIndex) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
unsigned AMDILInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
|
||||||
|
int &FrameIndex) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
unsigned AMDILInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
|
||||||
|
int &FrameIndex) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
|
||||||
|
const MachineMemOperand *&MMO,
|
||||||
|
int &FrameIndex) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// convertToThreeAddress - Stub. No conversion is attempted; the result is
// always a null pointer and none of the arguments are modified.
MachineInstr *
AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                      MachineBasicBlock::iterator &MBBI,
                                      LiveVariables *LV) const
{
  // TODO: Implement this function
  return NULL;
}
|
||||||
|
// getNextBranchInstr - Advance iter (in place) until it points at a branch
// instruction of MBB or reaches MBB.end().
//
// Returns true when iter stops on an AMDGPU::BRANCH or on one of the opcodes
// named by ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND) (presumably the
// per-type conditional branches; the macro is defined outside this file).
// Returns false when the end of the block is reached first.
bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
                                        MachineBasicBlock &MBB) const
{
  for (; iter != MBB.end(); ++iter) {
    switch (iter->getOpcode()) {
    ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
    case AMDGPU::BRANCH:
      return true;
    default:
      // Not a branch: keep scanning.
      break;
    }
  }
  return false;
}
|
||||||
|
|
||||||
|
// AnalyzeBranch - Branch analysis is disabled for this target.
//
// The function unconditionally returns true and never touches MBB, TBB, FBB
// or Cond. Under the TargetInstrInfo hook contract a true result means "the
// terminators of this block could not be understood", so generic passes
// leave the block's branches alone.
//
// History: an earlier implementation used to follow an unconditional
// `return true;` at the top of this function and was therefore never
// executed. That dead code has been removed; it was not safe to simply
// re-enable (it erased branch instructions without consulting AllowModify and
// dereferenced the second successor of MBB without checking that one
// exists). Re-implementing real analysis should start from the hook contract
// rather than from that draft.
bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  // TODO: Implement branch analysis. Until then report "cannot analyze".
  return true;
}
|
||||||
|
|
||||||
|
unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const {
|
||||||
|
const MachineInstr *MI = op.getParent();
|
||||||
|
|
||||||
|
switch (MI->getDesc().OpInfo->RegClass) {
|
||||||
|
default: // FIXME: fallthrough??
|
||||||
|
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
|
||||||
|
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock *TBB,
|
||||||
|
MachineBasicBlock *FBB,
|
||||||
|
const SmallVectorImpl<MachineOperand> &Cond,
|
||||||
|
DebugLoc DL) const
|
||||||
|
{
|
||||||
|
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
|
||||||
|
for (unsigned int x = 0; x < Cond.size(); ++x) {
|
||||||
|
Cond[x].getParent()->dump();
|
||||||
|
}
|
||||||
|
if (FBB == 0) {
|
||||||
|
if (Cond.empty()) {
|
||||||
|
BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB);
|
||||||
|
} else {
|
||||||
|
BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
|
||||||
|
.addMBB(TBB).addReg(Cond[0].getReg());
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
|
||||||
|
.addMBB(TBB).addReg(Cond[0].getReg());
|
||||||
|
BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB);
|
||||||
|
}
|
||||||
|
assert(0 && "Inserting two branches not supported");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// RemoveBranch - Erase up to two branch instructions from the end of MBB and
// return how many were erased (0, 1 or 2).
//
// The last instruction is erased if it is an AMDGPU::BRANCH or one of the
// opcodes named by ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND). The
// instruction that then becomes last is erased only if it is one of the
// BRANCH_COND opcodes.
unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{
  // Step 1: the trailing instruction may be any kind of branch.
  if (MBB.end() == MBB.begin())
    return 0;

  MachineBasicBlock::iterator Last = MBB.end();
  --Last;
  switch (Last->getOpcode()) {
  ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
  case AMDGPU::BRANCH:
    Last->eraseFromParent();
    break;
  default:
    return 0;
  }

  // Step 2: look at the new trailing instruction, if there is one.
  if (MBB.end() == MBB.begin())
    return 1;

  Last = MBB.end();
  --Last;
  switch (Last->getOpcode()) {
  // FIXME: only one case??
  ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
    Last->eraseFromParent();
    break;
  default:
    return 1;
  }
  return 2;
}
|
||||||
|
|
||||||
|
// skipFlowControl - Find the position in MBB that precedes its trailing run
// of flow-control markers (AMDGPU::ENDLOOP, AMDGPU::ENDIF, AMDGPU::ELSE).
//
// Returns an iterator to the first instruction of that trailing run:
//   - MBB->end() when the block is empty or its last instruction is not a
//     flow-control marker;
//   - MBB->begin() when every instruction in the block is a marker.
//
// The walk is bounded explicitly by MBB->begin(); it does not depend on
// converting an iterator to a boolean.
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator Pos = MBB->end();
  while (Pos != MBB->begin()) {
    MachineBasicBlock::iterator Prev = Pos;
    --Prev;
    const unsigned Opc = Prev->getOpcode();
    if (Opc != AMDGPU::ENDLOOP
        && Opc != AMDGPU::ENDIF
        && Opc != AMDGPU::ELSE) {
      // Prev is ordinary code, so Pos is where the trailing markers start.
      return Pos;
    }
    Pos = Prev;
  }
  // Reached the top of the block: it is empty or holds only markers.
  return Pos;
}
|
||||||
|
|
||||||
|
void
|
||||||
|
AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI,
|
||||||
|
unsigned SrcReg, bool isKill,
|
||||||
|
int FrameIndex,
|
||||||
|
const TargetRegisterClass *RC,
|
||||||
|
const TargetRegisterInfo *TRI) const {
|
||||||
|
unsigned int Opc = 0;
|
||||||
|
// MachineInstr *curMI = MI;
|
||||||
|
MachineFunction &MF = *(MBB.getParent());
|
||||||
|
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||||
|
|
||||||
|
DebugLoc DL;
|
||||||
|
switch (RC->getID()) {
|
||||||
|
case AMDGPU::GPRF32RegClassID:
|
||||||
|
Opc = AMDGPU::PRIVATESTORE_f32;
|
||||||
|
break;
|
||||||
|
case AMDGPU::GPRI32RegClassID:
|
||||||
|
Opc = AMDGPU::PRIVATESTORE_i32;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (MI != MBB.end()) DL = MI->getDebugLoc();
|
||||||
|
MachineMemOperand *MMO =
|
||||||
|
new MachineMemOperand(
|
||||||
|
MachinePointerInfo::getFixedStack(FrameIndex),
|
||||||
|
MachineMemOperand::MOLoad,
|
||||||
|
MFI.getObjectSize(FrameIndex),
|
||||||
|
MFI.getObjectAlignment(FrameIndex));
|
||||||
|
if (MI != MBB.end()) {
|
||||||
|
DL = MI->getDebugLoc();
|
||||||
|
}
|
||||||
|
BuildMI(MBB, MI, DL, get(Opc))
|
||||||
|
.addReg(SrcReg, getKillRegState(isKill))
|
||||||
|
.addFrameIndex(FrameIndex)
|
||||||
|
.addMemOperand(MMO)
|
||||||
|
.addImm(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI,
|
||||||
|
unsigned DestReg, int FrameIndex,
|
||||||
|
const TargetRegisterClass *RC,
|
||||||
|
const TargetRegisterInfo *TRI) const {
|
||||||
|
unsigned int Opc = 0;
|
||||||
|
MachineFunction &MF = *(MBB.getParent());
|
||||||
|
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||||
|
DebugLoc DL;
|
||||||
|
switch (RC->getID()) {
|
||||||
|
case AMDGPU::GPRF32RegClassID:
|
||||||
|
Opc = AMDGPU::PRIVATELOAD_f32;
|
||||||
|
break;
|
||||||
|
case AMDGPU::GPRI32RegClassID:
|
||||||
|
Opc = AMDGPU::PRIVATELOAD_i32;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
MachineMemOperand *MMO =
|
||||||
|
new MachineMemOperand(
|
||||||
|
MachinePointerInfo::getFixedStack(FrameIndex),
|
||||||
|
MachineMemOperand::MOLoad,
|
||||||
|
MFI.getObjectSize(FrameIndex),
|
||||||
|
MFI.getObjectAlignment(FrameIndex));
|
||||||
|
if (MI != MBB.end()) {
|
||||||
|
DL = MI->getDebugLoc();
|
||||||
|
}
|
||||||
|
BuildMI(MBB, MI, DL, get(Opc))
|
||||||
|
.addReg(DestReg, RegState::Define)
|
||||||
|
.addFrameIndex(FrameIndex)
|
||||||
|
.addMemOperand(MMO)
|
||||||
|
.addImm(0);
|
||||||
|
}
|
||||||
|
// foldMemoryOperandImpl (frame-index form) - Stub. No folding is performed;
// the result is always a null pointer.
MachineInstr *
AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops,
                                      int FrameIndex) const
{
  // TODO: Implement this function
  return 0;
}
|
||||||
|
// foldMemoryOperandImpl (load-instruction form) - Stub. No folding is
// performed; the result is always a null pointer.
MachineInstr *
AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops,
                                      MachineInstr *LoadMI) const
{
  // TODO: Implement this function
  return 0;
}
|
||||||
|
bool
|
||||||
|
AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
|
||||||
|
const SmallVectorImpl<unsigned> &Ops) const
|
||||||
|
{
|
||||||
|
// TODO: Implement this function
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
||||||
|
unsigned Reg, bool UnfoldLoad,
|
||||||
|
bool UnfoldStore,
|
||||||
|
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
||||||
|
SmallVectorImpl<SDNode*> &NewNodes) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned
|
||||||
|
AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
|
||||||
|
bool UnfoldLoad, bool UnfoldStore,
|
||||||
|
unsigned *LoadRegIndex) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// shouldScheduleLoadsNear - Heuristic deciding whether two loads should be
// scheduled next to each other.
//
// Answers true only when fewer than 16 loads have been grouped so far
// (NumLoads) and the two offsets are less than 16 apart. Offset2 is required
// to be strictly greater than Offset1. Load1 and Load2 are not inspected.
bool
AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                        int64_t Offset1, int64_t Offset2,
                                        unsigned NumLoads) const
{
  assert(Offset2 > Offset1
         && "Second offset should be larger than first offset!");

  // Too many loads already clustered.
  if (!(NumLoads < 16))
    return false;

  // TODO: Make the loads schedule near if it fits in a cacheline
  return (Offset2 - Offset1) < 16;
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
|
||||||
|
const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// insertNoop - Stub. Nothing is inserted; MBB is left unchanged.
void
AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI) const
{
  // TODO: Implement this function
}
|
||||||
|
|
||||||
|
// isPredicated - Stub. Always answers false.
bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const
{
  // TODO: Implement this function
  return false;
}
|
||||||
|
bool
|
||||||
|
AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||||
|
const SmallVectorImpl<MachineOperand> &Pred2)
|
||||||
|
const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// DefinesPredicate - Stub. Always answers false and never appends to Pred.
bool
AMDILInstrInfo::DefinesPredicate(MachineInstr *MI,
                                 std::vector<MachineOperand> &Pred) const
{
  // TODO: Implement this function
  return false;
}
|
||||||
|
|
||||||
|
// isPredicable - Forwards to the isPredicable() query of MI's instruction
// descriptor; no target-specific checks are applied yet.
bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const
{
  // TODO: Implement this function
  const bool DescSaysPredicable = MI->getDesc().isPredicable();
  return DescSaysPredicable;
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
|
||||||
|
// TODO: Implement this function
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// isLoadInst - Name-based test: true when the opcode name of MI contains
// "LOAD", unless it also contains "LOADCONST".
bool AMDILInstrInfo::isLoadInst(MachineInstr *MI) const
{
  const char *OpcodeName = getName(MI->getOpcode());
  if (strstr(OpcodeName, "LOADCONST") != 0)
    return false;
  return strstr(OpcodeName, "LOAD") != 0;
}
|
||||||
|
|
||||||
|
// isSWSExtLoadInst - Always answers false; MI is not inspected.
bool AMDILInstrInfo::isSWSExtLoadInst(MachineInstr *MI) const {
  return false;
}
|
||||||
|
|
||||||
|
// isExtLoadInst - Name-based test: true when the opcode name of MI contains
// "EXTLOAD".
bool AMDILInstrInfo::isExtLoadInst(MachineInstr *MI) const
{
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "EXTLOAD") != 0;
}
|
||||||
|
|
||||||
|
// isSExtLoadInst - Name-based test: true when the opcode name of MI contains
// "SEXTLOAD".
bool AMDILInstrInfo::isSExtLoadInst(MachineInstr *MI) const
{
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "SEXTLOAD") != 0;
}
|
||||||
|
|
||||||
|
// isAExtLoadInst - Name-based test: true when the opcode name of MI contains
// "AEXTLOAD".
bool AMDILInstrInfo::isAExtLoadInst(MachineInstr *MI) const
{
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "AEXTLOAD") != 0;
}
|
||||||
|
|
||||||
|
// isZExtLoadInst - Name-based test: true when the opcode name of MI contains
// "ZEXTLOAD".
bool AMDILInstrInfo::isZExtLoadInst(MachineInstr *MI) const
{
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "ZEXTLOAD") != 0;
}
|
||||||
|
|
||||||
|
// isStoreInst - Name-based test: true when the opcode name of MI contains
// "STORE".
bool AMDILInstrInfo::isStoreInst(MachineInstr *MI) const
{
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "STORE") != 0;
}
|
||||||
|
|
||||||
|
// isTruncStoreInst - Name-based test: true when the opcode name of MI
// contains "TRUNCSTORE".
bool AMDILInstrInfo::isTruncStoreInst(MachineInstr *MI) const
{
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "TRUNCSTORE") != 0;
}
|
||||||
|
|
||||||
|
// isAtomicInst - Name-based test: true when the opcode name of MI contains
// "ATOM".
bool AMDILInstrInfo::isAtomicInst(MachineInstr *MI) const
{
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "ATOM") != 0;
}
|
||||||
|
|
||||||
|
// isVolatileInst - True when at least one memory operand attached to MI is
// volatile; false when there is none (including when MI has no memory
// operands at all).
bool AMDILInstrInfo::isVolatileInst(MachineInstr *MI) const
{
  MachineInstr::mmo_iterator MemOp = MI->memoperands_begin();
  const MachineInstr::mmo_iterator MemOpEnd = MI->memoperands_end();
  for (; MemOp != MemOpEnd; ++MemOp) {
    // A single volatile operand is enough to make the instruction volatile.
    if ((*MemOp)->isVolatile())
      return true;
  }
  return false;
}
|
||||||
|
// isGlobalInst - Name-based test: true when the opcode name of MI contains
// "GLOBAL".
bool AMDILInstrInfo::isGlobalInst(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "GLOBAL") != 0;
}
|
||||||
|
// isPrivateInst - Name-based test: true when the opcode name of MI contains
// "PRIVATE".
bool AMDILInstrInfo::isPrivateInst(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "PRIVATE") != 0;
}
|
||||||
|
// isConstantInst - Name-based test: true when the opcode name of MI contains
// either "CONSTANT" or "CPOOL".
bool AMDILInstrInfo::isConstantInst(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  if (strstr(OpcodeName, "CONSTANT") != 0)
    return true;
  return strstr(OpcodeName, "CPOOL") != 0;
}
|
||||||
|
// isRegionInst - Name-based test: true when the opcode name of MI contains
// "REGION".
bool AMDILInstrInfo::isRegionInst(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "REGION") != 0;
}
|
||||||
|
// isLocalInst - Name-based test: true when the opcode name of MI contains
// "LOCAL".
bool AMDILInstrInfo::isLocalInst(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "LOCAL") != 0;
}
|
||||||
|
// isImageInst - Name-based test: true when the opcode name of MI contains
// "IMAGE".
bool AMDILInstrInfo::isImageInst(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "IMAGE") != 0;
}
|
||||||
|
// isAppendInst - Name-based test: true when the opcode name of MI contains
// "APPEND".
bool AMDILInstrInfo::isAppendInst(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "APPEND") != 0;
}
|
||||||
|
// isRegionAtomic - Name-based test: true when the opcode name of MI contains
// "ATOM_R".
bool AMDILInstrInfo::isRegionAtomic(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "ATOM_R") != 0;
}
|
||||||
|
// isLocalAtomic - Name-based test: true when the opcode name of MI contains
// "ATOM_L".
bool AMDILInstrInfo::isLocalAtomic(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "ATOM_L") != 0;
}
|
||||||
|
// isGlobalAtomic - True when the opcode name of MI contains "ATOM_G", or
// when isArenaAtomic(MI) holds.
bool AMDILInstrInfo::isGlobalAtomic(llvm::MachineInstr *MI) const {
  if (strstr(getName(MI->getOpcode()), "ATOM_G") != 0)
    return true;
  return isArenaAtomic(MI);
}
|
||||||
|
// isArenaAtomic - Name-based test: true when the opcode name of MI contains
// "ATOM_A".
bool AMDILInstrInfo::isArenaAtomic(llvm::MachineInstr *MI) const {
  const char *OpcodeName = getName(MI->getOpcode());
  return strstr(OpcodeName, "ATOM_A") != 0;
}
|
160
lib/Target/AMDGPU/AMDILInstrInfo.h
Normal file
160
lib/Target/AMDGPU/AMDILInstrInfo.h
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
//===- AMDILInstrInfo.h - AMDIL Instruction Information ---------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the AMDIL implementation of the TargetInstrInfo class.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDILINSTRUCTIONINFO_H_
|
||||||
|
#define AMDILINSTRUCTIONINFO_H_
|
||||||
|
|
||||||
|
#include "AMDILRegisterInfo.h"
|
||||||
|
#include "llvm/Target/TargetInstrInfo.h"
|
||||||
|
|
||||||
|
#define GET_INSTRINFO_HEADER
|
||||||
|
#include "AMDGPUGenInstrInfo.inc"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
// AMDIL - This namespace holds all of the target specific flags that
|
||||||
|
// instruction info tracks.
|
||||||
|
//
|
||||||
|
//class AMDILTargetMachine;
|
||||||
|
class AMDILInstrInfo : public AMDILGenInstrInfo {
|
||||||
|
private:
|
||||||
|
const AMDILRegisterInfo RI;
|
||||||
|
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
|
||||||
|
MachineBasicBlock &MBB) const;
|
||||||
|
unsigned int getBranchInstr(const MachineOperand &op) const;
|
||||||
|
public:
|
||||||
|
explicit AMDILInstrInfo(TargetMachine &tm);
|
||||||
|
|
||||||
|
// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
|
||||||
|
// such, whenever a client has an instance of instruction info, it should
|
||||||
|
// always be able to get register info as well (through this method).
|
||||||
|
const AMDILRegisterInfo &getRegisterInfo() const;
|
||||||
|
|
||||||
|
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||||
|
unsigned &DstReg, unsigned &SubIdx) const;
|
||||||
|
|
||||||
|
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
||||||
|
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
||||||
|
int &FrameIndex) const;
|
||||||
|
bool hasLoadFromStackSlot(const MachineInstr *MI,
|
||||||
|
const MachineMemOperand *&MMO,
|
||||||
|
int &FrameIndex) const;
|
||||||
|
unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
||||||
|
unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
|
||||||
|
int &FrameIndex) const;
|
||||||
|
bool hasStoreFromStackSlot(const MachineInstr *MI,
|
||||||
|
const MachineMemOperand *&MMO,
|
||||||
|
int &FrameIndex) const;
|
||||||
|
|
||||||
|
MachineInstr *
|
||||||
|
convertToThreeAddress(MachineFunction::iterator &MFI,
|
||||||
|
MachineBasicBlock::iterator &MBBI,
|
||||||
|
LiveVariables *LV) const;
|
||||||
|
|
||||||
|
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||||
|
MachineBasicBlock *&FBB,
|
||||||
|
SmallVectorImpl<MachineOperand> &Cond,
|
||||||
|
bool AllowModify) const;
|
||||||
|
|
||||||
|
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
|
||||||
|
|
||||||
|
unsigned
|
||||||
|
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||||
|
MachineBasicBlock *FBB,
|
||||||
|
const SmallVectorImpl<MachineOperand> &Cond,
|
||||||
|
DebugLoc DL) const;
|
||||||
|
|
||||||
|
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||||
|
unsigned DestReg, unsigned SrcReg,
|
||||||
|
bool KillSrc) const = 0;
|
||||||
|
|
||||||
|
void storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI,
|
||||||
|
unsigned SrcReg, bool isKill, int FrameIndex,
|
||||||
|
const TargetRegisterClass *RC,
|
||||||
|
const TargetRegisterInfo *TRI) const;
|
||||||
|
void loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI,
|
||||||
|
unsigned DestReg, int FrameIndex,
|
||||||
|
const TargetRegisterClass *RC,
|
||||||
|
const TargetRegisterInfo *TRI) const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
|
||||||
|
MachineInstr *MI,
|
||||||
|
const SmallVectorImpl<unsigned> &Ops,
|
||||||
|
int FrameIndex) const;
|
||||||
|
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
|
||||||
|
MachineInstr *MI,
|
||||||
|
const SmallVectorImpl<unsigned> &Ops,
|
||||||
|
MachineInstr *LoadMI) const;
|
||||||
|
public:
|
||||||
|
bool canFoldMemoryOperand(const MachineInstr *MI,
|
||||||
|
const SmallVectorImpl<unsigned> &Ops) const;
|
||||||
|
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
||||||
|
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
|
||||||
|
SmallVectorImpl<MachineInstr *> &NewMIs) const;
|
||||||
|
bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
||||||
|
SmallVectorImpl<SDNode *> &NewNodes) const;
|
||||||
|
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
|
||||||
|
bool UnfoldLoad, bool UnfoldStore,
|
||||||
|
unsigned *LoadRegIndex = 0) const;
|
||||||
|
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
||||||
|
int64_t Offset1, int64_t Offset2,
|
||||||
|
unsigned NumLoads) const;
|
||||||
|
|
||||||
|
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
|
||||||
|
void insertNoop(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI) const;
|
||||||
|
bool isPredicated(const MachineInstr *MI) const;
|
||||||
|
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||||
|
const SmallVectorImpl<MachineOperand> &Pred2) const;
|
||||||
|
bool DefinesPredicate(MachineInstr *MI,
|
||||||
|
std::vector<MachineOperand> &Pred) const;
|
||||||
|
bool isPredicable(MachineInstr *MI) const;
|
||||||
|
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
|
||||||
|
|
||||||
|
// Helper functions that check the opcode for status information
|
||||||
|
bool isLoadInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isExtLoadInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isSExtLoadInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isZExtLoadInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isAExtLoadInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isStoreInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isTruncStoreInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isAtomicInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isVolatileInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isGlobalInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isPrivateInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isConstantInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isRegionInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isLocalInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isImageInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isAppendInst(llvm::MachineInstr *MI) const;
|
||||||
|
bool isRegionAtomic(llvm::MachineInstr *MI) const;
|
||||||
|
bool isLocalAtomic(llvm::MachineInstr *MI) const;
|
||||||
|
bool isGlobalAtomic(llvm::MachineInstr *MI) const;
|
||||||
|
bool isArenaAtomic(llvm::MachineInstr *MI) const;
|
||||||
|
|
||||||
|
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||||
|
int64_t Imm) const = 0;
|
||||||
|
|
||||||
|
virtual unsigned getIEQOpcode() const = 0;
|
||||||
|
|
||||||
|
virtual bool isMov(unsigned Opcode) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // AMDILINSTRUCTIONINFO_H_
|
108
lib/Target/AMDGPU/AMDILInstrInfo.td
Normal file
108
lib/Target/AMDGPU/AMDILInstrInfo.td
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file describes the AMDIL instructions in TableGen format.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AMDIL Instruction Predicate Definitions
|
||||||
|
// Predicate that is set to true if the hardware supports double precision
|
||||||
|
// divide
|
||||||
|
def HasHWDDiv : Predicate<"Subtarget.device()"
|
||||||
|
"->getGeneration() > AMDILDeviceInfo::HD4XXX && "
|
||||||
|
"Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;
|
||||||
|
|
||||||
|
// Predicate that is set to true if the hardware supports double, but not double
|
||||||
|
// precision divide in hardware
|
||||||
|
def HasSWDDiv : Predicate<"Subtarget.device()"
|
||||||
|
"->getGeneration() == AMDILDeviceInfo::HD4XXX &&"
|
||||||
|
"Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;
|
||||||
|
|
||||||
|
// Predicate that is set to true if the hardware supports 24-bit signed
|
||||||
|
// math ops. Otherwise a software expansion to 32bit math ops is used instead.
|
||||||
|
def HasHWSign24Bit : Predicate<"Subtarget.device()"
|
||||||
|
"->getGeneration() > AMDILDeviceInfo::HD5XXX">;
|
||||||
|
|
||||||
|
// Predicates that tell whether 64-bit operations use hardware (HW) or software (SW) support
|
||||||
|
def HasHW64Bit : Predicate<"Subtarget.device()"
|
||||||
|
"->usesHardware(AMDILDeviceInfo::LongOps)">;
|
||||||
|
def HasSW64Bit : Predicate<"Subtarget.device()"
|
||||||
|
"->usesSoftware(AMDILDeviceInfo::LongOps)">;
|
||||||
|
|
||||||
|
// Predicate that is set to true if the timer register is supported
|
||||||
|
def HasTmrRegister : Predicate<"Subtarget.device()"
|
||||||
|
"->isSupported(AMDILDeviceInfo::TmrReg)">;
|
||||||
|
// Predicate that is true if we are at least evergreen series
|
||||||
|
def HasDeviceIDInst : Predicate<"Subtarget.device()"
|
||||||
|
"->getGeneration() >= AMDILDeviceInfo::HD5XXX">;
|
||||||
|
|
||||||
|
// Predicate that is true if we have region address space.
|
||||||
|
def hasRegionAS : Predicate<"Subtarget.device()"
|
||||||
|
"->usesHardware(AMDILDeviceInfo::RegionMem)">;
|
||||||
|
|
||||||
|
// Predicate that is true if we don't have region address space.
|
||||||
|
def noRegionAS : Predicate<"!Subtarget.device()"
|
||||||
|
"->isSupported(AMDILDeviceInfo::RegionMem)">;
|
||||||
|
|
||||||
|
|
||||||
|
// Predicates that tell whether 64-bit Mul is supported in the IL (HW) or not (SW)
|
||||||
|
def HasHW64Mul : Predicate<"Subtarget.calVersion()"
|
||||||
|
">= CAL_VERSION_SC_139"
|
||||||
|
"&& Subtarget.device()"
|
||||||
|
"->getGeneration() >="
|
||||||
|
"AMDILDeviceInfo::HD5XXX">;
|
||||||
|
def HasSW64Mul : Predicate<"Subtarget.calVersion()"
|
||||||
|
"< CAL_VERSION_SC_139">;
|
||||||
|
// Predicates that tell whether 64-bit Div/Mod uses hardware (HW) or software (SW) support
|
||||||
|
def HasHW64DivMod : Predicate<"Subtarget.device()"
|
||||||
|
"->usesHardware(AMDILDeviceInfo::HW64BitDivMod)">;
|
||||||
|
def HasSW64DivMod : Predicate<"Subtarget.device()"
|
||||||
|
"->usesSoftware(AMDILDeviceInfo::HW64BitDivMod)">;
|
||||||
|
|
||||||
|
// Predicates that tell whether 64-bit (Has64BitPtr) or 32-bit (Has32BitPtr) pointers are used.
|
||||||
|
def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
|
||||||
|
def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Custom Operands
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
include "AMDILOperands.td"
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Custom Selection DAG Type Profiles
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
include "AMDILProfiles.td"
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Custom Selection DAG Nodes
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
include "AMDILNodes.td"
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Custom Pattern DAG Nodes
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
include "AMDILPatterns.td"
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Instruction format classes
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
include "AMDILFormats.td"
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Multiclass Instruction formats
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
include "AMDILMultiClass.td"
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Intrinsics support
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
include "AMDILIntrinsics.td"
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Instructions support
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
include "AMDILInstructions.td"
|
143
lib/Target/AMDGPU/AMDILInstructions.td
Normal file
143
lib/Target/AMDGPU/AMDILInstructions.td
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
//===-- AMDILInstructions.td - AMDIL Instruction definitions --------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
let Predicates = [Has32BitPtr] in {
|
||||||
|
let isCodeGenOnly=1 in {
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Store Memory Operations
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
defm GLOBALTRUNCSTORE : GTRUNCSTORE<"!global trunc store">;
|
||||||
|
defm LOCALTRUNCSTORE : LTRUNCSTORE<"!local trunc store">;
|
||||||
|
defm LOCALSTORE : STORE<"!local store" , local_store>;
|
||||||
|
defm PRIVATETRUNCSTORE : PTRUNCSTORE<"!private trunc store">;
|
||||||
|
defm PRIVATESTORE : STORE<"!private store" , private_store>;
|
||||||
|
defm REGIONTRUNCSTORE : RTRUNCSTORE<"!region trunc store">;
|
||||||
|
defm REGIONSTORE : STORE<"!region hw store" , region_store>;
|
||||||
|
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// Load Memory Operations
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
defm GLOBALZEXTLOAD : LOAD<"!global zext load" , global_zext_load>;
|
||||||
|
defm GLOBALSEXTLOAD : LOAD<"!global sext load" , global_sext_load>;
|
||||||
|
defm GLOBALAEXTLOAD : LOAD<"!global aext load" , global_aext_load>;
|
||||||
|
defm PRIVATELOAD : LOAD<"!private load" , private_load>;
|
||||||
|
defm PRIVATEZEXTLOAD : LOAD<"!private zext load" , private_zext_load>;
|
||||||
|
defm PRIVATESEXTLOAD : LOAD<"!private sext load" , private_sext_load>;
|
||||||
|
defm PRIVATEAEXTLOAD : LOAD<"!private aext load" , private_aext_load>;
|
||||||
|
defm CPOOLLOAD : LOAD<"!constant pool load" , cp_load>;
|
||||||
|
defm CPOOLZEXTLOAD : LOAD<"!constant pool zext load", cp_zext_load>;
|
||||||
|
defm CPOOLSEXTLOAD : LOAD<"!constant pool sext load", cp_sext_load>;
|
||||||
|
defm CPOOLAEXTLOAD : LOAD<"!constant aext pool load", cp_aext_load>;
|
||||||
|
defm CONSTANTLOAD : LOAD<"!constant load" , constant_load>;
|
||||||
|
defm CONSTANTZEXTLOAD : LOAD<"!constant zext load" , constant_zext_load>;
|
||||||
|
defm CONSTANTSEXTLOAD : LOAD<"!constant sext load" , constant_sext_load>;
|
||||||
|
defm CONSTANTAEXTLOAD : LOAD<"!constant aext load" , constant_aext_load>;
|
||||||
|
defm LOCALLOAD : LOAD<"!local load" , local_load>;
|
||||||
|
defm LOCALZEXTLOAD : LOAD<"!local zext load" , local_zext_load>;
|
||||||
|
defm LOCALSEXTLOAD : LOAD<"!local sext load" , local_sext_load>;
|
||||||
|
defm LOCALAEXTLOAD : LOAD<"!local aext load" , local_aext_load>;
|
||||||
|
defm REGIONLOAD : LOAD<"!region load" , region_load>;
|
||||||
|
defm REGIONZEXTLOAD : LOAD<"!region zext load" , region_zext_load>;
|
||||||
|
defm REGIONSEXTLOAD : LOAD<"!region sext load" , region_sext_load>;
|
||||||
|
defm REGIONAEXTLOAD : LOAD<"!region aext load" , region_aext_load>;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// Custom Inserter for Branches and returns, this eventually will be a
|
||||||
|
// separate pass
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
let isTerminator = 1 in {
|
||||||
|
def BRANCH : ILFormat<IL_PSEUDO_INST, (outs), (ins brtarget:$target),
|
||||||
|
"; Pseudo unconditional branch instruction",
|
||||||
|
[(br bb:$target)]>;
|
||||||
|
defm BRANCH_COND : BranchConditional<IL_brcond>;
|
||||||
|
}
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// return instructions
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
|
||||||
|
def RETURN : ILFormat<IL_OP_RET,(outs), (ins variable_ops),
|
||||||
|
IL_OP_RET.Text, [(IL_retflag)]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// Handle a function call
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
let isCall = 1,
|
||||||
|
Defs = [
|
||||||
|
R1, R2, R3, R4, R5, R6, R7, R8, R9, R10
|
||||||
|
]
|
||||||
|
,
|
||||||
|
Uses = [
|
||||||
|
R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
|
||||||
|
]
|
||||||
|
in {
|
||||||
|
def CALL : UnaryOpNoRet<IL_OP_CALL, (outs),
|
||||||
|
(ins calltarget:$dst),
|
||||||
|
!strconcat(IL_OP_CALL.Text, " $dst"), []>;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// Flow and Program control Instructions
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
let isTerminator=1 in {
|
||||||
|
def SWITCH : ILFormat<IL_OP_SWITCH, (outs), (ins GPRI32:$src),
|
||||||
|
!strconcat(IL_OP_SWITCH.Text, " $src"), []>;
|
||||||
|
def CASE : ILFormat<IL_OP_CASE, (outs), (ins GPRI32:$src),
|
||||||
|
!strconcat(IL_OP_CASE.Text, " $src"), []>;
|
||||||
|
def BREAK : ILFormat<IL_OP_BREAK, (outs), (ins),
|
||||||
|
IL_OP_BREAK.Text, []>;
|
||||||
|
def CONTINUE : ILFormat<IL_OP_CONTINUE, (outs), (ins),
|
||||||
|
IL_OP_CONTINUE.Text, []>;
|
||||||
|
def DEFAULT : ILFormat<IL_OP_DEFAULT, (outs), (ins),
|
||||||
|
IL_OP_DEFAULT.Text, []>;
|
||||||
|
def ELSE : ILFormat<IL_OP_ELSE, (outs), (ins),
|
||||||
|
IL_OP_ELSE.Text, []>;
|
||||||
|
def ENDSWITCH : ILFormat<IL_OP_ENDSWITCH, (outs), (ins),
|
||||||
|
IL_OP_ENDSWITCH.Text, []>;
|
||||||
|
def ENDMAIN : ILFormat<IL_OP_ENDMAIN, (outs), (ins),
|
||||||
|
IL_OP_ENDMAIN.Text, []>;
|
||||||
|
def END : ILFormat<IL_OP_END, (outs), (ins),
|
||||||
|
IL_OP_END.Text, []>;
|
||||||
|
def ENDFUNC : ILFormat<IL_OP_ENDFUNC, (outs), (ins),
|
||||||
|
IL_OP_ENDFUNC.Text, []>;
|
||||||
|
def ENDIF : ILFormat<IL_OP_ENDIF, (outs), (ins),
|
||||||
|
IL_OP_ENDIF.Text, []>;
|
||||||
|
def WHILELOOP : ILFormat<IL_OP_WHILE, (outs), (ins),
|
||||||
|
IL_OP_WHILE.Text, []>;
|
||||||
|
def ENDLOOP : ILFormat<IL_OP_ENDLOOP, (outs), (ins),
|
||||||
|
IL_OP_ENDLOOP.Text, []>;
|
||||||
|
def FUNC : ILFormat<IL_OP_FUNC, (outs), (ins),
|
||||||
|
IL_OP_FUNC.Text, []>;
|
||||||
|
def RETDYN : ILFormat<IL_OP_RET_DYN, (outs), (ins),
|
||||||
|
IL_OP_RET_DYN.Text, []>;
|
||||||
|
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||||
|
defm IF_LOGICALNZ : BranchInstr<IL_OP_IF_LOGICALNZ>;
|
||||||
|
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||||
|
defm IF_LOGICALZ : BranchInstr<IL_OP_IF_LOGICALZ>;
|
||||||
|
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||||
|
defm BREAK_LOGICALNZ : BranchInstr<IL_OP_BREAK_LOGICALNZ>;
|
||||||
|
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||||
|
defm BREAK_LOGICALZ : BranchInstr<IL_OP_BREAK_LOGICALZ>;
|
||||||
|
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||||
|
defm CONTINUE_LOGICALNZ : BranchInstr<IL_OP_CONTINUE_LOGICALNZ>;
|
||||||
|
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
||||||
|
defm CONTINUE_LOGICALZ : BranchInstr<IL_OP_CONTINUE_LOGICALZ>;
|
||||||
|
defm IFC : BranchInstr2<IL_OP_IFC>;
|
||||||
|
defm BREAKC : BranchInstr2<IL_OP_BREAKC>;
|
||||||
|
defm CONTINUEC : BranchInstr2<IL_OP_CONTINUEC>;
|
||||||
|
}
|
||||||
|
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
|
||||||
|
def TRAP : ILFormat<IL_OP_NOP, (outs), (ins),
|
||||||
|
IL_OP_NOP.Text, [(trap)]>;
|
||||||
|
}
|
||||||
|
|
93
lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp
Normal file
93
lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the AMDIL Implementation of the IntrinsicInfo class.
|
||||||
|
//
|
||||||
|
//===-----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDILIntrinsicInfo.h"
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
#include "llvm/DerivedTypes.h"
|
||||||
|
#include "llvm/Intrinsics.h"
|
||||||
|
#include "llvm/Module.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
|
||||||
|
#include "AMDGPUGenIntrinsics.inc"
|
||||||
|
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
|
||||||
|
|
||||||
|
AMDILIntrinsicInfo::AMDILIntrinsicInfo(TargetMachine *tm)
|
||||||
|
: TargetIntrinsicInfo()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the name of the AMDIL intrinsic identified by \p IntrID.
///
/// The name table is emitted by TableGen and is indexed relative to
/// Intrinsic::num_intrinsics, i.e. entry 0 corresponds to the first
/// target-specific ID. IDs below Intrinsic::num_intrinsics are not looked up
/// in this table; an empty string is returned for them.
///
/// \p Tys and \p numTys are not used by this implementation.
std::string
AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
                            unsigned int numTys) const
{
  static const char* const names[] = {
#define GET_INTRINSIC_NAME_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_NAME_TABLE
  };

  //assert(!isOverloaded(IntrID)
  //&& "AMDIL Intrinsics are not overloaded");
  if (IntrID < Intrinsic::num_intrinsics) {
    // This used to be `return 0;`, which builds a std::string from a null
    // pointer (undefined behaviour). Return an empty name instead.
    return std::string();
  }
  assert(IntrID < AMDGPUIntrinsic::num_AMDIL_intrinsics
         && "Invalid intrinsic ID");

  // Rebase the ID so that it indexes the target-only name table.
  return std::string(names[IntrID - Intrinsic::num_intrinsics]);
}
|
||||||
|
|
||||||
|
/// Map the function name \p Name (of length \p Len) to an AMDIL intrinsic ID.
///
/// The TableGen-generated recognizer is textually included first
/// (NOTE(review): presumably it returns directly when it matches a name --
/// confirm against the generated AMDGPUGenIntrinsics.inc). After that the
/// name is looked up as a GCC builtin with the "AMDIL" target prefix.
/// Returns 0 when no intrinsic matches.
unsigned int
AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
{
#define GET_FUNCTION_RECOGNIZER
#include "AMDGPUGenIntrinsics.inc"
#undef GET_FUNCTION_RECOGNIZER
  const AMDGPUIntrinsic::ID NoMatch
    = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
  const AMDGPUIntrinsic::ID BuiltinID
    = getIntrinsicForGCCBuiltin("AMDIL", Name);

  // Guard clause: nothing recognised as a GCC builtin either.
  if (BuiltinID == NoMatch) {
    return 0;
  }
  return BuiltinID;
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
AMDILIntrinsicInfo::isOverloaded(unsigned id) const
|
||||||
|
{
|
||||||
|
// Overload Table
|
||||||
|
#define GET_INTRINSIC_OVERLOAD_TABLE
|
||||||
|
#include "AMDGPUGenIntrinsics.inc"
|
||||||
|
#undef GET_INTRINSIC_OVERLOAD_TABLE
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This defines the "getAttributes(ID id)" method.
|
||||||
|
#define GET_INTRINSIC_ATTRIBUTES
|
||||||
|
#include "AMDGPUGenIntrinsics.inc"
|
||||||
|
#undef GET_INTRINSIC_ATTRIBUTES
|
||||||
|
|
||||||
|
/// Placeholder for creating the declaration of intrinsic \p IntrID in
/// module \p M. Not implemented: asserts in builds with assertions enabled
/// and returns a null Function pointer otherwise.
///
/// \p M, \p Tys and \p numTys are not used by this implementation.
Function*
AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
                                   Type **Tys,
                                   unsigned numTys) const
{
  //Silence a warning
  // (presumably an unused-function warning for the generated getAttributes()
  // helper, which has no other caller visible in this file -- confirm)
  AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
  (void)List;
  assert(!"Not implemented");
  // When assertions are compiled out, control used to flow off the end of
  // this non-void function (undefined behaviour). Return null explicitly.
  return 0;
}
|
47
lib/Target/AMDGPU/AMDILIntrinsicInfo.h
Normal file
47
lib/Target/AMDGPU/AMDILIntrinsicInfo.h
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface for the AMDIL Implementation of the Intrinsic Info class.
|
||||||
|
//
|
||||||
|
//===-----------------------------------------------------------------------===//
|
||||||
|
#ifndef _AMDIL_INTRINSICS_H_
|
||||||
|
#define _AMDIL_INTRINSICS_H_
|
||||||
|
|
||||||
|
#include "llvm/Intrinsics.h"
|
||||||
|
#include "llvm/Target/TargetIntrinsicInfo.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class TargetMachine;
|
||||||
|
namespace AMDGPUIntrinsic {
|
||||||
|
enum ID {
|
||||||
|
last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1,
|
||||||
|
#define GET_INTRINSIC_ENUM_VALUES
|
||||||
|
#include "AMDGPUGenIntrinsics.inc"
|
||||||
|
#undef GET_INTRINSIC_ENUM_VALUES
|
||||||
|
, num_AMDIL_intrinsics
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class AMDILIntrinsicInfo : public TargetIntrinsicInfo {
|
||||||
|
public:
|
||||||
|
AMDILIntrinsicInfo(TargetMachine *tm);
|
||||||
|
std::string getName(unsigned int IntrId, Type **Tys = 0,
|
||||||
|
unsigned int numTys = 0) const;
|
||||||
|
unsigned int lookupName(const char *Name, unsigned int Len) const;
|
||||||
|
bool isOverloaded(unsigned int IID) const;
|
||||||
|
Function *getDeclaration(Module *M, unsigned int ID,
|
||||||
|
Type **Tys = 0,
|
||||||
|
unsigned int numTys = 0) const;
|
||||||
|
}; // AMDILIntrinsicInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _AMDIL_INTRINSICS_H_
|
||||||
|
|
705
lib/Target/AMDGPU/AMDILIntrinsics.td
Normal file
705
lib/Target/AMDGPU/AMDILIntrinsics.td
Normal file
@ -0,0 +1,705 @@
|
|||||||
|
//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines all of the amdil-specific intrinsics
|
||||||
|
//
|
||||||
|
//===---------------------------------------------------------------===//
|
||||||
|
|
||||||
|
let TargetPrefix = "AMDIL", isTarget = 1 in {
|
||||||
|
//------------- Synchronization Functions - OpenCL 6.11.9 --------------------//
|
||||||
|
def int_AMDIL_fence : GCCBuiltin<"mem_fence">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_global : GCCBuiltin<"mem_fence_global">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_local : GCCBuiltin<"mem_fence_local">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_region : GCCBuiltin<"mem_fence_region">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_read_only : GCCBuiltin<"read_mem_fence">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_read_only_global : GCCBuiltin<"read_mem_fence_global">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_read_only_local : GCCBuiltin<"read_mem_fence_local">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_read_only_region : GCCBuiltin<"read_mem_fence_region">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_write_only : GCCBuiltin<"write_mem_fence">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_write_only_global : GCCBuiltin<"write_mem_fence_global">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_write_only_local : GCCBuiltin<"write_mem_fence_local">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
def int_AMDIL_fence_write_only_region : GCCBuiltin<"write_mem_fence_region">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
|
||||||
|
def int_AMDIL_early_exit : GCCBuiltin<"__amdil_early_exit">,
|
||||||
|
UnaryIntNoRetInt;
|
||||||
|
|
||||||
|
def int_AMDIL_cmov_logical : GCCBuiltin<"__amdil_cmov_logical">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_fabs : GCCBuiltin<"__amdil_fabs">, UnaryIntFloat;
|
||||||
|
def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
|
||||||
|
|
||||||
|
def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
|
||||||
|
UnaryIntInt;
|
||||||
|
def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
|
||||||
|
UnaryIntInt;
|
||||||
|
def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
|
||||||
|
UnaryIntInt;
|
||||||
|
def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
|
||||||
|
UnaryIntInt;
|
||||||
|
def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
|
||||||
|
UnaryIntInt;
|
||||||
|
def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
|
||||||
|
QuaternaryIntInt;
|
||||||
|
def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
|
||||||
|
TernaryIntFloat;
|
||||||
|
def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
|
||||||
|
BinaryIntFloat;
|
||||||
|
def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
|
||||||
|
BinaryIntInt;
|
||||||
|
def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
|
||||||
|
BinaryIntFloat;
|
||||||
|
def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
|
||||||
|
TernaryIntInt;
|
||||||
|
def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
|
||||||
|
TernaryIntFloat;
|
||||||
|
def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_round_posinf : GCCBuiltin<"__amdil_round_posinf">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
|
||||||
|
def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
|
||||||
|
def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
|
||||||
|
def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_log : GCCBuiltin<"__amdil_log">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
|
||||||
|
TernaryIntFloat;
|
||||||
|
def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
|
||||||
|
UnaryIntFloat;
|
||||||
|
def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
|
||||||
|
TernaryIntFloat;
|
||||||
|
def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
|
||||||
|
llvm_v4i32_ty, llvm_i32_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
|
||||||
|
Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
|
||||||
|
def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
|
||||||
|
Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
|
||||||
|
def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
|
||||||
|
Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
|
||||||
|
def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
|
||||||
|
ConvertIntITOF;
|
||||||
|
def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
|
||||||
|
ConvertIntFTOI;
|
||||||
|
def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
|
||||||
|
ConvertIntFTOI;
|
||||||
|
def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
|
||||||
|
ConvertIntFTOI;
|
||||||
|
def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
|
||||||
|
ConvertIntFTOI;
|
||||||
|
def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
|
||||||
|
ConvertIntFTOI;
|
||||||
|
def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
|
||||||
|
ConvertIntFTOI;
|
||||||
|
def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
|
||||||
|
def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
|
||||||
|
ConvertIntITOF;
|
||||||
|
def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
|
||||||
|
ConvertIntITOF;
|
||||||
|
def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
|
||||||
|
ConvertIntITOF;
|
||||||
|
def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
|
||||||
|
ConvertIntITOF;
|
||||||
|
def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
|
||||||
|
Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
|
||||||
|
llvm_v2f32_ty, llvm_float_ty], []>;
|
||||||
|
def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
|
||||||
|
Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
|
||||||
|
llvm_v2f32_ty], []>;
|
||||||
|
def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
|
||||||
|
Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
|
||||||
|
llvm_v4f32_ty], []>;
|
||||||
|
def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
|
||||||
|
Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
|
||||||
|
llvm_v4f32_ty], []>;
|
||||||
|
//===---------------------- Image functions begin ------------------------===//
|
||||||
|
def int_AMDIL_image1d_write : GCCBuiltin<"__amdil_image1d_write">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_read_norm : GCCBuiltin<"__amdil_image1d_read_norm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_read_unnorm : GCCBuiltin<"__amdil_image1d_read_unnorm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_info0 : GCCBuiltin<"__amdil_image1d_info0">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_info1 : GCCBuiltin<"__amdil_image1d_info1">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_array_write : GCCBuiltin<"__amdil_image1d_array_write">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_array_read_norm : GCCBuiltin<"__amdil_image1d_array_read_norm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_array_read_unnorm : GCCBuiltin<"__amdil_image1d_array_read_unnorm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_array_info0 : GCCBuiltin<"__amdil_image1d_array_info0">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image1d_array_info1 : GCCBuiltin<"__amdil_image1d_array_info1">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_write : GCCBuiltin<"__amdil_image2d_write">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_read_norm : GCCBuiltin<"__amdil_image2d_read_norm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_read_unnorm : GCCBuiltin<"__amdil_image2d_read_unnorm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_info0 : GCCBuiltin<"__amdil_image2d_info0">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_info1 : GCCBuiltin<"__amdil_image2d_info1">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_array_write : GCCBuiltin<"__amdil_image2d_array_write">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_array_read_norm : GCCBuiltin<"__amdil_image2d_array_read_norm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_array_read_unnorm : GCCBuiltin<"__amdil_image2d_array_read_unnorm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_array_info0 : GCCBuiltin<"__amdil_image2d_array_info0">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image2d_array_info1 : GCCBuiltin<"__amdil_image2d_array_info1">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image3d_write : GCCBuiltin<"__amdil_image3d_write">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image3d_read_norm : GCCBuiltin<"__amdil_image3d_read_norm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image3d_read_unnorm : GCCBuiltin<"__amdil_image3d_read_unnorm">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_image3d_info0 : GCCBuiltin<"__amdil_image3d_info0">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
def int_AMDIL_image3d_info1 : GCCBuiltin<"__amdil_image3d_info1">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
|
||||||
|
|
||||||
|
//===---------------------- Image functions end --------------------------===//
|
||||||
|
|
||||||
|
def int_AMDIL_append_alloc_i32 : GCCBuiltin<"__amdil_append_alloc">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
|
||||||
|
def int_AMDIL_append_consume_i32 : GCCBuiltin<"__amdil_append_consume">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
|
||||||
|
def int_AMDIL_append_alloc_i32_noret : GCCBuiltin<"__amdil_append_alloc_noret">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
|
||||||
|
def int_AMDIL_append_consume_i32_noret : GCCBuiltin<"__amdil_append_consume_noret">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
|
||||||
|
|
||||||
|
def int_AMDIL_get_global_id : GCCBuiltin<"__amdil_get_global_id_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_local_id : GCCBuiltin<"__amdil_get_local_id_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_group_id : GCCBuiltin<"__amdil_get_group_id_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_num_groups : GCCBuiltin<"__amdil_get_num_groups_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_local_size : GCCBuiltin<"__amdil_get_local_size_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_global_size : GCCBuiltin<"__amdil_get_global_size_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_global_offset : GCCBuiltin<"__amdil_get_global_offset_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_work_dim : GCCBuiltin<"get_work_dim">,
|
||||||
|
Intrinsic<[llvm_i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_printf_offset : GCCBuiltin<"__amdil_get_printf_offset">,
|
||||||
|
Intrinsic<[llvm_i32_ty], []>;
|
||||||
|
def int_AMDIL_get_printf_size : GCCBuiltin<"__amdil_get_printf_size">,
|
||||||
|
Intrinsic<[llvm_i32_ty], []>;
|
||||||
|
|
||||||
|
/// Intrinsics for atomic instructions with no return value
|
||||||
|
/// Signed 32 bit integer atomics for global address space
|
||||||
|
def int_AMDIL_atomic_add_gi32_noret : GCCBuiltin<"__atomic_add_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_sub_gi32_noret : GCCBuiltin<"__atomic_sub_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_rsub_gi32_noret : GCCBuiltin<"__atomic_rsub_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xchg_gi32_noret : GCCBuiltin<"__atomic_xchg_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_inc_gi32_noret : GCCBuiltin<"__atomic_inc_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_dec_gi32_noret : GCCBuiltin<"__atomic_dec_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_gi32_noret : GCCBuiltin<"__atomic_cmpxchg_gi32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_min_gi32_noret : GCCBuiltin<"__atomic_min_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_max_gi32_noret : GCCBuiltin<"__atomic_max_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_and_gi32_noret : GCCBuiltin<"__atomic_and_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_or_gi32_noret : GCCBuiltin<"__atomic_or_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xor_gi32_noret : GCCBuiltin<"__atomic_xor_gi32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// Unsigned 32 bit integer atomics for global address space
|
||||||
|
def int_AMDIL_atomic_add_gu32_noret : GCCBuiltin<"__atomic_add_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_sub_gu32_noret : GCCBuiltin<"__atomic_sub_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_rsub_gu32_noret : GCCBuiltin<"__atomic_rsub_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xchg_gu32_noret : GCCBuiltin<"__atomic_xchg_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_inc_gu32_noret : GCCBuiltin<"__atomic_inc_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_dec_gu32_noret : GCCBuiltin<"__atomic_dec_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_gu32_noret : GCCBuiltin<"__atomic_cmpxchg_gu32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_min_gu32_noret : GCCBuiltin<"__atomic_min_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_max_gu32_noret : GCCBuiltin<"__atomic_max_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_and_gu32_noret : GCCBuiltin<"__atomic_and_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_or_gu32_noret : GCCBuiltin<"__atomic_or_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xor_gu32_noret : GCCBuiltin<"__atomic_xor_gu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
|
||||||
|
|
||||||
|
/// Intrinsics for atomic instructions with a return value
|
||||||
|
/// Signed 32 bit integer atomics for global address space
|
||||||
|
def int_AMDIL_atomic_add_gi32 : GCCBuiltin<"__atomic_add_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_sub_gi32 : GCCBuiltin<"__atomic_sub_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_rsub_gi32 : GCCBuiltin<"__atomic_rsub_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_gi32 : GCCBuiltin<"__atomic_xchg_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_inc_gi32 : GCCBuiltin<"__atomic_inc_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_dec_gi32 : GCCBuiltin<"__atomic_dec_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_gi32 : GCCBuiltin<"__atomic_cmpxchg_gi32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_min_gi32 : GCCBuiltin<"__atomic_min_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_max_gi32 : GCCBuiltin<"__atomic_max_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_and_gi32 : GCCBuiltin<"__atomic_and_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_or_gi32 : GCCBuiltin<"__atomic_or_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xor_gi32 : GCCBuiltin<"__atomic_xor_gi32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
|
||||||
|
/// 32 bit float atomics for global address space, required by OpenCL
|
||||||
|
def int_AMDIL_atomic_xchg_gf32 : GCCBuiltin<"__atomic_xchg_gf32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_gf32_noret : GCCBuiltin<"__atomic_xchg_gf32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
|
||||||
|
/// Unsigned 32 bit integer atomics for global address space
|
||||||
|
def int_AMDIL_atomic_add_gu32 : GCCBuiltin<"__atomic_add_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_sub_gu32 : GCCBuiltin<"__atomic_sub_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_rsub_gu32 : GCCBuiltin<"__atomic_rsub_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_gu32 : GCCBuiltin<"__atomic_xchg_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_inc_gu32 : GCCBuiltin<"__atomic_inc_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_dec_gu32 : GCCBuiltin<"__atomic_dec_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_gu32 : GCCBuiltin<"__atomic_cmpxchg_gu32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_min_gu32 : GCCBuiltin<"__atomic_min_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_max_gu32 : GCCBuiltin<"__atomic_max_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_and_gu32 : GCCBuiltin<"__atomic_and_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_or_gu32 : GCCBuiltin<"__atomic_or_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xor_gu32 : GCCBuiltin<"__atomic_xor_gu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
|
||||||
|
|
||||||
|
/// Intrinsics for atomic instructions with no return value
|
||||||
|
/// Signed 32 bit integer atomics for local address space
|
||||||
|
def int_AMDIL_atomic_add_li32_noret : GCCBuiltin<"__atomic_add_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_sub_li32_noret : GCCBuiltin<"__atomic_sub_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_rsub_li32_noret : GCCBuiltin<"__atomic_rsub_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xchg_li32_noret : GCCBuiltin<"__atomic_xchg_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_inc_li32_noret : GCCBuiltin<"__atomic_inc_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_dec_li32_noret : GCCBuiltin<"__atomic_dec_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_li32_noret : GCCBuiltin<"__atomic_cmpxchg_li32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_min_li32_noret : GCCBuiltin<"__atomic_min_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_max_li32_noret : GCCBuiltin<"__atomic_max_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_and_li32_noret : GCCBuiltin<"__atomic_and_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_or_li32_noret : GCCBuiltin<"__atomic_or_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_mskor_li32_noret : GCCBuiltin<"__atomic_mskor_li32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xor_li32_noret : GCCBuiltin<"__atomic_xor_li32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
|
||||||
|
/// Signed 32 bit integer atomics for region address space
|
||||||
|
def int_AMDIL_atomic_add_ri32_noret : GCCBuiltin<"__atomic_add_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_sub_ri32_noret : GCCBuiltin<"__atomic_sub_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_rsub_ri32_noret : GCCBuiltin<"__atomic_rsub_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xchg_ri32_noret : GCCBuiltin<"__atomic_xchg_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_inc_ri32_noret : GCCBuiltin<"__atomic_inc_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_dec_ri32_noret : GCCBuiltin<"__atomic_dec_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_ri32_noret : GCCBuiltin<"__atomic_cmpxchg_ri32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_min_ri32_noret : GCCBuiltin<"__atomic_min_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_max_ri32_noret : GCCBuiltin<"__atomic_max_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_and_ri32_noret : GCCBuiltin<"__atomic_and_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_or_ri32_noret : GCCBuiltin<"__atomic_or_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_mskor_ri32_noret : GCCBuiltin<"__atomic_mskor_ri32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xor_ri32_noret : GCCBuiltin<"__atomic_xor_ri32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// Unsigned 32 bit integer atomics for local address space
|
||||||
|
def int_AMDIL_atomic_add_lu32_noret : GCCBuiltin<"__atomic_add_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_sub_lu32_noret : GCCBuiltin<"__atomic_sub_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_rsub_lu32_noret : GCCBuiltin<"__atomic_rsub_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xchg_lu32_noret : GCCBuiltin<"__atomic_xchg_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_inc_lu32_noret : GCCBuiltin<"__atomic_inc_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_dec_lu32_noret : GCCBuiltin<"__atomic_dec_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_lu32_noret : GCCBuiltin<"__atomic_cmpxchg_lu32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_min_lu32_noret : GCCBuiltin<"__atomic_min_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_max_lu32_noret : GCCBuiltin<"__atomic_max_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_and_lu32_noret : GCCBuiltin<"__atomic_and_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_or_lu32_noret : GCCBuiltin<"__atomic_or_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_mskor_lu32_noret : GCCBuiltin<"__atomic_mskor_lu32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xor_lu32_noret : GCCBuiltin<"__atomic_xor_lu32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
|
||||||
|
/// Unsigned 32 bit integer atomics for region address space
|
||||||
|
def int_AMDIL_atomic_add_ru32_noret : GCCBuiltin<"__atomic_add_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_sub_ru32_noret : GCCBuiltin<"__atomic_sub_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_rsub_ru32_noret : GCCBuiltin<"__atomic_rsub_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xchg_ru32_noret : GCCBuiltin<"__atomic_xchg_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_inc_ru32_noret : GCCBuiltin<"__atomic_inc_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_dec_ru32_noret : GCCBuiltin<"__atomic_dec_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_ru32_noret : GCCBuiltin<"__atomic_cmpxchg_ru32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_min_ru32_noret : GCCBuiltin<"__atomic_min_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_max_ru32_noret : GCCBuiltin<"__atomic_max_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_and_ru32_noret : GCCBuiltin<"__atomic_and_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_or_ru32_noret : GCCBuiltin<"__atomic_or_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_mskor_ru32_noret : GCCBuiltin<"__atomic_mskor_ru32_noret">,
|
||||||
|
TernaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xor_ru32_noret : GCCBuiltin<"__atomic_xor_ru32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
|
||||||
|
def int_AMDIL_get_cycle_count : GCCBuiltin<"__amdil_get_cycle_count">,
|
||||||
|
VoidIntLong;
|
||||||
|
|
||||||
|
def int_AMDIL_compute_unit_id : GCCBuiltin<"__amdil_compute_unit_id">,
|
||||||
|
VoidIntInt;
|
||||||
|
|
||||||
|
def int_AMDIL_wavefront_id : GCCBuiltin<"__amdil_wavefront_id">,
|
||||||
|
VoidIntInt;
|
||||||
|
|
||||||
|
|
||||||
|
/// Intrinsics for atomic instructions with a return value
|
||||||
|
/// Signed 32 bit integer atomics for local address space
|
||||||
|
def int_AMDIL_atomic_add_li32 : GCCBuiltin<"__atomic_add_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_sub_li32 : GCCBuiltin<"__atomic_sub_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_rsub_li32 : GCCBuiltin<"__atomic_rsub_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_li32 : GCCBuiltin<"__atomic_xchg_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_inc_li32 : GCCBuiltin<"__atomic_inc_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_dec_li32 : GCCBuiltin<"__atomic_dec_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_li32 : GCCBuiltin<"__atomic_cmpxchg_li32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_min_li32 : GCCBuiltin<"__atomic_min_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_max_li32 : GCCBuiltin<"__atomic_max_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_and_li32 : GCCBuiltin<"__atomic_and_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_or_li32 : GCCBuiltin<"__atomic_or_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_mskor_li32 : GCCBuiltin<"__atomic_mskor_li32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xor_li32 : GCCBuiltin<"__atomic_xor_li32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
|
||||||
|
/// Signed 32 bit integer atomics for region address space
|
||||||
|
def int_AMDIL_atomic_add_ri32 : GCCBuiltin<"__atomic_add_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_sub_ri32 : GCCBuiltin<"__atomic_sub_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_rsub_ri32 : GCCBuiltin<"__atomic_rsub_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_ri32 : GCCBuiltin<"__atomic_xchg_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_inc_ri32 : GCCBuiltin<"__atomic_inc_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_dec_ri32 : GCCBuiltin<"__atomic_dec_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_ri32 : GCCBuiltin<"__atomic_cmpxchg_ri32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_min_ri32 : GCCBuiltin<"__atomic_min_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_max_ri32 : GCCBuiltin<"__atomic_max_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_and_ri32 : GCCBuiltin<"__atomic_and_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_or_ri32 : GCCBuiltin<"__atomic_or_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_mskor_ri32 : GCCBuiltin<"__atomic_mskor_ri32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xor_ri32 : GCCBuiltin<"__atomic_xor_ri32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
|
||||||
|
/// 32 bit float atomics for local and region address space, required by OpenCL
|
||||||
|
def int_AMDIL_atomic_xchg_lf32 : GCCBuiltin<"__atomic_xchg_lf32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_lf32_noret : GCCBuiltin<"__atomic_xchg_lf32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
def int_AMDIL_atomic_xchg_rf32 : GCCBuiltin<"__atomic_xchg_rf32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_rf32_noret : GCCBuiltin<"__atomic_xchg_rf32_noret">,
|
||||||
|
BinaryAtomicIntNoRet;
|
||||||
|
|
||||||
|
/// Unsigned 32 bit integer atomics for local address space
|
||||||
|
def int_AMDIL_atomic_add_lu32 : GCCBuiltin<"__atomic_add_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_sub_lu32 : GCCBuiltin<"__atomic_sub_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_rsub_lu32 : GCCBuiltin<"__atomic_rsub_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_lu32 : GCCBuiltin<"__atomic_xchg_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_inc_lu32 : GCCBuiltin<"__atomic_inc_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_dec_lu32 : GCCBuiltin<"__atomic_dec_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_lu32 : GCCBuiltin<"__atomic_cmpxchg_lu32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_min_lu32 : GCCBuiltin<"__atomic_min_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_max_lu32 : GCCBuiltin<"__atomic_max_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_and_lu32 : GCCBuiltin<"__atomic_and_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_or_lu32 : GCCBuiltin<"__atomic_or_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_mskor_lu32 : GCCBuiltin<"__atomic_mskor_lu32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xor_lu32 : GCCBuiltin<"__atomic_xor_lu32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
|
||||||
|
/// Unsigned 32 bit integer atomics for region address space
|
||||||
|
def int_AMDIL_atomic_add_ru32 : GCCBuiltin<"__atomic_add_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_sub_ru32 : GCCBuiltin<"__atomic_sub_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_rsub_ru32 : GCCBuiltin<"__atomic_rsub_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xchg_ru32 : GCCBuiltin<"__atomic_xchg_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_inc_ru32 : GCCBuiltin<"__atomic_inc_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_dec_ru32 : GCCBuiltin<"__atomic_dec_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_cmpxchg_ru32 : GCCBuiltin<"__atomic_cmpxchg_ru32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_min_ru32 : GCCBuiltin<"__atomic_min_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_max_ru32 : GCCBuiltin<"__atomic_max_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_and_ru32 : GCCBuiltin<"__atomic_and_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_or_ru32 : GCCBuiltin<"__atomic_or_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_mskor_ru32 : GCCBuiltin<"__atomic_mskor_ru32">,
|
||||||
|
TernaryAtomicInt;
|
||||||
|
def int_AMDIL_atomic_xor_ru32 : GCCBuiltin<"__atomic_xor_ru32">,
|
||||||
|
BinaryAtomicInt;
|
||||||
|
|
||||||
|
/// Semaphore signal/wait/init
|
||||||
|
def int_AMDIL_semaphore_init : GCCBuiltin<"__amdil_semaphore_init">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
|
||||||
|
def int_AMDIL_semaphore_wait : GCCBuiltin<"__amdil_semaphore_wait">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty]>;
|
||||||
|
def int_AMDIL_semaphore_signal : GCCBuiltin<"__amdil_semaphore_signal">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty]>;
|
||||||
|
def int_AMDIL_semaphore_size : GCCBuiltin<"__amdil_max_semaphore_size">,
|
||||||
|
Intrinsic<[llvm_i32_ty], []>;
|
||||||
|
}
|
95
lib/Target/AMDGPU/AMDILMultiClass.td
Normal file
95
lib/Target/AMDGPU/AMDILMultiClass.td
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
//===-- AMDILMultiClass.td - AMDIL Multiclass defs ---*- tablegen -*-------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
// Multiclass that handles branch instructions
|
||||||
|
multiclass BranchConditional<SDNode Op> {
  // Pseudo branch on an i32 condition register; matches (Op bb, GPRI32).
  def _i32 : ILFormat<IL_OP_IFC, (outs),
                      (ins brtarget:$target, GPRI32:$src0),
                      "; i32 Pseudo branch instruction",
                      [(Op bb:$target, GPRI32:$src0)]>;

  // Pseudo branch on an f32 condition register; matches (Op bb, GPRF32).
  def _f32 : ILFormat<IL_OP_IFC, (outs),
                      (ins brtarget:$target, GPRF32:$src0),
                      "; f32 Pseudo branch instruction",
                      [(Op bb:$target, GPRF32:$src0)]>;
}
|
||||||
|
|
||||||
|
// Multiclass that handles truncating i8/i16 stores matched by the global_*trunc_store patterns
|
||||||
|
multiclass GTRUNCSTORE<string asm> {
  // GPRI32 value stored through the global_i8trunc_store pattern.
  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs),
                           (ins GPRI32:$val, MEMI32:$ptr),
                           !strconcat(asm, " $val $ptr"),
                           [(global_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;

  // GPRI32 value stored through the global_i16trunc_store pattern.
  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs),
                            (ins GPRI32:$val, MEMI32:$ptr),
                            !strconcat(asm, " $val $ptr"),
                            [(global_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
}
|
||||||
|
|
||||||
|
// Multiclass that handles truncating i8/i16 stores matched by the local_*trunc_store patterns
|
||||||
|
multiclass LTRUNCSTORE<string asm> {
  // GPRI32 value stored through the local_i8trunc_store pattern.
  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs),
                           (ins GPRI32:$val, MEMI32:$ptr),
                           !strconcat(asm, " $val $ptr"),
                           [(local_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;

  // GPRI32 value stored through the local_i16trunc_store pattern.
  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs),
                            (ins GPRI32:$val, MEMI32:$ptr),
                            !strconcat(asm, " $val $ptr"),
                            [(local_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
}
|
||||||
|
|
||||||
|
// Multiclass that handles truncating i8/i16 stores matched by the private_*trunc_store patterns
|
||||||
|
multiclass PTRUNCSTORE<string asm> {
  // GPRI32 value stored through the private_i8trunc_store pattern.
  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs),
                           (ins GPRI32:$val, MEMI32:$ptr),
                           !strconcat(asm, " $val $ptr"),
                           [(private_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;

  // GPRI32 value stored through the private_i16trunc_store pattern.
  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs),
                            (ins GPRI32:$val, MEMI32:$ptr),
                            !strconcat(asm, " $val $ptr"),
                            [(private_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
}
|
||||||
|
|
||||||
|
// Multiclass that handles truncating i8/i16 stores matched by the region_*trunc_store patterns
|
||||||
|
multiclass RTRUNCSTORE<string asm> {
  // GPRI32 value stored through the region_i8trunc_store pattern.
  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs),
                           (ins GPRI32:$val, MEMI32:$ptr),
                           !strconcat(asm, " $val $ptr"),
                           [(region_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;

  // GPRI32 value stored through the region_i16trunc_store pattern.
  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs),
                            (ins GPRI32:$val, MEMI32:$ptr),
                            !strconcat(asm, " $val $ptr"),
                            [(region_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Multiclass that handles memory store operations
|
||||||
|
multiclass STORE<string asm, PatFrag OpNode> {
  // Store of a GPRI32 value to $ptr, selected by the caller-supplied OpNode.
  def _i32 : OneInOneOut<IL_OP_MOV, (outs),
                         (ins GPRI32:$val, MEMI32:$ptr),
                         !strconcat(asm, " $val $ptr"),
                         [(OpNode GPRI32:$val, ADDR:$ptr)]>;

  // Store of a GPRF32 value to $ptr, selected by the caller-supplied OpNode.
  def _f32 : OneInOneOut<IL_OP_MOV, (outs),
                         (ins GPRF32:$val, MEMI32:$ptr),
                         !strconcat(asm, " $val $ptr"),
                         [(OpNode GPRF32:$val, ADDR:$ptr)]>;
}
|
||||||
|
|
||||||
|
// Multiclass that handles load operations
|
||||||
|
multiclass LOAD<string asm, PatFrag OpNode> {
  // Load from $ptr into a GPRI32 destination, selected by OpNode.
  def _i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
                         (ins MEMI32:$ptr),
                         !strconcat(asm, " $dst $ptr"),
                         [(set GPRI32:$dst, (OpNode ADDR:$ptr))]>;

  // Load from $ptr into a GPRF32 destination, selected by OpNode.
  def _f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst),
                         (ins MEMI32:$ptr),
                         !strconcat(asm, " $dst $ptr"),
                         [(set GPRF32:$dst, (OpNode ADDR:$ptr))]>;
}
|
||||||
|
|
||||||
|
// Only scalar types should generate flow control
|
||||||
|
multiclass BranchInstr<ILOpCode opc> {
  // Single-source flow-control instruction on an i32 register; no
  // selection pattern is attached (empty pattern list).
  def _i32 : UnaryOpNoRet<opc, (outs), (ins GPRI32:$src),
                          !strconcat(opc.Text, " $src"), []>;

  // Same instruction shape with an f32 source register.
  def _f32 : UnaryOpNoRet<opc, (outs), (ins GPRF32:$src),
                          !strconcat(opc.Text, " $src"), []>;
}
|
||||||
|
// Only scalar types should generate flow control
|
||||||
|
multiclass BranchInstr2<ILOpCode opc> {
  // Two-source flow-control instruction on i32 registers; no selection
  // pattern is attached (empty pattern list).
  def _i32 : BinaryOpNoRet<opc, (outs), (ins GPRI32:$src0, GPRI32:$src1),
                           !strconcat(opc.Text, " $src0, $src1"), []>;

  // Same instruction shape with f32 source registers.
  def _f32 : BinaryOpNoRet<opc, (outs), (ins GPRF32:$src0, GPRF32:$src1),
                           !strconcat(opc.Text, " $src0, $src1"), []>;
}
|
71
lib/Target/AMDGPU/AMDILNIDevice.cpp
Normal file
71
lib/Target/AMDGPU/AMDILNIDevice.cpp
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
#include "AMDILNIDevice.h"
|
||||||
|
#include "AMDILEvergreenDevice.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Construct an NI device description and pick the device flag from the
/// subtarget's device name. Any name other than "caicos", "turks" or
/// "cayman" falls back to OCL_DEVICE_BARTS.
AMDILNIDevice::AMDILNIDevice(AMDILSubtarget *ST)
  : AMDILEvergreenDevice(ST)
{
  const std::string deviceName = ST->getDeviceName();

  if (deviceName == "caicos") {
    mDeviceFlag = OCL_DEVICE_CAICOS;
    return;
  }
  if (deviceName == "turks") {
    mDeviceFlag = OCL_DEVICE_TURKS;
    return;
  }
  if (deviceName == "cayman") {
    mDeviceFlag = OCL_DEVICE_CAYMAN;
    return;
  }

  // Unrecognized names are treated as Barts.
  mDeviceFlag = OCL_DEVICE_BARTS;
}
|
||||||
|
/// Nothing to release; the destructor body is intentionally empty.
AMDILNIDevice::~AMDILNIDevice() {}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
AMDILNIDevice::getMaxLDSSize() const
|
||||||
|
{
|
||||||
|
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||||
|
return MAX_LDS_SIZE_900;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
AMDILNIDevice::getGeneration() const
|
||||||
|
{
|
||||||
|
return AMDILDeviceInfo::HD6XXX;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Construct a Cayman device on top of the NI base, then apply the
/// Cayman-specific capability bits via setCaps().
AMDILCaymanDevice::AMDILCaymanDevice(AMDILSubtarget *ST)
  : AMDILNIDevice(ST)
{
  this->setCaps();
}
|
||||||
|
|
||||||
|
/// Nothing to release; the destructor body is intentionally empty.
AMDILCaymanDevice::~AMDILCaymanDevice() {}
|
||||||
|
|
||||||
|
void
|
||||||
|
AMDILCaymanDevice::setCaps()
|
||||||
|
{
|
||||||
|
if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
|
||||||
|
mHWBits.set(AMDILDeviceInfo::DoubleOps);
|
||||||
|
mHWBits.set(AMDILDeviceInfo::FMA);
|
||||||
|
}
|
||||||
|
mHWBits.set(AMDILDeviceInfo::Signed24BitOps);
|
||||||
|
mSWBits.reset(AMDILDeviceInfo::Signed24BitOps);
|
||||||
|
mSWBits.set(AMDILDeviceInfo::ArenaSegment);
|
||||||
|
}
|
||||||
|
|
59
lib/Target/AMDGPU/AMDILNIDevice.h
Normal file
59
lib/Target/AMDGPU/AMDILNIDevice.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface for the subtarget data classes.
|
||||||
|
//
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// This file will define the interface that each generation needs to
|
||||||
|
// implement in order to correctly answer queries on the capabilities of the
|
||||||
|
// specific hardware.
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
#ifndef _AMDILNIDEVICE_H_
|
||||||
|
#define _AMDILNIDEVICE_H_
|
||||||
|
#include "AMDILEvergreenDevice.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class AMDILSubtarget;
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// NI generation of devices and their respective sub classes
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// The AMDILNIDevice is the base class for all Northern Island series of
|
||||||
|
// cards. It is very similar to the AMDILEvergreenDevice, with the major
|
||||||
|
// exception being differences in wavefront size and hardware capabilities. The
|
||||||
|
// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
||||||
|
// integer operations
|
||||||
|
|
||||||
|
class AMDILNIDevice : public AMDILEvergreenDevice {
|
||||||
|
public:
|
||||||
|
AMDILNIDevice(AMDILSubtarget*);
|
||||||
|
virtual ~AMDILNIDevice();
|
||||||
|
virtual size_t getMaxLDSSize() const;
|
||||||
|
virtual uint32_t getGeneration() const;
|
||||||
|
protected:
|
||||||
|
}; // AMDILNIDevice
|
||||||
|
|
||||||
|
// Just as the AMDILCypressDevice is the double capable version of the
|
||||||
|
// AMDILEvergreenDevice, the AMDILCaymanDevice is the double capable version of
|
||||||
|
// the AMDILNIDevice. The other major difference that is not as useful from
|
||||||
|
// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the
|
||||||
|
// NI family is 5 wide.
|
||||||
|
|
||||||
|
class AMDILCaymanDevice: public AMDILNIDevice {
|
||||||
|
public:
|
||||||
|
AMDILCaymanDevice(AMDILSubtarget*);
|
||||||
|
virtual ~AMDILCaymanDevice();
|
||||||
|
private:
|
||||||
|
virtual void setCaps();
|
||||||
|
}; // AMDILCaymanDevice
|
||||||
|
|
||||||
|
// LDS size limit returned by AMDILNIDevice::getMaxLDSSize(); defined as the
// same value as AMDILDevice::MAX_LDS_SIZE_800.
static const unsigned int MAX_LDS_SIZE_900 = AMDILDevice::MAX_LDS_SIZE_800;
|
||||||
|
} // namespace llvm
|
||||||
|
#endif // _AMDILNIDEVICE_H_
|
47
lib/Target/AMDGPU/AMDILNodes.td
Normal file
47
lib/Target/AMDGPU/AMDILNodes.td
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
//===- AMDILNodes.td - AMD IL nodes ------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Flow Control DAG Nodes
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// DAG node for AMDILISD::BRANCH_COND, typed by SDTIL_BRCond; carries a chain
// (SDNPHasChain).
def IL_brcond : SDNode<"AMDILISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Comparison DAG Nodes
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// DAG node for AMDILISD::CMP, typed by SDTIL_Cmp; no node properties.
def IL_cmp : SDNode<"AMDILISD::CMP", SDTIL_Cmp>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Call/Return DAG Nodes
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// DAG node for AMDILISD::CALL, typed by SDTIL_Call. It is chained, may take
// an incoming glue operand, and produces an outgoing glue result.
def IL_call : SDNode<"AMDILISD::CALL", SDTIL_Call,
                     [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||||
|
|
||||||
|
// DAG node for AMDILISD::RET_FLAG with the SDTNone type profile. It is
// chained and may take an incoming glue operand.
def IL_retflag : SDNode<"AMDILISD::RET_FLAG", SDTNone,
                        [SDNPHasChain, SDNPOptInGlue]>;
|
||||||
|
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Instructions
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Floating point math functions
|
||||||
|
// DAG node for AMDILISD::CMOVLOG; three-operand node (SDTIL_GenTernaryOp).
def IL_cmov_logical : SDNode<"AMDILISD::CMOVLOG", SDTIL_GenTernaryOp>;
|
||||||
|
// DAG node for AMDILISD::DIV_INF; two-operand node (SDTIL_GenBinaryOp).
def IL_div_inf : SDNode<"AMDILISD::DIV_INF", SDTIL_GenBinaryOp>;
|
||||||
|
// DAG node for AMDILISD::MAD; three-operand node (SDTIL_GenTernaryOp).
def IL_mad : SDNode<"AMDILISD::MAD", SDTIL_GenTernaryOp>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Integer functions
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// DAG node for AMDILISD::UMUL using the generic integer binary-op profile;
// marked commutative and associative.
def IL_umul : SDNode<"AMDILISD::UMUL", SDTIntBinOp,
                     [SDNPCommutative, SDNPAssociative]>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Vector functions
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def IL_vbuild : SDNode<"AMDILISD::VBUILD", SDTIL_GenVecBuild,
|
||||||
|
[]>;
|
32
lib/Target/AMDGPU/AMDILOperands.td
Normal file
32
lib/Target/AMDGPU/AMDILOperands.td
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
//===- AMDILOperands.td - AMD IL Operands ------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Custom memory operand
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
def MEMI32 : Operand<i32> {
|
||||||
|
let PrintMethod = "printMemOperand";
|
||||||
|
let MIOperandInfo = (ops GPRI32, GPRI32);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call target types
|
||||||
|
def calltarget : Operand<i32>;
|
||||||
|
def brtarget : Operand<OtherVT>;
|
||||||
|
|
||||||
|
// def v2i8imm : Operand<v2i8>;
|
||||||
|
// def v4i8imm : Operand<v4i8>;
|
||||||
|
// def v2i16imm : Operand<v2i16>;
|
||||||
|
// def v4i16imm : Operand<v4i16>;
|
||||||
|
// def v2i32imm : Operand<v2i32>;
|
||||||
|
// def v4i32imm : Operand<v4i32>;
|
||||||
|
// def v2i64imm : Operand<v2i64>;
|
||||||
|
// def v2f32imm : Operand<v2f32>;
|
||||||
|
// def v4f32imm : Operand<v4f32>;
|
||||||
|
// def v2f64imm : Operand<v2f64>;
|
||||||
|
|
504
lib/Target/AMDGPU/AMDILPatterns.td
Normal file
504
lib/Target/AMDGPU/AMDILPatterns.td
Normal file
@ -0,0 +1,504 @@
|
|||||||
|
//===- AMDILPatterns.td - AMDIL Target Patterns------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Store pattern fragments
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def truncstorei64 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||||
|
}]>;
|
||||||
|
def truncstorev2i8 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i8;
|
||||||
|
}]>;
|
||||||
|
def truncstorev2i16 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16;
|
||||||
|
}]>;
|
||||||
|
def truncstorev2i32 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i32;
|
||||||
|
}]>;
|
||||||
|
def truncstorev2i64 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i64;
|
||||||
|
}]>;
|
||||||
|
def truncstorev2f32 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2f32;
|
||||||
|
}]>;
|
||||||
|
def truncstorev2f64 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2f64;
|
||||||
|
}]>;
|
||||||
|
def truncstorev4i8 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8;
|
||||||
|
}]>;
|
||||||
|
def truncstorev4i16 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i16;
|
||||||
|
}]>;
|
||||||
|
def truncstorev4i32 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i32;
|
||||||
|
}]>;
|
||||||
|
def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
def global_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(store node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(store node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(store node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(store node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei8 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei16 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei32 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei64 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstoref32 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstoref64 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i8 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i16 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i32 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i64 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2f32 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2f64 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i8 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i16 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i32 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4f32 node:$val, node:$ptr), [{
|
||||||
|
return isGlobalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei8 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei16 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei32 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei64 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstoref32 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstoref64 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i8 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i16 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i32 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i64 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2f32 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2f64 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i8 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i16 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i32 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4f32 node:$val, node:$ptr), [{
|
||||||
|
return isPrivateStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
def local_trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei8 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei16 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei32 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei64 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstoref32 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstoref64 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i8 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i16 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i32 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i64 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2f32 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2f64 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i8 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i16 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i32 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4f32 node:$val, node:$ptr), [{
|
||||||
|
return isLocalStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
def region_trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstore node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei8 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei16 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei32 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorei64 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstoref32 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstoref64 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i8 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i16 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i32 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2i64 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2f32 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev2f64 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i8 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i16 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4i32 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(truncstorev4f32 node:$val, node:$ptr), [{
|
||||||
|
return isRegionStore(dyn_cast<StoreSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Load pattern fragments
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Global address space loads
|
||||||
|
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
|
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def global_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||||
|
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
// Any-extending load (extload) whose node is accepted by isGlobalLoad().
// The extension kind matches the fragment name, as in cp_aext_load.
def global_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
// Zero-extending load (zextload) whose node is accepted by isGlobalLoad().
// The extension kind matches the fragment name, as in cp_zext_load.
def global_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
|
||||||
|
// Private address space loads
|
||||||
|
def private_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
|
return isPrivateLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def private_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||||
|
return isPrivateLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
// Any-extending load (extload) whose node is accepted by isPrivateLoad().
// The extension kind matches the fragment name, as in cp_aext_load.
def private_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
    return isPrivateLoad(dyn_cast<LoadSDNode>(N));
}]>;
// Zero-extending load (zextload) whose node is accepted by isPrivateLoad().
// The extension kind matches the fragment name, as in cp_zext_load.
def private_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
    return isPrivateLoad(dyn_cast<LoadSDNode>(N));
}]>;
|
||||||
|
// Local address space loads
|
||||||
|
def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
|
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def local_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||||
|
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
// Any-extending load (extload) whose node is accepted by isLocalLoad().
// The extension kind matches the fragment name, as in cp_aext_load.
def local_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
    return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
// Zero-extending load (zextload) whose node is accepted by isLocalLoad().
// The extension kind matches the fragment name, as in cp_zext_load.
def local_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
    return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
|
||||||
|
// Region address space loads
|
||||||
|
def region_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
|
return isRegionLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def region_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||||
|
return isRegionLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
// Any-extending load (extload) whose node is accepted by isRegionLoad().
// The extension kind matches the fragment name, as in cp_aext_load.
def region_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
    return isRegionLoad(dyn_cast<LoadSDNode>(N));
}]>;
// Zero-extending load (zextload) whose node is accepted by isRegionLoad().
// The extension kind matches the fragment name, as in cp_zext_load.
def region_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
    return isRegionLoad(dyn_cast<LoadSDNode>(N));
}]>;
|
||||||
|
// Constant address space loads
|
||||||
|
def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
|
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||||
|
}]>;
|
||||||
|
def constant_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||||
|
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||||
|
}]>;
|
||||||
|
// Any-extending load (extload) whose node is accepted by
// isConstantLoad(node, -1). The extension kind matches the fragment name,
// as in cp_aext_load.
def constant_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
// Zero-extending load (zextload) whose node is accepted by
// isConstantLoad(node, -1). The extension kind matches the fragment name,
// as in cp_zext_load.
def constant_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
|
||||||
|
// Constant pool loads
|
||||||
|
def cp_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||||
|
return isCPLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def cp_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
|
||||||
|
return isCPLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def cp_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
|
||||||
|
return isCPLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
def cp_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
|
||||||
|
return isCPLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Complex addressing mode patterns
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
|
||||||
|
def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
|
||||||
|
def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
|
||||||
|
def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
|
||||||
|
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Conditional Instruction Pattern Leafs
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Pattern leaf that matches the i32 value N. Each IL_CC_* record below
// instantiates it with the numeric code of one condition.
class IL_CC_Op<int N> : PatLeaf<(i32 N)>;
|
||||||
|
def IL_CC_D_EQ : IL_CC_Op<0>;
|
||||||
|
def IL_CC_D_GE : IL_CC_Op<1>;
|
||||||
|
def IL_CC_D_LT : IL_CC_Op<2>;
|
||||||
|
def IL_CC_D_NE : IL_CC_Op<3>;
|
||||||
|
def IL_CC_F_EQ : IL_CC_Op<4>;
|
||||||
|
def IL_CC_F_GE : IL_CC_Op<5>;
|
||||||
|
def IL_CC_F_LT : IL_CC_Op<6>;
|
||||||
|
def IL_CC_F_NE : IL_CC_Op<7>;
|
||||||
|
def IL_CC_I_EQ : IL_CC_Op<8>;
|
||||||
|
def IL_CC_I_GE : IL_CC_Op<9>;
|
||||||
|
def IL_CC_I_LT : IL_CC_Op<10>;
|
||||||
|
def IL_CC_I_NE : IL_CC_Op<11>;
|
||||||
|
def IL_CC_U_GE : IL_CC_Op<12>;
|
||||||
|
def IL_CC_U_LT : IL_CC_Op<13>;
|
||||||
|
// Pseudo IL comparison instructions that aren't natively supported
|
||||||
|
def IL_CC_F_GT : IL_CC_Op<14>;
|
||||||
|
def IL_CC_U_GT : IL_CC_Op<15>;
|
||||||
|
def IL_CC_I_GT : IL_CC_Op<16>;
|
||||||
|
def IL_CC_D_GT : IL_CC_Op<17>;
|
||||||
|
def IL_CC_F_LE : IL_CC_Op<18>;
|
||||||
|
def IL_CC_U_LE : IL_CC_Op<19>;
|
||||||
|
def IL_CC_I_LE : IL_CC_Op<20>;
|
||||||
|
def IL_CC_D_LE : IL_CC_Op<21>;
|
||||||
|
def IL_CC_F_UNE : IL_CC_Op<22>;
|
||||||
|
def IL_CC_F_UEQ : IL_CC_Op<23>;
|
||||||
|
def IL_CC_F_ULT : IL_CC_Op<24>;
|
||||||
|
def IL_CC_F_UGT : IL_CC_Op<25>;
|
||||||
|
def IL_CC_F_ULE : IL_CC_Op<26>;
|
||||||
|
def IL_CC_F_UGE : IL_CC_Op<27>;
|
||||||
|
def IL_CC_F_ONE : IL_CC_Op<28>;
|
||||||
|
def IL_CC_F_OEQ : IL_CC_Op<29>;
|
||||||
|
def IL_CC_F_OLT : IL_CC_Op<30>;
|
||||||
|
def IL_CC_F_OGT : IL_CC_Op<31>;
|
||||||
|
def IL_CC_F_OLE : IL_CC_Op<32>;
|
||||||
|
def IL_CC_F_OGE : IL_CC_Op<33>;
|
||||||
|
def IL_CC_D_UNE : IL_CC_Op<34>;
|
||||||
|
def IL_CC_D_UEQ : IL_CC_Op<35>;
|
||||||
|
def IL_CC_D_ULT : IL_CC_Op<36>;
|
||||||
|
def IL_CC_D_UGT : IL_CC_Op<37>;
|
||||||
|
def IL_CC_D_ULE : IL_CC_Op<38>;
|
||||||
|
def IL_CC_D_UGE : IL_CC_Op<39>;
|
||||||
|
// Code 40: sits between IL_CC_D_UGE (39) and IL_CC_D_OEQ (41). It must not be
// 30, which is the code already assigned to IL_CC_F_OLT.
def IL_CC_D_ONE : IL_CC_Op<40>;
|
||||||
|
def IL_CC_D_OEQ : IL_CC_Op<41>;
|
||||||
|
def IL_CC_D_OLT : IL_CC_Op<42>;
|
||||||
|
def IL_CC_D_OGT : IL_CC_Op<43>;
|
||||||
|
def IL_CC_D_OLE : IL_CC_Op<44>;
|
||||||
|
def IL_CC_D_OGE : IL_CC_Op<45>;
|
||||||
|
def IL_CC_U_EQ : IL_CC_Op<46>;
|
||||||
|
def IL_CC_U_NE : IL_CC_Op<47>;
|
||||||
|
def IL_CC_F_O : IL_CC_Op<48>;
|
||||||
|
def IL_CC_D_O : IL_CC_Op<49>;
|
||||||
|
def IL_CC_F_UO : IL_CC_Op<50>;
|
||||||
|
def IL_CC_D_UO : IL_CC_Op<51>;
|
||||||
|
def IL_CC_L_LE : IL_CC_Op<52>;
|
||||||
|
def IL_CC_L_GE : IL_CC_Op<53>;
|
||||||
|
def IL_CC_L_EQ : IL_CC_Op<54>;
|
||||||
|
def IL_CC_L_NE : IL_CC_Op<55>;
|
||||||
|
def IL_CC_L_LT : IL_CC_Op<56>;
|
||||||
|
def IL_CC_L_GT : IL_CC_Op<57>;
|
||||||
|
def IL_CC_UL_LE : IL_CC_Op<58>;
|
||||||
|
def IL_CC_UL_GE : IL_CC_Op<59>;
|
||||||
|
def IL_CC_UL_EQ : IL_CC_Op<60>;
|
||||||
|
def IL_CC_UL_NE : IL_CC_Op<61>;
|
||||||
|
def IL_CC_UL_LT : IL_CC_Op<62>;
|
||||||
|
def IL_CC_UL_GT : IL_CC_Op<63>;
|
1264
lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp
Normal file
1264
lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp
Normal file
File diff suppressed because it is too large
Load Diff
174
lib/Target/AMDGPU/AMDILProfiles.td
Normal file
174
lib/Target/AMDGPU/AMDILProfiles.td
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
//===- AMDILProfiles.td - AMD IL Profiles ------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
// These are used for custom selection dag type profiles
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Custom Selection DAG Type Profiles
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// SDTCisDP - The specified operand has double type
|
||||||
|
// Tablegen needs to be hacked to get this constraint to work
|
||||||
|
//class SDTCisDP<int OpNum> : SDTypeConstraint<OpNum>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Generic Profile Types
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
def SDTIL_GenUnaryOp : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisSameAs<0, 1>
|
||||||
|
]>;
|
||||||
|
def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
|
||||||
|
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
|
||||||
|
]>;
|
||||||
|
def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
|
||||||
|
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
|
||||||
|
]>;
|
||||||
|
def SDTIL_GenCMovLog : SDTypeProfile<1, 3, [
|
||||||
|
SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisInt<1>
|
||||||
|
]>;
|
||||||
|
def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisEltOfVec<1, 0>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
def SDTIL_GenVecExtract : SDTypeProfile<1, 2, [
|
||||||
|
SDTCisEltOfVec<0, 1>, SDTCisVT<2, i32>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
def SDTIL_GenVecInsert : SDTypeProfile<1, 4, [
|
||||||
|
SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>,
|
||||||
|
SDTCisVT<3, i32>, SDTCisVT<4, i32>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
def SDTIL_GenVecShuffle : SDTypeProfile <1, 2, [
|
||||||
|
SDTCisSameAs<0, 1>, SDTCisVT<2, i32>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
def SDTIL_GenVecConcat : SDTypeProfile <1, 2, [
|
||||||
|
SDTCisSameAs<1, 2>
|
||||||
|
]>;
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Conversion Profile Types
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def SDTIL_DPToFPOp : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>
|
||||||
|
]>; // d2f
|
||||||
|
|
||||||
|
def SDTIL_AnyToInt : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisInt<0>
|
||||||
|
]>;
|
||||||
|
def SDTIL_IntToAny : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisInt<1>
|
||||||
|
]>;
|
||||||
|
def SDTIL_GenBitConv : SDTypeProfile<1, 1, []>;
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Scalar Profile Types
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Add instruction pattern to handle offsets of memory operations
|
||||||
|
def SDTIL_AddAddrri: SDTypeProfile<1, 2, [
|
||||||
|
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisSameAs<0, 2>
|
||||||
|
]>;
|
||||||
|
def SDTIL_AddAddrir : SDTypeProfile<1, 2, [
|
||||||
|
SDTCisInt<0>, SDTCisPtrTy<2>, SDTCisSameAs<0, 1>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
def SDTIL_LCreate : SDTypeProfile<1, 2, [
|
||||||
|
SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>
|
||||||
|
]>;
|
||||||
|
def SDTIL_LCreate2 : SDTypeProfile<1, 2, [
|
||||||
|
SDTCisVT<0, v2i64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2>
|
||||||
|
]>;
|
||||||
|
def SDTIL_LComp : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisVT<0, i32>, SDTCisVT<1, i64>
|
||||||
|
]>;
|
||||||
|
def SDTIL_LComp2 : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisVT<0, v2i32>, SDTCisVT<1, v2i64>
|
||||||
|
]>;
|
||||||
|
def SDTIL_DCreate : SDTypeProfile<1, 2, [
|
||||||
|
SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>
|
||||||
|
]>;
|
||||||
|
def SDTIL_DComp : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisVT<0, i32>, SDTCisVT<1, f64>
|
||||||
|
]>;
|
||||||
|
def SDTIL_DCreate2 : SDTypeProfile<1, 2, [
|
||||||
|
SDTCisVT<0, v2f64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2>
|
||||||
|
]>;
|
||||||
|
def SDTIL_DComp2 : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisVT<0, v2i32>, SDTCisVT<1, v2f64>
|
||||||
|
]>;
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Flow Control Profile Types
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Profile for Normal Call
|
||||||
|
def SDTIL_Call : SDTypeProfile<0, 1, [
|
||||||
|
SDTCisVT<0, i32>
|
||||||
|
]>;
|
||||||
|
// Branch instruction where second and third are basic blocks
|
||||||
|
def SDTIL_BRCond : SDTypeProfile<0, 2, [
|
||||||
|
SDTCisVT<0, OtherVT>
|
||||||
|
]>;
|
||||||
|
// Comparison instruction
|
||||||
|
def SDTIL_Cmp : SDTypeProfile<1, 3, [
|
||||||
|
SDTCisSameAs<0, 2>, SDTCisSameAs<2,3>, SDTCisVT<1, i32>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Call Sequence Profiles
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def SDTIL_CallSeqStart : SDCallSeqStart< [
|
||||||
|
SDTCisVT<0, i32>
|
||||||
|
]>;
|
||||||
|
def SDTIL_CallSeqEnd : SDCallSeqEnd< [
|
||||||
|
SDTCisVT<0, i32>, SDTCisVT<1, i32>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Image Operation Profiles
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def SDTIL_ImageRead : SDTypeProfile<1, 3,
|
||||||
|
[SDTCisVT<0, v4i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, v4f32>]>;
|
||||||
|
def SDTIL_ImageWrite : SDTypeProfile<0, 3,
|
||||||
|
[SDTCisPtrTy<0>, SDTCisVT<1, v2i32>, SDTCisVT<2, v4i32>]>;
|
||||||
|
def SDTIL_ImageWrite3D : SDTypeProfile<0, 3,
|
||||||
|
[SDTCisPtrTy<0>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>]>;
|
||||||
|
def SDTIL_ImageInfo : SDTypeProfile<1, 1,
|
||||||
|
[SDTCisVT<0, v4i32>, SDTCisPtrTy<1>]>;
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Atomic Operation Profiles
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def SDTIL_UniAtomNoRet : SDTypeProfile<0, 2, [
|
||||||
|
SDTCisPtrTy<0>, SDTCisVT<1, i32>
|
||||||
|
]>;
|
||||||
|
def SDTIL_BinAtomNoRet : SDTypeProfile<0, 3, [
|
||||||
|
SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
|
||||||
|
]>;
|
||||||
|
def SDTIL_TriAtomNoRet : SDTypeProfile<0, 4, [
|
||||||
|
SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>
|
||||||
|
]>;
|
||||||
|
def SDTIL_UniAtom : SDTypeProfile<1, 2, [
|
||||||
|
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
|
||||||
|
]>;
|
||||||
|
def SDTIL_BinAtom : SDTypeProfile<1, 3, [
|
||||||
|
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>
|
||||||
|
]>;
|
||||||
|
def SDTIL_TriAtom : SDTypeProfile<1, 4, [
|
||||||
|
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>,
|
||||||
|
SDTCisVT<3, i32>, SDTCisVT<4, i32>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
def SDTIL_BinAtomFloat : SDTypeProfile<1, 3, [
|
||||||
|
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, f32>, SDTCisVT<3, f32>
|
||||||
|
]>;
|
||||||
|
def SDTIL_BinAtomNoRetFloat : SDTypeProfile<0, 3, [
|
||||||
|
SDTCisPtrTy<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
def SDTIL_Append : SDTypeProfile<1, 1, [
|
||||||
|
SDTCisVT<0, i32>, SDTCisPtrTy<1>
|
||||||
|
]>;
|
162
lib/Target/AMDGPU/AMDILRegisterInfo.cpp
Normal file
162
lib/Target/AMDGPU/AMDILRegisterInfo.cpp
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
//===- AMDILRegisterInfo.cpp - AMDIL Register Information -------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the AMDIL implementation of the TargetRegisterInfo class.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDILRegisterInfo.h"
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "AMDILInstrInfo.h"
|
||||||
|
#include "llvm/ADT/BitVector.h"
|
||||||
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Construct the AMDIL register info, remembering the owning target machine
/// and instruction info and starting with an empty stack accounting window.
AMDILRegisterInfo::AMDILRegisterInfo(TargetMachine &tm,
                                     const TargetInstrInfo &tii)
  : AMDILGenRegisterInfo(0), // RA???
    TM(tm),
    TII(tii),
    // Both offsets start at zero; processFunctionBeforeFrameFinalized()
    // advances nextFuncOffset as frame objects are counted.
    baseOffset(0),
    nextFuncOffset(0)
{
}
|
||||||
|
|
||||||
|
const uint16_t*
|
||||||
|
AMDILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
|
||||||
|
{
|
||||||
|
static const uint16_t CalleeSavedRegs[] = { 0 };
|
||||||
|
// TODO: Does IL need to actually have any callee saved regs?
|
||||||
|
// I don't think we do since we can just use sequential registers
|
||||||
|
// Maybe this would be easier if every function call was inlined first
|
||||||
|
// and then there would be no callee issues to deal with
|
||||||
|
//TODO(getCalleeSavedRegs);
|
||||||
|
return CalleeSavedRegs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the set of registers that must never be handed out by the register
/// allocator.
///
/// The low-numbered registers are the ones passed in live-in/live-out and
/// therefore cannot be killed by the scheduler. Reserving them works around a
/// bug that was causing the linearscan register allocator to kill registers
/// inside of the function that were also passed as LiveIn registers.
///
/// \returns a BitVector of getNumRegs() bits with the reserved registers set.
BitVector
AMDILRegisterInfo::getReservedRegs(const MachineFunction &MF) const
{
  const unsigned NumRegs = getNumRegs();
  BitVector Reserved(NumRegs);
  // Reserve the first 256 registers, but never index past the end of the
  // vector: BitVector::set(Idx) does not grow the vector, so using a fixed
  // bound of 256 is out of bounds whenever fewer registers are defined.
  const unsigned Limit = NumRegs < 256 ? NumRegs : 256;
  for (unsigned Reg = 0; Reg < Limit; ++Reg) {
    Reserved.set(Reg);
  }
  return Reserved;
}
|
||||||
|
|
||||||
|
/// Return the set of allocatable registers. No register is reported as
/// allocatable: the result has one bit per register and every bit is zero.
///
/// \param MF unused.
/// \param RC unused.
BitVector
AMDILRegisterInfo::getAllocatableSet(const MachineFunction &MF,
    const TargetRegisterClass *RC = NULL) const
{
  // A freshly constructed BitVector already has every bit cleared. The former
  // call to BitVector::clear() did not zero the bits, it shrank the vector to
  // size 0, so the result could not be indexed by register number.
  BitVector Allocatable(getNumRegs());
  return Allocatable;
}
|
||||||
|
|
||||||
|
/// Return the zero-terminated list of register classes matching
/// getCalleeSavedRegs(). The list is currently empty.
const TargetRegisterClass* const*
AMDILRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
{
  // TODO: Keep in sync with getCalleeSavedRegs
  //TODO(getCalleeSavedRegClasses);
  static const TargetRegisterClass * const NoCalleeSavedClasses[] = { 0 };
  return NoCalleeSavedClasses;
}
|
||||||
|
void
|
||||||
|
AMDILRegisterInfo::eliminateCallFramePseudoInstr(
|
||||||
|
MachineFunction &MF,
|
||||||
|
MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator I) const
|
||||||
|
{
|
||||||
|
MBB.erase(I);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For each frame index we find, we store the offset in the stack which is
|
||||||
|
// being pushed back into the global buffer. The offset into the stack where
|
||||||
|
// the value is stored is copied into a new register and the frame index is
|
||||||
|
// then replaced with that register.
|
||||||
|
void
|
||||||
|
AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
|
int SPAdj,
|
||||||
|
RegScavenger *RS) const
|
||||||
|
{
|
||||||
|
assert(!"Implement");
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
AMDILRegisterInfo::processFunctionBeforeFrameFinalized(
|
||||||
|
MachineFunction &MF) const
|
||||||
|
{
|
||||||
|
//TODO(processFunctionBeforeFrameFinalized);
|
||||||
|
// Here we keep track of the amount of stack that the current function
|
||||||
|
// uses so
|
||||||
|
// that we can set the offset to the end of the stack and any other
|
||||||
|
// function call
|
||||||
|
// will not overwrite any stack variables.
|
||||||
|
// baseOffset = nextFuncOffset;
|
||||||
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
|
|
||||||
|
for (uint32_t x = 0, y = MFI->getNumObjects(); x < y; ++x) {
|
||||||
|
int64_t size = MFI->getObjectSize(x);
|
||||||
|
if (!(size % 4) && size > 1) {
|
||||||
|
nextFuncOffset += size;
|
||||||
|
} else {
|
||||||
|
nextFuncOffset += 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsigned int
|
||||||
|
AMDILRegisterInfo::getRARegister() const
|
||||||
|
{
|
||||||
|
return AMDGPU::RA;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
AMDILRegisterInfo::getFrameRegister(const MachineFunction &MF) const
|
||||||
|
{
|
||||||
|
return AMDGPU::FP;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
AMDILRegisterInfo::getEHExceptionRegister() const
|
||||||
|
{
|
||||||
|
assert(0 && "What is the exception register");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
AMDILRegisterInfo::getEHHandlerRegister() const
|
||||||
|
{
|
||||||
|
assert(0 && "What is the exception handler register");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t
|
||||||
|
AMDILRegisterInfo::getStackSize() const
|
||||||
|
{
|
||||||
|
return nextFuncOffset - baseOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define GET_REGINFO_TARGET_DESC
|
||||||
|
#include "AMDGPUGenRegisterInfo.inc"
|
||||||
|
|
95
lib/Target/AMDGPU/AMDILRegisterInfo.h
Normal file
95
lib/Target/AMDGPU/AMDILRegisterInfo.h
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
//===- AMDILRegisterInfo.h - AMDIL Register Information Impl ----*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file contains the AMDIL implementation of the TargetRegisterInfo class.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDILREGISTERINFO_H_
|
||||||
|
#define AMDILREGISTERINFO_H_
|
||||||
|
|
||||||
|
#include "llvm/Target/TargetRegisterInfo.h"
|
||||||
|
|
||||||
|
#define GET_REGINFO_HEADER
|
||||||
|
#include "AMDGPUGenRegisterInfo.inc"
|
||||||
|
// See header file for explanation
|
||||||
|
|
||||||
|
namespace llvm
|
||||||
|
{
|
||||||
|
|
||||||
|
class TargetInstrInfo;
|
||||||
|
class Type;
|
||||||
|
|
||||||
|
/// DWARFFlavour - Flavour of dwarf regnumbers
|
||||||
|
///
|
||||||
|
namespace DWARFFlavour {
|
||||||
|
enum {
|
||||||
|
AMDIL_Generic = 0
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
struct AMDILRegisterInfo : public AMDILGenRegisterInfo
|
||||||
|
{
|
||||||
|
TargetMachine &TM;
|
||||||
|
const TargetInstrInfo &TII;
|
||||||
|
|
||||||
|
AMDILRegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
|
||||||
|
/// Code Generation virtual methods...
|
||||||
|
const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const;
|
||||||
|
|
||||||
|
const TargetRegisterClass* const*
|
||||||
|
getCalleeSavedRegClasses(
|
||||||
|
const MachineFunction *MF = 0) const;
|
||||||
|
|
||||||
|
BitVector
|
||||||
|
getReservedRegs(const MachineFunction &MF) const;
|
||||||
|
BitVector
|
||||||
|
getAllocatableSet(const MachineFunction &MF,
|
||||||
|
const TargetRegisterClass *RC) const;
|
||||||
|
|
||||||
|
void
|
||||||
|
eliminateCallFramePseudoInstr(
|
||||||
|
MachineFunction &MF,
|
||||||
|
MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator I) const;
|
||||||
|
void
|
||||||
|
eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
|
int SPAdj, RegScavenger *RS = NULL) const;
|
||||||
|
|
||||||
|
void
|
||||||
|
processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
|
||||||
|
|
||||||
|
// Debug information queries.
|
||||||
|
unsigned int
|
||||||
|
getRARegister() const;
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
getFrameRegister(const MachineFunction &MF) const;
|
||||||
|
|
||||||
|
// Exception handling queries.
|
||||||
|
unsigned int
|
||||||
|
getEHExceptionRegister() const;
|
||||||
|
unsigned int
|
||||||
|
getEHHandlerRegister() const;
|
||||||
|
|
||||||
|
int64_t
|
||||||
|
getStackSize() const;
|
||||||
|
|
||||||
|
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT)
|
||||||
|
const {
|
||||||
|
return &AMDGPU::GPRI32RegClass;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
mutable int64_t baseOffset;
|
||||||
|
mutable int64_t nextFuncOffset;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace llvm
|
||||||
|
|
||||||
|
#endif // AMDILREGISTERINFO_H_
|
110
lib/Target/AMDGPU/AMDILRegisterInfo.td
Normal file
110
lib/Target/AMDGPU/AMDILRegisterInfo.td
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Declarations that describe the AMDIL register file
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
class AMDILReg<bits<16> num, string n> : Register<n> {
|
||||||
|
field bits<16> Value;
|
||||||
|
let Value = num;
|
||||||
|
let Namespace = "AMDGPU";
|
||||||
|
}
|
||||||
|
|
||||||
|
// We will start with 20 registers (r1-r20) for each class before expanding to more
|
||||||
|
// Since the swizzle is added based on the register class, we can leave it
|
||||||
|
// off here and just specify different registers for different register classes
|
||||||
|
def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
|
||||||
|
def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
|
||||||
|
def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
|
||||||
|
def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
|
||||||
|
def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
|
||||||
|
def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
|
||||||
|
def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
|
||||||
|
def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
|
||||||
|
def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
|
||||||
|
def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
|
||||||
|
def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
|
||||||
|
def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
|
||||||
|
def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
|
||||||
|
def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
|
||||||
|
def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
|
||||||
|
def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
|
||||||
|
def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
|
||||||
|
def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
|
||||||
|
def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
|
||||||
|
def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
|
||||||
|
|
||||||
|
// All registers between 1000 and 1024 are reserved and cannot be used
|
||||||
|
// unless commented in this section
|
||||||
|
// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
|
||||||
|
// r1020 is used to hold the frame index for local arrays
|
||||||
|
// r1019 is used to hold the dynamic stack allocation pointer
|
||||||
|
// r1018 is used as a temporary register for handwritten code
|
||||||
|
// r1017 is used as a temporary register for handwritten code
|
||||||
|
// r1016 is used as a temporary register for load/store code
|
||||||
|
// r1015 is used as a temporary register for data segment offset
|
||||||
|
// r1014 is used as a temporary register for store code
|
||||||
|
// r1013 is used as the section data pointer register
|
||||||
|
// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
|
||||||
|
// r1009 is used as the frame pointer register
|
||||||
|
// r999 is used as the mem register.
|
||||||
|
// r998 is used as the return address register.
|
||||||
|
//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
|
||||||
|
//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
|
||||||
|
//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
|
||||||
|
//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
|
||||||
|
//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
|
||||||
|
//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
|
||||||
|
def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
|
||||||
|
def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
|
||||||
|
def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
|
||||||
|
def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
|
||||||
|
def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
|
||||||
|
def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
|
||||||
|
def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
|
||||||
|
def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
|
||||||
|
def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
|
||||||
|
def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
|
||||||
|
def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
|
||||||
|
def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
|
||||||
|
def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
|
||||||
|
def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
|
||||||
|
def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
|
||||||
|
def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
|
||||||
|
def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
|
||||||
|
def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
|
||||||
|
def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
|
||||||
|
def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
|
||||||
|
def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
|
||||||
|
def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
|
||||||
|
def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
|
||||||
|
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||||
|
{
|
||||||
|
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||||
|
let AltOrderSelect = [{
|
||||||
|
return 1;
|
||||||
|
}];
|
||||||
|
}
|
||||||
|
def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
|
||||||
|
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||||
|
{
|
||||||
|
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||||
|
let AltOrderSelect = [{
|
||||||
|
return 1;
|
||||||
|
}];
|
||||||
|
}
|
||||||
|
def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
|
||||||
|
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||||
|
{
|
||||||
|
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||||
|
let AltOrderSelect = [{
|
||||||
|
return 1;
|
||||||
|
}];
|
||||||
|
}
|
49
lib/Target/AMDGPU/AMDILSIDevice.cpp
Normal file
49
lib/Target/AMDGPU/AMDILSIDevice.cpp
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
#include "AMDILSIDevice.h"
|
||||||
|
#include "AMDILEvergreenDevice.h"
|
||||||
|
#include "AMDILNIDevice.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
|
||||||
|
: AMDILEvergreenDevice(ST)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
AMDILSIDevice::~AMDILSIDevice()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
AMDILSIDevice::getMaxLDSSize() const
|
||||||
|
{
|
||||||
|
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
|
||||||
|
return MAX_LDS_SIZE_900;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
AMDILSIDevice::getGeneration() const
|
||||||
|
{
|
||||||
|
return AMDILDeviceInfo::HD7XXX;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
AMDILSIDevice::getDataLayout() const
|
||||||
|
{
|
||||||
|
return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
|
||||||
|
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||||
|
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||||
|
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||||
|
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||||
|
"-n8:16:32:64");
|
||||||
|
}
|
45
lib/Target/AMDGPU/AMDILSIDevice.h
Normal file
45
lib/Target/AMDGPU/AMDILSIDevice.h
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface for the subtarget data classes.
|
||||||
|
//
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// This file will define the interface that each generation needs to
|
||||||
|
// implement in order to correctly answer queries on the capabilities of the
|
||||||
|
// specific hardware.
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
#ifndef _AMDILSIDEVICE_H_
|
||||||
|
#define _AMDILSIDEVICE_H_
|
||||||
|
#include "AMDILEvergreenDevice.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class AMDILSubtarget;
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
// SI generation of devices and their respective sub classes
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// The AMDILSIDevice is the base class for all Southern Islands series of
|
||||||
|
// cards. It is very similar to the AMDILEvergreenDevice, with the major
|
||||||
|
// exception being differences in wavefront size and hardware capabilities. The
|
||||||
|
// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
||||||
|
// integer operations
|
||||||
|
|
||||||
|
class AMDILSIDevice : public AMDILEvergreenDevice {
|
||||||
|
public:
|
||||||
|
AMDILSIDevice(AMDILSubtarget*);
|
||||||
|
virtual ~AMDILSIDevice();
|
||||||
|
virtual size_t getMaxLDSSize() const;
|
||||||
|
virtual uint32_t getGeneration() const;
|
||||||
|
virtual std::string getDataLayout() const;
|
||||||
|
protected:
|
||||||
|
}; // AMDILSIDevice
|
||||||
|
|
||||||
|
} // namespace llvm
|
||||||
|
#endif // _AMDILSIDEVICE_H_
|
178
lib/Target/AMDGPU/AMDILSubtarget.cpp
Normal file
178
lib/Target/AMDGPU/AMDILSubtarget.cpp
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
//===- AMDILSubtarget.cpp - AMDIL Subtarget Information -------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file implements the AMD IL specific subclass of TargetSubtarget.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "AMDILDevices.h"
|
||||||
|
#include "AMDILUtilityFunctions.h"
|
||||||
|
#include "llvm/ADT/SmallVector.h"
|
||||||
|
#include "llvm/ADT/StringExtras.h"
|
||||||
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
#include "llvm/MC/SubtargetFeature.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
#define GET_SUBTARGETINFO_ENUM
|
||||||
|
#define GET_SUBTARGETINFO_CTOR
|
||||||
|
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||||
|
#include "AMDGPUGenSubtargetInfo.inc"
|
||||||
|
|
||||||
|
/// Construct the subtarget from a target triple, CPU name and feature string.
///
/// Features handled here rather than by the tblgen-generated parser:
///   - "+mwgs-<dims>-<s0>[-<s1>[-<s2>]]": default work-group size; <dims> is
///     clamped to 3 and to the number of size fields actually supplied.
///   - "+cal=<version>": CAL version to target.
///   - "test" (debug builds only): parse features as "kauai" and name the
///     device "test".
/// Every other feature is forwarded to ParseSubtargetFeatures().
///
/// \param TT  target triple, forwarded to AMDILGenSubtargetInfo.
/// \param CPU device name; when empty the default card "rv770" is used.
/// \param FS  comma separated feature string.
AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU,
                               llvm::StringRef FS)
  : AMDILGenSubtargetInfo( TT, CPU, FS ),
    mDumpCode(false)
{
  memset(CapsOverride, 0, sizeof(*CapsOverride)
      * AMDILDeviceInfo::MaxNumberCapabilities);
  // The managers are only installed later through the setters; start them out
  // null so the getters never return an indeterminate pointer.
  mGM = 0;
  mKM = 0;
  mDevice = 0;
  mIs64bit = false;
  mIs32on64bit = false;
  mVersion = 0;
  mDefaultSize[0] = 64;
  mDefaultSize[1] = 1;
  mDefaultSize[2] = 1;

  // Default card, used only when the caller did not name one.
  std::string GPU = "rv770";
  if (!CPU.empty()) {
    GPU = CPU.str();
  }

  SmallVector<StringRef, DEFAULT_VEC_SLOTS> Features;
  SplitString(FS, Features, ",");
  std::string newFeatures = "";
#if defined(_DEBUG) || defined(DEBUG)
  bool useTest = false;
#endif
  for (size_t x = 0; x < Features.size(); ++x) {
    if (Features[x].startswith("+mwgs")) {
      SmallVector<StringRef, DEFAULT_VEC_SLOTS> sizes;
      SplitString(Features[x], sizes, "-");
      // sizes[0] is "+mwgs", sizes[1] the dimension count, sizes[2..] the
      // per-dimension sizes. Ignore a malformed feature without a count.
      if (sizes.size() > 1) {
        // StringRef data is not guaranteed to be NUL-terminated, so copy the
        // field into a std::string before handing it to atoi().
        size_t mDim = ::atoi(sizes[1].str().c_str());
        if (mDim > 3) {
          mDim = 3;
        }
        // Never read more size fields than were actually supplied.
        const size_t numSizes = sizes.size() - 2;
        if (mDim > numSizes) {
          mDim = numSizes;
        }
        for (size_t y = 0; y < mDim; ++y) {
          mDefaultSize[y] = ::atoi(sizes[y+2].str().c_str());
        }
      }
#if defined(_DEBUG) || defined(DEBUG)
    } else if (Features[x] == "test") {
      useTest = true;
#endif
    } else if (Features[x].startswith("+cal")) {
      SmallVector<StringRef, DEFAULT_VEC_SLOTS> version;
      SplitString(Features[x], version, "=");
      // Only accept "+cal=<n>"; a bare "+cal" leaves the version unset.
      if (version.size() > 1) {
        mVersion = ::atoi(version[1].str().c_str());
      }
    } else {
      // Separate forwarded features with commas. Keying on the output string
      // (not the input index) avoids a leading comma when earlier features
      // were consumed above.
      if (!newFeatures.empty()) newFeatures += ',';
      newFeatures += Features[x].str();
    }
  }
  // If we don't have a version then set it to
  // -1 which enables everything. This is for
  // offline devices.
  if (!mVersion) {
    mVersion = (uint32_t)-1;
  }
  for (int x = 0; x < 3; ++x) {
    if (!mDefaultSize[x]) {
      mDefaultSize[x] = 1;
    }
  }
#if defined(_DEBUG) || defined(DEBUG)
  if (useTest) {
    GPU = "kauai";
  }
#endif
  ParseSubtargetFeatures(GPU, newFeatures);
#if defined(_DEBUG) || defined(DEBUG)
  if (useTest) {
    GPU = "test";
  }
#endif
  mDevName = GPU;
  mDevice = AMDILDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
}
|
||||||
|
AMDILSubtarget::~AMDILSubtarget()
|
||||||
|
{
|
||||||
|
delete mDevice;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
AMDILSubtarget::isOverride(AMDILDeviceInfo::Caps caps) const
|
||||||
|
{
|
||||||
|
assert(caps < AMDILDeviceInfo::MaxNumberCapabilities &&
|
||||||
|
"Caps index is out of bounds!");
|
||||||
|
return CapsOverride[caps];
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
AMDILSubtarget::is64bit() const
|
||||||
|
{
|
||||||
|
return mIs64bit;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
AMDILSubtarget::isTargetELF() const
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
size_t
|
||||||
|
AMDILSubtarget::getDefaultSize(uint32_t dim) const
|
||||||
|
{
|
||||||
|
if (dim > 3) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return mDefaultSize[dim];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint32_t
|
||||||
|
AMDILSubtarget::calVersion() const
|
||||||
|
{
|
||||||
|
return mVersion;
|
||||||
|
}
|
||||||
|
|
||||||
|
AMDILGlobalManager*
|
||||||
|
AMDILSubtarget::getGlobalManager() const
|
||||||
|
{
|
||||||
|
return mGM;
|
||||||
|
}
|
||||||
|
void
|
||||||
|
AMDILSubtarget::setGlobalManager(AMDILGlobalManager *gm) const
|
||||||
|
{
|
||||||
|
mGM = gm;
|
||||||
|
}
|
||||||
|
|
||||||
|
AMDILKernelManager*
|
||||||
|
AMDILSubtarget::getKernelManager() const
|
||||||
|
{
|
||||||
|
return mKM;
|
||||||
|
}
|
||||||
|
void
|
||||||
|
AMDILSubtarget::setKernelManager(AMDILKernelManager *km) const
|
||||||
|
{
|
||||||
|
mKM = km;
|
||||||
|
}
|
||||||
|
std::string
|
||||||
|
AMDILSubtarget::getDataLayout() const
|
||||||
|
{
|
||||||
|
if (!mDevice) {
|
||||||
|
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
||||||
|
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||||
|
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||||
|
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||||
|
"-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
|
||||||
|
}
|
||||||
|
return mDevice->getDataLayout();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
AMDILSubtarget::getDeviceName() const
|
||||||
|
{
|
||||||
|
return mDevName;
|
||||||
|
}
|
||||||
|
const AMDILDevice *
|
||||||
|
AMDILSubtarget::device() const
|
||||||
|
{
|
||||||
|
return mDevice;
|
||||||
|
}
|
76
lib/Target/AMDGPU/AMDILSubtarget.h
Normal file
76
lib/Target/AMDGPU/AMDILSubtarget.h
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
//=====-- AMDILSubtarget.h - Define Subtarget for the AMDIL ----*- C++ -*-====//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file declares the AMDIL specific subclass of TargetSubtarget.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef _AMDILSUBTARGET_H_
|
||||||
|
#define _AMDILSUBTARGET_H_
|
||||||
|
|
||||||
|
#include "AMDILDevice.h"
|
||||||
|
#include "llvm/ADT/BitVector.h"
|
||||||
|
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#define GET_SUBTARGETINFO_HEADER
|
||||||
|
#include "AMDGPUGenSubtargetInfo.inc"
|
||||||
|
|
||||||
|
#define MAX_CB_SIZE (1 << 16)
|
||||||
|
namespace llvm {
|
||||||
|
class Module;
|
||||||
|
class AMDILKernelManager;
|
||||||
|
class AMDILGlobalManager;
|
||||||
|
class AMDILDevice;
|
||||||
|
class AMDILSubtarget : public AMDILGenSubtargetInfo {
|
||||||
|
private:
|
||||||
|
bool CapsOverride[AMDILDeviceInfo::MaxNumberCapabilities];
|
||||||
|
mutable AMDILGlobalManager *mGM;
|
||||||
|
mutable AMDILKernelManager *mKM;
|
||||||
|
const AMDILDevice *mDevice;
|
||||||
|
size_t mDefaultSize[3];
|
||||||
|
std::string mDevName;
|
||||||
|
uint32_t mVersion;
|
||||||
|
bool mIs64bit;
|
||||||
|
bool mIs32on64bit;
|
||||||
|
bool mDumpCode;
|
||||||
|
public:
|
||||||
|
AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS);
|
||||||
|
virtual ~AMDILSubtarget();
|
||||||
|
bool isOverride(AMDILDeviceInfo::Caps) const;
|
||||||
|
bool is64bit() const;
|
||||||
|
|
||||||
|
// Helper functions to simplify if statements
|
||||||
|
bool isTargetELF() const;
|
||||||
|
AMDILGlobalManager* getGlobalManager() const;
|
||||||
|
void setGlobalManager(AMDILGlobalManager *gm) const;
|
||||||
|
AMDILKernelManager* getKernelManager() const;
|
||||||
|
void setKernelManager(AMDILKernelManager *gm) const;
|
||||||
|
const AMDILDevice* device() const;
|
||||||
|
std::string getDataLayout() const;
|
||||||
|
std::string getDeviceName() const;
|
||||||
|
virtual size_t getDefaultSize(uint32_t dim) const;
|
||||||
|
// Return the version of CAL that the backend should target.
|
||||||
|
uint32_t calVersion() const;
|
||||||
|
// ParseSubtargetFeatures - Parses features string setting specified
|
||||||
|
// subtarget options. Definition of function is
|
||||||
|
//auto generated by tblgen.
|
||||||
|
void
|
||||||
|
ParseSubtargetFeatures(
|
||||||
|
llvm::StringRef CPU,
|
||||||
|
llvm::StringRef FS);
|
||||||
|
bool dumpCode() const { return mDumpCode; }
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace llvm
|
||||||
|
|
||||||
|
#endif // _AMDILSUBTARGET_H_
|
120
lib/Target/AMDGPU/AMDILTokenDesc.td
Normal file
120
lib/Target/AMDGPU/AMDILTokenDesc.td
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
//===-- AMDILTokenDesc.td - AMDIL Token Definitions --*- tablegen -*-----===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
include "AMDILEnumeratedTypes.td"
|
||||||
|
|
||||||
|
// Each token is 32 bits as specified in section 2.1 of the IL spec
|
||||||
|
class ILToken <bits<32> n> {
|
||||||
|
field bits<32> _bits = n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section 2.2.1 - IL Language Token
|
||||||
|
class ILLang<bits<8> client_type> : ILToken<0> {
|
||||||
|
let _bits{0-7} = client_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section 2.2.2 - IL Version Token
|
||||||
|
class ILVersion<bits<8> minor_version, bits<8> major_version, ILShader shader_type> : ILToken<0> {
|
||||||
|
let _bits{0-7} = minor_version;
|
||||||
|
let _bits{8-15} = major_version;
|
||||||
|
let _bits{16-23} = shader_type.Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section 2.2.3 - IL Opcode Token
|
||||||
|
class ILOpcode<ILOpCode opcode, bits<14> control, bit sec_mod_pre, bit pri_mod_pre> : ILToken<0> {
|
||||||
|
let _bits{0-15} = opcode.Value;
|
||||||
|
let _bits{16-29} = control;
|
||||||
|
let _bits{30} = sec_mod_pre;
|
||||||
|
let _bits{31} = pri_mod_pre;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section 2.2.4 - IL Destination Token
|
||||||
|
class ILDst<AMDILReg register_num, ILRegType register_type, bit mod_pre, bits<2> relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> {
|
||||||
|
let _bits{0-15} = register_num.Value;
|
||||||
|
let _bits{16-21} = register_type.Value;
|
||||||
|
let _bits{22} = mod_pre;
|
||||||
|
let _bits{23-24} = relative_address;
|
||||||
|
let _bits{25} = dimension;
|
||||||
|
let _bits{26} = immediate_pre;
|
||||||
|
let _bits{31} = extended;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section 2.2.5 - IL Destination Modifier Token
|
||||||
|
class ILDstMod<ILModDstComp x, ILModDstComp y, ILModDstComp z, ILModDstComp w, bit clamp, ILShiftScale shift_scale> : ILToken<0> {
|
||||||
|
let _bits{0-1} = x.Value;
|
||||||
|
let _bits{2-3} = y.Value;
|
||||||
|
let _bits{4-5} = z.Value;
|
||||||
|
let _bits{6-7} = w.Value;
|
||||||
|
let _bits{8} = clamp;
|
||||||
|
//let _bits{9-12} = shift_scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section 2.2.6 - IL Source Token
|
||||||
|
class ILSrc<AMDILReg register_num, ILRegType register_type, bit mod_pre, bits<2> relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> {
|
||||||
|
let _bits{0-15} = register_num.Value;
|
||||||
|
let _bits{16-21} = register_type.Value;
|
||||||
|
let _bits{22} = mod_pre;
|
||||||
|
let _bits{23-24} = relative_address;
|
||||||
|
let _bits{25} = dimension;
|
||||||
|
let _bits{26} = immediate_pre;
|
||||||
|
let _bits{31} = extended;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section 2.2.7 - IL Source Modifier Token
|
||||||
|
class ILSrcMod<ILComponentSelect swizzle_x, bit negate_x, ILComponentSelect swizzle_y, bit negate_y,
|
||||||
|
ILComponentSelect swizzle_z, bit negate_z, ILComponentSelect swizzle_w, bit negate_w,
|
||||||
|
bit invert, bit bias, bit x2, bit sign, bit abs, ILDivComp divComp,
|
||||||
|
bits<8> clamp> : ILToken<0> {
|
||||||
|
let _bits{0-2} = swizzle_x.Value;
|
||||||
|
let _bits{3} = negate_x;
|
||||||
|
let _bits{4-6} = swizzle_y.Value;
|
||||||
|
let _bits{7} = negate_y;
|
||||||
|
let _bits{8-10} = swizzle_z.Value;
|
||||||
|
let _bits{11} = negate_z;
|
||||||
|
let _bits{12-14} = swizzle_w.Value;
|
||||||
|
let _bits{15} = negate_w;
|
||||||
|
let _bits{16} = invert;
|
||||||
|
let _bits{17} = bias;
|
||||||
|
let _bits{18} = x2;
|
||||||
|
let _bits{19} = sign;
|
||||||
|
let _bits{20} = abs;
|
||||||
|
let _bits{21-23} = divComp.Value;
|
||||||
|
let _bits{24-31} = clamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section 2.2.8 - IL Relative Address Token
|
||||||
|
class ILRelAddr<AMDILReg address_register, bit loop_relative, ILAddressing component> : ILToken<0> {
|
||||||
|
let _bits{0-15} = address_register.Value;
|
||||||
|
let _bits{16} = loop_relative;
|
||||||
|
let _bits{17-19} = component.Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// IL Literal Token
|
||||||
|
class ILLiteral<bits<32> val> : ILToken<0> {
|
||||||
|
let _bits = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// All tokens required for a destination register
|
||||||
|
class ILDstReg<ILDst Reg, ILDstMod Mod, ILRelAddr Rel, ILSrc Reg_Rel, ILSrcMod Reg_Rel_Mod> {
|
||||||
|
ILDst reg = Reg;
|
||||||
|
ILDstMod mod = Mod;
|
||||||
|
ILRelAddr rel = Rel;
|
||||||
|
ILSrc reg_rel = Reg_Rel;
|
||||||
|
ILSrcMod reg_rel_mod = Reg_Rel_Mod;
|
||||||
|
}
|
||||||
|
|
||||||
|
// All tokens required for a source register
|
||||||
|
class ILSrcReg<ILSrc Reg, ILSrcMod Mod, ILRelAddr Rel, ILSrc Reg_Rel, ILSrcMod Reg_Rel_Mod> {
|
||||||
|
ILSrc reg = Reg;
|
||||||
|
ILSrcMod mod = Mod;
|
||||||
|
ILRelAddr rel = Rel;
|
||||||
|
ILSrc reg_rel = Reg_Rel;
|
||||||
|
ILSrcMod reg_rel_mod = Reg_Rel_Mod;
|
||||||
|
}
|
||||||
|
|
75
lib/Target/AMDGPU/AMDILUtilityFunctions.h
Normal file
75
lib/Target/AMDGPU/AMDILUtilityFunctions.h
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//==-----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file provides helper macros for expanding case statements.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
#ifndef AMDILUTILITYFUNCTIONS_H_
|
||||||
|
#define AMDILUTILITYFUNCTIONS_H_
|
||||||
|
|
||||||
|
// Macros that are used to help with switch statements for various data types.
// Each macro expands to one or more `case` labels whose names are formed by
// pasting a type suffix (_i32, _f32, ...) onto Instr.  Unlike the second set
// below, these macros do not return anything: the statement that follows the
// labels is supplied at the point of use.
#define ExpandCaseTo32bitIntTypes(Instr) \
  case Instr##_i32:

#define ExpandCaseTo32bitIntTruncTypes(Instr) \
  case Instr##_i32i8: \
  case Instr##_i32i16:

#define ExpandCaseToIntTypes(Instr) \
  ExpandCaseTo32bitIntTypes(Instr)

#define ExpandCaseToIntTruncTypes(Instr) \
  ExpandCaseTo32bitIntTruncTypes(Instr)

#define ExpandCaseToFloatTypes(Instr) \
  case Instr##_f32:

// ExpandCaseToAllScalarTruncTypes below expands this macro, but this header
// previously never defined it, so any use of the *AllScalarTrunc* /
// *AllTrunc* macros failed to compile.  This header lists no floating-point
// truncating suffixes, so the fallback contributes no case labels.  The
// #ifndef keeps a definition supplied before this header is included.
#ifndef ExpandCaseToFloatTruncTypes
#define ExpandCaseToFloatTruncTypes(Instr)
#endif

#define ExpandCaseTo32bitScalarTypes(Instr) \
  ExpandCaseTo32bitIntTypes(Instr) \
  case Instr##_f32:

#define ExpandCaseToAllScalarTypes(Instr) \
  ExpandCaseToFloatTypes(Instr) \
  ExpandCaseToIntTypes(Instr)

#define ExpandCaseToAllScalarTruncTypes(Instr) \
  ExpandCaseToFloatTruncTypes(Instr) \
  ExpandCaseToIntTruncTypes(Instr)

#define ExpandCaseToAllTypes(Instr) \
  ExpandCaseToAllScalarTypes(Instr)

#define ExpandCaseToAllTruncTypes(Instr) \
  ExpandCaseToAllScalarTruncTypes(Instr)

// Macros that expand into statements with return values: each generated case
// label is immediately followed by `return Return##<suffix>;`.
#define ExpandCaseTo32bitIntReturn(Instr, Return) \
  case Instr##_i32: return Return##_i32;

#define ExpandCaseToIntReturn(Instr, Return) \
  ExpandCaseTo32bitIntReturn(Instr, Return)

// (The stray trailing line-continuation that used to end this definition has
// been removed; the expansion is unchanged.)
#define ExpandCaseToFloatReturn(Instr, Return) \
  case Instr##_f32: return Return##_f32;

#define ExpandCaseToAllScalarReturn(Instr, Return) \
  ExpandCaseToFloatReturn(Instr, Return) \
  ExpandCaseToIntReturn(Instr, Return)

// These macros expand to common groupings of RegClass ID's
#define ExpandCaseTo1CompRegID \
  case AMDGPU::GPRI32RegClassID: \
  case AMDGPU::GPRF32RegClassID:

#define ExpandCaseTo32BitType(Instr) \
  case Instr##_i32: \
  case Instr##_f32:
|
||||||
|
|
||||||
|
#endif // AMDILUTILITYFUNCTIONS_H_
|
58
lib/Target/AMDGPU/AMDILVersion.td
Normal file
58
lib/Target/AMDGPU/AMDILVersion.td
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
//===-- AMDILVersion.td - Barrier Instruction/Intrinsic definitions------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
// Intrinsic operation support
|
||||||
|
//===--------------------------------------------------------------------===//
|
||||||
|
let TargetPrefix = "AMDIL", isTarget = 1 in {
|
||||||
|
def int_AMDIL_barrier : GCCBuiltin<"barrier">,
|
||||||
|
BinaryIntNoRetInt;
|
||||||
|
def int_AMDIL_barrier_global : GCCBuiltin<"barrierGlobal">,
|
||||||
|
BinaryIntNoRetInt;
|
||||||
|
def int_AMDIL_barrier_local : GCCBuiltin<"barrierLocal">,
|
||||||
|
BinaryIntNoRetInt;
|
||||||
|
def int_AMDIL_barrier_region : GCCBuiltin<"barrierRegion">,
|
||||||
|
BinaryIntNoRetInt;
|
||||||
|
def int_AMDIL_get_region_id : GCCBuiltin<"__amdil_get_region_id_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_region_local_id : GCCBuiltin<"__amdil_get_region_local_id_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_num_regions : GCCBuiltin<"__amdil_get_num_regions_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
def int_AMDIL_get_region_size : GCCBuiltin<"__amdil_get_region_size_int">,
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [], []>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let isCall=1, isNotDuplicable=1 in {
|
||||||
|
let Predicates=[hasRegionAS] in {
|
||||||
|
def BARRIER_EGNI : BinaryOpNoRet<IL_OP_BARRIER, (outs),
|
||||||
|
(ins GPRI32:$flag, GPRI32:$id),
|
||||||
|
"fence_threads_memory_lds_gds_gws",
|
||||||
|
[(int_AMDIL_barrier GPRI32:$flag, GPRI32:$id)]>;
|
||||||
|
}
|
||||||
|
let Predicates=[noRegionAS] in {
|
||||||
|
def BARRIER_7XX : BinaryOpNoRet<IL_OP_BARRIER, (outs),
|
||||||
|
(ins GPRI32:$flag, GPRI32:$id),
|
||||||
|
"fence_threads_memory_lds",
|
||||||
|
[(int_AMDIL_barrier GPRI32:$flag, GPRI32:$id)]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
def BARRIER_LOCAL : BinaryOpNoRet<IL_OP_BARRIER_LOCAL, (outs),
|
||||||
|
(ins GPRI32:$flag, GPRI32:$id),
|
||||||
|
"fence_threads_lds",
|
||||||
|
[(int_AMDIL_barrier_local GPRI32:$flag, GPRI32:$id)]>;
|
||||||
|
|
||||||
|
def BARRIER_GLOBAL : BinaryOpNoRet<IL_OP_BARRIER_GLOBAL, (outs),
|
||||||
|
(ins GPRI32:$flag, GPRI32:$id),
|
||||||
|
"fence_threads_memory",
|
||||||
|
[(int_AMDIL_barrier_global GPRI32:$flag, GPRI32:$id)]>;
|
||||||
|
|
||||||
|
def BARRIER_REGION : BinaryOpNoRet<IL_OP_BARRIER_REGION, (outs),
|
||||||
|
(ins GPRI32:$flag, GPRI32:$id),
|
||||||
|
"fence_threads_gds",
|
||||||
|
[(int_AMDIL_barrier_region GPRI32:$flag, GPRI32:$id)]>;
|
||||||
|
}
|
50
lib/Target/AMDGPU/CMakeLists.txt
Normal file
50
lib/Target/AMDGPU/CMakeLists.txt
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
|
||||||
|
|
||||||
|
tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
|
||||||
|
tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
|
||||||
|
tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
|
||||||
|
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
|
||||||
|
tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
|
||||||
|
tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
|
||||||
|
tablegen(LLVM AMDGPUGenCodeEmitter.inc -gen-emitter)
|
||||||
|
tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
|
||||||
|
add_public_tablegen_target(AMDGPUCommonTableGen)
|
||||||
|
|
||||||
|
add_llvm_target(AMDGPUCodeGen
|
||||||
|
AMDIL7XXDevice.cpp
|
||||||
|
AMDILCFGStructurizer.cpp
|
||||||
|
AMDILDevice.cpp
|
||||||
|
AMDILDeviceInfo.cpp
|
||||||
|
AMDILEvergreenDevice.cpp
|
||||||
|
AMDILFrameLowering.cpp
|
||||||
|
AMDILInstrInfo.cpp
|
||||||
|
AMDILIntrinsicInfo.cpp
|
||||||
|
AMDILISelDAGToDAG.cpp
|
||||||
|
AMDILISelLowering.cpp
|
||||||
|
AMDILNIDevice.cpp
|
||||||
|
AMDILPeepholeOptimizer.cpp
|
||||||
|
AMDILRegisterInfo.cpp
|
||||||
|
AMDILSIDevice.cpp
|
||||||
|
AMDILSubtarget.cpp
|
||||||
|
AMDGPUTargetMachine.cpp
|
||||||
|
AMDGPUISelLowering.cpp
|
||||||
|
AMDGPUConvertToISA.cpp
|
||||||
|
AMDGPUInstrInfo.cpp
|
||||||
|
AMDGPURegisterInfo.cpp
|
||||||
|
AMDGPUUtil.cpp
|
||||||
|
R600CodeEmitter.cpp
|
||||||
|
R600InstrInfo.cpp
|
||||||
|
R600ISelLowering.cpp
|
||||||
|
R600KernelParameters.cpp
|
||||||
|
R600MachineFunctionInfo.cpp
|
||||||
|
R600RegisterInfo.cpp
|
||||||
|
SIAssignInterpRegs.cpp
|
||||||
|
SICodeEmitter.cpp
|
||||||
|
SIInstrInfo.cpp
|
||||||
|
SIISelLowering.cpp
|
||||||
|
SIMachineFunctionInfo.cpp
|
||||||
|
SIRegisterInfo.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
add_subdirectory(TargetInfo)
|
||||||
|
add_subdirectory(MCTargetDesc)
|
13
lib/Target/AMDGPU/GENERATED_FILES
Normal file
13
lib/Target/AMDGPU/GENERATED_FILES
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
There are 3 files used by this backend that are generated by perl scripts:
|
||||||
|
|
||||||
|
- R600RegisterInfo.td
|
||||||
|
+ Generated with:
|
||||||
|
perl R600GenRegisterInfo.pl > R600RegisterInfo.td
|
||||||
|
|
||||||
|
- R600HwRegInfo.include
|
||||||
|
+ Generated with:
|
||||||
|
perl R600GenRegisterInfo.pl
|
||||||
|
|
||||||
|
- SIRegisterInfo.td
|
||||||
|
+ Generated with:
|
||||||
|
perl SIGenRegisterInfo.pl > SIRegisterInfo.td
|
32
lib/Target/AMDGPU/LLVMBuild.txt
Normal file
32
lib/Target/AMDGPU/LLVMBuild.txt
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
;===- ./lib/Target/AMDGPU/LLVMBuild.txt ------------------------*- Conf -*--===;
|
||||||
|
;
|
||||||
|
; The LLVM Compiler Infrastructure
|
||||||
|
;
|
||||||
|
; This file is distributed under the University of Illinois Open Source
|
||||||
|
; License. See LICENSE.TXT for details.
|
||||||
|
;
|
||||||
|
;===------------------------------------------------------------------------===;
|
||||||
|
;
|
||||||
|
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||||
|
;
|
||||||
|
; For more information on the LLVMBuild system, please see:
|
||||||
|
;
|
||||||
|
; http://llvm.org/docs/LLVMBuild.html
|
||||||
|
;
|
||||||
|
;===------------------------------------------------------------------------===;
|
||||||
|
|
||||||
|
[common]
|
||||||
|
subdirectories = MCTargetDesc TargetInfo
|
||||||
|
|
||||||
|
[component_0]
|
||||||
|
type = TargetGroup
|
||||||
|
name = AMDGPU
|
||||||
|
parent = Target
|
||||||
|
has_asmprinter = 0
|
||||||
|
|
||||||
|
[component_1]
|
||||||
|
type = Library
|
||||||
|
name = AMDGPUCodeGen
|
||||||
|
parent = AMDGPU
|
||||||
|
required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC AMDGPUInfo AMDGPUDesc
|
||||||
|
add_to_library_groups = AMDGPU
|
104
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
Normal file
104
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - TODO: Add brief description -------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file implements the AMDGPUMCAsmInfo assembly-syntax description.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPUMCAsmInfo.h"
|
||||||
|
#ifndef NULL
|
||||||
|
#define NULL 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
/// Builds the assembly-syntax description for the AMDGPU target by
/// overwriting the MCAsmInfo defaults field by field.
///
/// Neither \p T nor \p TT is read: every value assigned below is a constant.
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT)
    : MCAsmInfo() {
  // --- General syntax properties -------------------------------------------
  HasSubsectionsViaSymbols = true;
  HasMachoZeroFillDirective = false;
  HasMachoTBSSDirective = false;
  HasStaticCtorDtorReferenceInStaticMode = false;
  LinkerRequiresNonEmptyDwarfLines = true;
  MaxInstLength = 16;
  PCSymbol = "$";
  SeparatorString = "\n";

  // Comment, label and symbol-prefix spellings.
  CommentColumn = 40;
  CommentString = ";";
  LabelSuffix = ":";
  GlobalPrefix = "@";
  PrivateGlobalPrefix = ";.";
  LinkerPrivateGlobalPrefix = "!";

  // Markers wrapped around inline assembly; both begin with the comment
  // string set above.
  InlineAsmStart = ";#ASMSTART";
  InlineAsmEnd = ";#ASMEND";
  AssemblerDialect = 0;

  // Restrictions on symbol names.
  AllowQuotesInName = false;
  AllowNameToStartWithDigit = false;
  AllowPeriodsInName = false;

  // --- Data emission directives --------------------------------------------
  ZeroDirective = ".zero";
  AsciiDirective = ".ascii\t";
  AscizDirective = ".asciz\t";
  Data8bitsDirective = ".byte\t";
  Data16bitsDirective = ".short\t";
  Data32bitsDirective = ".long\t";
  Data64bitsDirective = ".quad\t";
  GPRel32Directive = NULL;
  SunStyleELFSectionSwitchSyntax = true;
  UsesELFSectionDirectiveForBSS = true;
  HasMicrosoftFastStdCallMangling = false;

  // --- Alignment information -----------------------------------------------
  AlignDirective = ".align\t";
  AlignmentIsInBytes = true;
  TextAlignFillValue = 0;

  // --- Global variable emission directives ---------------------------------
  GlobalDirective = ".global";
  ExternDirective = ".extern";
  HasSetDirective = false;
  HasAggressiveSymbolFolding = true;
  LCOMMDirectiveType = LCOMM::None;
  COMMDirectiveAlignmentIsInBytes = false;
  HasDotTypeDotSizeDirective = false;
  HasSingleParameterDotFile = true;
  HasNoDeadStrip = true;
  HasSymbolResolver = false;
  WeakRefDirective = ".weakref\t";
  WeakDefDirective = ".weakdef\t";
  LinkOnceDirective = NULL;
  HiddenVisibilityAttr = MCSA_Hidden;
  HiddenDeclarationVisibilityAttr = MCSA_Hidden;
  ProtectedVisibilityAttr = MCSA_Protected;

  // --- Dwarf emission directives -------------------------------------------
  HasLEB128 = true;
  SupportsDebugInformation = true;
  ExceptionsType = ExceptionHandling::None;
  DwarfUsesInlineInfoSection = false;
  DwarfSectionOffsetDirective = ".offset";
}
|
||||||
|
const char*
|
||||||
|
AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
|
||||||
|
{
|
||||||
|
switch (AS) {
|
||||||
|
default:
|
||||||
|
return NULL;
|
||||||
|
case 0:
|
||||||
|
return NULL;
|
||||||
|
};
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Always returns NULL; \p CTX is not consulted.
const MCSection *
AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
  return NULL;
}
|
30
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
Normal file
30
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file declares the AMDGPUMCAsmInfo class.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef AMDGPUMCASMINFO_H_
|
||||||
|
#define AMDGPUMCASMINFO_H_
|
||||||
|
|
||||||
|
#include "llvm/MC/MCAsmInfo.h"
|
||||||
|
namespace llvm {
|
||||||
|
class Target;
|
||||||
|
class StringRef;
|
||||||
|
|
||||||
|
class AMDGPUMCAsmInfo : public MCAsmInfo {
|
||||||
|
public:
|
||||||
|
explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
|
||||||
|
const char*
|
||||||
|
getDataASDirective(unsigned int Size, unsigned int AS) const;
|
||||||
|
const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
|
||||||
|
};
|
||||||
|
} // namespace llvm
|
||||||
|
#endif // AMDGPUMCASMINFO_H_
|
61
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
Normal file
61
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
#include "AMDGPUMCTargetDesc.h"
|
||||||
|
#include "AMDGPUMCAsmInfo.h"
|
||||||
|
#include "llvm/MC/MachineLocation.h"
|
||||||
|
#include "llvm/MC/MCCodeGenInfo.h"
|
||||||
|
#include "llvm/MC/MCInstrInfo.h"
|
||||||
|
#include "llvm/MC/MCRegisterInfo.h"
|
||||||
|
#include "llvm/MC/MCSubtargetInfo.h"
|
||||||
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
|
||||||
|
#define GET_INSTRINFO_MC_DESC
|
||||||
|
#include "AMDGPUGenInstrInfo.inc"
|
||||||
|
|
||||||
|
#define GET_SUBTARGETINFO_MC_DESC
|
||||||
|
#include "AMDGPUGenSubtargetInfo.inc"
|
||||||
|
|
||||||
|
#define GET_REGINFO_MC_DESC
|
||||||
|
#include "AMDGPUGenRegisterInfo.inc"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Heap-allocates an MCInstrInfo, initialises it with the TableGen-generated
/// InitAMDILMCInstrInfo() and returns the raw pointer.
static MCInstrInfo *createAMDGPUMCInstrInfo() {
  MCInstrInfo *InstrInfo = new MCInstrInfo();
  InitAMDILMCInstrInfo(InstrInfo);
  return InstrInfo;
}
|
||||||
|
|
||||||
|
/// Heap-allocates an MCRegisterInfo, initialises it with
/// InitAMDILMCRegisterInfo() and returns the raw pointer.
///
/// \p TT is not used; the second initialiser argument is the constant 0.
static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
  MCRegisterInfo *RegInfo = new MCRegisterInfo();
  InitAMDILMCRegisterInfo(RegInfo, 0);
  return RegInfo;
}
|
||||||
|
|
||||||
|
/// Heap-allocates an MCSubtargetInfo and initialises it with
/// InitAMDILMCSubtargetInfo(), forwarding \p TT, \p CPU and \p FS unchanged.
/// Returns the raw pointer.
static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT,
                                                    StringRef CPU,
                                                    StringRef FS) {
  MCSubtargetInfo *SubtargetInfo = new MCSubtargetInfo();
  InitAMDILMCSubtargetInfo(SubtargetInfo, TT, CPU, FS);
  return SubtargetInfo;
}
|
||||||
|
|
||||||
|
/// Heap-allocates an MCCodeGenInfo and initialises it from the relocation
/// model \p RM, code model \p CM and optimisation level \p OL.
///
/// \p TT is not used.  Returns the raw pointer.
static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT,
                                                Reloc::Model RM,
                                                CodeModel::Model CM,
                                                CodeGenOpt::Level OL) {
  MCCodeGenInfo *CodeGenInfo = new MCCodeGenInfo();
  CodeGenInfo->InitMCCodeGenInfo(RM, CM, OL);
  return CodeGenInfo;
}
|
||||||
|
|
||||||
|
extern "C" void LLVMInitializeAMDGPUTargetMC() {
|
||||||
|
|
||||||
|
RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
|
||||||
|
|
||||||
|
TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
|
||||||
|
|
||||||
|
TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
|
||||||
|
|
||||||
|
TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
|
||||||
|
|
||||||
|
TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
|
||||||
|
|
||||||
|
}
|
35
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
Normal file
35
lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file provides AMDGPU specific target descriptions.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef AMDGPUMCTARGETDESC_H
|
||||||
|
#define AMDGPUMCTARGETDESC_H
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class MCSubtargetInfo;
|
||||||
|
class Target;
|
||||||
|
|
||||||
|
extern Target TheAMDGPUTarget;
|
||||||
|
|
||||||
|
} // End llvm namespace
|
||||||
|
|
||||||
|
#define GET_REGINFO_ENUM
|
||||||
|
#include "AMDGPUGenRegisterInfo.inc"
|
||||||
|
|
||||||
|
#define GET_INSTRINFO_ENUM
|
||||||
|
#include "AMDGPUGenInstrInfo.inc"
|
||||||
|
|
||||||
|
#define GET_SUBTARGETINFO_ENUM
|
||||||
|
#include "AMDGPUGenSubtargetInfo.inc"
|
||||||
|
|
||||||
|
#endif // AMDGPUMCTARGETDESC_H
|
7
lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
Normal file
7
lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
add_llvm_library(LLVMAMDGPUDesc
|
||||||
|
AMDGPUMCTargetDesc.cpp
|
||||||
|
AMDGPUMCAsmInfo.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
add_dependencies(LLVMAMDGPUDesc AMDGPUCommonTableGen)
|
23
lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
Normal file
23
lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
;===- ./lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
|
||||||
|
;
|
||||||
|
; The LLVM Compiler Infrastructure
|
||||||
|
;
|
||||||
|
; This file is distributed under the University of Illinois Open Source
|
||||||
|
; License. See LICENSE.TXT for details.
|
||||||
|
;
|
||||||
|
;===------------------------------------------------------------------------===;
|
||||||
|
;
|
||||||
|
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||||
|
;
|
||||||
|
; For more information on the LLVMBuild system, please see:
|
||||||
|
;
|
||||||
|
; http://llvm.org/docs/LLVMBuild.html
|
||||||
|
;
|
||||||
|
;===------------------------------------------------------------------------===;
|
||||||
|
|
||||||
|
[component_0]
|
||||||
|
type = Library
|
||||||
|
name = AMDGPUDesc
|
||||||
|
parent = AMDGPU
|
||||||
|
required_libraries = AMDGPUInfo MC
|
||||||
|
add_to_library_groups = AMDGPU
|
16
lib/Target/AMDGPU/MCTargetDesc/Makefile
Normal file
16
lib/Target/AMDGPU/MCTargetDesc/Makefile
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
##===- lib/Target/AMDGPU/MCTargetDesc/Makefile --------------*- Makefile -*-===##
|
||||||
|
#
|
||||||
|
# The LLVM Compiler Infrastructure
|
||||||
|
#
|
||||||
|
# This file is distributed under the University of Illinois Open Source
|
||||||
|
# License. See LICENSE.TXT for details.
|
||||||
|
#
|
||||||
|
##===----------------------------------------------------------------------===##
|
||||||
|
|
||||||
|
LEVEL = ../../../..
|
||||||
|
LIBRARYNAME = LLVMAMDGPUDesc
|
||||||
|
|
||||||
|
# Hack: we need to include 'main' target directory to grab private headers
|
||||||
|
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
|
||||||
|
|
||||||
|
include $(LEVEL)/Makefile.common
|
22
lib/Target/AMDGPU/Makefile
Normal file
22
lib/Target/AMDGPU/Makefile
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
##===- lib/Target/AMDGPU/Makefile ---------------------------*- Makefile -*-===##
|
||||||
|
#
|
||||||
|
# The LLVM Compiler Infrastructure
|
||||||
|
#
|
||||||
|
# This file is distributed under the University of Illinois Open Source
|
||||||
|
# License. See LICENSE.TXT for details.
|
||||||
|
#
|
||||||
|
##===----------------------------------------------------------------------===##
|
||||||
|
|
||||||
|
LEVEL = ../../..
|
||||||
|
LIBRARYNAME = LLVMAMDGPUCodeGen
|
||||||
|
TARGET = AMDGPU
|
||||||
|
|
||||||
|
# Make sure that tblgen is run, first thing.
|
||||||
|
BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
|
||||||
|
AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
|
||||||
|
AMDGPUGenCodeEmitter.inc AMDGPUGenCallingConv.inc \
|
||||||
|
AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
|
||||||
|
|
||||||
|
DIRS = TargetInfo MCTargetDesc
|
||||||
|
|
||||||
|
include $(LEVEL)/Makefile.common
|
27
lib/Target/AMDGPU/Processors.td
Normal file
27
lib/Target/AMDGPU/Processors.td
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
//===-- Processors.td - TODO: Add brief description -------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// AMDIL processors supported.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
|
||||||
|
: Processor<Name, itin, Features>;
|
||||||
|
def : Proc<"rv710", R600_EG_Itin, []>;
|
||||||
|
def : Proc<"rv730", R600_EG_Itin, []>;
|
||||||
|
def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
|
||||||
|
def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||||
|
def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||||
|
def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||||
|
def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
||||||
|
def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||||
|
def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||||
|
def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||||
|
def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
||||||
|
def : Proc<"SI", SI_Itin, []>;
|
614
lib/Target/AMDGPU/R600CodeEmitter.cpp
Normal file
614
lib/Target/AMDGPU/R600CodeEmitter.cpp
Normal file
@ -0,0 +1,614 @@
|
|||||||
|
//===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This code emitter outputs bytecode that is understood by the r600g driver
|
||||||
|
// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
|
||||||
|
// except that the size of the instruction fields are rounded up to the
|
||||||
|
// nearest byte.
|
||||||
|
//
|
||||||
|
// [1] http://www.mesa3d.org/
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDGPUUtil.h"
|
||||||
|
#include "AMDILCodeEmitter.h"
|
||||||
|
#include "AMDILInstrInfo.h"
|
||||||
|
#include "AMDILUtilityFunctions.h"
|
||||||
|
#include "R600InstrInfo.h"
|
||||||
|
#include "R600RegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/Support/DataTypes.h"
|
||||||
|
#include "llvm/Support/FormattedStream.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#define SRC_BYTE_COUNT 11
|
||||||
|
#define DST_BYTE_COUNT 5
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
static char ID;
|
||||||
|
formatted_raw_ostream &_OS;
|
||||||
|
const TargetMachine * TM;
|
||||||
|
const MachineRegisterInfo * MRI;
|
||||||
|
const R600RegisterInfo * TRI;
|
||||||
|
|
||||||
|
bool IsCube;
|
||||||
|
bool IsReduction;
|
||||||
|
bool IsVector;
|
||||||
|
unsigned currentElement;
|
||||||
|
bool IsLast;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
|
||||||
|
_OS(OS), TM(NULL), IsCube(false), IsReduction(false), IsVector(false),
|
||||||
|
IsLast(true) { }
|
||||||
|
|
||||||
|
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
|
||||||
|
|
||||||
|
bool runOnMachineFunction(MachineFunction &MF);
|
||||||
|
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
|
||||||
|
const MachineOperand &MO) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
void EmitALUInstr(MachineInstr &MI);
|
||||||
|
void EmitSrc(const MachineOperand & MO, int chan_override = -1);
|
||||||
|
void EmitDst(const MachineOperand & MO);
|
||||||
|
void EmitALU(MachineInstr &MI, unsigned numSrc);
|
||||||
|
void EmitTexInstr(MachineInstr &MI);
|
||||||
|
void EmitFCInstr(MachineInstr &MI);
|
||||||
|
|
||||||
|
void EmitNullBytes(unsigned int byteCount);
|
||||||
|
|
||||||
|
void EmitByte(unsigned int byte);
|
||||||
|
|
||||||
|
void EmitTwoBytes(uint32_t bytes);
|
||||||
|
|
||||||
|
void Emit(uint32_t value);
|
||||||
|
void Emit(uint64_t value);
|
||||||
|
|
||||||
|
unsigned getHWReg(unsigned regNo) const;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End anonymous namespace
|
||||||
|
|
||||||
|
// Indices of the X/Y/Z/W elements. This file uses them to index the
// four-entry per-element arrays built in EmitTexInstr().
enum RegElement {
  ELEMENT_X = 0,
  ELEMENT_Y = 1,
  ELEMENT_Z = 2,
  ELEMENT_W = 3
};
|
||||||
|
|
||||||
|
// Instruction-type tag written as the first byte of every emitted
// instruction record.
enum InstrTypes {
  INSTR_ALU    = 0,
  INSTR_TEX    = 1,
  INSTR_FC     = 2,
  INSTR_NATIVE = 3,
  INSTR_VTX    = 4
};
|
||||||
|
|
||||||
|
// Flow-control operation code; EmitFCInstr() writes one of these as the
// final byte of a flow-control record.
enum FCInstr {
  FC_IF           = 0,
  FC_ELSE         = 1,
  FC_ENDIF        = 2,
  FC_BGNLOOP      = 3,
  FC_ENDLOOP      = 4,
  FC_BREAK        = 5,
  FC_BREAK_NZ_INT = 6,
  FC_CONTINUE     = 7,
  FC_BREAK_Z_INT  = 8
};
|
||||||
|
|
||||||
|
// Texture-type codes. EmitTexInstr() compares the texture-type immediate
// operand of a texture instruction against these values. Numbering starts
// at 1, not 0.
enum TextureTypes {
  TEXTURE_1D             = 1,
  TEXTURE_2D             = 2,
  TEXTURE_3D             = 3,
  TEXTURE_CUBE           = 4,
  TEXTURE_RECT           = 5,
  TEXTURE_SHADOW1D       = 6,
  TEXTURE_SHADOW2D       = 7,
  TEXTURE_SHADOWRECT     = 8,
  TEXTURE_1D_ARRAY       = 9,
  TEXTURE_2D_ARRAY       = 10,
  TEXTURE_SHADOW1D_ARRAY = 11,
  TEXTURE_SHADOW2D_ARRAY = 12
};
|
||||||
|
|
||||||
|
// Definition of the static pass identifier that the R600CodeEmitter
// constructor hands to the MachineFunctionPass base class.
char R600CodeEmitter::ID = 0;
|
||||||
|
|
||||||
|
/// Factory for the R600 code emitter. Returns a newly heap-allocated pass
/// whose emitted bytes are written to \p OS.
FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
  R600CodeEmitter *Emitter = new R600CodeEmitter(OS);
  return Emitter;
}
|
||||||
|
|
||||||
|
/// Encode every instruction of \p MF and write the result to the output
/// stream.
///
/// For each instruction the per-instruction state flags are refreshed, then
/// the instruction is dispatched:
///  - texture ops             -> EmitTexInstr()
///  - flow-control ops        -> EmitFCInstr()
///  - reduction/vector/cube   -> EmitALUInstr() once per element (0-3)
///  - RETURN / BUNDLE / KILL  -> nothing is emitted
///  - RAT_WRITE_CACHELESS_eg  -> INSTR_NATIVE tag + 64-bit encoding
///  - VTX_READ_*_eg           -> INSTR_VTX tag + 64-bit encoding + 32-bit offset
///  - everything else         -> EmitALUInstr()
///
/// Instructions with more than one operand whose first operand is a dead
/// register are skipped entirely.
///
/// \returns false in all cases.
bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {

  TM = &MF.getTarget();
  MRI = &MF.getRegInfo();
  TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo());
  const R600InstrInfo * TII =
      static_cast<const R600InstrInfo *>(TM->getInstrInfo());
  const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();

  if (STM.dumpCode()) {
    MF.dump();
  }

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                      I != E; ++I) {
      MachineInstr &MI = *I;
      IsReduction = AMDGPU::isReductionOp(MI.getOpcode());
      IsVector = TII->isVector(MI);
      IsCube = AMDGPU::isCubeOp(MI.getOpcode());

      // Nothing to emit for an instruction whose result is dead.
      if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg()
          && MI.getOperand(0).isDead()) {
        continue;
      }

      if (AMDGPU::isTexOp(MI.getOpcode())) {
        EmitTexInstr(MI);
      } else if (AMDGPU::isFCOp(MI.getOpcode())){
        EmitFCInstr(MI);
      } else if (IsReduction || IsVector || IsCube) {
        IsLast = false;
        // XXX: On Cayman, some (all?) of the vector instructions only need
        // to fill the first three slots.
        for (currentElement = 0; currentElement < 4; currentElement++) {
          // Only the fourth element closes the instruction group.
          IsLast = (currentElement == 3);
          EmitALUInstr(MI);
        }
        IsReduction = false;
        IsVector = false;
        IsCube = false;
      } else if (MI.getOpcode() == AMDGPU::RETURN ||
                 MI.getOpcode() == AMDGPU::BUNDLE ||
                 MI.getOpcode() == AMDGPU::KILL) {
        continue;
      } else {
        switch(MI.getOpcode()) {
        case AMDGPU::RAT_WRITE_CACHELESS_eg:
          {
            uint64_t inst = getBinaryCodeForInstr(MI);
            // Set End Of Program bit
            // XXX: Need better check of end of program.  EOP should be
            // encoded in one of the operands of the MI, and it should be
            // set in a prior pass.
            MachineBasicBlock::iterator NextI = llvm::next(I);
            // The write may be the last instruction of its block; the end
            // iterator must not be dereferenced in that case.
            if (NextI != E && NextI->getOpcode() == AMDGPU::RETURN) {
              inst |= (((uint64_t)1) << 53);
            }
            EmitByte(INSTR_NATIVE);
            Emit(inst);
            break;
          }
        case AMDGPU::VTX_READ_PARAM_i32_eg:
        case AMDGPU::VTX_READ_PARAM_f32_eg:
        case AMDGPU::VTX_READ_GLOBAL_i32_eg:
        case AMDGPU::VTX_READ_GLOBAL_f32_eg:
        case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
        case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
          {
            uint64_t InstWord01 = getBinaryCodeForInstr(MI);
            uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset

            EmitByte(INSTR_VTX);
            Emit(InstWord01);
            Emit(InstWord2);
            break;
          }

        default:
          EmitALUInstr(MI);
          break;
        }
      }
    }
  }
  return false;
}
|
||||||
|
|
||||||
|
/// Emit one ALU instruction record for \p MI.
///
/// Record layout: INSTR_ALU tag byte, three source slots of SRC_BYTE_COUNT
/// bytes each, the destination (EmitDst), then the ALU trailer (EmitALU).
/// Place-holder opcodes and instructions without explicit operands produce
/// no output.
void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
{

  unsigned numOperands = MI.getNumExplicitOperands();

  // Some instructions are just place holder instructions that represent
  // operations that the GPU does automatically.  They should be ignored.
  if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) {
    return;
  }

  // XXX Check if instruction writes a result
  if (numOperands < 1) {
    return;
  }
  const MachineOperand &dstOp = MI.getOperand(0);

  // Emit instruction type
  EmitByte(INSTR_ALU);

  if (IsCube) {
    // Cube ops read operand 1 twice, with per-element channel overrides,
    // and leave the third source slot empty.
    static const int cube_src_swz[] = {2, 2, 0, 1};
    EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
    EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
    EmitNullBytes(SRC_BYTE_COUNT);
  } else {
    unsigned int opIndex;
    for (opIndex = 1; opIndex < numOperands; opIndex++) {
      // Literal constants are always stored as the last operand.
      if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
        break;
      }
      EmitSrc(MI.getOperand(opIndex));
    }

    // Emit zeros for unused sources
    for ( ; opIndex < 4; opIndex++) {
      EmitNullBytes(SRC_BYTE_COUNT);
    }
  }

  EmitDst(dstOp);

  EmitALU(MI, numOperands - 1);
}
|
||||||
|
|
||||||
|
void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
|
||||||
|
{
|
||||||
|
uint32_t value = 0;
|
||||||
|
// Emit the source select (2 bytes). For GPRs, this is the register index.
|
||||||
|
// For other potential instruction operands, (e.g. constant registers) the
|
||||||
|
// value of the source select is defined in the r600isa docs.
|
||||||
|
if (MO.isReg()) {
|
||||||
|
unsigned reg = MO.getReg();
|
||||||
|
EmitTwoBytes(getHWReg(reg));
|
||||||
|
if (reg == AMDGPU::ALU_LITERAL_X) {
|
||||||
|
const MachineInstr * parent = MO.getParent();
|
||||||
|
unsigned immOpIndex = parent->getNumExplicitOperands() - 1;
|
||||||
|
MachineOperand immOp = parent->getOperand(immOpIndex);
|
||||||
|
if (immOp.isFPImm()) {
|
||||||
|
value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
|
||||||
|
} else {
|
||||||
|
assert(immOp.isImm());
|
||||||
|
value = immOp.getImm();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// XXX: Handle other operand types.
|
||||||
|
EmitTwoBytes(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit the source channel (1 byte)
|
||||||
|
if (chan_override != -1) {
|
||||||
|
EmitByte(chan_override);
|
||||||
|
} else if (IsReduction) {
|
||||||
|
EmitByte(currentElement);
|
||||||
|
} else if (MO.isReg()) {
|
||||||
|
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||||
|
} else {
|
||||||
|
EmitByte(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX: Emit isNegated (1 byte)
|
||||||
|
if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
|
||||||
|
&& (MO.getTargetFlags() & MO_FLAG_NEG ||
|
||||||
|
(MO.isReg() &&
|
||||||
|
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
|
||||||
|
EmitByte(1);
|
||||||
|
} else {
|
||||||
|
EmitByte(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit isAbsolute (1 byte)
|
||||||
|
if (MO.getTargetFlags() & MO_FLAG_ABS) {
|
||||||
|
EmitByte(1);
|
||||||
|
} else {
|
||||||
|
EmitByte(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX: Emit relative addressing mode (1 byte)
|
||||||
|
EmitByte(0);
|
||||||
|
|
||||||
|
// Emit kc_bank, This will be adjusted later by r600_asm
|
||||||
|
EmitByte(0);
|
||||||
|
|
||||||
|
// Emit the literal value, if applicable (4 bytes).
|
||||||
|
Emit(value);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void R600CodeEmitter::EmitDst(const MachineOperand & MO)
|
||||||
|
{
|
||||||
|
if (MO.isReg()) {
|
||||||
|
// Emit the destination register index (1 byte)
|
||||||
|
EmitByte(getHWReg(MO.getReg()));
|
||||||
|
|
||||||
|
// Emit the element of the destination register (1 byte)
|
||||||
|
if (IsReduction || IsCube || IsVector) {
|
||||||
|
EmitByte(currentElement);
|
||||||
|
} else {
|
||||||
|
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit isClamped (1 byte)
|
||||||
|
if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
|
||||||
|
EmitByte(1);
|
||||||
|
} else {
|
||||||
|
EmitByte(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit writemask (1 byte).
|
||||||
|
if (((IsReduction || IsVector) &&
|
||||||
|
currentElement != TRI->getHWRegChan(MO.getReg()))
|
||||||
|
|| MO.getTargetFlags() & MO_FLAG_MASK) {
|
||||||
|
EmitByte(0);
|
||||||
|
} else {
|
||||||
|
EmitByte(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX: Emit relative addressing mode
|
||||||
|
EmitByte(0);
|
||||||
|
} else {
|
||||||
|
// XXX: Handle other operand types. Are there any for destination regs?
|
||||||
|
EmitNullBytes(DST_BYTE_COUNT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emit the trailer of an ALU record: the 2-byte instruction encoding, the
/// IsLast byte, the isOp3 byte and five bytes that are currently always 0.
///
/// \param numSrc  operand count passed by the caller; isOp3 is set when it
///                equals 3.
void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
{
  // Instruction encoding (2 bytes).
  EmitTwoBytes(getBinaryCodeForInstr(MI));

  // IsLast (for this instruction group).
  EmitByte(IsLast ? 1 : 0);

  // isOp3
  EmitByte(numSrc == 3 ? 1 : 0);

  // Five single-byte fields that are not implemented yet, in this order:
  //   XXX: predicate
  //   XXX: bank swizzle (r600_asm.c appears to set it itself)
  //   XXX: bank_swizzle_force
  //   XXX: OMOD
  //   XXX: index_mode (indirect addressing)
  EmitNullBytes(5);
}
|
||||||
|
|
||||||
|
/// Emit one texture instruction record for \p MI.
///
/// Bytes written, in order: INSTR_TEX tag, instruction encoding, resource id,
/// source register, source relative-address flag, destination register,
/// destination relative-address flag, four destination selects, lod bias,
/// four coordinate types, three offsets, sampler id, four source selects.
///
/// For TEX_LD the sampler and texture-type immediates sit three operands
/// later than for the other opcodes, and operands 2-4 supply the offsets.
void R600CodeEmitter::EmitTexInstr(MachineInstr &MI)
{

  unsigned opcode = MI.getOpcode();
  bool hasOffsets = (opcode == AMDGPU::TEX_LD);
  unsigned op_offset = hasOffsets ? 3 : 0;
  int64_t sampler = MI.getOperand(op_offset+2).getImm();
  int64_t textureType = MI.getOperand(op_offset+3).getImm();
  unsigned srcSelect[4] = {0, 1, 2, 3};

  // Emit instruction type
  EmitByte(INSTR_TEX);

  // Emit instruction
  EmitByte(getBinaryCodeForInstr(MI));

  // XXX: Emit resource id.  r600_shader.c uses sampler + 1.  Why?
  // NOTE(review): the value written here is sampler + 2, not sampler + 1 --
  // confirm which one the consumer expects.
  EmitByte(sampler + 1 + 1);

  // Emit source register
  EmitByte(getHWReg(MI.getOperand(1).getReg()));

  // XXX: Emit src isRelativeAddress
  EmitByte(0);

  // Emit destination register
  EmitByte(getHWReg(MI.getOperand(0).getReg()));

  // XXX: Emit dst isRelativeAddress
  EmitByte(0);

  // XXX: Emit dst select
  EmitByte(0); // X
  EmitByte(1); // Y
  EmitByte(2); // Z
  EmitByte(3); // W

  // XXX: Emit lod bias
  EmitByte(0);

  // XXX: Emit coord types
  unsigned coordType[4] = {1, 1, 1, 1};

  if (textureType == TEXTURE_RECT
      || textureType == TEXTURE_SHADOWRECT) {
    coordType[ELEMENT_X] = 0;
    coordType[ELEMENT_Y] = 0;
  }

  if (textureType == TEXTURE_1D_ARRAY
      || textureType == TEXTURE_SHADOW1D_ARRAY) {
    if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
      coordType[ELEMENT_Y] = 0;
    } else {
      coordType[ELEMENT_Z] = 0;
      srcSelect[ELEMENT_Z] = ELEMENT_Y;
    }
  } else if (textureType == TEXTURE_2D_ARRAY
             || textureType == TEXTURE_SHADOW2D_ARRAY) {
    coordType[ELEMENT_Z] = 0;
  }

  for (unsigned i = 0; i < 4; i++) {
    EmitByte(coordType[i]);
  }

  // XXX: Emit offsets (operands 2-4, each shifted left by one bit)
  if (hasOffsets)
    for (unsigned i = 2; i < 5; i++)
      EmitByte(MI.getOperand(i).getImm()<<1);
  else
    EmitNullBytes(3);

  // Emit sampler id
  EmitByte(sampler);

  // XXX: Emit source select
  if ((textureType == TEXTURE_SHADOW1D
      || textureType == TEXTURE_SHADOW2D
      || textureType == TEXTURE_SHADOWRECT
      || textureType == TEXTURE_SHADOW1D_ARRAY)
      && opcode != AMDGPU::TEX_SAMPLE_C_L
      && opcode != AMDGPU::TEX_SAMPLE_C_LB) {
    srcSelect[ELEMENT_W] = ELEMENT_Z;
  }

  for (unsigned i = 0; i < 4; i++) {
    EmitByte(srcSelect[i]);
  }
}
|
||||||
|
|
||||||
|
/// Emit one flow-control record for \p MI: the INSTR_FC tag, one source slot
/// (zero-filled when the instruction has no operands) and one FCInstr byte.
/// Opcodes without a mapping, including IF_LOGICALZ_f32, abort the process.
void R600CodeEmitter::EmitFCInstr(MachineInstr &MI)
{
  // Instruction type
  EmitByte(INSTR_FC);

  // Source slot
  const unsigned operandCount = MI.getNumOperands();
  if (operandCount == 0) {
    EmitNullBytes(SRC_BYTE_COUNT);
  } else {
    assert(operandCount == 1);
    EmitSrc(MI.getOperand(0));
  }

  // Translate the opcode into its flow-control code.
  enum FCInstr fcCode;
  switch (MI.getOpcode()) {
  case AMDGPU::IF_LOGICALNZ_f32:
  case AMDGPU::IF_LOGICALNZ_i32:
    fcCode = FC_IF;
    break;
  case AMDGPU::ELSE:
    fcCode = FC_ELSE;
    break;
  case AMDGPU::ENDIF:
    fcCode = FC_ENDIF;
    break;
  case AMDGPU::WHILELOOP:
    fcCode = FC_BGNLOOP;
    break;
  case AMDGPU::ENDLOOP:
    fcCode = FC_ENDLOOP;
    break;
  case AMDGPU::BREAK_LOGICALZ_f32:
    fcCode = FC_BREAK;
    break;
  case AMDGPU::BREAK_LOGICALNZ_f32:
  case AMDGPU::BREAK_LOGICALNZ_i32:
    fcCode = FC_BREAK_NZ_INT;
    break;
  case AMDGPU::BREAK_LOGICALZ_i32:
    fcCode = FC_BREAK_Z_INT;
    break;
  case AMDGPU::CONTINUE_LOGICALNZ_f32:
  case AMDGPU::CONTINUE_LOGICALNZ_i32:
    fcCode = FC_CONTINUE;
    break;
  case AMDGPU::IF_LOGICALZ_f32:
  default:
    abort();
    break;
  }
  EmitByte(fcCode);
}
|
||||||
|
|
||||||
|
void R600CodeEmitter::EmitNullBytes(unsigned int byteCount)
|
||||||
|
{
|
||||||
|
for (unsigned int i = 0; i < byteCount; i++) {
|
||||||
|
EmitByte(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void R600CodeEmitter::EmitByte(unsigned int byte)
|
||||||
|
{
|
||||||
|
_OS.write((uint8_t) byte & 0xff);
|
||||||
|
}
|
||||||
|
void R600CodeEmitter::EmitTwoBytes(unsigned int bytes)
|
||||||
|
{
|
||||||
|
_OS.write((uint8_t) (bytes & 0xff));
|
||||||
|
_OS.write((uint8_t) ((bytes >> 8) & 0xff));
|
||||||
|
}
|
||||||
|
|
||||||
|
void R600CodeEmitter::Emit(uint32_t value)
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
|
_OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void R600CodeEmitter::Emit(uint64_t value)
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < 8; i++) {
|
||||||
|
EmitByte((value >> (8 * i)) & 0xff);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned R600CodeEmitter::getHWReg(unsigned regNo) const
|
||||||
|
{
|
||||||
|
unsigned HWReg;
|
||||||
|
|
||||||
|
HWReg = TRI->getEncodingValue(regNo);
|
||||||
|
if (AMDGPU::R600_CReg32RegClass.contains(regNo)) {
|
||||||
|
HWReg += 512;
|
||||||
|
}
|
||||||
|
return HWReg;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI,
|
||||||
|
const MachineOperand &MO) const
|
||||||
|
{
|
||||||
|
if (MO.isReg()) {
|
||||||
|
return getHWReg(MO.getReg());
|
||||||
|
} else {
|
||||||
|
return MO.getImm();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "AMDGPUGenCodeEmitter.inc"
|
||||||
|
|
190
lib/Target/AMDGPU/R600GenRegisterInfo.pl
Normal file
190
lib/Target/AMDGPU/R600GenRegisterInfo.pl
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
|
||||||
|
#
|
||||||
|
# The LLVM Compiler Infrastructure
|
||||||
|
#
|
||||||
|
# This file is distributed under the University of Illinois Open Source
|
||||||
|
# License. See LICENSE.TXT for details.
|
||||||
|
#
|
||||||
|
#===------------------------------------------------------------------------===#
|
||||||
|
#
|
||||||
|
# This perl script prints to stdout .td code to be used as R600RegisterInfo.td.
# It also generates a file called R600HwRegInfo.include, which contains helper
# functions for determining the hw encoding of registers.
|
||||||
|
#
|
||||||
|
#===------------------------------------------------------------------------===#
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
# Number of constant (C) and temporary (T) registers; four per-channel
# definitions (X, Y, Z, W) are generated for each of them.
use constant {
    CONST_REG_COUNT => 100,
    TEMP_REG_COUNT  => 128,
};

# Highest register index in each group.
my $CREG_MAX = CONST_REG_COUNT - 1;
my $TREG_MAX = TEMP_REG_COUNT - 1;
|
||||||
|
|
||||||
|
# Print the two TableGen helper classes used by every register definition
# that follows: R600Reg for a single 32-bit register and R600Reg_128 for a
# register made of four sub-registers (sel_x .. sel_w).
print <<STRING;

class R600Reg <string name, bits<16> encoding> : Register<name> {
let Namespace = "AMDGPU";
let HWEncoding = encoding;
}

class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
let HWEncoding = encoding;
}

STRING
|
||||||
|
|
||||||
|
# File-scoped loop counter shared with the subs defined further down.
my $i;

### REG DEFS ###

# Per-channel definitions for the constant and temporary registers.
my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");

# One 128-bit definition per temporary register, plus the list of X-channel
# names. Every tenth entry carries a trailing newline so that the joined
# lists wrap when printed.
my @t128reg;
my @treg_x;
foreach my $idx (0 .. TEMP_REG_COUNT - 1) {
    my $vec_name = "T${idx}_XYZW";
    print qq{def $vec_name : R600Reg_128 <"T$idx.XYZW", [T${idx}_X, T${idx}_Y, T${idx}_Z, T${idx}_W], $idx >;\n};
    my $wrap = ($idx % 10 == 0) ? "\n" : "";
    $t128reg[$idx] = $vec_name . $wrap;
    $treg_x[$idx]  = "T${idx}_X" . $wrap;
}

# Comma-separated member lists for the register classes.
my $treg_string   = join(",", @treg_list);
my $creg_list     = join(",", @creg_list);
my $t128_string   = join(",", @t128reg);
my $treg_x_string = join(",", @treg_x);
|
||||||
|
# Print the special-purpose registers and the register classes.
# ONE/NEG_ONE and HALF/NEG_HALF deliberately share an encoding (249 and 252).
# The register-class member lists are interpolated from the strings joined
# above.
print <<STRING;

class RegSet <dag s> {
dag set = s;
}

def ZERO : R600Reg<"0.0", 248>;
def ONE : R600Reg<"1.0", 249>;
def NEG_ONE : R600Reg<"-1.0", 249>;
def ONE_INT : R600Reg<"1", 250>;
def HALF : R600Reg<"0.5", 252>;
def NEG_HALF : R600Reg<"-0.5", 252>;
def PV_X : R600Reg<"pv.x", 254>;
def ALU_LITERAL_X : R600Reg<"literal.x", 253>;

def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
$creg_list)>;

def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
$treg_string)>;

def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
$treg_x_string)>;

def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
R600_CReg32,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;

def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
$t128_string)>
{
let CopyCost = -1;
}

STRING
|
||||||
|
|
||||||
|
# %chan_map groups register names by channel letter and drives the generated
# getHWRegChanGen() switch. %index_map groups them by hardware index; it is
# filled here but not read again in this script.
my %index_map;
my %chan_map;

# Per-channel registers: position N in each list is hardware index N / 4,
# channel N % 4.
foreach my $regs (\@creg_list, \@treg_list) {
    foreach my $pos (0 .. $#{$regs}) {
        my $reg = $regs->[$pos];
        push @{ $index_map{ get_hw_index($pos) } }, $reg;
        push @{ $chan_map{ get_chan_str($pos) } }, $reg;
    }
}

# 128-bit registers are filed under their own index and channel X.
foreach my $pos (0 .. $#t128reg) {
    push @{ $index_map{$pos} }, $t128reg[$pos];
    push @{ $chan_map{'X'} }, $t128reg[$pos];
}
|
||||||
|
|
||||||
|
# Write R600HwRegInfo.include, which defines
# R600RegisterInfo::getHWRegChanGen(): a switch that maps every generated
# register to its channel number (X=0, Y=1, Z=2, W=3).
#
# The script stops with a diagnostic if the file cannot be opened or closed,
# and channels are emitted in sorted order so that repeated runs produce
# byte-identical output.
my $outfile = "R600HwRegInfo.include";
open(OUTFILE, ">", $outfile) or die("Cannot open $outfile for writing: $!");

print OUTFILE <<STRING;

unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
{
switch(reg) {
default: assert(!"Unknown register"); return 0;
STRING

my %chan_value = (X => 0, Y => 1, Z => 2, W => 3);

foreach my $key (sort keys(%chan_map)) {
    die("Unknown chan value; $key") unless exists $chan_value{$key};
    foreach my $reg (@{$chan_map{$key}}) {
        # Some names carry a trailing newline used for list wrapping; strip
        # it from a copy so the shared arrays are left untouched.
        chomp(my $name = $reg);
        print OUTFILE " case AMDGPU::$name:\n";
    }
    print OUTFILE " return $chan_value{$key};\n\n";
}

print OUTFILE " }\n}\n\n";

close(OUTFILE) or die("Cannot close $outfile: $!");
|
||||||
|
|
||||||
|
# Print one R600Reg definition per channel and return the register names.
#
#   $count  - number of per-channel registers to define (four per hardware
#             register)
#   $prefix - register name prefix, e.g. "C" or "T"
#
# Returns the list of generated names; every tenth name carries a trailing
# newline so that a joined list wraps when printed.
#
# The loop counter is lexical, so calling this sub no longer overwrites the
# file-scoped $i.
sub print_reg_defs {
    my ($count, $prefix) = @_;

    my @reg_list;

    for (my $idx = 0; $idx < $count; $idx++) {
        my $hw_index = get_hw_index($idx);
        my $chan = get_chan_str($idx);
        my $name = "$prefix$hw_index\_$chan";
        print qq{def $name : R600Reg <"$prefix$hw_index.$chan", $hw_index>;\n};
        $reg_list[$idx] = $name;
        if ($idx % 10 == 0) {
            $reg_list[$idx] .= "\n";
        }
    }
    return @reg_list;
}
|
||||||
|
|
||||||
|
#Helper functions
|
||||||
|
# Map a per-channel register position to its hardware register index
# (four channels per register).
sub get_hw_index {
    my $index = shift;
    my $quotient = $index / 4;
    return int($quotient);
}
|
||||||
|
|
||||||
|
# Map a per-channel register position to its channel letter:
# position % 4 of 0, 1, 2, 3 gives 'X', 'Y', 'Z', 'W'.
sub get_chan_str {
    my $index = shift;
    my $chan = $index % 4;
    my @letters = ('X', 'Y', 'Z', 'W');
    die("Unknown chan value: $chan") if $chan < 0 || $chan > $#letters;
    return $letters[$chan];
}
|
1056
lib/Target/AMDGPU/R600HwRegInfo.include
Normal file
1056
lib/Target/AMDGPU/R600HwRegInfo.include
Normal file
File diff suppressed because it is too large
Load Diff
286
lib/Target/AMDGPU/R600ISelLowering.cpp
Normal file
286
lib/Target/AMDGPU/R600ISelLowering.cpp
Normal file
@ -0,0 +1,286 @@
|
|||||||
|
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
|
||||||
|
// is mostly EmitInstrWithCustomInserter().
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "R600ISelLowering.h"
|
||||||
|
#include "AMDGPUUtil.h"
|
||||||
|
#include "R600InstrInfo.h"
|
||||||
|
#include "R600MachineFunctionInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Set up DAG lowering for R600: register the register classes for the
/// scalar and four-element vector f32/i32 types, then declare which
/// operations need expansion or custom lowering.
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  // 64-bit integer multiply is marked Expand.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  // Four-element vectors use the 128-bit class, scalars the 32-bit class.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  // Called once all register classes above have been added.
  computeRegisterProperties();

  // f32 subtraction is marked Expand.
  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  // 32-bit rotate-left is marked Custom.
  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}
|
||||||
|
|
||||||
|
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
||||||
|
MachineInstr * MI, MachineBasicBlock * BB) const
|
||||||
|
{
|
||||||
|
MachineFunction * MF = BB->getParent();
|
||||||
|
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||||
|
MachineBasicBlock::iterator I = *MI;
|
||||||
|
|
||||||
|
switch (MI->getOpcode()) {
|
||||||
|
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||||
|
case AMDGPU::TGID_X:
|
||||||
|
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
|
||||||
|
break;
|
||||||
|
case AMDGPU::TGID_Y:
|
||||||
|
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
|
||||||
|
break;
|
||||||
|
case AMDGPU::TGID_Z:
|
||||||
|
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
|
||||||
|
break;
|
||||||
|
case AMDGPU::TIDIG_X:
|
||||||
|
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
|
||||||
|
break;
|
||||||
|
case AMDGPU::TIDIG_Y:
|
||||||
|
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
|
||||||
|
break;
|
||||||
|
case AMDGPU::TIDIG_Z:
|
||||||
|
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
|
||||||
|
break;
|
||||||
|
case AMDGPU::NGROUPS_X:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 0);
|
||||||
|
break;
|
||||||
|
case AMDGPU::NGROUPS_Y:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 1);
|
||||||
|
break;
|
||||||
|
case AMDGPU::NGROUPS_Z:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 2);
|
||||||
|
break;
|
||||||
|
case AMDGPU::GLOBAL_SIZE_X:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 3);
|
||||||
|
break;
|
||||||
|
case AMDGPU::GLOBAL_SIZE_Y:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 4);
|
||||||
|
break;
|
||||||
|
case AMDGPU::GLOBAL_SIZE_Z:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 5);
|
||||||
|
break;
|
||||||
|
case AMDGPU::LOCAL_SIZE_X:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 6);
|
||||||
|
break;
|
||||||
|
case AMDGPU::LOCAL_SIZE_Y:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 7);
|
||||||
|
break;
|
||||||
|
case AMDGPU::LOCAL_SIZE_Z:
|
||||||
|
lowerImplicitParameter(MI, *BB, MRI, 8);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case AMDGPU::CLAMP_R600:
|
||||||
|
MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addOperand(MI->getOperand(1));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case AMDGPU::FABS_R600:
|
||||||
|
MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addOperand(MI->getOperand(1));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case AMDGPU::FNEG_R600:
|
||||||
|
MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addOperand(MI->getOperand(1));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case AMDGPU::R600_LOAD_CONST:
|
||||||
|
{
|
||||||
|
int64_t RegIndex = MI->getOperand(1).getImm();
|
||||||
|
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addReg(ConstantReg);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case AMDGPU::LOAD_INPUT:
|
||||||
|
{
|
||||||
|
int64_t RegIndex = MI->getOperand(1).getImm();
|
||||||
|
addLiveIn(MI, MF, MRI, TII,
|
||||||
|
AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case AMDGPU::MASK_WRITE:
|
||||||
|
{
|
||||||
|
unsigned maskedRegister = MI->getOperand(0).getReg();
|
||||||
|
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
|
||||||
|
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
|
||||||
|
MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
|
||||||
|
def->addTargetFlag(MO_FLAG_MASK);
|
||||||
|
// Return early so the instruction is not erased
|
||||||
|
return BB;
|
||||||
|
}
|
||||||
|
|
||||||
|
case AMDGPU::RAT_WRITE_CACHELESS_eg:
|
||||||
|
{
|
||||||
|
// Convert to DWORD address
|
||||||
|
unsigned NewAddr = MRI.createVirtualRegister(
|
||||||
|
&AMDGPU::R600_TReg32_XRegClass);
|
||||||
|
unsigned ShiftValue = MRI.createVirtualRegister(
|
||||||
|
&AMDGPU::R600_TReg32RegClass);
|
||||||
|
|
||||||
|
// XXX In theory, we should be able to pass ShiftValue directly to
|
||||||
|
// the LSHR_eg instruction as an inline literal, but I tried doing it
|
||||||
|
// this way and it didn't produce the correct results.
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
|
||||||
|
.addReg(AMDGPU::ALU_LITERAL_X)
|
||||||
|
.addImm(2);
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
.addReg(ShiftValue);
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addReg(NewAddr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case AMDGPU::STORE_OUTPUT:
|
||||||
|
{
|
||||||
|
int64_t OutputIndex = MI->getOperand(1).getImm();
|
||||||
|
unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);
|
||||||
|
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
|
||||||
|
.addOperand(MI->getOperand(0));
|
||||||
|
|
||||||
|
if (!MRI.isLiveOut(OutputReg)) {
|
||||||
|
MRI.addLiveOut(OutputReg);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case AMDGPU::RESERVE_REG:
|
||||||
|
{
|
||||||
|
R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
|
||||||
|
int64_t ReservedIndex = MI->getOperand(0).getImm();
|
||||||
|
unsigned ReservedReg =
|
||||||
|
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
|
||||||
|
MFI->ReservedRegs.push_back(ReservedReg);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case AMDGPU::TXD:
|
||||||
|
{
|
||||||
|
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||||
|
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||||
|
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
|
||||||
|
.addOperand(MI->getOperand(3))
|
||||||
|
.addOperand(MI->getOperand(4))
|
||||||
|
.addOperand(MI->getOperand(5));
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
|
||||||
|
.addOperand(MI->getOperand(2))
|
||||||
|
.addOperand(MI->getOperand(4))
|
||||||
|
.addOperand(MI->getOperand(5));
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
.addOperand(MI->getOperand(4))
|
||||||
|
.addOperand(MI->getOperand(5))
|
||||||
|
.addReg(t0, RegState::Implicit)
|
||||||
|
.addReg(t1, RegState::Implicit);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AMDGPU::TXD_SHADOW:
|
||||||
|
{
|
||||||
|
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||||
|
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||||
|
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
|
||||||
|
.addOperand(MI->getOperand(3))
|
||||||
|
.addOperand(MI->getOperand(4))
|
||||||
|
.addOperand(MI->getOperand(5));
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
|
||||||
|
.addOperand(MI->getOperand(2))
|
||||||
|
.addOperand(MI->getOperand(4))
|
||||||
|
.addOperand(MI->getOperand(5));
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
.addOperand(MI->getOperand(4))
|
||||||
|
.addOperand(MI->getOperand(5))
|
||||||
|
.addReg(t0, RegState::Implicit)
|
||||||
|
.addReg(t1, RegState::Implicit);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
MI->eraseFromParent();
|
||||||
|
return BB;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Replace an implicit-parameter pseudo instruction with a read from the
/// parameter buffer.
///
/// Two instructions are inserted in front of \p MI: a MOV that materializes
/// the byte offset (dword_offset * 4) as a literal into a fresh
/// R600_TReg32_X virtual register, and a VTX_READ_PARAM_i32_eg that loads
/// into MI's destination operand using that register as the address.
/// The destination register is constrained to R600_TReg32_X as well.
/// \p MI itself is not erased here.
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
    MachineRegisterInfo & MRI, unsigned dword_offset) const
{
  MachineBasicBlock::iterator InsertPt = *MI;
  DebugLoc Loc = BB.findDebugLoc(InsertPt);

  // Address register first, then narrow the class of the result register.
  unsigned AddrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);

  // AddrReg = literal byte offset of the requested dword.
  BuildMI(BB, InsertPt, Loc, TII->get(AMDGPU::MOV), AddrReg)
      .addReg(AMDGPU::ALU_LITERAL_X)
      .addImm(dword_offset * 4);

  // Dst = param_buffer[AddrReg + 0]
  BuildMI(BB, InsertPt, Loc, TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
      .addOperand(MI->getOperand(0))
      .addReg(AddrReg)
      .addImm(0);
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Custom DAG Lowering Operations
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
||||||
|
/// Custom-lower a DAG node. ISD::ROTL is the only opcode handled here; every
/// other opcode is forwarded to AMDGPUTargetLowering::LowerOperation.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  if (Op.getOpcode() == ISD::ROTL) {
    return LowerROTL(Op, DAG);
  }
  return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
|
||||||
|
|
||||||
|
/// Lower ROTL(x, n) to BITALIGN(x, x, 32 - n).
///
/// The constant 32 is created as an i32 while the SUB and BITALIGN nodes use
/// the value type of \p Op.
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc Loc = Op.getDebugLoc();
  EVT Ty = Op.getValueType();

  SDValue Src = Op.getOperand(0);
  SDValue Width = DAG.getConstant(32, MVT::i32);
  SDValue Amount = DAG.getNode(ISD::SUB, Loc, Ty, Width, Op.getOperand(1));

  return DAG.getNode(AMDGPUISD::BITALIGN, Loc, Ty, Src, Src, Amount);
}
|
48
lib/Target/AMDGPU/R600ISelLowering.h
Normal file
48
lib/Target/AMDGPU/R600ISelLowering.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// R600 DAG Lowering interface definition
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef R600ISELLOWERING_H
|
||||||
|
#define R600ISELLOWERING_H
|
||||||
|
|
||||||
|
#include "AMDGPUISelLowering.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class R600InstrInfo;
|
||||||
|
|
||||||
|
class R600TargetLowering : public AMDGPUTargetLowering
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
R600TargetLowering(TargetMachine &TM);
|
||||||
|
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||||
|
MachineBasicBlock * BB) const;
|
||||||
|
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const R600InstrInfo * TII;
|
||||||
|
|
||||||
|
/// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
|
||||||
|
/// that are stored in the first nine dwords of a Vertex Buffer. These
|
||||||
|
/// implicit parameters are represented by pseudo instructions, which are
|
||||||
|
/// lowered to VTX_READ instructions by this function.
|
||||||
|
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
|
||||||
|
MachineRegisterInfo & MRI, unsigned dword_offset) const;
|
||||||
|
|
||||||
|
/// LowerROTL - Lower ROTL opcode to BITALIGN
|
||||||
|
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End namespace llvm;
|
||||||
|
|
||||||
|
#endif // R600ISELLOWERING_H
|
105
lib/Target/AMDGPU/R600InstrInfo.cpp
Normal file
105
lib/Target/AMDGPU/R600InstrInfo.cpp
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// R600 Implementation of TargetInstrInfo.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "R600InstrInfo.h"
|
||||||
|
#include "AMDGPUTargetMachine.h"
|
||||||
|
#include "AMDILSubtarget.h"
|
||||||
|
#include "R600RegisterInfo.h"
|
||||||
|
|
||||||
|
#define GET_INSTRINFO_CTOR
|
||||||
|
#include "AMDGPUGenDFAPacketizer.inc"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Construct the R600 instruction info; the embedded register info is built
/// from the same target machine and a reference to this object.
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
    : AMDGPUInstrInfo(tm), RI(tm, *this)
{
}
|
||||||
|
|
||||||
|
/// Accessor for the R600RegisterInfo instance owned by this object.
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
{
  const R600RegisterInfo &Info = RI;
  return Info;
}
|
||||||
|
|
||||||
|
bool R600InstrInfo::isTrig(const MachineInstr &MI) const
|
||||||
|
{
|
||||||
|
return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool R600InstrInfo::isVector(const MachineInstr &MI) const
|
||||||
|
{
|
||||||
|
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||||
|
unsigned DestReg, unsigned SrcReg,
|
||||||
|
bool KillSrc) const
|
||||||
|
{
|
||||||
|
|
||||||
|
unsigned subRegMap[4] = {AMDGPU::sel_x, AMDGPU::sel_y,
|
||||||
|
AMDGPU::sel_z, AMDGPU::sel_w};
|
||||||
|
|
||||||
|
if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
|
||||||
|
&& AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
|
||||||
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
|
BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
|
||||||
|
.addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define)
|
||||||
|
.addReg(RI.getSubReg(SrcReg, subRegMap[i]))
|
||||||
|
.addReg(DestReg, RegState::Define | RegState::Implicit);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
|
/* We can't copy vec4 registers */
|
||||||
|
assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
|
||||||
|
&& !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
|
||||||
|
|
||||||
|
BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
|
||||||
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create (but do not insert) a MOV that loads the immediate \p Imm into
/// \p DstReg. The operands are, in order: DstReg as a def, ALU_LITERAL_X,
/// and the immediate. The instruction has an empty DebugLoc.
MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                             unsigned DstReg, int64_t Imm) const
{
  MachineInstr *Mov = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());

  MachineInstrBuilder(Mov)
      .addReg(DstReg, RegState::Define)
      .addReg(AMDGPU::ALU_LITERAL_X)
      .addImm(Imm);

  return Mov;
}
|
||||||
|
|
||||||
|
unsigned R600InstrInfo::getIEQOpcode() const
|
||||||
|
{
|
||||||
|
return AMDGPU::SETE_INT;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool R600InstrInfo::isMov(unsigned Opcode) const
|
||||||
|
{
|
||||||
|
switch(Opcode) {
|
||||||
|
default: return false;
|
||||||
|
case AMDGPU::MOV:
|
||||||
|
case AMDGPU::MOV_IMM_F32:
|
||||||
|
case AMDGPU::MOV_IMM_I32:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a DFA packetizer from the target machine's instruction itinerary
/// data, via the AMDIL subtarget. \p DAG is not used.
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const
{
  return TM->getSubtarget<AMDILSubtarget>()
      .createDFAPacketizer(TM->getInstrItineraryData());
}
|
75
lib/Target/AMDGPU/R600InstrInfo.h
Normal file
75
lib/Target/AMDGPU/R600InstrInfo.h
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface definition for R600InstrInfo
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef R600INSTRUCTIONINFO_H_
|
||||||
|
#define R600INSTRUCTIONINFO_H_
|
||||||
|
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "AMDILInstrInfo.h"
|
||||||
|
#include "R600RegisterInfo.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class AMDGPUTargetMachine;
|
||||||
|
class DFAPacketizer;
|
||||||
|
class ScheduleDAG;
|
||||||
|
class MachineFunction;
|
||||||
|
class MachineInstr;
|
||||||
|
class MachineInstrBuilder;
|
||||||
|
|
||||||
|
class R600InstrInfo : public AMDGPUInstrInfo {
|
||||||
|
private:
|
||||||
|
const R600RegisterInfo RI;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
|
||||||
|
|
||||||
|
const R600RegisterInfo &getRegisterInfo() const;
|
||||||
|
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||||
|
unsigned DestReg, unsigned SrcReg,
|
||||||
|
bool KillSrc) const;
|
||||||
|
|
||||||
|
bool isTrig(const MachineInstr &MI) const;
|
||||||
|
|
||||||
|
/// isVector - Vector instructions are instructions that must fill all
|
||||||
|
/// instruction slots within an instruction group.
|
||||||
|
bool isVector(const MachineInstr &MI) const;
|
||||||
|
|
||||||
|
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||||
|
int64_t Imm) const;
|
||||||
|
|
||||||
|
virtual unsigned getIEQOpcode() const;
|
||||||
|
virtual bool isMov(unsigned Opcode) const;
|
||||||
|
|
||||||
|
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
|
||||||
|
const ScheduleDAG *DAG) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End llvm namespace
|
||||||
|
|
||||||
|
/// Bit flags tested against an instruction description's TSFlags field
/// (see R600InstrInfo::isTrig / isVector). Each enumerator occupies one bit.
namespace R600_InstFlag {
  enum TIF {
    TRANS_ONLY = 0x01, // bit 0
    TEX        = 0x02, // bit 1
    REDUCTION  = 0x04, // bit 2
    FC         = 0x08, // bit 3
    TRIG       = 0x10, // bit 4
    OP3        = 0x20, // bit 5
    VECTOR     = 0x40  // bit 6
  };
}
|
||||||
|
|
||||||
|
#endif // R600INSTRUCTIONINFO_H_
|
1322
lib/Target/AMDGPU/R600Instructions.td
Normal file
1322
lib/Target/AMDGPU/R600Instructions.td
Normal file
File diff suppressed because it is too large
Load Diff
16
lib/Target/AMDGPU/R600Intrinsics.td
Normal file
16
lib/Target/AMDGPU/R600Intrinsics.td
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// R600 Intrinsic Definitions
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Target-specific intrinsics carrying the "R600" prefix.
let TargetPrefix = "R600", isTarget = 1 in {
  // Takes one i32 operand and returns a float; marked IntrNoMem.
  def int_R600_load_input :
    Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
}
|
462
lib/Target/AMDGPU/R600KernelParameters.cpp
Normal file
462
lib/Target/AMDGPU/R600KernelParameters.cpp
Normal file
@ -0,0 +1,462 @@
|
|||||||
|
//===-- R600KernelParameters.cpp - Lower kernel function arguments --------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This pass lowers kernel function arguments to loads from the vertex buffer.
|
||||||
|
//
|
||||||
|
// Kernel arguments are stored in the vertex buffer at an offset of 9 dwords,
|
||||||
|
// so arg0 needs to be loaded from VTX_BUFFER[9] and arg1 is loaded from
|
||||||
|
// VTX_BUFFER[10], etc.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
#include "llvm/Constants.h"
|
||||||
|
#include "llvm/Function.h"
|
||||||
|
#include "llvm/Intrinsics.h"
|
||||||
|
#include "llvm/Metadata.h"
|
||||||
|
#include "llvm/Module.h"
|
||||||
|
#include "llvm/Target/TargetData.h"
|
||||||
|
#include "llvm/Support/IRBuilder.h"
|
||||||
|
#include "llvm/Support/TypeBuilder.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
#define CONSTANT_CACHE_SIZE_DW 127
|
||||||
|
|
||||||
|
class R600KernelParameters : public FunctionPass {
|
||||||
|
const TargetData *TD;
|
||||||
|
LLVMContext* Context;
|
||||||
|
Module *Mod;
|
||||||
|
|
||||||
|
struct Param {
|
||||||
|
Param() : Val(NULL), PtrVal(NULL), OffsetInDW(0), SizeInDW(0),
|
||||||
|
IsIndirect(true), SpecialID(0) {}
|
||||||
|
|
||||||
|
Value* Val;
|
||||||
|
Value* PtrVal;
|
||||||
|
int OffsetInDW;
|
||||||
|
int SizeInDW;
|
||||||
|
|
||||||
|
bool IsIndirect;
|
||||||
|
|
||||||
|
std::string SpecialType;
|
||||||
|
int SpecialID;
|
||||||
|
|
||||||
|
int End() { return OffsetInDW + SizeInDW; }
|
||||||
|
// The first 9 dwords are reserved for the grid sizes.
|
||||||
|
int getRatOffset() { return 9 + OffsetInDW; }
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<Param> Params;
|
||||||
|
|
||||||
|
bool IsOpenCLKernel(const Function *Fun);
|
||||||
|
int getLastSpecialID(const std::string& TypeName);
|
||||||
|
|
||||||
|
int getListSize();
|
||||||
|
void AddParam(Argument *Arg);
|
||||||
|
int CalculateArgumentSize(Argument *Arg);
|
||||||
|
void RunAna(Function *Fun);
|
||||||
|
void Replace(Function *Fun);
|
||||||
|
bool IsIndirect(Value *Val, std::set<Value*> &Visited);
|
||||||
|
void Propagate(Function* Fun);
|
||||||
|
void Propagate(Value *V, const Twine &Name, bool IsIndirect = true);
|
||||||
|
Value* ConstantRead(Function *Fun, Param &P);
|
||||||
|
Value* handleSpecial(Function *Fun, Param &P);
|
||||||
|
bool IsSpecialType(Type *T);
|
||||||
|
std::string getSpecialTypeName(Type *T);
|
||||||
|
public:
|
||||||
|
static char ID;
|
||||||
|
R600KernelParameters() : FunctionPass(ID) {}
|
||||||
|
R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {}
|
||||||
|
bool runOnFunction (Function &F);
|
||||||
|
void getAnalysisUsage(AnalysisUsage &AU) const;
|
||||||
|
const char *getPassName() const;
|
||||||
|
bool doInitialization(Module &M);
|
||||||
|
bool doFinalization(Module &M);
|
||||||
|
};
|
||||||
|
|
||||||
|
char R600KernelParameters::ID = 0;
|
||||||
|
|
||||||
|
static RegisterPass<R600KernelParameters> X("kerparam",
|
||||||
|
"OpenCL Kernel Parameter conversion", false, false);
|
||||||
|
|
||||||
|
bool R600KernelParameters::IsOpenCLKernel(const Function* Fun) {
|
||||||
|
Module *Mod = const_cast<Function*>(Fun)->getParent();
|
||||||
|
NamedMDNode * MD = Mod->getOrInsertNamedMetadata("opencl.kernels");
|
||||||
|
|
||||||
|
if (!MD or !MD->getNumOperands()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < int(MD->getNumOperands()); i++) {
|
||||||
|
if (!MD->getOperand(i) or !MD->getOperand(i)->getOperand(0)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(MD->getOperand(i)->getNumOperands() == 1);
|
||||||
|
|
||||||
|
if (MD->getOperand(i)->getOperand(0)->getName() == Fun->getName()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int R600KernelParameters::getLastSpecialID(const std::string &TypeName) {
|
||||||
|
int LastID = -1;
|
||||||
|
|
||||||
|
for (std::vector<Param>::iterator i = Params.begin(); i != Params.end(); i++) {
|
||||||
|
if (i->SpecialType == TypeName) {
|
||||||
|
LastID = i->SpecialID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return LastID;
|
||||||
|
}
|
||||||
|
|
||||||
|
int R600KernelParameters::getListSize() {
|
||||||
|
if (Params.size() == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Params.back().End();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decide whether a parameter must be accessed indirectly.
///
/// Currently always returns true. The analysis below is disabled; it is kept
/// as reference and has been corrected (iterator name, GetElementPtrInst
/// spelling) so that it compiles if it is ever enabled.
bool R600KernelParameters::IsIndirect(Value *Val, std::set<Value*> &Visited) {
  //XXX Direct parameters are not supported yet, so return true here.
  return true;
#if 0
  if (isa<LoadInst>(Val)) {
    return false;
  }

  if (isa<IntegerType>(Val->getType())) {
    assert(0 and "Internal error");
    return false;
  }

  if (Visited.count(Val)) {
    return false;
  }

  Visited.insert(Val);

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val)) {
    GetElementPtrInst::op_iterator I = GEP->op_begin();

    // Skip the pointer operand; any non-constant index forces indirect
    // access.
    for (++I; I != GEP->op_end(); ++I) {
      if (!isa<Constant>(*I)) {
        return true;
      }
    }
  }

  for (Value::use_iterator I = Val->use_begin(); I != Val->use_end(); ++I) {
    Value* V2 = dyn_cast<Value>(*I);

    if (V2) {
      if (IsIndirect(V2, Visited)) {
        return true;
      }
    }
  }

  return false;
#endif
}
|
||||||
|
|
||||||
|
/// Append a Param record for \p Arg. Its offset is the current list size and
/// its size comes from CalculateArgumentSize(). For byval pointer arguments
/// the indirect flag is taken from IsIndirect(); otherwise it keeps the
/// Param default.
void R600KernelParameters::AddParam(Argument *Arg) {
  Param Entry;

  Entry.Val = dyn_cast<Value>(Arg);
  Entry.OffsetInDW = getListSize();
  Entry.SizeInDW = CalculateArgumentSize(Arg);

  const bool IsByValPointer =
      isa<PointerType>(Arg->getType()) && Arg->hasByValAttr();

  if (IsByValPointer) {
    std::set<Value*> Seen;
    Entry.IsIndirect = IsIndirect(Entry.Val, Seen);
  }

  Params.push_back(Entry);
}
|
||||||
|
|
||||||
|
/// Return the store size of \p Arg in dwords, rounded up.
///
/// For a byval pointer argument the size is that of the pointee type;
/// otherwise it is the size of the argument type itself. Asserts that the
/// result is non-zero.
int R600KernelParameters::CalculateArgumentSize(Argument *Arg) {
  assert(TD && "TargetData is required to compute argument sizes");

  Type* T = Arg->getType();

  if (Arg->hasByValAttr()) {
    // Cast once and reuse the result instead of dereferencing a second,
    // unchecked dyn_cast.
    if (PointerType *PT = dyn_cast<PointerType>(T)) {
      T = PT->getElementType();
    }
  }

  // Round the byte size up to whole dwords.
  int StoreSizeInDW = (TD->getTypeStoreSize(T) + 3)/4;

  assert(StoreSizeInDW);

  return StoreSizeInDW;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Record a Param entry for every argument of \p Fun, in argument order.
/// \p Fun must be an OpenCL kernel (asserted).
void R600KernelParameters::RunAna(Function* Fun) {
  assert(IsOpenCLKernel(Fun));

  Function::arg_iterator Arg = Fun->arg_begin();
  while (Arg != Fun->arg_end()) {
    AddParam(Arg);
    ++Arg;
  }
}
|
||||||
|
|
||||||
|
/// For every recorded parameter, build its replacement value (a resource ID
/// for special builtin types, a parameter-space read otherwise) and redirect
/// all uses of the original argument to it. Parameters for which no
/// replacement is produced are left untouched.
void R600KernelParameters::Replace(Function* Fun) {
  for (unsigned Idx = 0; Idx != Params.size(); ++Idx) {
    Param &P = Params[Idx];

    Value *Replacement = IsSpecialType(P.Val->getType())
                             ? handleSpecial(Fun, P)
                             : ConstantRead(Fun, P);
    if (!Replacement) {
      continue;
    }

    P.Val->replaceAllUsesWith(Replacement);
  }
}
|
||||||
|
|
||||||
|
/// Run address-space propagation for every parameter that has a replacement
/// pointer (PtrVal). \p Fun itself is not used.
void R600KernelParameters::Propagate(Function* Fun) {
  for (unsigned Idx = 0; Idx != Params.size(); ++Idx) {
    Param &P = Params[Idx];
    if (!P.PtrVal) {
      continue;
    }
    Propagate(P.PtrVal, P.Val->getName(), P.IsIndirect);
  }
}
|
||||||
|
|
||||||
|
/// Push the parameter address space down the use chain of \p V.
///
/// - A GEP whose result is not in the target address space is recreated on a
///   (bitcast) pointer operand in that space; the old GEP is replaced and
///   erased, and propagation continues through the new GEP's users.
/// - A load whose pointer is not in the target address space is recreated
///   through a bitcast pointer; propagation stops at loads.
/// - For any other value, propagation recurses into all users.
///
/// \param V          value to start from.
/// \param Name       name given to newly created instructions.
/// \param IsIndirect selects PARAM_I_ADDRESS (true) or PARAM_D_ADDRESS.
///
/// NOTE(review): there is no visited set, so a cyclic use chain (e.g. through
/// a PHI) would recurse without bound - confirm such input cannot occur.
void R600KernelParameters::Propagate(Value* V, const Twine& Name, bool IsIndirect) {
  LoadInst* Load = dyn_cast<LoadInst>(V);
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);

  unsigned Addrspace;

  if (IsIndirect) {
    Addrspace = AMDILAS::PARAM_I_ADDRESS;
  } else {
    Addrspace = AMDILAS::PARAM_D_ADDRESS;
  }

  if (GEP and GEP->getType()->getAddressSpace() != Addrspace) {
    Value *Op = GEP->getPointerOperand();

    // A GEP's pointer operand is always a pointer, so cast<> (which asserts)
    // is used rather than dereferencing an unchecked dyn_cast<>.
    PointerType *OpType = cast<PointerType>(Op->getType());

    if (OpType->getAddressSpace() != Addrspace) {
      Op = new BitCastInst(Op, PointerType::get(OpType->getElementType(),
                                                Addrspace),
                           Name, GEP);
    }

    // Named Indices to avoid shadowing the Params member.
    std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end());

    GetElementPtrInst* GEP2 = GetElementPtrInst::Create(Op, Indices, Name, GEP);
    GEP2->setIsInBounds(GEP->isInBounds());
    V = GEP2;
    GEP->replaceAllUsesWith(GEP2);
    GEP->eraseFromParent();
    Load = NULL;
  }

  if (Load) {
    ///normally at this point we have the right address space
    if (Load->getPointerAddressSpace() != Addrspace) {
      Value *OrigPtr = Load->getPointerOperand();
      PointerType *OrigPtrType = cast<PointerType>(OrigPtr->getType());

      Type* NewPtrType = PointerType::get(OrigPtrType->getElementType(),
                                          Addrspace);

      Value* NewPtr = OrigPtr;

      if (OrigPtr->getType() != NewPtrType) {
        NewPtr = new BitCastInst(OrigPtr, NewPtrType, "prop_cast", Load);
      }

      Value* new_Load = new LoadInst(NewPtr, Name, Load);
      Load->replaceAllUsesWith(new_Load);
      Load->eraseFromParent();
    }

    return;
  }

  // Snapshot the users first: the recursive calls rewrite the use list.
  std::vector<User*> Users(V->use_begin(), V->use_end());

  for (int i = 0; i < int(Users.size()); i++) {
    Value* V2 = dyn_cast<Value>(Users[i]);

    if (V2) {
      Propagate(V2, Name, IsIndirect);
    }
  }
}
|
||||||
|
|
||||||
|
/// Build the replacement value for an ordinary (non-special) kernel argument.
///
/// All new instructions are inserted before the first instruction of the
/// entry block. Returns NULL when the argument has no uses.
///
/// - byval pointer argument: returns a pointer into the parameter address
///   space (an i32 GEP at P.getRatOffset(), bitcast to the pointee type) and
///   records it in P.PtrVal.
/// - any other argument: returns a load of the argument's type from the
///   parameter address space.
///
/// NOTE(review): in the non-byval path the GEP indexes a pointer to the
/// argument type, so the offset is scaled by that type's size rather than by
/// dwords - confirm this is intended for arguments wider than 32 bits.
Value* R600KernelParameters::ConstantRead(Function *Fun, Param &P) {
  assert(Fun->front().begin() != Fun->front().end());

  Instruction *FirstInst = Fun->front().begin();
  IRBuilder <> Builder (FirstInst);

  // Unused arguments need no replacement.
  if (!P.Val->hasNUsesOrMore(1)) {
    return NULL;
  }
  unsigned Addrspace;

  if (P.IsIndirect) {
    Addrspace = AMDILAS::PARAM_I_ADDRESS;
  } else {
    Addrspace = AMDILAS::PARAM_D_ADDRESS;
  }

  // Params only ever holds function arguments (see AddParam), so cast<>
  // documents and asserts that instead of dereferencing an unchecked
  // dyn_cast<>.
  Argument *Arg = cast<Argument>(P.Val);
  Type * ArgType = P.Val->getType();
  PointerType * ArgPtrType = dyn_cast<PointerType>(P.Val->getType());

  // The first 9 dwords are reserved (see Param::getRatOffset), so argument
  // data starts at dword 9.
  if (ArgPtrType and Arg->hasByValAttr()) {
    Value* ParamAddrSpacePtr = ConstantPointerNull::get(
                                    PointerType::get(Type::getInt32Ty(*Context),
                                    Addrspace));
    Value* ParamPtr = GetElementPtrInst::Create(ParamAddrSpacePtr,
                                    ConstantInt::get(Type::getInt32Ty(*Context),
                                    P.getRatOffset()), Arg->getName(),
                                    FirstInst);
    ParamPtr = new BitCastInst(ParamPtr,
                                PointerType::get(ArgPtrType->getElementType(),
                                                 Addrspace),
                                Arg->getName(), FirstInst);
    P.PtrVal = ParamPtr;
    return ParamPtr;
  } else {
    Value *ParamAddrSpacePtr = ConstantPointerNull::get(PointerType::get(
                                                        ArgType, Addrspace));

    Value *ParamPtr = Builder.CreateGEP(ParamAddrSpacePtr,
             ConstantInt::get(Type::getInt32Ty(*Context), P.getRatOffset()),
                              Arg->getName());

    Value *Param_Value = Builder.CreateLoad(ParamPtr, Arg->getName());

    return Param_Value;
  }
}
|
||||||
|
|
||||||
|
/// Assign a resource ID to an image or sampler argument and return an
/// inttoptr of that ID (typed like the argument), inserted before the first
/// instruction of the entry block.
///
/// image2d_t and image3d_t share one ID sequence starting at 2; sampler_t
/// has its own sequence starting at 0. Each new ID is one above the last ID
/// already handed out in the sequence. Any other special type yields NULL.
/// The chosen type name and ID are stored in \p P.
Value* R600KernelParameters::handleSpecial(Function* Fun, Param& P) {
  std::string Name = getSpecialTypeName(P.Val->getType());

  assert(!Name.empty());

  int PrevID;
  int FirstID;

  if (Name == "image2d_t" || Name == "image3d_t") {
    PrevID = std::max(getLastSpecialID("image2d_t"),
                      getLastSpecialID("image3d_t"));
    FirstID = 2; ///ID0 and ID1 are used internally by the driver
  } else if (Name == "sampler_t") {
    PrevID = getLastSpecialID("sampler_t");
    FirstID = 0;
  } else {
    ///TODO: give some error message
    return NULL;
  }

  P.SpecialType = Name;
  P.SpecialID = (PrevID == -1) ? FirstID : PrevID + 1;

  Instruction *InsertBefore = Fun->front().begin();
  Value *IDConst = ConstantInt::get(Type::getInt32Ty(*Context), P.SpecialID);

  return new IntToPtrInst(IDConst, P.Val->getType(), "resourceID",
                          InsertBefore);
}
|
||||||
|
|
||||||
|
|
||||||
|
/// True when getSpecialTypeName() yields a non-empty name for \p T.
bool R600KernelParameters::IsSpecialType(Type* T) {
  const std::string SpecialName = getSpecialTypeName(T);
  return !SpecialName.empty();
}
|
||||||
|
|
||||||
|
/// Return the OpenCL builtin type name behind \p T, or "" if there is none.
///
/// \p T qualifies when it is a pointer to a named struct whose name starts
/// with "struct.opencl_builtin_type_"; the returned string is the remainder
/// of the struct name after that prefix.
std::string R600KernelParameters::getSpecialTypeName(Type* T) {
  PointerType *PT = dyn_cast<PointerType>(T);
  StructType *ST = NULL;

  if (PT) {
    ST = dyn_cast<StructType>(PT->getElementType());
  }

  // Literal (anonymous) structs have no name, and StructType::getName()
  // asserts on them, so they are excluded up front.
  if (ST and !ST->isLiteral()) {
    std::string Prefix = "struct.opencl_builtin_type_";

    std::string Name = ST->getName().str();

    if (Name.substr(0, Prefix.length()) == Prefix) {
      return Name.substr(Prefix.length(), Name.length());
    }
  }

  return "";
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Rewrite the arguments of \p F if it is an OpenCL kernel.
///
/// \returns true when the function was processed (its IR may have been
/// modified), false when it is not a kernel and was left alone.
bool R600KernelParameters::runOnFunction (Function &F) {
  if (!IsOpenCLKernel(&F)) {
    return false;
  }

  // The parameter list is per-function state. Without this reset, entries
  // (and their offsets and resource IDs) from previously processed kernels
  // would leak into this one.
  Params.clear();

  RunAna(&F);
  Replace(&F);
  Propagate(&F);

  // Replace() and Propagate() insert, replace and erase instructions, so
  // report the function as changed.
  return true;
}
|
||||||
|
|
||||||
|
/// Declare which analyses survive this pass.
///
/// The pass inserts, replaces and erases GEP, bitcast, load and inttoptr
/// instructions, but it never adds or removes basic blocks or terminators.
/// Only the CFG is therefore declared preserved, not every analysis.
void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const {
  FunctionPass::getAnalysisUsage(AU);
  AU.setPreservesCFG();
}
|
||||||
|
|
||||||
|
/// Human-readable name of this pass.
const char *R600KernelParameters::getPassName() const {
  const char *PassName = "OpenCL Kernel parameter conversion to memory";
  return PassName;
}
|
||||||
|
|
||||||
|
/// Remember the module and its LLVMContext for later use by the pass.
/// Returns false: the module is not modified here.
bool R600KernelParameters::doInitialization(Module &M) {
  Mod = &M;
  Context = &M.getContext();
  return false;
}
|
||||||
|
|
||||||
|
/// Nothing to clean up; returns false because the module is not modified.
bool R600KernelParameters::doFinalization(Module &M) {
  const bool Changed = false;
  return Changed;
}
|
||||||
|
|
||||||
|
} // End anonymous namespace
|
||||||
|
|
||||||
|
/// Factory for the kernel-parameter lowering pass; \p TD is handed to the
/// pass for argument size computation. The returned object is heap-allocated.
FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD) {
  FunctionPass *Pass = new R600KernelParameters(TD);
  return Pass;
}
|
16
lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
Normal file
16
lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "R600MachineFunctionInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Construct an empty function-info object.  The MachineFunction argument is
/// not used; ReservedRegs starts out empty.
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
  : MachineFunctionInfo()
{
}
|
33
lib/Target/AMDGPU/R600MachineFunctionInfo.h
Normal file
33
lib/Target/AMDGPU/R600MachineFunctionInfo.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// R600MachineFunctionInfo is used for keeping track of which registers have
|
||||||
|
// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef R600MACHINEFUNCTIONINFO_H
|
||||||
|
#define R600MACHINEFUNCTIONINFO_H
|
||||||
|
|
||||||
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class R600MachineFunctionInfo : public MachineFunctionInfo {
|
||||||
|
|
||||||
|
public:
|
||||||
|
R600MachineFunctionInfo(const MachineFunction &MF);
|
||||||
|
std::vector<unsigned> ReservedRegs;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End llvm namespace
|
||||||
|
|
||||||
|
#endif //R600MACHINEFUNCTIONINFO_H
|
88
lib/Target/AMDGPU/R600RegisterInfo.cpp
Normal file
88
lib/Target/AMDGPU/R600RegisterInfo.cpp
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The file contains the R600 implementation of the TargetRegisterInfo class.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "R600RegisterInfo.h"
|
||||||
|
#include "AMDGPUTargetMachine.h"
|
||||||
|
#include "R600MachineFunctionInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Construct the R600 register info, forwarding both arguments to the
/// AMDGPURegisterInfo base and keeping references to them in TM and TII.
R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
                                   const TargetInstrInfo &tii)
  : AMDGPURegisterInfo(tm, tii), TM(tm), TII(tii)
{
}
|
||||||
|
|
||||||
|
/// getReservedRegs - Build the set of reserved registers for \p MF.
///
/// The result contains a fixed list of special registers, every register in
/// R600_CReg32RegClass, and every register listed in the function's
/// R600MachineFunctionInfo::ReservedRegs.
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
{
  // Special registers that are reserved unconditionally.
  const unsigned AlwaysReserved[] = {
    AMDGPU::ZERO,     AMDGPU::HALF,    AMDGPU::ONE,  AMDGPU::ONE_INT,
    AMDGPU::NEG_HALF, AMDGPU::NEG_ONE, AMDGPU::PV_X, AMDGPU::ALU_LITERAL_X
  };
  const unsigned NumAlwaysReserved =
      sizeof(AlwaysReserved) / sizeof(AlwaysReserved[0]);

  BitVector Reserved(getNumRegs());
  for (unsigned Idx = 0; Idx != NumAlwaysReserved; ++Idx) {
    Reserved.set(AlwaysReserved[Idx]);
  }

  // Every register of the R600_CReg32 class.
  TargetRegisterClass::iterator CReg = AMDGPU::R600_CReg32RegClass.begin();
  const TargetRegisterClass::iterator CRegEnd =
      AMDGPU::R600_CReg32RegClass.end();
  for (; CReg != CRegEnd; ++CReg) {
    Reserved.set(*CReg);
  }

  // Registers recorded in this function's machine function info.
  const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  const std::vector<unsigned> &PerFunction = MFI->ReservedRegs;
  for (unsigned Idx = 0, End = PerFunction.size(); Idx != End; ++Idx) {
    Reserved.set(PerFunction[Idx]);
  }

  return Reserved;
}
|
||||||
|
|
||||||
|
/// getISARegClass - Map the GPRF32 and GPRI32 register classes to
/// R600_Reg32; any other register class is returned unchanged.
const TargetRegisterClass *
R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
{
  const unsigned ClassID = rc->getID();
  if (ClassID == AMDGPU::GPRF32RegClassID ||
      ClassID == AMDGPU::GPRI32RegClassID) {
    return &AMDGPU::R600_Reg32RegClass;
  }
  return rc;
}
|
||||||
|
|
||||||
|
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
|
||||||
|
{
|
||||||
|
switch(reg) {
|
||||||
|
case AMDGPU::ZERO:
|
||||||
|
case AMDGPU::ONE:
|
||||||
|
case AMDGPU::ONE_INT:
|
||||||
|
case AMDGPU::NEG_ONE:
|
||||||
|
case AMDGPU::HALF:
|
||||||
|
case AMDGPU::NEG_HALF:
|
||||||
|
case AMDGPU::ALU_LITERAL_X:
|
||||||
|
return 0;
|
||||||
|
default: return getHWRegChanGen(reg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// getCFGStructurizerRegClass - Register class the CFGStructurizer should use
/// for values of type \p VT.  Every value type, including i32, currently maps
/// to R600_TReg32.
const TargetRegisterClass *
R600RegisterInfo::getCFGStructurizerRegClass(MVT VT) const
{
  switch (VT.SimpleTy) {
  case MVT::i32:
  default:
    return &AMDGPU::R600_TReg32RegClass;
  }
}
|
||||||
|
#include "R600HwRegInfo.include"
|
54
lib/Target/AMDGPU/R600RegisterInfo.h
Normal file
54
lib/Target/AMDGPU/R600RegisterInfo.h
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Interface definition for R600RegisterInfo
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef R600REGISTERINFO_H_
|
||||||
|
#define R600REGISTERINFO_H_
|
||||||
|
|
||||||
|
#include "AMDGPUTargetMachine.h"
|
||||||
|
#include "AMDILRegisterInfo.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class R600TargetMachine;
|
||||||
|
class TargetInstrInfo;
|
||||||
|
|
||||||
|
struct R600RegisterInfo : public AMDGPURegisterInfo
|
||||||
|
{
|
||||||
|
AMDGPUTargetMachine &TM;
|
||||||
|
const TargetInstrInfo &TII;
|
||||||
|
|
||||||
|
R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
|
||||||
|
|
||||||
|
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
|
||||||
|
|
||||||
|
/// getISARegClass - rc is an AMDIL reg class. This function returns the
|
||||||
|
/// R600 reg class that is equivalent to the given AMDIL reg class.
|
||||||
|
virtual const TargetRegisterClass * getISARegClass(
|
||||||
|
const TargetRegisterClass * rc) const;
|
||||||
|
|
||||||
|
/// getHWRegChan - get the HW encoding for a register's channel.
|
||||||
|
unsigned getHWRegChan(unsigned reg) const;
|
||||||
|
|
||||||
|
/// getCFGStructurizerRegClass - get the register class of the specified
|
||||||
|
/// type to use in the CFGStructurizer
|
||||||
|
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// getHWRegChanGen - Generated function returns a register's channel
|
||||||
|
/// encoding.
|
||||||
|
unsigned getHWRegChanGen(unsigned reg) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif // R600REGISTERINFO_H_
|
5271
lib/Target/AMDGPU/R600RegisterInfo.td
Normal file
5271
lib/Target/AMDGPU/R600RegisterInfo.td
Normal file
File diff suppressed because it is too large
Load Diff
36
lib/Target/AMDGPU/R600Schedule.td
Normal file
36
lib/Target/AMDGPU/R600Schedule.td
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
|
||||||
|
// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
|
||||||
|
// slot has been removed.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
||||||
|
// Functional units: the four vector ALU slots and the TRANS slot.
def ALU_X : FuncUnit;
def ALU_Y : FuncUnit;
def ALU_Z : FuncUnit;
def ALU_W : FuncUnit;
def TRANS : FuncUnit;

// Itinerary classes used to tag instructions.
def AnyALU : InstrItinClass;
def VecALU : InstrItinClass;
def TransALU : InstrItinClass;

// Itineraries for R600/Evergreen.
// NOTE(review): ALU_NULL and NullALU are referenced here but are not defined
// in this file; presumably they are defined elsewhere in the backend - confirm.
def R600_EG_Itin : ProcessorItineraries <
  [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
  [],
  [
    // May issue in any of the five slots.
    InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
    // May issue in any of the four vector slots.  ALU_Z was previously
    // missing from this list because ALU_X was listed twice.
    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
    // Restricted to the TRANS slot.
    InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
    InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
  ]
>;
|
117
lib/Target/AMDGPU/SIAssignInterpRegs.cpp
Normal file
117
lib/Target/AMDGPU/SIAssignInterpRegs.cpp
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This pass maps the pseudo interpolation registers to the correct physical
|
||||||
|
// registers. Prior to executing a fragment shader, the GPU loads interpolation
|
||||||
|
// parameters into physical registers. The specific physical register that each
|
||||||
|
// interpolation parameter ends up in depends on the type of the interpolation
|
||||||
|
// parameter as well as how many interpolation parameters are used by the
|
||||||
|
// shader.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDGPUUtil.h"
|
||||||
|
#include "AMDIL.h"
|
||||||
|
#include "SIMachineFunctionInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class SIAssignInterpRegsPass : public MachineFunctionPass {
|
||||||
|
|
||||||
|
private:
|
||||||
|
static char ID;
|
||||||
|
TargetMachine &TM;
|
||||||
|
|
||||||
|
public:
|
||||||
|
SIAssignInterpRegsPass(TargetMachine &tm) :
|
||||||
|
MachineFunctionPass(ID), TM(tm) { }
|
||||||
|
|
||||||
|
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||||
|
|
||||||
|
const char *getPassName() const { return "SI Assign intrpolation registers"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End anonymous namespace
|
||||||
|
|
||||||
|
char SIAssignInterpRegsPass::ID = 0;
|
||||||
|
|
||||||
|
/// Number of interpolation value groups handled by the pass.
#define INTERP_VALUES 16

/// interp_info - One group of interpolation registers.
struct interp_info {
  /// Set when the group is found to be used by the function.
  bool enabled;
  /// Pseudo registers belonging to the group; only the first reg_count
  /// entries are meaningful.
  unsigned regs[3];
  /// Number of valid entries in regs.
  unsigned reg_count;
};
|
||||||
|
|
||||||
|
|
||||||
|
/// createSIAssignInterpRegsPass - Allocate a new SIAssignInterpRegsPass bound
/// to \p tm.
FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm)
{
  FunctionPass *Pass = new SIAssignInterpRegsPass(tm);
  return Pass;
}
|
||||||
|
|
||||||
|
/// runOnMachineFunction - Assign VGPRs to the interpolation values that the
/// function uses.
///
/// A first pass over the interpolation table marks each group as enabled when
/// any of its pseudo registers has a use.  A second pass walks the enabled
/// groups in table order: it sets the group's bit in
/// SIMachineFunctionInfo::spi_ps_input_addr, and for each register of the
/// group it creates a virtual VReg_32 register, replaces the pseudo register
/// with it, and passes the next consecutive VReg_32 register together with the
/// virtual register to AMDGPU::utilAddLiveIn.
///
/// \returns false in every case.
bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
{

  struct interp_info InterpUse[INTERP_VALUES] = {
    {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
    {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
    {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
    {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
    {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
    {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
    {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
    {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
    {false, {AMDGPU::POS_X_FLOAT}, 1},
    {false, {AMDGPU::POS_Y_FLOAT}, 1},
    {false, {AMDGPU::POS_Z_FLOAT}, 1},
    {false, {AMDGPU::POS_W_FLOAT}, 1},
    {false, {AMDGPU::FRONT_FACE}, 1},
    {false, {AMDGPU::ANCILLARY}, 1},
    {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
    {false, {AMDGPU::POS_FIXED_PT}, 1}
  };

  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  /* First pass, mark the interpolation values that are used. */
  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
                                                               reg_idx++) {
      // Accumulate with ||: a group is used as soon as ANY of its registers
      // has a use.  Plain assignment would let the last register of the
      // group overwrite the result of the earlier ones.
      InterpUse[interp_idx].enabled = InterpUse[interp_idx].enabled ||
                            !MRI.use_empty(InterpUse[interp_idx].regs[reg_idx]);
    }
  }

  unsigned used_vgprs = 0;

  /* Second pass, replace with VGPRs. */
  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
    if (!InterpUse[interp_idx].enabled) {
      continue;
    }
    MFI->spi_ps_input_addr |= (1 << interp_idx);

    // used_vgprs keeps counting across groups, so enabled groups receive
    // consecutive VReg_32 registers in table order.
    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
                                                  reg_idx++, used_vgprs++) {
      unsigned new_reg = AMDGPU::VReg_32RegClass.getRegister(used_vgprs);
      unsigned virt_reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      MRI.replaceRegWith(InterpUse[interp_idx].regs[reg_idx], virt_reg);
      AMDGPU::utilAddLiveIn(&MF, MRI, TM.getInstrInfo(), new_reg, virt_reg);
    }
  }

  return false;
}
|
321
lib/Target/AMDGPU/SICodeEmitter.cpp
Normal file
321
lib/Target/AMDGPU/SICodeEmitter.cpp
Normal file
@ -0,0 +1,321 @@
|
|||||||
|
//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The SI code emitter produces machine code that can be executed directly on
|
||||||
|
// the GPU device.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDGPUUtil.h"
|
||||||
|
#include "AMDILCodeEmitter.h"
|
||||||
|
#include "SIInstrInfo.h"
|
||||||
|
#include "SIMachineFunctionInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstr.h"
|
||||||
|
#include "llvm/Support/FormattedStream.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/// Value OR'ed into the low bits of an operand encoding when the operand is a
/// literal constant; the literal itself is placed in bits 32 and up.
#define LITERAL_REG 255

/// Mask with bit (9 * src_idx - 1) set.  The argument is parenthesised so
/// that expressions such as VGPR_BIT(i + 1) expand correctly.
#define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class SICodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
|
||||||
|
|
||||||
|
private:
|
||||||
|
static char ID;
|
||||||
|
formatted_raw_ostream &_OS;
|
||||||
|
const TargetMachine *TM;
|
||||||
|
void emitState(MachineFunction & MF);
|
||||||
|
void emitInstr(MachineInstr &MI);
|
||||||
|
|
||||||
|
void outputBytes(uint64_t value, unsigned bytes);
|
||||||
|
unsigned GPRAlign(const MachineInstr &MI, unsigned OpNo, unsigned shift)
|
||||||
|
const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
SICodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
|
||||||
|
_OS(OS), TM(NULL) { }
|
||||||
|
const char *getPassName() const { return "SI Code Emitter"; }
|
||||||
|
bool runOnMachineFunction(MachineFunction &MF);
|
||||||
|
|
||||||
|
/// getMachineOpValue - Return the encoding for MO
|
||||||
|
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
|
||||||
|
const MachineOperand &MO) const;
|
||||||
|
|
||||||
|
/// GPR4AlignEncode - Encoding for when 4 consectuive registers are used
|
||||||
|
virtual unsigned GPR4AlignEncode(const MachineInstr &MI, unsigned OpNo)
|
||||||
|
const;
|
||||||
|
|
||||||
|
/// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
|
||||||
|
virtual unsigned GPR2AlignEncode(const MachineInstr &MI, unsigned OpNo)
|
||||||
|
const;
|
||||||
|
/// i32LiteralEncode - Encode an i32 literal this is used as an operand
|
||||||
|
/// for an instruction in place of a register.
|
||||||
|
virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
|
||||||
|
const;
|
||||||
|
/// SMRDmemriEncode - Encoding for SMRD indexed loads
|
||||||
|
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
|
||||||
|
const;
|
||||||
|
|
||||||
|
/// VOPPostEncode - Post-Encoder method for VOP instructions
|
||||||
|
virtual uint64_t VOPPostEncode(const MachineInstr &MI,
|
||||||
|
uint64_t Value) const;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
char SICodeEmitter::ID = 0;
|
||||||
|
|
||||||
|
/// createSICodeEmitterPass - Allocate a new SICodeEmitter that writes to
/// \p OS.
FunctionPass *llvm::createSICodeEmitterPass(formatted_raw_ostream &OS)
{
  FunctionPass *Emitter = new SICodeEmitter(OS);
  return Emitter;
}
|
||||||
|
|
||||||
|
/// emitState - Write the register-usage header for \p MF.
///
/// Every register operand of every instruction is inspected to find the
/// highest SGPR and VGPR index in use.  Three 32-bit values are then written
/// to the output stream: maxSGPR + 1, maxVGPR + 1 and the function's
/// spi_ps_input_addr.  When VCC is referenced anywhere, two extra SGPRs are
/// counted.
void SICodeEmitter::emitState(MachineFunction & MF)
{
  unsigned maxSGPR = 0;
  unsigned maxVGPR = 0;
  bool VCCUsed = false;
  const SIRegisterInfo * RI =
                static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                      I != E; ++I) {
      MachineInstr &MI = *I;
      unsigned numOperands = MI.getNumOperands();
      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
        MachineOperand & MO = MI.getOperand(op_idx);
        unsigned maxUsed;
        unsigned width = 0;
        bool isSGPR = false;
        unsigned reg;
        unsigned hwReg;
        if (!MO.isReg()) {
          continue;
        }
        reg = MO.getReg();
        // VCC is accounted for separately after the scan.
        if (reg == AMDGPU::VCC) {
          VCCUsed = true;
          continue;
        }
        // Classify the register: scalar or vector, and how many 32-bit
        // registers it spans.
        if (AMDGPU::SReg_32RegClass.contains(reg)) {
          isSGPR = true;
          width = 1;
        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
          isSGPR = false;
          width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
          isSGPR = true;
          width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
          isSGPR = false;
          width = 2;
        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
          isSGPR = true;
          width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
          isSGPR = false;
          width = 4;
        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
          isSGPR = true;
          width = 8;
        } else {
          // The negation must be outside the string: a bare string literal
          // is a non-null pointer, so asserting it could never fail.
          assert(!"Unknown register class");
          // With assertions compiled out, skip the operand.  A width of 0
          // would make the unsigned computation below wrap around and
          // corrupt the VGPR count.
          continue;
        }
        hwReg = RI->getEncodingValue(reg);
        maxUsed = ((hwReg + 1) * width) - 1;
        if (isSGPR) {
          maxSGPR = maxUsed > maxSGPR ? maxUsed : maxSGPR;
        } else {
          maxVGPR = maxUsed > maxVGPR ? maxUsed : maxVGPR;
        }
      }
    }
  }
  if (VCCUsed) {
    maxSGPR += 2;
  }
  outputBytes(maxSGPR + 1, 4);
  outputBytes(maxVGPR + 1, 4);
  outputBytes(MFI->spi_ps_input_addr, 4);
}
|
||||||
|
|
||||||
|
/// runOnMachineFunction - Emit \p MF to the output stream.
///
/// The function is dumped first when the subtarget requests it.  The state
/// header is written, followed by every instruction except KILL and RETURN,
/// and finally a freshly built S_ENDPGM instruction.
///
/// \returns false in every case.
bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
{
  TM = &MF.getTarget();

  const AMDILSubtarget &Subtarget = TM->getSubtarget<AMDILSubtarget>();
  if (Subtarget.dumpCode()) {
    MF.dump();
  }

  emitState(MF);

  for (MachineFunction::iterator Block = MF.begin(), BlockEnd = MF.end();
       Block != BlockEnd; ++Block) {
    for (MachineBasicBlock::iterator Inst = Block->begin(),
                                     InstEnd = Block->end();
         Inst != InstEnd; ++Inst) {
      const unsigned Opcode = Inst->getOpcode();
      // KILL and RETURN are not emitted.
      if (Opcode == AMDGPU::KILL || Opcode == AMDGPU::RETURN) {
        continue;
      }
      emitInstr(*Inst);
    }
  }

  // Emit S_END_PGM
  MachineInstr * EndPgm = BuildMI(MF, DebugLoc(),
                                  TM->getInstrInfo()->get(AMDGPU::S_ENDPGM));
  emitInstr(*EndPgm);
  return false;
}
|
||||||
|
|
||||||
|
/// emitInstr - Encode \p MI and write its bytes to the output stream.  An
/// encoding whose low 32 bits are all ones is treated as unsupported: the
/// instruction is dumped and the process aborts.
void SICodeEmitter::emitInstr(MachineInstr &MI)
{
  const uint64_t Encoded = getBinaryCodeForInstr(MI);

  const bool Unsupported = (Encoded & 0xffffffff) == 0xffffffff;
  if (Unsupported) {
    fprintf(stderr, "Unsupported Instruction: \n");
    MI.dump();
    abort();
  }

  const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
  outputBytes(Encoded, SII->getEncodingBytes(MI));
}
|
||||||
|
|
||||||
|
uint64_t SICodeEmitter::getMachineOpValue(const MachineInstr &MI,
|
||||||
|
const MachineOperand &MO) const
|
||||||
|
{
|
||||||
|
const SIRegisterInfo * RI =
|
||||||
|
static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
|
||||||
|
|
||||||
|
switch(MO.getType()) {
|
||||||
|
case MachineOperand::MO_Register:
|
||||||
|
return RI->getEncodingValue(MO.getReg());
|
||||||
|
|
||||||
|
case MachineOperand::MO_Immediate:
|
||||||
|
return MO.getImm();
|
||||||
|
|
||||||
|
case MachineOperand::MO_FPImmediate:
|
||||||
|
// XXX: Not all instructions can use inline literals
|
||||||
|
// XXX: We should make sure this is a 32-bit constant
|
||||||
|
return LITERAL_REG | (MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue() << 32);
|
||||||
|
default:
|
||||||
|
llvm_unreachable("Encoding of this operand type is not supported yet.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned SICodeEmitter::GPRAlign(const MachineInstr &MI, unsigned OpNo,
|
||||||
|
unsigned shift) const
|
||||||
|
{
|
||||||
|
const SIRegisterInfo * RI =
|
||||||
|
static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
|
||||||
|
unsigned regCode = RI->getEncodingValue(MI.getOperand(OpNo).getReg());
|
||||||
|
return regCode >> shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned SICodeEmitter::GPR4AlignEncode(const MachineInstr &MI,
|
||||||
|
unsigned OpNo) const
|
||||||
|
{
|
||||||
|
return GPRAlign(MI, OpNo, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned SICodeEmitter::GPR2AlignEncode(const MachineInstr &MI,
|
||||||
|
unsigned OpNo) const
|
||||||
|
{
|
||||||
|
return GPRAlign(MI, OpNo, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI,
|
||||||
|
unsigned OpNo) const
|
||||||
|
{
|
||||||
|
return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SMRD_OFFSET_MASK 0xff
|
||||||
|
#define SMRD_IMM_SHIFT 8
|
||||||
|
#define SMRD_SBASE_MASK 0x3f
|
||||||
|
#define SMRD_SBASE_SHIFT 9
|
||||||
|
/// SMRDmemriEncode - This function is responsibe for encoding the offset
|
||||||
|
/// and the base ptr for SMRD instructions it should return a bit string in
|
||||||
|
/// this format:
|
||||||
|
///
|
||||||
|
/// OFFSET = bits{7-0}
|
||||||
|
/// IMM = bits{8}
|
||||||
|
/// SBASE = bits{14-9}
|
||||||
|
///
|
||||||
|
uint32_t SICodeEmitter::SMRDmemriEncode(const MachineInstr &MI,
|
||||||
|
unsigned OpNo) const
|
||||||
|
{
|
||||||
|
uint32_t encoding;
|
||||||
|
|
||||||
|
const MachineOperand &OffsetOp = MI.getOperand(OpNo + 1);
|
||||||
|
|
||||||
|
//XXX: Use this function for SMRD loads with register offsets
|
||||||
|
assert(OffsetOp.isImm());
|
||||||
|
|
||||||
|
encoding =
|
||||||
|
(getMachineOpValue(MI, OffsetOp) & SMRD_OFFSET_MASK)
|
||||||
|
| (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
|
||||||
|
| ((GPR2AlignEncode(MI, OpNo) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
|
||||||
|
;
|
||||||
|
|
||||||
|
return encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR.
|
||||||
|
/// XXX: It would be nice if we could handle this without a PostEncode function.
|
||||||
|
uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI,
|
||||||
|
uint64_t Value) const
|
||||||
|
{
|
||||||
|
const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
|
||||||
|
unsigned encodingType = SII->getEncodingType(MI);
|
||||||
|
unsigned numSrcOps;
|
||||||
|
unsigned vgprBitOffset;
|
||||||
|
|
||||||
|
if (encodingType == SIInstrEncodingType::VOP3) {
|
||||||
|
numSrcOps = 3;
|
||||||
|
vgprBitOffset = 32;
|
||||||
|
} else {
|
||||||
|
numSrcOps = 1;
|
||||||
|
vgprBitOffset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add one to skip over the destination reg operand.
|
||||||
|
for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
|
||||||
|
if (!MI.getOperand(opIdx).isReg()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
unsigned reg = MI.getOperand(opIdx).getReg();
|
||||||
|
if (AMDGPU::VReg_32RegClass.contains(reg)
|
||||||
|
|| AMDGPU::VReg_64RegClass.contains(reg)) {
|
||||||
|
Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void SICodeEmitter::outputBytes(uint64_t value, unsigned bytes)
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < bytes; i++) {
|
||||||
|
_OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
|
||||||
|
}
|
||||||
|
}
|
224
lib/Target/AMDGPU/SIGenRegisterInfo.pl
Normal file
224
lib/Target/AMDGPU/SIGenRegisterInfo.pl
Normal file
@ -0,0 +1,224 @@
|
|||||||
|
#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
|
||||||
|
#
|
||||||
|
# The LLVM Compiler Infrastructure
|
||||||
|
#
|
||||||
|
# This file is distributed under the University of Illinois Open Source
|
||||||
|
# License. See LICENSE.TXT for details.
|
||||||
|
#
|
||||||
|
#===------------------------------------------------------------------------===#
|
||||||
|
#
|
||||||
|
# This perl script prints to stdout .td code to be used as SIRegisterInfo.td
|
||||||
|
# it also generates a file called SIHwRegInfo.include, which contains helper
|
||||||
|
# functions for determining the hw encoding of registers.
|
||||||
|
#
|
||||||
|
#===------------------------------------------------------------------------===#
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
my $SGPR_COUNT = 104;
|
||||||
|
my $VGPR_COUNT = 256;
|
||||||
|
|
||||||
|
my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
|
||||||
|
my $VGPR_MAX_IDX = $VGPR_COUNT - 1;
|
||||||
|
|
||||||
|
my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';
|
||||||
|
|
||||||
|
print <<STRING;
|
||||||
|
|
||||||
|
let Namespace = "AMDGPU" in {
|
||||||
|
def low : SubRegIndex;
|
||||||
|
def high : SubRegIndex;
|
||||||
|
|
||||||
|
def sub0 : SubRegIndex;
|
||||||
|
def sub1 : SubRegIndex;
|
||||||
|
def sub2 : SubRegIndex;
|
||||||
|
def sub3 : SubRegIndex;
|
||||||
|
def sub4 : SubRegIndex;
|
||||||
|
def sub5 : SubRegIndex;
|
||||||
|
def sub6 : SubRegIndex;
|
||||||
|
def sub7 : SubRegIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
|
||||||
|
let Namespace = "AMDGPU";
|
||||||
|
let HWEncoding = encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
|
||||||
|
let Namespace = "AMDGPU";
|
||||||
|
let SubRegIndices = [low, high];
|
||||||
|
let HWEncoding = encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
class SI_128 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
|
||||||
|
let Namespace = "AMDGPU";
|
||||||
|
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
|
||||||
|
let HWEncoding = encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
class SI_256 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
|
||||||
|
let Namespace = "AMDGPU";
|
||||||
|
let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
|
||||||
|
let HWEncoding = encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
|
||||||
|
|
||||||
|
class VGPR_32 <bits<16> num, string name> : SIReg<name, num>;
|
||||||
|
|
||||||
|
class SGPR_64 <bits<16> num, string name, list<Register> subregs> :
|
||||||
|
SI_64 <name, subregs, num>;
|
||||||
|
|
||||||
|
class VGPR_64 <bits<16> num, string name, list<Register> subregs> :
|
||||||
|
SI_64 <name, subregs, num>;
|
||||||
|
|
||||||
|
class SGPR_128 <bits<16> num, string name, list<Register> subregs> :
|
||||||
|
SI_128 <name, subregs, num>;
|
||||||
|
|
||||||
|
class VGPR_128 <bits<16> num, string name, list<Register> subregs> :
|
||||||
|
SI_128 <name, subregs, num>;
|
||||||
|
|
||||||
|
class SGPR_256 <bits<16> num, string name, list<Register> subregs> :
|
||||||
|
SI_256 <name, subregs, num>;
|
||||||
|
|
||||||
|
def VCC : SIReg<"VCC">;
|
||||||
|
def SCC : SIReg<"SCC">;
|
||||||
|
def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
|
||||||
|
|
||||||
|
def M0 : SIReg <"M0", 124>;
|
||||||
|
|
||||||
|
//Interpolation registers
|
||||||
|
|
||||||
|
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
|
||||||
|
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
|
||||||
|
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
|
||||||
|
def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
|
||||||
|
def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
|
||||||
|
def PERSP_CENTROID_J : SIReg <"PERSP_CENTROID_J">;
|
||||||
|
def PERSP_I_W : SIReg <"PERSP_I_W">;
|
||||||
|
def PERSP_J_W : SIReg <"PERSP_J_W">;
|
||||||
|
def PERSP_1_W : SIReg <"PERSP_1_W">;
|
||||||
|
def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
|
||||||
|
def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
|
||||||
|
def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
|
||||||
|
def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
|
||||||
|
def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
|
||||||
|
def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
|
||||||
|
def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
|
||||||
|
def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
|
||||||
|
def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
|
||||||
|
def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
|
||||||
|
def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
|
||||||
|
def FRONT_FACE : SIReg <"FRONT_FACE">;
|
||||||
|
def ANCILLARY : SIReg <"ANCILLARY">;
|
||||||
|
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
|
||||||
|
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
|
||||||
|
|
||||||
|
STRING
|
||||||
|
|
||||||
|
#32 bit register
|
||||||
|
|
||||||
|
my @SGPR;
|
||||||
|
for (my $i = 0; $i < $SGPR_COUNT; $i++) {
|
||||||
|
print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n";
|
||||||
|
$SGPR[$i] = "SGPR$i";
|
||||||
|
}
|
||||||
|
|
||||||
|
my @VGPR;
|
||||||
|
for (my $i = 0; $i < $VGPR_COUNT; $i++) {
|
||||||
|
print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n";
|
||||||
|
$VGPR[$i] = "VGPR$i";
|
||||||
|
}
|
||||||
|
|
||||||
|
# Emit the 32-bit register classes:
#   SReg_32   - SGPR0..SGPR$SGPR_MAX_IDX plus SREG_LIT_0 and M0
#   VReg_32   - VGPR0..VGPR$VGPR_MAX_IDX plus the named interpolation,
#               position, face, ancillary and coverage input registers
#   AllReg_32 - union of VReg_32 and SReg_32
#   CCReg     - the condition registers VCC and SCC
# $SGPR_MAX_IDX / $VGPR_MAX_IDX are interpolated by Perl; they are defined
# earlier in the script (presumably as COUNT - 1 -- confirm there).
print <<STRING;

def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0)
>;

def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
PERSP_CENTER_I, PERSP_CENTER_J,
PERSP_CENTROID_I, PERSP_CENTROID_J,
PERSP_I_W, PERSP_J_W, PERSP_1_W,
LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
LINEAR_CENTER_I, LINEAR_CENTER_J,
LINEAR_CENTROID_I, LINEAR_CENTROID_J,
LINE_STIPPLE_TEX_COORD,
POS_X_FLOAT,
POS_Y_FLOAT,
POS_Z_FLOAT,
POS_W_FLOAT,
FRONT_FACE,
ANCILLARY,
SAMPLE_COVERAGE,
POS_FIXED_PT
)
>;

def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add VReg_32, SReg_32)
>;

def CCReg : RegisterClass<"AMDGPU", [f32], 32, (add VCC, SCC)>;

STRING
|
||||||
|
|
||||||
|
# Sub-register index names for each composite register width.
my @subregs_64  = qw(low high);
my @subregs_128 = qw(sel_x sel_y sel_z sel_w);
my @subregs_256 = map { "sub$_" } (0 .. 7);

# Emit the wide scalar registers and their classes; each call returns the
# list of register names it generated.
my @SGPR64  = print_sgpr_class(64,  \@subregs_64,  'i64');
my @SGPR128 = print_sgpr_class(128, \@subregs_128, 'v4f32', 'v4i32');
my @SGPR256 = print_sgpr_class(256, \@subregs_256, 'v8i32');

# Emit the wide vector registers and their classes.
my @VGPR64  = print_vgpr_class(64,  \@subregs_64,  'i64');
my @VGPR128 = print_vgpr_class(128, \@subregs_128, 'v4f32');

# AllReg_64 contains every 64-bit scalar and vector register.
my $sgpr64_list = join(',', @SGPR64);
my $vgpr64_list = join(',', @VGPR64);
print <<STRING;

def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
(add $sgpr64_list, $vgpr64_list)
>;

STRING
|
||||||
|
|
||||||
|
# Emit the scalar registers of the given bit width together with their
# SReg_<width> class.  Returns the list of generated register names.
sub print_sgpr_class {
  my ($width, $subreg_names, @value_types) = @_;
  return print_reg_class('SReg', 'SGPR', $width, $SGPR_COUNT, $subreg_names, @value_types);
}
|
||||||
|
|
||||||
|
# Emit the vector registers of the given bit width together with their
# VReg_<width> class.  Returns the list of generated register names.
sub print_vgpr_class {
  my ($width, $subreg_names, @value_types) = @_;
  return print_reg_class('VReg', 'VGPR', $width, $VGPR_COUNT, $subreg_names, @value_types);
}
|
||||||
|
|
||||||
|
# Emit composite registers of $reg_width bits built from consecutive 32-bit
# registers named <$reg_prefix><n>, followed by the register class
# <$class_prefix>_<$reg_width> that contains them.
#
#   $class_prefix - prefix of the emitted class name (e.g. 'SReg')
#   $reg_prefix   - prefix of the register names (e.g. 'SGPR')
#   $reg_width    - width of each composite register in bits
#   $reg_count    - number of 32-bit registers available
#   $sub_reg_ref  - reference to the sub-register index names (not used here)
#   @types        - value types listed in the emitted register class
#
# Returns the generated register names.  Names whose base index is a
# multiple of 10 carry a trailing newline, which only serves to wrap the
# emitted register list.
sub print_reg_class {
  my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
  my $dwords = $reg_width / 32;
  my @class_members;

  my $base = 0;
  while ($base < $reg_count) {
    my $wide_name = "${reg_prefix}${base}_${reg_width}";

    # The 32-bit registers covered by this composite register.
    my @parts;
    for (my $k = 0; $k < $dwords; ++$k) {
      push(@parts, $reg_prefix . ($base + $k));
    }

    print "def $wide_name : ${reg_prefix}_$reg_width <$base, \"$wide_name\", [ ", join(',', @parts) , "]>;\n";

    $wide_name .= "\n" if $base % 10 == 0;
    push(@class_members, $wide_name);
    $base += $dwords;
  }

  print "def ${class_prefix}_$reg_width : RegisterClass<\"AMDGPU\", [" . join(', ', @types) . "], $reg_width,\n (add " . join(', ', @class_members) . ")\n>{\n";
  print "}\n";
  return @class_members;
}
|
195
lib/Target/AMDGPU/SIISelLowering.cpp
Normal file
195
lib/Target/AMDGPU/SIISelLowering.cpp
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
|
||||||
|
// mostly EmitInstrWithCustomInserter().
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "SIISelLowering.h"
|
||||||
|
#include "SIInstrInfo.h"
|
||||||
|
#include "SIRegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Set up SI lowering: cache the target's instruction info as SIInstrInfo,
/// register the legal value types with their register classes and mark
/// integer ADD as legal for i32 and i64.
SITargetLowering::SITargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
{
  // Types that are kept in vector register classes.
  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);

  // Types that are kept in scalar register classes.
  addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
  addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);

  // Must run after all register classes have been added.
  computeRegisterProperties();

  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::ADD, MVT::i32, Legal);

}
|
||||||
|
|
||||||
|
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||||
|
MachineInstr * MI, MachineBasicBlock * BB) const
|
||||||
|
{
|
||||||
|
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
|
||||||
|
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
|
||||||
|
MachineBasicBlock::iterator I = MI;
|
||||||
|
|
||||||
|
if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
|
||||||
|
AppendS_WAITCNT(MI, *BB, llvm::next(I));
|
||||||
|
return BB;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (MI->getOpcode()) {
|
||||||
|
default:
|
||||||
|
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||||
|
|
||||||
|
case AMDGPU::CLAMP_SI:
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
// VSRC1-2 are unused, but we still need to fill all the
|
||||||
|
// operand slots, so we just reuse the VSRC0 operand
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
.addImm(0) // ABS
|
||||||
|
.addImm(1) // CLAMP
|
||||||
|
.addImm(0) // OMOD
|
||||||
|
.addImm(0); // NEG
|
||||||
|
MI->eraseFromParent();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case AMDGPU::FABS_SI:
|
||||||
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
||||||
|
.addOperand(MI->getOperand(0))
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
// VSRC1-2 are unused, but we still need to fill all the
|
||||||
|
// operand slots, so we just reuse the VSRC0 operand
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
.addOperand(MI->getOperand(1))
|
||||||
|
.addImm(1) // ABS
|
||||||
|
.addImm(0) // CLAMP
|
||||||
|
.addImm(0) // OMOD
|
||||||
|
.addImm(0); // NEG
|
||||||
|
MI->eraseFromParent();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case AMDGPU::SI_INTERP:
|
||||||
|
LowerSI_INTERP(MI, *BB, I, MRI);
|
||||||
|
break;
|
||||||
|
case AMDGPU::SI_INTERP_CONST:
|
||||||
|
LowerSI_INTERP_CONST(MI, *BB, I);
|
||||||
|
break;
|
||||||
|
case AMDGPU::SI_V_CNDLT:
|
||||||
|
LowerSI_V_CNDLT(MI, *BB, I, MRI);
|
||||||
|
break;
|
||||||
|
case AMDGPU::USE_SGPR_32:
|
||||||
|
case AMDGPU::USE_SGPR_64:
|
||||||
|
lowerUSE_SGPR(MI, BB->getParent(), MRI);
|
||||||
|
MI->eraseFromParent();
|
||||||
|
break;
|
||||||
|
case AMDGPU::VS_LOAD_BUFFER_INDEX:
|
||||||
|
addLiveIn(MI, BB->getParent(), MRI, TII, AMDGPU::VGPR0);
|
||||||
|
MI->eraseFromParent();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return BB;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert an S_WAITCNT with an immediate operand of 0 into \p BB before
/// position \p I.  \p MI is not used by this function; the caller passes
/// the iterator following \p MI so the wait lands directly after it.
void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I) const
{
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
          .addImm(0);
}
|
||||||
|
|
||||||
|
/// Expand the SI_INTERP pseudo into the real interpolation sequence.
///
/// Operands of \p MI: 0 = dst, 1 = I coordinate, 2 = J coordinate,
/// 3 = attribute channel, 4 = attribute, 5 = parameter value moved to M0.
///
/// Emitted before \p I:
///   M0  = S_MOV_B32 params
///   tmp = V_INTERP_P1_F32 i, attr_chan, attr
///   dst = V_INTERP_P2_F32 tmp, j, attr_chan, attr
/// and \p MI is erased afterwards.
void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
  unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
  MachineOperand dst = MI->getOperand(0);
  MachineOperand iReg = MI->getOperand(1);
  MachineOperand jReg = MI->getOperand(2);
  MachineOperand attr_chan = MI->getOperand(3);
  MachineOperand attr = MI->getOperand(4);
  MachineOperand params = MI->getOperand(5);

  // M0 is written by this move, so it has to be added as the instruction's
  // destination (a def).  A plain addReg() would record it as a use.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
          .addOperand(params);

  // First interpolation pass: produces the intermediate value in tmp.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
          .addOperand(iReg)
          .addOperand(attr_chan)
          .addOperand(attr);

  // Second pass: combines tmp with the J coordinate into dst.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
          .addOperand(dst)
          .addReg(tmp)
          .addOperand(jReg)
          .addOperand(attr_chan)
          .addOperand(attr);

  MI->eraseFromParent();
}
|
||||||
|
|
||||||
|
/// Expand the SI_INTERP_CONST pseudo.
///
/// Operands of \p MI: 0 = dst, 1 = attribute channel, 2 = attribute,
/// 3 = parameter value moved to M0.
///
/// Emitted before \p I:
///   M0  = S_MOV_B32 params
///   dst = V_INTERP_MOV_F32 attr_chan, attr
/// and \p MI is erased afterwards.
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
    MachineBasicBlock &BB, MachineBasicBlock::iterator I) const
{
  MachineOperand dst = MI->getOperand(0);
  MachineOperand attr_chan = MI->getOperand(1);
  MachineOperand attr = MI->getOperand(2);
  MachineOperand params = MI->getOperand(3);

  // M0 is written by this move, so it has to be added as the instruction's
  // destination (a def).  A plain addReg() would record it as a use.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
          .addOperand(params);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
          .addOperand(dst)
          .addOperand(attr_chan)
          .addOperand(attr);

  MI->eraseFromParent();
}
|
||||||
|
|
||||||
|
/// Expand the SI_V_CNDLT pseudo into a compare against zero followed by a
/// V_CNDMASK_B32 select, then erase \p MI.
///
/// Operands of \p MI: 0 = dst, 1 = value compared against zero,
/// 2 and 3 = the two values to select between.  \p MRI is unused.
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
  // The 32-bit VOPC encoding only accepts a VReg_32 register as src1, while
  // src0 may come from AllReg_32.  SREG_LIT_0 is a scalar register, so it
  // must go in src0.  "x < 0" is therefore emitted as the equivalent
  // "0 > x" with the operands swapped.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_GT_F32_e32))
          .addReg(AMDGPU::SREG_LIT_0)
          .addOperand(MI->getOperand(1));

  // NOTE(review): the order of the two select inputs is kept as it was;
  // confirm it against the V_CNDMASK_B32 definition.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(3));

  MI->eraseFromParent();
}
|
||||||
|
|
||||||
|
/// Handle a USE_SGPR_* pseudo: translate the dword index in operand 1 of
/// \p MI into the matching register of the destination's register class
/// and pass it to addLiveIn().  The caller erases \p MI.
void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI,
    MachineFunction * MF, MachineRegisterInfo & MRI) const
{
  const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
  const TargetRegisterClass * dstClass =
      MRI.getRegClass(MI->getOperand(0).getReg());

  // Number of 32-bit words covered by one register of the class.
  unsigned DwordWidth = dstClass->getSize() / 4;

  // Operand 1 counts dwords; the class is indexed in whole registers.
  int64_t dwordIndex = MI->getOperand(1).getImm();
  assert(dwordIndex % DwordWidth == 0 && "USER_SGPR not properly aligned");

  addLiveIn(MI, MF, MRI, TII, dstClass->getRegister(dwordIndex / DwordWidth));
}
|
||||||
|
|
48
lib/Target/AMDGPU/SIISelLowering.h
Normal file
48
lib/Target/AMDGPU/SIISelLowering.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// SI DAG Lowering interface definition
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef SIISELLOWERING_H
|
||||||
|
#define SIISELLOWERING_H
|
||||||
|
|
||||||
|
#include "AMDGPUISelLowering.h"
|
||||||
|
#include "SIInstrInfo.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
/// SI-specific DAG lowering.  Extends AMDGPUTargetLowering with the custom
/// inserter that expands SI pseudo instructions.
class SITargetLowering : public AMDGPUTargetLowering
{
  /// Instruction info of the target, used to look up instruction
  /// descriptions when building machine instructions.
  const SIInstrInfo * TII;

  /// AppendS_WAITCNT - Memory reads and writes are synchronized using the
  /// S_WAITCNT instruction.  This function takes the most conservative
  /// approach and inserts an S_WAITCNT instruction after every read and
  /// write.
  void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I) const;
  /// LowerSI_INTERP - Expand the SI_INTERP pseudo before \p I.
  void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
  /// LowerSI_INTERP_CONST - Expand the SI_INTERP_CONST pseudo before \p I.
  void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I) const;
  /// LowerSI_V_CNDLT - Expand the SI_V_CNDLT pseudo before \p I.
  void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
  /// lowerUSE_SGPR - Handle the USE_SGPR_32 / USE_SGPR_64 pseudos.
  void lowerUSE_SGPR(MachineInstr *MI, MachineFunction * MF,
                     MachineRegisterInfo & MRI) const;
public:
  SITargetLowering(TargetMachine &tm);
  /// Expand instructions that request custom insertion; returns the block
  /// in which insertion continues.
  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
                                              MachineBasicBlock * BB) const;
};
|
||||||
|
|
||||||
|
} // End namespace llvm
|
||||||
|
|
||||||
|
#endif //SIISELLOWERING_H
|
128
lib/Target/AMDGPU/SIInstrFormats.td
Normal file
128
lib/Target/AMDGPU/SIInstrFormats.td
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// SI Instruction format definitions.
|
||||||
|
//
|
||||||
|
// Instructions with _32 take 32-bit operands.
|
||||||
|
// Instructions with _64 take 64-bit operands.
|
||||||
|
//
|
||||||
|
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
|
||||||
|
// encoding is the standard encoding, but instructions that make use of
|
||||||
|
// any of the instruction modifiers must use the 64-bit encoding.
|
||||||
|
//
|
||||||
|
// Instructions with _e32 use the 32-bit encoding.
|
||||||
|
// Instructions with _e64 use the 64-bit encoding.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
||||||
|
// VOP3 (64-bit encoding) instruction on 32-bit values: a VReg_32
// destination, three AllReg_32 sources and four i32 immediates
// ($src3..$src6).
class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
  : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;

// Same as VOP3_32, but the destination and the three register sources are
// 64 bits wide.
class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
  : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
|
||||||
|
|
||||||
|
|
||||||
|
// Scalar instruction with one 32-bit scalar source and a 32-bit scalar
// destination.
class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
  : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;

// Scalar instruction with one 64-bit scalar source and a 64-bit scalar
// destination.
class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
  : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;

// Scalar instruction with two 32-bit scalar sources and a 32-bit scalar
// destination.
class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
  : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;

// Scalar instruction with two 64-bit scalar sources and a 64-bit scalar
// destination.
class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
  : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
|
||||||
|
|
||||||
|
// One-source vector instruction in the 32-bit encoding.  vrc is the
// register class of the destination, arc the class accepted for src0.
class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOP1 <
    op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
>;

// Defines both encodings of a 32-bit VOP1 instruction.  Only the _e32
// variant carries the selection pattern.  The 9-bit VOP3 opcode of the
// _e64 variant is formed from the bits 1,1 followed by op{6-0}.
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
  def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
  def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
                      opName, []
  >;
}

// 64-bit operand counterpart of VOP1_32.
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {

  def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;

  def _e64 : VOP3_64 <
    {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}
|
||||||
|
|
||||||
|
// Two-source vector instruction in the 32-bit encoding.  src0 accepts the
// arc class, while src1 and the destination use vrc.
class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOP2 <
    op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
>;

// Defines both encodings of a 32-bit VOP2 instruction.  Only the _e32
// variant carries the selection pattern.  The 9-bit VOP3 opcode of the
// _e64 variant is formed from the bits 1,0,0 followed by op{5-0}.
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {

  def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;

  def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
                      opName, []
  >;
}

// 64-bit operand counterpart of VOP2_32.
multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
  def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;

  def _e64 : VOP3_64 <
    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}
|
||||||
|
|
||||||
|
// Scalar instruction taking a 16-bit immediate and writing a 32-bit scalar
// register.
class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
  : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;

// Scalar instruction taking a 16-bit immediate and writing a 64-bit scalar
// register.
class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
  : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
|
||||||
|
|
||||||
|
// Vector compare in the 32-bit encoding.  It declares no explicit output.
// src0 accepts the arc class, while src1 must be a register of class vrc.
class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOPC <
    op, (outs), (ins arc:$src0, vrc:$src1), opName, pattern
>;

// Defines both encodings of a 32-bit vector compare.  Only the _e32
// variant carries the selection pattern.  The 9-bit VOP3 opcode of the
// _e64 variant is a 0 bit followed by op{7-0}.
multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {

  def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;

  def _e64 : VOP3_32 <
    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}

// 64-bit operand counterpart of VOPC_32.
multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {

  def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;

  def _e64 : VOP3_64 <
    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}
|
||||||
|
|
||||||
|
// Scalar compare of two 32-bit scalar registers; the result is written to
// a CCReg register.
class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
  : SOPC <op, (outs CCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;

// Scalar compare of two 64-bit scalar registers; the result is written to
// a CCReg register.
class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
  : SOPC <op, (outs CCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
|
||||||
|
|
103
lib/Target/AMDGPU/SIInstrInfo.cpp
Normal file
103
lib/Target/AMDGPU/SIInstrInfo.cpp
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// SI Implementation of TargetInstrInfo.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
||||||
|
#include "SIInstrInfo.h"
|
||||||
|
#include "AMDGPUTargetMachine.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/MC/MCInstrDesc.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Construct the SI instruction info and its embedded register info, which
/// receives the target machine and a reference to this object.
SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this)
    { }
|
||||||
|
|
||||||
|
/// \returns the register info object owned by this instruction info.
const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
{
  return RI;
}
|
||||||
|
|
||||||
|
/// Emit a register copy from \p SrcReg to \p DestReg before \p MI.  The
/// source is marked killed when \p KillSrc is set.
///
/// NOTE(review): the copy is always a V_MOV_B32_e32, whatever register
/// classes or widths the two registers have -- confirm this is intended
/// for scalar and for 64-bit or wider registers.
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const
{
  BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
   .addReg(SrcReg, getKillRegState(KillSrc));
}
|
||||||
|
|
||||||
|
/// \returns the encoding-type field of \p MI, i.e. the TSFlags of its
/// instruction description masked with SI_INSTR_FLAGS_ENCODING_MASK.
unsigned SIInstrInfo::getEncodingType(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
}
|
||||||
|
|
||||||
|
unsigned SIInstrInfo::getEncodingBytes(const MachineInstr &MI) const
|
||||||
|
{
|
||||||
|
|
||||||
|
/* Instructions with literal constants are expanded to 64-bits, and
|
||||||
|
* the constant is stored in bits [63:32] */
|
||||||
|
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
|
||||||
|
if (MI.getOperand(i).getType() == MachineOperand::MO_FPImmediate) {
|
||||||
|
return 8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This instruction always has a literal */
|
||||||
|
if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32) {
|
||||||
|
return 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned encoding_type = getEncodingType(MI);
|
||||||
|
switch (encoding_type) {
|
||||||
|
case SIInstrEncodingType::EXP:
|
||||||
|
case SIInstrEncodingType::LDS:
|
||||||
|
case SIInstrEncodingType::MUBUF:
|
||||||
|
case SIInstrEncodingType::MTBUF:
|
||||||
|
case SIInstrEncodingType::MIMG:
|
||||||
|
case SIInstrEncodingType::VOP3:
|
||||||
|
return 8;
|
||||||
|
default:
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create (without inserting it into a block) a V_MOV_IMM_I32 instruction
/// in \p MF that defines \p DstReg and takes \p Imm as its immediate
/// operand.
///
/// \returns the newly created instruction.
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                           int64_t Imm) const
{
  MachineInstr * MovImm =
      MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());

  // Operand 0 is the destination, operand 1 the immediate.
  MachineInstrBuilder(MovImm)
      .addReg(DstReg, RegState::Define)
      .addImm(Imm);

  return MovImm;
}
|
||||||
|
|
||||||
|
bool SIInstrInfo::isMov(unsigned Opcode) const
|
||||||
|
{
|
||||||
|
switch(Opcode) {
|
||||||
|
default: return false;
|
||||||
|
case AMDGPU::S_MOV_B32:
|
||||||
|
case AMDGPU::S_MOV_B64:
|
||||||
|
case AMDGPU::V_MOV_B32_e32:
|
||||||
|
case AMDGPU::V_MOV_B32_e64:
|
||||||
|
case AMDGPU::V_MOV_IMM_F32:
|
||||||
|
case AMDGPU::V_MOV_IMM_I32:
|
||||||
|
case AMDGPU::S_MOV_IMM_I32:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user