mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-19 03:24:09 +00:00
Extend initial support for primitive types in PTX backend
- Allow i16, i32, i64, float, and double types, using the native .u16, .u32, .u64, .f32, and .f64 PTX types.
- Allow loading/storing of all primitive types.
- Allow primitive types to be passed as parameters.
- Allow selection of PTX Version and Shader Model as sub-target attributes.
- Merge integer/floating-point test cases for load/store.
- Use .u32 instead of .s32 to conform to output from the NVIDIA nvcc compiler.
Patch by Justin Holewinski
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126824 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -19,8 +19,34 @@ include "llvm/Target/Target.td"
|
|||||||
// Subtarget Features.
|
// Subtarget Features.
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true",
|
//===- Architectural Features ---------------------------------------------===//
|
||||||
"Enable sm_20 target architecture">;
|
|
||||||
|
def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
|
||||||
|
"Do not demote .f64 to .f32">;
|
||||||
|
|
||||||
|
//===- PTX Version --------------------------------------------------------===//
|
||||||
|
|
||||||
|
def FeaturePTX14 : SubtargetFeature<"ptx14", "PTXVersion", "PTX_VERSION_1_4",
|
||||||
|
"Use PTX Language Version 1.4">;
|
||||||
|
|
||||||
|
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
|
||||||
|
"Use PTX Language Version 2.0",
|
||||||
|
[FeaturePTX14]>;
|
||||||
|
|
||||||
|
def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
|
||||||
|
"Use PTX Language Version 2.1",
|
||||||
|
[FeaturePTX20]>;
|
||||||
|
|
||||||
|
//===- PTX Shader Model ---------------------------------------------------===//
|
||||||
|
|
||||||
|
def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
|
||||||
|
"Enable Shader Model 1.0 compliance">;
|
||||||
|
def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
|
||||||
|
"Enable Shader Model 1.3 compliance",
|
||||||
|
[FeatureSM10, FeatureDouble]>;
|
||||||
|
def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
|
||||||
|
"Enable Shader Model 2.0 compliance",
|
||||||
|
[FeatureSM13]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// PTX supported processors.
|
// PTX supported processors.
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include "llvm/ADT/Twine.h"
|
#include "llvm/ADT/Twine.h"
|
||||||
#include "llvm/CodeGen/AsmPrinter.h"
|
#include "llvm/CodeGen/AsmPrinter.h"
|
||||||
#include "llvm/CodeGen/MachineInstr.h"
|
#include "llvm/CodeGen/MachineInstr.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/MC/MCStreamer.h"
|
#include "llvm/MC/MCStreamer.h"
|
||||||
#include "llvm/MC/MCSymbol.h"
|
#include "llvm/MC/MCSymbol.h"
|
||||||
#include "llvm/Target/Mangler.h"
|
#include "llvm/Target/Mangler.h"
|
||||||
@ -37,13 +38,6 @@
|
|||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
static cl::opt<std::string>
|
|
||||||
OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4"));
|
|
||||||
|
|
||||||
static cl::opt<std::string>
|
|
||||||
OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"),
|
|
||||||
cl::init("sm_10"));
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
class PTXAsmPrinter : public AsmPrinter {
|
class PTXAsmPrinter : public AsmPrinter {
|
||||||
public:
|
public:
|
||||||
@ -82,11 +76,14 @@ private:
|
|||||||
static const char PARAM_PREFIX[] = "__param_";
|
static const char PARAM_PREFIX[] = "__param_";
|
||||||
|
|
||||||
static const char *getRegisterTypeName(unsigned RegNo) {
|
static const char *getRegisterTypeName(unsigned RegNo) {
|
||||||
#define TEST_REGCLS(cls, clsstr) \
|
#define TEST_REGCLS(cls, clsstr) \
|
||||||
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
|
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
|
||||||
TEST_REGCLS(RRegf32, f32);
|
|
||||||
TEST_REGCLS(RRegs32, s32);
|
|
||||||
TEST_REGCLS(Preds, pred);
|
TEST_REGCLS(Preds, pred);
|
||||||
|
TEST_REGCLS(RRegu16, u16);
|
||||||
|
TEST_REGCLS(RRegu32, u32);
|
||||||
|
TEST_REGCLS(RRegu64, u64);
|
||||||
|
TEST_REGCLS(RRegf32, f32);
|
||||||
|
TEST_REGCLS(RRegf64, f64);
|
||||||
#undef TEST_REGCLS
|
#undef TEST_REGCLS
|
||||||
|
|
||||||
llvm_unreachable("Not in any register class!");
|
llvm_unreachable("Not in any register class!");
|
||||||
@ -121,7 +118,14 @@ static const char *getTypeName(const Type* type) {
|
|||||||
switch (type->getTypeID()) {
|
switch (type->getTypeID()) {
|
||||||
default: llvm_unreachable("Unknown type");
|
default: llvm_unreachable("Unknown type");
|
||||||
case Type::FloatTyID: return ".f32";
|
case Type::FloatTyID: return ".f32";
|
||||||
case Type::IntegerTyID: return ".s32"; // TODO: Handle 64-bit types.
|
case Type::DoubleTyID: return ".f64";
|
||||||
|
case Type::IntegerTyID:
|
||||||
|
switch (type->getPrimitiveSizeInBits()) {
|
||||||
|
default: llvm_unreachable("Unknown integer bit-width");
|
||||||
|
case 16: return ".u16";
|
||||||
|
case 32: return ".u32";
|
||||||
|
case 64: return ".u64";
|
||||||
|
}
|
||||||
case Type::ArrayTyID:
|
case Type::ArrayTyID:
|
||||||
case Type::PointerTyID:
|
case Type::PointerTyID:
|
||||||
type = dyn_cast<const SequentialType>(type)->getElementType();
|
type = dyn_cast<const SequentialType>(type)->getElementType();
|
||||||
@ -162,8 +166,11 @@ bool PTXAsmPrinter::doFinalization(Module &M) {
|
|||||||
|
|
||||||
void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
|
void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
|
||||||
{
|
{
|
||||||
OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
|
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
|
||||||
OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
|
|
||||||
|
OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
|
||||||
|
OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
|
||||||
|
(ST.supportsDouble() ? "" : ", map_f64_to_f32")));
|
||||||
OutStreamer.AddBlankLine();
|
OutStreamer.AddBlankLine();
|
||||||
|
|
||||||
// declare global variables
|
// declare global variables
|
||||||
@ -236,11 +243,24 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
|
|||||||
break;
|
break;
|
||||||
case MachineOperand::MO_FPImmediate:
|
case MachineOperand::MO_FPImmediate:
|
||||||
APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
|
APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
|
||||||
if (constFP.getZExtValue() > 0) {
|
bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
|
||||||
OS << "0F" << constFP.toString(16, false);
|
// Emit 0F for 32-bit floats and 0D for 64-bit doubles.
|
||||||
|
if (isFloat) {
|
||||||
|
OS << "0F";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
OS << "0F00000000";
|
OS << "0D";
|
||||||
|
}
|
||||||
|
// Emit the encoded floating-point value.
|
||||||
|
if (constFP.getZExtValue() > 0) {
|
||||||
|
OS << constFP.toString(16, false);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
OS << "00000000";
|
||||||
|
// If we have a double-precision zero, pad to 8 bytes.
|
||||||
|
if (!isFloat) {
|
||||||
|
OS << "00000000";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -338,12 +358,18 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
|
|||||||
if (!MFI->argRegEmpty()) {
|
if (!MFI->argRegEmpty()) {
|
||||||
decl += " (";
|
decl += " (";
|
||||||
if (isKernel) {
|
if (isKernel) {
|
||||||
for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
|
unsigned cnt = 0;
|
||||||
if (i != 0)
|
//for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
|
||||||
|
for(PTXMachineFunctionInfo::reg_iterator
|
||||||
|
i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) {
|
||||||
|
reg = *i;
|
||||||
|
assert(reg != PTX::NoRegister && "Not a valid register!");
|
||||||
|
if (i != b)
|
||||||
decl += ", ";
|
decl += ", ";
|
||||||
decl += ".param .s32 "; // TODO: add types
|
decl += ".param .u32"; // TODO: Parse type from register map
|
||||||
|
decl += " ";
|
||||||
decl += PARAM_PREFIX;
|
decl += PARAM_PREFIX;
|
||||||
decl += utostr(i + 1);
|
decl += utostr(++cnt);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (PTXMachineFunctionInfo::reg_iterator
|
for (PTXMachineFunctionInfo::reg_iterator
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "PTXTargetMachine.h"
|
#include "PTXTargetMachine.h"
|
||||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||||
#include "llvm/DerivedTypes.h"
|
#include "llvm/DerivedTypes.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
@ -66,14 +67,34 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
|
SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
|
||||||
SDValue index = Node->getOperand(1);
|
SDValue index = Node->getOperand(1);
|
||||||
DebugLoc dl = Node->getDebugLoc();
|
DebugLoc dl = Node->getDebugLoc();
|
||||||
|
unsigned opcode;
|
||||||
|
|
||||||
if (index.getOpcode() != ISD::TargetConstant)
|
if (index.getOpcode() != ISD::TargetConstant)
|
||||||
llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
|
llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
|
||||||
|
|
||||||
|
if (Node->getValueType(0) == MVT::i16) {
|
||||||
|
opcode = PTX::LDpiU16;
|
||||||
|
}
|
||||||
|
else if (Node->getValueType(0) == MVT::i32) {
|
||||||
|
opcode = PTX::LDpiU32;
|
||||||
|
}
|
||||||
|
else if (Node->getValueType(0) == MVT::i64) {
|
||||||
|
opcode = PTX::LDpiU64;
|
||||||
|
}
|
||||||
|
else if (Node->getValueType(0) == MVT::f32) {
|
||||||
|
opcode = PTX::LDpiF32;
|
||||||
|
}
|
||||||
|
else if (Node->getValueType(0) == MVT::f64) {
|
||||||
|
opcode = PTX::LDpiF64;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
llvm_unreachable("Unknown parameter type for ld.param");
|
||||||
|
}
|
||||||
|
|
||||||
return PTXInstrInfo::
|
return PTXInstrInfo::
|
||||||
GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index);
|
GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match memory operand of the form [reg+reg]
|
// Match memory operand of the form [reg+reg]
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/CodeGen/SelectionDAG.h"
|
#include "llvm/CodeGen/SelectionDAG.h"
|
||||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
@ -27,12 +28,16 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
|
|||||||
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
|
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
|
||||||
// Set up the register classes.
|
// Set up the register classes.
|
||||||
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
|
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
|
||||||
addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
|
addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
|
||||||
|
addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
|
||||||
|
addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
|
||||||
addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
|
addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
|
||||||
|
addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
|
||||||
|
|
||||||
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
|
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
|
||||||
|
|
||||||
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
|
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
|
||||||
|
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
|
||||||
|
|
||||||
// Customize translation of memory addresses
|
// Customize translation of memory addresses
|
||||||
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
||||||
@ -90,10 +95,13 @@ struct argmap_entry {
|
|||||||
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
|
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
|
||||||
} argmap[] = {
|
} argmap[] = {
|
||||||
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
|
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
|
||||||
argmap_entry(MVT::i32, PTX::RRegs32RegisterClass),
|
argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
|
||||||
argmap_entry(MVT::f32, PTX::RRegf32RegisterClass)
|
argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
|
||||||
|
argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
|
||||||
|
argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
|
||||||
|
argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
|
||||||
};
|
};
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
|
||||||
SDValue PTXTargetLowering::
|
SDValue PTXTargetLowering::
|
||||||
LowerFormalArguments(SDValue Chain,
|
LowerFormalArguments(SDValue Chain,
|
||||||
@ -192,12 +200,21 @@ SDValue PTXTargetLowering::
|
|||||||
SDValue Flag;
|
SDValue Flag;
|
||||||
unsigned reg;
|
unsigned reg;
|
||||||
|
|
||||||
if (Outs[0].VT == MVT::i32) {
|
if (Outs[0].VT == MVT::i16) {
|
||||||
|
reg = PTX::RH0;
|
||||||
|
}
|
||||||
|
else if (Outs[0].VT == MVT::i32) {
|
||||||
reg = PTX::R0;
|
reg = PTX::R0;
|
||||||
}
|
}
|
||||||
|
else if (Outs[0].VT == MVT::i64) {
|
||||||
|
reg = PTX::RD0;
|
||||||
|
}
|
||||||
else if (Outs[0].VT == MVT::f32) {
|
else if (Outs[0].VT == MVT::f32) {
|
||||||
reg = PTX::F0;
|
reg = PTX::F0;
|
||||||
}
|
}
|
||||||
|
else if (Outs[0].VT == MVT::f64) {
|
||||||
|
reg = PTX::FD0;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
assert(false && "Can return only basic types");
|
assert(false && "Can return only basic types");
|
||||||
}
|
}
|
||||||
|
@ -27,9 +27,12 @@ static const struct map_entry {
|
|||||||
const TargetRegisterClass *cls;
|
const TargetRegisterClass *cls;
|
||||||
const int opcode;
|
const int opcode;
|
||||||
} map[] = {
|
} map[] = {
|
||||||
{ &PTX::RRegs32RegClass, PTX::MOVrr },
|
{ &PTX::RRegu16RegClass, PTX::MOVU16rr },
|
||||||
{ &PTX::RRegf32RegClass, PTX::MOVrr },
|
{ &PTX::RRegu32RegClass, PTX::MOVU32rr },
|
||||||
{ &PTX::PredsRegClass, PTX::MOVpp }
|
{ &PTX::RRegu64RegClass, PTX::MOVU64rr },
|
||||||
|
{ &PTX::RRegf32RegClass, PTX::MOVF32rr },
|
||||||
|
{ &PTX::RRegf64RegClass, PTX::MOVF64rr },
|
||||||
|
{ &PTX::PredsRegClass, PTX::MOVPREDrr }
|
||||||
};
|
};
|
||||||
|
|
||||||
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||||
@ -76,8 +79,12 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
|
|||||||
switch (MI.getOpcode()) {
|
switch (MI.getOpcode()) {
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
case PTX::MOVpp:
|
case PTX::MOVU16rr:
|
||||||
case PTX::MOVrr:
|
case PTX::MOVU32rr:
|
||||||
|
case PTX::MOVU64rr:
|
||||||
|
case PTX::MOVF32rr:
|
||||||
|
case PTX::MOVF64rr:
|
||||||
|
case PTX::MOVPREDrr:
|
||||||
assert(MI.getNumOperands() >= 2 &&
|
assert(MI.getNumOperands() >= 2 &&
|
||||||
MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
|
MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
|
||||||
"Invalid register-register move instruction");
|
"Invalid register-register move instruction");
|
||||||
|
@ -114,7 +114,7 @@ def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
|
|||||||
// Address operands
|
// Address operands
|
||||||
def MEMri : Operand<i32> {
|
def MEMri : Operand<i32> {
|
||||||
let PrintMethod = "printMemOperand";
|
let PrintMethod = "printMemOperand";
|
||||||
let MIOperandInfo = (ops RRegs32, i32imm);
|
let MIOperandInfo = (ops RRegu32, i32imm);
|
||||||
}
|
}
|
||||||
def MEMii : Operand<i32> {
|
def MEMii : Operand<i32> {
|
||||||
let PrintMethod = "printMemOperand";
|
let PrintMethod = "printMemOperand";
|
||||||
@ -143,75 +143,115 @@ def PTXret
|
|||||||
// Instruction Class Templates
|
// Instruction Class Templates
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
// Three-operand f32 instruction template
|
// Three-operand floating-point instruction template
|
||||||
multiclass FLOAT3<string opcstr, SDNode opnode> {
|
multiclass FLOAT3<string opcstr, SDNode opnode> {
|
||||||
def rr : InstPTX<(outs RRegf32:$d),
|
def rr32 : InstPTX<(outs RRegf32:$d),
|
||||||
(ins RRegf32:$a, RRegf32:$b),
|
(ins RRegf32:$a, RRegf32:$b),
|
||||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
!strconcat(opcstr, ".f32\t$d, $a, $b"),
|
||||||
[(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
|
[(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
|
||||||
def ri : InstPTX<(outs RRegf32:$d),
|
def ri32 : InstPTX<(outs RRegf32:$d),
|
||||||
(ins RRegf32:$a, f32imm:$b),
|
(ins RRegf32:$a, f32imm:$b),
|
||||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
!strconcat(opcstr, ".f32\t$d, $a, $b"),
|
||||||
[(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
|
[(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
|
||||||
|
def rr64 : InstPTX<(outs RRegf64:$d),
|
||||||
|
(ins RRegf64:$a, RRegf64:$b),
|
||||||
|
!strconcat(opcstr, ".f64\t$d, $a, $b"),
|
||||||
|
[(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
|
||||||
|
def ri64 : InstPTX<(outs RRegf64:$d),
|
||||||
|
(ins RRegf64:$a, f64imm:$b),
|
||||||
|
!strconcat(opcstr, ".f64\t$d, $a, $b"),
|
||||||
|
[(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass INT3<string opcstr, SDNode opnode> {
|
multiclass INT3<string opcstr, SDNode opnode> {
|
||||||
def rr : InstPTX<(outs RRegs32:$d),
|
def rr16 : InstPTX<(outs RRegu16:$d),
|
||||||
(ins RRegs32:$a, RRegs32:$b),
|
(ins RRegu16:$a, RRegu16:$b),
|
||||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
!strconcat(opcstr, ".u16\t$d, $a, $b"),
|
||||||
[(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
|
[(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
|
||||||
def ri : InstPTX<(outs RRegs32:$d),
|
def ri16 : InstPTX<(outs RRegu16:$d),
|
||||||
(ins RRegs32:$a, i32imm:$b),
|
(ins RRegu16:$a, i16imm:$b),
|
||||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
!strconcat(opcstr, ".u16\t$d, $a, $b"),
|
||||||
[(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
|
[(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
|
||||||
|
def rr32 : InstPTX<(outs RRegu32:$d),
|
||||||
|
(ins RRegu32:$a, RRegu32:$b),
|
||||||
|
!strconcat(opcstr, ".u32\t$d, $a, $b"),
|
||||||
|
[(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
|
||||||
|
def ri32 : InstPTX<(outs RRegu32:$d),
|
||||||
|
(ins RRegu32:$a, i32imm:$b),
|
||||||
|
!strconcat(opcstr, ".u32\t$d, $a, $b"),
|
||||||
|
[(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
|
||||||
|
def rr64 : InstPTX<(outs RRegu64:$d),
|
||||||
|
(ins RRegu64:$a, RRegu64:$b),
|
||||||
|
!strconcat(opcstr, ".u64\t$d, $a, $b"),
|
||||||
|
[(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
|
||||||
|
def ri64 : InstPTX<(outs RRegu64:$d),
|
||||||
|
(ins RRegu64:$a, i64imm:$b),
|
||||||
|
!strconcat(opcstr, ".u64\t$d, $a, $b"),
|
||||||
|
[(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// no %type directive, non-commutable
|
// no %type directive, non-commutable
|
||||||
multiclass INT3ntnc<string opcstr, SDNode opnode> {
|
multiclass INT3ntnc<string opcstr, SDNode opnode> {
|
||||||
def rr : InstPTX<(outs RRegs32:$d),
|
def rr : InstPTX<(outs RRegu32:$d),
|
||||||
(ins RRegs32:$a, RRegs32:$b),
|
(ins RRegu32:$a, RRegu32:$b),
|
||||||
!strconcat(opcstr, "\t$d, $a, $b"),
|
!strconcat(opcstr, "\t$d, $a, $b"),
|
||||||
[(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
|
[(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
|
||||||
def ri : InstPTX<(outs RRegs32:$d),
|
def ri : InstPTX<(outs RRegu32:$d),
|
||||||
(ins RRegs32:$a, i32imm:$b),
|
(ins RRegu32:$a, i32imm:$b),
|
||||||
!strconcat(opcstr, "\t$d, $a, $b"),
|
!strconcat(opcstr, "\t$d, $a, $b"),
|
||||||
[(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
|
[(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
|
||||||
def ir : InstPTX<(outs RRegs32:$d),
|
def ir : InstPTX<(outs RRegu32:$d),
|
||||||
(ins i32imm:$a, RRegs32:$b),
|
(ins i32imm:$a, RRegu32:$b),
|
||||||
!strconcat(opcstr, "\t$d, $a, $b"),
|
!strconcat(opcstr, "\t$d, $a, $b"),
|
||||||
[(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
|
[(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
|
multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
|
||||||
def rr : InstPTX<(outs RC:$d),
|
def rr : InstPTX<(outs RC:$d),
|
||||||
(ins MEMri:$a),
|
(ins MEMri:$a),
|
||||||
!strconcat(opstr, ".%type\t$d, [$a]"),
|
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
|
||||||
[(set RC:$d, (pat_load ADDRrr:$a))]>;
|
[(set RC:$d, (pat_load ADDRrr:$a))]>;
|
||||||
def ri : InstPTX<(outs RC:$d),
|
def ri : InstPTX<(outs RC:$d),
|
||||||
(ins MEMri:$a),
|
(ins MEMri:$a),
|
||||||
!strconcat(opstr, ".%type\t$d, [$a]"),
|
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
|
||||||
[(set RC:$d, (pat_load ADDRri:$a))]>;
|
[(set RC:$d, (pat_load ADDRri:$a))]>;
|
||||||
def ii : InstPTX<(outs RC:$d),
|
def ii : InstPTX<(outs RC:$d),
|
||||||
(ins MEMii:$a),
|
(ins MEMii:$a),
|
||||||
!strconcat(opstr, ".%type\t$d, [$a]"),
|
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
|
||||||
[(set RC:$d, (pat_load ADDRii:$a))]>;
|
[(set RC:$d, (pat_load ADDRii:$a))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
|
multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
|
||||||
|
defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
|
||||||
|
defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
|
||||||
|
defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
|
||||||
|
defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
|
||||||
|
defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
|
||||||
def rr : InstPTX<(outs),
|
def rr : InstPTX<(outs),
|
||||||
(ins RC:$d, MEMri:$a),
|
(ins RC:$d, MEMri:$a),
|
||||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
|
||||||
[(pat_store RC:$d, ADDRrr:$a)]>;
|
[(pat_store RC:$d, ADDRrr:$a)]>;
|
||||||
def ri : InstPTX<(outs),
|
def ri : InstPTX<(outs),
|
||||||
(ins RC:$d, MEMri:$a),
|
(ins RC:$d, MEMri:$a),
|
||||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
|
||||||
[(pat_store RC:$d, ADDRri:$a)]>;
|
[(pat_store RC:$d, ADDRri:$a)]>;
|
||||||
def ii : InstPTX<(outs),
|
def ii : InstPTX<(outs),
|
||||||
(ins RC:$d, MEMii:$a),
|
(ins RC:$d, MEMii:$a),
|
||||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
|
||||||
[(pat_store RC:$d, ADDRii:$a)]>;
|
[(pat_store RC:$d, ADDRii:$a)]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
|
||||||
|
defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
|
||||||
|
defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
|
||||||
|
defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
|
||||||
|
defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
|
||||||
|
defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Instructions
|
// Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -236,60 +276,67 @@ defm SRA : INT3ntnc<"shr.s32", PTXsra>;
|
|||||||
///===- Data Movement and Conversion Instructions -------------------------===//
|
///===- Data Movement and Conversion Instructions -------------------------===//
|
||||||
|
|
||||||
let neverHasSideEffects = 1 in {
|
let neverHasSideEffects = 1 in {
|
||||||
// rely on isMoveInstr to separate MOVpp, MOVrr, etc.
|
def MOVPREDrr
|
||||||
def MOVpp
|
|
||||||
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
|
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
|
||||||
def MOVrr
|
def MOVU16rr
|
||||||
: InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
|
: InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
|
||||||
def FMOVrr
|
def MOVU32rr
|
||||||
|
: InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
|
||||||
|
def MOVU64rr
|
||||||
|
: InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
|
||||||
|
def MOVF32rr
|
||||||
: InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
|
: InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
|
||||||
|
def MOVF64rr
|
||||||
|
: InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||||
def MOVpi
|
def MOVPREDri
|
||||||
: InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
|
: InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
|
||||||
[(set Preds:$d, imm:$a)]>;
|
[(set Preds:$d, imm:$a)]>;
|
||||||
def MOVri
|
def MOVU16ri
|
||||||
: InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
|
: InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
|
||||||
[(set RRegs32:$d, imm:$a)]>;
|
[(set RRegu16:$d, imm:$a)]>;
|
||||||
def FMOVri
|
def MOVU32ri
|
||||||
|
: InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
|
||||||
|
[(set RRegu32:$d, imm:$a)]>;
|
||||||
|
def MOVU164ri
|
||||||
|
: InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
|
||||||
|
[(set RRegu64:$d, imm:$a)]>;
|
||||||
|
def MOVF32ri
|
||||||
: InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
|
: InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
|
||||||
[(set RRegf32:$d, fpimm:$a)]>;
|
[(set RRegf32:$d, fpimm:$a)]>;
|
||||||
|
def MOVF64ri
|
||||||
|
: InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
|
||||||
|
[(set RRegf64:$d, fpimm:$a)]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Integer loads
|
// Loads
|
||||||
defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
|
defm LDg : PTX_LD_ALL<"ld.global", load_global>;
|
||||||
defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
|
defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
|
||||||
defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
|
defm LDl : PTX_LD_ALL<"ld.local", load_local>;
|
||||||
defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>;
|
defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
|
||||||
defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
|
|
||||||
|
|
||||||
def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
|
// This is a special instruction that is manually inserted for kernel parameters
|
||||||
"ld.param.%type\t$d, [$a]", []>;
|
def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
|
||||||
|
"ld.param.u16\t$d, [$a]", []>;
|
||||||
|
def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
|
||||||
|
"ld.param.u32\t$d, [$a]", []>;
|
||||||
|
def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
|
||||||
|
"ld.param.u64\t$d, [$a]", []>;
|
||||||
|
def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
|
||||||
|
"ld.param.f32\t$d, [$a]", []>;
|
||||||
|
def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
|
||||||
|
"ld.param.f64\t$d, [$a]", []>;
|
||||||
|
|
||||||
// Floating-point loads
|
// Stores
|
||||||
defm FLDg : PTX_LD<"ld.global", RRegf32, load_global>;
|
defm STg : PTX_ST_ALL<"st.global", store_global>;
|
||||||
defm FLDc : PTX_LD<"ld.const", RRegf32, load_constant>;
|
defm STl : PTX_ST_ALL<"st.local", store_local>;
|
||||||
defm FLDl : PTX_LD<"ld.local", RRegf32, load_local>;
|
defm STs : PTX_ST_ALL<"st.shared", store_shared>;
|
||||||
defm FLDp : PTX_LD<"ld.param", RRegf32, load_parameter>;
|
|
||||||
defm FLDs : PTX_LD<"ld.shared", RRegf32, load_shared>;
|
|
||||||
|
|
||||||
def FLDpi : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
|
// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
|
||||||
"ld.param.%type\t$d, [$a]", []>;
|
// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
|
||||||
|
// TODO: Do something with st.param if/when it is needed.
|
||||||
// Integer stores
|
|
||||||
defm STg : PTX_ST<"st.global", RRegs32, store_global>;
|
|
||||||
defm STl : PTX_ST<"st.local", RRegs32, store_local>;
|
|
||||||
// Store to parameter state space requires PTX 2.0 or higher?
|
|
||||||
// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
|
|
||||||
defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
|
|
||||||
|
|
||||||
// Floating-point stores
|
|
||||||
defm FSTg : PTX_ST<"st.global", RRegf32, store_global>;
|
|
||||||
defm FSTl : PTX_ST<"st.local", RRegf32, store_local>;
|
|
||||||
// Store to parameter state space requires PTX 2.0 or higher?
|
|
||||||
// defm FSTp : PTX_ST<"st.param", RRegf32, store_parameter>;
|
|
||||||
defm FSTs : PTX_ST<"st.shared", RRegf32, store_shared>;
|
|
||||||
|
|
||||||
///===- Control Flow Instructions -----------------------------------------===//
|
///===- Control Flow Instructions -----------------------------------------===//
|
||||||
|
|
||||||
|
@ -79,12 +79,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
|
|
||||||
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
|
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
|
||||||
i = MFI->argRegBegin(), e = MFI->argRegEnd();
|
i = MFI->argRegBegin(), e = MFI->argRegEnd();
|
||||||
i != e; ++i)
|
i != e; ++i)
|
||||||
dbgs() << "Arg Reg: " << *i << "\n";);
|
dbgs() << "Arg Reg: " << *i << "\n";);
|
||||||
|
|
||||||
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
|
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
|
||||||
i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
|
i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
|
||||||
i != e; ++i)
|
i != e; ++i)
|
||||||
dbgs() << "Local Var Reg: " << *i << "\n";);
|
dbgs() << "Local Var Reg: " << *i << "\n";);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -19,6 +19,8 @@ class PTXReg<string n> : Register<n> {
|
|||||||
// Registers
|
// Registers
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
///===- Predicate Registers -----------------------------------------------===//
|
||||||
|
|
||||||
def P0 : PTXReg<"p0">;
|
def P0 : PTXReg<"p0">;
|
||||||
def P1 : PTXReg<"p1">;
|
def P1 : PTXReg<"p1">;
|
||||||
def P2 : PTXReg<"p2">;
|
def P2 : PTXReg<"p2">;
|
||||||
@ -52,6 +54,43 @@ def P29 : PTXReg<"p29">;
|
|||||||
def P30 : PTXReg<"p30">;
|
def P30 : PTXReg<"p30">;
|
||||||
def P31 : PTXReg<"p31">;
|
def P31 : PTXReg<"p31">;
|
||||||
|
|
||||||
|
///===- 16-bit Integer Registers ------------------------------------------===//
|
||||||
|
|
||||||
|
def RH0 : PTXReg<"rh0">;
|
||||||
|
def RH1 : PTXReg<"rh1">;
|
||||||
|
def RH2 : PTXReg<"rh2">;
|
||||||
|
def RH3 : PTXReg<"rh3">;
|
||||||
|
def RH4 : PTXReg<"rh4">;
|
||||||
|
def RH5 : PTXReg<"rh5">;
|
||||||
|
def RH6 : PTXReg<"rh6">;
|
||||||
|
def RH7 : PTXReg<"rh7">;
|
||||||
|
def RH8 : PTXReg<"rh8">;
|
||||||
|
def RH9 : PTXReg<"rh9">;
|
||||||
|
def RH10 : PTXReg<"rh10">;
|
||||||
|
def RH11 : PTXReg<"rh11">;
|
||||||
|
def RH12 : PTXReg<"rh12">;
|
||||||
|
def RH13 : PTXReg<"rh13">;
|
||||||
|
def RH14 : PTXReg<"rh14">;
|
||||||
|
def RH15 : PTXReg<"rh15">;
|
||||||
|
def RH16 : PTXReg<"rh16">;
|
||||||
|
def RH17 : PTXReg<"rh17">;
|
||||||
|
def RH18 : PTXReg<"rh18">;
|
||||||
|
def RH19 : PTXReg<"rh19">;
|
||||||
|
def RH20 : PTXReg<"rh20">;
|
||||||
|
def RH21 : PTXReg<"rh21">;
|
||||||
|
def RH22 : PTXReg<"rh22">;
|
||||||
|
def RH23 : PTXReg<"rh23">;
|
||||||
|
def RH24 : PTXReg<"rh24">;
|
||||||
|
def RH25 : PTXReg<"rh25">;
|
||||||
|
def RH26 : PTXReg<"rh26">;
|
||||||
|
def RH27 : PTXReg<"rh27">;
|
||||||
|
def RH28 : PTXReg<"rh28">;
|
||||||
|
def RH29 : PTXReg<"rh29">;
|
||||||
|
def RH30 : PTXReg<"rh30">;
|
||||||
|
def RH31 : PTXReg<"rh31">;
|
||||||
|
|
||||||
|
///===- 32-bit Integer Registers ------------------------------------------===//
|
||||||
|
|
||||||
def R0 : PTXReg<"r0">;
|
def R0 : PTXReg<"r0">;
|
||||||
def R1 : PTXReg<"r1">;
|
def R1 : PTXReg<"r1">;
|
||||||
def R2 : PTXReg<"r2">;
|
def R2 : PTXReg<"r2">;
|
||||||
@ -85,6 +124,43 @@ def R29 : PTXReg<"r29">;
|
|||||||
def R30 : PTXReg<"r30">;
|
def R30 : PTXReg<"r30">;
|
||||||
def R31 : PTXReg<"r31">;
|
def R31 : PTXReg<"r31">;
|
||||||
|
|
||||||
|
///===- 64-bit Integer Registers ------------------------------------------===//
|
||||||
|
|
||||||
|
def RD0 : PTXReg<"rd0">;
|
||||||
|
def RD1 : PTXReg<"rd1">;
|
||||||
|
def RD2 : PTXReg<"rd2">;
|
||||||
|
def RD3 : PTXReg<"rd3">;
|
||||||
|
def RD4 : PTXReg<"rd4">;
|
||||||
|
def RD5 : PTXReg<"rd5">;
|
||||||
|
def RD6 : PTXReg<"rd6">;
|
||||||
|
def RD7 : PTXReg<"rd7">;
|
||||||
|
def RD8 : PTXReg<"rd8">;
|
||||||
|
def RD9 : PTXReg<"rd9">;
|
||||||
|
def RD10 : PTXReg<"rd10">;
|
||||||
|
def RD11 : PTXReg<"rd11">;
|
||||||
|
def RD12 : PTXReg<"rd12">;
|
||||||
|
def RD13 : PTXReg<"rd13">;
|
||||||
|
def RD14 : PTXReg<"rd14">;
|
||||||
|
def RD15 : PTXReg<"rd15">;
|
||||||
|
def RD16 : PTXReg<"rd16">;
|
||||||
|
def RD17 : PTXReg<"rd17">;
|
||||||
|
def RD18 : PTXReg<"rd18">;
|
||||||
|
def RD19 : PTXReg<"rd19">;
|
||||||
|
def RD20 : PTXReg<"rd20">;
|
||||||
|
def RD21 : PTXReg<"rd21">;
|
||||||
|
def RD22 : PTXReg<"rd22">;
|
||||||
|
def RD23 : PTXReg<"rd23">;
|
||||||
|
def RD24 : PTXReg<"rd24">;
|
||||||
|
def RD25 : PTXReg<"rd25">;
|
||||||
|
def RD26 : PTXReg<"rd26">;
|
||||||
|
def RD27 : PTXReg<"rd27">;
|
||||||
|
def RD28 : PTXReg<"rd28">;
|
||||||
|
def RD29 : PTXReg<"rd29">;
|
||||||
|
def RD30 : PTXReg<"rd30">;
|
||||||
|
def RD31 : PTXReg<"rd31">;
|
||||||
|
|
||||||
|
///===- 32-bit Floating-Point Registers -----------------------------------===//
|
||||||
|
|
||||||
def F0 : PTXReg<"f0">;
|
def F0 : PTXReg<"f0">;
|
||||||
def F1 : PTXReg<"f1">;
|
def F1 : PTXReg<"f1">;
|
||||||
def F2 : PTXReg<"f2">;
|
def F2 : PTXReg<"f2">;
|
||||||
@ -118,6 +194,41 @@ def F29 : PTXReg<"f29">;
|
|||||||
def F30 : PTXReg<"f30">;
|
def F30 : PTXReg<"f30">;
|
||||||
def F31 : PTXReg<"f31">;
|
def F31 : PTXReg<"f31">;
|
||||||
|
|
||||||
|
///===- 64-bit Floating-Point Registers -----------------------------------===//
|
||||||
|
|
||||||
|
def FD0 : PTXReg<"fd0">;
|
||||||
|
def FD1 : PTXReg<"fd1">;
|
||||||
|
def FD2 : PTXReg<"fd2">;
|
||||||
|
def FD3 : PTXReg<"fd3">;
|
||||||
|
def FD4 : PTXReg<"fd4">;
|
||||||
|
def FD5 : PTXReg<"fd5">;
|
||||||
|
def FD6 : PTXReg<"fd6">;
|
||||||
|
def FD7 : PTXReg<"fd7">;
|
||||||
|
def FD8 : PTXReg<"fd8">;
|
||||||
|
def FD9 : PTXReg<"fd9">;
|
||||||
|
def FD10 : PTXReg<"fd10">;
|
||||||
|
def FD11 : PTXReg<"fd11">;
|
||||||
|
def FD12 : PTXReg<"fd12">;
|
||||||
|
def FD13 : PTXReg<"fd13">;
|
||||||
|
def FD14 : PTXReg<"fd14">;
|
||||||
|
def FD15 : PTXReg<"fd15">;
|
||||||
|
def FD16 : PTXReg<"fd16">;
|
||||||
|
def FD17 : PTXReg<"fd17">;
|
||||||
|
def FD18 : PTXReg<"fd18">;
|
||||||
|
def FD19 : PTXReg<"fd19">;
|
||||||
|
def FD20 : PTXReg<"fd20">;
|
||||||
|
def FD21 : PTXReg<"fd21">;
|
||||||
|
def FD22 : PTXReg<"fd22">;
|
||||||
|
def FD23 : PTXReg<"fd23">;
|
||||||
|
def FD24 : PTXReg<"fd24">;
|
||||||
|
def FD25 : PTXReg<"fd25">;
|
||||||
|
def FD26 : PTXReg<"fd26">;
|
||||||
|
def FD27 : PTXReg<"fd27">;
|
||||||
|
def FD28 : PTXReg<"fd28">;
|
||||||
|
def FD29 : PTXReg<"fd29">;
|
||||||
|
def FD30 : PTXReg<"fd30">;
|
||||||
|
def FD31 : PTXReg<"fd31">;
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Register classes
|
// Register classes
|
||||||
@ -129,14 +240,32 @@ def Preds : RegisterClass<"PTX", [i1], 8,
|
|||||||
P16, P17, P18, P19, P20, P21, P22, P23,
|
P16, P17, P18, P19, P20, P21, P22, P23,
|
||||||
P24, P25, P26, P27, P28, P29, P30, P31]>;
|
P24, P25, P26, P27, P28, P29, P30, P31]>;
|
||||||
|
|
||||||
def RRegs32 : RegisterClass<"PTX", [i32], 32,
|
def RRegu16 : RegisterClass<"PTX", [i16], 16,
|
||||||
|
[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
|
||||||
|
RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
|
||||||
|
RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
|
||||||
|
RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31]>;
|
||||||
|
|
||||||
|
def RRegu32 : RegisterClass<"PTX", [i32], 32,
|
||||||
[R0, R1, R2, R3, R4, R5, R6, R7,
|
[R0, R1, R2, R3, R4, R5, R6, R7,
|
||||||
R8, R9, R10, R11, R12, R13, R14, R15,
|
R8, R9, R10, R11, R12, R13, R14, R15,
|
||||||
R16, R17, R18, R19, R20, R21, R22, R23,
|
R16, R17, R18, R19, R20, R21, R22, R23,
|
||||||
R24, R25, R26, R27, R28, R29, R30, R31]>;
|
R24, R25, R26, R27, R28, R29, R30, R31]>;
|
||||||
|
|
||||||
|
def RRegu64 : RegisterClass<"PTX", [i64], 64,
|
||||||
|
[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
|
||||||
|
RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
|
||||||
|
RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
|
||||||
|
RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31]>;
|
||||||
|
|
||||||
def RRegf32 : RegisterClass<"PTX", [f32], 32,
|
def RRegf32 : RegisterClass<"PTX", [f32], 32,
|
||||||
[F0, F1, F2, F3, F4, F5, F6, F7,
|
[F0, F1, F2, F3, F4, F5, F6, F7,
|
||||||
F8, F9, F10, F11, F12, F13, F14, F15,
|
F8, F9, F10, F11, F12, F13, F14, F15,
|
||||||
F16, F17, F18, F19, F20, F21, F22, F23,
|
F16, F17, F18, F19, F20, F21, F22, F23,
|
||||||
F24, F25, F26, F27, F28, F29, F30, F31]>;
|
F24, F25, F26, F27, F28, F29, F30, F31]>;
|
||||||
|
|
||||||
|
def RRegf64 : RegisterClass<"PTX", [f64], 64,
|
||||||
|
[FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7,
|
||||||
|
FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15,
|
||||||
|
FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23,
|
||||||
|
FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31]>;
|
||||||
|
@ -12,12 +12,33 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "PTXSubtarget.h"
|
#include "PTXSubtarget.h"
|
||||||
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
|
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS)
|
||||||
std::string TARGET = "sm_20";
|
: PTXShaderModel(PTX_SM_1_0),
|
||||||
// TODO: call ParseSubtargetFeatures(FS, TARGET);
|
PTXVersion(PTX_VERSION_1_4),
// Give SupportsDouble a defined default: it is not assigned anywhere else in
// this constructor, and supportsDouble() reads it unconditionally.
// NOTE(review): presumably the TableGen-generated ParseSubtargetFeatures only
// sets it when the "double" feature is requested -- confirm.
SupportsDouble(false) {
|
||||||
|
std::string TARGET = "generic";
|
||||||
|
ParseSubtargetFeatures(FS, TARGET);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string PTXSubtarget::getTargetString() const {
|
||||||
|
switch(PTXShaderModel) {
|
||||||
|
default: llvm_unreachable("Unknown shader model");
|
||||||
|
case PTX_SM_1_0: return "sm_10";
|
||||||
|
case PTX_SM_1_3: return "sm_13";
|
||||||
|
case PTX_SM_2_0: return "sm_20";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string PTXSubtarget::getPTXVersionString() const {
|
||||||
|
switch(PTXVersion) {
|
||||||
|
default: llvm_unreachable("Unknown PTX version");
|
||||||
|
case PTX_VERSION_1_4: return "1.4";
|
||||||
|
case PTX_VERSION_2_0: return "2.0";
|
||||||
|
case PTX_VERSION_2_1: return "2.1";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#include "PTXGenSubtarget.inc"
|
#include "PTXGenSubtarget.inc"
|
||||||
|
@ -19,11 +19,36 @@
|
|||||||
namespace llvm {
|
namespace llvm {
|
||||||
class PTXSubtarget : public TargetSubtarget {
|
class PTXSubtarget : public TargetSubtarget {
|
||||||
private:
|
private:
|
||||||
bool is_sm20;
|
enum PTXShaderModelEnum {
|
||||||
|
PTX_SM_1_0,
|
||||||
|
PTX_SM_1_3,
|
||||||
|
PTX_SM_2_0
|
||||||
|
};
|
||||||
|
|
||||||
|
enum PTXVersionEnum {
|
||||||
|
PTX_VERSION_1_4,
|
||||||
|
PTX_VERSION_2_0,
|
||||||
|
PTX_VERSION_2_1
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Shader Model supported on the target GPU.
|
||||||
|
PTXShaderModelEnum PTXShaderModel;
|
||||||
|
|
||||||
|
/// PTX Language Version.
|
||||||
|
PTXVersionEnum PTXVersion;
|
||||||
|
|
||||||
|
/// True if the native .f64 type is supported on the target hardware.
|
||||||
|
bool SupportsDouble;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PTXSubtarget(const std::string &TT, const std::string &FS);
|
PTXSubtarget(const std::string &TT, const std::string &FS);
|
||||||
|
|
||||||
|
std::string getTargetString() const;
|
||||||
|
|
||||||
|
std::string getPTXVersionString() const;
|
||||||
|
|
||||||
|
bool supportsDouble() const { return SupportsDouble; }
|
||||||
|
|
||||||
std::string ParseSubtargetFeatures(const std::string &FS,
|
std::string ParseSubtargetFeatures(const std::string &FS,
|
||||||
const std::string &CPU);
|
const std::string &CPU);
|
||||||
}; // class PTXSubtarget
|
}; // class PTXSubtarget
|
||||||
|
@ -1,29 +1,71 @@
|
|||||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||||
|
|
||||||
define ptx_device i32 @t1(i32 %x, i32 %y) {
|
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
|
||||||
; CHECK: add.s32 r0, r1, r2;
|
; CHECK: add.u16 rh0, rh1, rh2;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = add i16 %x, %y
|
||||||
|
ret i16 %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
|
||||||
|
; CHECK: add.u32 r0, r1, r2;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
%z = add i32 %x, %y
|
%z = add i32 %x, %y
|
||||||
; CHECK: ret;
|
|
||||||
ret i32 %z
|
ret i32 %z
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t2(i32 %x) {
|
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||||
; CHECK: add.s32 r0, r1, 1;
|
; CHECK: add.u64 rd0, rd1, rd2;
|
||||||
%z = add i32 %x, 1
|
; CHECK-NEXT: ret;
|
||||||
; CHECK: ret;
|
%z = add i64 %x, %y
|
||||||
ret i32 %z
|
ret i64 %z
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device float @t3(float %x, float %y) {
|
define ptx_device float @t1_f32(float %x, float %y) {
|
||||||
; CHECK: add.f32 f0, f1, f2
|
; CHECK: add.f32 f0, f1, f2
|
||||||
; CHECK-NEXT: ret;
|
; CHECK-NEXT: ret;
|
||||||
%z = fadd float %x, %y
|
%z = fadd float %x, %y
|
||||||
ret float %z
|
ret float %z
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device float @t4(float %x) {
|
define ptx_device double @t1_f64(double %x, double %y) {
|
||||||
|
; CHECK: add.f64 fd0, fd1, fd2
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = fadd double %x, %y
|
||||||
|
ret double %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t2_u16(i16 %x) {
|
||||||
|
; CHECK: add.u16 rh0, rh1, 1;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = add i16 %x, 1
|
||||||
|
ret i16 %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t2_u32(i32 %x) {
|
||||||
|
; CHECK: add.u32 r0, r1, 1;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = add i32 %x, 1
|
||||||
|
ret i32 %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i64 @t2_u64(i64 %x) {
|
||||||
|
; CHECK: add.u64 rd0, rd1, 1;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = add i64 %x, 1
|
||||||
|
ret i64 %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t2_f32(float %x) {
|
||||||
; CHECK: add.f32 f0, f1, 0F3F800000;
|
; CHECK: add.f32 f0, f1, 0F3F800000;
|
||||||
; CHECK-NEXT: ret;
|
; CHECK-NEXT: ret;
|
||||||
%z = fadd float %x, 1.0
|
%z = fadd float %x, 1.0
|
||||||
ret float %z
|
ret float %z
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t2_f64(double %x) {
|
||||||
|
; CHECK: add.f64 fd0, fd1, 0D3FF0000000000000;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = fadd double %x, 1.0
|
||||||
|
ret double %z
|
||||||
|
}
|
||||||
|
@ -1,78 +1,422 @@
|
|||||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||||
|
|
||||||
;CHECK: .extern .global .s32 array[];
|
;CHECK: .extern .global .u16 array_i16[];
|
||||||
@array = external global [10 x i32]
|
@array_i16 = external global [10 x i16]
|
||||||
|
|
||||||
;CHECK: .extern .const .s32 array_constant[];
|
;CHECK: .extern .const .u16 array_constant_i16[];
|
||||||
@array_constant = external addrspace(1) constant [10 x i32]
|
@array_constant_i16 = external addrspace(1) constant [10 x i16]
|
||||||
|
|
||||||
;CHECK: .extern .local .s32 array_local[];
|
;CHECK: .extern .local .u16 array_local_i16[];
|
||||||
@array_local = external addrspace(2) global [10 x i32]
|
@array_local_i16 = external addrspace(2) global [10 x i16]
|
||||||
|
|
||||||
;CHECK: .extern .shared .s32 array_shared[];
|
;CHECK: .extern .shared .u16 array_shared_i16[];
|
||||||
@array_shared = external addrspace(4) global [10 x i32]
|
@array_shared_i16 = external addrspace(4) global [10 x i16]
|
||||||
|
|
||||||
define ptx_device i32 @t1(i32* %p) {
|
;CHECK: .extern .global .u32 array_i32[];
|
||||||
|
@array_i32 = external global [10 x i32]
|
||||||
|
|
||||||
|
;CHECK: .extern .const .u32 array_constant_i32[];
|
||||||
|
@array_constant_i32 = external addrspace(1) constant [10 x i32]
|
||||||
|
|
||||||
|
;CHECK: .extern .local .u32 array_local_i32[];
|
||||||
|
@array_local_i32 = external addrspace(2) global [10 x i32]
|
||||||
|
|
||||||
|
;CHECK: .extern .shared .u32 array_shared_i32[];
|
||||||
|
@array_shared_i32 = external addrspace(4) global [10 x i32]
|
||||||
|
|
||||||
|
;CHECK: .extern .global .u64 array_i64[];
|
||||||
|
@array_i64 = external global [10 x i64]
|
||||||
|
|
||||||
|
;CHECK: .extern .const .u64 array_constant_i64[];
|
||||||
|
@array_constant_i64 = external addrspace(1) constant [10 x i64]
|
||||||
|
|
||||||
|
;CHECK: .extern .local .u64 array_local_i64[];
|
||||||
|
@array_local_i64 = external addrspace(2) global [10 x i64]
|
||||||
|
|
||||||
|
;CHECK: .extern .shared .u64 array_shared_i64[];
|
||||||
|
@array_shared_i64 = external addrspace(4) global [10 x i64]
|
||||||
|
|
||||||
|
;CHECK: .extern .global .f32 array_float[];
|
||||||
|
@array_float = external global [10 x float]
|
||||||
|
|
||||||
|
;CHECK: .extern .const .f32 array_constant_float[];
|
||||||
|
@array_constant_float = external addrspace(1) constant [10 x float]
|
||||||
|
|
||||||
|
;CHECK: .extern .local .f32 array_local_float[];
|
||||||
|
@array_local_float = external addrspace(2) global [10 x float]
|
||||||
|
|
||||||
|
;CHECK: .extern .shared .f32 array_shared_float[];
|
||||||
|
@array_shared_float = external addrspace(4) global [10 x float]
|
||||||
|
|
||||||
|
;CHECK: .extern .global .f64 array_double[];
|
||||||
|
@array_double = external global [10 x double]
|
||||||
|
|
||||||
|
;CHECK: .extern .const .f64 array_constant_double[];
|
||||||
|
@array_constant_double = external addrspace(1) constant [10 x double]
|
||||||
|
|
||||||
|
;CHECK: .extern .local .f64 array_local_double[];
|
||||||
|
@array_local_double = external addrspace(2) global [10 x double]
|
||||||
|
|
||||||
|
;CHECK: .extern .shared .f64 array_shared_double[];
|
||||||
|
@array_shared_double = external addrspace(4) global [10 x double]
|
||||||
|
|
||||||
|
|
||||||
|
define ptx_device i16 @t1_u16(i16* %p) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: ld.global.s32 r0, [r1];
|
;CHECK: ld.global.u16 rh0, [r1];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%x = load i16* %p
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t1_u32(i32* %p) {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u32 r0, [r1];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
%x = load i32* %p
|
%x = load i32* %p
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t2(i32* %p) {
|
define ptx_device i64 @t1_u64(i64* %p) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: ld.global.s32 r0, [r1+4];
|
;CHECK: ld.global.u64 rd0, [r1];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%x = load i64* %p
|
||||||
|
ret i64 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t1_f32(float* %p) {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.f32 f0, [r1];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%x = load float* %p
|
||||||
|
ret float %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t1_f64(double* %p) {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.f64 fd0, [r1];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%x = load double* %p
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t2_u16(i16* %p) {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u16 rh0, [r1+2];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr i16* %p, i32 1
|
||||||
|
%x = load i16* %i
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t2_u32(i32* %p) {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u32 r0, [r1+4];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
%i = getelementptr i32* %p, i32 1
|
%i = getelementptr i32* %p, i32 1
|
||||||
%x = load i32* %i
|
%x = load i32* %i
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t3(i32* %p, i32 %q) {
|
define ptx_device i64 @t2_u64(i64* %p) {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u64 rd0, [r1+8];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr i64* %p, i32 1
|
||||||
|
%x = load i64* %i
|
||||||
|
ret i64 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t2_f32(float* %p) {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.f32 f0, [r1+4];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr float* %p, i32 1
|
||||||
|
%x = load float* %i
|
||||||
|
ret float %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t2_f64(double* %p) {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.f64 fd0, [r1+8];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr double* %p, i32 1
|
||||||
|
%x = load double* %i
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
|
||||||
|
entry:
|
||||||
|
;CHECK: shl.b32 r0, r2, 1;
|
||||||
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
|
;CHECK-NEXT: ld.global.u16 rh0, [r0];
|
||||||
|
%i = getelementptr i16* %p, i32 %q
|
||||||
|
%x = load i16* %i
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: shl.b32 r0, r2, 2;
|
;CHECK: shl.b32 r0, r2, 2;
|
||||||
;CHECK: add.s32 r0, r1, r0;
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
;CHECK: ld.global.s32 r0, [r0];
|
;CHECK-NEXT: ld.global.u32 r0, [r0];
|
||||||
%i = getelementptr i32* %p, i32 %q
|
%i = getelementptr i32* %p, i32 %q
|
||||||
%x = load i32* %i
|
%x = load i32* %i
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t4_global() {
|
define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: ld.global.s32 r0, [array];
|
;CHECK: shl.b32 r0, r2, 3;
|
||||||
%i = getelementptr [10 x i32]* @array, i32 0, i32 0
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
|
;CHECK-NEXT: ld.global.u64 rd0, [r0];
|
||||||
|
%i = getelementptr i64* %p, i32 %q
|
||||||
|
%x = load i64* %i
|
||||||
|
ret i64 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t3_f32(float* %p, i32 %q) {
|
||||||
|
entry:
|
||||||
|
;CHECK: shl.b32 r0, r2, 2;
|
||||||
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
|
;CHECK-NEXT: ld.global.f32 f0, [r0];
|
||||||
|
%i = getelementptr float* %p, i32 %q
|
||||||
|
%x = load float* %i
|
||||||
|
ret float %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t3_f64(double* %p, i32 %q) {
|
||||||
|
entry:
|
||||||
|
;CHECK: shl.b32 r0, r2, 3;
|
||||||
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
|
;CHECK-NEXT: ld.global.f64 fd0, [r0];
|
||||||
|
%i = getelementptr double* %p, i32 %q
|
||||||
|
%x = load double* %i
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t4_global_u16() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u16 rh0, [array_i16];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
|
||||||
|
%x = load i16* %i
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t4_global_u32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u32 r0, [array_i32];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
|
||||||
%x = load i32* %i
|
%x = load i32* %i
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t4_const() {
|
define ptx_device i64 @t4_global_u64() {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: ld.const.s32 r0, [array_constant];
|
;CHECK: ld.global.u64 rd0, [array_i64];
|
||||||
%i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
|
||||||
|
%x = load i64* %i
|
||||||
|
ret i64 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t4_global_f32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.f32 f0, [array_float];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
|
||||||
|
%x = load float* %i
|
||||||
|
ret float %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t4_global_f64() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.f64 fd0, [array_double];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
|
||||||
|
%x = load double* %i
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t4_const_u16() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.const.u16 rh0, [array_constant_i16];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
|
||||||
|
%x = load i16 addrspace(1)* %i
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t4_const_u32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.const.u32 r0, [array_constant_i32];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
|
||||||
%x = load i32 addrspace(1)* %i
|
%x = load i32 addrspace(1)* %i
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t4_local() {
|
define ptx_device i64 @t4_const_u64() {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: ld.local.s32 r0, [array_local];
|
;CHECK: ld.const.u64 rd0, [array_constant_i64];
|
||||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
|
||||||
|
%x = load i64 addrspace(1)* %i
|
||||||
|
ret i64 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t4_const_f32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.const.f32 f0, [array_constant_float];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
|
||||||
|
%x = load float addrspace(1)* %i
|
||||||
|
ret float %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t4_const_f64() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.const.f64 fd0, [array_constant_double];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
|
||||||
|
%x = load double addrspace(1)* %i
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t4_local_u16() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.local.u16 rh0, [array_local_i16];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
|
||||||
|
%x = load i16 addrspace(2)* %i
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t4_local_u32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.local.u32 r0, [array_local_i32];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
|
||||||
%x = load i32 addrspace(2)* %i
|
%x = load i32 addrspace(2)* %i
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t4_shared() {
|
define ptx_device i64 @t4_local_u64() {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: ld.shared.s32 r0, [array_shared];
|
;CHECK: ld.local.u64 rd0, [array_local_i64];
|
||||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
|
||||||
|
%x = load i64 addrspace(2)* %i
|
||||||
|
ret i64 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t4_local_f32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.local.f32 f0, [array_local_float];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
|
||||||
|
%x = load float addrspace(2)* %i
|
||||||
|
ret float %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t4_local_f64() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.local.f64 fd0, [array_local_double];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
|
||||||
|
%x = load double addrspace(2)* %i
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t4_shared_u16() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.shared.u16 rh0, [array_shared_i16];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
|
||||||
|
%x = load i16 addrspace(4)* %i
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t4_shared_u32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.shared.u32 r0, [array_shared_i32];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
|
||||||
%x = load i32 addrspace(4)* %i
|
%x = load i32 addrspace(4)* %i
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t5() {
|
define ptx_device i64 @t4_shared_u64() {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: ld.global.s32 r0, [array+4];
|
;CHECK: ld.shared.u64 rd0, [array_shared_i64];
|
||||||
%i = getelementptr [10 x i32]* @array, i32 0, i32 1
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
|
||||||
|
%x = load i64 addrspace(4)* %i
|
||||||
|
ret i64 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t4_shared_f32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.shared.f32 f0, [array_shared_float];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
|
||||||
|
%x = load float addrspace(4)* %i
|
||||||
|
ret float %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t4_shared_f64() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.shared.f64 fd0, [array_shared_double];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
|
||||||
|
%x = load double addrspace(4)* %i
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t5_u16() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u16 rh0, [array_i16+2];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
|
||||||
|
%x = load i16* %i
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t5_u32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u32 r0, [array_i32+4];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
|
||||||
%x = load i32* %i
|
%x = load i32* %i
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define ptx_device i64 @t5_u64() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.u64 rd0, [array_i64+8];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
|
||||||
|
%x = load i64* %i
|
||||||
|
ret i64 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t5_f32() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.f32 f0, [array_float+4];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
|
||||||
|
%x = load float* %i
|
||||||
|
ret float %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t5_f64() {
|
||||||
|
entry:
|
||||||
|
;CHECK: ld.global.f64 fd0, [array_double+8];
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
|
||||||
|
%x = load double* %i
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
@ -1,86 +0,0 @@
|
|||||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
|
||||||
|
|
||||||
;CHECK: .extern .global .f32 array[];
|
|
||||||
@array = external global [10 x float]
|
|
||||||
|
|
||||||
;CHECK: .extern .const .f32 array_constant[];
|
|
||||||
@array_constant = external addrspace(1) constant [10 x float]
|
|
||||||
|
|
||||||
;CHECK: .extern .local .f32 array_local[];
|
|
||||||
@array_local = external addrspace(2) global [10 x float]
|
|
||||||
|
|
||||||
;CHECK: .extern .shared .f32 array_shared[];
|
|
||||||
@array_shared = external addrspace(4) global [10 x float]
|
|
||||||
|
|
||||||
define ptx_device float @t1(float* %p) {
|
|
||||||
entry:
|
|
||||||
;CHECK: ld.global.f32 f0, [r1];
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%x = load float* %p
|
|
||||||
ret float %x
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device float @t2(float* %p) {
|
|
||||||
entry:
|
|
||||||
;CHECK: ld.global.f32 f0, [r1+4];
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr float* %p, i32 1
|
|
||||||
%x = load float* %i
|
|
||||||
ret float %x
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device float @t3(float* %p, i32 %q) {
|
|
||||||
entry:
|
|
||||||
;CHECK: shl.b32 r0, r2, 2;
|
|
||||||
;CHECK-NEXT: add.s32 r0, r1, r0;
|
|
||||||
;CHECK-NEXT: ld.global.f32 f0, [r0];
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr float* %p, i32 %q
|
|
||||||
%x = load float* %i
|
|
||||||
ret float %x
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device float @t4_global() {
|
|
||||||
entry:
|
|
||||||
;CHECK: ld.global.f32 f0, [array];
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float]* @array, i32 0, i32 0
|
|
||||||
%x = load float* %i
|
|
||||||
ret float %x
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device float @t4_const() {
|
|
||||||
entry:
|
|
||||||
;CHECK: ld.const.f32 f0, [array_constant];
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float] addrspace(1)* @array_constant, i32 0, i32 0
|
|
||||||
%x = load float addrspace(1)* %i
|
|
||||||
ret float %x
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device float @t4_local() {
|
|
||||||
entry:
|
|
||||||
;CHECK: ld.local.f32 f0, [array_local];
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
|
|
||||||
%x = load float addrspace(2)* %i
|
|
||||||
ret float %x
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device float @t4_shared() {
|
|
||||||
entry:
|
|
||||||
;CHECK: ld.shared.f32 f0, [array_shared];
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
|
|
||||||
%x = load float addrspace(4)* %i
|
|
||||||
ret float %x
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device float @t5() {
|
|
||||||
entry:
|
|
||||||
;CHECK: ld.global.f32 f0, [array+4];
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float]* @array, i32 0, i32 1
|
|
||||||
%x = load float* %i
|
|
||||||
ret float %x
|
|
||||||
}
|
|
@ -1,25 +1,62 @@
|
|||||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||||
|
|
||||||
define ptx_device i32 @t1() {
|
define ptx_device i16 @t1_u16() {
|
||||||
; CHECK: mov.s32 r0, 0;
|
; CHECK: mov.u16 rh0, 0;
|
||||||
|
; CHECK: ret;
|
||||||
|
ret i16 0
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t1_u32() {
|
||||||
|
; CHECK: mov.u32 r0, 0;
|
||||||
; CHECK: ret;
|
; CHECK: ret;
|
||||||
ret i32 0
|
ret i32 0
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t2(i32 %x) {
|
define ptx_device i64 @t1_u64() {
|
||||||
; CHECK: mov.s32 r0, r1;
|
; CHECK: mov.u64 rd0, 0;
|
||||||
|
; CHECK: ret;
|
||||||
|
ret i64 0
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t1_f32() {
|
||||||
|
; CHECK: mov.f32 f0, 0F00000000;
|
||||||
|
; CHECK: ret;
|
||||||
|
ret float 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t1_f64() {
|
||||||
|
; CHECK: mov.f64 fd0, 0D0000000000000000;
|
||||||
|
; CHECK: ret;
|
||||||
|
ret double 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t2_u16(i16 %x) {
|
||||||
|
; CHECK: mov.u16 rh0, rh1;
|
||||||
|
; CHECK: ret;
|
||||||
|
ret i16 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t2_u32(i32 %x) {
|
||||||
|
; CHECK: mov.u32 r0, r1;
|
||||||
; CHECK: ret;
|
; CHECK: ret;
|
||||||
ret i32 %x
|
ret i32 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device float @t3() {
|
define ptx_device i64 @t2_u64(i64 %x) {
|
||||||
; CHECK: mov.f32 f0, 0F00000000;
|
; CHECK: mov.u64 rd0, rd1;
|
||||||
; CHECK-NEXT: ret;
|
; CHECK: ret;
|
||||||
ret float 0.0
|
ret i64 %x
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device float @t4(float %x) {
|
define ptx_device float @t3_f32(float %x) {
|
||||||
; CHECK: mov.f32 f0, f1;
|
; CHECK: mov.f32 f0, f1;
|
||||||
; CHECK-NEXT: ret;
|
; CHECK-NEXT: ret;
|
||||||
ret float %x
|
ret float %x
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t3_f64(double %x) {
|
||||||
|
; CHECK: mov.f64 fd0, fd1;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
ret double %x
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -10,16 +10,30 @@
|
|||||||
; ret i32 %z
|
; ret i32 %z
|
||||||
;}
|
;}
|
||||||
|
|
||||||
define ptx_device float @t3(float %x, float %y) {
|
define ptx_device float @t1_f32(float %x, float %y) {
|
||||||
; CHECK: mul.f32 f0, f1, f2
|
; CHECK: mul.f32 f0, f1, f2
|
||||||
; CHECK-NEXT: ret;
|
; CHECK-NEXT: ret;
|
||||||
%z = fmul float %x, %y
|
%z = fmul float %x, %y
|
||||||
ret float %z
|
ret float %z
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device float @t4(float %x) {
|
define ptx_device double @t1_f64(double %x, double %y) {
|
||||||
|
; CHECK: mul.f64 fd0, fd1, fd2
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = fmul double %x, %y
|
||||||
|
ret double %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t2_f32(float %x) {
|
||||||
; CHECK: mul.f32 f0, f1, 0F40A00000;
|
; CHECK: mul.f32 f0, f1, 0F40A00000;
|
||||||
; CHECK-NEXT: ret;
|
; CHECK-NEXT: ret;
|
||||||
%z = fmul float %x, 5.0
|
%z = fmul float %x, 5.0
|
||||||
ret float %z
|
ret float %z
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t2_f64(double %x) {
|
||||||
|
; CHECK: mul.f64 fd0, fd1, 0D4014000000000000;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = fmul double %x, 5.0
|
||||||
|
ret double %z
|
||||||
|
}
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
; RUN: llc < %s -march=ptx -ptx-version=2.0 | grep ".version 2.0"
|
; RUN: llc < %s -march=ptx -mattr=ptx14 | grep ".version 1.4"
|
||||||
; RUN: llc < %s -march=ptx -ptx-target=sm_20 | grep ".target sm_20"
|
; RUN: llc < %s -march=ptx -mattr=ptx20 | grep ".version 2.0"
|
||||||
|
; RUN: llc < %s -march=ptx -mattr=ptx21 | grep ".version 2.1"
|
||||||
|
; RUN: llc < %s -march=ptx -mattr=sm20 | grep ".target sm_20"
|
||||||
|
; RUN: llc < %s -march=ptx -mattr=sm13 | grep ".target sm_13"
|
||||||
|
|
||||||
define ptx_device void @t1() {
|
define ptx_device void @t1() {
|
||||||
ret void
|
ret void
|
||||||
|
@ -1,71 +1,382 @@
|
|||||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||||
|
|
||||||
;CHECK: .extern .global .s32 array[];
|
;CHECK: .extern .global .u16 array_i16[];
|
||||||
@array = external global [10 x i32]
|
@array_i16 = external global [10 x i16]
|
||||||
|
|
||||||
;CHECK: .extern .const .s32 array_constant[];
|
;CHECK: .extern .const .u16 array_constant_i16[];
|
||||||
@array_constant = external addrspace(1) constant [10 x i32]
|
@array_constant_i16 = external addrspace(1) constant [10 x i16]
|
||||||
|
|
||||||
;CHECK: .extern .local .s32 array_local[];
|
;CHECK: .extern .local .u16 array_local_i16[];
|
||||||
@array_local = external addrspace(2) global [10 x i32]
|
@array_local_i16 = external addrspace(2) global [10 x i16]
|
||||||
|
|
||||||
;CHECK: .extern .shared .s32 array_shared[];
|
;CHECK: .extern .shared .u16 array_shared_i16[];
|
||||||
@array_shared = external addrspace(4) global [10 x i32]
|
@array_shared_i16 = external addrspace(4) global [10 x i16]
|
||||||
|
|
||||||
define ptx_device void @t1(i32* %p, i32 %x) {
|
;CHECK: .extern .global .u32 array_i32[];
|
||||||
|
@array_i32 = external global [10 x i32]
|
||||||
|
|
||||||
|
;CHECK: .extern .const .u32 array_constant_i32[];
|
||||||
|
@array_constant_i32 = external addrspace(1) constant [10 x i32]
|
||||||
|
|
||||||
|
;CHECK: .extern .local .u32 array_local_i32[];
|
||||||
|
@array_local_i32 = external addrspace(2) global [10 x i32]
|
||||||
|
|
||||||
|
;CHECK: .extern .shared .u32 array_shared_i32[];
|
||||||
|
@array_shared_i32 = external addrspace(4) global [10 x i32]
|
||||||
|
|
||||||
|
;CHECK: .extern .global .u64 array_i64[];
|
||||||
|
@array_i64 = external global [10 x i64]
|
||||||
|
|
||||||
|
;CHECK: .extern .const .u64 array_constant_i64[];
|
||||||
|
@array_constant_i64 = external addrspace(1) constant [10 x i64]
|
||||||
|
|
||||||
|
;CHECK: .extern .local .u64 array_local_i64[];
|
||||||
|
@array_local_i64 = external addrspace(2) global [10 x i64]
|
||||||
|
|
||||||
|
;CHECK: .extern .shared .u64 array_shared_i64[];
|
||||||
|
@array_shared_i64 = external addrspace(4) global [10 x i64]
|
||||||
|
|
||||||
|
;CHECK: .extern .global .f32 array_float[];
|
||||||
|
@array_float = external global [10 x float]
|
||||||
|
|
||||||
|
;CHECK: .extern .const .f32 array_constant_float[];
|
||||||
|
@array_constant_float = external addrspace(1) constant [10 x float]
|
||||||
|
|
||||||
|
;CHECK: .extern .local .f32 array_local_float[];
|
||||||
|
@array_local_float = external addrspace(2) global [10 x float]
|
||||||
|
|
||||||
|
;CHECK: .extern .shared .f32 array_shared_float[];
|
||||||
|
@array_shared_float = external addrspace(4) global [10 x float]
|
||||||
|
|
||||||
|
;CHECK: .extern .global .f64 array_double[];
|
||||||
|
@array_double = external global [10 x double]
|
||||||
|
|
||||||
|
;CHECK: .extern .const .f64 array_constant_double[];
|
||||||
|
@array_constant_double = external addrspace(1) constant [10 x double]
|
||||||
|
|
||||||
|
;CHECK: .extern .local .f64 array_local_double[];
|
||||||
|
@array_local_double = external addrspace(2) global [10 x double]
|
||||||
|
|
||||||
|
;CHECK: .extern .shared .f64 array_shared_double[];
|
||||||
|
@array_shared_double = external addrspace(4) global [10 x double]
|
||||||
|
|
||||||
|
|
||||||
|
define ptx_device void @t1_u16(i16* %p, i16 %x) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: st.global.s32 [r1], r2;
|
;CHECK: st.global.u16 [r1], rh1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
store i16 %x, i16* %p
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t1_u32(i32* %p, i32 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.u32 [r1], r2;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
store i32 %x, i32* %p
|
store i32 %x, i32* %p
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device void @t2(i32* %p, i32 %x) {
|
define ptx_device void @t1_u64(i64* %p, i64 %x) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: st.global.s32 [r1+4], r2;
|
;CHECK: st.global.u64 [r1], rd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
store i64 %x, i64* %p
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t1_f32(float* %p, float %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.f32 [r1], f1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
store float %x, float* %p
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t1_f64(double* %p, double %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.f64 [r1], fd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
store double %x, double* %p
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t2_u16(i16* %p, i16 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.u16 [r1+2], rh1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr i16* %p, i32 1
|
||||||
|
store i16 %x, i16* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t2_u32(i32* %p, i32 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.u32 [r1+4], r2;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
%i = getelementptr i32* %p, i32 1
|
%i = getelementptr i32* %p, i32 1
|
||||||
store i32 %x, i32* %i
|
store i32 %x, i32* %i
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device void @t3(i32* %p, i32 %q, i32 %x) {
|
define ptx_device void @t2_u64(i64* %p, i64 %x) {
|
||||||
;CHECK: .reg .s32 r0;
|
entry:
|
||||||
|
;CHECK: st.global.u64 [r1+8], rd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr i64* %p, i32 1
|
||||||
|
store i64 %x, i64* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t2_f32(float* %p, float %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.f32 [r1+4], f1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr float* %p, i32 1
|
||||||
|
store float %x, float* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t2_f64(double* %p, double %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.f64 [r1+8], fd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr double* %p, i32 1
|
||||||
|
store double %x, double* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: shl.b32 r0, r2, 1;
|
||||||
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
|
;CHECK-NEXT: st.global.u16 [r0], rh1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr i16* %p, i32 %q
|
||||||
|
store i16 %x, i16* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: shl.b32 r0, r2, 2;
|
;CHECK: shl.b32 r0, r2, 2;
|
||||||
;CHECK: add.s32 r0, r1, r0;
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
;CHECK: st.global.s32 [r0], r3;
|
;CHECK-NEXT: st.global.u32 [r0], r3;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
%i = getelementptr i32* %p, i32 %q
|
%i = getelementptr i32* %p, i32 %q
|
||||||
store i32 %x, i32* %i
|
store i32 %x, i32* %i
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device void @t4_global(i32 %x) {
|
define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: st.global.s32 [array], r1;
|
;CHECK: shl.b32 r0, r2, 3;
|
||||||
%i = getelementptr [10 x i32]* @array, i32 0, i32 0
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
|
;CHECK-NEXT: st.global.u64 [r0], rd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr i64* %p, i32 %q
|
||||||
|
store i64 %x, i64* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: shl.b32 r0, r2, 2;
|
||||||
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
|
;CHECK-NEXT: st.global.f32 [r0], f1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr float* %p, i32 %q
|
||||||
|
store float %x, float* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: shl.b32 r0, r2, 3;
|
||||||
|
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||||
|
;CHECK-NEXT: st.global.f64 [r0], fd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr double* %p, i32 %q
|
||||||
|
store double %x, double* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_global_u16(i16 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.u16 [array_i16], rh1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
|
||||||
|
store i16 %x, i16* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_global_u32(i32 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.u32 [array_i32], r1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
|
||||||
store i32 %x, i32* %i
|
store i32 %x, i32* %i
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device void @t4_local(i32 %x) {
|
define ptx_device void @t4_global_u64(i64 %x) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: st.local.s32 [array_local], r1;
|
;CHECK: st.global.u64 [array_i64], rd1;
|
||||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
|
||||||
|
store i64 %x, i64* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_global_f32(float %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.f32 [array_float], f1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
|
||||||
|
store float %x, float* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_global_f64(double %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.f64 [array_double], fd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
|
||||||
|
store double %x, double* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_local_u16(i16 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.local.u16 [array_local_i16], rh1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
|
||||||
|
store i16 %x, i16 addrspace(2)* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_local_u32(i32 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.local.u32 [array_local_i32], r1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
|
||||||
store i32 %x, i32 addrspace(2)* %i
|
store i32 %x, i32 addrspace(2)* %i
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device void @t4_shared(i32 %x) {
|
define ptx_device void @t4_local_u64(i64 %x) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: st.shared.s32 [array_shared], r1;
|
;CHECK: st.local.u64 [array_local_i64], rd1;
|
||||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
|
||||||
|
store i64 %x, i64 addrspace(2)* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_local_f32(float %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.local.f32 [array_local_float], f1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
|
||||||
|
store float %x, float addrspace(2)* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_local_f64(double %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.local.f64 [array_local_double], fd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
|
||||||
|
store double %x, double addrspace(2)* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_shared_u16(i16 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.shared.u16 [array_shared_i16], rh1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
|
||||||
|
store i16 %x, i16 addrspace(4)* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_shared_u32(i32 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.shared.u32 [array_shared_i32], r1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
|
||||||
store i32 %x, i32 addrspace(4)* %i
|
store i32 %x, i32 addrspace(4)* %i
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device void @t5(i32 %x) {
|
define ptx_device void @t4_shared_u64(i64 %x) {
|
||||||
entry:
|
entry:
|
||||||
;CHECK: st.global.s32 [array+4], r1;
|
;CHECK: st.shared.u64 [array_shared_i64], rd1;
|
||||||
%i = getelementptr [10 x i32]* @array, i32 0, i32 1
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
|
||||||
|
store i64 %x, i64 addrspace(4)* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_shared_f32(float %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.shared.f32 [array_shared_float], f1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
|
||||||
|
store float %x, float addrspace(4)* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t4_shared_f64(double %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.shared.f64 [array_shared_double], fd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
|
||||||
|
store double %x, double addrspace(4)* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t5_u16(i16 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.u16 [array_i16+2], rh1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
|
||||||
|
store i16 %x, i16* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t5_u32(i32 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.u32 [array_i32+4], r1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
|
||||||
store i32 %x, i32* %i
|
store i32 %x, i32* %i
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t5_u64(i64 %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.u64 [array_i64+8], rd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
|
||||||
|
store i64 %x, i64* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t5_f32(float %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.f32 [array_float+4], f1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
|
||||||
|
store float %x, float* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device void @t5_f64(double %x) {
|
||||||
|
entry:
|
||||||
|
;CHECK: st.global.f64 [array_double+8], fd1;
|
||||||
|
;CHECK-NEXT: ret;
|
||||||
|
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
|
||||||
|
store double %x, double* %i
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
@ -1,78 +0,0 @@
|
|||||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
|
||||||
|
|
||||||
;CHECK: .extern .global .f32 array[];
|
|
||||||
@array = external global [10 x float]
|
|
||||||
|
|
||||||
;CHECK: .extern .const .f32 array_constant[];
|
|
||||||
@array_constant = external addrspace(1) constant [10 x float]
|
|
||||||
|
|
||||||
;CHECK: .extern .local .f32 array_local[];
|
|
||||||
@array_local = external addrspace(2) global [10 x float]
|
|
||||||
|
|
||||||
;CHECK: .extern .shared .f32 array_shared[];
|
|
||||||
@array_shared = external addrspace(4) global [10 x float]
|
|
||||||
|
|
||||||
define ptx_device void @t1(float* %p, float %x) {
|
|
||||||
entry:
|
|
||||||
;CHECK: st.global.f32 [r1], f1;
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
store float %x, float* %p
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device void @t2(float* %p, float %x) {
|
|
||||||
entry:
|
|
||||||
;CHECK: st.global.f32 [r1+4], f1;
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr float* %p, i32 1
|
|
||||||
store float %x, float* %i
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device void @t3(float* %p, i32 %q, float %x) {
|
|
||||||
;CHECK: .reg .s32 r0;
|
|
||||||
entry:
|
|
||||||
;CHECK: shl.b32 r0, r2, 2;
|
|
||||||
;CHECK-NEXT: add.s32 r0, r1, r0;
|
|
||||||
;CHECK-NEXT: st.global.f32 [r0], f1;
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr float* %p, i32 %q
|
|
||||||
store float %x, float* %i
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device void @t4_global(float %x) {
|
|
||||||
entry:
|
|
||||||
;CHECK: st.global.f32 [array], f1;
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float]* @array, i32 0, i32 0
|
|
||||||
store float %x, float* %i
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device void @t4_local(float %x) {
|
|
||||||
entry:
|
|
||||||
;CHECK: st.local.f32 [array_local], f1;
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
|
|
||||||
store float %x, float addrspace(2)* %i
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device void @t4_shared(float %x) {
|
|
||||||
entry:
|
|
||||||
;CHECK: st.shared.f32 [array_shared], f1;
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
|
|
||||||
store float %x, float addrspace(4)* %i
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define ptx_device void @t5(float %x) {
|
|
||||||
entry:
|
|
||||||
;CHECK: st.global.f32 [array+4], f1;
|
|
||||||
;CHECK-NEXT: ret;
|
|
||||||
%i = getelementptr [10 x float]* @array, i32 0, i32 1
|
|
||||||
store float %x, float* %i
|
|
||||||
ret void
|
|
||||||
}
|
|
@ -1,29 +1,71 @@
|
|||||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||||
|
|
||||||
define ptx_device i32 @t1(i32 %x, i32 %y) {
|
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
|
||||||
;CHECK: sub.s32 r0, r1, r2;
|
; CHECK: sub.u16 rh0, rh1, rh2;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = sub i16 %x, %y
|
||||||
|
ret i16 %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
|
||||||
|
; CHECK: sub.u32 r0, r1, r2;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
%z = sub i32 %x, %y
|
%z = sub i32 %x, %y
|
||||||
;CHECK: ret;
|
|
||||||
ret i32 %z
|
ret i32 %z
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device i32 @t2(i32 %x) {
|
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||||
;CHECK: add.s32 r0, r1, -1;
|
; CHECK: sub.u64 rd0, rd1, rd2;
|
||||||
%z = sub i32 %x, 1
|
; CHECK-NEXT: ret;
|
||||||
;CHECK: ret;
|
%z = sub i64 %x, %y
|
||||||
ret i32 %z
|
ret i64 %z
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device float @t3(float %x, float %y) {
|
define ptx_device float @t1_f32(float %x, float %y) {
|
||||||
; CHECK: sub.f32 f0, f1, f2
|
; CHECK: sub.f32 f0, f1, f2
|
||||||
; CHECK-NEXT: ret;
|
; CHECK-NEXT: ret;
|
||||||
%z = fsub float %x, %y
|
%z = fsub float %x, %y
|
||||||
ret float %z
|
ret float %z
|
||||||
}
|
}
|
||||||
|
|
||||||
define ptx_device float @t4(float %x) {
|
define ptx_device double @t1_f64(double %x, double %y) {
|
||||||
|
; CHECK: sub.f64 fd0, fd1, fd2
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = fsub double %x, %y
|
||||||
|
ret double %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i16 @t2_u16(i16 %x) {
|
||||||
|
; CHECK: add.u16 rh0, rh1, -1;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = sub i16 %x, 1
|
||||||
|
ret i16 %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i32 @t2_u32(i32 %x) {
|
||||||
|
; CHECK: add.u32 r0, r1, -1;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = sub i32 %x, 1
|
||||||
|
ret i32 %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device i64 @t2_u64(i64 %x) {
|
||||||
|
; CHECK: add.u64 rd0, rd1, -1;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = sub i64 %x, 1
|
||||||
|
ret i64 %z
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device float @t2_f32(float %x) {
|
||||||
; CHECK: add.f32 f0, f1, 0FBF800000;
|
; CHECK: add.f32 f0, f1, 0FBF800000;
|
||||||
; CHECK-NEXT: ret;
|
; CHECK-NEXT: ret;
|
||||||
%z = fsub float %x, 1.0
|
%z = fsub float %x, 1.0
|
||||||
ret float %z
|
ret float %z
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define ptx_device double @t2_f64(double %x) {
|
||||||
|
; CHECK: add.f64 fd0, fd1, 0DBFF0000000000000;
|
||||||
|
; CHECK-NEXT: ret;
|
||||||
|
%z = fsub double %x, 1.0
|
||||||
|
ret double %z
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user