mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-20 12:31:40 +00:00
PTX: Add initial support for device function calls
- Calls are supported on SM 2.0+ for function with no return values git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137125 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6d1fd0b979
commit
4bdd4ed564
@ -70,6 +70,8 @@ public:
|
||||
const char *Modifier = 0);
|
||||
void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
|
||||
|
||||
void printCall(const MachineInstr *MI, raw_ostream &O);
|
||||
|
||||
unsigned GetOrCreateSourceID(StringRef FileName,
|
||||
StringRef DirName);
|
||||
|
||||
@ -242,6 +244,19 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
|
||||
OutStreamer.EmitRawText(Twine(def));
|
||||
}
|
||||
}
|
||||
|
||||
unsigned Index = 1;
|
||||
// Print parameter passing params
|
||||
for (PTXMachineFunctionInfo::param_iterator
|
||||
i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
|
||||
std::string def = "\t.param .b";
|
||||
def += utostr(*i);
|
||||
def += " __ret_";
|
||||
def += utostr(Index);
|
||||
Index++;
|
||||
def += ";";
|
||||
OutStreamer.EmitRawText(Twine(def));
|
||||
}
|
||||
}
|
||||
|
||||
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
@ -302,7 +317,11 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
printPredicateOperand(MI, OS);
|
||||
|
||||
// Write instruction to str
|
||||
if (MI->getOpcode() == PTX::CALL) {
|
||||
printCall(MI, OS);
|
||||
} else {
|
||||
printInstruction(MI, OS);
|
||||
}
|
||||
OS << ';';
|
||||
OS.flush();
|
||||
|
||||
@ -569,6 +588,28 @@ printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
|
||||
}
|
||||
}
|
||||
|
||||
void PTXAsmPrinter::
|
||||
printCall(const MachineInstr *MI, raw_ostream &O) {
|
||||
|
||||
O << "\tcall.uni\t";
|
||||
|
||||
const GlobalValue *Address = MI->getOperand(2).getGlobal();
|
||||
O << Address->getName() << ", (";
|
||||
|
||||
// (0,1) : predicate register/flag
|
||||
// (2) : callee
|
||||
for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
|
||||
//const MachineOperand& MO = MI->getOperand(i);
|
||||
|
||||
printReturnOperand(MI, i, O);
|
||||
if (i < MI->getNumOperands()-1) {
|
||||
O << ", ";
|
||||
}
|
||||
}
|
||||
|
||||
O << ")";
|
||||
}
|
||||
|
||||
unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
|
||||
StringRef DirName) {
|
||||
// If FE did not provide a file name, then assume stdin.
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
@ -134,6 +135,8 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
return "PTXISD::EXIT";
|
||||
case PTXISD::RET:
|
||||
return "PTXISD::RET";
|
||||
case PTXISD::CALL:
|
||||
return "PTXISD::CALL";
|
||||
}
|
||||
}
|
||||
|
||||
@ -345,3 +348,49 @@ SDValue PTXTargetLowering::
|
||||
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
|
||||
}
|
||||
}
|
||||
|
||||
SDValue
|
||||
PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
CallingConv::ID CallConv, bool isVarArg,
|
||||
bool &isTailCall,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
|
||||
MachineFunction& MF = DAG.getMachineFunction();
|
||||
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
|
||||
const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
|
||||
|
||||
assert(ST.callsAreHandled() && "Calls are not handled for the target device");
|
||||
|
||||
// Is there a more "LLVM"-way to create a variable-length array of values?
|
||||
SDValue* ops = new SDValue[OutVals.size() + 2];
|
||||
|
||||
ops[0] = Chain;
|
||||
|
||||
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
|
||||
const GlobalValue *GV = G->getGlobal();
|
||||
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
|
||||
ops[1] = Callee;
|
||||
} else {
|
||||
assert(false && "Function must be a GlobalAddressSDNode");
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i != OutVals.size(); ++i) {
|
||||
unsigned Size = OutVals[i].getValueType().getSizeInBits();
|
||||
SDValue Index = DAG.getTargetConstant(MFI->getNextParam(Size), MVT::i32);
|
||||
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
|
||||
Index, OutVals[i]);
|
||||
ops[i+2] = Index;
|
||||
}
|
||||
|
||||
ops[0] = Chain;
|
||||
|
||||
Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, ops, OutVals.size()+2);
|
||||
|
||||
delete [] ops;
|
||||
|
||||
return Chain;
|
||||
}
|
||||
|
@ -28,7 +28,8 @@ namespace PTXISD {
|
||||
STORE_PARAM,
|
||||
EXIT,
|
||||
RET,
|
||||
COPY_ADDRESS
|
||||
COPY_ADDRESS,
|
||||
CALL
|
||||
};
|
||||
} // namespace PTXISD
|
||||
|
||||
@ -60,6 +61,16 @@ class PTXTargetLowering : public TargetLowering {
|
||||
DebugLoc dl,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerCall(SDValue Chain, SDValue Callee,
|
||||
CallingConv::ID CallConv, bool isVarArg,
|
||||
bool &isTailCall,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
|
||||
|
||||
private:
|
||||
|
@ -168,6 +168,18 @@ def MEMret : Operand<i32> {
|
||||
let MIOperandInfo = (ops i32imm);
|
||||
}
|
||||
|
||||
// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
|
||||
// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
|
||||
|
||||
// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart,
|
||||
// [SDNPHasChain, SDNPOutGlue]>;
|
||||
// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd,
|
||||
// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
def PTXcall : SDNode<"PTXISD::CALL", SDTNone,
|
||||
[SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
|
||||
|
||||
// Branch & call targets have OtherVT type.
|
||||
def brtarget : Operand<OtherVT>;
|
||||
def calltarget : Operand<i32>;
|
||||
@ -1073,6 +1085,11 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
|
||||
}
|
||||
|
||||
let hasSideEffects = 1 in {
|
||||
def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>;
|
||||
}
|
||||
|
||||
|
||||
///===- Spill Instructions ------------------------------------------------===//
|
||||
// Special instructions used for stack spilling
|
||||
def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
|
||||
@ -1097,6 +1114,15 @@ def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
|
||||
def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
|
||||
"mov.f64\t$d, s$a", []>;
|
||||
|
||||
|
||||
// Call handling
|
||||
// def ADJCALLSTACKUP :
|
||||
// InstPTX<(outs), (ins i32imm:$amt1, i32imm:$amt2), "",
|
||||
// [(PTXcallseq_end timm:$amt1, timm:$amt2)]>;
|
||||
// def ADJCALLSTACKDOWN :
|
||||
// InstPTX<(outs), (ins i32imm:$amt), "",
|
||||
// [(PTXcallseq_start timm:$amt)]>;
|
||||
|
||||
///===- Intrinsic Instructions --------------------------------------------===//
|
||||
|
||||
include "PTXIntrinsicInstrInfo.td"
|
||||
|
@ -27,6 +27,7 @@ private:
|
||||
bool is_kernel;
|
||||
std::vector<unsigned> reg_arg, reg_local_var;
|
||||
std::vector<unsigned> reg_ret;
|
||||
std::vector<unsigned> call_params;
|
||||
bool _isDoneAddArg;
|
||||
|
||||
public:
|
||||
@ -56,6 +57,7 @@ public:
|
||||
typedef std::vector<unsigned>::const_iterator reg_iterator;
|
||||
typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
|
||||
typedef std::vector<unsigned>::const_iterator ret_iterator;
|
||||
typedef std::vector<unsigned>::const_iterator param_iterator;
|
||||
|
||||
bool argRegEmpty() const { return reg_arg.empty(); }
|
||||
int getNumArg() const { return reg_arg.size(); }
|
||||
@ -73,6 +75,13 @@ public:
|
||||
ret_iterator retRegBegin() const { return reg_ret.begin(); }
|
||||
ret_iterator retRegEnd() const { return reg_ret.end(); }
|
||||
|
||||
param_iterator paramBegin() const { return call_params.begin(); }
|
||||
param_iterator paramEnd() const { return call_params.end(); }
|
||||
unsigned getNextParam(unsigned size) {
|
||||
call_params.push_back(size);
|
||||
return call_params.size()-1;
|
||||
}
|
||||
|
||||
bool isArgReg(unsigned reg) const {
|
||||
return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
|
||||
}
|
||||
|
@ -114,6 +114,11 @@ class StringRef;
|
||||
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
bool callsAreHandled() const {
|
||||
return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
|
||||
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
|
||||
}; // class PTXSubtarget
|
||||
} // namespace llvm
|
||||
|
14
test/CodeGen/PTX/simple-call.ll
Normal file
14
test/CodeGen/PTX/simple-call.ll
Normal file
@ -0,0 +1,14 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
|
||||
|
||||
define ptx_device void @test_add(float %x, float %y) {
|
||||
; CHECK: ret;
|
||||
%z = fadd float %x, %y
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device float @test_call(float %x, float %y) {
|
||||
%a = fadd float %x, %y
|
||||
; CHECK: call.uni test_add, (__ret_{{[0-9]+}}, __ret_{{[0-9]+}});
|
||||
call void @test_add(float %a, float %y)
|
||||
ret float %a
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user