PTX: Add basic register spilling code

The current implementation generates stack loads/stores, which are
really just mov instructions from/to "special" registers.  This may
not be the most efficient implementation, compared to an approach where
the stack registers are directly folded into instructions, but this is
easier to implement and I have yet to see a case where ptxas is unable
to see through this kind of register usage and know what is really
going on.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133443 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski 2011-06-20 15:56:20 +00:00
parent 8e9d6720c3
commit df1c8d837d
4 changed files with 135 additions and 0 deletions

View File

@ -23,6 +23,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@ -194,6 +195,18 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
def += ';';
OutStreamer.EmitRawText(Twine(def));
}
// Declare one PTX register per stack frame object.  Each object with index i
// is emitted as "\t.reg .b<bits> s<i>;", where <bits> is the object's size
// multiplied by 8.  The "s<i>" naming matches the "s$d" / "s$a" operands in
// the STACKSTORE*/STACKLOAD* assembly strings.
// NOTE(review): indices run over [0, getNumObjects()); if this target can
// ever have fixed frame objects, confirm that this range is still the right
// one to iterate.
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() << " frame object(s)\n");
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
// ".b<N>" declares an untyped register that is N bits wide.
std::string def = "\t.reg .b";
def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
// Register name: "s" followed by the frame object index.
def += " s";
def += utostr(i);
def += ";";
OutStreamer.EmitRawText(Twine(def));
}
}
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {

View File

@ -288,6 +288,81 @@ InsertBranch(MachineBasicBlock &MBB,
}
}
// Register spilling: stores to / reloads from stack slots (no memory operand folding yet)
/// Spill \p SrcReg to stack slot \p FrameIdx.
///
/// Inserts a STACKSTORE* instruction before \p MII in \p MBB.  The
/// instruction takes the frame index as an immediate and the spilled register
/// as its source, followed by the default predicate operands.
///
/// \param MBB      block receiving the store.
/// \param MII      insertion point; may be MBB.end().
/// \param SrcReg   register being spilled.
/// \param isKill   true if this store is the last use of \p SrcReg; forwarded
///                 as the kill flag on the source operand.
/// \param FrameIdx index of the destination stack slot.
/// \param RC       register class of \p SrcReg; selects the store opcode.
/// \param TRI      unused.
void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MII,
                                       unsigned SrcReg, bool isKill,
                                       int FrameIdx,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  // The insertion point may be the end of the block, in which case there is
  // no instruction to take a debug location from (or to print).
  DebugLoc DL;
  if (MII != MBB.end()) {
    DL = MII->getDebugLoc();
    DEBUG(dbgs() << "storeRegToStackSlot: " << *MII);
  }

  // Select the store opcode whose register operand matches the class of the
  // spilled register.
  unsigned OpCode;
  if (RC == PTX::RegI16RegisterClass) {
    OpCode = PTX::STACKSTOREI16;
  } else if (RC == PTX::RegI32RegisterClass) {
    OpCode = PTX::STACKSTOREI32;
  } else if (RC == PTX::RegI64RegisterClass) {
    // 64-bit integers need the 64-bit store; the 32-bit variant takes a
    // RegI32 operand and prints mov.u32.
    OpCode = PTX::STACKSTOREI64;
  } else if (RC == PTX::RegF32RegisterClass) {
    OpCode = PTX::STACKSTOREF32;
  } else if (RC == PTX::RegF64RegisterClass) {
    OpCode = PTX::STACKSTOREF64;
  } else {
    // Never build an instruction from an unselected opcode.
    // NOTE(review): llvm_unreachable lives in llvm/Support/ErrorHandling.h;
    // confirm it is reachable from this file's includes.
    llvm_unreachable("Unknown PTX register class for stack store");
  }

  // Build the store instruction (really a mov into the slot's register).
  MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
  MIB.addImm(FrameIdx);
  MIB.addReg(SrcReg, getKillRegState(isKill));

  AddDefaultPredicate(MIB);
}
/// Reload \p DestReg from stack slot \p FrameIdx.
///
/// Inserts a STACKLOAD* instruction before \p MII in \p MBB.  The instruction
/// takes the reloaded register followed by the frame index as an immediate,
/// then the default predicate operands.
///
/// \param MBB      block receiving the load.
/// \param MII      insertion point; may be MBB.end().
/// \param DestReg  register being reloaded.
/// \param FrameIdx index of the source stack slot.
/// \param RC       register class of \p DestReg; selects the load opcode.
/// \param TRI      unused.
void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MII,
                                        unsigned DestReg, int FrameIdx,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI) const {
  // The insertion point may be the end of the block, in which case there is
  // no instruction to take a debug location from (or to print).
  DebugLoc DL;
  if (MII != MBB.end()) {
    DL = MII->getDebugLoc();
    DEBUG(dbgs() << "loadRegFromStackSlot: " << *MII);
  }

  // Select the load opcode whose register operand matches the class of the
  // reloaded register.
  unsigned OpCode;
  if (RC == PTX::RegI16RegisterClass) {
    OpCode = PTX::STACKLOADI16;
  } else if (RC == PTX::RegI32RegisterClass) {
    OpCode = PTX::STACKLOADI32;
  } else if (RC == PTX::RegI64RegisterClass) {
    // 64-bit integers need the 64-bit load; the 32-bit variant takes a
    // RegI32 operand and prints mov.u32.
    OpCode = PTX::STACKLOADI64;
  } else if (RC == PTX::RegF32RegisterClass) {
    OpCode = PTX::STACKLOADF32;
  } else if (RC == PTX::RegF64RegisterClass) {
    OpCode = PTX::STACKLOADF64;
  } else {
    // Never build an instruction from an unselected opcode.
    // NOTE(review): llvm_unreachable lives in llvm/Support/ErrorHandling.h;
    // confirm it is reachable from this file's includes.
    llvm_unreachable("Unknown PTX register class for stack load");
  }

  // Build the load instruction (really a mov out of the slot's register).
  // NOTE(review): DestReg is added as a plain register operand, matching the
  // STACKLOAD* definitions which list it under (ins).  Consider modelling it
  // as a def in both places.
  MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
  MIB.addReg(DestReg);
  MIB.addImm(FrameIdx);

  AddDefaultPredicate(MIB);
}
// static helper routines
MachineSDNode *PTXInstrInfo::

View File

@ -84,6 +84,29 @@ public:
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
// Memory operand folding for spills
// TODO: Implement foldMemoryOperandImpl eventually and get rid of
// storeRegToStackSlot and loadRegFromStackSlot. Doing so will eliminate the
// "stack" registers we currently use to spill, though I doubt the overall
// effect on ptxas output will be large: I have yet to see a case where ptxas
// is unable to see through the "stack" register usage, so it generates
// efficient code anyway.
// virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
// MachineInstr* MI,
// const SmallVectorImpl<unsigned> &Ops,
// int FrameIndex) const;
/// Insert, before \p MII in \p MBB, an instruction that saves \p SrcReg of
/// register class \p RC into the stack slot \p FrameIndex.
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MII,
                                 unsigned SrcReg, bool isKill, int FrameIndex,
                                 const TargetRegisterClass *RC,
                                 const TargetRegisterInfo *TRI) const;

/// Insert, before \p MII in \p MBB, an instruction that restores \p DestReg
/// of register class \p RC from the stack slot \p FrameIdx.
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MII,
                                  unsigned DestReg, int FrameIdx,
                                  const TargetRegisterClass *RC,
                                  const TargetRegisterInfo *TRI) const;
// static helper routines
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,

View File

@ -977,6 +977,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
}
///===- Spill Instructions ------------------------------------------------===//
// Special instructions used for stack spilling.
//
// Stack slots are referred to by name as "s<N>", where <N> is the immediate
// operand (the slot index).  Every instruction here prints as a plain "mov"
// between an ordinary register and such a slot register, and has an empty
// selection pattern ([]).

// Stores: $d is the immediate slot index, $a is the register being saved.
// Printed as "mov.<type>  s<$d>, $a".
def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
"mov.u16\ts$d, $a", []>;
def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
"mov.u32\ts$d, $a", []>;
def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
"mov.u64\ts$d, $a", []>;
def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
"mov.f32\ts$d, $a", []>;
def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
"mov.f64\ts$d, $a", []>;

// Loads: $d is the register being restored, $a is the immediate slot index.
// Printed as "mov.<type>  $d, s<$a>".
// NOTE(review): the destination register $d is listed under (ins) and (outs)
// is empty, so these instructions declare no output operand.  Confirm whether
// $d should be moved to (outs).
def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
"mov.u16\t$d, s$a", []>;
def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
"mov.u32\t$d, s$a", []>;
def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
"mov.u64\t$d, s$a", []>;
def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
"mov.f32\t$d, s$a", []>;
def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
"mov.f64\t$d, s$a", []>;
///===- Intrinsic Instructions --------------------------------------------===//
include "PTXIntrinsicInstrInfo.td"