mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
PTX: Add basic register spilling code
The current implementation generates stack loads/stores, which are really just mov instructions from/to "special" registers. This may not be the most efficient implementation compared to an approach where the stack registers are folded directly into instructions, but it is easier to implement, and I have yet to see a case where ptxas is unable to see through this kind of register usage and work out what is really going on.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133443 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8e9d6720c3
commit
df1c8d837d
@ -23,6 +23,7 @@
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
@ -194,6 +195,18 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
|
||||
def += ';';
|
||||
OutStreamer.EmitRawText(Twine(def));
|
||||
}
|
||||
|
||||
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
|
||||
DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() << " frame object(s)\n");
|
||||
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
|
||||
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
|
||||
std::string def = "\t.reg .b";
|
||||
def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
|
||||
def += " s";
|
||||
def += utostr(i);
|
||||
def += ";";
|
||||
OutStreamer.EmitRawText(Twine(def));
|
||||
}
|
||||
}
|
||||
|
||||
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
|
@ -288,6 +288,81 @@ InsertBranch(MachineBasicBlock &MBB,
|
||||
}
|
||||
}
|
||||
|
||||
// Register spilling: explicit stack-slot stores and loads (memory operand
// folding is not implemented yet)
|
||||
/// Spill SrcReg to a stack slot by inserting a STACKSTORE* instruction
/// (really a mov into the "stack" register s<FrameIdx>) before MII.
///
/// \param MBB      basic block that receives the store.
/// \param MII      insertion point; may be MBB.end().
/// \param SrcReg   register whose value is stored.
/// \param isKill   forwarded as the kill flag on the SrcReg operand.
/// \param FrameIdx frame index, emitted as the immediate operand.
/// \param RC       register class of SrcReg; selects the store opcode.
/// \param TRI      not used by this implementation.
void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MII,
                                       unsigned SrcReg, bool isKill,
                                       int FrameIdx,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  // Only dereference the insertion point when it refers to an instruction;
  // when inserting at the end of the block an empty DebugLoc is used.
  DebugLoc DL;
  if (MII != MBB.end()) {
    DL = MII->getDebugLoc();
    DEBUG(dbgs() << "storeRegToStackSlot: " << *MII);
  }

  // NOTE(review): OpCode is left unset when RC matches none of the classes
  // below; callers are presumed to pass only these five classes -- confirm.
  int OpCode;

  // Select the appropriate opcode based on the register class
  if (RC == PTX::RegI16RegisterClass) {
    OpCode = PTX::STACKSTOREI16;
  } else if (RC == PTX::RegI32RegisterClass) {
    OpCode = PTX::STACKSTOREI32;
  } else if (RC == PTX::RegI64RegisterClass) {
    // 64-bit integers need the 64-bit store; the 32-bit opcode would pair a
    // RegI64 source with a mov.u32 instruction.
    OpCode = PTX::STACKSTOREI64;
  } else if (RC == PTX::RegF32RegisterClass) {
    OpCode = PTX::STACKSTOREF32;
  } else if (RC == PTX::RegF64RegisterClass) {
    OpCode = PTX::STACKSTOREF64;
  }

  // Build the store instruction (really a mov): slot index first, then the
  // source register carrying the caller-supplied kill state.
  MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
  MIB.addImm(FrameIdx);
  MIB.addReg(SrcReg, getKillRegState(isKill));

  AddDefaultPredicate(MIB);
}
|
||||
|
||||
/// Reload DestReg from a stack slot by inserting a STACKLOAD* instruction
/// (really a mov from the "stack" register s<FrameIdx>) before MII.
///
/// \param MBB      basic block that receives the load.
/// \param MII      insertion point; may be MBB.end().
/// \param DestReg  register that receives the reloaded value.
/// \param FrameIdx frame index, emitted as the immediate operand.
/// \param RC       register class of DestReg; selects the load opcode.
/// \param TRI      not used by this implementation.
void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MII,
                                        unsigned DestReg, int FrameIdx,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI) const {
  // Only dereference the insertion point when it refers to an instruction;
  // when inserting at the end of the block an empty DebugLoc is used.
  DebugLoc DL;
  if (MII != MBB.end()) {
    DL = MII->getDebugLoc();
    DEBUG(dbgs() << "loadRegFromStackSlot: " << *MII);
  }

  // NOTE(review): OpCode is left unset when RC matches none of the classes
  // below; callers are presumed to pass only these five classes -- confirm.
  int OpCode;

  // Select the appropriate opcode based on the register class
  if (RC == PTX::RegI16RegisterClass) {
    OpCode = PTX::STACKLOADI16;
  } else if (RC == PTX::RegI32RegisterClass) {
    OpCode = PTX::STACKLOADI32;
  } else if (RC == PTX::RegI64RegisterClass) {
    // 64-bit integers need the 64-bit load; the 32-bit opcode would pair a
    // RegI64 destination with a mov.u32 instruction.
    OpCode = PTX::STACKLOADI64;
  } else if (RC == PTX::RegF32RegisterClass) {
    OpCode = PTX::STACKLOADF32;
  } else if (RC == PTX::RegF64RegisterClass) {
    OpCode = PTX::STACKLOADF64;
  }

  // Build the load instruction (really a mov): destination register first,
  // then the slot index.
  // NOTE(review): DestReg is added without a define flag; this presumably
  // mirrors an instruction definition that lists $d among its inputs --
  // confirm against the STACKLOAD* definitions before changing it.
  MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
  MIB.addReg(DestReg);
  MIB.addImm(FrameIdx);

  AddDefaultPredicate(MIB);
}
|
||||
|
||||
// static helper routines
|
||||
|
||||
MachineSDNode *PTXInstrInfo::
|
||||
|
@ -84,6 +84,29 @@ public:
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const;
|
||||
|
||||
// Memory operand folding for spills
|
||||
// TODO: Implement this eventually and get rid of storeRegToStackSlot and
|
||||
// loadRegFromStackSlot. Doing so will get rid of the "stack" registers
|
||||
// we currently use to spill, though I doubt the overall effect on ptxas
|
||||
// output will be large. I have yet to see a case where ptxas is unable
|
||||
// to see through the "stack" register usage and hence generates
|
||||
// efficient code anyway.
|
||||
// virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
|
||||
// MachineInstr* MI,
|
||||
// const SmallVectorImpl<unsigned> &Ops,
|
||||
// int FrameIndex) const;
|
||||
|
||||
/// Insert, before MII in MBB, an instruction that stores SrcReg (of register
/// class RC) to the stack slot identified by FrameIdx.
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MII,
                                 unsigned SrcReg, bool isKill, int FrameIdx,
                                 const TargetRegisterClass *RC,
                                 const TargetRegisterInfo *TRI) const;
|
||||
/// Insert, before MII in MBB, an instruction that reloads DestReg (of register
/// class RC) from the stack slot identified by FrameIdx.
virtual void loadRegFromStackSlot(
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MII,
    unsigned DestReg,
    int FrameIdx,
    const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const;
|
||||
|
||||
// static helper routines
|
||||
|
||||
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
|
||||
|
@ -977,6 +977,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
|
||||
}
|
||||
|
||||
///===- Spill Instructions ------------------------------------------------===//
|
||||
// Special instructions used for stack spilling
|
||||
// Stack-slot store: prints as "mov.<ty>\ts<d>, <a>", where $d is the
// immediate slot index and $a is the register (of class RC) being saved.
// No selection pattern is attached.
class StackStoreInst<RegisterClass RC, string ty>
  : InstPTX<(outs), (ins i32imm:$d, RC:$a),
            !strconcat("mov.", !strconcat(ty, "\ts$d, $a")), []>;

def STACKSTOREI16 : StackStoreInst<RegI16, "u16">;
def STACKSTOREI32 : StackStoreInst<RegI32, "u32">;
def STACKSTOREI64 : StackStoreInst<RegI64, "u64">;
def STACKSTOREF32 : StackStoreInst<RegF32, "f32">;
def STACKSTOREF64 : StackStoreInst<RegF64, "f64">;
|
||||
|
||||
// Stack-slot load: prints as "mov.<ty>\t<d>, s<a>", where $d is the register
// (of class RC) being restored and $a is the immediate slot index.
// No selection pattern is attached.
// NOTE(review): the destination $d is listed under ins, not outs; kept as-is
// because the code that builds these instructions must change with it.
class StackLoadInst<RegisterClass RC, string ty>
  : InstPTX<(outs), (ins RC:$d, i32imm:$a),
            !strconcat("mov.", !strconcat(ty, "\t$d, s$a")), []>;

def STACKLOADI16 : StackLoadInst<RegI16, "u16">;
def STACKLOADI32 : StackLoadInst<RegI32, "u32">;
def STACKLOADI64 : StackLoadInst<RegI64, "u64">;
def STACKLOADF32 : StackLoadInst<RegF32, "f32">;
def STACKLOADF64 : StackLoadInst<RegF64, "f64">;
|
||||
|
||||
///===- Intrinsic Instructions --------------------------------------------===//
|
||||
|
||||
include "PTXIntrinsicInstrInfo.td"
|
||||
|
Loading…
Reference in New Issue
Block a user