mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-16 12:24:03 +00:00
From Kalle Raiskila:
"the bigstack patch for SPU, with testcase. It is essentially the patch committed as 97091, and reverted as 97099, but with the following additions: -in vararg handling, registers are marked to be live, to not confuse the register scavenger -function prologue and epilogue are not emitted, if the stack size is 16. 16 means it is empty - there is only the register scavenger emergency spill slot, which is not used as there is no stack." git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99819 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -66,9 +66,6 @@ namespace llvm {
|
|||||||
//! Predicate test for an unsigned 10-bit value
|
//! Predicate test for an unsigned 10-bit value
|
||||||
/*!
|
/*!
|
||||||
\param Value The input value to be tested
|
\param Value The input value to be tested
|
||||||
|
|
||||||
This predicate tests for an unsigned 10-bit value, returning the 10-bit value
|
|
||||||
as a short if true.
|
|
||||||
*/
|
*/
|
||||||
inline bool isU10Constant(short Value) {
|
inline bool isU10Constant(short Value) {
|
||||||
return (Value == (Value & 0x3ff));
|
return (Value == (Value & 0x3ff));
|
||||||
@ -90,6 +87,70 @@ namespace llvm {
|
|||||||
return (Value == (Value & 0x3ff));
|
return (Value == (Value & 0x3ff));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! Predicate test for a signed 14-bit value
|
||||||
|
/*!
|
||||||
|
\param Value The input value to be tested
|
||||||
|
*/
|
||||||
|
template<typename T>
|
||||||
|
inline bool isS14Constant(T Value);
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS14Constant<short>(short Value) {
|
||||||
|
return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS14Constant<int>(int Value) {
|
||||||
|
return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS14Constant<uint32_t>(uint32_t Value) {
|
||||||
|
return (Value <= ((1 << 13) - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS14Constant<int64_t>(int64_t Value) {
|
||||||
|
return (Value >= -(1 << 13) && Value <= (1 << 13) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS14Constant<uint64_t>(uint64_t Value) {
|
||||||
|
return (Value <= ((1 << 13) - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Predicate test for a signed 16-bit value
|
||||||
|
/*!
|
||||||
|
\param Value The input value to be tested
|
||||||
|
*/
|
||||||
|
template<typename T>
|
||||||
|
inline bool isS16Constant(T Value);
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS16Constant<short>(short Value) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS16Constant<int>(int Value) {
|
||||||
|
return (Value >= -(1 << 15) && Value <= (1 << 15) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS16Constant<uint32_t>(uint32_t Value) {
|
||||||
|
return (Value <= ((1 << 15) - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS16Constant<int64_t>(int64_t Value) {
|
||||||
|
return (Value >= -(1 << 15) && Value <= (1 << 15) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline bool isS16Constant<uint64_t>(uint64_t Value) {
|
||||||
|
return (Value <= ((1 << 15) - 1));
|
||||||
|
}
|
||||||
|
|
||||||
extern Target TheCellSPUTarget;
|
extern Target TheCellSPUTarget;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1107,7 +1107,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
|
|||||||
VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
|
VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
|
||||||
true, false);
|
true, false);
|
||||||
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
|
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
|
||||||
SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
|
unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
|
||||||
|
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
|
||||||
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
|
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
|
||||||
false, false, 0);
|
false, false, 0);
|
||||||
Chain = Store.getOperand(0);
|
Chain = Store.getOperand(0);
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||||
#include "llvm/CodeGen/MachineLocation.h"
|
#include "llvm/CodeGen/MachineLocation.h"
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/RegisterScavenging.h"
|
||||||
#include "llvm/CodeGen/ValueTypes.h"
|
#include "llvm/CodeGen/ValueTypes.h"
|
||||||
#include "llvm/Target/TargetFrameInfo.h"
|
#include "llvm/Target/TargetFrameInfo.h"
|
||||||
#include "llvm/Target/TargetInstrInfo.h"
|
#include "llvm/Target/TargetInstrInfo.h"
|
||||||
@ -336,6 +337,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
|
|||||||
MachineBasicBlock &MBB = *MI.getParent();
|
MachineBasicBlock &MBB = *MI.getParent();
|
||||||
MachineFunction &MF = *MBB.getParent();
|
MachineFunction &MF = *MBB.getParent();
|
||||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
|
DebugLoc dl = II->getDebugLoc();
|
||||||
|
|
||||||
while (!MI.getOperand(i).isFI()) {
|
while (!MI.getOperand(i).isFI()) {
|
||||||
++i;
|
++i;
|
||||||
@ -364,11 +366,22 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
|
|||||||
|
|
||||||
// Replace the FrameIndex with base register with $sp (aka $r1)
|
// Replace the FrameIndex with base register with $sp (aka $r1)
|
||||||
SPOp.ChangeToRegister(SPU::R1, false);
|
SPOp.ChangeToRegister(SPU::R1, false);
|
||||||
if (Offset > SPUFrameInfo::maxFrameOffset()
|
|
||||||
|| Offset < SPUFrameInfo::minFrameOffset()) {
|
// if 'Offset' doesn't fit to the D-form instruction's
|
||||||
errs() << "Large stack adjustment ("
|
// immediate, convert the instruction to X-form
|
||||||
<< Offset
|
// if the instruction is not an AI (which takes a s10 immediate), assume
|
||||||
<< ") in SPURegisterInfo::eliminateFrameIndex.";
|
// it is a load/store that can take a s14 immediate
|
||||||
|
if ( (MI.getOpcode() == SPU::AIr32 && !isS10Constant(Offset))
|
||||||
|
|| !isS14Constant(Offset) ) {
|
||||||
|
int newOpcode = convertDFormToXForm(MI.getOpcode());
|
||||||
|
unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
|
||||||
|
BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
|
||||||
|
.addImm(Offset);
|
||||||
|
BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
|
||||||
|
.addReg(tmpReg, RegState::Kill)
|
||||||
|
.addReg(SPU::R1);
|
||||||
|
// remove the replaced D-form instruction
|
||||||
|
MBB.erase(II);
|
||||||
} else {
|
} else {
|
||||||
MO.ChangeToImmediate(Offset);
|
MO.ChangeToImmediate(Offset);
|
||||||
}
|
}
|
||||||
@ -423,6 +436,14 @@ void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
|||||||
MF.getRegInfo().setPhysRegUnused(SPU::R0);
|
MF.getRegInfo().setPhysRegUnused(SPU::R0);
|
||||||
MF.getRegInfo().setPhysRegUnused(SPU::R1);
|
MF.getRegInfo().setPhysRegUnused(SPU::R1);
|
||||||
MF.getRegInfo().setPhysRegUnused(SPU::R2);
|
MF.getRegInfo().setPhysRegUnused(SPU::R2);
|
||||||
|
|
||||||
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
|
const TargetRegisterClass *RC = &SPU::R32CRegClass;
|
||||||
|
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
|
||||||
|
RC->getAlignment(),
|
||||||
|
false));
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
|
void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
|
||||||
@ -448,7 +469,8 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
|
|||||||
assert((FrameSize & 0xf) == 0
|
assert((FrameSize & 0xf) == 0
|
||||||
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
|
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
|
||||||
|
|
||||||
if (FrameSize > 0 || MFI->hasCalls()) {
|
// the "empty" frame size is 16 - just the register scavenger spill slot
|
||||||
|
if (FrameSize > 16 || MFI->hasCalls()) {
|
||||||
FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
|
FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
|
||||||
if (hasDebugInfo) {
|
if (hasDebugInfo) {
|
||||||
// Mark effective beginning of when frame pointer becomes valid.
|
// Mark effective beginning of when frame pointer becomes valid.
|
||||||
@ -467,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
|
|||||||
// Adjust $sp by required amount
|
// Adjust $sp by required amount
|
||||||
BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
|
BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
|
||||||
.addImm(FrameSize);
|
.addImm(FrameSize);
|
||||||
} else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
|
} else if (isS16Constant(FrameSize)) {
|
||||||
// Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
|
// Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
|
||||||
// $r2 to adjust $sp:
|
// $r2 to adjust $sp:
|
||||||
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
|
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
|
||||||
@ -475,7 +497,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
|
|||||||
.addReg(SPU::R1);
|
.addReg(SPU::R1);
|
||||||
BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
|
BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
|
||||||
.addImm(FrameSize);
|
.addImm(FrameSize);
|
||||||
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1)
|
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
|
||||||
.addReg(SPU::R2)
|
.addReg(SPU::R2)
|
||||||
.addReg(SPU::R1);
|
.addReg(SPU::R1);
|
||||||
BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
|
BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
|
||||||
@ -549,7 +571,9 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
|
|||||||
"Can only insert epilog into returning blocks");
|
"Can only insert epilog into returning blocks");
|
||||||
assert((FrameSize & 0xf) == 0
|
assert((FrameSize & 0xf) == 0
|
||||||
&& "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
|
&& "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
|
||||||
if (FrameSize > 0 || MFI->hasCalls()) {
|
|
||||||
|
// the "empty" frame size is 16 - just the register scavenger spill slot
|
||||||
|
if (FrameSize > 16 || MFI->hasCalls()) {
|
||||||
FrameSize = FrameSize + SPUFrameInfo::minStackSize();
|
FrameSize = FrameSize + SPUFrameInfo::minStackSize();
|
||||||
if (isS10Constant(FrameSize + LinkSlotOffset)) {
|
if (isS10Constant(FrameSize + LinkSlotOffset)) {
|
||||||
// Reload $lr, adjust $sp by required amount
|
// Reload $lr, adjust $sp by required amount
|
||||||
@ -574,7 +598,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
|
|||||||
.addReg(SPU::R2);
|
.addReg(SPU::R2);
|
||||||
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
|
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
|
||||||
.addImm(16)
|
.addImm(16)
|
||||||
.addReg(SPU::R2);
|
.addReg(SPU::R1);
|
||||||
BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
|
BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
|
||||||
addReg(SPU::R2)
|
addReg(SPU::R2)
|
||||||
.addImm(16);
|
.addImm(16);
|
||||||
@ -618,4 +642,43 @@ SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
|
|||||||
return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
|
return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
|
||||||
|
{
|
||||||
|
switch(dFormOpcode)
|
||||||
|
{
|
||||||
|
case SPU::AIr32: return SPU::Ar32;
|
||||||
|
case SPU::LQDr32: return SPU::LQXr32;
|
||||||
|
case SPU::LQDr128: return SPU::LQXr128;
|
||||||
|
case SPU::LQDv16i8: return SPU::LQXv16i8;
|
||||||
|
case SPU::LQDv4f32: return SPU::LQXv4f32;
|
||||||
|
case SPU::STQDr32: return SPU::STQXr32;
|
||||||
|
case SPU::STQDr128: return SPU::STQXr128;
|
||||||
|
case SPU::STQDv16i8: return SPU::STQXv16i8;
|
||||||
|
case SPU::STQDv4i32: return SPU::STQXv4i32;
|
||||||
|
case SPU::STQDv4f32: return SPU::STQXv4f32;
|
||||||
|
|
||||||
|
default: assert( false && "Unhandled D to X-form conversion");
|
||||||
|
}
|
||||||
|
// default will assert, but need to return something to keep the
|
||||||
|
// compiler happy.
|
||||||
|
return dFormOpcode;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO this is already copied from PPC. Could this convenience function
|
||||||
|
// be moved to the RegScavenger class?
|
||||||
|
unsigned
|
||||||
|
SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
|
||||||
|
RegScavenger *RS,
|
||||||
|
const TargetRegisterClass *RC,
|
||||||
|
int SPAdj) const
|
||||||
|
{
|
||||||
|
assert(RS && "Register scavenging must be on");
|
||||||
|
unsigned Reg = RS->FindUnusedReg(RC);
|
||||||
|
if (Reg == 0)
|
||||||
|
Reg = RS->scavengeRegister(RC, II, SPAdj);
|
||||||
|
assert( Reg && "Register scavenger failed");
|
||||||
|
return Reg;
|
||||||
|
}
|
||||||
|
|
||||||
#include "SPUGenRegisterInfo.inc"
|
#include "SPUGenRegisterInfo.inc"
|
||||||
|
@ -53,6 +53,10 @@ namespace llvm {
|
|||||||
virtual const TargetRegisterClass* const *
|
virtual const TargetRegisterClass* const *
|
||||||
getCalleeSavedRegClasses(const MachineFunction *MF) const;
|
getCalleeSavedRegClasses(const MachineFunction *MF) const;
|
||||||
|
|
||||||
|
//! Allow for scavenging, so we can get scratch registers when needed.
|
||||||
|
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
|
||||||
|
{ return true; }
|
||||||
|
|
||||||
//! Return the reserved registers
|
//! Return the reserved registers
|
||||||
BitVector getReservedRegs(const MachineFunction &MF) const;
|
BitVector getReservedRegs(const MachineFunction &MF) const;
|
||||||
|
|
||||||
@ -97,6 +101,21 @@ namespace llvm {
|
|||||||
|
|
||||||
//! Get DWARF debugging register number
|
//! Get DWARF debugging register number
|
||||||
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
|
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
|
||||||
|
|
||||||
|
//! Convert D-form load/store to X-form load/store
|
||||||
|
/*!
|
||||||
|
Converts a register displacement load/store into a register-indexed
|
||||||
|
load/store for large stack frames, when the stack frame exceeds the
|
||||||
|
range of a s10 displacement.
|
||||||
|
*/
|
||||||
|
int convertDFormToXForm(int dFormOpcode) const;
|
||||||
|
|
||||||
|
//! Acquire an unused register in an emergency.
|
||||||
|
unsigned findScratchRegister(MachineBasicBlock::iterator II,
|
||||||
|
RegScavenger *RS,
|
||||||
|
const TargetRegisterClass *RC,
|
||||||
|
int SPAdj) const;
|
||||||
|
|
||||||
};
|
};
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
|
||||||
|
17
test/CodeGen/CellSPU/bigstack.ll
Normal file
17
test/CodeGen/CellSPU/bigstack.ll
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
; RUN: llc < %s -march=cellspu -o %t1.s
|
||||||
|
; RUN: grep lqx %t1.s | count 4
|
||||||
|
; RUN: grep il %t1.s | grep -v file | count 7
|
||||||
|
; RUN: grep stqx %t1.s | count 2
|
||||||
|
|
||||||
|
define i32 @bigstack() nounwind {
|
||||||
|
entry:
|
||||||
|
%avar = alloca i32
|
||||||
|
%big_data = alloca [2048 x i32]
|
||||||
|
store i32 3840, i32* %avar, align 4
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
return:
|
||||||
|
%retval = load i32* %avar
|
||||||
|
ret i32 %retval
|
||||||
|
}
|
||||||
|
|
Reference in New Issue
Block a user