mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-19 23:29:20 +00:00
Use virtual base registers on PPC
On PowerPC, non-vector loads and stores have r+i forms; however, in functions with large stack frames these were not being used to access slots far from the stack pointer because such slots were out of range for the signed 16-bit immediate offset field. This increases register pressure because we need a separate register for each offset (when the r+r form is used). By enabling virtual base registers, we can deal with large stack frames without unduly increasing register pressure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179105 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
307b8535ee
commit
f6f8198d85
lib/Target/PowerPC
test/CodeGen/PowerPC
@ -454,6 +454,33 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Figure out if the offset in the instruction is shifted right two bits. This
|
||||
// is true for instructions like "STD", which the machine implicitly adds two
|
||||
// low zeros to.
|
||||
static bool usesIXAddr(const MachineInstr &MI) {
|
||||
unsigned OpC = MI.getOpcode();
|
||||
|
||||
switch (OpC) {
|
||||
default:
|
||||
return false;
|
||||
case PPC::LWA:
|
||||
case PPC::LD:
|
||||
case PPC::STD:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
|
||||
static unsigned getOffsetONFromFION(const MachineInstr &MI,
|
||||
unsigned FIOperandNum) {
|
||||
// Take into account whether it's an add or mem instruction
|
||||
unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
|
||||
if (MI.isInlineAsm())
|
||||
OffsetOperandNo = FIOperandNum-1;
|
||||
|
||||
return OffsetOperandNo;
|
||||
}
|
||||
|
||||
void
|
||||
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj, unsigned FIOperandNum,
|
||||
@ -471,10 +498,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
|
||||
DebugLoc dl = MI.getDebugLoc();
|
||||
|
||||
// Take into account whether it's an add or mem instruction
|
||||
unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
|
||||
if (MI.isInlineAsm())
|
||||
OffsetOperandNo = FIOperandNum-1;
|
||||
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
|
||||
|
||||
// Get the frame index.
|
||||
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
|
||||
@ -516,17 +540,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
(is64Bit ? PPC::X1 : PPC::R1),
|
||||
false);
|
||||
|
||||
// Figure out if the offset in the instruction is shifted right two bits. This
|
||||
// is true for instructions like "STD", which the machine implicitly adds two
|
||||
// low zeros to.
|
||||
bool isIXAddr = false;
|
||||
switch (OpC) {
|
||||
case PPC::LWA:
|
||||
case PPC::LD:
|
||||
case PPC::STD:
|
||||
isIXAddr = true;
|
||||
break;
|
||||
}
|
||||
// Figure out if the offset in the instruction is shifted right two bits.
|
||||
bool isIXAddr = usesIXAddr(MI);
|
||||
|
||||
// If the instruction is not present in ImmToIdxMap, then it has no immediate
|
||||
// form (and must be r+r).
|
||||
@ -618,3 +633,124 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const {
|
||||
unsigned PPCRegisterInfo::getEHHandlerRegister() const {
|
||||
return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
|
||||
}
|
||||
|
||||
/// Returns true if the instruction's frame index
|
||||
/// reference would be better served by a base register other than FP
|
||||
/// or SP. Used by LocalStackFrameAllocation to determine which frame index
|
||||
/// references it should create new base registers for.
|
||||
bool PPCRegisterInfo::
|
||||
needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
|
||||
assert(Offset < 0 && "Local offset must be negative");
|
||||
|
||||
unsigned FIOperandNum = 0;
|
||||
while (!MI->getOperand(FIOperandNum).isFI()) {
|
||||
++FIOperandNum;
|
||||
assert(FIOperandNum < MI->getNumOperands() &&
|
||||
"Instr doesn't have FrameIndex operand!");
|
||||
}
|
||||
|
||||
unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
|
||||
|
||||
if (!usesIXAddr(*MI))
|
||||
Offset += MI->getOperand(OffsetOperandNo).getImm();
|
||||
else
|
||||
Offset += MI->getOperand(OffsetOperandNo).getImm() << 2;
|
||||
|
||||
// It's the load/store FI references that cause issues, as it can be difficult
|
||||
// to materialize the offset if it won't fit in the literal field. Estimate
|
||||
// based on the size of the local frame and some conservative assumptions
|
||||
// about the rest of the stack frame (note, this is pre-regalloc, so
|
||||
// we don't know everything for certain yet) whether this offset is likely
|
||||
// to be out of range of the immediate. Return true if so.
|
||||
|
||||
// We only generate virtual base registers for loads and stores that have
|
||||
// an r+i form. Return false for everything else.
|
||||
unsigned OpC = MI->getOpcode();
|
||||
if (!ImmToIdxMap.count(OpC))
|
||||
return false;
|
||||
|
||||
// Don't generate a new virtual base register just to add zero to it.
|
||||
if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) &&
|
||||
MI->getOperand(2).getImm() == 0)
|
||||
return false;
|
||||
|
||||
MachineBasicBlock &MBB = *MI->getParent();
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
|
||||
const PPCFrameLowering *PPCFI =
|
||||
static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
|
||||
unsigned StackEst =
|
||||
PPCFI->determineFrameLayout(MF, false, true);
|
||||
|
||||
// If we likely don't need a stack frame, then we probably don't need a
|
||||
// virtual base register either.
|
||||
if (!StackEst)
|
||||
return false;
|
||||
|
||||
// Estimate an offset from the stack pointer.
|
||||
// The incoming offset is relating to the SP at the start of the function,
|
||||
// but when we access the local it'll be relative to the SP after local
|
||||
// allocation, so adjust our SP-relative offset by that allocation size.
|
||||
Offset += StackEst;
|
||||
|
||||
// The frame pointer will point to the end of the stack, so estimate the
|
||||
// offset as the difference between the object offset and the FP location.
|
||||
return !isFrameOffsetLegal(MI, Offset);
|
||||
}
|
||||
|
||||
/// Insert defining instruction(s) for BaseReg to
|
||||
/// be a pointer to FrameIdx at the beginning of the basic block.
|
||||
void PPCRegisterInfo::
|
||||
materializeFrameBaseRegister(MachineBasicBlock *MBB,
|
||||
unsigned BaseReg, int FrameIdx,
|
||||
int64_t Offset) const {
|
||||
unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
|
||||
|
||||
MachineBasicBlock::iterator Ins = MBB->begin();
|
||||
DebugLoc DL; // Defaults to "unknown"
|
||||
if (Ins != MBB->end())
|
||||
DL = Ins->getDebugLoc();
|
||||
|
||||
const MCInstrDesc &MCID = TII.get(ADDriOpc);
|
||||
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
|
||||
const MachineFunction &MF = *MBB->getParent();
|
||||
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
|
||||
|
||||
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
|
||||
.addFrameIndex(FrameIdx).addImm(Offset);
|
||||
}
|
||||
|
||||
void
|
||||
PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
|
||||
unsigned BaseReg, int64_t Offset) const {
|
||||
MachineInstr &MI = *I;
|
||||
|
||||
unsigned FIOperandNum = 0;
|
||||
while (!MI.getOperand(FIOperandNum).isFI()) {
|
||||
++FIOperandNum;
|
||||
assert(FIOperandNum < MI.getNumOperands() &&
|
||||
"Instr doesn't have FrameIndex operand!");
|
||||
}
|
||||
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
|
||||
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
|
||||
|
||||
bool isIXAddr = usesIXAddr(MI);
|
||||
if (!isIXAddr)
|
||||
Offset += MI.getOperand(OffsetOperandNo).getImm();
|
||||
else
|
||||
Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
|
||||
|
||||
// Figure out if the offset in the instruction is shifted right two bits.
|
||||
if (isIXAddr)
|
||||
Offset >>= 2; // The actual encoded value has the low two bits zero.
|
||||
|
||||
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
|
||||
}
|
||||
|
||||
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
|
||||
int64_t Offset) const {
|
||||
return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
|
||||
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
|
||||
}
|
||||
|
||||
|
@ -61,6 +61,10 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
|
||||
void lowerCRSpilling(MachineBasicBlock::iterator II,
|
||||
unsigned FrameIndex) const;
|
||||
@ -77,6 +81,15 @@ public:
|
||||
int SPAdj, unsigned FIOperandNum,
|
||||
RegScavenger *RS = NULL) const;
|
||||
|
||||
// Support for virtual base registers.
|
||||
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
|
||||
void materializeFrameBaseRegister(MachineBasicBlock *MBB,
|
||||
unsigned BaseReg, int FrameIdx,
|
||||
int64_t Offset) const;
|
||||
void resolveFrameIndex(MachineBasicBlock::iterator I,
|
||||
unsigned BaseReg, int64_t Offset) const;
|
||||
bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
|
||||
|
||||
// Debug information queries.
|
||||
unsigned getFrameRegister(const MachineFunction &MF) const;
|
||||
|
||||
|
@ -1,5 +1,9 @@
|
||||
; RUN: llc < %s -march=ppc64 | FileCheck %s
|
||||
|
||||
; Temporarily XFAIL this test until LSA stops creating single-use
|
||||
; virtual base registers.
|
||||
; XFAIL: *
|
||||
|
||||
%struct.__db_region = type { %struct.__mutex_t, [4 x i8], %struct.anon, i32, [1 x i32] }
|
||||
%struct.__mutex_t = type { i32 }
|
||||
%struct.anon = type { i64, i64 }
|
||||
|
48
test/CodeGen/PowerPC/lsa.ll
Normal file
48
test/CodeGen/PowerPC/lsa.ll
Normal file
@ -0,0 +1,48 @@
|
||||
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
define signext i32 @foo() #0 {
|
||||
entry:
|
||||
%v = alloca [8200 x i32], align 4
|
||||
%w = alloca [8200 x i32], align 4
|
||||
%q = alloca [8200 x i32], align 4
|
||||
%0 = bitcast [8200 x i32]* %v to i8*
|
||||
call void @llvm.lifetime.start(i64 32800, i8* %0) #0
|
||||
%1 = bitcast [8200 x i32]* %w to i8*
|
||||
call void @llvm.lifetime.start(i64 32800, i8* %1) #0
|
||||
%2 = bitcast [8200 x i32]* %q to i8*
|
||||
call void @llvm.lifetime.start(i64 32800, i8* %2) #0
|
||||
%arraydecay = getelementptr inbounds [8200 x i32]* %q, i64 0, i64 0
|
||||
%arraydecay1 = getelementptr inbounds [8200 x i32]* %v, i64 0, i64 0
|
||||
%arraydecay2 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 0
|
||||
call void @bar(i32* %arraydecay, i32* %arraydecay1, i32* %arraydecay2) #0
|
||||
%3 = load i32* %arraydecay2, align 4, !tbaa !0
|
||||
%arrayidx3 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 1
|
||||
%4 = load i32* %arrayidx3, align 4, !tbaa !0
|
||||
|
||||
; CHECK: @foo
|
||||
; CHECK-NOT: lwzx
|
||||
; CHECK: lwz {{[0-9]+}}, 4([[REG:[0-9]+]])
|
||||
; CHECK: lwz {{[0-9]+}}, 0([[REG]])
|
||||
; CHECK: blr
|
||||
|
||||
%add = add nsw i32 %4, %3
|
||||
call void @llvm.lifetime.end(i64 32800, i8* %2) #0
|
||||
call void @llvm.lifetime.end(i64 32800, i8* %1) #0
|
||||
call void @llvm.lifetime.end(i64 32800, i8* %0) #0
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
declare void @llvm.lifetime.start(i64, i8* nocapture) #0
|
||||
|
||||
declare void @bar(i32*, i32*, i32*)
|
||||
|
||||
declare void @llvm.lifetime.end(i64, i8* nocapture) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
!0 = metadata !{metadata !"int", metadata !1}
|
||||
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
|
||||
|
Loading…
x
Reference in New Issue
Block a user