mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
Spill/restore X86 floating point stack registers with 64 bits of precision
instead of 80 bits of precision. This fixes PR467. This change speeds up fldry on X86 with LLC from 7.32s on apoc to 4.68s. (git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@18433 91177308-0d34-0410-b5e6-96231b3b80d8)
This commit is contained in:
parent
3986924e0b
commit
45de191b0b
@ -50,7 +50,8 @@ static unsigned getIdx(unsigned SpillSize) {
|
||||
case 8: return 0;
|
||||
case 16: return 1;
|
||||
case 32: return 2;
|
||||
case 80: return 3;
|
||||
case 64: return 3; // FP in 64-bit spill mode.
|
||||
case 80: return 4; // FP in 80-bit spill mode.
|
||||
}
|
||||
}
|
||||
|
||||
@ -58,7 +59,7 @@ void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned SrcReg, int FrameIdx) const {
|
||||
static const unsigned Opcode[] =
|
||||
{ X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FSTP80m };
|
||||
{ X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST64m, X86::FSTP80m };
|
||||
unsigned Idx = getIdx(getSpillSize(SrcReg));
|
||||
addFrameReference(BuildMI(MBB, MI, Opcode[Idx], 5), FrameIdx).addReg(SrcReg);
|
||||
}
|
||||
@ -67,7 +68,7 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned DestReg, int FrameIdx)const{
|
||||
static const unsigned Opcode[] =
|
||||
{ X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD80m };
|
||||
{ X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD64m, X86::FLD80m };
|
||||
unsigned Idx = getIdx(getSpillSize(DestReg));
|
||||
addFrameReference(BuildMI(MBB, MI, Opcode[Idx], 4, DestReg), FrameIdx);
|
||||
}
|
||||
@ -77,7 +78,7 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
const TargetRegisterClass *RC) const {
|
||||
static const unsigned Opcode[] =
|
||||
{ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV };
|
||||
{ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::FpMOV };
|
||||
BuildMI(MBB, MI, Opcode[getIdx(RC->getSize()*8)], 1, DestReg).addReg(SrcReg);
|
||||
}
|
||||
|
||||
|
@ -84,12 +84,18 @@ def R32 : RegisterClass<i32, 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
|
||||
}];
|
||||
}
|
||||
|
||||
def RFP : RegisterClass<f80, 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
|
||||
// FIXME: This sets up the floating point register files as though they are f64
|
||||
// values, though they really are f80 values. This will cause us to spill
|
||||
// values as 64-bit quantities instead of 80-bit quantities, which is much, much
|
||||
// faster on common hardware. In reality, this should be controlled by a
|
||||
// command line option or something.
|
||||
|
||||
def RFP : RegisterClass<f64, 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
|
||||
|
||||
// Floating point stack registers (these are not allocatable by the
|
||||
// register allocator - the floating point stackifier is responsible
|
||||
// for transforming FPn allocations to STn registers)
|
||||
def RST : RegisterClass<f80, 32, [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
|
||||
def RST : RegisterClass<f64, 32, [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
|
||||
let Methods = [{
|
||||
iterator allocation_order_end(MachineFunction &MF) const {
|
||||
return begin();
|
||||
|
Loading…
Reference in New Issue
Block a user