mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Now that FP reg kill insertion happens as a separate
pass after isel instead of being interlaced with it, we can trust that all the code for a function has been isel'd before it is run. The practical impact of this is that we can scan for machine instr PHIs instead of doing a fuzzy match on the LLVM BB for PHI nodes. Doing the fuzzy match required knowing when isel would produce an FP reg stack PHI, which was gross. It was also wrong in cases where a select got lowered to a branch tree because cmovs aren't available (PR6828). Just do the scan on machine PHIs, which is simpler, faster, and more correct. This fixes PR6828.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@104333 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d596c90030
commit
a26a8471bd
@ -14,7 +14,6 @@
|
|||||||
#define DEBUG_TYPE "x86-codegen"
|
#define DEBUG_TYPE "x86-codegen"
|
||||||
#include "X86.h"
|
#include "X86.h"
|
||||||
#include "X86InstrInfo.h"
|
#include "X86InstrInfo.h"
|
||||||
#include "X86Subtarget.h"
|
|
||||||
#include "llvm/Instructions.h"
|
#include "llvm/Instructions.h"
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
@ -53,10 +52,26 @@ FunctionPass *llvm::createX87FPRegKillInserterPass() {
|
|||||||
return new FPRegKiller();
|
return new FPRegKiller();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isFPStackVReg - Return true if the specified vreg is from a fp stack
|
||||||
|
/// register class.
|
||||||
|
static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
|
||||||
|
if (!TargetRegisterInfo::isVirtualRegister(RegNo))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
switch (MRI.getRegClass(RegNo)->getID()) {
|
||||||
|
default: return false;
|
||||||
|
case X86::RFP32RegClassID:
|
||||||
|
case X86::RFP64RegClassID:
|
||||||
|
case X86::RFP80RegClassID:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// ContainsFPStackCode - Return true if the specific MBB has floating point
|
/// ContainsFPStackCode - Return true if the specific MBB has floating point
|
||||||
/// stack code, and thus needs an FP_REG_KILL.
|
/// stack code, and thus needs an FP_REG_KILL.
|
||||||
static bool ContainsFPStackCode(MachineBasicBlock *MBB, unsigned SSELevel,
|
static bool ContainsFPStackCode(MachineBasicBlock *MBB,
|
||||||
MachineRegisterInfo &MRI) {
|
const MachineRegisterInfo &MRI) {
|
||||||
// Scan the block, looking for instructions that define fp stack vregs.
|
// Scan the block, looking for instructions that define fp stack vregs.
|
||||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
|
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
@ -64,40 +79,27 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB, unsigned SSELevel,
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
|
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
|
||||||
if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef() ||
|
if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
|
||||||
!TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()))
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const TargetRegisterClass *RegClass =
|
if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
|
||||||
MRI.getRegClass(I->getOperand(op).getReg());
|
|
||||||
|
|
||||||
switch (RegClass->getID()) {
|
|
||||||
default: break;
|
|
||||||
case X86::RFP32RegClassID:
|
|
||||||
case X86::RFP64RegClassID:
|
|
||||||
case X86::RFP80RegClassID:
|
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
|
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
|
||||||
// a copy of the input value in this block. In SSE mode, we only care about
|
// a copy of the input value in this block, which is a definition of the
|
||||||
// 80-bit values.
|
// value.
|
||||||
|
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
|
||||||
|
E = MBB->succ_end(); SI != E; ++ SI) {
|
||||||
|
MachineBasicBlock *SuccBB = *SI;
|
||||||
|
for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
|
||||||
|
I != E; ++I) {
|
||||||
|
// All PHI nodes are at the top of the block.
|
||||||
|
if (!I->isPHI()) break;
|
||||||
|
|
||||||
// Final check, check LLVM BB's that are successors to the LLVM BB
|
if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
|
||||||
// corresponding to BB for FP PHI nodes.
|
|
||||||
const BasicBlock *LLVMBB = MBB->getBasicBlock();
|
|
||||||
for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
|
|
||||||
SI != E; ++SI) {
|
|
||||||
const PHINode *PN;
|
|
||||||
for (BasicBlock::const_iterator II = SI->begin();
|
|
||||||
(PN = dyn_cast<PHINode>(II)); ++II) {
|
|
||||||
if (PN->getType()->isX86_FP80Ty() ||
|
|
||||||
(SSELevel == 0 && PN->getType()->isFloatingPointTy()) ||
|
|
||||||
(SSELevel < 2 && PN->getType()->isDoubleTy())) {
|
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -120,19 +122,12 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
|
|
||||||
// Fast-path: If nothing is using the x87 registers, we don't need to do
|
// Fast-path: If nothing is using the x87 registers, we don't need to do
|
||||||
// any scanning.
|
// any scanning.
|
||||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||||
if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
|
if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
|
||||||
MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
|
MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
|
||||||
MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
|
MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
|
|
||||||
unsigned SSELevel = 0;
|
|
||||||
if (Subtarget.hasSSE2())
|
|
||||||
SSELevel = 2;
|
|
||||||
else if (Subtarget.hasSSE1())
|
|
||||||
SSELevel = 1;
|
|
||||||
|
|
||||||
bool Changed = false;
|
bool Changed = false;
|
||||||
MachineFunction::iterator MBBI = MF.begin();
|
MachineFunction::iterator MBBI = MF.begin();
|
||||||
MachineFunction::iterator EndMBB = MF.end();
|
MachineFunction::iterator EndMBB = MF.end();
|
||||||
@ -149,7 +144,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If we find any FP stack code, emit the FP_REG_KILL instruction.
|
// If we find any FP stack code, emit the FP_REG_KILL instruction.
|
||||||
if (ContainsFPStackCode(MBB, SSELevel, MRI)) {
|
if (ContainsFPStackCode(MBB, MRI)) {
|
||||||
BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
|
BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
|
||||||
MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
|
MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
|
||||||
++NumFPKill;
|
++NumFPKill;
|
||||||
|
25
test/CodeGen/X86/fp-stack.ll
Normal file
25
test/CodeGen/X86/fp-stack.ll
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
; RUN: llc %s -o - -mcpu=pentium
|
||||||
|
; PR6828
|
||||||
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
|
||||||
|
target triple = "i386-pc-linux-gnu"
|
||||||
|
|
||||||
|
define void @foo() nounwind {
|
||||||
|
entry:
|
||||||
|
%tmp6 = load x86_fp80* undef ; <x86_fp80> [#uses=2]
|
||||||
|
%tmp15 = load x86_fp80* undef ; <x86_fp80> [#uses=2]
|
||||||
|
%tmp24 = load x86_fp80* undef ; <x86_fp80> [#uses=1]
|
||||||
|
br i1 undef, label %return, label %bb.nph
|
||||||
|
|
||||||
|
bb.nph: ; preds = %entry
|
||||||
|
%cmp139 = fcmp ogt x86_fp80 %tmp15, %tmp6 ; <i1> [#uses=1]
|
||||||
|
%maxdiag.0 = select i1 %cmp139, x86_fp80 %tmp15, x86_fp80 %tmp6 ; <x86_fp80> [#uses=1]
|
||||||
|
%cmp139.1 = fcmp ogt x86_fp80 %tmp24, %maxdiag.0 ; <i1> [#uses=1]
|
||||||
|
br i1 %cmp139.1, label %sw.bb372, label %return
|
||||||
|
|
||||||
|
sw.bb372: ; preds = %bb.nph
|
||||||
|
ret void
|
||||||
|
|
||||||
|
return: ; preds = %bb.nph, %entry
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user