From a960d95253be892d5f2e3017ba5df989c247a0c1 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 13 Jan 2003 01:01:59 +0000 Subject: [PATCH] New files git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@5260 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/FloatingPoint.cpp | 587 +++++++++++++++++++++++++++ lib/Target/X86/PeepholeOptimizer.cpp | 86 ++++ lib/Target/X86/X86FloatingPoint.cpp | 587 +++++++++++++++++++++++++++ lib/Target/X86/X86PeepholeOpt.cpp | 86 ++++ 4 files changed, 1346 insertions(+) create mode 100644 lib/Target/X86/FloatingPoint.cpp create mode 100644 lib/Target/X86/PeepholeOptimizer.cpp create mode 100644 lib/Target/X86/X86FloatingPoint.cpp create mode 100644 lib/Target/X86/X86PeepholeOpt.cpp diff --git a/lib/Target/X86/FloatingPoint.cpp b/lib/Target/X86/FloatingPoint.cpp new file mode 100644 index 00000000000..7fdd97fbc49 --- /dev/null +++ b/lib/Target/X86/FloatingPoint.cpp @@ -0,0 +1,587 @@ +//===-- FloatingPoint.cpp - Floating point Reg -> Stack converter ---------===// +// +// This file defines the pass which converts floating point instructions from +// virtual registers into register stack instructions. 
+// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/Target/MachineInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "Support/Statistic.h" +#include +#include + +namespace { + Statistic<> NumFXCH("x86-codegen", "Number of fxch instructions inserted"); + Statistic<> NumFP ("x86-codegen", "Number of floating point instructions"); + + struct FPS : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { return "X86 FP Stackifier"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + private: + LiveVariables *LV; // Live variable info for current function... + MachineBasicBlock *MBB; // Current basic block + unsigned Stack[8]; // FP Registers in each stack slot... + unsigned RegMap[8]; // Track which stack slot contains each register + unsigned StackTop; // The current top of the FP stack. + + void dumpStack() const { + std::cerr << "Stack contents:"; + for (unsigned i = 0; i != StackTop; ++i) { + std::cerr << " FP" << Stack[i]; + assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); + } + std::cerr << "\n"; + } + private: + // getSlot - Return the stack slot number a particular register number is + // in... 
+ unsigned getSlot(unsigned RegNo) const { + assert(RegNo < 8 && "Regno out of range!"); + return RegMap[RegNo]; + } + + // getStackEntry - Return the X86::FP register in register ST(i) + unsigned getStackEntry(unsigned STi) const { + assert(STi < StackTop && "Access past stack top!"); + return Stack[StackTop-1-STi]; + } + + // getSTReg - Return the X86::ST(i) register which contains the specified + // FP register + unsigned getSTReg(unsigned RegNo) const { + return StackTop - 1 - getSlot(RegNo) + X86::ST0; + } + + // pushReg - Push the specifiex FP register onto the stack + void pushReg(unsigned Reg) { + assert(Reg < 8 && "Register number out of range!"); + assert(StackTop < 8 && "Stack overflow!"); + Stack[StackTop] = Reg; + RegMap[Reg] = StackTop++; + } + + bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } + void moveToTop(unsigned RegNo, MachineBasicBlock::iterator &I) { + if (!isAtTop(RegNo)) { + unsigned Slot = getSlot(RegNo); + unsigned STReg = getSTReg(RegNo); + unsigned RegOnTop = getStackEntry(0); + + // Swap the slots the regs are in + std::swap(RegMap[RegNo], RegMap[RegOnTop]); + + // Swap stack slot contents + assert(RegMap[RegOnTop] < StackTop); + std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); + + // Emit an fxch to update the runtime processors version of the state + MachineInstr *MI = BuildMI(X86::FXCH, 1).addReg(STReg); + I = 1+MBB->insert(I, MI); + NumFXCH++; + } + } + + void duplicateToTop(unsigned RegNo, unsigned AsReg, + MachineBasicBlock::iterator &I) { + unsigned STReg = getSTReg(RegNo); + pushReg(AsReg); // New register on top of stack + + MachineInstr *MI = BuildMI(X86::FLDrr, 1).addReg(STReg); + I = 1+MBB->insert(I, MI); + } + + // popStackAfter - Pop the current value off of the top of the FP stack + // after the specified instruction. 
+ void popStackAfter(MachineBasicBlock::iterator &I); + + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); + + void handleZeroArgFP(MachineBasicBlock::iterator &I); + void handleOneArgFP(MachineBasicBlock::iterator &I); + void handleTwoArgFP(MachineBasicBlock::iterator &I); + void handleSpecialFP(MachineBasicBlock::iterator &I); + }; +} + +Pass *createX86FloatingPointStackifierPass() { return new FPS(); } + +/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP +/// register references into FP stack references. +/// +bool FPS::runOnMachineFunction(MachineFunction &MF) { + LV = &getAnalysis(); + StackTop = 0; + + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= processBasicBlock(MF, *I); + return Changed; +} + +/// processBasicBlock - Loop over all of the instructions in the basic block, +/// transforming FP instructions into their stack form. +/// +bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { + const TargetInstrInfo &TII = MF.getTarget().getInstrInfo(); + bool Changed = false; + MBB = &BB; + + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineInstr *MI = *I; + MachineInstr *PrevMI = I == BB.begin() ? 0 : *(I-1); + unsigned Flags = TII.get(MI->getOpcode()).TSFlags; + + if ((Flags & X86II::FPTypeMask) == 0) continue; // Ignore non-fp insts! + + ++NumFP; // Keep track of # of pseudo instrs + DEBUG(std::cerr << "\nFPInst:\t"; + MI->print(std::cerr, MF.getTarget())); + + // Get dead variables list now because the MI pointer may be deleted as part + // of processing! 
+ LiveVariables::killed_iterator IB = LV->dead_begin(MI); + LiveVariables::killed_iterator IE = LV->dead_end(MI); + + DEBUG(const MRegisterInfo *MRI = MF.getTarget().getRegisterInfo(); + LiveVariables::killed_iterator I = LV->killed_begin(MI); + LiveVariables::killed_iterator E = LV->killed_end(MI); + if (I != E) { + std::cerr << "Killed Operands:"; + for (; I != E; ++I) + std::cerr << " %" << MRI->getName(I->second); + std::cerr << "\n"; + }); + + switch (Flags & X86II::FPTypeMask) { + case X86II::ZeroArgFP: handleZeroArgFP(I); break; + case X86II::OneArgFP: handleOneArgFP(I); break; + + case X86II::OneArgFPRW: // ST(0) = fsqrt(ST(0)) + assert(0 && "FP instr type not handled yet!"); + + case X86II::TwoArgFP: handleTwoArgFP(I); break; + case X86II::SpecialFP: handleSpecialFP(I); break; + default: assert(0 && "Unknown FP Type!"); + } + + // Check to see if any of the values defined by this instruction are dead + // after definition. If so, pop them. + for (; IB != IE; ++IB) { + unsigned Reg = IB->second; + if (Reg >= X86::FP0 && Reg <= X86::FP6) { + DEBUG(std::cerr << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); + ++I; // Insert fxch AFTER the instruction + moveToTop(Reg-X86::FP0, I); // Insert fxch if neccesary + --I; // Move to fxch or old instruction + popStackAfter(I); // Pop the top of the stack, killing value + } + } + + // Print out all of the instructions expanded to if -debug + DEBUG(if (*I == PrevMI) { + std::cerr<< "Just deleted pseudo instruction\n"; + } else { + MachineBasicBlock::iterator Start = I; + // Rewind to first instruction newly inserted. 
+ while (Start != BB.begin() && *(Start-1) != PrevMI) --Start; + std::cerr << "Inserted instructions:\n"; + do TII.print(*Start, std::cerr << "\t", MF.getTarget()); + while (++Start != I+1); + } + dumpStack(); + ); + + Changed = true; + } + + assert(StackTop == 0 && "Stack not empty at end of basic block?"); + return Changed; +} + +//===----------------------------------------------------------------------===// +// Efficient Lookup Table Support +//===----------------------------------------------------------------------===// + +struct TableEntry { + unsigned from; + unsigned to; + bool operator<(const TableEntry &TE) const { return from < TE.from; } + bool operator<(unsigned V) const { return from < V; } +}; + +static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { + for (unsigned i = 0; i != NumEntries-1; ++i) + if (!(Table[i] < Table[i+1])) return false; + return true; +} + +static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { + const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); + if (I != Table+N && I->from == Opcode) + return I->to; + return -1; +} + +#define ARRAY_SIZE(TABLE) \ + (sizeof(TABLE)/sizeof(TABLE[0])) + +#ifdef NDEBUG +#define ASSERT_SORTED(TABLE) +#else +#define ASSERT_SORTED(TABLE) \ + { static bool TABLE##Checked = false; \ + if (!TABLE##Checked) \ + assert(TableIsSorted(TABLE, ARRAY_SIZE(TABLE)) && \ + "All lookup tables must be sorted for efficient access!"); \ + } +#endif + + +//===----------------------------------------------------------------------===// +// Helper Methods +//===----------------------------------------------------------------------===// + +// PopTable - Sorted map of instructions to their popping version. The first +// element is an instruction, the second is the version which pops. 
+//
+static const TableEntry PopTable[] = {
+  { X86::FSTr32   , X86::FSTPr32    },
+  { X86::FSTr64   , X86::FSTPr64    },
+  { X86::FSTrr    , X86::FSTPrr     },
+  { X86::FISTr16  , X86::FISTPr16   },
+  { X86::FISTr32  , X86::FISTPr32   },
+
+  { X86::FADDrST0 , X86::FADDPrST0  },
+  { X86::FSUBrST0 , X86::FSUBPrST0  },
+  { X86::FSUBRrST0, X86::FSUBRPrST0 },
+  { X86::FMULrST0 , X86::FMULPrST0  },
+  { X86::FDIVrST0 , X86::FDIVPrST0  },
+  { X86::FDIVRrST0, X86::FDIVRPrST0 },
+
+  { X86::FUCOMr   , X86::FUCOMPr    },
+  { X86::FUCOMPr  , X86::FUCOMPPr   },
+};
+
+/// popStackAfter - Pop the current value off of the top of the FP stack after
+/// the specified instruction.  This attempts to be sneaky and combine the pop
+/// into the instruction itself if possible.  The iterator is left pointing to
+/// the last instruction, be it a new pop instruction inserted, or the old
+/// instruction if it was modified in place.
+///
+void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
+  ASSERT_SORTED(PopTable);
+  assert(StackTop > 0 && "Cannot pop empty stack!");
+  // Shrink the modeled stack and mark the popped register as unmapped.
+  RegMap[Stack[--StackTop]] = ~0;     // Update state
+
+  // Check to see if there is a popping version of this instruction...
+  int Opcode = Lookup(PopTable, ARRAY_SIZE(PopTable), (*I)->getOpcode());
+  if (Opcode != -1) {
+    (*I)->setOpcode(Opcode);
+    // The double-popping compare is emitted without its register operand.
+    // NOTE(review): presumably because it always compares ST(0) with ST(1) --
+    // confirm against the instruction definition.
+    if (Opcode == X86::FUCOMPPr)
+      (*I)->RemoveOperand(0);
+
+  } else {    // Insert an explicit pop
+    MachineInstr *MI = BuildMI(X86::FSTPrr, 1).addReg(X86::ST0);
+    I = MBB->insert(I+1, MI);
+  }
+}
+
+// getFPReg - Return the index n of the FP<n> register named by the operand,
+// which must be a physical register in the range FP0-FP6.
+static unsigned getFPReg(const MachineOperand &MO) {
+  assert(MO.isPhysicalRegister() && "Expected an FP register!");
+  unsigned Reg = MO.getReg();
+  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+  return Reg - X86::FP0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction transformation implementation
+//===----------------------------------------------------------------------===//
+
+/// handleZeroArgFP - Handle instructions which take no FP register input and
+/// push their result:  ST(0) = fld0    ST(0) = flds <mem>
+//
+void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
+  MachineInstr *MI = *I;
+  unsigned DestReg = getFPReg(MI->getOperand(0));
+  MI->RemoveOperand(0);   // Remove the explicit ST(0) operand
+
+  // Result gets pushed on the stack...
+  pushReg(DestReg);
+}
+
+/// handleOneArgFP - Handle instructions which read a single FP register and
+/// produce no FP result:  fst ST(0), <mem>
+//
+void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
+  MachineInstr *MI = *I;
+  assert(MI->getNumOperands() == 5 && "Can only handle fst* instructions!");
+
+  // The FP source register is the fifth operand.
+  unsigned Reg = getFPReg(MI->getOperand(4));
+  // Determine whether this instruction is the last use of the source.
+  bool KillsSrc = false;
+  for (LiveVariables::killed_iterator KI = LV->killed_begin(MI),
+         E = LV->killed_end(MI); KI != E; ++KI)
+    KillsSrc |= KI->second == X86::FP0+Reg;
+
+  // FSTPr80 and FISTPr64 are strange because there are no non-popping versions.
+  // If we have one _and_ we don't want to pop the operand, duplicate the value
+  // on the stack instead of moving it.  This ensures that popping the value is
+  // always ok.
+  //
+  if ((MI->getOpcode() == X86::FSTPr80 ||
+       MI->getOpcode() == X86::FISTPr64) && !KillsSrc) {
+    duplicateToTop(Reg, 7 /*temp register*/, I);
+  } else {
+    moveToTop(Reg, I);            // Move to the top of the stack...
+  }
+  MI->RemoveOperand(4);           // Remove explicit ST(0) operand
+
+  if (MI->getOpcode() == X86::FSTPr80 || MI->getOpcode() == X86::FISTPr64) {
+    // These opcodes always pop, so only the modeled stack needs shrinking.
+    assert(StackTop > 0 && "Stack empty??");
+    --StackTop;
+  } else if (KillsSrc) { // Last use of operand?
+    popStackAfter(I);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Define tables of various ways to map pseudo instructions
+//
+
+// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
+static const TableEntry ForwardST0Table[] = {
+  { X86::FpADD,  X86::FADDST0r  },
+  { X86::FpSUB,  X86::FSUBST0r  },
+  { X86::FpMUL,  X86::FMULST0r  },
+  { X86::FpDIV,  X86::FDIVST0r  },
+  { X86::FpUCOM, X86::FUCOMr    },
+};
+
+// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
+// FpUCOM maps to ~0 because it has no reverse form.
+static const TableEntry ReverseST0Table[] = {
+  { X86::FpADD,  X86::FADDST0r  },   // commutative
+  { X86::FpSUB,  X86::FSUBRST0r },
+  { X86::FpMUL,  X86::FMULST0r  },   // commutative
+  { X86::FpDIV,  X86::FDIVRST0r },
+  { X86::FpUCOM, ~0             },
+};
+
+// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
+static const TableEntry ForwardSTiTable[] = {
+  { X86::FpADD,  X86::FADDrST0  },   // commutative
+  { X86::FpSUB,  X86::FSUBRrST0 },
+  { X86::FpMUL,  X86::FMULrST0  },   // commutative
+  { X86::FpDIV,  X86::FDIVRrST0 },
+  { X86::FpUCOM, X86::FUCOMr    },
+};
+
+// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
+// FpUCOM maps to ~0 because it has no reverse form.
+static const TableEntry ReverseSTiTable[] = {
+  { X86::FpADD,  X86::FADDrST0  },
+  { X86::FpSUB,  X86::FSUBrST0  },
+  { X86::FpMUL,  X86::FMULrST0  },
+  { X86::FpDIV,  X86::FDIVrST0  },
+  { X86::FpUCOM, ~0             },
+};
+
+
+/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
+/// instructions which need to be simplified and possibly transformed.
+/// +/// Result: ST(0) = fsub ST(0), ST(i) +/// ST(i) = fsub ST(0), ST(i) +/// ST(0) = fsubr ST(0), ST(i) +/// ST(i) = fsubr ST(0), ST(i) +/// +/// In addition to three address instructions, this also handles the FpUCOM +/// instruction which only has two operands, but no destination. This +/// instruction is also annoying because there is no "reverse" form of it +/// available. +/// +void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { + ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); + ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); + MachineInstr *MI = *I; + + unsigned NumOperands = MI->getNumOperands(); + assert(NumOperands == 3 || + (NumOperands == 2 && MI->getOpcode() == X86::FpUCOM) && + "Illegal TwoArgFP instruction!"); + unsigned Dest = getFPReg(MI->getOperand(0)); + unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); + unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); + bool KillsOp0 = false, KillsOp1 = false; + + for (LiveVariables::killed_iterator KI = LV->killed_begin(MI), + E = LV->killed_end(MI); KI != E; ++KI) { + KillsOp0 |= (KI->second == X86::FP0+Op0); + KillsOp1 |= (KI->second == X86::FP0+Op1); + } + + // If this is an FpUCOM instruction, we must make sure the first operand is on + // the top of stack, the other one can be anywhere... + if (MI->getOpcode() == X86::FpUCOM) + moveToTop(Op0, I); + + unsigned TOS = getStackEntry(0); + + // One of our operands must be on the top of the stack. If neither is yet, we + // need to move one. + if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? + // We can choose to move either operand to the top of the stack. If one of + // the operands is killed by this instruction, we want that one so that we + // can update right on top of the old version. + if (KillsOp0) { + moveToTop(Op0, I); // Move dead operand to TOS. 
+ TOS = Op0; + } else if (KillsOp1) { + moveToTop(Op1, I); + TOS = Op1; + } else { + // All of the operands are live after this instruction executes, so we + // cannot update on top of any operand. Because of this, we must + // duplicate one of the stack elements to the top. It doesn't matter + // which one we pick. + // + duplicateToTop(Op0, Dest, I); + Op0 = TOS = Dest; + KillsOp0 = true; + } + } else if (!KillsOp0 && !KillsOp1 && MI->getOpcode() != X86::FpUCOM) { + // If we DO have one of our operands at the top of the stack, but we don't + // have a dead operand, we must duplicate one of the operands to a new slot + // on the stack. + duplicateToTop(Op0, Dest, I); + Op0 = TOS = Dest; + KillsOp0 = true; + } + + // Now we know that one of our operands is on the top of the stack, and at + // least one of our operands is killed by this instruction. + assert((TOS == Op0 || TOS == Op1) && + (KillsOp0 || KillsOp1 || MI->getOpcode() == X86::FpUCOM) && + "Stack conditions not set up right!"); + + // We decide which form to use based on what is on the top of the stack, and + // which operand is killed by this instruction. + const TableEntry *InstTable; + bool isForward = TOS == Op0; + bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); + if (updateST0) { + if (isForward) + InstTable = ForwardST0Table; + else + InstTable = ReverseST0Table; + } else { + if (isForward) + InstTable = ForwardSTiTable; + else + InstTable = ReverseSTiTable; + } + + int Opcode = Lookup(InstTable, ARRAY_SIZE(ForwardST0Table), MI->getOpcode()); + assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); + + // NotTOS - The register which is not on the top of stack... + unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; + + // Replace the old instruction with a new instruction + *I = BuildMI(Opcode, 1).addReg(getSTReg(NotTOS)); + + // If both operands are killed, pop one off of the stack in addition to + // overwriting the other one. 
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) { + assert(!updateST0 && "Should have updated other operand!"); + popStackAfter(I); // Pop the top of stack + } + + // Insert an explicit pop of the "updated" operand for FUCOM + if (MI->getOpcode() == X86::FpUCOM) { + if (KillsOp0 && !KillsOp1) + popStackAfter(I); // If we kill the first operand, pop it! + else if (KillsOp1 && Op0 != Op1) { + if (getStackEntry(0) == Op1) { + popStackAfter(I); // If it's right at the top of stack, just pop it + } else { + // Otherwise, move the top of stack into the dead slot, killing the + // operand without having to add in an explicit xchg then pop. + // + unsigned STReg = getSTReg(Op1); + unsigned OldSlot = getSlot(Op1); + unsigned TopReg = Stack[StackTop-1]; + Stack[OldSlot] = TopReg; + RegMap[TopReg] = OldSlot; + RegMap[Op1] = ~0; + Stack[--StackTop] = ~0; + + MachineInstr *MI = BuildMI(X86::FSTPrr, 1).addReg(STReg); + I = MBB->insert(I+1, MI); + } + } + } + + // Update stack information so that we know the destination register is now on + // the stack. + if (MI->getOpcode() != X86::FpUCOM) { + unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); + assert(UpdatedSlot < StackTop && Dest < 7); + Stack[UpdatedSlot] = Dest; + RegMap[Dest] = UpdatedSlot; + } + delete MI; // Remove the old instruction +} + + +/// handleSpecialFP - Handle special instructions which behave unlike other +/// floating point instructions. This is primarily inteaded for use by pseudo +/// instructions. +/// +void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { + MachineInstr *MI = *I; + switch (MI->getOpcode()) { + default: assert(0 && "Unknown SpecialFP instruction!"); + case X86::FpGETRESULT: // Appears immediately after a call returning FP type! 
+ assert(StackTop == 0 && "Stack should be empty after a call!"); + pushReg(getFPReg(MI->getOperand(0))); + break; + case X86::FpSETRESULT: + assert(StackTop == 1 && "Stack should have one element on it to return!"); + --StackTop; // "Forget" we have something on the top of stack! + break; + case X86::FpMOV: { + unsigned SrcReg = getFPReg(MI->getOperand(1)); + unsigned DestReg = getFPReg(MI->getOperand(0)); + bool KillsSrc = false; + for (LiveVariables::killed_iterator KI = LV->killed_begin(MI), + E = LV->killed_end(MI); KI != E; ++KI) + KillsSrc |= KI->second == X86::FP0+SrcReg; + + if (KillsSrc) { + // If the input operand is killed, we can just change the owner of the + // incoming stack slot into the result. + unsigned Slot = getSlot(SrcReg); + assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); + Stack[Slot] = DestReg; + RegMap[DestReg] = Slot; + + } else { + // For FMOV we just duplicate the specified value to a new stack slot. + // This could be made better, but would require substantial changes. + duplicateToTop(SrcReg, DestReg, I); + } + break; + } + } + + I = MBB->erase(I)-1; // Remove the pseudo instruction +} diff --git a/lib/Target/X86/PeepholeOptimizer.cpp b/lib/Target/X86/PeepholeOptimizer.cpp new file mode 100644 index 00000000000..95828191fbe --- /dev/null +++ b/lib/Target/X86/PeepholeOptimizer.cpp @@ -0,0 +1,86 @@ +//===-- PeepholeOptimizer.cpp - X86 Peephole Optimizer --------------------===// +// +// This file contains a peephole optimizer for the X86. 
+// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace { + struct PH : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &MF); + + bool PeepholeOptimize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &I); + + virtual const char *getPassName() const { return "X86 Peephole Optimizer"; } + }; +} + +Pass *createX86PeepholeOptimizerPass() { return new PH(); } + +bool PH::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI != E; ++BI) + for (MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E; ) + if (PeepholeOptimize(*BI, I)) + Changed = true; + else + ++I; + + return Changed; +} + + +bool PH::PeepholeOptimize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &I) { + MachineInstr *MI = *I; + MachineInstr *Next = (I+1 != MBB.end()) ? *(I+1) : 0; + unsigned Size = 0; + switch (MI->getOpcode()) { + case X86::MOVrr8: + case X86::MOVrr16: + case X86::MOVrr32: // Destroy X = X copies... + if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { + I = MBB.erase(I); + delete MI; + return true; + } + return false; + +#if 0 + case X86::MOVir32: Size++; + case X86::MOVir16: Size++; + case X86::MOVir8: + // FIXME: We can only do this transformation if we know that flags are not + // used here, because XOR clobbers the flags! 
+ if (MI->getOperand(1).isImmediate()) { // avoid mov EAX, + int Val = MI->getOperand(1).getImmedValue(); + if (Val == 0) { // mov EAX, 0 -> xor EAX, EAX + static const unsigned Opcode[] ={X86::XORrr8,X86::XORrr16,X86::XORrr32}; + unsigned Reg = MI->getOperand(0).getReg(); + *I = BuildMI(Opcode[Size], 2, Reg).addReg(Reg).addReg(Reg); + delete MI; + return true; + } else if (Val == -1) { // mov EAX, -1 -> or EAX, -1 + // TODO: 'or Reg, -1' has a smaller encoding than 'mov Reg, -1' + } + } + return false; +#endif + case X86::BSWAPr32: // Change bswap EAX, bswap EAX into nothing + if (Next->getOpcode() == X86::BSWAPr32 && + MI->getOperand(0).getReg() == Next->getOperand(0).getReg()) { + I = MBB.erase(MBB.erase(I)); + delete MI; + delete Next; + return true; + } + return false; + default: + return false; + } +} diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp new file mode 100644 index 00000000000..7fdd97fbc49 --- /dev/null +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -0,0 +1,587 @@ +//===-- FloatingPoint.cpp - Floating point Reg -> Stack converter ---------===// +// +// This file defines the pass which converts floating point instructions from +// virtual registers into register stack instructions. 
+// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/Target/MachineInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "Support/Statistic.h" +#include +#include + +namespace { + Statistic<> NumFXCH("x86-codegen", "Number of fxch instructions inserted"); + Statistic<> NumFP ("x86-codegen", "Number of floating point instructions"); + + struct FPS : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { return "X86 FP Stackifier"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + private: + LiveVariables *LV; // Live variable info for current function... + MachineBasicBlock *MBB; // Current basic block + unsigned Stack[8]; // FP Registers in each stack slot... + unsigned RegMap[8]; // Track which stack slot contains each register + unsigned StackTop; // The current top of the FP stack. + + void dumpStack() const { + std::cerr << "Stack contents:"; + for (unsigned i = 0; i != StackTop; ++i) { + std::cerr << " FP" << Stack[i]; + assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); + } + std::cerr << "\n"; + } + private: + // getSlot - Return the stack slot number a particular register number is + // in... 
+ unsigned getSlot(unsigned RegNo) const { + assert(RegNo < 8 && "Regno out of range!"); + return RegMap[RegNo]; + } + + // getStackEntry - Return the X86::FP register in register ST(i) + unsigned getStackEntry(unsigned STi) const { + assert(STi < StackTop && "Access past stack top!"); + return Stack[StackTop-1-STi]; + } + + // getSTReg - Return the X86::ST(i) register which contains the specified + // FP register + unsigned getSTReg(unsigned RegNo) const { + return StackTop - 1 - getSlot(RegNo) + X86::ST0; + } + + // pushReg - Push the specifiex FP register onto the stack + void pushReg(unsigned Reg) { + assert(Reg < 8 && "Register number out of range!"); + assert(StackTop < 8 && "Stack overflow!"); + Stack[StackTop] = Reg; + RegMap[Reg] = StackTop++; + } + + bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } + void moveToTop(unsigned RegNo, MachineBasicBlock::iterator &I) { + if (!isAtTop(RegNo)) { + unsigned Slot = getSlot(RegNo); + unsigned STReg = getSTReg(RegNo); + unsigned RegOnTop = getStackEntry(0); + + // Swap the slots the regs are in + std::swap(RegMap[RegNo], RegMap[RegOnTop]); + + // Swap stack slot contents + assert(RegMap[RegOnTop] < StackTop); + std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); + + // Emit an fxch to update the runtime processors version of the state + MachineInstr *MI = BuildMI(X86::FXCH, 1).addReg(STReg); + I = 1+MBB->insert(I, MI); + NumFXCH++; + } + } + + void duplicateToTop(unsigned RegNo, unsigned AsReg, + MachineBasicBlock::iterator &I) { + unsigned STReg = getSTReg(RegNo); + pushReg(AsReg); // New register on top of stack + + MachineInstr *MI = BuildMI(X86::FLDrr, 1).addReg(STReg); + I = 1+MBB->insert(I, MI); + } + + // popStackAfter - Pop the current value off of the top of the FP stack + // after the specified instruction. 
+ void popStackAfter(MachineBasicBlock::iterator &I); + + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); + + void handleZeroArgFP(MachineBasicBlock::iterator &I); + void handleOneArgFP(MachineBasicBlock::iterator &I); + void handleTwoArgFP(MachineBasicBlock::iterator &I); + void handleSpecialFP(MachineBasicBlock::iterator &I); + }; +} + +Pass *createX86FloatingPointStackifierPass() { return new FPS(); } + +/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP +/// register references into FP stack references. +/// +bool FPS::runOnMachineFunction(MachineFunction &MF) { + LV = &getAnalysis(); + StackTop = 0; + + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= processBasicBlock(MF, *I); + return Changed; +} + +/// processBasicBlock - Loop over all of the instructions in the basic block, +/// transforming FP instructions into their stack form. +/// +bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { + const TargetInstrInfo &TII = MF.getTarget().getInstrInfo(); + bool Changed = false; + MBB = &BB; + + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineInstr *MI = *I; + MachineInstr *PrevMI = I == BB.begin() ? 0 : *(I-1); + unsigned Flags = TII.get(MI->getOpcode()).TSFlags; + + if ((Flags & X86II::FPTypeMask) == 0) continue; // Ignore non-fp insts! + + ++NumFP; // Keep track of # of pseudo instrs + DEBUG(std::cerr << "\nFPInst:\t"; + MI->print(std::cerr, MF.getTarget())); + + // Get dead variables list now because the MI pointer may be deleted as part + // of processing! 
+ LiveVariables::killed_iterator IB = LV->dead_begin(MI); + LiveVariables::killed_iterator IE = LV->dead_end(MI); + + DEBUG(const MRegisterInfo *MRI = MF.getTarget().getRegisterInfo(); + LiveVariables::killed_iterator I = LV->killed_begin(MI); + LiveVariables::killed_iterator E = LV->killed_end(MI); + if (I != E) { + std::cerr << "Killed Operands:"; + for (; I != E; ++I) + std::cerr << " %" << MRI->getName(I->second); + std::cerr << "\n"; + }); + + switch (Flags & X86II::FPTypeMask) { + case X86II::ZeroArgFP: handleZeroArgFP(I); break; + case X86II::OneArgFP: handleOneArgFP(I); break; + + case X86II::OneArgFPRW: // ST(0) = fsqrt(ST(0)) + assert(0 && "FP instr type not handled yet!"); + + case X86II::TwoArgFP: handleTwoArgFP(I); break; + case X86II::SpecialFP: handleSpecialFP(I); break; + default: assert(0 && "Unknown FP Type!"); + } + + // Check to see if any of the values defined by this instruction are dead + // after definition. If so, pop them. + for (; IB != IE; ++IB) { + unsigned Reg = IB->second; + if (Reg >= X86::FP0 && Reg <= X86::FP6) { + DEBUG(std::cerr << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); + ++I; // Insert fxch AFTER the instruction + moveToTop(Reg-X86::FP0, I); // Insert fxch if neccesary + --I; // Move to fxch or old instruction + popStackAfter(I); // Pop the top of the stack, killing value + } + } + + // Print out all of the instructions expanded to if -debug + DEBUG(if (*I == PrevMI) { + std::cerr<< "Just deleted pseudo instruction\n"; + } else { + MachineBasicBlock::iterator Start = I; + // Rewind to first instruction newly inserted. 
+ while (Start != BB.begin() && *(Start-1) != PrevMI) --Start; + std::cerr << "Inserted instructions:\n"; + do TII.print(*Start, std::cerr << "\t", MF.getTarget()); + while (++Start != I+1); + } + dumpStack(); + ); + + Changed = true; + } + + assert(StackTop == 0 && "Stack not empty at end of basic block?"); + return Changed; +} + +//===----------------------------------------------------------------------===// +// Efficient Lookup Table Support +//===----------------------------------------------------------------------===// + +struct TableEntry { + unsigned from; + unsigned to; + bool operator<(const TableEntry &TE) const { return from < TE.from; } + bool operator<(unsigned V) const { return from < V; } +}; + +static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { + for (unsigned i = 0; i != NumEntries-1; ++i) + if (!(Table[i] < Table[i+1])) return false; + return true; +} + +static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { + const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); + if (I != Table+N && I->from == Opcode) + return I->to; + return -1; +} + +#define ARRAY_SIZE(TABLE) \ + (sizeof(TABLE)/sizeof(TABLE[0])) + +#ifdef NDEBUG +#define ASSERT_SORTED(TABLE) +#else +#define ASSERT_SORTED(TABLE) \ + { static bool TABLE##Checked = false; \ + if (!TABLE##Checked) \ + assert(TableIsSorted(TABLE, ARRAY_SIZE(TABLE)) && \ + "All lookup tables must be sorted for efficient access!"); \ + } +#endif + + +//===----------------------------------------------------------------------===// +// Helper Methods +//===----------------------------------------------------------------------===// + +// PopTable - Sorted map of instructions to their popping version. The first +// element is an instruction, the second is the version which pops. 
//
// popStackAfter() searches this table with Lookup(), which is a binary search
// keyed on the first element, so the rows must stay ordered by that opcode's
// numeric value (ASSERT_SORTED checks this in assertion-enabled builds).
// FUCOMr and FUCOMPr both appear as keys, so a compare can be upgraded twice.
//
static const TableEntry PopTable[] = {
  { X86::FSTr32   , X86::FSTPr32    },
  { X86::FSTr64   , X86::FSTPr64    },
  { X86::FSTrr    , X86::FSTPrr     },
  { X86::FISTr16  , X86::FISTPr16   },
  { X86::FISTr32  , X86::FISTPr32   },

  { X86::FADDrST0 , X86::FADDPrST0  },
  { X86::FSUBrST0 , X86::FSUBPrST0  },
  { X86::FSUBRrST0, X86::FSUBRPrST0 },
  { X86::FMULrST0 , X86::FMULPrST0  },
  { X86::FDIVrST0 , X86::FDIVPrST0  },
  { X86::FDIVRrST0, X86::FDIVRPrST0 },

  { X86::FUCOMr   , X86::FUCOMPr    },
  { X86::FUCOMPr  , X86::FUCOMPPr   },
};

/// popStackAfter - Pop the current value off of the top of the FP stack after
/// the specified instruction.  This attempts to be sneaky and combine the pop
/// into the instruction itself if possible.  The iterator is left pointing to
/// the last instruction, be it a new pop instruction inserted, or the old
/// instruction if it was modified in place.
///
/// The bookkeeping (StackTop, RegMap) is updated first; the instruction stream
/// is then adjusted to match.
///
void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
  ASSERT_SORTED(PopTable);
  assert(StackTop > 0 && "Cannot pop empty stack!");
  RegMap[Stack[--StackTop]] = ~0;     // Update state

  // Check to see if there is a popping version of this instruction...
  int Opcode = Lookup(PopTable, ARRAY_SIZE(PopTable), (*I)->getOpcode());
  if (Opcode != -1) {
    // Rewrite the instruction in place to its popping form.
    (*I)->setOpcode(Opcode);
    // The double-popping compare is emitted without its first operand.
    if (Opcode == X86::FUCOMPPr)
      (*I)->RemoveOperand(0);

  } else {    // Insert an explicit pop
    MachineInstr *MI = BuildMI(X86::FSTPrr, 1).addReg(X86::ST0);
    I = MBB->insert(I+1, MI);
  }
}

// getFPReg - Return the index (0-6) of the FP pseudo register named by MO,
// i.e. its register number relative to X86::FP0.  Asserts that MO is a
// physical register in the range FP0..FP6.
//
static unsigned getFPReg(const MachineOperand &MO) {
  assert(MO.isPhysicalRegister() && "Expected an FP register!");
  unsigned Reg = MO.getReg();
  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
  return Reg - X86::FP0;
}


//===----------------------------------------------------------------------===//
// Instruction transformation implementation
//===----------------------------------------------------------------------===//

/// handleZeroArgFP - ST(0) = fld0    ST(0) = flds
///
/// Operand 0 names the destination FP register; it is stripped from the
/// instruction and recorded as pushed onto the tracked stack.
//
void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = *I;
  unsigned DestReg = getFPReg(MI->getOperand(0));
  MI->RemoveOperand(0);   // Remove the explicit ST(0) operand

  // Result gets pushed on the stack...
  pushReg(DestReg);
}

/// handleOneArgFP - fst ST(0),
///
/// Handles instructions with exactly five operands whose last operand
/// (index 4) is the FP source register.  NOTE(review): the other four operands
/// are presumably the memory destination -- confirm against the instruction
/// definitions.
//
void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = *I;
  assert(MI->getNumOperands() == 5 && "Can only handle fst* instructions!");

  unsigned Reg = getFPReg(MI->getOperand(4));
  // Does live variable analysis list the source register as killed here?
  bool KillsSrc = false;
  for (LiveVariables::killed_iterator KI = LV->killed_begin(MI),
         E = LV->killed_end(MI); KI != E; ++KI)
    KillsSrc |= KI->second == X86::FP0+Reg;

  // FSTPr80 and FISTPr64 are strange because there are no non-popping versions.
  // If we have one _and_ we don't want to pop the operand, duplicate the value
  // on the stack instead of moving it.  This ensures that popping the value is
  // always ok.
  //
  if ((MI->getOpcode() == X86::FSTPr80 ||
       MI->getOpcode() == X86::FISTPr64) && !KillsSrc) {
    duplicateToTop(Reg, 7 /*temp register*/, I);
  } else {
    moveToTop(Reg, I);            // Move to the top of the stack...
  }
  MI->RemoveOperand(4);           // Remove explicit ST(0) operand

  if (MI->getOpcode() == X86::FSTPr80 || MI->getOpcode() == X86::FISTPr64) {
    // These opcodes pop on their own, so only the tracked depth changes; no
    // instruction is rewritten or inserted.
    assert(StackTop > 0 && "Stack empty??");
    --StackTop;
  } else if (KillsSrc) { // Last use of operand?
    popStackAfter(I);
  }
}

//===----------------------------------------------------------------------===//
// Define tables of various ways to map pseudo instructions
//
// Each table is keyed on the pseudo opcode and is searched with Lookup(), so
// rows must be ordered by the key's numeric value.  A `to` of ~0 marks a
// combination with no usable opcode; Lookup() reports it as -1.

// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
static const TableEntry ForwardST0Table[] = {
  { X86::FpADD,  X86::FADDST0r },
  { X86::FpSUB,  X86::FSUBST0r },
  { X86::FpMUL,  X86::FMULST0r },
  { X86::FpDIV,  X86::FDIVST0r },
  { X86::FpUCOM, X86::FUCOMr   },
};

// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
static const TableEntry ReverseST0Table[] = {
  { X86::FpADD,  X86::FADDST0r  },  // commutative
  { X86::FpSUB,  X86::FSUBRST0r },
  { X86::FpMUL,  X86::FMULST0r  },  // commutative
  { X86::FpDIV,  X86::FDIVRST0r },
  { X86::FpUCOM, ~0             },
};

// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
static const TableEntry ForwardSTiTable[] = {
  { X86::FpADD,  X86::FADDrST0  },  // commutative
  { X86::FpSUB,  X86::FSUBRrST0 },
  { X86::FpMUL,  X86::FMULrST0  },  // commutative
  { X86::FpDIV,  X86::FDIVRrST0 },
  { X86::FpUCOM, X86::FUCOMr    },
};

// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
static const TableEntry ReverseSTiTable[] = {
  { X86::FpADD,  X86::FADDrST0 },
  { X86::FpSUB,  X86::FSUBrST0 },
  { X86::FpMUL,  X86::FMULrST0 },
  { X86::FpDIV,  X86::FDIVrST0 },
  { X86::FpUCOM, ~0            },
};


/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
/// instructions which need to be simplified and possibly transformed.
+/// +/// Result: ST(0) = fsub ST(0), ST(i) +/// ST(i) = fsub ST(0), ST(i) +/// ST(0) = fsubr ST(0), ST(i) +/// ST(i) = fsubr ST(0), ST(i) +/// +/// In addition to three address instructions, this also handles the FpUCOM +/// instruction which only has two operands, but no destination. This +/// instruction is also annoying because there is no "reverse" form of it +/// available. +/// +void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { + ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); + ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); + MachineInstr *MI = *I; + + unsigned NumOperands = MI->getNumOperands(); + assert(NumOperands == 3 || + (NumOperands == 2 && MI->getOpcode() == X86::FpUCOM) && + "Illegal TwoArgFP instruction!"); + unsigned Dest = getFPReg(MI->getOperand(0)); + unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); + unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); + bool KillsOp0 = false, KillsOp1 = false; + + for (LiveVariables::killed_iterator KI = LV->killed_begin(MI), + E = LV->killed_end(MI); KI != E; ++KI) { + KillsOp0 |= (KI->second == X86::FP0+Op0); + KillsOp1 |= (KI->second == X86::FP0+Op1); + } + + // If this is an FpUCOM instruction, we must make sure the first operand is on + // the top of stack, the other one can be anywhere... + if (MI->getOpcode() == X86::FpUCOM) + moveToTop(Op0, I); + + unsigned TOS = getStackEntry(0); + + // One of our operands must be on the top of the stack. If neither is yet, we + // need to move one. + if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? + // We can choose to move either operand to the top of the stack. If one of + // the operands is killed by this instruction, we want that one so that we + // can update right on top of the old version. + if (KillsOp0) { + moveToTop(Op0, I); // Move dead operand to TOS. 
+ TOS = Op0; + } else if (KillsOp1) { + moveToTop(Op1, I); + TOS = Op1; + } else { + // All of the operands are live after this instruction executes, so we + // cannot update on top of any operand. Because of this, we must + // duplicate one of the stack elements to the top. It doesn't matter + // which one we pick. + // + duplicateToTop(Op0, Dest, I); + Op0 = TOS = Dest; + KillsOp0 = true; + } + } else if (!KillsOp0 && !KillsOp1 && MI->getOpcode() != X86::FpUCOM) { + // If we DO have one of our operands at the top of the stack, but we don't + // have a dead operand, we must duplicate one of the operands to a new slot + // on the stack. + duplicateToTop(Op0, Dest, I); + Op0 = TOS = Dest; + KillsOp0 = true; + } + + // Now we know that one of our operands is on the top of the stack, and at + // least one of our operands is killed by this instruction. + assert((TOS == Op0 || TOS == Op1) && + (KillsOp0 || KillsOp1 || MI->getOpcode() == X86::FpUCOM) && + "Stack conditions not set up right!"); + + // We decide which form to use based on what is on the top of the stack, and + // which operand is killed by this instruction. + const TableEntry *InstTable; + bool isForward = TOS == Op0; + bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); + if (updateST0) { + if (isForward) + InstTable = ForwardST0Table; + else + InstTable = ReverseST0Table; + } else { + if (isForward) + InstTable = ForwardSTiTable; + else + InstTable = ReverseSTiTable; + } + + int Opcode = Lookup(InstTable, ARRAY_SIZE(ForwardST0Table), MI->getOpcode()); + assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); + + // NotTOS - The register which is not on the top of stack... + unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; + + // Replace the old instruction with a new instruction + *I = BuildMI(Opcode, 1).addReg(getSTReg(NotTOS)); + + // If both operands are killed, pop one off of the stack in addition to + // overwriting the other one. 
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) { + assert(!updateST0 && "Should have updated other operand!"); + popStackAfter(I); // Pop the top of stack + } + + // Insert an explicit pop of the "updated" operand for FUCOM + if (MI->getOpcode() == X86::FpUCOM) { + if (KillsOp0 && !KillsOp1) + popStackAfter(I); // If we kill the first operand, pop it! + else if (KillsOp1 && Op0 != Op1) { + if (getStackEntry(0) == Op1) { + popStackAfter(I); // If it's right at the top of stack, just pop it + } else { + // Otherwise, move the top of stack into the dead slot, killing the + // operand without having to add in an explicit xchg then pop. + // + unsigned STReg = getSTReg(Op1); + unsigned OldSlot = getSlot(Op1); + unsigned TopReg = Stack[StackTop-1]; + Stack[OldSlot] = TopReg; + RegMap[TopReg] = OldSlot; + RegMap[Op1] = ~0; + Stack[--StackTop] = ~0; + + MachineInstr *MI = BuildMI(X86::FSTPrr, 1).addReg(STReg); + I = MBB->insert(I+1, MI); + } + } + } + + // Update stack information so that we know the destination register is now on + // the stack. + if (MI->getOpcode() != X86::FpUCOM) { + unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); + assert(UpdatedSlot < StackTop && Dest < 7); + Stack[UpdatedSlot] = Dest; + RegMap[Dest] = UpdatedSlot; + } + delete MI; // Remove the old instruction +} + + +/// handleSpecialFP - Handle special instructions which behave unlike other +/// floating point instructions. This is primarily inteaded for use by pseudo +/// instructions. +/// +void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { + MachineInstr *MI = *I; + switch (MI->getOpcode()) { + default: assert(0 && "Unknown SpecialFP instruction!"); + case X86::FpGETRESULT: // Appears immediately after a call returning FP type! 
+ assert(StackTop == 0 && "Stack should be empty after a call!"); + pushReg(getFPReg(MI->getOperand(0))); + break; + case X86::FpSETRESULT: + assert(StackTop == 1 && "Stack should have one element on it to return!"); + --StackTop; // "Forget" we have something on the top of stack! + break; + case X86::FpMOV: { + unsigned SrcReg = getFPReg(MI->getOperand(1)); + unsigned DestReg = getFPReg(MI->getOperand(0)); + bool KillsSrc = false; + for (LiveVariables::killed_iterator KI = LV->killed_begin(MI), + E = LV->killed_end(MI); KI != E; ++KI) + KillsSrc |= KI->second == X86::FP0+SrcReg; + + if (KillsSrc) { + // If the input operand is killed, we can just change the owner of the + // incoming stack slot into the result. + unsigned Slot = getSlot(SrcReg); + assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); + Stack[Slot] = DestReg; + RegMap[DestReg] = Slot; + + } else { + // For FMOV we just duplicate the specified value to a new stack slot. + // This could be made better, but would require substantial changes. + duplicateToTop(SrcReg, DestReg, I); + } + break; + } + } + + I = MBB->erase(I)-1; // Remove the pseudo instruction +} diff --git a/lib/Target/X86/X86PeepholeOpt.cpp b/lib/Target/X86/X86PeepholeOpt.cpp new file mode 100644 index 00000000000..95828191fbe --- /dev/null +++ b/lib/Target/X86/X86PeepholeOpt.cpp @@ -0,0 +1,86 @@ +//===-- PeepholeOptimizer.cpp - X86 Peephole Optimizer --------------------===// +// +// This file contains a peephole optimizer for the X86. 
+// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace { + struct PH : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &MF); + + bool PeepholeOptimize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &I); + + virtual const char *getPassName() const { return "X86 Peephole Optimizer"; } + }; +} + +Pass *createX86PeepholeOptimizerPass() { return new PH(); } + +bool PH::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI != E; ++BI) + for (MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E; ) + if (PeepholeOptimize(*BI, I)) + Changed = true; + else + ++I; + + return Changed; +} + + +bool PH::PeepholeOptimize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &I) { + MachineInstr *MI = *I; + MachineInstr *Next = (I+1 != MBB.end()) ? *(I+1) : 0; + unsigned Size = 0; + switch (MI->getOpcode()) { + case X86::MOVrr8: + case X86::MOVrr16: + case X86::MOVrr32: // Destroy X = X copies... + if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { + I = MBB.erase(I); + delete MI; + return true; + } + return false; + +#if 0 + case X86::MOVir32: Size++; + case X86::MOVir16: Size++; + case X86::MOVir8: + // FIXME: We can only do this transformation if we know that flags are not + // used here, because XOR clobbers the flags! 
+ if (MI->getOperand(1).isImmediate()) { // avoid mov EAX, + int Val = MI->getOperand(1).getImmedValue(); + if (Val == 0) { // mov EAX, 0 -> xor EAX, EAX + static const unsigned Opcode[] ={X86::XORrr8,X86::XORrr16,X86::XORrr32}; + unsigned Reg = MI->getOperand(0).getReg(); + *I = BuildMI(Opcode[Size], 2, Reg).addReg(Reg).addReg(Reg); + delete MI; + return true; + } else if (Val == -1) { // mov EAX, -1 -> or EAX, -1 + // TODO: 'or Reg, -1' has a smaller encoding than 'mov Reg, -1' + } + } + return false; +#endif + case X86::BSWAPr32: // Change bswap EAX, bswap EAX into nothing + if (Next->getOpcode() == X86::BSWAPr32 && + MI->getOperand(0).getReg() == Next->getOperand(0).getReg()) { + I = MBB.erase(MBB.erase(I)); + delete MI; + delete Next; + return true; + } + return false; + default: + return false; + } +}