//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the SSEDomainFix pass. // // Some SSE instructions like mov, and, or, xor are available in different // variants for different operand types. These variant instructions are // equivalent, but on Nehalem and newer cpus there is extra latency // transferring data between integer and floating point domains. // // This pass changes the variant instructions to minimize domain crossings. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sse-domain-fix" #include "X86InstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { /// Allocate objects from a pool, allow objects to be recycled, and provide a /// way of deleting everything. template class PoolAllocator { std::vector Pages, Avail; public: ~PoolAllocator() { Clear(); } T* Alloc() { if (Avail.empty()) { T *p = new T[PageSize]; Pages.push_back(p); Avail.reserve(PageSize); for (unsigned n = 0; n != PageSize; ++n) Avail.push_back(p+n); } T *p = Avail.back(); Avail.pop_back(); return p; } // Allow object to be reallocated. It won't be reconstructed. void Recycle(T *p) { p->clear(); Avail.push_back(p); } // Destroy all objects, make sure there are no external pointers to them. void Clear() { Avail.clear(); while (!Pages.empty()) { delete[] Pages.back(); Pages.pop_back(); } } }; /// A DomainValue is a bit like LiveIntervals' ValNo, but it laso keeps track /// of execution domains. /// /// An open DomainValue represents a set of instructions that can still switch /// execution domain. Multiple registers may refer to the same open /// DomainValue - they will eventually be collapsed to the same execution /// domain. /// /// A collapsed DomainValue represents a single register that has been forced /// into one of more execution domains. There is a separate collapsed /// DomainValue for each register, but it may contain multiple execution /// domains. A register value is initially created in a single execution /// domain, but if we were forced to pay the penalty of a domain crossing, we /// keep track of the fact the the register is now available in multiple /// domains. struct DomainValue { // Basic reference counting. unsigned Refs; // Available domains. For an open DomainValue, it is the still possible // domains for collapsing. For a collapsed DomainValue it is the domains where // the register is available for free. unsigned Mask; // Position of the last defining instruction. unsigned Dist; // Twiddleable instructions using or defining these registers. SmallVector Instrs; // Collapsed DomainValue have no instructions to twiddle - it simply keeps // track of the domains where the registers are already available. bool collapsed() const { return Instrs.empty(); } // Is any domain in mask available? bool compat(unsigned mask) const { return Mask & mask; } // Mark domain as available. void add(unsigned domain) { Mask |= 1u << domain; } // First domain available in mask. unsigned firstDomain() const { return CountTrailingZeros_32(Mask); } DomainValue() { clear(); } void clear() { Refs = Mask = Dist = 0; Instrs.clear(); } }; static const unsigned NumRegs = 16; class SSEDomainFixPass : public MachineFunctionPass { static char ID; PoolAllocator Pool; MachineFunction *MF; const X86InstrInfo *TII; const TargetRegisterInfo *TRI; MachineBasicBlock *MBB; DomainValue **LiveRegs; typedef DenseMap LiveOutMap; LiveOutMap LiveOuts; unsigned Distance; public: SSEDomainFixPass() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "SSE execution domain fixup"; } private: // Register mapping. int RegIndex(unsigned Reg); // LiveRegs manipulations. void SetLiveReg(int rx, DomainValue *DV); void Kill(int rx); void Force(int rx, unsigned domain); void Collapse(DomainValue *dv, unsigned domain); bool Merge(DomainValue *A, DomainValue *B); void enterBasicBlock(); void visitGenericInstr(MachineInstr*); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); }; } char SSEDomainFixPass::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. int SSEDomainFixPass::RegIndex(unsigned reg) { // Registers are sorted lexicographically. // We just need them to be consecutive, ordering doesn't matter. assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); reg -= X86::XMM0; return reg < NumRegs ? reg : -1; } /// Set LiveRegs[rx] = dv, updating reference counts. void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); if (!LiveRegs) LiveRegs = (DomainValue**)calloc(sizeof(DomainValue*), NumRegs); if (LiveRegs[rx] == dv) return; if (LiveRegs[rx]) { assert(LiveRegs[rx]->Refs && "Bad refcount"); if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]); } LiveRegs[rx] = dv; if (dv) ++dv->Refs; } // Kill register rx, recycle or collapse any DomainValue. void SSEDomainFixPass::Kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); if (!LiveRegs || !LiveRegs[rx]) return; // Before killing the last reference to an open DomainValue, collapse it to // the first available domain. if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed()) Collapse(LiveRegs[rx], LiveRegs[rx]->firstDomain()); else SetLiveReg(rx, 0); } /// Force register rx into domain. void SSEDomainFixPass::Force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); DomainValue *dv; if (LiveRegs && (dv = LiveRegs[rx])) { if (dv->collapsed()) dv->add(domain); else Collapse(dv, domain); } else { // Set up basic collapsed DomainValue. dv = Pool.Alloc(); dv->Dist = Distance; dv->add(domain); SetLiveReg(rx, dv); } } /// Collapse open DomainValue into given domain. If there are multiple /// registers using dv, they each get a unique collapsed DomainValue. void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) { assert(dv->compat(1u << domain) && "Cannot collapse"); // Collapse all the instructions. while (!dv->Instrs.empty()) { MachineInstr *mi = dv->Instrs.back(); TII->SetSSEDomain(mi, domain); dv->Instrs.pop_back(); } dv->Mask = 1u << domain; // If there are multiple users, give them new, unique DomainValues. if (LiveRegs && dv->Refs > 1) { for (unsigned rx = 0; rx != NumRegs; ++rx) if (LiveRegs[rx] == dv) { DomainValue *dv2 = Pool.Alloc(); dv2->Dist = Distance; dv2->add(domain); SetLiveReg(rx, dv2); } } } /// Merge - All instructions and registers in B are moved to A, and B is /// released. bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { assert(!A->collapsed() && "Cannot merge into collapsed"); assert(!B->collapsed() && "Cannot merge from collapsed"); if (A == B) return true; if (!A->compat(B->Mask)) return false; A->Mask &= B->Mask; A->Dist = std::max(A->Dist, B->Dist); A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); for (unsigned rx = 0; rx != NumRegs; ++rx) if (LiveRegs[rx] == B) SetLiveReg(rx, A); return true; } void SSEDomainFixPass::enterBasicBlock() { // Try to coalesce live-out registers from predecessors. for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { int rx = RegIndex(*i); if (rx < 0) continue; for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), pe = MBB->pred_end(); pi != pe; ++pi) { LiveOutMap::const_iterator fi = LiveOuts.find(*pi); if (fi == LiveOuts.end()) continue; DomainValue *pdv = fi->second[rx]; if (!pdv) continue; if (!LiveRegs || !LiveRegs[rx]) SetLiveReg(rx, pdv); else { // We have a live DomainValue from more than one predecessor. if (LiveRegs[rx]->collapsed()) { // We are already collapsed, but predecessor is not. Force him. if (!pdv->collapsed()) Collapse(pdv, LiveRegs[rx]->firstDomain()); } else { // Currently open, merge in predecessor. if (!pdv->collapsed()) Merge(LiveRegs[rx], pdv); else Collapse(LiveRegs[rx], pdv->firstDomain()); } } } } } // A hard instruction only works in one domain. All input registers will be // forced into that domain. void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) { // Collapse all uses. for (unsigned i = mi->getDesc().getNumDefs(), e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; int rx = RegIndex(mo.getReg()); if (rx < 0) continue; Force(rx, domain); } // Kill all defs and force them. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; int rx = RegIndex(mo.getReg()); if (rx < 0) continue; Kill(rx); Force(rx, domain); } } // A soft instruction can be changed to work in other domains given by mask. void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Scan the explicit use operands for incoming domains. unsigned collmask = mask; SmallVector used; if (LiveRegs) for (unsigned i = mi->getDesc().getNumDefs(), e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; int rx = RegIndex(mo.getReg()); if (rx < 0) continue; if (DomainValue *dv = LiveRegs[rx]) { // Is it possible to use this collapsed register for free? if (dv->collapsed()) { if (unsigned m = collmask & dv->Mask) collmask = m; } else if (dv->compat(collmask)) used.push_back(rx); else Kill(rx); } } // If the collapsed operands force a single domain, propagate the collapse. if (isPowerOf2_32(collmask)) { unsigned domain = CountTrailingZeros_32(collmask); TII->SetSSEDomain(mi, domain); visitHardInstr(mi, domain); return; } // Kill off any remaining uses that don't match collmask, and build a list of // incoming DomainValue that we want to merge. SmallVector doms; for (SmallVector::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; DomainValue *dv = LiveRegs[rx]; // This useless DomainValue could have been missed above. if (!dv->compat(collmask)) { Kill(*i); continue; } // sorted, uniqued insert. bool inserted = false; for (SmallVector::iterator i = doms.begin(), e = doms.end(); i != e && !inserted; ++i) { if (dv == *i) inserted = true; else if (dv->Dist < (*i)->Dist) { inserted = true; doms.insert(i, dv); } } if (!inserted) doms.push_back(dv); } // doms are now sorted in order of appearance. Try to merge them all, giving // priority to the latest ones. DomainValue *dv = 0; while (!doms.empty()) { if (!dv) dv = doms.back(); else if (!Merge(dv, doms.back())) for (SmallVector::iterator i=used.begin(), e=used.end(); i!=e; ++i) if (LiveRegs[*i] == doms.back()) Kill(*i); doms.pop_back(); } // dv is the DomainValue we are going to use for this instruction. if (!dv) dv = Pool.Alloc(); dv->Dist = Distance; dv->Mask = collmask; dv->Instrs.push_back(mi); // Finally set all defs and non-collapsed uses to dv. for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; int rx = RegIndex(mo.getReg()); if (rx < 0) continue; if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { Kill(rx); SetLiveReg(rx, dv); } } } void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { // Process explicit defs, kill any XMM registers redefined. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; int rx = RegIndex(mo.getReg()); if (rx < 0) continue; Kill(rx); } } bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = static_cast(MF->getTarget().getInstrInfo()); TRI = MF->getTarget().getRegisterInfo(); MBB = 0; LiveRegs = 0; Distance = 0; assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass"); // If no XMM registers are used in the function, we can skip it completely. bool anyregs = false; for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), E = X86::VR128RegClass.end(); I != E; ++I) if (MF->getRegInfo().isPhysRegUsed(*I)) { anyregs = true; break; } if (!anyregs) return false; MachineBasicBlock *Entry = MF->begin(); SmallPtrSet Visited; for (df_ext_iterator > DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); DFI != DFE; ++DFI) { MBB = *DFI; enterBasicBlock(); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { MachineInstr *mi = I; if (mi->isDebugValue()) continue; ++Distance; std::pair domp = TII->GetSSEDomain(mi); if (domp.first) if (domp.second) visitSoftInstr(mi, domp.second); else visitHardInstr(mi, domp.first); else if (LiveRegs) visitGenericInstr(mi); } // Save live registers at end of MBB - used by enterBasicBlock(). if (LiveRegs) LiveOuts.insert(std::make_pair(MBB, LiveRegs)); LiveRegs = 0; } // Clear the LiveOuts vectors. Should we also collapse any remaining // DomainValues? for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); i != e; ++i) free(i->second); LiveOuts.clear(); Pool.Clear(); return false; } FunctionPass *llvm::createSSEDomainFixPass() { return new SSEDomainFixPass(); }