Remove X86-dependent stuff from SSEDomainFix.

This also enables domain swizzling for AVX code, which required a few
trivial test changes.

The pass will be moved to lib/CodeGen shortly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140659 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jakob Stoklund Olesen 2011-09-27 23:50:46 +00:00
parent 833217bfb9
commit df4b35e3dd
6 changed files with 43 additions and 24 deletions

View File

@ -24,6 +24,7 @@ namespace llvm {
class MachineFunctionPass; class MachineFunctionPass;
class PassInfo; class PassInfo;
class TargetLowering; class TargetLowering;
class TargetRegisterClass;
class raw_ostream; class raw_ostream;
/// createUnreachableBlockEliminationPass - The LLVM code generator does not /// createUnreachableBlockEliminationPass - The LLVM code generator does not
@ -225,6 +226,14 @@ namespace llvm {
/// ///
FunctionPass *createExpandISelPseudosPass(); FunctionPass *createExpandISelPseudosPass();
/// createExecutionDependencyFixPass - This pass fixes execution time
/// problems with dependent instructions, such as switching execution
/// domains to match.
///
/// The pass will examine instructions using and defining registers in RC.
/// Registers that overlap a member of RC are tracked under that member's
/// index. Functions that use no register in RC are left untouched.
///
/// RC must not be null: the pass queries it for its register count as soon
/// as it is constructed.
///
FunctionPass *createExecutionDependencyFixPass(const TargetRegisterClass *RC);
} // End llvm namespace } // End llvm namespace
#endif #endif

View File

@ -18,10 +18,12 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sse-domain-fix" #define DEBUG_TYPE "execution-fix"
#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Support/Allocator.h" #include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
@ -97,25 +99,27 @@ struct DomainValue {
}; };
} }
static const unsigned NumRegs = 16;
namespace { namespace {
class SSEDomainFixPass : public MachineFunctionPass { class SSEDomainFixPass : public MachineFunctionPass {
static char ID; static char ID;
SpecificBumpPtrAllocator<DomainValue> Allocator; SpecificBumpPtrAllocator<DomainValue> Allocator;
SmallVector<DomainValue*,16> Avail; SmallVector<DomainValue*,16> Avail;
const TargetRegisterClass *const RC;
MachineFunction *MF; MachineFunction *MF;
const X86InstrInfo *TII; const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI; const TargetRegisterInfo *TRI;
MachineBasicBlock *MBB; MachineBasicBlock *MBB;
std::vector<int> AliasMap;
const unsigned NumRegs;
DomainValue **LiveRegs; DomainValue **LiveRegs;
typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
LiveOutMap LiveOuts; LiveOutMap LiveOuts;
unsigned Distance; unsigned Distance;
public: public:
SSEDomainFixPass() : MachineFunctionPass(ID) {} SSEDomainFixPass(const TargetRegisterClass *rc)
: MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const { virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll(); AU.setPreservesAll();
@ -154,10 +158,9 @@ char SSEDomainFixPass::ID = 0;
/// Translate TRI register number to an index into our smaller tables of /// Translate TRI register number to an index into our smaller tables of
/// interesting registers. Return -1 for boring registers. /// interesting registers. Return -1 for boring registers.
int SSEDomainFixPass::RegIndex(unsigned reg) { int SSEDomainFixPass::RegIndex(unsigned Reg) {
assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); assert(Reg < AliasMap.size() && "Invalid register");
reg -= X86::XMM0; return AliasMap[Reg];
return reg < NumRegs ? (int) reg : -1;
} }
DomainValue *SSEDomainFixPass::Alloc(int domain) { DomainValue *SSEDomainFixPass::Alloc(int domain) {
@ -444,23 +447,33 @@ void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
MF = &mf; MF = &mf;
TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo()); TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo(); TRI = MF->getTarget().getRegisterInfo();
MBB = 0; MBB = 0;
LiveRegs = 0; LiveRegs = 0;
Distance = 0; Distance = 0;
assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass"); assert(NumRegs == RC->getNumRegs() && "Bad regclass");
// If no XMM registers are used in the function, we can skip it completely. // If no XMM registers are used in the function, we can skip it completely.
bool anyregs = false; bool anyregs = false;
for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
E = X86::VR128RegClass.end(); I != E; ++I) I != E; ++I)
if (MF->getRegInfo().isPhysRegUsed(*I)) { if (MF->getRegInfo().isPhysRegUsed(*I)) {
anyregs = true; anyregs = true;
break; break;
} }
if (!anyregs) return false; if (!anyregs) return false;
// Initialize the AliasMap on the first use.
if (AliasMap.empty()) {
// Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
// or -1.
AliasMap.resize(TRI->getNumRegs(), -1);
for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
AliasMap[*AI] = i;
}
MachineBasicBlock *Entry = MF->begin(); MachineBasicBlock *Entry = MF->begin();
SmallPtrSet<MachineBasicBlock*, 16> Visited; SmallPtrSet<MachineBasicBlock*, 16> Visited;
for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> > for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
@ -501,6 +514,7 @@ bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
return false; return false;
} }
FunctionPass *llvm::createSSEDomainFixPass() { FunctionPass *
return new SSEDomainFixPass(); llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
return new SSEDomainFixPass(RC);
} }

View File

@ -44,10 +44,6 @@ FunctionPass* createGlobalBaseRegPass();
/// ///
FunctionPass *createX86FloatingPointStackifierPass(); FunctionPass *createX86FloatingPointStackifierPass();
/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
/// crossings.
FunctionPass *createSSEDomainFixPass();
/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions /// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
/// before each call to avoid transition penalty between functions encoded with /// before each call to avoid transition penalty between functions encoded with
/// AVX and SSE. /// AVX and SSE.

View File

@ -133,7 +133,7 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
bool ShouldPrint = false; bool ShouldPrint = false;
if (OptLevel != CodeGenOpt::None && if (OptLevel != CodeGenOpt::None &&
(Subtarget.hasSSE2() || Subtarget.hasAVX())) { (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
PM.add(createSSEDomainFixPass()); PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
ShouldPrint = true; ShouldPrint = true;
} }

View File

@ -3,8 +3,8 @@
; CHECK: vmovaps ; CHECK: vmovaps
; CHECK: vmovaps ; CHECK: vmovaps
; CHECK: vmovapd ; CHECK: vmovaps
; CHECK: vmovapd ; CHECK: vmovaps
; CHECK: vmovaps ; CHECK: vmovaps
; CHECK: vmovaps ; CHECK: vmovaps
define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp { define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp {

View File

@ -47,7 +47,7 @@ entry:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; To: ; To:
; shuffle (vload ptr)), undef, <1, 1, 1, 1> ; shuffle (vload ptr)), undef, <1, 1, 1, 1>
; CHECK: vmovaps ; CHECK: vmovdqa
; CHECK-NEXT: vinsertf128 $1 ; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $-1 ; CHECK-NEXT: vpermilps $-1
define <8 x float> @funcE() nounwind { define <8 x float> @funcE() nounwind {