From c75c5fa12582956fc6b7d7d756b2bdd49fa61f71 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 23 Mar 2010 23:14:44 +0000 Subject: [PATCH] Add a late SSEDomainFix pass that twiddles SSE instructions to avoid domain crossings. This is work in progress. So far, SSE execution domain tables are added to X86InstrInfo, and a skeleton pass is enabled with -sse-domain-fix. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99345 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/CMakeLists.txt | 1 + lib/Target/X86/SSEDomainFix.cpp | 536 ++++++++++++++++++++++++++++ lib/Target/X86/X86.h | 4 + lib/Target/X86/X86InstrInfo.cpp | 3 + lib/Target/X86/X86InstrInfo.h | 14 + lib/Target/X86/X86TargetMachine.cpp | 15 + lib/Target/X86/X86TargetMachine.h | 1 + 7 files changed, 574 insertions(+) create mode 100644 lib/Target/X86/SSEDomainFix.cpp diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 4d3dedf0e5f..22285f19323 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) set(sources + SSEDomainFix.cpp X86AsmBackend.cpp X86CodeEmitter.cpp X86COFFMachineModuleInfo.cpp diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp new file mode 100644 index 00000000000..419c675190a --- /dev/null +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -0,0 +1,536 @@ +//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SSEDomainFix pass. +// +// Some SSE instructions like mov, and, or, xor are available in different +// variants for different operand types. These variant instructions are +// equivalent, but on Nehalem and newer cpus there is extra latency +// transferring data between integer and floating point domains. +// +// This pass changes the variant instructions to minimize domain crossings. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sse-domain-fix" +#include "X86InstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class SSEDomainFixPass : public MachineFunctionPass { + static char ID; + const X86InstrInfo *TII; + + MachineFunction *MF; + MachineBasicBlock *MBB; +public: + SSEDomainFixPass() : MachineFunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "SSE execution domain fixup"; + } + +private: + void enterBasicBlock(MachineBasicBlock *MBB); +}; +} + +void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) { + MBB = mbb; + DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n"); +} + +bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + TII = static_cast(MF->getTarget().getInstrInfo()); + + MachineBasicBlock *Entry = MF->begin(); + SmallPtrSet Visited; + for (df_ext_iterator > + DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); + DFI != DFE; ++DFI) { + enterBasicBlock(*DFI); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + MachineInstr *MI = I; + const unsigned *equiv = 0; + X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv); + DEBUG(dbgs() << "isd-"[domain] << (equiv ? "* " : " ") << *MI); + } + } + return false; +} + +FunctionPass *llvm::createSSEDomainFixPass() { + return new SSEDomainFixPass(); +} + +// These are the replaceable instructions. Some of these have _Int variants +// that we don't include here. We don't want to replace instructions selected +// by intrinsics. +static const unsigned ReplaceableInstrs[][3] = { + //PackedInt PackedSingle PackedDouble + { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr }, + { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm }, + { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr }, + { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr }, + { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm }, + { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr }, + { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm }, + { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr }, + { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm }, + { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr }, + { X86::PORrm, X86::ORPSrm, X86::ORPDrm }, + { X86::PORrr, X86::ORPSrr, X86::ORPDrr }, + { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm }, + { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr }, + { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm }, + { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr }, + { X86::PXORrm, X86::XORPSrm, X86::XORPDrm }, + { X86::PXORrr, X86::XORPSrr, X86::XORPDrr }, +}; + +void X86InstrInfo::populateSSEInstrDomainTable() { + // Instructions that execute in the packed integer domain. + static const unsigned PackedIntInstrs[] = { + X86::LDDQUrm, + X86::MASKMOVDQU, + X86::MASKMOVDQU64, + X86::MOVDI2PDIrm, + X86::MOVDI2PDIrr, + X86::MOVDQUmr_Int, + X86::MOVDQUrm_Int, + X86::MOVLQ128mr, + X86::MOVNTDQArm, + X86::MOVNTDQmr_Int, + X86::MOVNTDQ_64mr, + X86::MOVPDI2DImr, + X86::MOVPDI2DIrr, + X86::MOVPQI2QImr, + X86::MOVPQIto64rr, + X86::MOVQI2PQIrm, + X86::MOVQxrxr, + X86::MOVZDI2PDIrm, + X86::MOVZDI2PDIrr, + X86::MOVZPQILo2PQIrm, + X86::MOVZPQILo2PQIrr, + X86::MOVZQI2PQIrm, + X86::MOVZQI2PQIrr, + X86::MPSADBWrmi, + X86::MPSADBWrri, + X86::PABSBrm128, + X86::PABSBrr128, + X86::PABSDrm128, + X86::PABSDrr128, + X86::PABSWrm128, + X86::PABSWrr128, + X86::PACKSSDWrm, + X86::PACKSSDWrr, + X86::PACKSSWBrm, + X86::PACKSSWBrr, + X86::PACKUSDWrm, + X86::PACKUSDWrr, + X86::PACKUSWBrm, + X86::PACKUSWBrr, + X86::PADDBrm, + X86::PADDBrr, + X86::PADDDrm, + X86::PADDDrr, + X86::PADDQrm, + X86::PADDQrr, + X86::PADDSBrm, + X86::PADDSBrr, + X86::PADDSWrm, + X86::PADDSWrr, + X86::PADDUSBrm, + X86::PADDUSBrr, + X86::PADDUSWrm, + X86::PADDUSWrr, + X86::PADDWrm, + X86::PADDWrr, + X86::PALIGNR128rm, + X86::PALIGNR128rr, + X86::PAVGBrm, + X86::PAVGBrr, + X86::PAVGWrm, + X86::PAVGWrr, + X86::PBLENDVBrm0, + X86::PBLENDVBrr0, + X86::PBLENDWrmi, + X86::PBLENDWrri, + X86::PCMPEQBrm, + X86::PCMPEQBrr, + X86::PCMPEQDrm, + X86::PCMPEQDrr, + X86::PCMPEQQrm, + X86::PCMPEQQrr, + X86::PCMPEQWrm, + X86::PCMPEQWrr, + X86::PCMPESTRIArm, + X86::PCMPESTRIArr, + X86::PCMPESTRICrm, + X86::PCMPESTRICrr, + X86::PCMPESTRIOrm, + X86::PCMPESTRIOrr, + X86::PCMPESTRIrm, + X86::PCMPESTRIrr, + X86::PCMPESTRISrm, + X86::PCMPESTRISrr, + X86::PCMPESTRIZrm, + X86::PCMPESTRIZrr, + X86::PCMPESTRM128MEM, + X86::PCMPESTRM128REG, + X86::PCMPESTRM128rm, + X86::PCMPESTRM128rr, + X86::PCMPGTBrm, + X86::PCMPGTBrr, + X86::PCMPGTDrm, + X86::PCMPGTDrr, + X86::PCMPGTQrm, + X86::PCMPGTQrr, + X86::PCMPGTWrm, + X86::PCMPGTWrr, + X86::PCMPISTRIArm, + X86::PCMPISTRIArr, + X86::PCMPISTRICrm, + X86::PCMPISTRICrr, + X86::PCMPISTRIOrm, + X86::PCMPISTRIOrr, + X86::PCMPISTRIrm, + X86::PCMPISTRIrr, + X86::PCMPISTRISrm, + X86::PCMPISTRISrr, + X86::PCMPISTRIZrm, + X86::PCMPISTRIZrr, + X86::PCMPISTRM128MEM, + X86::PCMPISTRM128REG, + X86::PCMPISTRM128rm, + X86::PCMPISTRM128rr, + X86::PEXTRBmr, + X86::PEXTRBrr, + X86::PEXTRDmr, + X86::PEXTRDrr, + X86::PEXTRQmr, + X86::PEXTRQrr, + X86::PEXTRWmr, + X86::PEXTRWri, + X86::PHADDDrm128, + X86::PHADDDrr128, + X86::PHADDSWrm128, + X86::PHADDSWrr128, + X86::PHADDWrm128, + X86::PHADDWrr128, + X86::PHMINPOSUWrm128, + X86::PHMINPOSUWrr128, + X86::PHSUBDrm128, + X86::PHSUBDrr128, + X86::PHSUBSWrm128, + X86::PHSUBSWrr128, + X86::PHSUBWrm128, + X86::PHSUBWrr128, + X86::PINSRBrm, + X86::PINSRBrr, + X86::PINSRDrm, + X86::PINSRDrr, + X86::PINSRQrm, + X86::PINSRQrr, + X86::PINSRWrmi, + X86::PINSRWrri, + X86::PMADDUBSWrm128, + X86::PMADDUBSWrr128, + X86::PMADDWDrm, + X86::PMADDWDrr, + X86::PMAXSBrm, + X86::PMAXSBrr, + X86::PMAXSDrm, + X86::PMAXSDrr, + X86::PMAXSWrm, + X86::PMAXSWrr, + X86::PMAXUBrm, + X86::PMAXUBrr, + X86::PMAXUDrm, + X86::PMAXUDrr, + X86::PMAXUWrm, + X86::PMAXUWrr, + X86::PMINSBrm, + X86::PMINSBrr, + X86::PMINSDrm, + X86::PMINSDrr, + X86::PMINSWrm, + X86::PMINSWrr, + X86::PMINUBrm, + X86::PMINUBrr, + X86::PMINUDrm, + X86::PMINUDrr, + X86::PMINUWrm, + X86::PMINUWrr, + X86::PMOVSXBDrm, + X86::PMOVSXBDrr, + X86::PMOVSXBQrm, + X86::PMOVSXBQrr, + X86::PMOVSXBWrm, + X86::PMOVSXBWrr, + X86::PMOVSXDQrm, + X86::PMOVSXDQrr, + X86::PMOVSXWDrm, + X86::PMOVSXWDrr, + X86::PMOVSXWQrm, + X86::PMOVSXWQrr, + X86::PMOVZXBDrm, + X86::PMOVZXBDrr, + X86::PMOVZXBQrm, + X86::PMOVZXBQrr, + X86::PMOVZXBWrm, + X86::PMOVZXBWrr, + X86::PMOVZXDQrm, + X86::PMOVZXDQrr, + X86::PMOVZXWDrm, + X86::PMOVZXWDrr, + X86::PMOVZXWQrm, + X86::PMOVZXWQrr, + X86::PMULDQrm, + X86::PMULDQrr, + X86::PMULHRSWrm128, + X86::PMULHRSWrr128, + X86::PMULHUWrm, + X86::PMULHUWrr, + X86::PMULHWrm, + X86::PMULHWrr, + X86::PMULLDrm, + X86::PMULLDrm_int, + X86::PMULLDrr, + X86::PMULLDrr_int, + X86::PMULLWrm, + X86::PMULLWrr, + X86::PMULUDQrm, + X86::PMULUDQrr, + X86::PSADBWrm, + X86::PSADBWrr, + X86::PSHUFBrm128, + X86::PSHUFBrr128, + X86::PSHUFHWmi, + X86::PSHUFHWri, + X86::PSHUFLWmi, + X86::PSHUFLWri, + X86::PSIGNBrm128, + X86::PSIGNBrr128, + X86::PSIGNDrm128, + X86::PSIGNDrr128, + X86::PSIGNWrm128, + X86::PSIGNWrr128, + X86::PSLLDQri, + X86::PSLLDri, + X86::PSLLDrm, + X86::PSLLDrr, + X86::PSLLQri, + X86::PSLLQrm, + X86::PSLLQrr, + X86::PSLLWri, + X86::PSLLWrm, + X86::PSLLWrr, + X86::PSRADri, + X86::PSRADrm, + X86::PSRADrr, + X86::PSRAWri, + X86::PSRAWrm, + X86::PSRAWrr, + X86::PSRLDQri, + X86::PSRLDri, + X86::PSRLDrm, + X86::PSRLDrr, + X86::PSRLQri, + X86::PSRLQrm, + X86::PSRLQrr, + X86::PSRLWri, + X86::PSRLWrm, + X86::PSRLWrr, + X86::PSUBBrm, + X86::PSUBBrr, + X86::PSUBDrm, + X86::PSUBDrr, + X86::PSUBQrm, + X86::PSUBQrr, + X86::PSUBSBrm, + X86::PSUBSBrr, + X86::PSUBSWrm, + X86::PSUBSWrr, + X86::PSUBUSBrm, + X86::PSUBUSBrr, + X86::PSUBUSWrm, + X86::PSUBUSWrr, + X86::PSUBWrm, + X86::PSUBWrr, + X86::PUNPCKHBWrm, + X86::PUNPCKHBWrr, + X86::PUNPCKHWDrm, + X86::PUNPCKHWDrr, + X86::PUNPCKLBWrm, + X86::PUNPCKLBWrr, + X86::PUNPCKLWDrm, + X86::PUNPCKLWDrr, + }; + + // Instructions that execute in the packed single domain. + static const unsigned PackedSingleInstrs[] = { + X86::ADDPSrm, + X86::ADDPSrr, + X86::ADDSUBPSrm, + X86::ADDSUBPSrr, + X86::BLENDPSrmi, + X86::BLENDPSrri, + X86::BLENDVPSrm0, + X86::BLENDVPSrr0, + X86::CMPPSrmi, + X86::CMPPSrri, + X86::DIVPSrm, + X86::DIVPSrr, + X86::DPPSrmi, + X86::DPPSrri, + X86::EXTRACTPSmr, + X86::EXTRACTPSrr, + X86::HADDPSrm, + X86::HADDPSrr, + X86::HSUBPSrm, + X86::HSUBPSrr, + X86::INSERTPSrm, + X86::INSERTPSrr, + X86::MAXPSrm, + X86::MAXPSrm_Int, + X86::MAXPSrr, + X86::MAXPSrr_Int, + X86::MINPSrm, + X86::MINPSrm_Int, + X86::MINPSrr, + X86::MINPSrr_Int, + X86::MOVHLPSrr, + X86::MOVHPSmr, + X86::MOVHPSrm, + X86::MOVLHPSrr, + X86::MOVLPSmr, + X86::MOVLPSrm, + X86::MOVMSKPSrr, + X86::MOVNTPSmr_Int, + X86::MOVSHDUPrm, + X86::MOVSHDUPrr, + X86::MOVSLDUPrm, + X86::MOVSLDUPrr, + X86::MOVUPSmr_Int, + X86::MOVUPSrm_Int, + X86::MULPSrm, + X86::MULPSrr, + X86::RCPPSm, + X86::RCPPSm_Int, + X86::RCPPSr, + X86::RCPPSr_Int, + X86::ROUNDPSm_Int, + X86::ROUNDPSr_Int, + X86::RSQRTPSm, + X86::RSQRTPSm_Int, + X86::RSQRTPSr, + X86::RSQRTPSr_Int, + X86::SQRTPSm, + X86::SQRTPSm_Int, + X86::SQRTPSr, + X86::SQRTPSr_Int, + X86::SUBPSrm, + X86::SUBPSrr, + }; + + // Instructions that execute in the packed double domain. + static const unsigned PackedDoubleInstrs[] = { + X86::ADDPDrm, + X86::ADDPDrr, + X86::ADDSUBPDrm, + X86::ADDSUBPDrr, + X86::BLENDPDrmi, + X86::BLENDPDrri, + X86::BLENDVPDrm0, + X86::BLENDVPDrr0, + X86::CMPPDrmi, + X86::CMPPDrri, + X86::DIVPDrm, + X86::DIVPDrr, + X86::DPPDrmi, + X86::DPPDrri, + X86::HADDPDrm, + X86::HADDPDrr, + X86::HSUBPDrm, + X86::HSUBPDrr, + X86::MAXPDrm, + X86::MAXPDrm_Int, + X86::MAXPDrr, + X86::MAXPDrr_Int, + X86::MINPDrm, + X86::MINPDrm_Int, + X86::MINPDrr, + X86::MINPDrr_Int, + X86::MOVHPDmr, + X86::MOVHPDrm, + X86::MOVLPDmr, + X86::MOVLPDrm, + X86::MOVMSKPDrr, + X86::MOVNTPDmr_Int, + X86::MOVUPDmr_Int, + X86::MOVUPDrm_Int, + X86::MULPDrm, + X86::MULPDrr, + X86::ROUNDPDm_Int, + X86::ROUNDPDr_Int, + X86::SQRTPDm, + X86::SQRTPDm_Int, + X86::SQRTPDr, + X86::SQRTPDr_Int, + X86::SUBPDrm, + X86::SUBPDrr, + }; + + // Add non-negative entries for forcing instructions. + for (unsigned i = 0, e = array_lengthof(PackedIntInstrs); i != e; ++i) + SSEInstrDomainTable.insert(std::make_pair(PackedIntInstrs[i], + PackedInt)); + for (unsigned i = 0, e = array_lengthof(PackedSingleInstrs); i != e; ++i) + SSEInstrDomainTable.insert(std::make_pair(PackedSingleInstrs[i], + PackedSingle)); + for (unsigned i = 0, e = array_lengthof(PackedDoubleInstrs); i != e; ++i) + SSEInstrDomainTable.insert(std::make_pair(PackedDoubleInstrs[i], + PackedDouble)); + + // Add row number + 1 for replaceable instructions. + for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) + for (unsigned c = 0; c != 3; ++c) + SSEInstrDomainTable.insert(std::make_pair(ReplaceableInstrs[i][c], + c + 4*(i+1))); +} + +X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI, + const unsigned *&equiv) const { + DenseMap::const_iterator i = + SSEInstrDomainTable.find(MI->getOpcode()); + if (i == SSEInstrDomainTable.end()) + return NotSSEDomain; + unsigned value = i->second; + if (value/4) + equiv = ReplaceableInstrs[value/4 - 1]; + else + equiv = 0; + return SSEDomain(value & 3); +} diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index c753cf2a530..9be38a4b56a 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -41,6 +41,10 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM, /// FunctionPass *createX86FloatingPointStackifierPass(); +/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain +/// crossings. +FunctionPass *createSSEDomainFixPass(); + /// createX87FPRegKillInserterPass - This function returns a pass which /// inserts FP_REG_KILL instructions where needed. /// diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 2323f5790ab..71a4dae6a5f 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -665,6 +665,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // Remove ambiguous entries. assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?"); + + if (TM.getSubtarget().hasSSE2()) + populateSSEInstrDomainTable(); } bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 5111719a209..c3dbae9a5ef 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -486,6 +486,9 @@ class X86InstrInfo : public TargetInstrInfoImpl { /// MemOp2RegOpTable - Load / store unfolding opcode map. /// DenseMap > MemOp2RegOpTable; + + /// SSEInstrDomainTable - Map SSE opcodes to execution domain info. + DenseMap SSEInstrDomainTable; public: explicit X86InstrInfo(X86TargetMachine &tm); @@ -716,6 +719,14 @@ public: /// unsigned getGlobalBaseReg(MachineFunction *MF) const; + /// Some SSE instructions come in variants for three domains. + enum SSEDomain { PackedInt, PackedSingle, PackedDouble, NotSSEDomain }; + + /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for + /// unknown instructions. If the instruction has equivalents for other domain, + /// equiv points to a list of opcodes index by domain. + SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, @@ -732,6 +743,9 @@ private: /// operand and follow operands form a reference to the stack frame. bool isFrameOperand(const MachineInstr *MI, unsigned int Op, int &FrameIndex) const; + + // Implemented in SSEDomainFix.cpp + void populateSSEInstrDomainTable(); }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f13e6f35256..06a481de258 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -17,11 +17,17 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" using namespace llvm; +static cl::opt +SSEDomainFix("sse-domain-fix", + cl::desc("Enable fixing of SSE execution domain"), + cl::init(false), cl::Hidden); + static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -169,6 +175,15 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, return true; // -print-machineinstr should print after this. } +bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (SSEDomainFix && OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { + PM.add(createSSEDomainFixPass()); + return true; + } + return false; +} + bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 2bb54544d40..ae7b5b29af1 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -66,6 +66,7 @@ public: virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); };