diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 487ce1dd434..76cc06e9638 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -105,6 +105,7 @@ FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM,
 FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMConstantIslandPass();
 FunctionPass *createNEONPreAllocPass();
+FunctionPass *createNEONMoveFixPass();
 FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createThumb2SizeReductionPass();
 
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 80961bff774..bc49197ca0a 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -658,39 +658,8 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
     // Always use neon reg-reg move if source or dest is NEON-only regclass.
     BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg);
   } else if (DestRC == ARM::DPRRegisterClass) {
-    const ARMBaseRegisterInfo* TRI = &getRegisterInfo();
-
-    // If we do not found an instruction defining the reg, this means the
-    // register should be live-in for this BB. It's always to better to use
-    // NEON reg-reg moves.
-    unsigned Domain = ARMII::DomainNEON;
-
-    // Find the Machine Instruction which defines SrcReg.
-    if (!MBB.empty()) {
-      MachineBasicBlock::iterator J = (I == MBB.begin() ? I : prior(I));
-      while (J != MBB.begin()) {
-        if (J->modifiesRegister(SrcReg, TRI))
-          break;
-        --J;
-      }
-
-      if (J->modifiesRegister(SrcReg, TRI)) {
-        Domain = J->getDesc().TSFlags & ARMII::DomainMask;
-        // Instructions in general domain are subreg accesses.
-        // Map them to NEON reg-reg moves.
-        if (Domain == ARMII::DomainGeneral)
-          Domain = ARMII::DomainNEON;
-      }
-    }
-
-    if ((Domain & ARMII::DomainNEON) && getSubtarget().hasNEON()) {
-      BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
-    } else {
-      assert((Domain & ARMII::DomainVFP ||
-              !getSubtarget().hasNEON()) && "Invalid domain!");
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
-                     .addReg(SrcReg));
-    }
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
+                   .addReg(SrcReg));
   } else if (DestRC == ARM::QPRRegisterClass ||
              DestRC == ARM::QPR_VFP2RegisterClass) {
     BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index c1da6ce88b9..29579db2e73 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -112,8 +112,11 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
 bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
-  if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
-    PM.add(createIfConverterPass());
+  if (OptLevel != CodeGenOpt::None) {
+    if (!Subtarget.isThumb1Only())
+      PM.add(createIfConverterPass());
+    PM.add(createNEONMoveFixPass());
+  }
 
   if (Subtarget.isThumb2()) {
     PM.add(createThumb2ITBlockPass());
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
new file mode 100644
index 00000000000..3c3b9520ca1
--- /dev/null
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -0,0 +1,165 @@
+//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass rewrites unpredicated FCPYD (VFP) register-to-register copies
+// into VMOVD (NEON) copies when the source register has no def earlier in the
+// same basic block, or its latest def there is not a VFP-domain instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "neon-mov-fix"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumVMovs, "Number of reg-reg moves converted");
+
+namespace {
+  /// NEONMoveFixPass - Converts FCPYD copies into VMOVD copies on subtargets
+  /// with NEON, one basic block at a time. Thumb functions are left untouched.
+  struct NEONMoveFixPass : public MachineFunctionPass {
+    static char ID;
+    NEONMoveFixPass() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual const char *getPassName() const {
+      return "NEON reg-reg move conversion";
+    }
+
+  private:
+    // Cached per-function in runOnMachineFunction before any block is visited.
+    const TargetRegisterInfo *TRI;
+    const ARMBaseInstrInfo *TII;
+    const ARMSubtarget *Subtarget;
+
+    /// RegMap - Maps a register to the last instruction seen defining it.
+    typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+    bool InsertMoves(MachineBasicBlock &MBB);
+  };
+  char NEONMoveFixPass::ID = 0;
+}
+
+/// InsertMoves - Scan MBB top-down, remembering the latest def of every
+/// register (and of its sub-registers), and replace each unpredicated FCPYD
+/// whose source comes from the NEON domain with a VMOVD. Returns true if any
+/// instruction was replaced.
+bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
+  RegMap Defs;
+  bool Modified = false;
+
+  // Walk over MBB tracking the def points of the registers.
+  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+  MachineBasicBlock::iterator NextMII;
+  for (; MII != E; MII = NextMII) {
+    // Grab the successor first: MI may be erased and replaced below.
+    NextMII = next(MII);
+    MachineInstr *MI = &*MII;
+
+    if (MI->getOpcode() == ARM::FCPYD &&
+        !TII->isPredicated(MI)) {
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      // If we did not find an instruction defining the reg, this means the
+      // register should be live-in for this BB. It's always better to use
+      // NEON reg-reg moves.
+      unsigned Domain = ARMII::DomainNEON;
+      RegMap::iterator DefMI = Defs.find(SrcReg);
+      if (DefMI != Defs.end()) {
+        Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
+        // Instructions in general domain are subreg accesses.
+        // Map them to NEON reg-reg moves.
+        if (Domain == ARMII::DomainGeneral)
+          Domain = ARMII::DomainNEON;
+      }
+
+      if ((Domain & ARMII::DomainNEON) && Subtarget->hasNEON()) {
+        // Convert FCPYD to VMOVD.
+        unsigned DestReg = MI->getOperand(0).getReg();
+
+        DEBUG({errs() << "vmov convert: "; MI->dump();});
+
+        // It's safe to ignore imp-defs / imp-uses here, since:
+        //  - We're running late, no intelligent codegen passes should be run
+        //    afterwards
+        //  - The imp-defs / imp-uses are superregs only, we don't care about
+        //    them.
+        BuildMI(MBB, *MI, MI->getDebugLoc(),
+                TII->get(ARM::VMOVD), DestReg).addReg(SrcReg);
+        MBB.erase(MI);
+        // Re-point MI at the new VMOVD (the instruction just before NextMII)
+        // so that the def tracking below records it, not the erased FCPYD.
+        MachineBasicBlock::iterator I = prior(NextMII);
+        MI = &*I;
+
+        DEBUG({errs() << " into: "; MI->dump();});
+
+        Modified = true;
+        ++NumVMovs;
+      } else {
+        assert((Domain & ARMII::DomainVFP ||
+                !Subtarget->hasNEON()) && "Invalid domain!");
+        // Do nothing.
+      }
+    }
+
+    // Update def information.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand& MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      unsigned MOReg = MO.getReg();
+
+      Defs[MOReg] = MI;
+      // Catch subregs as well.
+      for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R)
+        Defs[*R] = MI;
+    }
+  }
+
+  return Modified;
+}
+
+/// runOnMachineFunction - Cache the target hooks for Fn and run InsertMoves
+/// over every basic block. Thumb functions are skipped. Returns true if any
+/// block was modified.
+bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
+  ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
+  const TargetMachine &TM = Fn.getTarget();
+
+  if (AFI->isThumbFunction())
+    return false;
+
+  TRI = TM.getRegisterInfo();
+  Subtarget = &TM.getSubtarget<ARMSubtarget>();
+  TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+
+  bool Modified = false;
+  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+       ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    Modified |= InsertMoves(MBB);
+  }
+
+  return Modified;
+}
+
+/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix
+/// pass.
+FunctionPass *llvm::createNEONMoveFixPass() {
+  return new NEONMoveFixPass();
+}