diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 271ca44c2b6..451247b71e9 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -127,7 +127,6 @@ FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); -FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 38535dc6650..9dbf29d85a9 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -44,6 +44,8 @@ namespace { void ExpandVLD(MachineBasicBlock::iterator &MBBI); void ExpandVST(MachineBasicBlock::iterator &MBBI); void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); + void ExpandVTBL(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool IsExt, unsigned NumRegs); }; char ARMExpandPseudo::ID = 0; } @@ -326,7 +328,7 @@ static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, D1 = TRI->getSubReg(Reg, ARM::dsub_3); D2 = TRI->getSubReg(Reg, ARM::dsub_5); D3 = TRI->getSubReg(Reg, ARM::dsub_7); - } + } } /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register @@ -414,10 +416,10 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { if (NumRegs > 3) MIB.addReg(D3); MIB = AddDefaultPred(MIB); - TransferImpOps(MI, MIB, MIB); if (SrcIsKill) // Add an implicit kill for the super-reg. (*MIB).addRegisterKilled(SrcReg, TRI, true); + TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); } @@ -500,6 +502,42 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { MI.eraseFromParent(); } +/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ +/// register operands to real instructions with D register operands. +void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool IsExt, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + + // Transfer the destination register operand. + MIB.addOperand(MI.getOperand(OpIdx++)); + if (IsExt) + MIB.addOperand(MI.getOperand(OpIdx++)); + + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + unsigned D0, D1, D2, D3; + GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); + MIB.addReg(D0).addReg(D1); + if (NumRegs > 2) + MIB.addReg(D2); + if (NumRegs > 3) + MIB.addReg(D3); + + // Copy the other source register operand. + MIB.addOperand(MI.getOperand(OpIdx)); + + MIB = AddDefaultPred(MIB); + if (SrcIsKill) + // Add an implicit kill for the super-reg. + (*MIB).addRegisterKilled(SrcReg, TRI, true); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -515,7 +553,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { ModifiedOp = false; break; - case ARM::tLDRpci_pic: + case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? ARM::tLDRpci : ARM::t2LDRpci; @@ -765,6 +803,19 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VST4LNq32Pseudo_UPD: ExpandLaneOp(MBBI); break; + + case ARM::VTBL2Pseudo: + ExpandVTBL(MBBI, ARM::VTBL2, false, 2); break; + case ARM::VTBL3Pseudo: + ExpandVTBL(MBBI, ARM::VTBL3, false, 3); break; + case ARM::VTBL4Pseudo: + ExpandVTBL(MBBI, ARM::VTBL4, false, 4); break; + case ARM::VTBX2Pseudo: + ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break; + case ARM::VTBX3Pseudo: + ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break; + case ARM::VTBX4Pseudo: + ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break; } if (ModifiedOp) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a477344f375..5cfeb290fe2 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1353,17 +1353,10 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - // Now extract the D registers back out. SmallVector Ops; if (IsExt) Ops.push_back(N->getOperand(1)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq)); - + Ops.push_back(RegSeq); Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); Ops.push_back(getAL(CurDAG)); // predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register @@ -2099,18 +2092,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; case Intrinsic::arm_neon_vtbl2: - return SelectVTBL(N, false, 2, ARM::VTBL2); + return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo); case Intrinsic::arm_neon_vtbl3: - return SelectVTBL(N, false, 3, ARM::VTBL3); + return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); case Intrinsic::arm_neon_vtbl4: - return SelectVTBL(N, false, 4, ARM::VTBL4); + return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); case Intrinsic::arm_neon_vtbx2: - return SelectVTBL(N, true, 2, ARM::VTBX2); + return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo); case Intrinsic::arm_neon_vtbx3: - return SelectVTBL(N, true, 3, ARM::VTBX3); + return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); case Intrinsic::arm_neon_vtbx4: - return SelectVTBL(N, true, 4, ARM::VTBX4); + return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); } break; } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 113cfffe61f..9610427bcbf 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1541,6 +1541,14 @@ class PseudoNLdSt list Predicates = [HasNEON]; } +class PseudoNeonI + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + list Predicates = [HasNEON]; +} + class NDataI pattern> : NeonI; } // hasExtraSrcRegAllocReq = 1 +def VTBL2Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "">; +def VTBL3Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "">; +def VTBL4Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "">; + // VTBX : Vector Table Extension def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst), @@ -3863,6 +3870,16 @@ def VTBX4 "$orig = $dst", []>; } // hasExtraSrcRegAllocReq = 1 +def VTBX2Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src), + IIC_VTBX2, "$orig = $dst">; +def VTBX3Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), + IIC_VTBX3, "$orig = $dst">; +def VTBX4Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), + IIC_VTBX4, "$orig = $dst">; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 30ff8276cda..b9d9d575935 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -104,9 +104,6 @@ bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (Subtarget.hasNEON()) - PM.add(createNEONPreAllocPass()); - // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index b38e9536dc9..7b0ee2b7e20 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -37,7 +37,6 @@ add_llvm_target(ARMCodeGen ARMTargetMachine.cpp ARMTargetObjectFile.cpp NEONMoveFix.cpp - NEONPreAllocPass.cpp Thumb1InstrInfo.cpp Thumb1RegisterInfo.cpp Thumb2HazardRecognizer.cpp diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp deleted file mode 100644 index 03ef272b873..00000000000 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ /dev/null @@ -1,268 +0,0 @@ -//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "neon-prealloc" -#include "ARM.h" -#include "ARMInstrInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -using namespace llvm; - -namespace { - class NEONPreAllocPass : public MachineFunctionPass { - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - - public: - static char ID; - NEONPreAllocPass() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "NEON register pre-allocation pass"; - } - - private: - bool FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs, - unsigned Offset, unsigned Stride) const; - bool PreAllocNEONRegisters(MachineBasicBlock &MBB); - }; - - char NEONPreAllocPass::ID = 0; -} - -static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, - unsigned &Offset, unsigned &Stride) { - // Default to unit stride with no offset. - Stride = 1; - Offset = 0; - - switch (Opcode) { - default: - break; - - case ARM::VTBL2: - FirstOpnd = 1; - NumRegs = 2; - return true; - - case ARM::VTBL3: - FirstOpnd = 1; - NumRegs = 3; - return true; - - case ARM::VTBL4: - FirstOpnd = 1; - NumRegs = 4; - return true; - - case ARM::VTBX2: - FirstOpnd = 2; - NumRegs = 2; - return true; - - case ARM::VTBX3: - FirstOpnd = 2; - NumRegs = 3; - return true; - - case ARM::VTBX4: - FirstOpnd = 2; - NumRegs = 4; - return true; - } - - return false; -} - -bool -NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs, - unsigned Offset, unsigned Stride) const { - MachineOperand &FMO = MI->getOperand(FirstOpnd); - assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = FMO.getReg(); - (void)VirtReg; - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - - unsigned LastSubIdx = 0; - if (FMO.isDef()) { - MachineInstr *RegSeq = 0; - for (unsigned R = 0; R < NumRegs; ++R) { - const MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - // Feeding into a REG_SEQUENCE. - if (!MRI->hasOneNonDBGUse(VirtReg)) - return false; - MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg); - if (!UseMI->isRegSequence()) - return false; - if (RegSeq && RegSeq != UseMI) - return false; - unsigned OpIdx = 1 + (Offset + R * Stride) * 2; - if (UseMI->getOperand(OpIdx).getReg() != VirtReg) - llvm_unreachable("Malformed REG_SEQUENCE instruction!"); - unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm(); - if (LastSubIdx) { - if (LastSubIdx != SubIdx-Stride) - return false; - } else { - // Must start from dsub_0 or qsub_0. - if (SubIdx != (ARM::dsub_0+Offset) && - SubIdx != (ARM::qsub_0+Offset)) - return false; - } - RegSeq = UseMI; - LastSubIdx = SubIdx; - } - - // In the case of vld3, etc., make sure the trailing operand of - // REG_SEQUENCE is an undef. - if (NumRegs == 3) { - unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2; - const MachineOperand &MO = RegSeq->getOperand(OpIdx); - unsigned VirtReg = MO.getReg(); - MachineInstr *DefMI = MRI->getVRegDef(VirtReg); - if (!DefMI || !DefMI->isImplicitDef()) - return false; - } - return true; - } - - unsigned LastSrcReg = 0; - SmallVector SubIds; - for (unsigned R = 0; R < NumRegs; ++R) { - const MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - // Extracting from a Q or QQ register. - MachineInstr *DefMI = MRI->getVRegDef(VirtReg); - if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg()) - return false; - VirtReg = DefMI->getOperand(1).getReg(); - if (LastSrcReg && LastSrcReg != VirtReg) - return false; - LastSrcReg = VirtReg; - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - if (RC != ARM::QPRRegisterClass && - RC != ARM::QQPRRegisterClass && - RC != ARM::QQQQPRRegisterClass) - return false; - unsigned SubIdx = DefMI->getOperand(1).getSubReg(); - if (LastSubIdx) { - if (LastSubIdx != SubIdx-Stride) - return false; - } else { - // Must start from dsub_0 or qsub_0. - if (SubIdx != (ARM::dsub_0+Offset) && - SubIdx != (ARM::qsub_0+Offset)) - return false; - } - SubIds.push_back(SubIdx); - LastSubIdx = SubIdx; - } - - // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is - // currently required for correctness. e.g. - // %reg1041 = REG_SEQUENCE %reg1040, 5, %reg1035, 6 - // %reg1042 = EXTRACT_SUBREG %reg1041, 6 - // %reg1043 = EXTRACT_SUBREG %reg1041, 5 - // VST1q16 %reg1025, 0, %reg1043, %reg1042, - // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5 - // respectively. - // We need to change how we model uses of REG_SEQUENCE. - for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); - unsigned OldReg = MO.getReg(); - MachineInstr *DefMI = MRI->getVRegDef(OldReg); - assert(DefMI->isCopy()); - MO.setReg(LastSrcReg); - MO.setSubReg(SubIds[R]); - MO.setIsKill(false); - // Delete the EXTRACT_SUBREG if its result is now dead. - if (MRI->use_empty(OldReg)) - DefMI->eraseFromParent(); - } - - return true; -} - -bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { - bool Modified = false; - - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - for (; MBBI != E; ++MBBI) { - MachineInstr *MI = &*MBBI; - unsigned FirstOpnd, NumRegs, Offset, Stride; - if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) - continue; - if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) - continue; - - MachineBasicBlock::iterator NextI = llvm::next(MBBI); - for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - - // For now, just assign a fixed set of adjacent registers. - // This leaves plenty of room for future improvements. - static const unsigned NEONDRegs[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 - }; - MO.setReg(NEONDRegs[Offset + R * Stride]); - - if (MO.isUse()) { - // Insert a copy from VirtReg. - BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg()) - .addReg(VirtReg, getKillRegState(MO.isKill())); - MO.setIsKill(); - } else if (MO.isDef() && !MO.isDead()) { - // Add a copy to VirtReg. - BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg) - .addReg(MO.getReg()); - } - } - } - - return Modified; -} - -bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - MRI = &MF.getRegInfo(); - - bool Modified = false; - for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; - ++MFI) { - MachineBasicBlock &MBB = *MFI; - Modified |= PreAllocNEONRegisters(MBB); - } - - return Modified; -} - -/// createNEONPreAllocPass - returns an instance of the NEON register -/// pre-allocation pass. -FunctionPass *llvm::createNEONPreAllocPass() { - return new NEONPreAllocPass(); -}