mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 02:33:33 +00:00
5bdc2aa264
v1024 = REG_SEQUENCE ... v1025 = EXTRACT_SUBREG v1024, 5 v1026 = EXTRACT_SUBREG v1024, 6 = VSTxx <addr>, v1025, v1026 The REG_SEQUENCE ensures the sources that feed into the VST instruction get the right register allocation so that they form a large super-register. The extract_subreg instructions will be coalesced away and everything would just work: v1024 = REG_SEQUENCE ... = VSTxx <addr>, v1024:5, v1024:6 The problem is that if the coalescer isn't run, the extract_subreg instructions stick around and there is no assurance that v1025 and v1026 will get the right registers. As a short-term workaround, teach the NEON pre-allocation pass to transfer the sub-register indices over. An alternative would be to do it in the 2addr pass when reg_sequences are eliminated. But that *seems* wrong and would require updating liveness information. Another alternative is to do this in the scheduler when the instructions are created. But that would mean the scheduler somehow has to know that this must be done for correctness reasons. That's yucky as well. So for now, we are leaving this in the target-specific pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103540 91177308-0d34-0410-b5e6-96231b3b80d8
509 lines
12 KiB
C++
509 lines
12 KiB
C++
//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#define DEBUG_TYPE "neon-prealloc"
|
|
#include "ARM.h"
|
|
#include "ARMInstrInfo.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
class NEONPreAllocPass : public MachineFunctionPass {
|
|
const TargetInstrInfo *TII;
|
|
MachineRegisterInfo *MRI;
|
|
|
|
public:
|
|
static char ID;
|
|
NEONPreAllocPass() : MachineFunctionPass(&ID) {}
|
|
|
|
virtual bool runOnMachineFunction(MachineFunction &MF);
|
|
|
|
virtual const char *getPassName() const {
|
|
return "NEON register pre-allocation pass";
|
|
}
|
|
|
|
private:
|
|
bool FormsRegSequence(MachineInstr *MI,
|
|
unsigned FirstOpnd, unsigned NumRegs) const;
|
|
bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
|
|
};
|
|
|
|
char NEONPreAllocPass::ID = 0;
|
|
}
|
|
|
|
static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|
unsigned &Offset, unsigned &Stride) {
|
|
// Default to unit stride with no offset.
|
|
Stride = 1;
|
|
Offset = 0;
|
|
|
|
switch (Opcode) {
|
|
default:
|
|
break;
|
|
|
|
case ARM::VLD1q8:
|
|
case ARM::VLD1q16:
|
|
case ARM::VLD1q32:
|
|
case ARM::VLD1q64:
|
|
case ARM::VLD2d8:
|
|
case ARM::VLD2d16:
|
|
case ARM::VLD2d32:
|
|
case ARM::VLD2LNd8:
|
|
case ARM::VLD2LNd16:
|
|
case ARM::VLD2LNd32:
|
|
FirstOpnd = 0;
|
|
NumRegs = 2;
|
|
return true;
|
|
|
|
case ARM::VLD2q8:
|
|
case ARM::VLD2q16:
|
|
case ARM::VLD2q32:
|
|
FirstOpnd = 0;
|
|
NumRegs = 4;
|
|
return true;
|
|
|
|
case ARM::VLD2LNq16:
|
|
case ARM::VLD2LNq32:
|
|
FirstOpnd = 0;
|
|
NumRegs = 2;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD2LNq16odd:
|
|
case ARM::VLD2LNq32odd:
|
|
FirstOpnd = 0;
|
|
NumRegs = 2;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD3d8:
|
|
case ARM::VLD3d16:
|
|
case ARM::VLD3d32:
|
|
case ARM::VLD1d64T:
|
|
case ARM::VLD3LNd8:
|
|
case ARM::VLD3LNd16:
|
|
case ARM::VLD3LNd32:
|
|
FirstOpnd = 0;
|
|
NumRegs = 3;
|
|
return true;
|
|
|
|
case ARM::VLD3q8_UPD:
|
|
case ARM::VLD3q16_UPD:
|
|
case ARM::VLD3q32_UPD:
|
|
FirstOpnd = 0;
|
|
NumRegs = 3;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD3q8odd_UPD:
|
|
case ARM::VLD3q16odd_UPD:
|
|
case ARM::VLD3q32odd_UPD:
|
|
FirstOpnd = 0;
|
|
NumRegs = 3;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD3LNq16:
|
|
case ARM::VLD3LNq32:
|
|
FirstOpnd = 0;
|
|
NumRegs = 3;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD3LNq16odd:
|
|
case ARM::VLD3LNq32odd:
|
|
FirstOpnd = 0;
|
|
NumRegs = 3;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD4d8:
|
|
case ARM::VLD4d16:
|
|
case ARM::VLD4d32:
|
|
case ARM::VLD1d64Q:
|
|
case ARM::VLD4LNd8:
|
|
case ARM::VLD4LNd16:
|
|
case ARM::VLD4LNd32:
|
|
FirstOpnd = 0;
|
|
NumRegs = 4;
|
|
return true;
|
|
|
|
case ARM::VLD4q8_UPD:
|
|
case ARM::VLD4q16_UPD:
|
|
case ARM::VLD4q32_UPD:
|
|
FirstOpnd = 0;
|
|
NumRegs = 4;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD4q8odd_UPD:
|
|
case ARM::VLD4q16odd_UPD:
|
|
case ARM::VLD4q32odd_UPD:
|
|
FirstOpnd = 0;
|
|
NumRegs = 4;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD4LNq16:
|
|
case ARM::VLD4LNq32:
|
|
FirstOpnd = 0;
|
|
NumRegs = 4;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VLD4LNq16odd:
|
|
case ARM::VLD4LNq32odd:
|
|
FirstOpnd = 0;
|
|
NumRegs = 4;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST1q8:
|
|
case ARM::VST1q16:
|
|
case ARM::VST1q32:
|
|
case ARM::VST1q64:
|
|
case ARM::VST2d8:
|
|
case ARM::VST2d16:
|
|
case ARM::VST2d32:
|
|
case ARM::VST2LNd8:
|
|
case ARM::VST2LNd16:
|
|
case ARM::VST2LNd32:
|
|
FirstOpnd = 2;
|
|
NumRegs = 2;
|
|
return true;
|
|
|
|
case ARM::VST2q8:
|
|
case ARM::VST2q16:
|
|
case ARM::VST2q32:
|
|
FirstOpnd = 2;
|
|
NumRegs = 4;
|
|
return true;
|
|
|
|
case ARM::VST2LNq16:
|
|
case ARM::VST2LNq32:
|
|
FirstOpnd = 2;
|
|
NumRegs = 2;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST2LNq16odd:
|
|
case ARM::VST2LNq32odd:
|
|
FirstOpnd = 2;
|
|
NumRegs = 2;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST3d8:
|
|
case ARM::VST3d16:
|
|
case ARM::VST3d32:
|
|
case ARM::VST1d64T:
|
|
case ARM::VST3LNd8:
|
|
case ARM::VST3LNd16:
|
|
case ARM::VST3LNd32:
|
|
FirstOpnd = 2;
|
|
NumRegs = 3;
|
|
return true;
|
|
|
|
case ARM::VST3q8_UPD:
|
|
case ARM::VST3q16_UPD:
|
|
case ARM::VST3q32_UPD:
|
|
FirstOpnd = 4;
|
|
NumRegs = 3;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST3q8odd_UPD:
|
|
case ARM::VST3q16odd_UPD:
|
|
case ARM::VST3q32odd_UPD:
|
|
FirstOpnd = 4;
|
|
NumRegs = 3;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST3LNq16:
|
|
case ARM::VST3LNq32:
|
|
FirstOpnd = 2;
|
|
NumRegs = 3;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST3LNq16odd:
|
|
case ARM::VST3LNq32odd:
|
|
FirstOpnd = 2;
|
|
NumRegs = 3;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST4d8:
|
|
case ARM::VST4d16:
|
|
case ARM::VST4d32:
|
|
case ARM::VST1d64Q:
|
|
case ARM::VST4LNd8:
|
|
case ARM::VST4LNd16:
|
|
case ARM::VST4LNd32:
|
|
FirstOpnd = 2;
|
|
NumRegs = 4;
|
|
return true;
|
|
|
|
case ARM::VST4q8_UPD:
|
|
case ARM::VST4q16_UPD:
|
|
case ARM::VST4q32_UPD:
|
|
FirstOpnd = 4;
|
|
NumRegs = 4;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST4q8odd_UPD:
|
|
case ARM::VST4q16odd_UPD:
|
|
case ARM::VST4q32odd_UPD:
|
|
FirstOpnd = 4;
|
|
NumRegs = 4;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST4LNq16:
|
|
case ARM::VST4LNq32:
|
|
FirstOpnd = 2;
|
|
NumRegs = 4;
|
|
Offset = 0;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VST4LNq16odd:
|
|
case ARM::VST4LNq32odd:
|
|
FirstOpnd = 2;
|
|
NumRegs = 4;
|
|
Offset = 1;
|
|
Stride = 2;
|
|
return true;
|
|
|
|
case ARM::VTBL2:
|
|
FirstOpnd = 1;
|
|
NumRegs = 2;
|
|
return true;
|
|
|
|
case ARM::VTBL3:
|
|
FirstOpnd = 1;
|
|
NumRegs = 3;
|
|
return true;
|
|
|
|
case ARM::VTBL4:
|
|
FirstOpnd = 1;
|
|
NumRegs = 4;
|
|
return true;
|
|
|
|
case ARM::VTBX2:
|
|
FirstOpnd = 2;
|
|
NumRegs = 2;
|
|
return true;
|
|
|
|
case ARM::VTBX3:
|
|
FirstOpnd = 2;
|
|
NumRegs = 3;
|
|
return true;
|
|
|
|
case ARM::VTBX4:
|
|
FirstOpnd = 2;
|
|
NumRegs = 4;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
|
|
unsigned FirstOpnd, unsigned NumRegs) const {
|
|
MachineOperand &FMO = MI->getOperand(FirstOpnd);
|
|
assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand");
|
|
unsigned VirtReg = FMO.getReg();
|
|
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
|
|
"expected a virtual register");
|
|
if (FMO.isDef()) {
|
|
MachineInstr *RegSeq = 0;
|
|
for (unsigned R = 0; R < NumRegs; ++R) {
|
|
const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
|
|
assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
|
|
unsigned VirtReg = MO.getReg();
|
|
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
|
|
"expected a virtual register");
|
|
// Feeding into a REG_SEQUENCE.
|
|
if (!MRI->hasOneNonDBGUse(VirtReg))
|
|
return false;
|
|
MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
|
|
if (!UseMI->isRegSequence())
|
|
return false;
|
|
if (RegSeq && RegSeq != UseMI)
|
|
return false;
|
|
RegSeq = UseMI;
|
|
}
|
|
|
|
// Make sure trailing operands of REG_SEQUENCE are undef.
|
|
unsigned NumExps = (RegSeq->getNumOperands() - 1) / 2;
|
|
for (unsigned i = NumRegs * 2 + 1; i < NumExps; i += 2) {
|
|
const MachineOperand &MO = RegSeq->getOperand(i);
|
|
unsigned VirtReg = MO.getReg();
|
|
MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
|
|
if (!DefMI || !DefMI->isImplicitDef())
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
unsigned LastSrcReg = 0;
|
|
unsigned LastSubIdx = 0;
|
|
SmallVector<unsigned, 4> SubIds;
|
|
for (unsigned R = 0; R < NumRegs; ++R) {
|
|
const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
|
|
assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
|
|
unsigned VirtReg = MO.getReg();
|
|
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
|
|
"expected a virtual register");
|
|
// Extracting from a Q or QQ register.
|
|
MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
|
|
if (!DefMI || !DefMI->isExtractSubreg())
|
|
return false;
|
|
VirtReg = DefMI->getOperand(1).getReg();
|
|
if (LastSrcReg && LastSrcReg != VirtReg)
|
|
return false;
|
|
LastSrcReg = VirtReg;
|
|
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
|
|
if (RC != ARM::QPRRegisterClass && RC != ARM::QQPRRegisterClass)
|
|
return false;
|
|
unsigned SubIdx = DefMI->getOperand(2).getImm();
|
|
if (LastSubIdx) {
|
|
if (LastSubIdx != SubIdx-1)
|
|
return false;
|
|
} else {
|
|
// Must start from arm_dsubreg_0 or arm_qsubreg_0.
|
|
if (SubIdx != ARM::DSUBREG_0 && SubIdx != ARM::QSUBREG_0)
|
|
return false;
|
|
}
|
|
SubIds.push_back(SubIdx);
|
|
LastSubIdx = SubIdx;
|
|
}
|
|
|
|
// FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
|
|
// currently required for correctness. e.g.
|
|
// %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
|
|
// %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
|
|
// %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
|
|
// VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
|
|
// reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5
|
|
// respectively.
|
|
// We need to change how we model uses of REG_SEQUENCE.
|
|
for (unsigned R = 0; R < NumRegs; ++R) {
|
|
MachineOperand &MO = MI->getOperand(FirstOpnd + R);
|
|
unsigned OldReg = MO.getReg();
|
|
MachineInstr *DefMI = MRI->getVRegDef(OldReg);
|
|
assert(DefMI->isExtractSubreg());
|
|
MO.setReg(LastSrcReg);
|
|
MO.setSubReg(SubIds[R]);
|
|
if (R != 0)
|
|
MO.setIsKill(false);
|
|
// Delete the EXTRACT_SUBREG if its result is now dead.
|
|
if (MRI->use_empty(OldReg))
|
|
DefMI->eraseFromParent();
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// PreAllocNEONRegisters - Walk MBB and, for every instruction recognised by
/// isNEONMultiRegOp, make sure its register group ends up in suitable
/// registers.
///
/// If register sequences are being modelled and FormsRegSequence accepts the
/// instruction, nothing more is done for it here.  Otherwise each register
/// operand of the group is replaced with a fixed D register (D0-D7, selected
/// by Offset and Stride) and copies are inserted: before the instruction for
/// uses, after it for defs that are not dead.
///
/// Returns true if the block may have been changed.
bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
  // For now, just assign a fixed set of adjacent registers.
  // This leaves plenty of room for future improvements.
  static const unsigned NEONDRegs[] = {
    ARM::D0, ARM::D1, ARM::D2, ARM::D3,
    ARM::D4, ARM::D5, ARM::D6, ARM::D7
  };

  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  for (; MBBI != E; ++MBBI) {
    MachineInstr *MI = &*MBBI;
    unsigned FirstOpnd, NumRegs, Offset, Stride;
    if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
      continue;
    if (llvm::ModelWithRegSequence() &&
        FormsRegSequence(MI, FirstOpnd, NumRegs)) {
      // FormsRegSequence may have rewritten operands of MI and erased
      // EXTRACT_SUBREG instructions, so report a change conservatively.
      Modified = true;
      continue;
    }

    // Copies for defs go right after MI; remember that position before any
    // copies for uses are inserted in front of it.
    MachineBasicBlock::iterator NextI = llvm::next(MBBI);
    for (unsigned R = 0; R < NumRegs; ++R) {
      MachineOperand &MO = MI->getOperand(FirstOpnd + R);
      assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
      unsigned VirtReg = MO.getReg();
      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
             "expected a virtual register");

      assert(Offset + R * Stride < sizeof(NEONDRegs) / sizeof(NEONDRegs[0]) &&
             "register index out of range");
      MO.setReg(NEONDRegs[Offset + R * Stride]);
      Modified = true;

      if (MO.isUse()) {
        // Insert a copy from VirtReg.
        TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
                          ARM::DPRRegisterClass, ARM::DPRRegisterClass,
                          DebugLoc());
        // The virtual register now dies at the copy instead of at MI.
        if (MO.isKill()) {
          MachineInstr *CopyMI = prior(MBBI);
          CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
        }
        // The fixed register is only needed for this instruction.
        MO.setIsKill();
      } else if (MO.isDef() && !MO.isDead()) {
        // Add a copy to VirtReg.
        TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
                          ARM::DPRRegisterClass, ARM::DPRRegisterClass,
                          DebugLoc());
      }
    }
  }

  return Modified;
}
|
|
|
|
/// runOnMachineFunction - Cache the register and instruction info of MF, then
/// run PreAllocNEONRegisters over each of its basic blocks.  Returns true if
/// any block reported a change.
bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = MF.getTarget().getInstrInfo();

  bool Changed = false;
  MachineFunction::iterator BB = MF.begin();
  MachineFunction::iterator End = MF.end();
  while (BB != End) {
    // Every block is processed, whether or not an earlier one changed.
    if (PreAllocNEONRegisters(*BB))
      Changed = true;
    ++BB;
  }

  return Changed;
}
|
|
|
|
/// createNEONPreAllocPass - returns an instance of the NEON register
|
|
/// pre-allocation pass.
|
|
FunctionPass *llvm::createNEONPreAllocPass() {
|
|
return new NEONPreAllocPass();
|
|
}
|