R600/SI: handle all registers in copyPhysReg v2

v2: based on Michels patch, but now allows copying of all registers sizes.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176346 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Christian Konig 2013-03-01 09:46:27 +00:00
parent 0916ac2100
commit 66501123d1

View File

@ -33,36 +33,108 @@ const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
void void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL, MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const { bool KillSrc) const {
// If we are trying to copy to or from SCC, there is a bug somewhere else in // If we are trying to copy to or from SCC, there is a bug somewhere else in
// the backend. While it may be theoretically possible to do this, it should // the backend. While it may be theoretically possible to do this, it should
// never be necessary. // never be necessary.
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
if (AMDGPU::VReg_64RegClass.contains(DestReg)) { const int16_t Sub0_15[] = {
assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
AMDGPU::SReg_64RegClass.contains(SrcReg)); AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0)) AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
.addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc)) AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
.addReg(DestReg, RegState::Define | RegState::Implicit); };
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1))
.addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc)); const int16_t Sub0_7[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
};
const int16_t Sub0_3[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
};
const int16_t Sub0_1[] = {
AMDGPU::sub0, AMDGPU::sub1, 0
};
unsigned Opcode;
const int16_t *SubIndices;
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc)); .addReg(SrcReg, getKillRegState(KillSrc));
return;
} else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
Opcode = AMDGPU::S_MOV_B32;
SubIndices = Sub0_3;
} else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
Opcode = AMDGPU::S_MOV_B32;
SubIndices = Sub0_7;
} else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
Opcode = AMDGPU::S_MOV_B32;
SubIndices = Sub0_15;
} else if (AMDGPU::VReg_32RegClass.contains(DestReg)) { } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_32RegClass.contains(SrcReg) || assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
AMDGPU::SReg_32RegClass.contains(SrcReg)); AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc)); .addReg(SrcReg, getKillRegState(KillSrc));
return;
} else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
AMDGPU::SReg_64RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_1;
} else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
AMDGPU::SReg_128RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_3;
} else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
AMDGPU::SReg_256RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_7;
} else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
AMDGPU::SReg_512RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_15;
} else { } else {
assert(AMDGPU::SReg_32RegClass.contains(DestReg)); llvm_unreachable("Can't copy register!");
assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); }
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc)); while (unsigned SubIdx = *SubIndices++) {
MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
get(Opcode), RI.getSubReg(DestReg, SubIdx));
Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
if (*SubIndices)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
} }
} }