R600/SI: Fix 64-bit bit ops that require the VALU.
Try to match the scalar AND first, like the other instructions. Expand a 64-bit AND into a pair of 32-bit ANDs, since the 64-bit form is not available on the VALU.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204660 91177308-0d34-0410-b5e6-96231b3b80d8
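The expansion rests on a simple bitwise identity: a 64-bit AND/OR/XOR/NOT is the same 32-bit operation applied independently to the low and high halves. A minimal C++ illustration of that identity (not code from the patch):

    #include <cstdint>

    // Illustration only: a 64-bit AND decomposes into two independent
    // 32-bit ANDs, one per half, which is exactly the shape the 32-bit
    // VALU can execute.
    uint64_t and64(uint64_t a, uint64_t b) {
      uint32_t lo = uint32_t(a) & uint32_t(b);             // low 32 bits
      uint32_t hi = uint32_t(a >> 32) & uint32_t(b >> 32); // high 32 bits
      return (uint64_t(hi) << 32) | lo;
    }

Since OR, XOR, and NOT split the same way, one helper (splitScalar64BitOp in the diff below) covers all of them.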
This commit is contained in:
parent d816eda3c7
commit db1807144a
lib/Target/R600/SIInstrInfo.cpp

@@ -879,6 +879,30 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       Inst->eraseFromParent();
       continue;
     }
+    case AMDGPU::S_AND_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_OR_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_XOR_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_NOT_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_BFE_U64:
+    case AMDGPU::S_BFE_I64:
+    case AMDGPU::S_BFM_B64:
+      llvm_unreachable("Moving this op to VALU not implemented");
+    }
 
     unsigned NewOpcode = getVALUOp(*Inst);
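For context, moveToVALU is a worklist-driven loop, and the new cases push the two freshly built 32-bit halves back onto that worklist so a later iteration gives each half the same SALU-to-VALU treatment (S_AND_B32 presumably becoming V_AND_B32_e32, and so on). A condensed, hypothetical sketch of that driving pattern, not the verbatim LLVM code:

    // Hypothetical condensation of the worklist pattern in moveToVALU:
    // splitting a 64-bit op re-queues its halves, so each half is itself
    // moved to the VALU on a later iteration.
    SmallVector<MachineInstr *, 128> Worklist;
    Worklist.push_back(&TopInst);
    while (!Worklist.empty()) {
      MachineInstr *Inst = Worklist.pop_back_val();
      // ...handle special cases (like the 64-bit splits above), otherwise
      // rewrite Inst to getVALUOp(*Inst) and legalize its operands...
    }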
@@ -968,6 +992,58 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
   return &AMDGPU::VReg_32RegClass;
 }
 
+void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                                     MachineInstr *Inst,
+                                     unsigned Opcode) const {
+  MachineBasicBlock &MBB = *Inst->getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  // We shouldn't need to worry about immediate operands here.
+  MachineOperand &Dest = Inst->getOperand(0);
+  MachineOperand &Src0 = Inst->getOperand(1);
+  MachineOperand &Src1 = Inst->getOperand(2);
+  DebugLoc DL = Inst->getDebugLoc();
+
+  MachineBasicBlock::iterator MII = Inst;
+
+  const MCInstrDesc &InstDesc = get(Opcode);
+  const TargetRegisterClass *RC = MRI.getRegClass(Src0.getReg());
+  const TargetRegisterClass *SubRC = RI.getSubRegClass(RC, AMDGPU::sub0);
+  unsigned SrcReg0Sub0 = buildExtractSubReg(MII, MRI, Src0, RC,
+                                            AMDGPU::sub0, SubRC);
+  unsigned SrcReg1Sub0 = buildExtractSubReg(MII, MRI, Src1, RC,
+                                            AMDGPU::sub0, SubRC);
+
+  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+    .addReg(SrcReg0Sub0)
+    .addReg(SrcReg1Sub0);
+
+  unsigned SrcReg0Sub1 = buildExtractSubReg(MII, MRI, Src0, RC,
+                                            AMDGPU::sub1, SubRC);
+  unsigned SrcReg1Sub1 = buildExtractSubReg(MII, MRI, Src1, RC,
+                                            AMDGPU::sub1, SubRC);
+
+  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+    .addReg(SrcReg0Sub1)
+    .addReg(SrcReg1Sub1);
+
+  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+    .addReg(DestSub0)
+    .addImm(AMDGPU::sub0)
+    .addReg(DestSub1)
+    .addImm(AMDGPU::sub1);
+
+  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+  // Try to legalize the operands in case we need to swap the order to keep it
+  // valid.
+  Worklist.push_back(LoHalf);
+  Worklist.push_back(HiHalf);
+}
+
 MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
                                    MachineBasicBlock *MBB,
                                    MachineBasicBlock::iterator I,
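Each 32-bit half is peeled off the 64-bit sources with buildExtractSubReg (its trailing parameters appear as context in the header diff below), and the REG_SEQUENCE at the end performs the inverse, reassembling DestSub0 and DestSub1 into one 64-bit register. A rough sketch of the helper's job, assuming it simply copies a sub-register into a fresh virtual register of the sub-register class (the real helper may handle more cases):

    // Assumed shape of buildExtractSubReg: materialize one half of a 64-bit
    // operand as its own 32-bit virtual register via a sub-register COPY.
    unsigned SubReg = MRI.createVirtualRegister(SubRC);
    BuildMI(*MII->getParent(), MII, DL, get(TargetOpcode::COPY), SubReg)
        .addReg(Op.getReg(), 0, SubIdx); // SubIdx = AMDGPU::sub0 or sub1
    return SubReg;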
lib/Target/R600/SIInstrInfo.h

@@ -38,6 +38,10 @@ private:
                             const TargetRegisterClass *RC,
                             const MachineOperand &Op) const;
 
+  void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                          MachineInstr *Inst, unsigned Opcode) const;
+
+
 public:
   explicit SIInstrInfo(AMDGPUTargetMachine &tm);
 
@@ -92,6 +96,7 @@ public:
 
   bool isSALUInstr(const MachineInstr &MI) const;
   static unsigned getVALUOp(const MachineInstr &MI);
+  bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
 
   /// \brief Return the correct register class for \p OpNo. For target-specific
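Only the declaration of isSALUOpSupportedOnVALU appears in the quoted hunks; its body is not shown here. A plausible one-liner, offered strictly as an assumption:

    // Assumed implementation: an SALU instruction can be moved to the VALU
    // iff getVALUOp knows an equivalent vector opcode for it.
    bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
      return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
    }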
lib/Target/R600/SIInstructions.td

@@ -1222,7 +1222,7 @@ def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
 >;
 
 def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
-  []
+  [(set i64:$dst, (or i64:$src0, i64:$src1))]
 >;
 
 def : Pat <
test/CodeGen/R600/or.ll

@@ -56,15 +56,34 @@ define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b)
   ret void
 }
 
-; EG-CHECK-LABEL: @or_i64
+; EG-CHECK-LABEL: @scalar_or_i64
 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; SI-CHECK-LABEL: @or_i64
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
-define void @or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
-  %0 = or i64 %a, %b
-  store i64 %0, i64 addrspace(1)* %out
-  ret void
+; SI-CHECK-LABEL: @scalar_or_i64
+; SI-CHECK: S_OR_B64
+define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = or i64 %a, %b
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @vector_or_i64
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 8
+  %loadb = load i64 addrspace(1)* %a, align 8
+  %or = or i64 %loada, %loadb
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @scalar_vector_or_i64
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
+  %loada = load i64 addrspace(1)* %a
+  %or = or i64 %loada, %b
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
 }
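Taken together, the tests pin down both sides of the fix: a 64-bit OR of two scalar (SGPR) inputs should still select as a single S_OR_B64, while the variants with at least one vector (VGPR) operand must end up as two V_OR_B32_e32 instructions, one per 32-bit half. (Note that vector_or_i64 loads %a twice; presumably %loadb was meant to load %b, though the two-instruction check holds either way.)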