From 1968da36ed044f0ad87cfc3614be9e48ac9cdd98 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 9 Jun 2014 16:36:31 +0000
Subject: [PATCH] R600/SI: Keep 64-bit not on SALU

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210476 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/SIInstrInfo.cpp   | 68 +++++++++++++++++++++++++++----
 lib/Target/R600/SIInstrInfo.h     |  7 +++-
 lib/Target/R600/SIInstructions.td |  4 +-
 test/CodeGen/R600/sub.ll          |  2 +-
 test/CodeGen/R600/xor.ll          | 40 ++++++++++++++++++
 5 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 4e1d4055fc5..dc964349191 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -655,6 +655,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
   case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
   case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
   case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
+  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
   case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
   case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
   case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
@@ -1157,22 +1158,22 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       continue;
     }
     case AMDGPU::S_AND_B64:
-      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
       Inst->eraseFromParent();
       continue;
 
     case AMDGPU::S_OR_B64:
-      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
       Inst->eraseFromParent();
       continue;
 
     case AMDGPU::S_XOR_B64:
-      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
       Inst->eraseFromParent();
       continue;
 
     case AMDGPU::S_NOT_B64:
-      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
       Inst->eraseFromParent();
       continue;
 
@@ -1297,9 +1298,62 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
   return &AMDGPU::VReg_32RegClass;
 }
 
-void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
-                                     MachineInstr *Inst,
-                                     unsigned Opcode) const {
+void SIInstrInfo::splitScalar64BitUnaryOp(
+  SmallVectorImpl<MachineInstr *> &Worklist,
+  MachineInstr *Inst,
+  unsigned Opcode) const {
+  MachineBasicBlock &MBB = *Inst->getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  MachineOperand &Dest = Inst->getOperand(0);
+  MachineOperand &Src0 = Inst->getOperand(1);
+  DebugLoc DL = Inst->getDebugLoc();
+
+  MachineBasicBlock::iterator MII = Inst;
+
+  const MCInstrDesc &InstDesc = get(Opcode);
+  const TargetRegisterClass *Src0RC = Src0.isReg() ?
+                                      MRI.getRegClass(Src0.getReg()) :
+                                      &AMDGPU::SGPR_32RegClass;
+
+  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+
+  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+                                                       AMDGPU::sub0, Src0SubRC);
+
+  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+
+  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+    .addOperand(SrcReg0Sub0);
+
+  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+                                                       AMDGPU::sub1, Src0SubRC);
+
+  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+    .addOperand(SrcReg0Sub1);
+
+  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+    .addReg(DestSub0)
+    .addImm(AMDGPU::sub0)
+    .addReg(DestSub1)
+    .addImm(AMDGPU::sub1);
+
+  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+  // Try to legalize the operands in case we need to swap the order to keep it
+  // valid.
+  Worklist.push_back(LoHalf);
+  Worklist.push_back(HiHalf);
+}
+
+void SIInstrInfo::splitScalar64BitBinaryOp(
+  SmallVectorImpl<MachineInstr *> &Worklist,
+  MachineInstr *Inst,
+  unsigned Opcode) const {
   MachineBasicBlock &MBB = *Inst->getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 7b31a81f0a8..a9b014f8805 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -44,8 +44,11 @@ private:
                          const TargetRegisterClass *RC,
                          const MachineOperand &Op) const;
 
-  void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist,
-                          MachineInstr *Inst, unsigned Opcode) const;
+  void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                               MachineInstr *Inst, unsigned Opcode) const;
+
+  void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                                MachineInstr *Inst, unsigned Opcode) const;
 
   void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
 
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 1b6c35d6951..aa817b853e7 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -96,7 +96,9 @@ def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32",
   [(set i32:$dst, (not i32:$src0))]
 >;
 
-def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64",
+  [(set i64:$dst, (not i64:$src0))]
+>;
 def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
 def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
 def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index e321ed67a69..58523d068e5 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -45,7 +45,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
 ;EG-DAG: SUB_INT
 ;EG-DAG: SUB_INT
 
-;SI: S_XOR_B64
+;SI: S_NOT_B64
 ;SI-DAG: S_ADD_I32
 ;SI-DAG: S_ADDC_U32
 ;SI-DAG: S_ADD_I32
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index 5a5c86d7ef5..00430417399 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -90,3 +90,43 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
+
+; SI-CHECK-LABEL: @vector_xor_i64
+; SI-CHECK: V_XOR_B32_e32
+; SI-CHECK: V_XOR_B32_e32
+; SI-CHECK: S_ENDPGM
+define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
+  %a = load i64 addrspace(1)* %in0
+  %b = load i64 addrspace(1)* %in1
+  %result = xor i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @scalar_xor_i64
+; SI-CHECK: S_XOR_B64
+; SI-CHECK: S_ENDPGM
+define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %result = xor i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @scalar_not_i64
+; SI-CHECK: S_NOT_B64
+define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) {
+  %result = xor i64 %a, -1
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @vector_not_i64
+; SI-CHECK: V_NOT_B32
+; SI-CHECK: V_NOT_B32
+define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
+  %a = load i64 addrspace(1)* %in0
+  %b = load i64 addrspace(1)* %in1
+  %result = xor i64 %a, -1
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}