diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 2e08c9f5a5d..c319b32111f 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -88,7 +88,8 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); - void LoadM0(MachineInstr &MI, MachineInstr *MovRel); + void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0); + void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset); void IndirectSrc(MachineInstr &MI); void IndirectDst(MachineInstr &MI); @@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } -void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { +void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); @@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { unsigned Idx = MI.getOperand(3).getReg(); if (AMDGPU::SReg_32RegClass.contains(Idx)) { - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) - .addReg(Idx); + if (Offset) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) + .addReg(Idx) + .addImm(Offset); + } else { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(Idx); + } MBB.insert(I, MovRel); } else { @@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) .addReg(AMDGPU::VCC); + if (Offset) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) + .addReg(AMDGPU::M0) + .addImm(Offset); + } // Do the actual move MBB.insert(I, MovRel); @@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { MI.eraseFromParent(); } +/// \param @VecReg The register which holds element zero of the vector +/// being addressed into. +/// \param[out] @Reg The base register to use in the indirect addressing instruction. +/// \param[in,out] @Offset As an input, this is the constant offset part of the +// indirect Index. e.g. v0 = v[VecReg + Offset] +// As an output, this is a constant value that needs +// to be added to the value stored in M0. +void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg, + unsigned &Reg, + int &Offset) { + unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0); + if (!SubReg) + SubReg = VecReg; + + const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg); + int RegIdx = TRI->getHWRegIndex(SubReg) + Offset; + + if (RegIdx < 0) { + Offset = RegIdx; + RegIdx = 0; + } else { + Offset = 0; + } + + Reg = RC->getRegister(RegIdx); +} + void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); @@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Vec = MI.getOperand(2).getReg(); - unsigned Off = MI.getOperand(4).getImm(); - unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0); - if (!SubReg) - SubReg = Vec; + int Off = MI.getOperand(4).getImm(); + unsigned Reg; + + computeIndirectRegAndOffset(Vec, Reg, Off); MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) - .addReg(SubReg + Off) + .addReg(Reg) .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Vec, RegState::Implicit); - LoadM0(MI, MovRel); + LoadM0(MI, MovRel, Off); } void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { @@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { DebugLoc DL = MI.getDebugLoc(); unsigned Dst = MI.getOperand(0).getReg(); - unsigned Off = MI.getOperand(4).getImm(); + int Off = MI.getOperand(4).getImm(); unsigned Val = MI.getOperand(5).getReg(); - unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0); - if (!SubReg) - SubReg = Dst; + unsigned Reg; + + computeIndirectRegAndOffset(Dst, Reg, Off); MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) - .addReg(SubReg + Off, RegState::Define) + .addReg(Reg, RegState::Define) .addReg(Val) .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Dst, RegState::Implicit); - LoadM0(MI, MovRel); + LoadM0(MI, MovRel, Off); } bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll index 319910fcabf..f551606d63a 100644 --- a/test/CodeGen/R600/indirect-addressing-si.ll +++ b/test/CodeGen/R600/indirect-addressing-si.ll @@ -25,6 +25,33 @@ entry: ret void } +; CHECK-LABEL: {{^}}extract_neg_offset_sgpr: +; The offset depends on the register that holds the first element of the vector. +; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} +; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0 +define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) { +entry: + %index = add i32 %offset, -512 + %value = extractelement <4 x i32> , i32 %index + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}extract_neg_offset_vgpr: +; The offset depends on the register that holds the first element of the vector. +; CHECK: v_readfirstlane_b32 +; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}} +; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0 +; CHECK: s_cbranch_execnz +define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) { +entry: + %id = call i32 @llvm.r600.read.tidig.x() #1 + %index = add i32 %id, -512 + %value = extractelement <4 x i32> , i32 %index + store i32 %value, i32 addrspace(1)* %out + ret void +} + ; CHECK-LABEL: {{^}}insert_w_offset: ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movreld_b32_e32 @@ -47,3 +74,48 @@ entry: store float %1, float addrspace(1)* %out ret void } + +; CHECK-LABEL: {{^}}insert_neg_offset_sgpr: +; The offset depends on the register that holds the first element of the vector. +; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} +; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}} +define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) { +entry: + %index = add i32 %offset, -512 + %value = insertelement <4 x i32> , i32 5, i32 %index + store <4 x i32> %value, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}insert_neg_offset_vgpr: +; The offset depends on the register that holds the first element of the vector. +; CHECK: v_readfirstlane_b32 +; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}} +; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}} +; CHECK: s_cbranch_execnz +define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { +entry: + %id = call i32 @llvm.r600.read.tidig.x() #1 + %index = add i32 %id, -512 + %value = insertelement <4 x i32> , i32 5, i32 %index + store <4 x i32> %value, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr: +; The offset depends on the register that holds the first element of the vector. +; CHECK: v_readfirstlane_b32 +; CHECK: s_add_i32 m0, m0, -{{[0-9]+}} +; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}} +; CHECK: s_cbranch_execnz +define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { +entry: + %id = call i32 @llvm.r600.read.tidig.x() #1 + %index = add i32 %id, -16 + %value = insertelement <4 x i32> , i32 5, i32 %index + store <4 x i32> %value, <4 x i32> addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.tidig.x() #1 +attributes #1 = { nounwind readnone }