From d3fc10a52532a8da060f8d00a9c545e0a6ce156e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 15 Oct 2014 21:08:59 +0000 Subject: [PATCH] R600/SI: Fix bug where immediates were being used in DS addr operands The SelectDS1Addr1Offset complex pattern always tries to store constant lds pointers in the offset operand and store a zero value in the addr operand. Since the addr operand does not accept immediates, the zero value needs to first be copied to a register. This newly created zero value will not go through normal instruction selection, so we need to manually insert a V_MOV_B32_e32 in the complex pattern. This bug was hidden by the fact that if there was another zero value in the DAG that had not been selected yet, then the CSE done by the DAG would use the unselected node for the addr operand rather than the one that was just created. This would lead to the zero value being selected and the DAG automatically inserting a V_MOV_B32_e32 instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219848 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 5 ++++- test/CodeGen/R600/load.ll | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index becb7112217..c7539716597 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -793,7 +793,10 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, // into read2 / write2 instructions. 
if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { if (isUInt<16>(CAddr->getZExtValue())) { - Base = CurDAG->getConstant(0, MVT::i32); + SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32); + MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, + SDLoc(Addr), MVT::i32, Zero); + Base = SDValue(MovZero, 0); Offset = Addr; return true; } diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index ff489b58f81..882976b6d2a 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -719,3 +719,21 @@ define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3) store <2 x i32> %vec, <2 x i32> addrspace(1)* %out ret void } + + +@lds = addrspace(3) global [512 x i32] zeroinitializer, align 4 + +; On SI we need to make sure that the base offset is a register and not +; an immediate. +; FUNC-LABEL: {{^}}load_i32_local_const_ptr: +; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]+]], 0 +; SI-CHECK: DS_READ_B32 v0, v[[ZERO]] offset:4 +; R600-CHECK: LDS_READ_RET +define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %tmp0 = getelementptr [512 x i32] addrspace(3)* @lds, i32 0, i32 1 + %tmp1 = load i32 addrspace(3)* %tmp0 + %tmp2 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 %tmp1, i32 addrspace(1)* %tmp2 + ret void +}