From ec4cb3346dab2ba1816203aa3941a1f7e4939b1b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 22 Aug 2014 18:49:33 +0000 Subject: [PATCH] R600/SI: Use a ComplexPattern for DS loads and stores git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216278 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 38 ++++++ lib/Target/R600/SIInstrInfo.td | 2 + lib/Target/R600/SIInstructions.td | 50 +++---- .../R600/32-bit-local-address-space.ll | 27 ++-- ...ds-negative-offset-addressing-mode-loop.ll | 29 ++-- test/CodeGen/R600/gep-address-space.ll | 8 +- test/CodeGen/R600/local-64.ll | 126 +++++++++--------- test/CodeGen/R600/local-memory-two-objects.ll | 9 +- 8 files changed, 169 insertions(+), 120 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index ee7d217b7b8..7911b6f3302 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -88,6 +88,9 @@ private: SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); + bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, + unsigned OffsetBits) const; + bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, @@ -744,6 +747,41 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); } +bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, + unsigned OffsetBits) const { + const AMDGPUSubtarget &ST = TM.getSubtarget(); + if ((OffsetBits == 16 && !isUInt<16>(Offset)) || + (OffsetBits == 8 && !isUInt<8>(Offset))) + return false; + + if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) + return true; + + // On 
Southern Islands, instructions with a negative base value and an offset + // don't seem to work. + return CurDAG->SignBitIsZero(Base); +} + +bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + ConstantSDNode *C1 = cast(N1); + if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { + // (add n0, c0) + Base = N0; + Offset = N1; + return true; + } + } + + // default case + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i16); + return true; +} + static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) { return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32, Ptr), 0); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 7560cef4c95..5357af97422 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -191,6 +191,8 @@ def tfe : Operand { // Complex patterns //===----------------------------------------------------------------------===// +def DS1Addr1Offset : ComplexPattern; + def MUBUFAddr32 : ComplexPattern; def MUBUFAddr64 : ComplexPattern; def MUBUFScratch : ComplexPattern; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 01db459e3f2..40fca9f264f 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -2520,41 +2520,27 @@ def : ROTRPattern ; /********** Load/Store Patterns **********/ /********** ======================= **********/ -multiclass DSReadPat { - def : Pat < - (vt (frag (add i32:$ptr, (i32 IMM16bit:$offset)))), - (inst (i1 0), $ptr, (as_i16imm $offset)) - >; +class DSReadPat : Pat < + (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), + (inst (i1 0), $ptr, (as_i16imm $offset)) +>; - def : Pat < - (frag i32:$src0), - (vt (inst 0, $src0, 0)) - >; -} +def : DSReadPat ; +def : DSReadPat ; +def : DSReadPat ; +def : DSReadPat ; 
+def : DSReadPat ; +def : DSReadPat ; -defm : DSReadPat ; -defm : DSReadPat ; -defm : DSReadPat ; -defm : DSReadPat ; -defm : DSReadPat ; -defm : DSReadPat ; +class DSWritePat : Pat < + (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)), + (inst (i1 0), $ptr, $value, (as_i16imm $offset)) +>; -multiclass DSWritePat { - def : Pat < - (frag vt:$value, (add i32:$ptr, (i32 IMM16bit:$offset))), - (inst (i1 0), $ptr, $value, (as_i16imm $offset)) - >; - - def : Pat < - (frag vt:$val, i32:$ptr), - (inst 0, $ptr, $val, 0) - >; -} - -defm : DSWritePat ; -defm : DSWritePat ; -defm : DSWritePat ; -defm : DSWritePat ; +def : DSWritePat ; +def : DSWritePat ; +def : DSWritePat ; +def : DSWritePat ; multiclass DSAtomicRetPat { def : Pat < diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll index 7dec4263742..e13d719f626 100644 --- a/test/CodeGen/R600/32-bit-local-address-space.ll +++ b/test/CodeGen/R600/32-bit-local-address-space.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=FUNC %s ; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and ; the global address space(1) uses 64-bit pointers. These tests check to make sure @@ -9,7 +10,7 @@ ; Instructions with B32, U32, and I32 in their name take 32-bit operands, while ; instructions with B64, U64, and I64 take 64-bit operands. 
-; CHECK-LABEL: @local_address_load +; FUNC-LABEL: @local_address_load ; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]] ; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]] define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { @@ -19,7 +20,7 @@ entry: ret void } -; CHECK-LABEL: @local_address_gep +; FUNC-LABEL: @local_address_gep ; CHECK: S_ADD_I32 [[SPTR:s[0-9]]] ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; CHECK: DS_READ_B32 [[VPTR]] @@ -31,7 +32,7 @@ entry: ret void } -; CHECK-LABEL: @local_address_gep_const_offset +; FUNC-LABEL: @local_address_gep_const_offset ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}} ; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4, define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { @@ -43,7 +44,7 @@ entry: } ; Offset too large, can't fold into 16-bit immediate offset. -; CHECK-LABEL: @local_address_gep_large_const_offset +; FUNC-LABEL: @local_address_gep_large_const_offset ; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; CHECK: DS_READ_B32 [[VPTR]] @@ -55,7 +56,7 @@ entry: ret void } -; CHECK-LABEL: @null_32bit_lds_ptr: +; FUNC-LABEL: @null_32bit_lds_ptr: ; CHECK: V_CMP_NE_I32 ; CHECK-NOT: V_CMP_NE_I32 ; CHECK: V_CNDMASK_B32 @@ -66,7 +67,7 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) ret void } -; CHECK-LABEL: @mul_32bit_ptr: +; FUNC-LABEL: @mul_32bit_ptr: ; CHECK: V_MUL_LO_I32 ; CHECK-NEXT: V_ADD_I32_e32 ; CHECK-NEXT: DS_READ_B32 @@ -79,7 +80,7 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* % @g_lds = addrspace(3) global float zeroinitializer, align 4 -; CHECK-LABEL: @infer_ptr_alignment_global_offset: +; FUNC-LABEL: @infer_ptr_alignment_global_offset: ; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0 ; CHECK: DS_READ_B32 v{{[0-9]+}}, [[REG]] define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) { @@ -92,21 +93,21 @@ 
define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti @ptr = addrspace(3) global i32 addrspace(3)* null @dst = addrspace(3) global [16384 x i32] zeroinitializer -; CHECK-LABEL: @global_ptr: +; FUNC-LABEL: @global_ptr: ; CHECK: DS_WRITE_B32 define void @global_ptr() nounwind { store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr ret void } -; CHECK-LABEL: @local_address_store +; FUNC-LABEL: @local_address_store ; CHECK: DS_WRITE_B32 define void @local_address_store(i32 addrspace(3)* %out, i32 %val) { store i32 %val, i32 addrspace(3)* %out ret void } -; CHECK-LABEL: @local_address_gep_store +; FUNC-LABEL: @local_address_gep_store ; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]], ; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]] ; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}}, @@ -116,7 +117,7 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 ret void } -; CHECK-LABEL: @local_address_gep_const_offset_store +; FUNC-LABEL: @local_address_gep_const_offset_store ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}} ; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}} ; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4 @@ -127,7 +128,7 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v } ; Offset too large, can't fold into 16-bit immediate offset. 
-; CHECK-LABEL: @local_address_gep_large_const_offset_store +; FUNC-LABEL: @local_address_gep_large_const_offset_store ; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0 diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll index d768c0e4ba9..231cf3d6883 100644 --- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll +++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll @@ -1,18 +1,29 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s declare i32 @llvm.r600.read.tidig.x() #0 declare void @llvm.AMDGPU.barrier.local() #1 ; Function Attrs: nounwind -; SI-LABEL: @signed_ds_offset_addressing_loop -; SI: BB0_1: -; SI: V_ADD_I32_e32 [[VADDR:v[0-9]+]], +; CHECK-LABEL: @signed_ds_offset_addressing_loop +; CHECK: BB0_1: +; CHECK: V_ADD_I32_e32 [[VADDR:v[0-9]+]], ; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0 -; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x4 -; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x80 -; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x84 -; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x100 -; SI: S_ENDPGM +; SI-DAG: V_ADD_I32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]] +; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]], 0x0 +; SI-DAG: V_ADD_I32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]] +; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]], 0x0 +; SI-DAG: V_ADD_I32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]] +; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]], 0x0 +; SI-DAG: V_ADD_I32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]] +; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]], 0x0 + +; 
CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0 +; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x4 +; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x80 +; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x84 +; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x100 +; CHECK: S_ENDPGM define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 { entry: %x.i = tail call i32 @llvm.r600.read.tidig.x() #0 diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll index ab2c0bf92fe..cd698f4d6b4 100644 --- a/test/CodeGen/R600/gep-address-space.ll +++ b/test/CodeGen/R600/gep-address-space.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: @use_gep_address_space: @@ -11,7 +12,10 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: @use_gep_address_space_large_offset: -; CHECK: S_ADD_I32 +; The LDS offset will be 65536 bytes, which is larger than the size of LDS on +; SI, which is why it is being OR'd with the base pointer. 
+; SI: S_OR_B32 +; CI: S_ADD_I32 ; CHECK: DS_WRITE_B32 %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384 store i32 99, i32 addrspace(3)* %p diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll index c52b41bb1b5..ef48eefa5ee 100644 --- a/test/CodeGen/R600/local-64.ll +++ b/test/CodeGen/R600/local-64.ll @@ -1,8 +1,9 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s -; SI-LABEL: @local_i32_load -; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0] -; SI: BUFFER_STORE_DWORD [[REG]], +; BOTH-LABEL: @local_i32_load +; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0] +; BOTH: BUFFER_STORE_DWORD [[REG]], define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind { %gep = getelementptr i32 addrspace(3)* %in, i32 7 %val = load i32 addrspace(3)* %gep, align 4 @@ -10,19 +11,19 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw ret void } -; SI-LABEL: @local_i32_load_0_offset -; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0] -; SI: BUFFER_STORE_DWORD [[REG]], +; BOTH-LABEL: @local_i32_load_0_offset +; BOTH: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0] +; BOTH: BUFFER_STORE_DWORD [[REG]], define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind { %val = load i32 addrspace(3)* %in, align 4 store i32 %val, i32 addrspace(1)* %out, align 4 ret void } -; SI-LABEL: @local_i8_load_i16_max_offset -; SI-NOT: ADD -; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0] -; SI: BUFFER_STORE_BYTE [[REG]], +; BOTH-LABEL: @local_i8_load_i16_max_offset +; BOTH-NOT: ADD +; BOTH: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0] +; BOTH: BUFFER_STORE_BYTE 
[[REG]], define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind { %gep = getelementptr i8 addrspace(3)* %in, i32 65535 %val = load i8 addrspace(3)* %gep, align 4 @@ -30,11 +31,14 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3) ret void } -; SI-LABEL: @local_i8_load_over_i16_max_offset -; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 -; SI: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]] -; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0] -; SI: BUFFER_STORE_BYTE [[REG]], +; BOTH-LABEL: @local_i8_load_over_i16_max_offset +; The LDS offset will be 65536 bytes, which is larger than the size of LDS on +; SI, which is why it is being OR'd with the base pointer. +; SI: S_OR_B32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 +; CI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 +; BOTH: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]] +; BOTH: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0] +; BOTH: BUFFER_STORE_BYTE [[REG]], define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind { %gep = getelementptr i8 addrspace(3)* %in, i32 65536 %val = load i8 addrspace(3)* %gep, align 4 @@ -42,10 +46,10 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa ret void } -; SI-LABEL: @local_i64_load -; SI-NOT: ADD -; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0] -; SI: BUFFER_STORE_DWORDX2 [[REG]], +; BOTH-LABEL: @local_i64_load +; BOTH-NOT: ADD +; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0] +; BOTH: BUFFER_STORE_DWORDX2 [[REG]], define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind { %gep = getelementptr i64 addrspace(3)* %in, i32 7 %val = load i64 addrspace(3)* %gep, align 8 @@ -53,19 +57,19 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw ret void } -; SI-LABEL: @local_i64_load_0_offset -; SI: DS_READ_B64 
[[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0] -; SI: BUFFER_STORE_DWORDX2 [[REG]], +; BOTH-LABEL: @local_i64_load_0_offset +; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0] +; BOTH: BUFFER_STORE_DWORDX2 [[REG]], define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind { %val = load i64 addrspace(3)* %in, align 8 store i64 %val, i64 addrspace(1)* %out, align 8 ret void } -; SI-LABEL: @local_f64_load -; SI-NOT: ADD -; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0] -; SI: BUFFER_STORE_DWORDX2 [[REG]], +; BOTH-LABEL: @local_f64_load +; BOTH-NOT: ADD +; BOTH: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0] +; BOTH: BUFFER_STORE_DWORDX2 [[REG]], define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind { %gep = getelementptr double addrspace(3)* %in, i32 7 %val = load double addrspace(3)* %gep, align 8 @@ -73,85 +77,85 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) ret void } -; SI-LABEL: @local_f64_load_0_offset -; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0] -; SI: BUFFER_STORE_DWORDX2 [[REG]], +; BOTH-LABEL: @local_f64_load_0_offset +; BOTH: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0] +; BOTH: BUFFER_STORE_DWORDX2 [[REG]], define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind { %val = load double addrspace(3)* %in, align 8 store double %val, double addrspace(1)* %out, align 8 ret void } -; SI-LABEL: @local_i64_store -; SI-NOT: ADD -; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0] +; BOTH-LABEL: @local_i64_store +; BOTH-NOT: ADD +; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0] define void @local_i64_store(i64 addrspace(3)* %out) nounwind { %gep = getelementptr i64 addrspace(3)* %out, i32 7 store i64 5678, i64 addrspace(3)* %gep, align 8 ret void } -; SI-LABEL: @local_i64_store_0_offset 
-; SI-NOT: ADD -; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] +; BOTH-LABEL: @local_i64_store_0_offset +; BOTH-NOT: ADD +; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind { store i64 1234, i64 addrspace(3)* %out, align 8 ret void } -; SI-LABEL: @local_f64_store -; SI-NOT: ADD -; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0] +; BOTH-LABEL: @local_f64_store +; BOTH-NOT: ADD +; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0] define void @local_f64_store(double addrspace(3)* %out) nounwind { %gep = getelementptr double addrspace(3)* %out, i32 7 store double 16.0, double addrspace(3)* %gep, align 8 ret void } -; SI-LABEL: @local_f64_store_0_offset -; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] +; BOTH-LABEL: @local_f64_store_0_offset +; BOTH: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind { store double 20.0, double addrspace(3)* %out, align 8 ret void } -; SI-LABEL: @local_v2i64_store -; SI-NOT: ADD -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0] +; BOTH-LABEL: @local_v2i64_store +; BOTH-NOT: ADD +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0] +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0] define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7 store <2 x i64> , <2 x i64> addrspace(3)* %gep, align 16 ret void } -; SI-LABEL: @local_v2i64_store_0_offset -; SI-NOT: ADD -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] +; BOTH-LABEL: @local_v2i64_store_0_offset +; BOTH-NOT: ADD +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, 
{{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0] +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind { store <2 x i64> , <2 x i64> addrspace(3)* %out, align 16 ret void } -; SI-LABEL: @local_v4i64_store -; SI-NOT: ADD -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0] +; BOTH-LABEL: @local_v4i64_store +; BOTH-NOT: ADD +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0] +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0] +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0] +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0] define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7 store <4 x i64> , <4 x i64> addrspace(3)* %gep, align 16 ret void } -; SI-LABEL: @local_v4i64_store_0_offset -; SI-NOT: ADD -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] +; BOTH-LABEL: @local_v4i64_store_0_offset +; BOTH-NOT: ADD +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0] +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0] +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0] +; BOTH-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind { store <4 x i64> , <4 x i64> addrspace(3)* %out, align 16 ret void diff --git 
a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll index e29e4cc88fd..f0f68243ebf 100644 --- a/test/CodeGen/R600/local-memory-two-objects.ll +++ b/test/CodeGen/R600/local-memory-two-objects.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=SI %s +; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=CI %s @local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4 @local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4 @@ -28,8 +29,10 @@ ; constant offsets. ; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] ; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] -; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10 -; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0, +; SI: V_ADD_I32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}} +; SI: DS_READ_B32 {{v[0-9]+}}, [[SIPTR]], 0x0 +; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10 +; CI: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0, define void @local_memory_two_objects(i32 addrspace(1)* %out) { entry: