llvm-6502/test/CodeGen/R600/ds_write2.ll
David Blaikie 5a70dd1d82 [opaque pointer type] Add textual IR support for explicit type parameter to gep operator
Similar to gep (r230786) and load (r230794) changes.

Similar migration script can be used to update test cases, which
successfully migrated all of LLVM and Polly, but about 4 test cases
needed manually changes in Clang.

(this script will read the contents of stdin and massage it into stdout
- wrap it in the 'apply.sh' script shown in previous commits + xargs to
apply it over a large set of test cases)

import fileinput
import sys
import re

rep = re.compile(r"(getelementptr(?:\s+inbounds)?\s*\()((<\d*\s+x\s+)?([^@]*?)(|\s*addrspace\(\d+\))\s*\*(?(3)>)\s*)(?=$|%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|zeroinitializer|<|\[\[[a-zA-Z]|\{\{)", re.MULTILINE | re.DOTALL)

def conv(match):
  line = match.group(1)
  line += match.group(4)
  line += ", "
  line += match.group(2)
  return line

line = sys.stdin.read()
off = 0
for match in re.finditer(rep, line):
  sys.stdout.write(line[off:match.start()])
  sys.stdout.write(conv(match))
  off = match.end()
sys.stdout.write(line[off:])

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232184 91177308-0d34-0410-b5e6-96231b3b80d8
2015-03-13 18:20:45 +00:00

426 lines
22 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
; SI-LABEL: @simple_write2_one_val_f32
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
%val = load float, float addrspace(1)* %in.gep, align 4
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_f32
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
%in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
%val0 = load float, float addrspace(1)* %in.gep.0, align 4
%val1 = load float, float addrspace(1)* %in.gep.1, align 4
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_f32_volatile_0
; SI-NOT: ds_write2_b32
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
%val0 = load float, float addrspace(1)* %in0.gep, align 4
%val1 = load float, float addrspace(1)* %in1.gep, align 4
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_f32_volatile_1
; SI-NOT: ds_write2_b32
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
%val0 = load float, float addrspace(1)* %in0.gep, align 4
%val1 = load float, float addrspace(1)* %in1.gep, align 4
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; 2 data subregisters from different super registers.
; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
%in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
%val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
%val1 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
%val0.0 = extractelement <2 x float> %val0, i32 0
%val1.1 = extractelement <2 x float> %val1, i32 1
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0.0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1.1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_subreg2_f32
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
%val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
%val0 = extractelement <2 x float> %val, i32 0
%val1 = extractelement <2 x float> %val, i32 1
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_subreg4_f32
; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
%val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
%val0 = extractelement <4 x float> %val, i32 0
%val1 = extractelement <4 x float> %val, i32 3
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_max_offset_f32
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255
; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
%in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
%val0 = load float, float addrspace(1)* %in.gep.0, align 4
%val1 = load float, float addrspace(1)* %in.gep.1, align 4
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 255
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_too_far_f32
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: s_endpgm
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
%val0 = load float, float addrspace(1)* %in0.gep, align 4
%val1 = load float, float addrspace(1)* %in1.gep, align 4
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 257
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_f32_x2
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
%val0 = load float, float addrspace(1)* %in0.gep, align 4
%val1 = load float, float addrspace(1)* %in1.gep, align 4
%idx.0 = add nsw i32 %tid.x, 0
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
store float %val0, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
store float %val1, float addrspace(3)* %arrayidx1, align 4
%idx.2 = add nsw i32 %tid.x, 11
%arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
store float %val0, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
%arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
store float %val1, float addrspace(3)* %arrayidx3, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
%val0 = load float, float addrspace(1)* %in0.gep, align 4
%val1 = load float, float addrspace(1)* %in1.gep, align 4
%idx.0 = add nsw i32 %tid.x, 3
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
store float %val0, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
store float %val1, float addrspace(3)* %arrayidx1, align 4
%idx.2 = add nsw i32 %tid.x, 11
%arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
store float %val0, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
%arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
store float %val1, float addrspace(3)* %arrayidx3, align 4
ret void
}
; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
; SI-NOT: ds_write2_b32
; SI: ds_write_b32
; SI: ds_write_b32
; SI: s_endpgm
define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
%in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
%val0 = load float, float addrspace(1)* %in0.gep, align 4
%val1 = load float, float addrspace(1)* %in1.gep, align 4
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
%gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
; Apply an additional offset after the vector that will be more obviously folded.
%gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
store float %val0, float addrspace(3)* %gep.0, align 4
%add.x = add nsw i32 %x.i, 8
store float %val1, float addrspace(3)* %gep.1.offset, align 4
ret void
}
; SI-LABEL: @simple_write2_one_val_f64
; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
%val = load double, double addrspace(1)* %in.gep, align 8
%arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
store double %val, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
store double %val, double addrspace(3)* %arrayidx1, align 8
ret void
}
; SI-LABEL: @misaligned_simple_write2_one_val_f64
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
%val = load double, double addrspace(1)* %in.gep, align 8
%arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
store double %val, double addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 7
%arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
store double %val, double addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_f64
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
%in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
%val0 = load double, double addrspace(1)* %in.gep.0, align 8
%val1 = load double, double addrspace(1)* %in.gep.1, align 8
%arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
store double %val0, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
%arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
store double %val1, double addrspace(3)* %arrayidx1, align 8
ret void
}
@foo = addrspace(3) global [4 x i32] undef, align 4
; SI-LABEL: @store_constant_adjacent_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
define void @store_constant_adjacent_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
ret void
}
; SI-LABEL: @store_constant_disjoint_offsets
; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
define void @store_constant_disjoint_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
ret void
}
@bar = addrspace(3) global [4 x i64] undef, align 4
; SI-LABEL: @store_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
define void @store_misaligned64_constant_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
ret void
}
@bar.large = addrspace(3) global [4096 x i64] undef, align 4
; SI-LABEL: @store_misaligned64_constant_large_offsets
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
ret void
}
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
%val = load float, float addrspace(1)* %in
%arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
store float %val, float addrspace(3)* %arrayidx44, align 4
%add47 = add nsw i32 %x.i, 1
%arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
store float %val, float addrspace(3)* %arrayidx48, align 4
%add51 = add nsw i32 %x.i, 16
%arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
store float %val, float addrspace(3)* %arrayidx52, align 4
%add55 = add nsw i32 %x.i, 17
%arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
store float %val, float addrspace(3)* %arrayidx56, align 4
%arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
store float %val, float addrspace(3)* %arrayidx60, align 4
%add63 = add nsw i32 %y.i, 1
%arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
store float %val, float addrspace(3)* %arrayidx64, align 4
%add67 = add nsw i32 %y.i, 32
%arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
store float %val, float addrspace(3)* %arrayidx68, align 4
%add71 = add nsw i32 %y.i, 33
%arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
store float %val, float addrspace(3)* %arrayidx72, align 4
%add75 = add nsw i32 %y.i, 64
%arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
store float %val, float addrspace(3)* %arrayidx76, align 4
%add79 = add nsw i32 %y.i, 65
%arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
store float %val, float addrspace(3)* %arrayidx80, align 4
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1
; Function Attrs: noduplicate nounwind
declare void @llvm.AMDGPU.barrier.local() #2
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { noduplicate nounwind }