R600/SI: Implement isLegalAddressingMode

The default implementation assumes that a 16-bit signed offset is used.
LDS instructions use a 16-bit unsigned offset, so the immediate offset
wasn't being used in some cases where it was assumed a negative offset
could be used.

More should be done here, but first isLegalAddressingMode needs
to gain an addressing mode argument. For now, copy most of the rest
of the default implementation with the immediate offset change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215732 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault
2014-08-15 17:17:07 +00:00
parent d84561bf69
commit ed76ca720b
3 changed files with 107 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
; Codegen regression test: the IR below is compiled by llc for -march=r600
; -mcpu=SI with -verify-machineinstrs, and the output is matched by FileCheck
; using the "SI" check prefix.
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; Intrinsic returning an i32 (attribute group #0). Presumably the work-item id
; in the x dimension, judging by the name -- confirm against the target docs.
declare i32 @llvm.r600.read.tidig.x() #0
; Intrinsic with no arguments and no result (attribute group #1). Presumably a
; local-memory barrier, judging by the name -- confirm against the target docs.
declare void @llvm.AMDGPU.barrier.local() #1
; Function Attrs: nounwind
; Test body: an 8-iteration loop that performs five float loads from the
; addrspace(3) pointer %lptr at element offsets +0, +1, +32, +33 and +64
; relative to a loop-carried index, sums them, and stores the running total to
; %out after the loop.
;
; The checks below require that, after the BB0_1 label, one V_ADD_I32 result
; (captured as VADDR) is the address operand of all five DS_READ_B32
; instructions, with the element offsets appearing as immediate byte offsets
; (4 bytes per float): 0x0, 0x4, 0x80, 0x84 and 0x100. The DS_READ_B32 lines
; use the -DAG check form, so the five loads may be matched in any order.
; SI-LABEL: @signed_ds_offset_addressing_loop
; SI: BB0_1:
; SI: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x4
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x80
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x84
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x100
; SI: S_ENDPGM
define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
entry:
; Initial element index into %lptr is 2 * (value returned by read.tidig.x).
%x.i = tail call i32 @llvm.r600.read.tidig.x() #0
%mul = shl nsw i32 %x.i, 1
br label %for.body
for.body: ; preds = %for.body, %entry
; Loop-carried state: running float sum, current element index, trip counter.
%sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
%offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
%k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
tail call void @llvm.AMDGPU.barrier.local() #1
; Load at element offset +0 (expected immediate 0x0).
%arrayidx = getelementptr inbounds float addrspace(3)* %lptr, i32 %offset.02
%tmp = load float addrspace(3)* %arrayidx, align 4
; Load at element offset +1 (expected immediate 0x4).
%add1 = add nsw i32 %offset.02, 1
%arrayidx2 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add1
%tmp1 = load float addrspace(3)* %arrayidx2, align 4
; Load at element offset +32 (expected immediate 0x80).
%add3 = add nsw i32 %offset.02, 32
%arrayidx4 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add3
%tmp2 = load float addrspace(3)* %arrayidx4, align 4
; Load at element offset +33 (expected immediate 0x84).
%add5 = add nsw i32 %offset.02, 33
%arrayidx6 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add5
%tmp3 = load float addrspace(3)* %arrayidx6, align 4
; Load at element offset +64 (expected immediate 0x100).
%add7 = add nsw i32 %offset.02, 64
%arrayidx8 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add7
%tmp4 = load float addrspace(3)* %arrayidx8, align 4
; Add the five loaded values together and fold them into the running sum.
%add9 = fadd float %tmp, %tmp1
%add10 = fadd float %add9, %tmp2
%add11 = fadd float %add10, %tmp3
%add12 = fadd float %add11, %tmp4
%add13 = fadd float %sum.03, %add12
; Advance the trip counter by 1 and the element index by 97; exit once the
; counter reaches 8.
%inc = add nsw i32 %k.01, 1
%add14 = add nsw i32 %offset.02, 97
%exitcond = icmp eq i32 %inc, 8
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
; Store the final sum to %out, indexed by the sign-extended read.tidig.x value.
%tmp5 = sext i32 %x.i to i64
%arrayidx15 = getelementptr inbounds float addrspace(1)* %out, i64 %tmp5
store float %add13, float addrspace(1)* %arrayidx15, align 4
ret void
}
; Attribute groups referenced above:
;   #0 - the @llvm.r600.read.tidig.x declaration and its call site.
;   #1 - the @llvm.AMDGPU.barrier.local declaration and its call site.
;   #2 - the test function @signed_ds_offset_addressing_loop.
attributes #0 = { nounwind readnone }
attributes #1 = { noduplicate nounwind }
attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }