From 655ba251b50346262bee125d6fb29bb086a883d2 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 22 Jan 2014 19:24:19 +0000 Subject: [PATCH] R600: Begin private memory at the second GPR. This way private memory does not over-write work group information stored in GPRs 0 and 1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199824 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUFrameLowering.cpp | 5 ++++- test/CodeGen/R600/private-memory.ll | 25 +++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp index 40f14d2f67c..40cc9083c00 100644 --- a/lib/Target/R600/AMDGPUFrameLowering.cpp +++ b/lib/Target/R600/AMDGPUFrameLowering.cpp @@ -74,7 +74,10 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const { int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned Offset = 0; + // Start the offset at 2 so we don't overwrite work group information. + // XXX: We should only do this when the shader actually uses this + // information. + unsigned Offset = 2; int UpperBound = FI == -1 ? 
MFI->getNumObjects() : FI; for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) { diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index 3fd67d75bb7..b25fc7ba572 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -156,3 +156,28 @@ entry: ret void } + +; Make sure we don't overwrite workitem information with private memory + +; FUNC-LABEL: @work_item_info +; R600-CHECK-NOT: MOV T0.X +; Additional check in case the move ends up in the last slot +; R600-CHECK-NOT: MOV * T0.X + +; SI-CHECK-NOT: V_MOV_B32_e{{(32|64)}} v0 +define void @work_item_info(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = alloca [2 x i32] + %1 = getelementptr [2 x i32]* %0, i32 0, i32 0 + %2 = getelementptr [2 x i32]* %0, i32 0, i32 1 + store i32 0, i32* %1 + store i32 1, i32* %2 + %3 = getelementptr [2 x i32]* %0, i32 0, i32 %in + %4 = load i32* %3 + %5 = call i32 @llvm.r600.read.tidig.x() + %6 = add i32 %4, %5 + store i32 %6, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.tidig.x() nounwind readnone