From e7d4e83702722ad9de8719d80a410144a43034f5 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 22 Jan 2014 19:24:23 +0000 Subject: [PATCH] R600: Take alignment into account when calculating the stack offset git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199826 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUFrameLowering.cpp | 15 ++++++++---- test/CodeGen/R600/private-memory.ll | 31 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp index 40cc9083c00..0325a00c177 100644 --- a/lib/Target/R600/AMDGPUFrameLowering.cpp +++ b/lib/Target/R600/AMDGPUFrameLowering.cpp @@ -77,14 +77,21 @@ int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, // Start the offset at 2 so we don't overwrite work group information. // XXX: We should only do this when the shader actually uses this // information. - unsigned Offset = 2; + unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4); int UpperBound = FI == -1 ? MFI->getNumObjects() : FI; for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) { - unsigned Size = MFI->getObjectSize(i); - Offset += (Size / (getStackWidth(MF) * 4)); + OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i)); + OffsetBytes += MFI->getObjectSize(i); + // Each register holds 4 bytes, so we must always align the offset to at + // least 4 bytes, so that 2 frame objects won't share the same register. 
+ OffsetBytes = RoundUpToAlignment(OffsetBytes, 4); } - return Offset; + + if (FI != -1) + OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(FI)); + + return OffsetBytes / (getStackWidth(MF) * 4); } const TargetFrameLowering::SpillSlot * diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index b25fc7ba572..1bd17bf6048 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -180,4 +180,35 @@ entry: ret void } +; Test that two stack objects are not stored in the same register +; The second stack object should be in T3.X +; FUNC-LABEL: @no_overlap +; R600-CHECK: MOV {{\** *}}T3.X +; SI-CHECK: V_MOV_B32_e32 v3 +define void @no_overlap(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = alloca [3 x i8], align 1 + %1 = alloca [2 x i8], align 1 + %2 = getelementptr [3 x i8]* %0, i32 0, i32 0 + %3 = getelementptr [3 x i8]* %0, i32 0, i32 1 + %4 = getelementptr [3 x i8]* %0, i32 0, i32 2 + %5 = getelementptr [2 x i8]* %1, i32 0, i32 0 + %6 = getelementptr [2 x i8]* %1, i32 0, i32 1 + store i8 0, i8* %2 + store i8 1, i8* %3 + store i8 2, i8* %4 + store i8 1, i8* %5 + store i8 0, i8* %6 + %7 = getelementptr [3 x i8]* %0, i32 0, i32 %in + %8 = getelementptr [2 x i8]* %1, i32 0, i32 %in + %9 = load i8* %7 + %10 = load i8* %8 + %11 = add i8 %9, %10 + %12 = sext i8 %11 to i32 + store i32 %12, i32 addrspace(1)* %out + ret void +} + + + declare i32 @llvm.r600.read.tidig.x() nounwind readnone