llvm-6502/lib/Target/R600/AMDGPUFrameLowering.cpp

//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Interface to describe the layout of a stack frame on an AMDIL target machine
//
//===----------------------------------------------------------------------===//
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;
/// Forwards all four arguments, unchanged and in the same order, to the
/// TargetFrameLowering base-class constructor.  The constructor body itself
/// does nothing.
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D,
                                         unsigned StackAl,
                                         int LAO,
                                         unsigned TransAl)
    : TargetFrameLowering(D, StackAl, LAO, TransAl) {
}

/// Out-of-line destructor; it performs no work of its own.
AMDGPUFrameLowering::~AMDGPUFrameLowering() {
}

/// \returns The stack width used for \p MF.  Currently this is always 1; the
/// argument is not consulted.
///
/// The StackWidth determines how stack objects are laid out in memory.
/// For a vector stack variable, like: int4 stack[2], the data will be stored
/// in the following ways depending on the StackWidth.
///
/// StackWidth = 1:
///
///   T0.X = stack[0].x
///   T1.X = stack[0].y
///   T2.X = stack[0].z
///   T3.X = stack[0].w
///   T4.X = stack[1].x
///   T5.X = stack[1].y
///   T6.X = stack[1].z
///   T7.X = stack[1].w
///
/// StackWidth = 2:
///
///   T0.X = stack[0].x
///   T0.Y = stack[0].y
///   T1.X = stack[0].z
///   T1.Y = stack[0].w
///   T2.X = stack[1].x
///   T2.Y = stack[1].y
///   T3.X = stack[1].z
///   T3.Y = stack[1].w
///
/// StackWidth = 4:
///
///   T0.X = stack[0].x
///   T0.Y = stack[0].y
///   T0.Z = stack[0].z
///   T0.W = stack[0].w
///   T1.X = stack[1].x
///   T1.Y = stack[1].y
///   T1.Z = stack[1].z
///   T1.W = stack[1].w
unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
  // XXX: Hardcoding to 1 for now.
  //
  // I think the StackWidth should be stored as metadata associated with the
  // MachineFunction.  This metadata can either be added by a frontend, or
  // calculated by an R600 specific LLVM IR pass.
  const unsigned HardcodedStackWidth = 1;
  return HardcodedStackWidth;
}

/// Computes where frame index \p FI lives on the stack.
///
/// Frame objects are laid out back to back in index order, starting just past
/// a reserved area of two stack slots.  Every object starts at its own
/// alignment and is padded out to a multiple of 4 bytes.
///
/// \param MF  Function whose frame layout is queried.
/// \param FI  Frame index to locate, or -1 to measure the whole frame.
/// \returns The offset of \p FI, counted in stack slots of
///          getStackWidth(MF) * 4 bytes each.  When \p FI is -1 the result is
///          instead the first slot past the last frame object, i.e. the
///          number of slots in use including the reserved area.
int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                             int FI) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Size in bytes of one stack slot: StackWidth channels of 4 bytes each.
  const unsigned SlotBytes = getStackWidth(MF) * 4;

  // Start the offset at 2 so we don't overwrite work group information.
  // XXX: We should only do this when the shader actually uses this
  // information.
  unsigned OffsetBytes = 2 * SlotBytes;

  // Valid frame indices run from getObjectIndexBegin() up to, but not
  // including, getObjectIndexEnd().  getNumObjects() also counts fixed
  // objects (which have negative indices), so using it as the bound would
  // walk past the last valid index whenever fixed objects exist.
  // NOTE(review): -1 is also the index of a fixed object when any exist, so
  // such an object cannot be queried through this function -- confirm that
  // this target never creates fixed objects.
  const int UpperBound = FI == -1 ? MFI->getObjectIndexEnd() : FI;

  for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
    OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i));
    OffsetBytes += MFI->getObjectSize(i);
    // Each register holds 4 bytes, so we must always align the offset to at
    // least 4 bytes, so that 2 frame objects won't share the same register.
    OffsetBytes = RoundUpToAlignment(OffsetBytes, 4);
  }

  // The loop only leaves the running offset 4-byte aligned; the requested
  // object may need a stricter alignment of its own.
  if (FI != -1)
    OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(FI));

  return OffsetBytes / SlotBytes;
}

/// Reports an empty spill-slot table.
///
/// \param [out] NumEntries  Always set to 0.
/// \returns A null pointer.
const TargetFrameLowering::SpillSlot *
AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  const TargetFrameLowering::SpillSlot *NoSlots = 0;
  NumEntries = 0;
  return NoSlots;
}
/// Prologue hook.  This implementation has an empty body and leaves \p MF
/// untouched.
void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
  // Nothing to emit.
}
/// Epilogue hook.  This implementation has an empty body and leaves both
/// \p MF and \p MBB untouched.
void AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  // Nothing to emit.
}

/// \returns false unconditionally; \p MF is not inspected.
bool AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
  const bool Result = false;
  return Result;
}
R600: Support for indirect addressing v4 Only implemented for R600 so far. SI is missing implementations of a few callbacks used by the Indirect Addressing pass and needs code to handle frame indices. At the moment R600 only supports array sizes of 16 dwords or less. Register packing of vector types is currently disabled, which means that a vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order to correctly pack registers in all cases, we will need to implement an analysis pass for R600 that determines the correct vector width for each array. v2: - Add support for i8 zext load from stack. - Coding style fixes v3: - Don't reserve registers for indirect addressing when it isn't being used. - Fix bug caused by LLVM limiting the number of SubRegIndex declarations. v4: - Fix 64-bit defines git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174525 91177308-0d34-0410-b5e6-96231b3b80d8 2013-02-06 17:32:29 +00:00			`//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//==-----------------------------------------------------------------------===//`
			`//`
			`// Interface to describe a layout of a stack frame on a AMDIL target machine`
			`//`
			`//===----------------------------------------------------------------------===//`
			`#include "AMDGPUFrameLowering.h"`
			`#include "AMDGPURegisterInfo.h"`
			`#include "R600MachineFunctionInfo.h"`
			`#include "llvm/CodeGen/MachineFrameInfo.h"`
			`#include "llvm/CodeGen/MachineRegisterInfo.h"`
			`#include "llvm/IR/Instructions.h"`

			`using namespace llvm;`
			`AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,`
			`int LAO, unsigned TransAl)`
			`: TargetFrameLowering(D, StackAl, LAO, TransAl) { }`

			`AMDGPUFrameLowering::~AMDGPUFrameLowering() { }`

			`unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {`

			`// XXX: Hardcoding to 1 for now.`
			`//`
			`// I think the StackWidth should stored as metadata associated with the`
			`// MachineFunction. This metadata can either be added by a frontend, or`
			`// calculated by a R600 specific LLVM IR pass.`
			`//`
			`// The StackWidth determines how stack objects are laid out in memory.`
			`// For a vector stack variable, like: int4 stack[2], the data will be stored`
			`// in the following ways depending on the StackWidth.`
			`//`
			`// StackWidth = 1:`
			`//`
			`// T0.X = stack[0].x`
			`// T1.X = stack[0].y`
			`// T2.X = stack[0].z`
			`// T3.X = stack[0].w`
			`// T4.X = stack[1].x`
			`// T5.X = stack[1].y`
			`// T6.X = stack[1].z`
			`// T7.X = stack[1].w`
			`//`
			`// StackWidth = 2:`
			`//`
			`// T0.X = stack[0].x`
			`// T0.Y = stack[0].y`
			`// T1.X = stack[0].z`
			`// T1.Y = stack[0].w`
			`// T2.X = stack[1].x`
			`// T2.Y = stack[1].y`
			`// T3.X = stack[1].z`
			`// T3.Y = stack[1].w`
			`//`
			`// StackWidth = 4:`
			`// T0.X = stack[0].x`
			`// T0.Y = stack[0].y`
			`// T0.Z = stack[0].z`
			`// T0.W = stack[0].w`
			`// T1.X = stack[1].x`
			`// T1.Y = stack[1].y`
			`// T1.Z = stack[1].z`
			`// T1.W = stack[1].w`
			`return 1;`
			`}`

			`/// \returns The number of registers allocated for \p FI.`
			`int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,`
			`int FI) const {`
			`const MachineFrameInfo *MFI = MF.getFrameInfo();`
R600: Begin private memory at the second GPR. This way private memory does not over-write work group information stored in GPRs 0 and 1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199824 91177308-0d34-0410-b5e6-96231b3b80d8 2014-01-22 19:24:19 +00:00			`// Start the offset at 2 so we don't overwrite work group information.`
			`// XXX: We should only do this when the shader actually uses this`
			`// information.`
R600: Take alignment into account when calculating the stack offset git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199826 91177308-0d34-0410-b5e6-96231b3b80d8 2014-01-22 19:24:23 +00:00			`unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);`
R600: Support for indirect addressing v4 Only implemented for R600 so far. SI is missing implementations of a few callbacks used by the Indirect Addressing pass and needs code to handle frame indices. At the moment R600 only supports array sizes of 16 dwords or less. Register packing of vector types is currently disabled, which means that a vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order to correctly pack registers in all cases, we will need to implement an analysis pass for R600 that determines the correct vector width for each array. v2: - Add support for i8 zext load from stack. - Coding style fixes v3: - Don't reserve registers for indirect addressing when it isn't being used. - Fix bug caused by LLVM limiting the number of SubRegIndex declarations. v4: - Fix 64-bit defines git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174525 91177308-0d34-0410-b5e6-96231b3b80d8 2013-02-06 17:32:29 +00:00			`int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;`

			`for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {`
R600: Take alignment into account when calculating the stack offset git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199826 91177308-0d34-0410-b5e6-96231b3b80d8 2014-01-22 19:24:23 +00:00			`OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i));`
			`OffsetBytes += MFI->getObjectSize(i);`
			`// Each regiter holds 4 bytes, so we must always align the offset to at`
			`// least 4 bytes, so that 2 frame objects won't share the same register.`
			`OffsetBytes = RoundUpToAlignment(OffsetBytes, 4);`
R600: Support for indirect addressing v4 Only implemented for R600 so far. SI is missing implementations of a few callbacks used by the Indirect Addressing pass and needs code to handle frame indices. At the moment R600 only supports array sizes of 16 dwords or less. Register packing of vector types is currently disabled, which means that a vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order to correctly pack registers in all cases, we will need to implement an analysis pass for R600 that determines the correct vector width for each array. v2: - Add support for i8 zext load from stack. - Coding style fixes v3: - Don't reserve registers for indirect addressing when it isn't being used. - Fix bug caused by LLVM limiting the number of SubRegIndex declarations. v4: - Fix 64-bit defines git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174525 91177308-0d34-0410-b5e6-96231b3b80d8 2013-02-06 17:32:29 +00:00			`}`
R600: Take alignment into account when calculating the stack offset git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199826 91177308-0d34-0410-b5e6-96231b3b80d8 2014-01-22 19:24:23 +00:00
			`if (FI != -1)`
			`OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(FI));`

			`return OffsetBytes / (getStackWidth(MF) * 4);`
R600: Support for indirect addressing v4 Only implemented for R600 so far. SI is missing implementations of a few callbacks used by the Indirect Addressing pass and needs code to handle frame indices. At the moment R600 only supports array sizes of 16 dwords or less. Register packing of vector types is currently disabled, which means that a vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order to correctly pack registers in all cases, we will need to implement an analysis pass for R600 that determines the correct vector width for each array. v2: - Add support for i8 zext load from stack. - Coding style fixes v3: - Don't reserve registers for indirect addressing when it isn't being used. - Fix bug caused by LLVM limiting the number of SubRegIndex declarations. v4: - Fix 64-bit defines git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174525 91177308-0d34-0410-b5e6-96231b3b80d8 2013-02-06 17:32:29 +00:00			`}`

			`const TargetFrameLowering::SpillSlot *`
			`AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {`
			`NumEntries = 0;`
			`return 0;`
			`}`
			`void`
			`AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {`
			`}`
			`void`
			`AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,`
			`MachineBasicBlock &MBB) const {`
			`}`

			`bool`
			`AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {`
			`return false;`
			`}`