mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-02 22:23:10 +00:00
R600/SI: Add a calling convention for compute shaders
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183137 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -32,17 +32,21 @@ def CC_SI : CallingConv<[
|
|||||||
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
|
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
|
||||||
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
|
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
|
||||||
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
|
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
|
||||||
]>>>,
|
]>>>
|
||||||
|
|
||||||
// This is the default for i64 values.
|
]>;
|
||||||
// XXX: We should change this once clang understands the CC_AMDGPU.
|
|
||||||
CCIfType<[i64], CCAssignToRegWithShadow<
|
// Calling convention for SI compute kernels
|
||||||
[ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
|
def CC_SI_Kernel : CallingConv<[
|
||||||
[ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
|
CCIfType<[i64], CCAssignToStack <8, 4>>,
|
||||||
>>
|
CCIfType<[i32, f32], CCAssignToStack <4, 4>>,
|
||||||
|
CCIfType<[i16], CCAssignToStack <2, 4>>,
|
||||||
|
CCIfType<[i8], CCAssignToStack <1, 4>>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def CC_AMDGPU : CallingConv<[
|
def CC_AMDGPU : CallingConv<[
|
||||||
|
CCIf<"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
|
||||||
|
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_SI_Kernel>>,
|
||||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"#
|
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"#
|
||||||
"->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>>
|
"->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>>
|
||||||
]>;
|
]>;
|
||||||
|
|||||||
@@ -14,9 +14,11 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "AMDGPUISelLowering.h"
|
#include "AMDGPUISelLowering.h"
|
||||||
|
#include "AMDGPU.h"
|
||||||
#include "AMDGPURegisterInfo.h"
|
#include "AMDGPURegisterInfo.h"
|
||||||
#include "AMDGPUSubtarget.h"
|
#include "AMDGPUSubtarget.h"
|
||||||
#include "AMDILIntrinsicInfo.h"
|
#include "AMDILIntrinsicInfo.h"
|
||||||
|
#include "SIMachineFunctionInfo.h"
|
||||||
#include "llvm/CodeGen/CallingConvLower.h"
|
#include "llvm/CodeGen/CallingConvLower.h"
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
|||||||
@@ -121,7 +121,7 @@ SDValue SITargetLowering::LowerFormalArguments(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Second split vertices into their elements
|
// Second split vertices into their elements
|
||||||
if (Arg.VT.isVector()) {
|
if (Info->ShaderType != ShaderType::COMPUTE && Arg.VT.isVector()) {
|
||||||
ISD::InputArg NewArg = Arg;
|
ISD::InputArg NewArg = Arg;
|
||||||
NewArg.Flags.setSplit();
|
NewArg.Flags.setSplit();
|
||||||
NewArg.VT = Arg.VT.getVectorElementType();
|
NewArg.VT = Arg.VT.getVectorElementType();
|
||||||
@@ -153,6 +153,14 @@ SDValue SITargetLowering::LowerFormalArguments(
|
|||||||
CCInfo.AllocateReg(AMDGPU::VGPR1);
|
CCInfo.AllocateReg(AMDGPU::VGPR1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned ArgReg = 0;
|
||||||
|
// The pointer to the list of arguments is stored in SGPR0, SGPR1
|
||||||
|
if (Info->ShaderType == ShaderType::COMPUTE) {
|
||||||
|
CCInfo.AllocateReg(AMDGPU::SGPR0);
|
||||||
|
CCInfo.AllocateReg(AMDGPU::SGPR1);
|
||||||
|
ArgReg = MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
|
||||||
|
}
|
||||||
|
|
||||||
AnalyzeFormalArguments(CCInfo, Splits);
|
AnalyzeFormalArguments(CCInfo, Splits);
|
||||||
|
|
||||||
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
|
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
|
||||||
@@ -164,10 +172,26 @@ SDValue SITargetLowering::LowerFormalArguments(
|
|||||||
}
|
}
|
||||||
|
|
||||||
CCValAssign &VA = ArgLocs[ArgIdx++];
|
CCValAssign &VA = ArgLocs[ArgIdx++];
|
||||||
|
EVT VT = VA.getLocVT();
|
||||||
|
|
||||||
|
if (VA.isMemLoc()) {
|
||||||
|
assert(ArgReg);
|
||||||
|
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
||||||
|
AMDGPUAS::CONSTANT_ADDRESS);
|
||||||
|
EVT ArgVT = MVT::getIntegerVT(VT.getSizeInBits());
|
||||||
|
SDValue BasePtr = DAG.getCopyFromReg(DAG.getRoot(), DL,
|
||||||
|
ArgReg, MVT::i64);
|
||||||
|
SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
|
||||||
|
DAG.getConstant(VA.getLocMemOffset(), MVT::i64));
|
||||||
|
SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), Ptr,
|
||||||
|
MachinePointerInfo(UndefValue::get(PtrTy)),
|
||||||
|
VA.getValVT(), false, false, ArgVT.getSizeInBits() >> 3);
|
||||||
|
InVals.push_back(Arg);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
assert(VA.isRegLoc() && "Parameter must be in a register!");
|
assert(VA.isRegLoc() && "Parameter must be in a register!");
|
||||||
|
|
||||||
unsigned Reg = VA.getLocReg();
|
unsigned Reg = VA.getLocReg();
|
||||||
MVT VT = VA.getLocVT();
|
|
||||||
|
|
||||||
if (VT == MVT::i64) {
|
if (VT == MVT::i64) {
|
||||||
// For now assume it is a pointer
|
// For now assume it is a pointer
|
||||||
|
|||||||
@@ -38,8 +38,8 @@ entry:
|
|||||||
; R600-CHECK: @bfi_sha256_ma
|
; R600-CHECK: @bfi_sha256_ma
|
||||||
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||||
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||||
; SI-CHECK: V_XOR_B32_e32 [[DST:VGPR[0-9]+]], {{VGPR[0-9]+, VGPR[0-9]+}}
|
; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
|
||||||
; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{VGPR[0-9]+, VGPR[0-9]+}}
|
; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
|
||||||
|
|
||||||
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
||||||
entry:
|
entry:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||||
|
|
||||||
;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0
|
;CHECK: V_LSHL_B32_e64 VGPR{{[0-9]+}}, {{[SV]GPR[0-9]+}}, 1
|
||||||
|
|
||||||
define void @test(i32 %p) {
|
define void @test(i32 %p) {
|
||||||
%i = mul i32 %p, 2
|
%i = mul i32 %p, 2
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||||
|
|
||||||
;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
|
;CHECK: V_LSHR_B32_e64 {{VGPR[0-9]+}}, {{[SV]GPR[0-9]+}}, 1
|
||||||
|
|
||||||
define void @test(i32 %p) {
|
define void @test(i32 %p) {
|
||||||
%i = udiv i32 %p, 2
|
%i = udiv i32 %p, 2
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||||
|
|
||||||
;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
|
;CHECK: V_MOV_B32_e32 VGPR{{[0-9]+}}, -1431655765
|
||||||
;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
|
;CHECK: V_MUL_HI_U32 VGPR0, {{[SV]GPR[0-9]+}}, {{VGPR[0-9]+}}
|
||||||
;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
|
;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
|
||||||
|
|
||||||
define void @test(i32 %p) {
|
define void @test(i32 %p) {
|
||||||
|
|||||||
@@ -22,8 +22,8 @@ entry:
|
|||||||
; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PV.[XYZW]}}
|
; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PV.[XYZW]}}
|
||||||
|
|
||||||
; SI-CHECK: @rotl
|
; SI-CHECK: @rotl
|
||||||
; SI-CHECK: V_SUB_I32_e32 [[DST:VGPR[0-9]+]], 32, {{VGPR[0-9]+}}
|
; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}}
|
||||||
; SI-CHECK: V_ALIGNBIT_B32 {{VGPR[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}, [[DST]]
|
; SI-CHECK: V_ALIGNBIT_B32 {{VGPR[0-9]+, [SV]GPR[0-9]+, VGPR[0-9]+}}, [[DST]]
|
||||||
define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) {
|
define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) {
|
||||||
entry:
|
entry:
|
||||||
%0 = shl i32 %x, %y
|
%0 = shl i32 %x, %y
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||||
|
|
||||||
;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
|
;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}, 0, 0, 0, 0
|
||||||
|
|
||||||
define void @main(float %p) {
|
define void @main(float %p) {
|
||||||
main_body:
|
main_body:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||||
|
|
||||||
;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
|
;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}, 0, 0, 0, 0
|
||||||
|
|
||||||
define void @main(float %p) {
|
define void @main(float %p) {
|
||||||
main_body:
|
main_body:
|
||||||
|
|||||||
Reference in New Issue
Block a user