mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-12 18:33:22 +00:00
Revert "R600: Add new intrinsic to read work dimensions"
This reverts commit r219705. The test CodeGen/R600/work-item-intrinsics.ll was failing on Linux. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219707 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d1494d5ff3
commit
e8e8db7ff6
@ -33,14 +33,10 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
|
|||||||
"__builtin_r600_read_tgid">;
|
"__builtin_r600_read_tgid">;
|
||||||
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
|
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
|
||||||
"__builtin_r600_read_tidig">;
|
"__builtin_r600_read_tidig">;
|
||||||
|
|
||||||
} // End TargetPrefix = "r600"
|
} // End TargetPrefix = "r600"
|
||||||
|
|
||||||
let TargetPrefix = "AMDGPU" in {
|
let TargetPrefix = "AMDGPU" in {
|
||||||
|
|
||||||
class AMDGPUReadPreloadRegisterIntrinsic<string name>
|
|
||||||
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
|
||||||
GCCBuiltin<name>;
|
|
||||||
|
|
||||||
def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
|
def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
|
||||||
// 1st parameter: Numerator
|
// 1st parameter: Numerator
|
||||||
// 2nd parameter: Denominator
|
// 2nd parameter: Denominator
|
||||||
@ -76,7 +72,4 @@ def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
|
|||||||
def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
|
def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
|
||||||
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
|
|
||||||
"__builtin_amdgpu_read_workdim">;
|
|
||||||
|
|
||||||
} // End TargetPrefix = "AMDGPU"
|
} // End TargetPrefix = "AMDGPU"
|
||||||
|
@ -30,9 +30,6 @@ public:
|
|||||||
/// Number of bytes in the LDS that are being used.
|
/// Number of bytes in the LDS that are being used.
|
||||||
unsigned LDSSize;
|
unsigned LDSSize;
|
||||||
|
|
||||||
/// Start of implicit kernel args
|
|
||||||
unsigned ABIArgOffset;
|
|
||||||
|
|
||||||
unsigned getShaderType() const {
|
unsigned getShaderType() const {
|
||||||
return ShaderType;
|
return ShaderType;
|
||||||
}
|
}
|
||||||
|
@ -809,9 +809,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
|||||||
case Intrinsic::r600_read_local_size_z:
|
case Intrinsic::r600_read_local_size_z:
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 8);
|
return LowerImplicitParameter(DAG, VT, DL, 8);
|
||||||
|
|
||||||
case Intrinsic::AMDGPU_read_workdim:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
|
|
||||||
|
|
||||||
case Intrinsic::r600_read_tgid_x:
|
case Intrinsic::r600_read_tgid_x:
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
||||||
AMDGPU::T1_X, VT);
|
AMDGPU::T1_X, VT);
|
||||||
@ -1701,7 +1698,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
|||||||
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
|
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
|
||||||
*DAG.getContext());
|
*DAG.getContext());
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
|
unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();
|
||||||
|
|
||||||
SmallVector<ISD::InputArg, 8> LocalIns;
|
SmallVector<ISD::InputArg, 8> LocalIns;
|
||||||
|
|
||||||
@ -1719,7 +1716,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
|||||||
MemVT = MemVT.getVectorElementType();
|
MemVT = MemVT.getVectorElementType();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (MFI->getShaderType() != ShaderType::COMPUTE) {
|
if (ShaderType != ShaderType::COMPUTE) {
|
||||||
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
|
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
|
||||||
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
|
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
|
||||||
InVals.push_back(Register);
|
InVals.push_back(Register);
|
||||||
@ -1751,18 +1748,16 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
|||||||
|
|
||||||
unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
|
unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
|
||||||
unsigned PartOffset = VA.getLocMemOffset();
|
unsigned PartOffset = VA.getLocMemOffset();
|
||||||
unsigned Offset = 36 + VA.getLocMemOffset();
|
|
||||||
|
|
||||||
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
|
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
|
||||||
SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
|
SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
|
||||||
DAG.getConstant(Offset, MVT::i32),
|
DAG.getConstant(36 + PartOffset, MVT::i32),
|
||||||
DAG.getUNDEF(MVT::i32),
|
DAG.getUNDEF(MVT::i32),
|
||||||
PtrInfo,
|
PtrInfo,
|
||||||
MemVT, false, true, true, 4);
|
MemVT, false, true, true, 4);
|
||||||
|
|
||||||
// 4 is the preferred alignment for the CONSTANT memory space.
|
// 4 is the preferred alignment for the CONSTANT memory space.
|
||||||
InVals.push_back(Arg);
|
InVals.push_back(Arg);
|
||||||
MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
|
|
||||||
}
|
}
|
||||||
return Chain;
|
return Chain;
|
||||||
}
|
}
|
||||||
|
@ -519,11 +519,11 @@ SDValue SITargetLowering::LowerFormalArguments(
|
|||||||
if (VA.isMemLoc()) {
|
if (VA.isMemLoc()) {
|
||||||
VT = Ins[i].VT;
|
VT = Ins[i].VT;
|
||||||
EVT MemVT = Splits[i].VT;
|
EVT MemVT = Splits[i].VT;
|
||||||
const unsigned Offset = 36 + VA.getLocMemOffset();
|
|
||||||
// The first 36 bytes of the input buffer contains information about
|
// The first 36 bytes of the input buffer contains information about
|
||||||
// thread group and global sizes.
|
// thread group and global sizes.
|
||||||
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
|
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
|
||||||
Offset, Ins[i].Flags.isSExt());
|
36 + VA.getLocMemOffset(),
|
||||||
|
Ins[i].Flags.isSExt());
|
||||||
|
|
||||||
const PointerType *ParamTy =
|
const PointerType *ParamTy =
|
||||||
dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
|
dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
|
||||||
@ -537,7 +537,6 @@ SDValue SITargetLowering::LowerFormalArguments(
|
|||||||
}
|
}
|
||||||
|
|
||||||
InVals.push_back(Arg);
|
InVals.push_back(Arg);
|
||||||
Info->ABIArgOffset = Offset + MemVT.getStoreSize();
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assert(VA.isRegLoc() && "Parameter must be in a register!");
|
assert(VA.isRegLoc() && "Parameter must be in a register!");
|
||||||
@ -928,12 +927,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||||||
case Intrinsic::r600_read_local_size_z:
|
case Intrinsic::r600_read_local_size_z:
|
||||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||||
SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
|
SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
|
||||||
|
|
||||||
case Intrinsic::AMDGPU_read_workdim:
|
|
||||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
|
||||||
MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
|
|
||||||
false);
|
|
||||||
|
|
||||||
case Intrinsic::r600_read_tgid_x:
|
case Intrinsic::r600_read_tgid_x:
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
|
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
|
||||||
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
|
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
|
||||||
|
@ -128,20 +128,6 @@ entry:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: @get_work_dim
|
|
||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
||||||
; EG: MOV [[VAL]], KC0[2].Z
|
|
||||||
|
|
||||||
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0xb
|
|
||||||
; SI: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
||||||
; SI: BUFFER_STORE_DWORD [[VVAL]]
|
|
||||||
define void @get_work_dim (i32 addrspace(1)* %out) {
|
|
||||||
entry:
|
|
||||||
%0 = call i32 @llvm.r600.read.workdim() #0
|
|
||||||
store i32 %0, i32 addrspace(1)* %out
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; The tgid values are stored in sgprs offset by the number of user sgprs.
|
; The tgid values are stored in sgprs offset by the number of user sgprs.
|
||||||
; Currently we always use exactly 2 user sgprs for the pointer to the
|
; Currently we always use exactly 2 user sgprs for the pointer to the
|
||||||
; kernel arguments, but this may change in the future.
|
; kernel arguments, but this may change in the future.
|
||||||
@ -223,6 +209,4 @@ declare i32 @llvm.r600.read.tidig.x() #0
|
|||||||
declare i32 @llvm.r600.read.tidig.y() #0
|
declare i32 @llvm.r600.read.tidig.y() #0
|
||||||
declare i32 @llvm.r600.read.tidig.z() #0
|
declare i32 @llvm.r600.read.tidig.z() #0
|
||||||
|
|
||||||
declare i32 @llvm.r600.read.workdim() #0
|
|
||||||
|
|
||||||
attributes #0 = { readnone }
|
attributes #0 = { readnone }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user