R600: Fix handling of vector kernel arguments

The SelectionDAGBuilder was promoting vector kernel arguments to legal types,
but this won't work for R600 and SI, since kernel arguments are stored in
memory and can't be promoted. In order to handle vector arguments correctly we
need to look at the original types from the LLVM IR function.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193215 91177308-0d34-0410-b5e6-96231b3b80d8

parent: d0716b0647
commit: f95b162188
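As a rough sketch of the case this change targets (modeled on the new
test/CodeGen/R600/kernel-args.ll below; the function name @v4i8_example is
illustrative only), a kernel that takes a <4 x i8> argument now has that
argument read back from the in-memory kernel argument buffer using its
original IR type -- e.g. four byte loads (BUFFER_LOAD_UBYTE on SI, VTX_READ_8
on Evergreen) -- instead of being promoted to a single legal 32-bit type by
the SelectionDAGBuilder:

define void @v4i8_example(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  ; %in lives in the kernel argument buffer, so it is lowered through
  ; getOriginalFunctionArgs() using its original <4 x i8> type.
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}
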
@@ -44,11 +44,7 @@ def CC_SI : CallingConv<[
 
 // Calling convention for compute kernels
 def CC_AMDGPU_Kernel : CallingConv<[
-  CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>,
-  CCIfType<[i64, f64, v2f32, v2i32], CCAssignToStack < 8, 8>>,
-  CCIfType<[i32, f32], CCAssignToStack < 4, 4>>,
-  CCIfType<[i16], CCAssignToStack < 2, 4>>,
-  CCIfType<[i8], CCAssignToStack < 1, 4>>
+  CCCustom<"allocateStack">
 ]>;
 
 def CC_AMDGPU : CallingConv<[
@@ -28,6 +28,14 @@
 #include "llvm/IR/DataLayout.h"
 
 using namespace llvm;
+static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
+                          CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+  return true;
+}
 
 #include "AMDGPUGenCallingConv.inc"
 
@@ -64,6 +72,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
+
+  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
+
   setOperationAction(ISD::STORE, MVT::f64, Promote);
   AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
 
@@ -90,6 +104,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
+
+  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
+
   setOperationAction(ISD::LOAD, MVT::f64, Promote);
   AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
 
@@ -656,6 +676,38 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
 // Helper functions
 //===----------------------------------------------------------------------===//
 
+void AMDGPUTargetLowering::getOriginalFunctionArgs(
+                               SelectionDAG &DAG,
+                               const Function *F,
+                               const SmallVectorImpl<ISD::InputArg> &Ins,
+                               SmallVectorImpl<ISD::InputArg> &OrigIns) const {
+
+  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
+    if (Ins[i].ArgVT == Ins[i].VT) {
+      OrigIns.push_back(Ins[i]);
+      continue;
+    }
+
+    EVT VT;
+    if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
+      // Vector has been split into scalars.
+      VT = Ins[i].ArgVT.getVectorElementType();
+    } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
+               Ins[i].ArgVT.getVectorElementType() !=
+               Ins[i].VT.getVectorElementType()) {
+      // Vector elements have been promoted
+      VT = Ins[i].ArgVT;
+    } else {
+      // Vector has been spilt into smaller vectors.
+      VT = Ins[i].VT;
+    }
+
+    ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
+                      Ins[i].OrigArgIndex, Ins[i].PartOffset);
+    OrigIns.push_back(Arg);
+  }
+}
+
 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
     return CFP->isExactlyValue(1.0);
@@ -36,7 +36,6 @@ private:
   SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
   /// \brief Split a vector store into multiple scalar stores.
   /// \returns The resulting chain.
-  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
 
 protected:
@@ -52,10 +51,21 @@ protected:
                              SelectionDAG &DAG) const;
   /// \brief Split a vector load into multiple scalar loads.
   SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
+  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   bool isHWTrueValue(SDValue Op) const;
   bool isHWFalseValue(SDValue Op) const;
 
+  /// The SelectionDAGBuilder will automatically promote function arguments
+  /// with illegal types.  However, this does not work for the AMDGPU targets
+  /// since the function arguments are stored in memory as these illegal types.
+  /// In order to handle this properly we need to get the original types sizes
+  /// from the LLVM IR Function and fixup the ISD:InputArg values before
+  /// passing them to AnalyzeFormalArguments()
+  void getOriginalFunctionArgs(SelectionDAG &DAG,
+                               const Function *F,
+                               const SmallVectorImpl<ISD::InputArg> &Ins,
+                               SmallVectorImpl<ISD::InputArg> &OrigIns) const;
   void AnalyzeFormalArguments(CCState &State,
                               const SmallVectorImpl<ISD::InputArg> &Ins) const;
 
@@ -59,8 +59,9 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
   LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
   Subtarget(TT, CPU, FS),
   Layout(Subtarget.getDataLayout()),
-  FrameLowering(TargetFrameLowering::StackGrowsUp, 16 // Stack Alignment
-                , 0),
+  FrameLowering(TargetFrameLowering::StackGrowsUp,
+                64 * 16 // Maximum stack alignment (long16)
+                , 0),
   IntrinsicInfo(this),
   InstrItins(&Subtarget.getInstrItineraryData()) {
   // TLInfo uses InstrInfo so it must be initialized after.
@@ -1209,7 +1209,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
   }
 
   int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
-  if (ConstantBlock > -1) {
+  if (ConstantBlock > -1 && LoadNode->getExtensionType() != ISD::SEXTLOAD) {
     SDValue Result;
     if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
         dyn_cast<Constant>(LoadNode->getSrcValue()) ||
@@ -1340,22 +1340,29 @@ SDValue R600TargetLowering::LowerFormalArguments(
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                  getTargetMachine(), ArgLocs, *DAG.getContext());
 
-  AnalyzeFormalArguments(CCInfo, Ins);
+  SmallVector<ISD::InputArg, 8> LocalIns;
+
+  getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
+                          LocalIns);
+
+  AnalyzeFormalArguments(CCInfo, LocalIns);
 
   for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    EVT VT = VA.getLocVT();
+    EVT VT = Ins[i].VT;
+    EVT MemVT = LocalIns[i].VT;
 
     PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);
 
     // The first 36 bytes of the input buffer contains information about
     // thread group and global sizes.
-    SDValue Arg = DAG.getLoad(VT, DL, Chain,
+    SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
                            DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
-                           MachinePointerInfo(UndefValue::get(PtrTy)), false,
-                           false, false, 4); // 4 is the prefered alignment for
-                                             // the CONSTANT memory space.
+                           MachinePointerInfo(UndefValue::get(PtrTy)),
+                           MemVT, false, false, 4);
+                           // 4 is the prefered alignment for
+                           // the CONSTANT memory space.
     InVals.push_back(Arg);
   }
   return Chain;
@@ -69,6 +69,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   // We need to custom lower vector stores from local memory
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
+
+  setOperationAction(ISD::STORE, MVT::v8i32, Custom);
+  setOperationAction(ISD::STORE, MVT::v16i32, Custom);
 
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -78,6 +83,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
   setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
 
+  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 
@@ -89,10 +95,15 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
+
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
+  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
 
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 
@@ -115,23 +126,22 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
 }
 
 bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
-  return VT.bitsLE(MVT::i8);
+  return VT.bitsLE(MVT::i16);
 }
 
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT,
+SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                          SDLoc DL, SDValue Chain,
                                          unsigned Offset) const {
   MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
   PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                             AMDGPUAS::CONSTANT_ADDRESS);
-  EVT ArgVT = MVT::getIntegerVT(VT.getSizeInBits());
   SDValue BasePtr = DAG.getCopyFromReg(Chain, DL,
                            MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
   SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
                             DAG.getConstant(Offset, MVT::i64));
-  return DAG.getLoad(VT, DL, Chain, Ptr,
-                     MachinePointerInfo(UndefValue::get(PtrTy)),
-                     false, false, false, ArgVT.getSizeInBits() >> 3);
+  return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr,
+                     MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
+                     false, false, MemVT.getSizeInBits() >> 3);
 
 }
 
@@ -190,7 +200,7 @@ SDValue SITargetLowering::LowerFormalArguments(
         NewArg.PartOffset += NewArg.VT.getStoreSize();
       }
 
-    } else {
+    } else if (Info->ShaderType != ShaderType::COMPUTE) {
       Splits.push_back(Arg);
     }
   }
@@ -213,6 +223,11 @@ SDValue SITargetLowering::LowerFormalArguments(
     MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
   }
 
+  if (Info->ShaderType == ShaderType::COMPUTE) {
+    getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
+                            Splits);
+  }
+
   AnalyzeFormalArguments(CCInfo, Splits);
 
   for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
@@ -227,9 +242,11 @@ SDValue SITargetLowering::LowerFormalArguments(
     EVT VT = VA.getLocVT();
 
     if (VA.isMemLoc()) {
+      VT = Ins[i].VT;
+      EVT MemVT = Splits[i].VT;
       // The first 36 bytes of the input buffer contains information about
       // thread group and global sizes.
-      SDValue Arg = LowerParameter(DAG, VT, DL, DAG.getRoot(),
+      SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
                                    36 + VA.getLocMemOffset());
       InVals.push_back(Arg);
       continue;
@@ -389,8 +406,18 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
       return SDValue();
     }
   }
+  case ISD::STORE: {
+    StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+    if (Store->getValue().getValueType().isVector() &&
+        Store->getValue().getValueType().getVectorNumElements() >= 8)
+      return SplitVectorStore(Op, DAG);
+    else
+      return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+  }
+
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+  case ISD::ANY_EXTEND: // Fall-through
   case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
   case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: {
@@ -403,23 +430,23 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     switch (IntrinsicID) {
     default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
     case Intrinsic::r600_read_ngroups_x:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 0);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0);
    case Intrinsic::r600_read_ngroups_y:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 4);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4);
    case Intrinsic::r600_read_ngroups_z:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 8);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8);
    case Intrinsic::r600_read_global_size_x:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 12);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12);
    case Intrinsic::r600_read_global_size_y:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 16);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16);
    case Intrinsic::r600_read_global_size_z:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 20);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20);
    case Intrinsic::r600_read_local_size_x:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 24);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24);
    case Intrinsic::r600_read_local_size_y:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 28);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28);
    case Intrinsic::r600_read_local_size_z:
-      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 32);
+      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32);
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
                      AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
@@ -21,7 +21,7 @@
 namespace llvm {
 
 class SITargetLowering : public AMDGPUTargetLowering {
-  SDValue LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL,
+  SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
                          SDValue Chain, unsigned Offset) const;
   SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
                                SelectionDAG &DAG) const;
@@ -1560,9 +1560,12 @@ def : BitConvert <f64, i64, VReg_64>;
 
 def : BitConvert <v2f32, v2i32, VReg_64>;
 def : BitConvert <v2i32, v2f32, VReg_64>;
+def : BitConvert <v2i32, i64, VReg_64>;
 
 def : BitConvert <v4f32, v4i32, VReg_128>;
 def : BitConvert <v4i32, v4f32, VReg_128>;
+def : BitConvert <v4i32, i128, VReg_128>;
+def : BitConvert <i128, v4i32, VReg_128>;
 
 def : BitConvert <v8i32, v32i8, SReg_256>;
 def : BitConvert <v32i8, v8i32, SReg_256>;
@@ -174,7 +174,7 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
   let Size = 96;
 }
 
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>;
 
 def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
 

test/CodeGen/R600/kernel-args.ll (new file, 455 lines)
@@ -0,0 +1,455 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; EG-CHECK-LABEL: @i8_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+
+define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
+entry:
+  %0 = zext i8 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i8_zext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_zext_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+
+define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
+entry:
+  %0 = zext i8 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i8_sext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_sext_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+
+define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
+entry:
+  %0 = sext i8 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i16_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+
+define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
+entry:
+  %0 = zext i16 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i16_zext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_zext_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+
+define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
+entry:
+  %0 = zext i16 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i16_sext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_sext_arg
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+
+define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
+entry:
+  %0 = sext i16 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i32_arg
+; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i32_arg
+; S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
+entry:
+  store i32 %in, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @f32_arg
+; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @f32_arg
+; S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
+entry:
+  store float %in, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v2i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v2i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
+entry:
+  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v2i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v2i16_arg
+; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
+entry:
+  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v2i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; SI-CHECK-LABEL: @v2i32_arg
+; SI-CHECK: S_LOAD_DWORDX2 SGPR{{[0-9]}}_SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
+entry:
+  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v2f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; SI-CHECK-LABEL: @v2f32_arg
+; SI-CHECK: S_LOAD_DWORDX2 SGPR{{[0-9]}}_SGPR{{[0-9]}}, SGPR0_SGPR1, 11
+define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
+entry:
+  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v3i8_arg
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
+; SI-CHECK-LABEL: @v3i8_arg
+define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
+entry:
+  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v3i16_arg
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
+; SI-CHECK-LABEL: @v3i16_arg
+define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
+entry:
+  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
+  ret void
+}
+; EG-CHECK-LABEL: @v3i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; SI-CHECK-LABEL: @v3i32_arg
+; SI-CHECK: S_LOAD_DWORDX4 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 13
+define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
+entry:
+  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v3f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; SI-CHECK-LABEL: @v3f32_arg
+; SI-CHECK: S_LOAD_DWORDX4 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 13
+define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
+entry:
+  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v4i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v4i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
+entry:
+  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v4i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v4i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
+entry:
+  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v4i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
+; SI-CHECK-LABEL: @v4i32_arg
+; SI-CHECK: S_LOAD_DWORDX4 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 13
+define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
+entry:
+  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v4f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
+; SI-CHECK-LABEL: @v4f32_arg
+; SI-CHECK: S_LOAD_DWORDX4 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 13
+define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
+entry:
+  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v8i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v8i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
+entry:
+  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v8i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v8i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
+entry:
+  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v8i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+; SI-CHECK-LABEL: @v8i32_arg
+; SI-CHECK: S_LOAD_DWORDX8 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 17
+define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
+entry:
+  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v8f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+; SI-CHECK-LABEL: @v8f32_arg
+; SI-CHECK: S_LOAD_DWORDX8 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 17
+define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
+entry:
+  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v16i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v16i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
+entry:
+  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v16i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v16i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
+entry:
+  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v16i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
+; SI-CHECK-LABEL: @v16i32_arg
+; SI-CHECK: S_LOAD_DWORDX16 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 25
+define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
+entry:
+  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v16f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
+; SI-CHECK-LABEL: @v16f32_arg
+; SI-CHECK: S_LOAD_DWORDX16 SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}_SGPR{{[0-9]+}}, SGPR0_SGPR1, 25
+define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
+entry:
+  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
+  ret void
+}
@@ -41,7 +41,7 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in)
 
 ; EG-CHECK-LABEL: @or_i64
 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
-; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[3].X
+; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
 ; SI-CHECK-LABEL: @or_i64
 ; SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]}}
 ; SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]}}

@@ -1,69 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-
-; EG-CHECK: @i8_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: BUFFER_LOAD_UBYTE
-
-define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
-entry:
-  %0 = zext i8 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; EG-CHECK: @i8_zext_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
-
-define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
-entry:
-  %0 = zext i8 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; EG-CHECK: @i8_sext_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
-
-define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
-entry:
-  %0 = sext i8 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; EG-CHECK: @i16_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: BUFFER_LOAD_USHORT
-
-define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
-entry:
-  %0 = zext i16 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; EG-CHECK: @i16_zext_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
-
-define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
-entry:
-  %0 = zext i16 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; EG-CHECK: @i16_sext_arg
-; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11
-
-define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
-entry:
-  %0 = sext i16 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
@@ -7,7 +7,7 @@
 ;===------------------------------------------------------------------------===;
 
 ; i8 store
-; EG-CHECK: @store_i8
+; EG-CHECK-LABEL: @store_i8
 ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
 ; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]]
 ; IG 0: Get the byte index and truncate the value
@@ -26,7 +26,7 @@
 ; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
 ; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
 
-; SI-CHECK: @store_i8
+; SI-CHECK-LABEL: @store_i8
 ; SI-CHECK: BUFFER_STORE_BYTE
 
 define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
@@ -36,7 +36,7 @@ entry:
 }
 
 ; i16 store
-; EG-CHECK: @store_i16
+; EG-CHECK-LABEL: @store_i16
 ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
 ; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]]
 ; IG 0: Get the byte index and truncate the value
@@ -55,7 +55,7 @@ entry:
 ; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
 ; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
 
-; SI-CHECK: @store_i16
+; SI-CHECK-LABEL: @store_i16
 ; SI-CHECK: BUFFER_STORE_SHORT
 define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
 entry:
@@ -63,10 +63,10 @@ entry:
   ret void
 }
 
-; EG-CHECK: @store_v2i8
+; EG-CHECK-LABEL: @store_v2i8
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK-NOT: MEM_RAT MSKOR
-; SI-CHECK: @store_v2i8
+; SI-CHECK-LABEL: @store_v2i8
 ; SI-CHECK: BUFFER_STORE_BYTE
 ; SI-CHECK: BUFFER_STORE_BYTE
 define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
@@ -77,12 +77,13 @@ entry:
 }
 
 
-; EG-CHECK: @store_v2i16
+; EG-CHECK-LABEL: @store_v2i16
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @store_v2i16
+; CM-CHECK-LABEL: @store_v2i16
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @store_v2i16
-; SI-CHECK: BUFFER_STORE_DWORD
+; SI-CHECK-LABEL: @store_v2i16
+; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: BUFFER_STORE_SHORT
 define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
 entry:
   %0 = trunc <2 x i32> %in to <2 x i16>
@@ -90,11 +91,11 @@ entry:
   ret void
 }
 
-; EG-CHECK: @store_v4i8
+; EG-CHECK-LABEL: @store_v4i8
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @store_v4i8
+; CM-CHECK-LABEL: @store_v4i8
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @store_v4i8
+; SI-CHECK-LABEL: @store_v4i8
 ; SI-CHECK: BUFFER_STORE_BYTE
 ; SI-CHECK: BUFFER_STORE_BYTE
 ; SI-CHECK: BUFFER_STORE_BYTE
@@ -107,11 +108,11 @@ entry:
 }
 
 ; floating-point store
-; EG-CHECK: @store_f32
+; EG-CHECK-LABEL: @store_f32
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
-; CM-CHECK: @store_f32
+; CM-CHECK-LABEL: @store_f32
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: @store_f32
+; SI-CHECK-LABEL: @store_f32
 ; SI-CHECK: BUFFER_STORE_DWORD
 
 define void @store_f32(float addrspace(1)* %out, float %in) {
@@ -119,13 +120,13 @@ define void @store_f32(float addrspace(1)* %out, float %in) {
   ret void
 }
 
-; EG-CHECK: @store_v4i16
+; EG-CHECK-LABEL: @store_v4i16
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK-NOT: MEM_RAT MSKOR
-; SI-CHECK: @store_v4i16
+; SI-CHECK-LABEL: @store_v4i16
 ; SI-CHECK: BUFFER_STORE_SHORT
 ; SI-CHECK: BUFFER_STORE_SHORT
 ; SI-CHECK: BUFFER_STORE_SHORT
@@ -139,11 +140,11 @@ entry:
 }
 
 ; vec2 floating-point stores
-; EG-CHECK: @store_v2f32
+; EG-CHECK-LABEL: @store_v2f32
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @store_v2f32
+; CM-CHECK-LABEL: @store_v2f32
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @store_v2f32
+; SI-CHECK-LABEL: @store_v2f32
 ; SI-CHECK: BUFFER_STORE_DWORDX2
 
 define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
@@ -154,13 +155,13 @@ entry:
   ret void
 }
 
-; EG-CHECK: @store_v4i32
+; EG-CHECK-LABEL: @store_v4i32
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
 ; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @store_v4i32
+; CM-CHECK-LABEL: @store_v4i32
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
 ; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @store_v4i32
+; SI-CHECK-LABEL: @store_v4i32
 ; SI-CHECK: BUFFER_STORE_DWORDX4
 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
 entry:
@@ -172,41 +173,42 @@ entry:
 ; Local Address Space
 ;===------------------------------------------------------------------------===;
 
-; EG-CHECK: @store_local_i8
+; EG-CHECK-LABEL: @store_local_i8
 ; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK: @store_local_i8
+; SI-CHECK-LABEL: @store_local_i8
 ; SI-CHECK: DS_WRITE_B8
 define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
   store i8 %in, i8 addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK: @store_local_i16
+; EG-CHECK-LABEL: @store_local_i16
 ; EG-CHECK: LDS_SHORT_WRITE
-; SI-CHECK: @store_local_i16
+; SI-CHECK-LABEL: @store_local_i16
 ; SI-CHECK: DS_WRITE_B16
 define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
   store i16 %in, i16 addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK: @store_local_v2i16
+; EG-CHECK-LABEL: @store_local_v2i16
 ; EG-CHECK: LDS_WRITE
-; CM-CHECK: @store_local_v2i16
+; CM-CHECK-LABEL: @store_local_v2i16
 ; CM-CHECK: LDS_WRITE
-; SI-CHECK: @store_local_v2i16
-; SI-CHECK: DS_WRITE_B32
+; SI-CHECK-LABEL: @store_local_v2i16
+; SI-CHECK: DS_WRITE_B16
+; SI-CHECK: DS_WRITE_B16
 define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
 entry:
   store <2 x i16> %in, <2 x i16> addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK: @store_local_v4i8
+; EG-CHECK-LABEL: @store_local_v4i8
 ; EG-CHECK: LDS_WRITE
-; CM-CHECK: @store_local_v4i8
+; CM-CHECK-LABEL: @store_local_v4i8
 ; CM-CHECK: LDS_WRITE
-; SI-CHECK: @store_local_v4i8
+; SI-CHECK-LABEL: @store_local_v4i8
 ; SI-CHECK: DS_WRITE_B8
 ; SI-CHECK: DS_WRITE_B8
 ; SI-CHECK: DS_WRITE_B8
@@ -217,13 +219,13 @@ entry:
   ret void
 }
 
-; EG-CHECK: @store_local_v2i32
+; EG-CHECK-LABEL: @store_local_v2i32
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
-; CM-CHECK: @store_local_v2i32
+; CM-CHECK-LABEL: @store_local_v2i32
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
-; SI-CHECK: @store_local_v2i32
+; SI-CHECK-LABEL: @store_local_v2i32
 ; SI-CHECK: DS_WRITE_B32
 ; SI-CHECK: DS_WRITE_B32
 define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
@@ -232,17 +234,17 @@ entry:
   ret void
 }
 
-; EG-CHECK: @store_local_v4i32
+; EG-CHECK-LABEL: @store_local_v4i32
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
-; CM-CHECK: @store_local_v4i32
+; CM-CHECK-LABEL: @store_local_v4i32
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
-; SI-CHECK: @store_local_v4i32
+; SI-CHECK-LABEL: @store_local_v4i32
 ; SI-CHECK: DS_WRITE_B32
 ; SI-CHECK: DS_WRITE_B32
 ; SI-CHECK: DS_WRITE_B32
@@ -260,11 +262,11 @@ entry:
 ; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
 ; be two 32-bit stores.
 
-; EG-CHECK: @vecload2
+; EG-CHECK-LABEL: @vecload2
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK: @vecload2
+; CM-CHECK-LABEL: @vecload2
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK: @vecload2
+; SI-CHECK-LABEL: @vecload2
 ; SI-CHECK: BUFFER_STORE_DWORDX2
 define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
 entry:
@@ -21,7 +21,6 @@ define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
 ; SI: S_LOAD_DWORDX2
 ; SI: S_LOAD_DWORDX2 [[SREG:SGPR[0-9]+_SGPR[0-9]+]]
 ; SI: V_LSHL_B64 [[LO_VREG:VGPR[0-9]+]]_VGPR{{[0-9]+}}, [[SREG]], 2
-; SI-NOT: [[LO_VREG]]
 ; SI: BUFFER_STORE_DWORD [[LO_VREG]],
 define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
   %b = shl i64 %a, 2