mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2025-02-12 03:32:10 +00:00
[NVPTX] Disable vector registers
Vectors were being manually scalarized by the backend. Instead, let the
target-independent code do all of the work. The manual scalarization was from a
time before good target-independent support for scalarization in LLVM. However,
this forces us to specially handle vector loads and stores, which we can turn
into PTX instructions that produce/consume multiple operands.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174968 91177308-0d34-0410-b5e6-96231b3b80d8
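The idea, as a minimal sketch in LLVM IR (the pointer, register names, and
address space are illustrative, not taken from this commit): a vector load now
survives to instruction selection and is matched to a single vectorized PTX
load whose destination is a brace-enclosed list of scalar registers:

    %v = load <4 x float>* %p, align 16
    ; selected as one NVPTXISD::LoadV4 node and printed as a single PTX
    ; instruction that produces four scalar results, roughly:
    ;   ld.global.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];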
This commit is contained in:
parent c8a196ae8f
commit 7eacad03ef
@@ -805,6 +805,16 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
    [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
    "llvm.nvvm.ldu.global.p">;

// Generated within nvvm. Use for ldg on sm_35 or later
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
    [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
    "llvm.nvvm.ldg.global.i">;
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
    [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
    "llvm.nvvm.ldg.global.f">;
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
    [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
    "llvm.nvvm.ldg.global.p">;

// Use for generic pointers
// - These intrinsics are used to convert address spaces.
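A usage sketch for the ldg intrinsics declared above (the overload suffix
follows the usual intrinsic-name mangling and is an assumption here, not part
of this diff; only the float form is shown):

    declare float @llvm.nvvm.ldg.global.f.f32(float*)

    define float @readonly_load(float* %p) {
      ; on sm_35 or later this can be selected to a non-coherent,
      ; read-only global load (ld.global.nc.f32)
      %v = call float @llvm.nvvm.ldg.global.f.f32(float* %p)
      ret float %v
    }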
@@ -22,7 +22,6 @@ set(NVPTXCodeGen_sources
  NVPTXAllocaHoisting.cpp
  NVPTXAsmPrinter.cpp
  NVPTXUtilities.cpp
  VectorElementize.cpp
  )

add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
@@ -53,7 +53,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {

FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
                                 llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
@@ -503,21 +503,7 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
    O << getNVPTXRegClassStr(RC) << mapped_vr;
    return;
  }
  // Vector virtual register
  if (getNVPTXVectorSize(RC) == 4)
    O << "{"
      << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
      << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
      << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
      << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
      << "}";
  else if (getNVPTXVectorSize(RC) == 2)
    O << "{"
      << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
      << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
      << "}";
  else
    llvm_unreachable("Unsupported vector size");
  report_fatal_error("Bad register!");
}

void
@@ -2024,29 +2010,9 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
  case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
  case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
  case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
  case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
  case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
  case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
  case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
  case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
  case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
  case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
  case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
  case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
  case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
  case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
  case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
  case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
  case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
  case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
  case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
  case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
  case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
  case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
  case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
  case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
  case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
  case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
  case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
  case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
  case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
@@ -2057,16 +2023,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
  case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
  case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
  case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
  case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
  case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
  case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
  case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
  case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
  case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
  case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
  case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
  case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
  case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
  case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
    return true;
  }
@@ -105,6 +105,21 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
  case ISD::STORE:
    ResNode = SelectStore(N);
    break;
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    ResNode = SelectLoadVector(N);
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    ResNode = SelectLDGLDUVector(N);
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    ResNode = SelectStoreVector(N);
    break;
  default: break;
  }
  if (ResNode)
    return ResNode;
@@ -214,16 +229,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
    case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
    case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
    case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
    case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
    case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
@@ -244,16 +249,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
    case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
    case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
    case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
    case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
    case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
@@ -267,24 +262,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  } else if (Subtarget.is64Bit()?
             SelectADDRri64(N1.getNode(), N1, Base, Offset):
             SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    switch (TargetVT) {
    case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
    case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
    case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
    case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
    case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
    case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
    case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
    case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
    default: return NULL;
    if (Subtarget.is64Bit()) {
      switch (TargetVT) {
      case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break;
      case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break;
      case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break;
      case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break;
      case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break;
      case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break;
      default: return NULL;
      }
    } else {
      switch (TargetVT) {
      case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
      case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
      case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
      case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
      case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
      case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
      default: return NULL;
      }
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
@@ -296,24 +293,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
                                     MVT::Other, Ops, 8);
  }
  else {
    switch (TargetVT) {
    case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
    case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
    case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
    case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
    case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
    case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
    case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
    case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
    default: return NULL;
    if (Subtarget.is64Bit()) {
      switch (TargetVT) {
      case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break;
      case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break;
      case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break;
      case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break;
      case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break;
      case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break;
      default: return NULL;
      }
    } else {
      switch (TargetVT) {
      case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
      case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
      case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
      case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
      case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
      case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
      default: return NULL;
      }
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
@@ -334,6 +333,370 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  return NVPTXLD;
}

SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  unsigned Opcode;
  DebugLoc DL = N->getDebugLoc();
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return NULL;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned FromTypeWidth = ScalarVT.getSizeInBits();
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType =
    cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    FromType = NVPTX::PTXLdStInstCode::Float;
  else
    FromType = NVPTX::PTXLdStInstCode::Unsigned;

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break;
  case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break;
  default: return NULL;
  }

  EVT EltVT = N->getValueType(0);

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default: return NULL;
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break;
      case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break;
      case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break;
      case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break;
      case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break;
      case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break;
      }
      break;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break;
      case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break;
      case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break;
      case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break;
      }
      break;
    }

    SDValue Ops[] = { getI32Imm(IsVolatile),
                      getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType),
                      getI32Imm(FromType),
                      getI32Imm(FromTypeWidth),
                      Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
  } else if (Subtarget.is64Bit()?
             SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
             SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default: return NULL;
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break;
      case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break;
      case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break;
      case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break;
      case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break;
      case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break;
      }
      break;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break;
      case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break;
      case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break;
      case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break;
      }
      break;
    }

    SDValue Ops[] = { getI32Imm(IsVolatile),
                      getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType),
                      getI32Imm(FromType),
                      getI32Imm(FromTypeWidth),
                      Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
  } else if (Subtarget.is64Bit()?
             SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
             SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default: return NULL;
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break;
        case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break;
        case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break;
        case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break;
        case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break;
        case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break;
        }
        break;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break;
        case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break;
        case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break;
        case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default: return NULL;
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break;
        case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break;
        case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break;
        case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break;
        case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break;
        case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break;
        }
        break;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break;
        case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break;
        case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break;
        case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break;
        }
        break;
      }
    }

    SDValue Ops[] = { getI32Imm(IsVolatile),
                      getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType),
                      getI32Imm(FromType),
                      getI32Imm(FromTypeWidth),
                      Base, Offset, Chain };

    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
  } else {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default: return NULL;
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break;
        case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break;
        case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break;
        case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break;
        case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break;
        case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break;
        }
        break;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break;
        case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break;
        case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break;
        case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default: return NULL;
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break;
        case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break;
        case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break;
        case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break;
        case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break;
        case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break;
        }
        break;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break;
        case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break;
        case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break;
        case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break;
        }
        break;
      }
    }

    SDValue Ops[] = { getI32Imm(IsVolatile),
                      getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType),
                      getI32Imm(FromType),
                      getI32Imm(FromTypeWidth),
                      Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
  }

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return LD;
}
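To make the operand layout concrete, here is a hand-written sketch (not a real
DAG dump) of what SelectLoadVector consumes for a <2 x i32> load: the target
node carries one scalar result per element plus a chain, and the selected
machine node prepends the flag immediates built above:

    ; before selection (produced by ReplaceLoadVector further below):
    ;   i32, i32, ch = NVPTXISD::LoadV2 ch, %ptr, <extension-type constant>
    ; after selection (e.g. NVPTX::LDV_i32_v2_avar), operands are:
    ;   isVolatile, codeAddrSpace, vecType, fromType, fromTypeWidth,
    ;   addr, chain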
SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Opcode;
  DebugLoc DL = N->getDebugLoc();
  SDNode *LD;

  EVT RetVT = N->getValueType(0);

  // Select opcode
  if (Subtarget.is64Bit()) {
    switch (N->getOpcode()) {
    default: return NULL;
    case NVPTXISD::LDGV2:
      switch (RetVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
      case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
      case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
      case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
      case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
      case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
      }
      break;
    case NVPTXISD::LDGV4:
      switch (RetVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
      case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
      case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
      case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
      }
      break;
    case NVPTXISD::LDUV2:
      switch (RetVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
      case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
      case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
      case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
      case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
      case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
      }
      break;
    case NVPTXISD::LDUV4:
      switch (RetVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
      case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
      case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
      case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
      }
      break;
    }
  } else {
    switch (N->getOpcode()) {
    default: return NULL;
    case NVPTXISD::LDGV2:
      switch (RetVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
      case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
      case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
      case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
      case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
      case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
      }
      break;
    case NVPTXISD::LDGV4:
      switch (RetVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
      case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
      case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
      case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
      }
      break;
    case NVPTXISD::LDUV2:
      switch (RetVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
      case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
      case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
      case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
      case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
      case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
      }
      break;
    case NVPTXISD::LDUV4:
      switch (RetVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
      case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
      case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
      case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
      }
      break;
    }
  }

  SDValue Ops[] = { Op1, Chain };
  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return LD;
}


SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -400,16 +763,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
    case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
    case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
    case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
    case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
    case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
@@ -431,16 +784,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
    case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
    case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
    case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
    case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
    case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
@@ -455,24 +798,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  } else if (Subtarget.is64Bit()?
             SelectADDRri64(N2.getNode(), N2, Base, Offset):
             SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    switch (SourceVT) {
    case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
    case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
    case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
    case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
    case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
    case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
    case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
    case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
    default: return NULL;
    if (Subtarget.is64Bit()) {
      switch (SourceVT) {
      case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break;
      case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break;
      case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break;
      case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break;
      case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break;
      case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break;
      default: return NULL;
      }
    } else {
      switch (SourceVT) {
      case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
      case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
      case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
      case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
      case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
      case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
      default: return NULL;
      }
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
@@ -484,24 +829,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 9);
  } else {
    switch (SourceVT) {
    case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
    case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
    case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
    case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
    case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
    case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
    case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
    case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
    default: return NULL;
    if (Subtarget.is64Bit()) {
      switch (SourceVT) {
      case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break;
      case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break;
      case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break;
      case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break;
      case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break;
      case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break;
      default: return NULL;
      }
    } else {
      switch (SourceVT) {
      case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
      case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
      case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
      case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
      case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
      case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
      default: return NULL;
      }
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
@@ -523,6 +870,244 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  return NVPTXST;
}

SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  unsigned Opcode;
  DebugLoc DL = N->getDebugLoc();
  SDNode *ST;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);

  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType;
  if (ScalarVT.isFloatingPoint())
    ToType = NVPTX::PTXLdStInstCode::Float;
  else
    ToType = NVPTX::PTXLdStInstCode::Unsigned;

  SmallVector<SDValue, 12> StOps;
  SDValue N2;
  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
    break;
  default: return NULL;
  }

  StOps.push_back(getI32Imm(IsVolatile));
  StOps.push_back(getI32Imm(CodeAddrSpace));
  StOps.push_back(getI32Imm(VecType));
  StOps.push_back(getI32Imm(ToType));
  StOps.push_back(getI32Imm(ToTypeWidth));

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
    default: return NULL;
    case NVPTXISD::StoreV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break;
      case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break;
      case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break;
      case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break;
      case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break;
      case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break;
      }
      break;
    case NVPTXISD::StoreV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break;
      case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break;
      case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break;
      case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break;
      }
      break;
    }
    StOps.push_back(Addr);
  } else if (Subtarget.is64Bit()?
             SelectADDRsi64(N2.getNode(), N2, Base, Offset):
             SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    default: return NULL;
    case NVPTXISD::StoreV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break;
      case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break;
      case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break;
      case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break;
      case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break;
      case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break;
      }
      break;
    case NVPTXISD::StoreV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default: return NULL;
      case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break;
      case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break;
      case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break;
      case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break;
      }
      break;
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (Subtarget.is64Bit()?
             SelectADDRri64(N2.getNode(), N2, Base, Offset):
             SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default: return NULL;
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break;
        case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break;
        case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break;
        }
        break;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default: return NULL;
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break;
        case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break;
        case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break;
        }
        break;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break;
        }
        break;
      }
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default: return NULL;
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break;
        case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break;
        case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break;
        }
        break;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default: return NULL;
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break;
        case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break;
        case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break;
        }
        break;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default: return NULL;
        case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break;
        }
        break;
      }
    }
    StOps.push_back(N2);
  }

  StOps.push_back(Chain);

  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return ST;
}

// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
@@ -72,8 +72,11 @@ private:
#include "NVPTXGenDAGISel.inc"

  SDNode *Select(SDNode *N);
  SDNode* SelectLoad(SDNode *N);
  SDNode* SelectStore(SDNode *N);
  SDNode *SelectLoad(SDNode *N);
  SDNode *SelectLoadVector(SDNode *N);
  SDNode *SelectLDGLDUVector(SDNode *N);
  SDNode *SelectStore(SDNode *N);
  SDNode *SelectStoreVector(SDNode *N);

  inline SDValue getI32Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i32);
@@ -44,16 +44,28 @@ using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool>
RetainVectorOperands("nvptx-codegen-vectors",
  cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
  cl::init(true));

static cl::opt<bool>
sched4reg("nvptx-sched4reg",
  cl::desc("NVPTX Specific: schedule for register pressure"),
  cl::init(false));

static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
  : TargetLowering(TM, new NVPTXTargetObjectFile()),
@@ -87,41 +99,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  if (RetainVectorOperands) {
    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8, Custom);

    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8, Custom);
  }

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
@@ -191,42 +168,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
  // TRAP can be lowered to PTX trap
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // By default, CONCAT_VECTORS is implemented via store/load
  // through stack. It is slow and uses local memory. We need
  // to custom-lower them.
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8, Custom);

  // Expand vector int to float and float to int conversions
  // - For SINT_TO_FP and UINT_TO_FP, the src type
  //   (Node->getOperand(0).getValueType())
  //   is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
  //   the dest type (Node->getValueType(0)) is used.
  //
  // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
  // case, and
  // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
  //
  // That is why v4i32 or v2i32 are used here.
  //
  // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
  // (LegalizeVectorOps.cpp).
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  // Register custom handling for vector loads/stores
  for (int i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Now deduce the information based on the above-mentioned
  // actions
@@ -268,6 +219,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
  }
}
@@ -868,12 +827,19 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}

SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::i1)
    return LowerLOADi1(Op, DAG);
  else
    return SDValue();
}

// v = ld i1* addr
//   =>
// v1 = ld i8* addr
// v = trunc v1 to i1
SDValue NVPTXTargetLowering::
LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  DebugLoc dl = Node->getDebugLoc();
@@ -893,12 +859,109 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getMergeValues(Ops, 2, dl);
}

SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  EVT ValVT = Op.getOperand(1).getValueType();
  if (ValVT == MVT::i1)
    return LowerSTOREi1(Op, DAG);
  else if (ValVT.isVector())
    return LowerSTOREVector(Op, DAG);
  else
    return SDValue();
}

SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue Val = N->getOperand(1);
  DebugLoc DL = N->getDebugLoc();
  EVT ValVT = Val.getValueType();

  if (ValVT.isVector()) {
    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
    // legal. We can (and should) split that into 2 stores of <2 x double> here
    // but I'm leaving that as a TODO for now.
    if (!ValVT.isSimple())
      return SDValue();
    switch (ValVT.getSimpleVT().SimpleTy) {
    default: return SDValue();
    case MVT::v2i8:
    case MVT::v2i16:
    case MVT::v2i32:
    case MVT::v2i64:
    case MVT::v2f32:
    case MVT::v2f64:
    case MVT::v4i8:
    case MVT::v4i16:
    case MVT::v4i32:
    case MVT::v4f32:
      // This is a "native" vector type
      break;
    }

    unsigned Opcode = 0;
    EVT EltVT = ValVT.getVectorElementType();
    unsigned NumElts = ValVT.getVectorNumElements();

    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
    // Therefore, we must ensure the type is legal. For i1 and i8, we set the
    // stored type to i16 and propagate the "real" type as the memory type.
    bool NeedExt = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExt = true;

    switch (NumElts) {
    default: return SDValue();
    case 2:
      Opcode = NVPTXISD::StoreV2;
      break;
    case 4: {
      Opcode = NVPTXISD::StoreV4;
      break;
    }
    }

    SmallVector<SDValue, 8> Ops;

    // First is the chain
    Ops.push_back(N->getOperand(0));

    // Then the split values
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
                                   DAG.getIntPtrConstant(i));
      if (NeedExt)
        // ANY_EXTEND is correct here since the store will only look at the
        // lower-order bits anyway.
        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
      Ops.push_back(ExtVal);
    }

    // Then any remaining arguments
    for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
      Ops.push_back(N->getOperand(i));
    }

    MemSDNode *MemSD = cast<MemSDNode>(N);

    SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
                                            DAG.getVTList(MVT::Other), &Ops[0],
                                            Ops.size(), MemSD->getMemoryVT(),
                                            MemSD->getMemOperand());

    //return DCI.CombineTo(N, NewSt, true);
    return NewSt;
  }

  return SDValue();
}
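A sketch of the widening rule described in the comments above, using <2 x i8>
as the example (types only; this is an illustration, not compiler output):

    ; store <2 x i8> %v, <2 x i8>* %p   becomes, conceptually:
    ;   %e0 = any_extend (extractelement %v, 0) to i16
    ;   %e1 = any_extend (extractelement %v, 1) to i16
    ;   ch  = NVPTXISD::StoreV2 ch, %e0, %e1, %p   (memory VT stays v2i8)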
// st i1 v, addr
//   =>
// v1 = zxt v to i8
// st i8, addr
SDValue NVPTXTargetLowering::
LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(Node);
@ -1348,3 +1411,242 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
|
||||
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
|
||||
return 4;
|
||||
}
|
||||

/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue>& Results) {
  EVT ResVT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal.  We can (and should) split that into 2 loads of <2 x double> here
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default: return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f32:
    // This is a "native" vector type
    break;
  }

  EVT EltVT = ResVT.getVectorElementType();
  unsigned NumElts = ResVT.getVectorNumElements();

  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
  // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
  // loaded type to i16 and propagate the "real" type as the memory type.
  bool NeedTrunc = false;
  if (EltVT.getSizeInBits() < 16) {
    EltVT = MVT::i16;
    NeedTrunc = true;
  }

  unsigned Opcode = 0;
  SDVTList LdResVTs;

  switch (NumElts) {
  default: return;
  case 2:
    Opcode = NVPTXISD::LoadV2;
    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
    break;
  case 4: {
    Opcode = NVPTXISD::LoadV4;
    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
    LdResVTs = DAG.getVTList(ListVTs, 5);
    break;
  }
  }

  SmallVector<SDValue, 8> OtherOps;

  // Copy regular operands
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    OtherOps.push_back(N->getOperand(i));

  LoadSDNode *LD = cast<LoadSDNode>(N);

  // The select routine does not have access to the LoadSDNode instance, so
  // pass along the extension information
  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));

  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
                                          OtherOps.size(), LD->getMemoryVT(),
                                          LD->getMemOperand());

  SmallVector<SDValue, 4> ScalarRes;

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Res = NewLD.getValue(i);
    if (NeedTrunc)
      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
    ScalarRes.push_back(Res);
  }

  SDValue LoadChain = NewLD.getValue(NumElts);

  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT,
                                 &ScalarRes[0], NumElts);

  Results.push_back(BuildVec);
  Results.push_back(LoadChain);
}
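
A hedged sketch of the sub-16-bit path driven by NeedTrunc above (hypothetical IR, not from the commit): elements narrower than 16 bits come back as i16 values and are truncated, while the memory type on the node stays i8 so isel still emits an 8-bit load.

define <2 x i8> @load_v2i8(<2 x i8>* %p) {
  ; becomes a LoadV2 node with two i16 results plus a chain, followed by
  ; two TRUNCATE nodes and a BUILD_VECTOR back to <2 x i8>
  %v = load <2 x i8>* %p
  ret <2 x i8> %v
}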

static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
                                     SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  DebugLoc DL = N->getDebugLoc();

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch(IntrinNo) {
  default: return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type
      // legalization.  Therefore, we must ensure the type is legal.  For i1
      // and i8, we set the loaded type to i16 and propagate the "real" type
      // as the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      switch (NumElts) {
      default: return;
      case 2:
        switch(IntrinNo) {
        default: return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch(IntrinNo) {
        default: return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs, 5);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands

      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID)
      // Others
      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
        OtherOps.push_back(N->getOperand(i));

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs,
                                              &OtherOps[0], OtherOps.size(),
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res = DAG.getNode(ISD::TRUNCATE, DL,
                            ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT,
                                     &ScalarRes[0], NumElts);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        Ops.push_back(N->getOperand(i));

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
                                              LdResVTs, &Ops[0],
                                              Ops.size(), MVT::i8,
                                              MemSD->getMemOperand());

      Results.push_back(NewLD.getValue(0));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}
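
For reference, a sketch of the scalar intrinsic form this code handles; the mangled suffix ".f32" is my reading of the overloaded declaration in IntrinsicsNVVM.td, not something this commit spells out.

declare float @llvm.nvvm.ldg.global.f.f32(float*)

define float @ldg_f32(float* %p) {
  ; on sm_35 or later this should select ld.global.nc.f32; vector results
  ; are funneled through the LDGV2/LDGV4 nodes built above
  %v = call float @llvm.nvvm.ldg.global.f.f32(float* %p)
  ret float %v
}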

void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}

@@ -58,7 +58,16 @@ enum NodeType {
  RETURN,
  CallSeqBegin,
  CallSeqEnd,
  Dummy
  Dummy,

  LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LoadV4,
  LDGV2, // LDG.v2
  LDGV4, // LDG.v4
  LDUV2, // LDU.v2
  LDUV4, // LDU.v4
  StoreV2,
  StoreV4
};
}

@@ -143,8 +152,16 @@ private:

  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;

  virtual void ReplaceNodeResults(SDNode *N,
                                  SmallVectorImpl<SDValue> &Results,
                                  SelectionDAG &DAG) const;
};
} // namespace llvm

@@ -65,46 +65,6 @@ void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
           NVPTX::Float64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
           NVPTX::V4F32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
           NVPTX::V4I32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
           NVPTX::V2F32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
           NVPTX::V2I32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
           NVPTX::V4I8RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
           NVPTX::V2I8RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
           NVPTX::V4I16RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
           NVPTX::V2I16RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
           NVPTX::V2I64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
           NVPTX::V2F64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else {
    llvm_unreachable("Don't know how to copy a register");
  }

@@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget.hasVote()">;
def hasDouble : Predicate<"Subtarget.hasDouble()">;
def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
def hasLDG : Predicate<"Subtarget.hasLDG()">;
def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;

@@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> {
               i32imm:$fromWidth, Int32Regs:$addr),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t$dst, [$addr];"), []>;
  def _areg_64 : NVPTXInst<(outs regclass:$dst),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int64Regs:$addr),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
                   " \t$dst, [$addr];"), []>;
  def _ari : NVPTXInst<(outs regclass:$dst),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t$dst, [$addr+$offset];"), []>;
  def _ari_64 : NVPTXInst<(outs regclass:$dst),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
                   " \t$dst, [$addr+$offset];"), []>;
  def _asi : NVPTXInst<(outs regclass:$dst),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}

let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
defm LD_v2i8 : LD<V2I8Regs>;
defm LD_v4i8 : LD<V4I8Regs>;
defm LD_v2i16 : LD<V2I16Regs>;
defm LD_v4i16 : LD<V4I16Regs>;
defm LD_v2i32 : LD<V2I32Regs>;
defm LD_v4i32 : LD<V4I32Regs>;
defm LD_v2f32 : LD<V2F32Regs>;
defm LD_v4f32 : LD<V4F32Regs>;
defm LD_v2i64 : LD<V2I64Regs>;
defm LD_v2f64 : LD<V2F64Regs>;
}

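The new *_areg_64 and *_ari_64 variants cover 64-bit address registers. Illustrative PTX shapes only (operand numbers invented, not from the commit):

  ld.global.u32 %r1, [%r2];       // _areg:    32-bit address register
  ld.global.u32 %r1, [%rl2];      // _areg_64: 64-bit address register
  ld.global.u32 %r1, [%rl2+8];    // _ari_64:  64-bit register plus offset
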
multiclass ST<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<(outs),
        (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
@@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> {
             LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
                   " \t[$addr], $src;"), []>;
  def _areg_64 : NVPTXInst<(outs),
        (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
             LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
                   "\t[$addr], $src;"), []>;
  def _ari : NVPTXInst<(outs),
        (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
             LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
                   " \t[$addr+$offset], $src;"), []>;
  def _ari_64 : NVPTXInst<(outs),
        (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
             LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
                   "\t[$addr+$offset], $src;"), []>;
  def _asi : NVPTXInst<(outs),
        (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
             LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
@@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}

let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
defm ST_v2i8 : ST<V2I8Regs>;
defm ST_v4i8 : ST<V4I8Regs>;
defm ST_v2i16 : ST<V2I16Regs>;
defm ST_v4i16 : ST<V4I16Regs>;
defm ST_v2i32 : ST<V2I32Regs>;
defm ST_v4i32 : ST<V4I32Regs>;
defm ST_v2f32 : ST<V2F32Regs>;
defm ST_v4f32 : ST<V4F32Regs>;
defm ST_v2i64 : ST<V2I64Regs>;
defm ST_v2f64 : ST<V2F64Regs>;
}

// The following is used only in and after vector elementizations.
// Vector elementization happens at the machine instruction level, so the
// following instruction
@@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
               i32imm:$fromWidth, Int32Regs:$addr),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int64Regs:$addr),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
  def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
  def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
             i32imm:$fromWidth, Int32Regs:$addr),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int64Regs:$addr),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
  def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                           regclass:$dst4),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
        []>;
  def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
        !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
        []>;
  def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                           regclass:$dst4),
        (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
             LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
  def _v2_areg_64 : NVPTXInst<(outs),
        (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
             LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
  def _v2_ari : NVPTXInst<(outs),
        (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
             LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
             i32imm:$offset),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
  def _v2_ari_64 : NVPTXInst<(outs),
        (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
             LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
             i32imm:$offset),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
  def _v2_asi : NVPTXInst<(outs),
        (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
             LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
@@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
             i32imm:$fromWidth, Int32Regs:$addr),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_areg_64 : NVPTXInst<(outs),
        (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
             LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int64Regs:$addr),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
  def _v4_ari : NVPTXInst<(outs),
        (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
             LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
        []>;
  def _v4_ari_64 : NVPTXInst<(outs),
        (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
             LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
             i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
        !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                   "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
        []>;
  def _v4_asi : NVPTXInst<(outs),
        (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
             LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins),
                         "trap;",
                         [(trap)]>;

include "NVPTXVector.td"

include "NVPTXIntrinsics.td"

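For orientation before the intrinsics changes: the v2/v4 multiclasses above print grouped operand lists. Illustrative shapes only (operand numbers invented):

  ld.global.v4.f32 {%f1, %f2, %f3, %f4}, [%rl1+16];   // _v4_ari_64
  st.global.v2.f32 [%r1], {%f5, %f6};                 // _v2_areg
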
@@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;

// Vector ldu
multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
                  NVPTXInst eleInst, NVPTXInst eleInst64> {
  def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
               !strconcat("ldu.global.", TyStr),
               [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
           Requires<[hasLDU]>;
  def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
               !strconcat("ldu.global.", TyStr),
               [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
           Requires<[hasLDU]>;

//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  return M->getMemoryVT() == MVT::i8;
}]>;

multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
  def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
            Requires<[hasLDG]>;
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
}

let VecInstType=isVecLD.Value in {
defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
  V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
  INT_PTX_LDU_G_v2i8_ELE_64>;
defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
  V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
  INT_PTX_LDU_G_v4i8_ELE_64>;
defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
  V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
  INT_PTX_LDU_G_v2i16_ELE_64>;
defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
  V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
  INT_PTX_LDU_G_v4i16_ELE_64>;
defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
  V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
  INT_PTX_LDU_G_v2i32_ELE_64>;
defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
  V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
  INT_PTX_LDU_G_v4i32_ELE_64>;
defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
  V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
  INT_PTX_LDU_G_v2f32_ELE_64>;
defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
  V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
  INT_PTX_LDU_G_v4f32_ELE_64>;
defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
  V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
  INT_PTX_LDU_G_v2i64_ELE_64>;
defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
  V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
  INT_PTX_LDU_G_v2f64_ELE_64>;
multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
  def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
            Requires<[hasLDG]>;
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
               !strconcat("ld.global.nc.", TyStr),
               [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8
  : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
defm INT_PTX_LDG_GLOBAL_i16
  : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_i32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_i64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_f32
  : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
defm INT_PTX_LDG_GLOBAL_f64
  : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
defm INT_PTX_LDG_GLOBAL_p32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
defm INT_PTX_LDG_GLOBAL_p64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;

// vector

// Elementized vector ldg
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int32Regs:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
  def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int64Regs:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
}

multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
  def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;


multiclass NG_TO_G<string Str, Intrinsic Intrin> {

@@ -54,36 +54,6 @@ std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
  else if (RC == &NVPTX::SpecialRegsRegClass) {
    return "!Special!";
  }
  else if (RC == &NVPTX::V2F32RegsRegClass) {
    return ".v2.f32";
  }
  else if (RC == &NVPTX::V4F32RegsRegClass) {
    return ".v4.f32";
  }
  else if (RC == &NVPTX::V2I32RegsRegClass) {
    return ".v2.s32";
  }
  else if (RC == &NVPTX::V4I32RegsRegClass) {
    return ".v4.s32";
  }
  else if (RC == &NVPTX::V2F64RegsRegClass) {
    return ".v2.f64";
  }
  else if (RC == &NVPTX::V2I64RegsRegClass) {
    return ".v2.s64";
  }
  else if (RC == &NVPTX::V2I16RegsRegClass) {
    return ".v2.s16";
  }
  else if (RC == &NVPTX::V4I16RegsRegClass) {
    return ".v4.s16";
  }
  else if (RC == &NVPTX::V2I8RegsRegClass) {
    return ".v2.s16";
  }
  else if (RC == &NVPTX::V4I8RegsRegClass) {
    return ".v4.s16";
  }
  else {
    return "INTERNAL";
  }
@@ -115,137 +85,11 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
  else if (RC == &NVPTX::SpecialRegsRegClass) {
    return "!Special!";
  }
  else if (RC == &NVPTX::V2F32RegsRegClass) {
    return "%v2f";
  }
  else if (RC == &NVPTX::V4F32RegsRegClass) {
    return "%v4f";
  }
  else if (RC == &NVPTX::V2I32RegsRegClass) {
    return "%v2r";
  }
  else if (RC == &NVPTX::V4I32RegsRegClass) {
    return "%v4r";
  }
  else if (RC == &NVPTX::V2F64RegsRegClass) {
    return "%v2fd";
  }
  else if (RC == &NVPTX::V2I64RegsRegClass) {
    return "%v2rd";
  }
  else if (RC == &NVPTX::V2I16RegsRegClass) {
    return "%v2s";
  }
  else if (RC == &NVPTX::V4I16RegsRegClass) {
    return "%v4rs";
  }
  else if (RC == &NVPTX::V2I8RegsRegClass) {
    return "%v2rc";
  }
  else if (RC == &NVPTX::V4I8RegsRegClass) {
    return "%v4rc";
  }
  else {
    return "INTERNAL";
  }
  return "";
}

bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
    return true;
  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
    return true;
  return false;
}

std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
    return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
  llvm_unreachable("Not a vector register class");
}

const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
    return (&NVPTX::Float32RegsRegClass);
  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
    return (&NVPTX::Float64RegsRegClass);
  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
    return (&NVPTX::Int16RegsRegClass);
  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
    return (&NVPTX::Int32RegsRegClass);
  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
    return (&NVPTX::Int64RegsRegClass);
  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
    return (&NVPTX::Int8RegsRegClass);
  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
    return (&NVPTX::Float32RegsRegClass);
  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
    return (&NVPTX::Int16RegsRegClass);
  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
    return (&NVPTX::Int32RegsRegClass);
  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
    return (&NVPTX::Int8RegsRegClass);
  llvm_unreachable("Not a vector register class");
}

int getNVPTXVectorSize(TargetRegisterClass const *RC) {
  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
    return 2;
  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
    return 2;
  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
    return 2;
  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
    return 2;
  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
    return 2;
  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
    return 2;
  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
    return 4;
  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
    return 4;
  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
    return 4;
  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
    return 4;
  llvm_unreachable("Not a vector register class");
}
}

NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,

@@ -81,10 +81,6 @@ public:

std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
int getNVPTXVectorSize (const TargetRegisterClass *RC);
const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);

} // end namespace llvm

@@ -37,9 +37,6 @@ foreach i = 0-395 in {
  def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
  def F#i : NVPTXReg<"%f"#i>; // 32-bit float
  def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
  // Vectors
  foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in
    def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>;

  // Arguments
  def ia#i : NVPTXReg<"%ia"#i>;
@@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;

// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;

class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
                       NVPTXRegClass sClass,
                       int e,
                       string n>
  : NVPTXRegClass<regTypes, alignment, regList>
{
  NVPTXRegClass scalarClass=sClass;
  int elems=e;
  string name=n;
}
def V2F32Regs
  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
                     Float32Regs, 2, ".v2.f32">;
def V4F32Regs
  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
                     Float32Regs, 4, ".v4.f32">;
def V2I32Regs
  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
                     Int32Regs, 2, ".v2.u32">;
def V4I32Regs
  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
                     Int32Regs, 4, ".v4.u32">;
def V2F64Regs
  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
                     Float64Regs, 2, ".v2.f64">;
def V2I64Regs
  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
                     Int64Regs, 2, ".v2.u64">;
def V2I16Regs
  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
                     Int16Regs, 2, ".v2.u16">;
def V4I16Regs
  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
                     Int16Regs, 4, ".v4.u16">;
def V2I8Regs
  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
                     Int8Regs, 2, ".v2.u8">;
def V4I8Regs
  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
                     Int8Regs, 4, ".v4.u8">;

@@ -57,6 +57,7 @@ public:
  bool hasF32FTZ() const { return SmVersion >= 20; }
  bool hasFMAF32() const { return SmVersion >= 20; }
  bool hasFMAF64() const { return SmVersion >= 13; }
  bool hasLDG() const { return SmVersion >= 32; }
  bool hasLDU() const { return SmVersion >= 20; }
  bool hasGenericLdSt() const { return SmVersion >= 20; }
  inline bool hasHWROT32() const { return false; }

@@ -123,7 +123,6 @@ bool NVPTXPassConfig::addInstSelector() {
  addPass(createSplitBBatBarPass());
  addPass(createAllocaHoisting());
  addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
  addPass(createVectorElementizePass(getNVPTXTargetMachine()));
  return false;
}

File diff suppressed because it is too large
@@ -1,202 +0,0 @@
#!/usr/bin/env python

num_regs = 396

outFile = open('NVPTXRegisterInfo.td', 'w')

outFile.write('''
//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Declarations that describe the PTX register file
//===----------------------------------------------------------------------===//

class NVPTXReg<string n> : Register<n> {
  let Namespace = "NVPTX";
}

class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
  : RegisterClass <"NVPTX", regTypes, alignment, regList>;

//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//

// Special Registers used as stack pointer
def VRFrame : NVPTXReg<"%SP">;
def VRFrameLocal : NVPTXReg<"%SPL">;

// Special Registers used as the stack
def VRDepot : NVPTXReg<"%Depot">;
''')

# Predicates
outFile.write('''
//===--- Predicate --------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))

# Int8
outFile.write('''
//===--- 8-bit ------------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))

# Int16
outFile.write('''
//===--- 16-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))

# Int32
outFile.write('''
//===--- 32-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))

# Int64
outFile.write('''
//===--- 64-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))

# F32
outFile.write('''
//===--- 32-bit float -----------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))

# F64
outFile.write('''
//===--- 64-bit float -----------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))

# Vector registers
outFile.write('''
//===--- Vector -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))

for i in range(0, num_regs):
    outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))

# Argument registers
outFile.write('''
//===--- Arguments --------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))

outFile.write('''
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
''')

outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))

outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))

outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))

outFile.write('''
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
''')

outFile.write('''
class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
                       NVPTXRegClass sClass,
                       int e,
                       string n>
  : NVPTXRegClass<regTypes, alignment, regList>
{
  NVPTXRegClass scalarClass=sClass;
  int elems=e;
  string name=n;
}
''')


outFile.write('def V2F32Regs\n  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n                     Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
outFile.write('def V4F32Regs\n  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n                     Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))

outFile.write('def V2I32Regs\n  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n                     Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
outFile.write('def V4I32Regs\n  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n                     Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))

outFile.write('def V2F64Regs\n  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n                     Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
outFile.write('def V2I64Regs\n  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n                     Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))

outFile.write('def V2I16Regs\n  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n                     Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
outFile.write('def V4I16Regs\n  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n                     Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))

outFile.write('def V2I8Regs\n  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n                     Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
outFile.write('def V4I8Regs\n  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n                     Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))

outFile.close()


outFile = open('NVPTXNumRegisters.h', 'w')
outFile.write('''
//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H

namespace llvm {

const unsigned NVPTXNumRegisters = %d;

}

#endif
''' % num_regs)

outFile.close()
test/CodeGen/NVPTX/vector-loads.ll (new file, 66 lines)
@@ -0,0 +1,66 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

; Even though general vector types are not supported in PTX, we can still
; optimize loads/stores with pseudo-vector instructions of the form:
;
; ld.v2.f32 {%f0, %f1}, [%r0]
;
; which will load two floats at once into scalar registers.

define void @foo(<2 x float>* %a) {
; CHECK: .func foo
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
  %t1 = load <2 x float>* %a
  %t2 = fmul <2 x float> %t1, %t1
  store <2 x float> %t2, <2 x float>* %a
  ret void
}

define void @foo2(<4 x float>* %a) {
; CHECK: .func foo2
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
  %t1 = load <4 x float>* %a
  %t2 = fmul <4 x float> %t1, %t1
  store <4 x float> %t2, <4 x float>* %a
  ret void
}

define void @foo3(<8 x float>* %a) {
; CHECK: .func foo3
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16];
  %t1 = load <8 x float>* %a
  %t2 = fmul <8 x float> %t1, %t1
  store <8 x float> %t2, <8 x float>* %a
  ret void
}



define void @foo4(<2 x i32>* %a) {
; CHECK: .func foo4
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
  %t1 = load <2 x i32>* %a
  %t2 = mul <2 x i32> %t1, %t1
  store <2 x i32> %t2, <2 x i32>* %a
  ret void
}

define void @foo5(<4 x i32>* %a) {
; CHECK: .func foo5
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
  %t1 = load <4 x i32>* %a
  %t2 = mul <4 x i32> %t1, %t1
  store <4 x i32> %t2, <4 x i32>* %a
  ret void
}

define void @foo6(<8 x i32>* %a) {
; CHECK: .func foo6
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16];
  %t1 = load <8 x i32>* %a
  %t2 = mul <8 x i32> %t1, %t1
  store <8 x i32> %t2, <8 x i32>* %a
  ret void
}