[NVPTX] Disable vector registers

Vectors were being manually scalarized by the backend.  Instead,
let the target-independent code do all of the work.  The manual
scalarization dates from a time before LLVM had good target-independent
support for scalarization.  However, this forces us to handle vector
loads and stores specially, turning them into PTX instructions that
produce/consume multiple operands.
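
A rough sketch of the intended effect at the PTX level (register names and
addressing are hypothetical, not taken from this commit).  Given IR such as

  %v = load <2 x float>* %p

the old path elementized the access into scalar loads:

  ld.global.f32 %f1, [%rd1];
  ld.global.f32 %f2, [%rd1+4];

while with this change instruction selection emits a single vector load that
produces both results (stores are symmetric, e.g. st.global.v2.f32):

  ld.global.v2.f32 {%f1, %f2}, [%rd1];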

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174968 91177308-0d34-0410-b5e6-96231b3b80d8
Justin Holewinski 2013-02-12 14:18:49 +00:00
commit 7eacad03ef, parent c8a196ae8f
19 changed files with 1350 additions and 1997 deletions

@@ -805,6 +805,16 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
"llvm.nvvm.ldu.global.p">;
// Generated within nvvm. Use for ldg on sm_35 or later
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
"llvm.nvvm.ldg.global.i">;
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
"llvm.nvvm.ldg.global.f">;
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
"llvm.nvvm.ldg.global.p">;
// Use for generic pointers
// - These intrinsics are used to convert address spaces.

@@ -22,7 +22,6 @@ set(NVPTXCodeGen_sources
NVPTXAllocaHoisting.cpp
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
VectorElementize.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})

@@ -53,7 +53,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);

@@ -503,21 +503,7 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
O << getNVPTXRegClassStr(RC) << mapped_vr;
return;
}
// Vector virtual register
if (getNVPTXVectorSize(RC) == 4)
O << "{"
<< getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
<< getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
<< getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
<< getNVPTXRegClassStr(RC) << mapped_vr << "_3"
<< "}";
else if (getNVPTXVectorSize(RC) == 2)
O << "{"
<< getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
<< getNVPTXRegClassStr(RC) << mapped_vr << "_1"
<< "}";
else
llvm_unreachable("Unsupported vector size");
report_fatal_error("Bad register!");
}
void
@@ -2024,29 +2010,9 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
@@ -2057,16 +2023,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
return true;
}

@@ -105,6 +105,21 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
ResNode = SelectStore(N);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LoadV4:
ResNode = SelectLoadVector(N);
break;
case NVPTXISD::LDGV2:
case NVPTXISD::LDGV4:
case NVPTXISD::LDUV2:
case NVPTXISD::LDUV4:
ResNode = SelectLDGLDUVector(N);
break;
case NVPTXISD::StoreV2:
case NVPTXISD::StoreV4:
ResNode = SelectStoreVector(N);
break;
default: break;
}
if (ResNode)
return ResNode;
@@ -214,16 +229,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
@@ -244,16 +249,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
@@ -267,24 +262,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
} else if (Subtarget.is64Bit()?
SelectADDRri64(N1.getNode(), N1, Base, Offset):
SelectADDRri(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
default: return NULL;
if (Subtarget.is64Bit()) {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break;
case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break;
case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break;
case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break;
case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break;
case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break;
default: return NULL;
}
} else {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
default: return NULL;
}
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
@@ -296,24 +293,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
MVT::Other, Ops, 8);
}
else {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
default: return NULL;
if (Subtarget.is64Bit()) {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break;
case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break;
case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break;
case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break;
case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break;
case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break;
default: return NULL;
}
} else {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
default: return NULL;
}
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
@@ -334,6 +333,370 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return NVPTXLD;
}
SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
unsigned Opcode;
DebugLoc DL = N->getDebugLoc();
SDNode *LD;
MemSDNode *MemSD = cast<MemSDNode>(N);
EVT LoadedVT = MemSD->getMemoryVT();
if (!LoadedVT.isSimple())
return NULL;
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
// Volatile Setting
// - .volatile is only available for .global and .shared
bool IsVolatile = MemSD->isVolatile();
if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
IsVolatile = false;
// Vector Setting
MVT SimpleVT = LoadedVT.getSimpleVT();
// Type Setting: fromType + fromTypeWidth
//
// Sign : ISD::SEXTLOAD
// Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
unsigned FromTypeWidth = ScalarVT.getSizeInBits();
unsigned int FromType;
// The last operand holds the original LoadSDNode::getExtensionType() value
unsigned ExtensionType =
cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
if (ExtensionType == ISD::SEXTLOAD)
FromType = NVPTX::PTXLdStInstCode::Signed;
else if (ScalarVT.isFloatingPoint())
FromType = NVPTX::PTXLdStInstCode::Float;
else
FromType = NVPTX::PTXLdStInstCode::Unsigned;
unsigned VecType;
switch (N->getOpcode()) {
case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break;
case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break;
default: return NULL;
}
EVT EltVT = N->getValueType(0);
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break;
case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break;
case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break;
}
break;
}
SDValue Ops[] = { getI32Imm(IsVolatile),
getI32Imm(CodeAddrSpace),
getI32Imm(VecType),
getI32Imm(FromType),
getI32Imm(FromTypeWidth),
Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
} else if (Subtarget.is64Bit()?
SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break;
case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break;
case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break;
}
break;
}
SDValue Ops[] = { getI32Imm(IsVolatile),
getI32Imm(CodeAddrSpace),
getI32Imm(VecType),
getI32Imm(FromType),
getI32Imm(FromTypeWidth),
Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
} else if (Subtarget.is64Bit()?
SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break;
case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break;
case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break;
}
break;
}
} else {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break;
case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break;
case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break;
}
break;
}
}
SDValue Ops[] = { getI32Imm(IsVolatile),
getI32Imm(CodeAddrSpace),
getI32Imm(VecType),
getI32Imm(FromType),
getI32Imm(FromTypeWidth),
Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
} else {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break;
case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break;
case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break;
}
break;
}
} else {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break;
case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break;
case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break;
case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break;
case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break;
case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break;
}
break;
}
}
SDValue Ops[] = { getI32Imm(IsVolatile),
getI32Imm(CodeAddrSpace),
getI32Imm(VecType),
getI32Imm(FromType),
getI32Imm(FromTypeWidth),
Op1, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
return LD;
}
SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
unsigned Opcode;
DebugLoc DL = N->getDebugLoc();
SDNode *LD;
EVT RetVT = N->getValueType(0);
// Select opcode
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::LDGV2:
switch (RetVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
}
break;
case NVPTXISD::LDGV4:
switch (RetVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
}
break;
case NVPTXISD::LDUV2:
switch (RetVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
}
break;
case NVPTXISD::LDUV4:
switch (RetVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
}
break;
}
} else {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::LDGV2:
switch (RetVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
}
break;
case NVPTXISD::LDGV4:
switch (RetVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
}
break;
case NVPTXISD::LDUV2:
switch (RetVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
}
break;
case NVPTXISD::LDUV4:
switch (RetVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
}
break;
}
}
SDValue Ops[] = { Op1, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
return LD;
}
SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -400,16 +763,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { N1,
@@ -431,16 +784,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { N1,
@@ -455,24 +798,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
} else if (Subtarget.is64Bit()?
SelectADDRri64(N2.getNode(), N2, Base, Offset):
SelectADDRri(N2.getNode(), N2, Base, Offset)) {
switch (SourceVT) {
case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
default: return NULL;
if (Subtarget.is64Bit()) {
switch (SourceVT) {
case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break;
case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break;
case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break;
case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break;
case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break;
case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break;
default: return NULL;
}
} else {
switch (SourceVT) {
case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
default: return NULL;
}
}
SDValue Ops[] = { N1,
getI32Imm(isVolatile),
@@ -484,24 +829,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
NVPTXST = CurDAG->getMachineNode(Opcode, dl,
MVT::Other, Ops, 9);
} else {
switch (SourceVT) {
case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
default: return NULL;
if (Subtarget.is64Bit()) {
switch (SourceVT) {
case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break;
case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break;
case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break;
case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break;
case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break;
case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break;
default: return NULL;
}
} else {
switch (SourceVT) {
case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
default: return NULL;
}
}
SDValue Ops[] = { N1,
getI32Imm(isVolatile),
@@ -523,6 +870,244 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return NVPTXST;
}
SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
unsigned Opcode;
DebugLoc DL = N->getDebugLoc();
SDNode *ST;
EVT EltVT = Op1.getValueType();
MemSDNode *MemSD = cast<MemSDNode>(N);
EVT StoreVT = MemSD->getMemoryVT();
// Address Space Setting
unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
report_fatal_error("Cannot store to pointer that points to constant "
"memory space");
}
// Volatile Setting
// - .volatile is only available for .global and .shared
bool IsVolatile = MemSD->isVolatile();
if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
IsVolatile = false;
// Type Setting: toType + toTypeWidth
// - for integer type, always use 'u'
assert(StoreVT.isSimple() && "Store value is not simple");
MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
unsigned ToTypeWidth = ScalarVT.getSizeInBits();
unsigned ToType;
if (ScalarVT.isFloatingPoint())
ToType = NVPTX::PTXLdStInstCode::Float;
else
ToType = NVPTX::PTXLdStInstCode::Unsigned;
SmallVector<SDValue, 12> StOps;
SDValue N2;
unsigned VecType;
switch (N->getOpcode()) {
case NVPTXISD::StoreV2:
VecType = NVPTX::PTXLdStInstCode::V2;
StOps.push_back(N->getOperand(1));
StOps.push_back(N->getOperand(2));
N2 = N->getOperand(3);
break;
case NVPTXISD::StoreV4:
VecType = NVPTX::PTXLdStInstCode::V4;
StOps.push_back(N->getOperand(1));
StOps.push_back(N->getOperand(2));
StOps.push_back(N->getOperand(3));
StOps.push_back(N->getOperand(4));
N2 = N->getOperand(5);
break;
default: return NULL;
}
StOps.push_back(getI32Imm(IsVolatile));
StOps.push_back(getI32Imm(CodeAddrSpace));
StOps.push_back(getI32Imm(VecType));
StOps.push_back(getI32Imm(ToType));
StOps.push_back(getI32Imm(ToTypeWidth));
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break;
case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break;
case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break;
}
break;
}
StOps.push_back(Addr);
} else if (Subtarget.is64Bit()?
SelectADDRsi64(N2.getNode(), N2, Base, Offset):
SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break;
case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break;
case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break;
}
break;
}
StOps.push_back(Base);
StOps.push_back(Offset);
} else if (Subtarget.is64Bit()?
SelectADDRri64(N2.getNode(), N2, Base, Offset):
SelectADDRri(N2.getNode(), N2, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break;
case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break;
case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break;
}
break;
}
} else {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break;
case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break;
case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break;
}
break;
}
}
StOps.push_back(Base);
StOps.push_back(Offset);
} else {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break;
case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break;
case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break;
}
break;
}
} else {
switch (N->getOpcode()) {
default: return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break;
case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break;
case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default: return NULL;
case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break;
case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break;
case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break;
case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break;
}
break;
}
}
StOps.push_back(N2);
}
StOps.push_back(Chain);
ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
return ST;
}
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {

@@ -72,8 +72,11 @@ private:
#include "NVPTXGenDAGISel.inc"
SDNode *Select(SDNode *N);
SDNode* SelectLoad(SDNode *N);
SDNode* SelectStore(SDNode *N);
SDNode *SelectLoad(SDNode *N);
SDNode *SelectLoadVector(SDNode *N);
SDNode *SelectLDGLDUVector(SDNode *N);
SDNode *SelectStore(SDNode *N);
SDNode *SelectStoreVector(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);

@@ -44,16 +44,28 @@ using namespace llvm;
static unsigned int uniqueCallSite = 0;
static cl::opt<bool>
RetainVectorOperands("nvptx-codegen-vectors",
cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
cl::init(true));
static cl::opt<bool>
sched4reg("nvptx-sched4reg",
cl::desc("NVPTX Specific: schedule for register pressue"),
cl::init(false));
static bool IsPTXVectorType(MVT VT) {
switch (VT.SimpleTy) {
default: return false;
case MVT::v2i8:
case MVT::v4i8:
case MVT::v2i16:
case MVT::v4i16:
case MVT::v2i32:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v2f32:
case MVT::v4f32:
case MVT::v2f64:
return true;
}
}
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
: TargetLowering(TM, new NVPTXTargetObjectFile()),
@@ -87,41 +99,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
if (RetainVectorOperands) {
addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
}
// Operations not directly supported by NVPTX.
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
@@ -191,42 +168,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
// TRAP can be lowered to PTX trap
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// By default, CONCAT_VECTORS is implemented via store/load
// through stack. It is slow and uses local memory. We need
// to custom-lower them.
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom);
// Expand vector int to float and float to int conversions
// - For SINT_TO_FP and UINT_TO_FP, the src type
// (Node->getOperand(0).getValueType())
// is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
// the dest type (Node->getValueType(0)) is used.
//
// See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
// case, and
// SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
//
// That is why v4i32 or v2i32 are used here.
//
// The expansion for vectors happens in VectorLegalizer::LegalizeOp()
// (LegalizeVectorOps.cpp).
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
// Register custom handling for vector loads/stores
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (IsPTXVectorType(VT)) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
}
}
// Now deduce the information based on the above mentioned
// actions
@@ -268,6 +219,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
}
}
@@ -868,12 +827,19 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
else
return SDValue();
}
// v = ld i1* addr
// =>
// v1 = ld i8* addr
// v = trunc v1 to i1
SDValue NVPTXTargetLowering::
LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
DebugLoc dl = Node->getDebugLoc();
@@ -893,12 +859,109 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, 2, dl);
}
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT ValVT = Op.getOperand(1).getValueType();
if (ValVT == MVT::i1)
return LowerSTOREi1(Op, DAG);
else if (ValVT.isVector())
return LowerSTOREVector(Op, DAG);
else
return SDValue();
}
SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
SDNode *N = Op.getNode();
SDValue Val = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
EVT ValVT = Val.getValueType();
if (ValVT.isVector()) {
// We only handle "native" vector sizes for now, e.g. <4 x double> is not
// legal. We can (and should) split that into 2 stores of <2 x double> here
// but I'm leaving that as a TODO for now.
if (!ValVT.isSimple())
return SDValue();
switch (ValVT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v2i8:
case MVT::v2i16:
case MVT::v2i32:
case MVT::v2i64:
case MVT::v2f32:
case MVT::v2f64:
case MVT::v4i8:
case MVT::v4i16:
case MVT::v4i32:
case MVT::v4f32:
// This is a "native" vector type
break;
}
unsigned Opcode = 0;
EVT EltVT = ValVT.getVectorElementType();
unsigned NumElts = ValVT.getVectorNumElements();
// Since StoreV2 is a target node, we cannot rely on DAG type legalization.
// Therefore, we must ensure the type is legal. For i1 and i8, we set the
// stored type to i16 and propagate the "real" type as the memory type.
bool NeedExt = false;
if (EltVT.getSizeInBits() < 16)
NeedExt = true;
switch (NumElts) {
default: return SDValue();
case 2:
Opcode = NVPTXISD::StoreV2;
break;
case 4: {
Opcode = NVPTXISD::StoreV4;
break;
}
}
SmallVector<SDValue, 8> Ops;
// First is the chain
Ops.push_back(N->getOperand(0));
// Then the split values
for (unsigned i = 0; i < NumElts; ++i) {
SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
DAG.getIntPtrConstant(i));
if (NeedExt)
// ANY_EXTEND is correct here since the store will only look at the
// lower-order bits anyway.
ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
Ops.push_back(ExtVal);
}
// Then any remaining arguments
for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
Ops.push_back(N->getOperand(i));
}
MemSDNode *MemSD = cast<MemSDNode>(N);
SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
DAG.getVTList(MVT::Other), &Ops[0],
Ops.size(), MemSD->getMemoryVT(),
MemSD->getMemOperand());
//return DCI.CombineTo(N, NewSt, true);
return NewSt;
}
return SDValue();
}
// st i1 v, addr
// =>
// v1 = zxt v to i8
// st i8, addr
SDValue NVPTXTargetLowering::
LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -1348,3 +1411,242 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
return 4;
}
/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue>& Results) {
EVT ResVT = N->getValueType(0);
DebugLoc DL = N->getDebugLoc();
assert(ResVT.isVector() && "Vector load must have vector type");
// We only handle "native" vector sizes for now, e.g. <4 x double> is not
// legal. We can (and should) split that into 2 loads of <2 x double> here
// but I'm leaving that as a TODO for now.
assert(ResVT.isSimple() && "Can only handle simple types");
switch (ResVT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v2i8:
case MVT::v2i16:
case MVT::v2i32:
case MVT::v2i64:
case MVT::v2f32:
case MVT::v2f64:
case MVT::v4i8:
case MVT::v4i16:
case MVT::v4i32:
case MVT::v4f32:
// This is a "native" vector type
break;
}
EVT EltVT = ResVT.getVectorElementType();
unsigned NumElts = ResVT.getVectorNumElements();
// Since LoadV2 is a target node, we cannot rely on DAG type legalization.
// Therefore, we must ensure the type is legal. For i1 and i8, we set the
// loaded type to i16 and propagate the "real" type as the memory type.
bool NeedTrunc = false;
if (EltVT.getSizeInBits() < 16) {
EltVT = MVT::i16;
NeedTrunc = true;
}
unsigned Opcode = 0;
SDVTList LdResVTs;
switch (NumElts) {
default: return;
case 2:
Opcode = NVPTXISD::LoadV2;
LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
break;
case 4: {
Opcode = NVPTXISD::LoadV4;
EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
LdResVTs = DAG.getVTList(ListVTs, 5);
break;
}
}
SmallVector<SDValue, 8> OtherOps;
// Copy regular operands
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
OtherOps.push_back(N->getOperand(i));
LoadSDNode *LD = cast<LoadSDNode>(N);
// The select routine does not have access to the LoadSDNode instance, so
// pass along the extension information
OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
OtherOps.size(), LD->getMemoryVT(),
LD->getMemOperand());
SmallVector<SDValue, 4> ScalarRes;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Res = NewLD.getValue(i);
if (NeedTrunc)
Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
ScalarRes.push_back(Res);
}
SDValue LoadChain = NewLD.getValue(NumElts);
SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
Results.push_back(BuildVec);
Results.push_back(LoadChain);
}
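
Under the sub-16-bit widening above, a hypothetical load of <4 x i8> becomes a
LoadV4 producing four i16 values (with memory type v4i8) plus a chain, which
instruction selection then turns into a single .u8 vector load into 16-bit
registers, roughly (register names hypothetical):

  ld.global.v4.u8 {%rs1, %rs2, %rs3, %rs4}, [%r1];

followed by TRUNCATE and BUILD_VECTOR nodes to reassemble the <4 x i8> value.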
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
SDValue Chain = N->getOperand(0);
SDValue Intrin = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
// Get the intrinsic ID
unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
switch(IntrinNo) {
default: return;
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p:
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
case Intrinsic::nvvm_ldu_global_p: {
EVT ResVT = N->getValueType(0);
if (ResVT.isVector()) {
// Vector LDG/LDU
unsigned NumElts = ResVT.getVectorNumElements();
EVT EltVT = ResVT.getVectorElementType();
// Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
// Therefore, we must ensure the type is legal. For i1 and i8, we set the
// loaded type to i16 and propagate the "real" type as the memory type.
bool NeedTrunc = false;
if (EltVT.getSizeInBits() < 16) {
EltVT = MVT::i16;
NeedTrunc = true;
}
unsigned Opcode = 0;
SDVTList LdResVTs;
switch (NumElts) {
default: return;
case 2:
switch(IntrinNo) {
default: return;
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p:
Opcode = NVPTXISD::LDGV2;
break;
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
case Intrinsic::nvvm_ldu_global_p:
Opcode = NVPTXISD::LDUV2;
break;
}
LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
break;
case 4: {
switch(IntrinNo) {
default: return;
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p:
Opcode = NVPTXISD::LDGV4;
break;
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
case Intrinsic::nvvm_ldu_global_p:
Opcode = NVPTXISD::LDUV4;
break;
}
EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
LdResVTs = DAG.getVTList(ListVTs, 5);
break;
}
}
SmallVector<SDValue, 8> OtherOps;
// Copy regular operands
OtherOps.push_back(Chain); // Chain
// Skip operand 1 (intrinsic ID)
// Others
for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
OtherOps.push_back(N->getOperand(i));
MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
OtherOps.size(), MemSD->getMemoryVT(),
MemSD->getMemOperand());
SmallVector<SDValue, 4> ScalarRes;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Res = NewLD.getValue(i);
if (NeedTrunc)
Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
ScalarRes.push_back(Res);
}
SDValue LoadChain = NewLD.getValue(NumElts);
SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
Results.push_back(BuildVec);
Results.push_back(LoadChain);
} else {
// i8 LDG/LDU
assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
"Custom handling of non-i8 ldu/ldg?");
// Just copy all operands as-is
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
Ops.push_back(N->getOperand(i));
// Force output to i16
SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
// We make sure the memory type is i8, which will be used during isel
// to select the proper instruction.
SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
LdResVTs, &Ops[0],
Ops.size(), MVT::i8,
MemSD->getMemOperand());
Results.push_back(NewLD.getValue(0));
Results.push_back(NewLD.getValue(1));
}
}
}
}
void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default: report_fatal_error("Unhandled custom legalization");
case ISD::LOAD:
ReplaceLoadVector(N, DAG, Results);
return;
case ISD::INTRINSIC_W_CHAIN:
ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
return;
}
}

View File

@@ -58,7 +58,16 @@ enum NodeType {
RETURN,
CallSeqBegin,
CallSeqEnd,
Dummy
Dummy,
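// Memory-access opcodes must be numbered at or above
// ISD::FIRST_TARGET_MEMORY_OPCODE so the DAG treats these nodes as
// MemSDNodes carrying a MachineMemOperand.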
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
LoadV4,
LDGV2, // LDG.v2
LDGV4, // LDG.v4
LDUV2, // LDU.v2
LDUV4, // LDU.v4
StoreV2,
StoreV4
};
}
@@ -143,8 +152,16 @@ private:
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
virtual void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
};
} // namespace llvm

View File

@@ -65,46 +65,6 @@ void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
NVPTX::Float64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
NVPTX::V4F32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
NVPTX::V4I32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
NVPTX::V2F32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
NVPTX::V2I32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
NVPTX::V4I8RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
NVPTX::V2I8RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
NVPTX::V4I16RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
NVPTX::V2I16RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
NVPTX::V2I64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
NVPTX::V2F64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else {
llvm_unreachable("Don't know how to copy a register");
}

View File

@@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget.hasVote()">;
def hasDouble : Predicate<"Subtarget.hasDouble()">;
def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
def hasLDG : Predicate<"Subtarget.hasLDG()">;
def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
@@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr];"), []>;
def _areg_64 : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
" \t$dst, [$addr];"), []>;
def _ari : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr+$offset];"), []>;
def _ari_64 : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
" \t$dst, [$addr+$offset];"), []>;
def _asi : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
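
The _areg_64 and _ari_64 variants added above give each load a twin that takes its address in Int64Regs, which is what the 64-bit nvptx64 target needs. A hypothetical helper showing how selection can choose between the two flavors — the pickLoadAregOpcode name and the selection logic are illustrative, but the NVPTX::LD_* opcode names follow directly from the multiclass suffixes:

// Hypothetical: map a scalar load's value type and the target's pointer
// width to the register-addressed load opcode defined above.
static unsigned pickLoadAregOpcode(MVT VT, bool Is64Bit) {
  switch (VT.SimpleTy) {
  default: llvm_unreachable("unhandled load type");
  case MVT::i8:  return Is64Bit ? NVPTX::LD_i8_areg_64  : NVPTX::LD_i8_areg;
  case MVT::i16: return Is64Bit ? NVPTX::LD_i16_areg_64 : NVPTX::LD_i16_areg;
  case MVT::i32: return Is64Bit ? NVPTX::LD_i32_areg_64 : NVPTX::LD_i32_areg;
  case MVT::i64: return Is64Bit ? NVPTX::LD_i64_areg_64 : NVPTX::LD_i64_areg;
  case MVT::f32: return Is64Bit ? NVPTX::LD_f32_areg_64 : NVPTX::LD_f32_areg;
  case MVT::f64: return Is64Bit ? NVPTX::LD_f64_areg_64 : NVPTX::LD_f64_areg;
  }
}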
let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
defm LD_v2i8 : LD<V2I8Regs>;
defm LD_v4i8 : LD<V4I8Regs>;
defm LD_v2i16 : LD<V2I16Regs>;
defm LD_v4i16 : LD<V4I16Regs>;
defm LD_v2i32 : LD<V2I32Regs>;
defm LD_v4i32 : LD<V4I32Regs>;
defm LD_v2f32 : LD<V2F32Regs>;
defm LD_v4f32 : LD<V4F32Regs>;
defm LD_v2i64 : LD<V2I64Regs>;
defm LD_v2f64 : LD<V2F64Regs>;
}
multiclass ST<NVPTXRegClass regclass> {
def _avar : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> {
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr], $src;"), []>;
def _areg_64 : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
"\t[$addr], $src;"), []>;
def _ari : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr+$offset], $src;"), []>;
def _ari_64 : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
"\t[$addr+$offset], $src;"), []>;
def _asi : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
defm ST_v2i8 : ST<V2I8Regs>;
defm ST_v4i8 : ST<V4I8Regs>;
defm ST_v2i16 : ST<V2I16Regs>;
defm ST_v4i16 : ST<V4I16Regs>;
defm ST_v2i32 : ST<V2I32Regs>;
defm ST_v4i32 : ST<V4I32Regs>;
defm ST_v2f32 : ST<V2F32Regs>;
defm ST_v4f32 : ST<V4F32Regs>;
defm ST_v2i64 : ST<V2I64Regs>;
defm ST_v2f64 : ST<V2F64Regs>;
}
// The following is used only in and after vector elementizations. Vector
// elementization happens at the machine instruction level, so the
// following instructions are only used after that point.
@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
def _v2_areg_64 : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
def _v2_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
def _v2_ari_64 : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
def _v2_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
def _v4_areg_64 : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
def _v4_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
def _v4_ari_64 : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
def _v4_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins),
"trap;",
[(trap)]>;
include "NVPTXVector.td"
include "NVPTXIntrinsics.td"


@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
// Vector ldu
multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
NVPTXInst eleInst, NVPTXInst eleInst64> {
def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
!strconcat("ldu.global.", TyStr),
[(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
Requires<[hasLDU]>;
def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
!strconcat("ldu.global.", TyStr),
[(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
Requires<[hasLDU]>;
//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------
def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
return M->getMemoryVT() == MVT::i8;
}]>;
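
The ldg_i8 PatFrag can inspect getMemoryVT() only because the overloaded intrinsic reaches selection as a MemIntrinsicSDNode. A minimal sketch of the kind of getTgtMemIntrinsic handling that makes this work — illustrative only, as a free function for brevity; field setup follows the LLVM 3.2-era TargetLowering::IntrinsicInfo struct, and the committed hook covers the ldu intrinsics as well:

// Sketch: report the ldg intrinsics as chained memory reads so that each
// call becomes a MemIntrinsicSDNode whose memory VT records the loaded
// width (e.g. MVT::i8 for an i8 ldg).
static bool getTgtMemIntrinsicSketch(TargetLowering::IntrinsicInfo &Info,
                                     const CallInst &I, unsigned Intrinsic,
                                     const TargetLowering &TLI) {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = TLI.getValueType(I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 0;
    return true;
  }
}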
multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
Requires<[hasLDG]>;
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
}
let VecInstType=isVecLD.Value in {
defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
INT_PTX_LDU_G_v2i8_ELE_64>;
defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
INT_PTX_LDU_G_v4i8_ELE_64>;
defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
INT_PTX_LDU_G_v2i16_ELE_64>;
defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
INT_PTX_LDU_G_v4i16_ELE_64>;
defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
INT_PTX_LDU_G_v2i32_ELE_64>;
defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
INT_PTX_LDU_G_v4i32_ELE_64>;
defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
INT_PTX_LDU_G_v2f32_ELE_64>;
defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
INT_PTX_LDU_G_v4f32_ELE_64>;
defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
INT_PTX_LDU_G_v2i64_ELE_64>;
defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
INT_PTX_LDU_G_v2f64_ELE_64>;
multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
Requires<[hasLDG]>;
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
!strconcat("ld.global.nc.", TyStr),
[(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
}
defm INT_PTX_LDG_GLOBAL_i8
: LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
defm INT_PTX_LDG_GLOBAL_i16
: LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_i32
: LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_i64
: LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_f32
: LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
defm INT_PTX_LDG_GLOBAL_f64
: LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
defm INT_PTX_LDG_GLOBAL_p32
: LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
defm INT_PTX_LDG_GLOBAL_p64
: LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
// vector
// Elementized vector ldg
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
}
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
}
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
defm INT_PTX_LDG_G_v2i8_ELE
: VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
: VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
: VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
: VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
: VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
: VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
: VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
: VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
: VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
multiclass NG_TO_G<string Str, Intrinsic Intrin> {


@ -54,36 +54,6 @@ std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
}
else if (RC == &NVPTX::V2F32RegsRegClass) {
return ".v2.f32";
}
else if (RC == &NVPTX::V4F32RegsRegClass) {
return ".v4.f32";
}
else if (RC == &NVPTX::V2I32RegsRegClass) {
return ".v2.s32";
}
else if (RC == &NVPTX::V4I32RegsRegClass) {
return ".v4.s32";
}
else if (RC == &NVPTX::V2F64RegsRegClass) {
return ".v2.f64";
}
else if (RC == &NVPTX::V2I64RegsRegClass) {
return ".v2.s64";
}
else if (RC == &NVPTX::V2I16RegsRegClass) {
return ".v2.s16";
}
else if (RC == &NVPTX::V4I16RegsRegClass) {
return ".v4.s16";
}
else if (RC == &NVPTX::V2I8RegsRegClass) {
return ".v2.s16";
}
else if (RC == &NVPTX::V4I8RegsRegClass) {
return ".v4.s16";
}
else {
return "INTERNAL";
}
@ -115,137 +85,11 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
}
else if (RC == &NVPTX::V2F32RegsRegClass) {
return "%v2f";
}
else if (RC == &NVPTX::V4F32RegsRegClass) {
return "%v4f";
}
else if (RC == &NVPTX::V2I32RegsRegClass) {
return "%v2r";
}
else if (RC == &NVPTX::V4I32RegsRegClass) {
return "%v4r";
}
else if (RC == &NVPTX::V2F64RegsRegClass) {
return "%v2fd";
}
else if (RC == &NVPTX::V2I64RegsRegClass) {
return "%v2rd";
}
else if (RC == &NVPTX::V2I16RegsRegClass) {
return "%v2s";
}
else if (RC == &NVPTX::V4I16RegsRegClass) {
return "%v4rs";
}
else if (RC == &NVPTX::V2I8RegsRegClass) {
return "%v2rc";
}
else if (RC == &NVPTX::V4I8RegsRegClass) {
return "%v4rc";
}
else {
return "INTERNAL";
}
return "";
}
bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
if (RC->getID() == NVPTX::V2F32RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V2F64RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V2I16RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V2I32RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V2I64RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V2I8RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V4F32RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V4I16RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V4I32RegsRegClassID)
return true;
if (RC->getID() == NVPTX::V4I8RegsRegClassID)
return true;
return false;
}
std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
if (RC->getID() == NVPTX::V2F32RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
if (RC->getID() == NVPTX::V2F64RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
if (RC->getID() == NVPTX::V2I16RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
if (RC->getID() == NVPTX::V2I32RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
if (RC->getID() == NVPTX::V2I64RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
if (RC->getID() == NVPTX::V2I8RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
if (RC->getID() == NVPTX::V4F32RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
if (RC->getID() == NVPTX::V4I16RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
if (RC->getID() == NVPTX::V4I32RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
if (RC->getID() == NVPTX::V4I8RegsRegClassID)
return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
llvm_unreachable("Not a vector register class");
}
const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
if (RC->getID() == NVPTX::V2F32RegsRegClassID)
return (&NVPTX::Float32RegsRegClass);
if (RC->getID() == NVPTX::V2F64RegsRegClassID)
return (&NVPTX::Float64RegsRegClass);
if (RC->getID() == NVPTX::V2I16RegsRegClassID)
return (&NVPTX::Int16RegsRegClass);
if (RC->getID() == NVPTX::V2I32RegsRegClassID)
return (&NVPTX::Int32RegsRegClass);
if (RC->getID() == NVPTX::V2I64RegsRegClassID)
return (&NVPTX::Int64RegsRegClass);
if (RC->getID() == NVPTX::V2I8RegsRegClassID)
return (&NVPTX::Int8RegsRegClass);
if (RC->getID() == NVPTX::V4F32RegsRegClassID)
return (&NVPTX::Float32RegsRegClass);
if (RC->getID() == NVPTX::V4I16RegsRegClassID)
return (&NVPTX::Int16RegsRegClass);
if (RC->getID() == NVPTX::V4I32RegsRegClassID)
return (&NVPTX::Int32RegsRegClass);
if (RC->getID() == NVPTX::V4I8RegsRegClassID)
return (&NVPTX::Int8RegsRegClass);
llvm_unreachable("Not a vector register class");
}
int getNVPTXVectorSize(TargetRegisterClass const *RC) {
if (RC->getID() == NVPTX::V2F32RegsRegClassID)
return 2;
if (RC->getID() == NVPTX::V2F64RegsRegClassID)
return 2;
if (RC->getID() == NVPTX::V2I16RegsRegClassID)
return 2;
if (RC->getID() == NVPTX::V2I32RegsRegClassID)
return 2;
if (RC->getID() == NVPTX::V2I64RegsRegClassID)
return 2;
if (RC->getID() == NVPTX::V2I8RegsRegClassID)
return 2;
if (RC->getID() == NVPTX::V4F32RegsRegClassID)
return 4;
if (RC->getID() == NVPTX::V4I16RegsRegClassID)
return 4;
if (RC->getID() == NVPTX::V4I32RegsRegClassID)
return 4;
if (RC->getID() == NVPTX::V4I8RegsRegClassID)
return 4;
llvm_unreachable("Not a vector register class");
}
}
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,


@ -81,10 +81,6 @@ public:
std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
int getNVPTXVectorSize (const TargetRegisterClass *RC);
const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
} // end namespace llvm


@ -37,9 +37,6 @@ foreach i = 0-395 in {
def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
// Vectors
foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in
def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>;
// Arguments
def ia#i : NVPTXReg<"%ia"#i>;
@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
NVPTXRegClass sClass,
int e,
string n>
: NVPTXRegClass<regTypes, alignment, regList>
{
NVPTXRegClass scalarClass=sClass;
int elems=e;
string name=n;
}
def V2F32Regs
: NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
Float32Regs, 2, ".v2.f32">;
def V4F32Regs
: NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
Float32Regs, 4, ".v4.f32">;
def V2I32Regs
: NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
Int32Regs, 2, ".v2.u32">;
def V4I32Regs
: NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
Int32Regs, 4, ".v4.u32">;
def V2F64Regs
: NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
Float64Regs, 2, ".v2.f64">;
def V2I64Regs
: NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
Int64Regs, 2, ".v2.u64">;
def V2I16Regs
: NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
Int16Regs, 2, ".v2.u16">;
def V4I16Regs
: NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
Int16Regs, 4, ".v4.u16">;
def V2I8Regs
: NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
Int8Regs, 2, ".v2.u8">;
def V4I8Regs
: NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
Int8Regs, 4, ".v4.u8">;


@ -57,6 +57,7 @@ public:
bool hasF32FTZ() const { return SmVersion >= 20; }
bool hasFMAF32() const { return SmVersion >= 20; }
bool hasFMAF64() const { return SmVersion >= 13; }
bool hasLDG() const { return SmVersion >= 32; }
bool hasLDU() const { return SmVersion >= 20; }
bool hasGenericLdSt() const { return SmVersion >= 20; }
inline bool hasHWROT32() const { return false; }


@ -123,7 +123,6 @@ bool NVPTXPassConfig::addInstSelector() {
addPass(createSplitBBatBarPass());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
addPass(createVectorElementizePass(getNVPTXTargetMachine()));
return false;
}

File diff suppressed because it is too large.

@ -1,202 +0,0 @@
#!/usr/bin/env python
num_regs = 396
outFile = open('NVPTXRegisterInfo.td', 'w')
outFile.write('''
//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Declarations that describe the PTX register file
//===----------------------------------------------------------------------===//
class NVPTXReg<string n> : Register<n> {
let Namespace = "NVPTX";
}
class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
: RegisterClass <"NVPTX", regTypes, alignment, regList>;
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
// Special Registers used as stack pointer
def VRFrame : NVPTXReg<"%SP">;
def VRFrameLocal : NVPTXReg<"%SPL">;
// Special Registers used as the stack
def VRDepot : NVPTXReg<"%Depot">;
''')
# Predicates
outFile.write('''
//===--- Predicate --------------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
# Int8
outFile.write('''
//===--- 8-bit ------------------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
# Int16
outFile.write('''
//===--- 16-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
# Int32
outFile.write('''
//===--- 32-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
# Int64
outFile.write('''
//===--- 64-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
# F32
outFile.write('''
//===--- 32-bit float -----------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
# F64
outFile.write('''
//===--- 64-bit float -----------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
# Vector registers
outFile.write('''
//===--- Vector -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
# Argument registers
outFile.write('''
//===--- Arguments --------------------------------------------------------===//
''')
for i in range(0, num_regs):
outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
for i in range(0, num_regs):
outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
outFile.write('''
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
''')
outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('''
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
''')
outFile.write('''
class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
NVPTXRegClass sClass,
int e,
string n>
: NVPTXRegClass<regTypes, alignment, regList>
{
NVPTXRegClass scalarClass=sClass;
int elems=e;
string name=n;
}
''')
outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
outFile.close()
outFile = open('NVPTXNumRegisters.h', 'w')
outFile.write('''
//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H
namespace llvm {
const unsigned NVPTXNumRegisters = %d;
}
#endif
''' % num_regs)
outFile.close()


@ -0,0 +1,66 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; Even though general vector types are not supported in PTX, we can still
; optimize loads/stores with pseudo-vector instructions of the form:
;
; ld.v2.f32 {%f0, %f1}, [%r0]
;
; which will load two floats at once into scalar registers.
define void @foo(<2 x float>* %a) {
; CHECK: .func foo
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
%t1 = load <2 x float>* %a
%t2 = fmul <2 x float> %t1, %t1
store <2 x float> %t2, <2 x float>* %a
ret void
}
define void @foo2(<4 x float>* %a) {
; CHECK: .func foo2
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
%t1 = load <4 x float>* %a
%t2 = fmul <4 x float> %t1, %t1
store <4 x float> %t2, <4 x float>* %a
ret void
}
define void @foo3(<8 x float>* %a) {
; CHECK: .func foo3
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16];
%t1 = load <8 x float>* %a
%t2 = fmul <8 x float> %t1, %t1
store <8 x float> %t2, <8 x float>* %a
ret void
}
define void @foo4(<2 x i32>* %a) {
; CHECK: .func foo4
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
%t1 = load <2 x i32>* %a
%t2 = mul <2 x i32> %t1, %t1
store <2 x i32> %t2, <2 x i32>* %a
ret void
}
define void @foo5(<4 x i32>* %a) {
; CHECK: .func foo5
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
%t1 = load <4 x i32>* %a
%t2 = mul <4 x i32> %t1, %t1
store <4 x i32> %t2, <4 x i32>* %a
ret void
}
define void @foo6(<8 x i32>* %a) {
; CHECK: .func foo6
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16];
%t1 = load <8 x i32>* %a
%t2 = mul <8 x i32> %t1, %t1
store <8 x i32> %t2, <8 x i32>* %a
ret void
}