Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-09-30 06:19:45 +00:00)
[NVPTX] Disable vector registers
Vectors were being manually scalarized by the backend. Instead, let the target-independent code do all of the work. The manual scalarization dates from a time before LLVM had good target-independent support for scalarization. This does, however, force us to specially handle vector loads and stores, which we can turn into PTX instructions that produce/consume multiple operands.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174968 91177308-0d34-0410-b5e6-96231b3b80d8
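To illustrate what the commit message describes (this example is not part of the commit itself): once vector loads and stores survive legalization as vectors, the backend can select them as single PTX vector accesses instead of scalarizing them up front. A minimal CUDA kernel of the kind that benefits; the PTX in the comments is typical output for such code, not taken from this revision:

// Illustrative only: a 128-bit vector copy.
__global__ void copy4(const float4 *in, float4 *out) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // With vector loads/stores kept intact, this can become
  //   ld.global.v4.f32 {%f1, %f2, %f3, %f4}, [addr];
  //   st.global.v4.f32 [addr], {%f1, %f2, %f3, %f4};
  // rather than four scalar ld.global.f32 / st.global.f32 pairs.
  out[i] = in[i];
}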
IntrinsicsNVVM.td:
@@ -805,6 +805,16 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
   [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldu.global.p">;

+// Generated within nvvm. Use for ldg on sm_35 or later
+def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
+  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  "llvm.nvvm.ldg.global.i">;
+def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
+  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  "llvm.nvvm.ldg.global.f">;
+def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
+  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  "llvm.nvvm.ldg.global.p">;

 // Use for generic pointers
 // - These intrinsics are used to convert address spaces.
CMakeLists.txt:
@@ -22,7 +22,6 @@ set(NVPTXCodeGen_sources
   NVPTXAllocaHoisting.cpp
   NVPTXAsmPrinter.cpp
   NVPTXUtilities.cpp
-  VectorElementize.cpp
   )

 add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
NVPTX.h:
@@ -53,7 +53,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {

 FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
                                  llvm::CodeGenOpt::Level OptLevel);
-FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
 FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
 FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
 FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
NVPTXAsmPrinter.cpp:
@@ -503,21 +503,7 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
       O << getNVPTXRegClassStr(RC) << mapped_vr;
       return;
     }
-    // Vector virtual register
-    if (getNVPTXVectorSize(RC) == 4)
-      O << "{"
-        << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
-        << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
-        << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
-        << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
-        << "}";
-    else if (getNVPTXVectorSize(RC) == 2)
-      O << "{"
-        << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
-        << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
-        << "}";
-    else
-      llvm_unreachable("Unsupported vector size");
+    report_fatal_error("Bad register!");
   }

 void
@@ -2024,29 +2010,9 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
   case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
   case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
   case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
-  case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
-  case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
-  case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
-  case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
-  case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
-  case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
-  case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
-  case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
-  case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
-  case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
   case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
   case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
   case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
-  case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
-  case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
-  case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
-  case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
-  case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
-  case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
-  case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
-  case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
-  case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
-  case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
   case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
   case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
   case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
@@ -2057,16 +2023,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
   case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
   case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
   case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
-  case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
-  case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
-  case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
-  case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
-  case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
-  case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
-  case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
-  case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
-  case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
-  case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
   case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
     return true;
   }
NVPTXISelDAGToDAG.cpp:
@@ -105,6 +105,21 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
   case ISD::STORE:
     ResNode = SelectStore(N);
     break;
+  case NVPTXISD::LoadV2:
+  case NVPTXISD::LoadV4:
+    ResNode = SelectLoadVector(N);
+    break;
+  case NVPTXISD::LDGV2:
+  case NVPTXISD::LDGV4:
+  case NVPTXISD::LDUV2:
+  case NVPTXISD::LDUV4:
+    ResNode = SelectLDGLDUVector(N);
+    break;
+  case NVPTXISD::StoreV2:
+  case NVPTXISD::StoreV4:
+    ResNode = SelectStoreVector(N);
+    break;
+  default: break;
   }
   if (ResNode)
     return ResNode;
@@ -214,16 +229,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
     case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
     case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
     case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
-    case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
-    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
-    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
-    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
-    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
-    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
-    case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
-    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
-    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
-    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
@@ -244,16 +249,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
     case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
     case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
     case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
-    case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
-    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
-    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
-    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
-    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
-    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
-    case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
-    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
-    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
-    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
@@ -267,24 +262,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
   } else if (Subtarget.is64Bit()?
              SelectADDRri64(N1.getNode(), N1, Base, Offset):
              SelectADDRri(N1.getNode(), N1, Base, Offset)) {
-    switch (TargetVT) {
-    case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
-    case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
-    case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
-    case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
-    case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
-    case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
-    case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
-    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
-    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
-    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
-    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
-    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
-    case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
-    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
-    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
-    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
-    default: return NULL;
+    if (Subtarget.is64Bit()) {
+      switch (TargetVT) {
+      case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break;
+      case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break;
+      case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break;
+      case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break;
+      case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break;
+      case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break;
+      default: return NULL;
+      }
+    } else {
+      switch (TargetVT) {
+      case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
+      case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
+      case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
+      case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
+      case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
+      case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
+      default: return NULL;
+      }
     }
     SDValue Ops[] = { getI32Imm(isVolatile),
                       getI32Imm(codeAddrSpace),
@@ -296,24 +293,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
                                      MVT::Other, Ops, 8);
   }
   else {
-    switch (TargetVT) {
-    case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
-    case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
-    case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
-    case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
-    case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
-    case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
-    case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
-    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
-    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
-    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
-    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
-    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
-    case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
-    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
-    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
-    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
-    default: return NULL;
+    if (Subtarget.is64Bit()) {
+      switch (TargetVT) {
+      case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break;
+      case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break;
+      case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break;
+      case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break;
+      case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break;
+      case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break;
+      default: return NULL;
+      }
+    } else {
+      switch (TargetVT) {
+      case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
+      case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
+      case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
+      case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
+      case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
+      case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
+      default: return NULL;
+      }
     }
     SDValue Ops[] = { getI32Imm(isVolatile),
                       getI32Imm(codeAddrSpace),
@@ -334,6 +333,370 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
   return NVPTXLD;
 }

+SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
+
+  SDValue Chain = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  SDValue Addr, Offset, Base;
+  unsigned Opcode;
+  DebugLoc DL = N->getDebugLoc();
+  SDNode *LD;
+  MemSDNode *MemSD = cast<MemSDNode>(N);
+  EVT LoadedVT = MemSD->getMemoryVT();
+
+
+  if (!LoadedVT.isSimple())
+    return NULL;
+
+  // Address Space Setting
+  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+  // Volatile Setting
+  // - .volatile is only availalble for .global and .shared
+  bool IsVolatile = MemSD->isVolatile();
+  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+    IsVolatile = false;
+
+  // Vector Setting
+  MVT SimpleVT = LoadedVT.getSimpleVT();
+
+  // Type Setting: fromType + fromTypeWidth
+  //
+  // Sign   : ISD::SEXTLOAD
+  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+  //          type is integer
+  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+  MVT ScalarVT = SimpleVT.getScalarType();
+  unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+  unsigned int FromType;
+  // The last operand holds the original LoadSDNode::getExtensionType() value
+  unsigned ExtensionType =
+    cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
+  if (ExtensionType == ISD::SEXTLOAD)
+    FromType = NVPTX::PTXLdStInstCode::Signed;
+  else if (ScalarVT.isFloatingPoint())
+    FromType = NVPTX::PTXLdStInstCode::Float;
+  else
+    FromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+  unsigned VecType;
+
+  switch (N->getOpcode()) {
+  case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break;
+  case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break;
+  default: return NULL;
+  }
+
+  EVT EltVT = N->getValueType(0);
+
+  if (SelectDirectAddr(Op1, Addr)) {
+    switch (N->getOpcode()) {
+    default: return NULL;
+    case NVPTXISD::LoadV2:
+      switch (EltVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break;
+      case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break;
+      case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break;
+      case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break;
+      case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break;
+      case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break;
+      }
+      break;
+    case NVPTXISD::LoadV4:
+      switch (EltVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break;
+      case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break;
+      case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break;
+      case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break;
+      }
+      break;
+    }
+
+    SDValue Ops[] = { getI32Imm(IsVolatile),
+                      getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType),
+                      getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth),
+                      Addr, Chain };
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+  } else if (Subtarget.is64Bit()?
+             SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
+             SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+    switch (N->getOpcode()) {
+    default: return NULL;
+    case NVPTXISD::LoadV2:
+      switch (EltVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break;
+      case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break;
+      case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break;
+      case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break;
+      case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break;
+      case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break;
+      }
+      break;
+    case NVPTXISD::LoadV4:
+      switch (EltVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break;
+      case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break;
+      case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break;
+      case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break;
+      }
+      break;
+    }
+
+    SDValue Ops[] = { getI32Imm(IsVolatile),
+                      getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType),
+                      getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth),
+                      Base, Offset, Chain };
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+  } else if (Subtarget.is64Bit()?
+             SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
+             SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+    if (Subtarget.is64Bit()) {
+      switch (N->getOpcode()) {
+      default: return NULL;
+      case NVPTXISD::LoadV2:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break;
+        case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break;
+        case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break;
+        case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break;
+        case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break;
+        case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break;
+        }
+        break;
+      case NVPTXISD::LoadV4:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break;
+        case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break;
+        case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break;
+        case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break;
+        }
+        break;
+      }
+    } else {
+      switch (N->getOpcode()) {
+      default: return NULL;
+      case NVPTXISD::LoadV2:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break;
+        case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break;
+        case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break;
+        case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break;
+        case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break;
+        case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break;
+        }
+        break;
+      case NVPTXISD::LoadV4:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break;
+        case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break;
+        case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break;
+        case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break;
+        }
+        break;
+      }
+    }
+
+    SDValue Ops[] = { getI32Imm(IsVolatile),
+                      getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType),
+                      getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth),
+                      Base, Offset, Chain };
+
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+  } else {
+    if (Subtarget.is64Bit()) {
+      switch (N->getOpcode()) {
+      default: return NULL;
+      case NVPTXISD::LoadV2:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break;
+        case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break;
+        case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break;
+        case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break;
+        case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break;
+        case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break;
+        }
+        break;
+      case NVPTXISD::LoadV4:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break;
+        case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break;
+        case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break;
+        case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break;
+        }
+        break;
+      }
+    } else {
+      switch (N->getOpcode()) {
+      default: return NULL;
+      case NVPTXISD::LoadV2:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break;
+        case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break;
+        case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break;
+        case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break;
+        case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break;
+        case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break;
+        }
+        break;
+      case NVPTXISD::LoadV4:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break;
+        case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break;
+        case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break;
+        case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break;
+        }
+        break;
+      }
+    }
+
+    SDValue Ops[] = { getI32Imm(IsVolatile),
+                      getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType),
+                      getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth),
+                      Op1, Chain };
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+  }
+
+  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+  return LD;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+
+  SDValue Chain = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  unsigned Opcode;
+  DebugLoc DL = N->getDebugLoc();
+  SDNode *LD;
+
+  EVT RetVT = N->getValueType(0);
+
+  // Select opcode
+  if (Subtarget.is64Bit()) {
+    switch (N->getOpcode()) {
+    default: return NULL;
+    case NVPTXISD::LDGV2:
+      switch (RetVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
+      case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
+      case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
+      case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
+      case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
+      case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
+      }
+      break;
+    case NVPTXISD::LDGV4:
+      switch (RetVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
+      case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
+      case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
+      case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
+      }
+      break;
+    case NVPTXISD::LDUV2:
+      switch (RetVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
+      case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
+      case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
+      case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
+      case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
+      case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
+      }
+      break;
+    case NVPTXISD::LDUV4:
+      switch (RetVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
+      case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
+      case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
+      case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
+      }
+      break;
+    }
+  } else {
+    switch (N->getOpcode()) {
+    default: return NULL;
+    case NVPTXISD::LDGV2:
+      switch (RetVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
+      case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
+      case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
+      case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
+      case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
+      case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
+      }
+      break;
+    case NVPTXISD::LDGV4:
+      switch (RetVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
+      case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
+      case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
+      case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
+      }
+      break;
+    case NVPTXISD::LDUV2:
+      switch (RetVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
+      case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
+      case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
+      case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
+      case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
+      case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
+      }
+      break;
+    case NVPTXISD::LDUV4:
+      switch (RetVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
+      case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
+      case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
+      case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
+      }
+      break;
+    }
+  }
+
+  SDValue Ops[] = { Op1, Chain };
+  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+
+  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+  return LD;
+}
+
+
 SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   StoreSDNode *ST = cast<StoreSDNode>(N);
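A reader's note on SelectLoadVector above (not part of the commit): the first five operands attached to every LDV_* machine node are integer flags describing the access. A small standalone sketch of that operand layout; the enum values mirror the PTXLdStInstCode naming used in the backend but are assumptions here, not verified against this revision:

#include <cstdio>

// Assumed encoding, modeled on NVPTX::PTXLdStInstCode (illustrative only).
namespace PTXLdStInstCode {
enum AddressSpace { GENERIC = 0, GLOBAL = 1, CONSTANT = 2, SHARED = 3 };
enum VecType { Scalar = 1, V2 = 2, V4 = 4 };
enum FromType { Unsigned = 0, Signed = 1, Float = 2 };
}

int main() {
  // Flag operands for a v2f32 load from global memory, in the order
  // SelectLoadVector pushes them:
  // { volatile, addrspace, vectype, fromtype, fromtypewidth }.
  int Ops[5] = { /*volatile=*/0, PTXLdStInstCode::GLOBAL,
                 PTXLdStInstCode::V2, PTXLdStInstCode::Float,
                 /*FromTypeWidth=*/32 };
  for (int F : Ops) std::printf("%d ", F);
  std::printf("\n");
  return 0;
}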
@@ -400,16 +763,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
     case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
     case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
     case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
-    case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
-    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
-    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
-    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
-    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
-    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
-    case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
-    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
-    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
-    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
@@ -431,16 +784,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
     case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
     case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
     case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
-    case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
-    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
-    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
-    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
-    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
-    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
-    case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
-    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
-    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
-    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
@@ -455,24 +798,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   } else if (Subtarget.is64Bit()?
              SelectADDRri64(N2.getNode(), N2, Base, Offset):
              SelectADDRri(N2.getNode(), N2, Base, Offset)) {
-    switch (SourceVT) {
-    case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
-    case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
-    case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
-    case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
-    case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
-    case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
-    case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
-    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
-    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
-    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
-    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
-    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
-    case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
-    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
-    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
-    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
-    default: return NULL;
+    if (Subtarget.is64Bit()) {
+      switch (SourceVT) {
+      case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break;
+      case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break;
+      case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break;
+      case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break;
+      case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break;
+      case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break;
+      default: return NULL;
+      }
+    } else {
+      switch (SourceVT) {
+      case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
+      case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
+      case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
+      case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
+      case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
+      case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
+      default: return NULL;
+      }
     }
     SDValue Ops[] = { N1,
                       getI32Imm(isVolatile),
@@ -484,24 +829,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                      MVT::Other, Ops, 9);
   } else {
-    switch (SourceVT) {
-    case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
-    case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
-    case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
-    case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
-    case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
-    case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
-    case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
-    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
-    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
-    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
-    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
-    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
-    case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
-    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
-    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
-    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
-    default: return NULL;
+    if (Subtarget.is64Bit()) {
+      switch (SourceVT) {
+      case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break;
+      case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break;
+      case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break;
+      case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break;
+      case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break;
+      case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break;
+      default: return NULL;
+      }
+    } else {
+      switch (SourceVT) {
+      case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
+      case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
+      case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
+      case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
+      case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
+      case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
+      default: return NULL;
+      }
     }
     SDValue Ops[] = { N1,
                       getI32Imm(isVolatile),
@@ -523,6 +870,244 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   return NVPTXST;
 }

+SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  SDValue Addr, Offset, Base;
+  unsigned Opcode;
+  DebugLoc DL = N->getDebugLoc();
+  SDNode *ST;
+  EVT EltVT = Op1.getValueType();
+  MemSDNode *MemSD = cast<MemSDNode>(N);
+  EVT StoreVT = MemSD->getMemoryVT();
+
+  // Address Space Setting
+  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
+    report_fatal_error("Cannot store to pointer that points to constant "
+                       "memory space");
+  }
+
+  // Volatile Setting
+  // - .volatile is only availalble for .global and .shared
+  bool IsVolatile = MemSD->isVolatile();
+  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+    IsVolatile = false;
+
+  // Type Setting: toType + toTypeWidth
+  // - for integer type, always use 'u'
+  assert(StoreVT.isSimple() && "Store value is not simple");
+  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
+  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
+  unsigned ToType;
+  if (ScalarVT.isFloatingPoint())
+    ToType = NVPTX::PTXLdStInstCode::Float;
+  else
+    ToType = NVPTX::PTXLdStInstCode::Unsigned;
+
+
+  SmallVector<SDValue, 12> StOps;
+  SDValue N2;
+  unsigned VecType;
+
+  switch (N->getOpcode()) {
+  case NVPTXISD::StoreV2:
+    VecType = NVPTX::PTXLdStInstCode::V2;
+    StOps.push_back(N->getOperand(1));
+    StOps.push_back(N->getOperand(2));
+    N2 = N->getOperand(3);
+    break;
+  case NVPTXISD::StoreV4:
+    VecType = NVPTX::PTXLdStInstCode::V4;
+    StOps.push_back(N->getOperand(1));
+    StOps.push_back(N->getOperand(2));
+    StOps.push_back(N->getOperand(3));
+    StOps.push_back(N->getOperand(4));
+    N2 = N->getOperand(5);
+    break;
+  default: return NULL;
+  }
+
+  StOps.push_back(getI32Imm(IsVolatile));
+  StOps.push_back(getI32Imm(CodeAddrSpace));
+  StOps.push_back(getI32Imm(VecType));
+  StOps.push_back(getI32Imm(ToType));
+  StOps.push_back(getI32Imm(ToTypeWidth));
+
+  if (SelectDirectAddr(N2, Addr)) {
+    switch (N->getOpcode()) {
+    default: return NULL;
+    case NVPTXISD::StoreV2:
+      switch (EltVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break;
+      case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break;
+      case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break;
+      case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break;
+      case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break;
+      case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break;
+      }
+      break;
+    case NVPTXISD::StoreV4:
+      switch (EltVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break;
+      case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break;
+      case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break;
+      case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break;
+      }
+      break;
+    }
+    StOps.push_back(Addr);
+  } else if (Subtarget.is64Bit()?
+             SelectADDRsi64(N2.getNode(), N2, Base, Offset):
+             SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+    switch (N->getOpcode()) {
+    default: return NULL;
+    case NVPTXISD::StoreV2:
+      switch (EltVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break;
+      case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break;
+      case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break;
+      case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break;
+      case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break;
+      case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break;
+      }
+      break;
+    case NVPTXISD::StoreV4:
+      switch (EltVT.getSimpleVT().SimpleTy) {
+      default: return NULL;
+      case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break;
+      case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break;
+      case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break;
+      case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break;
+      }
+      break;
+    }
+    StOps.push_back(Base);
+    StOps.push_back(Offset);
+  } else if (Subtarget.is64Bit()?
+             SelectADDRri64(N2.getNode(), N2, Base, Offset):
+             SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+    if (Subtarget.is64Bit()) {
+      switch (N->getOpcode()) {
+      default: return NULL;
+      case NVPTXISD::StoreV2:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break;
+        case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break;
+        case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break;
+        case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break;
+        case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break;
+        case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break;
+        }
+        break;
+      case NVPTXISD::StoreV4:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break;
+        case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break;
+        case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break;
+        case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break;
+        }
+        break;
+      }
+    } else {
+      switch (N->getOpcode()) {
+      default: return NULL;
+      case NVPTXISD::StoreV2:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break;
+        case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break;
+        case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break;
+        case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break;
+        case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break;
+        case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break;
+        }
+        break;
+      case NVPTXISD::StoreV4:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break;
+        case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break;
+        case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break;
+        case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break;
+        }
+        break;
+      }
+    }
+    StOps.push_back(Base);
+    StOps.push_back(Offset);
+  } else {
+    if (Subtarget.is64Bit()) {
+      switch (N->getOpcode()) {
+      default: return NULL;
+      case NVPTXISD::StoreV2:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break;
+        case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break;
+        case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break;
+        case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break;
+        case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break;
+        case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break;
+        }
+        break;
+      case NVPTXISD::StoreV4:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break;
+        case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break;
+        case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break;
+        case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break;
+        }
+        break;
+      }
+    } else {
+      switch (N->getOpcode()) {
+      default: return NULL;
+      case NVPTXISD::StoreV2:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break;
+        case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break;
+        case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break;
+        case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break;
+        case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break;
+        case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break;
+        }
+        break;
+      case NVPTXISD::StoreV4:
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default: return NULL;
+        case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break;
+        case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break;
+        case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break;
+        case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break;
+        }
+        break;
+      }
+    }
+    StOps.push_back(N2);
+  }
+
+  StOps.push_back(Chain);
+
+  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+
+  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+  return ST;
+}
+
 // SelectDirectAddr - Match a direct address for DAG.
 // A direct address could be a globaladdress or externalsymbol.
 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
NVPTXISelDAGToDAG.h:
@@ -72,8 +72,11 @@ private:
 #include "NVPTXGenDAGISel.inc"

   SDNode *Select(SDNode *N);
-  SDNode* SelectLoad(SDNode *N);
-  SDNode* SelectStore(SDNode *N);
+  SDNode *SelectLoad(SDNode *N);
+  SDNode *SelectLoadVector(SDNode *N);
+  SDNode *SelectLDGLDUVector(SDNode *N);
+  SDNode *SelectStore(SDNode *N);
+  SDNode *SelectStoreVector(SDNode *N);

   inline SDValue getI32Imm(unsigned Imm) {
     return CurDAG->getTargetConstant(Imm, MVT::i32);
NVPTXISelLowering.cpp:
@@ -44,16 +44,28 @@ using namespace llvm;

 static unsigned int uniqueCallSite = 0;

-static cl::opt<bool>
-RetainVectorOperands("nvptx-codegen-vectors",
- cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
-                     cl::init(true));
-
 static cl::opt<bool>
 sched4reg("nvptx-sched4reg",
           cl::desc("NVPTX Specific: schedule for register pressue"),
           cl::init(false));

+static bool IsPTXVectorType(MVT VT) {
+  switch (VT.SimpleTy) {
+  default: return false;
+  case MVT::v2i8:
+  case MVT::v4i8:
+  case MVT::v2i16:
+  case MVT::v4i16:
+  case MVT::v2i32:
+  case MVT::v4i32:
+  case MVT::v2i64:
+  case MVT::v2f32:
+  case MVT::v4f32:
+  case MVT::v2f64:
+    return true;
+  }
+}
+
 // NVPTXTargetLowering Constructor.
 NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   : TargetLowering(TM, new NVPTXTargetObjectFile()),
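A side note on the type list in IsPTXVectorType above (an observation, not something the commit states): every accepted type appears to fit in a single PTX vector access of at most 128 bits, which would explain why v4i64 and v4f64 are absent. A compile-time check of that arithmetic, under the assumption that PTX ld.v2/ld.v4 accesses cap out at 128 bits:

// Illustrative only: widths of the widest vector types the predicate accepts.
static_assert(4 * 32 == 128, "v4i32/v4f32 fill one 128-bit ld.v4/st.v4");
static_assert(2 * 64 == 128, "v2i64/v2f64 fill one 128-bit ld.v2/st.v2");
static_assert(4 * 64 > 128, "v4i64/v4f64 would exceed it, hence excluded");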
@@ -87,41 +99,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
   addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

-  if (RetainVectorOperands) {
-    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
-    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
-    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
-    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
-    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
-    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
-    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
-    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
-    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
-    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
-
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
-
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
-  }
-
   // Operations not directly supported by NVPTX.
   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
@@ -191,42 +168,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   // TRAP can be lowered to PTX trap
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 
-  // By default, CONCAT_VECTORS is implemented via store/load
-  // through stack. It is slow and uses local memory. We need
-  // to custom-lowering them.
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8  , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8  , Custom);
-
-  // Expand vector int to float and float to int conversions
-  //  - For SINT_TO_FP and UINT_TO_FP, the src type
-  //    (Node->getOperand(0).getValueType())
-  //    is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
-  //    the dest type (Node->getValueType(0)) is used.
-  //
-  //    See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
-  //    case, and
-  //    SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
-  //
-  //    That is why v4i32 or v2i32 are used here.
-  //
-  //    The expansion for vectors happens in VectorLegalizer::LegalizeOp()
-  //    (LegalizeVectorOps.cpp).
-  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
-  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
-  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
-  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
-  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
-  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
-  setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
-  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+  // Register custom handling for vector loads/stores
+  for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+    MVT VT = (MVT::SimpleValueType)i;
+    if (IsPTXVectorType(VT)) {
+      setOperationAction(ISD::LOAD, VT, Custom);
+      setOperationAction(ISD::STORE, VT, Custom);
+      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+    }
+  }
 
   // Now deduce the information based on the above mentioned
   // actions
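
The loop above keys on IsPTXVectorType, a file-local helper that sits outside this hunk's context. A minimal sketch of what it plausibly checks (an assumption, mirroring the "native" vector-type switch this patch repeats later in LowerSTOREVector and ReplaceLoadVector):

static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false; // everything else is left to generic scalarization
  case MVT::v2i8:  case MVT::v4i8:
  case MVT::v2i16: case MVT::v4i16:
  case MVT::v2i32: case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32: case MVT::v4f32:
  case MVT::v2f64:
    return true;  // maps directly onto PTX ld.v2/ld.v4 and st.v2/st.v4
  }
}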
@@ -268,6 +219,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
   case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
   case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
+  case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
+  case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
+  case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
+  case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
+  case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
+  case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
+  case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
+  case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
   }
 }
 
@@ -868,12 +827,19 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 }
 
+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType() == MVT::i1)
+    return LowerLOADi1(Op, DAG);
+  else
+    return SDValue();
+}
+
 // v = ld i1* addr
 //   =>
 // v1 = ld i8* addr
 // v = trunc v1 to i1
 SDValue NVPTXTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   LoadSDNode *LD = cast<LoadSDNode>(Node);
   DebugLoc dl = Node->getDebugLoc();
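
The body of LowerLOADi1 is elided between this hunk and the next; only its tail (the getMergeValues call) is visible below. A hedged sketch of the expansion the comment describes, reusing the locals declared above (LD, dl) and the DebugLoc-based SelectionDAG API of this era; this is an illustration, not the verbatim body:

  // Load the byte that backs the i1 value...
  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
                              LD->getPointerInfo(), LD->isVolatile(),
                              LD->isNonTemporal(), LD->isInvariant(),
                              LD->getAlignment());
  // ...then truncate it back down to i1.
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // Return the value merged with the load's output chain.
  SDValue Ops[] = { result, newLD.getValue(1) };
  return DAG.getMergeValues(Ops, 2, dl);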
@@ -893,12 +859,109 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
+SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  EVT ValVT = Op.getOperand(1).getValueType();
+  if (ValVT == MVT::i1)
+    return LowerSTOREi1(Op, DAG);
+  else if (ValVT.isVector())
+    return LowerSTOREVector(Op, DAG);
+  else
+    return SDValue();
+}
+
+SDValue
+NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
+  SDNode *N = Op.getNode();
+  SDValue Val = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+  EVT ValVT = Val.getValueType();
+
+  if (ValVT.isVector()) {
+    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+    // legal.  We can (and should) split that into 2 stores of <2 x double> here
+    // but I'm leaving that as a TODO for now.
+    if (!ValVT.isSimple())
+      return SDValue();
+    switch (ValVT.getSimpleVT().SimpleTy) {
+    default: return SDValue();
+    case MVT::v2i8:
+    case MVT::v2i16:
+    case MVT::v2i32:
+    case MVT::v2i64:
+    case MVT::v2f32:
+    case MVT::v2f64:
+    case MVT::v4i8:
+    case MVT::v4i16:
+    case MVT::v4i32:
+    case MVT::v4f32:
+      // This is a "native" vector type
+      break;
+    }
+
+    unsigned Opcode = 0;
+    EVT EltVT = ValVT.getVectorElementType();
+    unsigned NumElts = ValVT.getVectorNumElements();
+
+    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
+    // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
+    // stored type to i16 and propagate the "real" type as the memory type.
+    bool NeedExt = false;
+    if (EltVT.getSizeInBits() < 16)
+      NeedExt = true;
+
+    switch (NumElts) {
+    default: return SDValue();
+    case 2:
+      Opcode = NVPTXISD::StoreV2;
+      break;
+    case 4: {
+      Opcode = NVPTXISD::StoreV4;
+      break;
+    }
+    }
+
+    SmallVector<SDValue, 8> Ops;
+
+    // First is the chain
+    Ops.push_back(N->getOperand(0));
+
+    // Then the split values
+    for (unsigned i = 0; i < NumElts; ++i) {
+      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+                                   DAG.getIntPtrConstant(i));
+      if (NeedExt)
+        // ANY_EXTEND is correct here since the store will only look at the
+        // lower-order bits anyway.
+        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+      Ops.push_back(ExtVal);
+    }
+
+    // Then any remaining arguments
+    for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
+      Ops.push_back(N->getOperand(i));
+    }
+
+    MemSDNode *MemSD = cast<MemSDNode>(N);
+
+    SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
+                                            DAG.getVTList(MVT::Other), &Ops[0],
+                                            Ops.size(), MemSD->getMemoryVT(),
+                                            MemSD->getMemOperand());
+
+    //return DCI.CombineTo(N, NewSt, true);
+    return NewSt;
+  }
+
+  return SDValue();
+}
+
 // st i1 v, addr
 //    =>
 // v1 = zxt v to i8
 // st i8, addr
 SDValue NVPTXTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
   StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -1348,3 +1411,242 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
   return 4;
 }
+
+/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
+static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+                              SmallVectorImpl<SDValue>& Results) {
+  EVT ResVT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  assert(ResVT.isVector() && "Vector load must have vector type");
+
+  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+  // legal.  We can (and should) split that into 2 loads of <2 x double> here
+  // but I'm leaving that as a TODO for now.
+  assert(ResVT.isSimple() && "Can only handle simple types");
+  switch (ResVT.getSimpleVT().SimpleTy) {
+  default: return;
+  case MVT::v2i8:
+  case MVT::v2i16:
+  case MVT::v2i32:
+  case MVT::v2i64:
+  case MVT::v2f32:
+  case MVT::v2f64:
+  case MVT::v4i8:
+  case MVT::v4i16:
+  case MVT::v4i32:
+  case MVT::v4f32:
+    // This is a "native" vector type
+    break;
+  }
+
+  EVT EltVT = ResVT.getVectorElementType();
+  unsigned NumElts = ResVT.getVectorNumElements();
+
+  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
+  // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
+  // loaded type to i16 and propagate the "real" type as the memory type.
+  bool NeedTrunc = false;
+  if (EltVT.getSizeInBits() < 16) {
+    EltVT = MVT::i16;
+    NeedTrunc = true;
+  }
+
+  unsigned Opcode = 0;
+  SDVTList LdResVTs;
+
+  switch (NumElts) {
+  default: return;
+  case 2:
+    Opcode = NVPTXISD::LoadV2;
+    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+    break;
+  case 4: {
+    Opcode = NVPTXISD::LoadV4;
+    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+    LdResVTs = DAG.getVTList(ListVTs, 5);
+    break;
+  }
+  }
+
+  SmallVector<SDValue, 8> OtherOps;
+
+  // Copy regular operands
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    OtherOps.push_back(N->getOperand(i));
+
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+
+  // The select routine does not have access to the LoadSDNode instance, so
+  // pass along the extension information
+  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+
+  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+                                          OtherOps.size(), LD->getMemoryVT(),
+                                          LD->getMemOperand());
+
+  SmallVector<SDValue, 4> ScalarRes;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Res = NewLD.getValue(i);
+    if (NeedTrunc)
+      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+    ScalarRes.push_back(Res);
+  }
+
+  SDValue LoadChain = NewLD.getValue(NumElts);
+
+  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT,
+                                 &ScalarRes[0], NumElts);
+
+  Results.push_back(BuildVec);
+  Results.push_back(LoadChain);
+}
+
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
+                                     SelectionDAG &DAG,
+                                     SmallVectorImpl<SDValue> &Results) {
+  SDValue Chain = N->getOperand(0);
+  SDValue Intrin = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+
+  // Get the intrinsic ID
+  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
+  switch(IntrinNo) {
+  default: return;
+  case Intrinsic::nvvm_ldg_global_i:
+  case Intrinsic::nvvm_ldg_global_f:
+  case Intrinsic::nvvm_ldg_global_p:
+  case Intrinsic::nvvm_ldu_global_i:
+  case Intrinsic::nvvm_ldu_global_f:
+  case Intrinsic::nvvm_ldu_global_p: {
+    EVT ResVT = N->getValueType(0);
+
+    if (ResVT.isVector()) {
+      // Vector LDG/LDU
+
+      unsigned NumElts = ResVT.getVectorNumElements();
+      EVT EltVT = ResVT.getVectorElementType();
+
+      // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
+      // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
+      // loaded type to i16 and propagate the "real" type as the memory type.
+      bool NeedTrunc = false;
+      if (EltVT.getSizeInBits() < 16) {
+        EltVT = MVT::i16;
+        NeedTrunc = true;
+      }
+
+      unsigned Opcode = 0;
+      SDVTList LdResVTs;
+
+      switch (NumElts) {
+      default: return;
+      case 2:
+        switch(IntrinNo) {
+        default: return;
+        case Intrinsic::nvvm_ldg_global_i:
+        case Intrinsic::nvvm_ldg_global_f:
+        case Intrinsic::nvvm_ldg_global_p:
+          Opcode = NVPTXISD::LDGV2;
+          break;
+        case Intrinsic::nvvm_ldu_global_i:
+        case Intrinsic::nvvm_ldu_global_f:
+        case Intrinsic::nvvm_ldu_global_p:
+          Opcode = NVPTXISD::LDUV2;
+          break;
+        }
+        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+        break;
+      case 4: {
+        switch(IntrinNo) {
+        default: return;
+        case Intrinsic::nvvm_ldg_global_i:
+        case Intrinsic::nvvm_ldg_global_f:
+        case Intrinsic::nvvm_ldg_global_p:
+          Opcode = NVPTXISD::LDGV4;
+          break;
+        case Intrinsic::nvvm_ldu_global_i:
+        case Intrinsic::nvvm_ldu_global_f:
+        case Intrinsic::nvvm_ldu_global_p:
+          Opcode = NVPTXISD::LDUV4;
+          break;
+        }
+        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+        LdResVTs = DAG.getVTList(ListVTs, 5);
+        break;
+      }
+      }
+
+      SmallVector<SDValue, 8> OtherOps;
+
+      // Copy regular operands
+
+      OtherOps.push_back(Chain); // Chain
+      // Skip operand 1 (intrinsic ID)
+      // Others
+      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
+        OtherOps.push_back(N->getOperand(i));
+
+      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+                                              OtherOps.size(), MemSD->getMemoryVT(),
+                                              MemSD->getMemOperand());
+
+      SmallVector<SDValue, 4> ScalarRes;
+
+      for (unsigned i = 0; i < NumElts; ++i) {
+        SDValue Res = NewLD.getValue(i);
+        if (NeedTrunc)
+          Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+        ScalarRes.push_back(Res);
+      }
+
+      SDValue LoadChain = NewLD.getValue(NumElts);
+
+      SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT,
+                                     &ScalarRes[0], NumElts);
+
+      Results.push_back(BuildVec);
+      Results.push_back(LoadChain);
+    } else {
+      // i8 LDG/LDU
+      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
+             "Custom handling of non-i8 ldu/ldg?");
+
+      // Just copy all operands as-is
+      SmallVector<SDValue, 4> Ops;
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        Ops.push_back(N->getOperand(i));
+
+      // Force output to i16
+      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
+
+      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+      // We make sure the memory type is i8, which will be used during isel
+      // to select the proper instruction.
+      SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
+                                              LdResVTs, &Ops[0],
+                                              Ops.size(), MVT::i8,
+                                              MemSD->getMemOperand());
+
+      Results.push_back(NewLD.getValue(0));
+      Results.push_back(NewLD.getValue(1));
+    }
+  }
+  }
+}
+
+void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
+                                             SmallVectorImpl<SDValue> &Results,
+                                             SelectionDAG &DAG) const {
+  switch (N->getOpcode()) {
+  default: report_fatal_error("Unhandled custom legalization");
+  case ISD::LOAD:
+    ReplaceLoadVector(N, DAG, Results);
+    return;
+  case ISD::INTRINSIC_W_CHAIN:
+    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
+    return;
+  }
+}
@@ -58,7 +58,16 @@ enum NodeType {
   RETURN,
   CallSeqBegin,
   CallSeqEnd,
-  Dummy
+  Dummy,
+
+  LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+  LoadV4,
+  LDGV2, // LDG.v2
+  LDGV4, // LDG.v4
+  LDUV2, // LDU.v2
+  LDUV4, // LDU.v4
+  StoreV2,
+  StoreV4
 };
 }
 
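
One detail worth calling out in the enum above: anchoring LoadV2 at ISD::FIRST_TARGET_MEMORY_OPCODE is what lets target-independent DAG code recognize LoadV2 through StoreV4 as memory operations, which keeps the MemSDNode/MemIntrinsicSDNode casts in the lowering code valid. A small illustration (a sketch, not code from this commit):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isNVPTXVectorMemNode(const SDNode *N) {
  // SDNode::isTargetMemoryOpcode() is just
  // getOpcode() >= ISD::FIRST_TARGET_MEMORY_OPCODE, so it holds for
  // every opcode in the LoadV2..StoreV4 range defined above.
  return N->isTargetMemoryOpcode();
}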
@@ -143,8 +152,16 @@ private:
 
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
 
-  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
+
+  virtual void ReplaceNodeResults(SDNode *N,
+                                  SmallVectorImpl<SDValue> &Results,
+                                  SelectionDAG &DAG) const;
 };
 } // namespace llvm
 
@@ -65,46 +65,6 @@ void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
            NVPTX::Float64RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
       .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
-           NVPTX::V4F32RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
-           NVPTX::V4I32RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
-           NVPTX::V2F32RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
-           NVPTX::V2I32RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
-           NVPTX::V4I8RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
-           NVPTX::V2I8RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
-           NVPTX::V4I16RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
-           NVPTX::V2I16RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
-           NVPTX::V2I64RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
-           NVPTX::V2F64RegsRegClass.contains(SrcReg))
-    BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
   else {
     llvm_unreachable("Don't know how to copy a register");
   }
@@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
 def hasVote : Predicate<"Subtarget.hasVote()">;
 def hasDouble : Predicate<"Subtarget.hasDouble()">;
 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDG : Predicate<"Subtarget.hasLDG()">;
 def hasLDU : Predicate<"Subtarget.hasLDU()">;
 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
 
@@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> {
                    i32imm:$fromWidth, Int32Regs:$addr),
              !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                         "$fromWidth \t$dst, [$addr];"), []>;
+  def _areg_64 : NVPTXInst<(outs regclass:$dst),
+             (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+                  i32imm:$fromWidth, Int64Regs:$addr),
+             !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+                        " \t$dst, [$addr];"), []>;
   def _ari : NVPTXInst<(outs regclass:$dst),
              (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                   i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
             !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                        "$fromWidth \t$dst, [$addr+$offset];"), []>;
+  def _ari_64 : NVPTXInst<(outs regclass:$dst),
+             (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+                  i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+             !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+                        " \t$dst, [$addr+$offset];"), []>;
   def _asi : NVPTXInst<(outs regclass:$dst),
              (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                   i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>;
 defm LD_f64 : LD<Float64Regs>;
 }
 
-let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
-defm LD_v2i8  : LD<V2I8Regs>;
-defm LD_v4i8  : LD<V4I8Regs>;
-defm LD_v2i16 : LD<V2I16Regs>;
-defm LD_v4i16 : LD<V4I16Regs>;
-defm LD_v2i32 : LD<V2I32Regs>;
-defm LD_v4i32 : LD<V4I32Regs>;
-defm LD_v2f32 : LD<V2F32Regs>;
-defm LD_v4f32 : LD<V4F32Regs>;
-defm LD_v2i64 : LD<V2I64Regs>;
-defm LD_v2f64 : LD<V2F64Regs>;
-}
-
 multiclass ST<NVPTXRegClass regclass> {
   def _avar : NVPTXInst<(outs),
               (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
@@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> {
                    LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
              !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
                         " \t[$addr], $src;"), []>;
+  def _areg_64 : NVPTXInst<(outs),
+              (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+                   LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
+              !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+                         "\t[$addr], $src;"), []>;
   def _ari : NVPTXInst<(outs),
              (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                   LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
             !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
                        " \t[$addr+$offset], $src;"), []>;
+  def _ari_64 : NVPTXInst<(outs),
+              (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+                   LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
+              !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+                         "\t[$addr+$offset], $src;"), []>;
   def _asi : NVPTXInst<(outs),
              (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                   LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
@@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>;
 defm ST_f64 : ST<Float64Regs>;
 }
 
-let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
-defm ST_v2i8  : ST<V2I8Regs>;
-defm ST_v4i8  : ST<V4I8Regs>;
-defm ST_v2i16 : ST<V2I16Regs>;
-defm ST_v4i16 : ST<V4I16Regs>;
-defm ST_v2i32 : ST<V2I32Regs>;
-defm ST_v4i32 : ST<V4I32Regs>;
-defm ST_v2f32 : ST<V2F32Regs>;
-defm ST_v4f32 : ST<V4F32Regs>;
-defm ST_v2i64 : ST<V2I64Regs>;
-defm ST_v2f64 : ST<V2F64Regs>;
-}
-
 // The following is used only in and after vector elementizations.
 // Vector elementization happens at the machine instruction level, so the
 // following instruction
@@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
                      i32imm:$fromWidth, Int32Regs:$addr),
                 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                            "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+  def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+                     i32imm:$fromWidth, Int64Regs:$addr),
+                !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+                           "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
   def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
                !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                           "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+  def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+                     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+                !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+                           "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
   def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
                      i32imm:$fromWidth, Int32Regs:$addr),
                 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                            "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+  def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+                                    regclass:$dst3, regclass:$dst4),
+                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+                     i32imm:$fromWidth, Int64Regs:$addr),
+                !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+                           "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
   def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                                 regclass:$dst4),
                 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
                 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                            "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
                 []>;
+  def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+                                   regclass:$dst3, regclass:$dst4),
+                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+                     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+                !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+                           "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+                []>;
   def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                                 regclass:$dst4),
                 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
                  LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
                 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                            "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+  def _v2_areg_64 : NVPTXInst<(outs),
+                (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
+                !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+                           "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
   def _v2_ari : NVPTXInst<(outs),
                 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
                      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
                      i32imm:$offset),
                 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                            "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+  def _v2_ari_64 : NVPTXInst<(outs),
+                (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
+                     i32imm:$offset),
+                !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+                           "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
   def _v2_asi : NVPTXInst<(outs),
                 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
                      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
@@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
                      i32imm:$fromWidth, Int32Regs:$addr),
                 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                            "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+  def _v4_areg_64 : NVPTXInst<(outs),
+                (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+                     LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+                     i32imm:$fromWidth, Int64Regs:$addr),
+                !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+                           "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
   def _v4_ari : NVPTXInst<(outs),
                 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
                      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
                 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                            "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
                 []>;
+  def _v4_ari_64 : NVPTXInst<(outs),
+                (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+                     LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+                     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+                !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+                           "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+                []>;
   def _v4_asi : NVPTXInst<(outs),
                 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
                      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins),
                          "trap;",
                          [(trap)]>;
 
-include "NVPTXVector.td"
-
 include "NVPTXIntrinsics.td"
 
 
@@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE
   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                   Float32Regs>;
 
-// Vector ldu
-multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
-                  NVPTXInst eleInst, NVPTXInst eleInst64> {
- def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
-               !strconcat("ldu.global.", TyStr),
-               [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
-          Requires<[hasLDU]>;
- def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
-               !strconcat("ldu.global.", TyStr),
-               [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
-          Requires<[hasLDU]>;
+//-----------------------------------
+// Support for ldg on sm_35 or later
+//-----------------------------------
+
+def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
+  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+  return M->getMemoryVT() == MVT::i8;
+}]>;
+
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+  def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+                Requires<[hasLDG]>;
+  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
 }
 
-let VecInstType=isVecLD.Value in {
-defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
-      V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
-      INT_PTX_LDU_G_v2i8_ELE_64>;
-defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
-      V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
-      INT_PTX_LDU_G_v4i8_ELE_64>;
-defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
-      V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
-      INT_PTX_LDU_G_v2i16_ELE_64>;
-defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
-      V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
-      INT_PTX_LDU_G_v4i16_ELE_64>;
-defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
-      V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
-      INT_PTX_LDU_G_v2i32_ELE_64>;
-defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
-      V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
-      INT_PTX_LDU_G_v4i32_ELE_64>;
-defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
-      V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
-      INT_PTX_LDU_G_v2f32_ELE_64>;
-defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
-      V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
-      INT_PTX_LDU_G_v4f32_ELE_64>;
-defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
-      V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
-      INT_PTX_LDU_G_v2i64_ELE_64>;
-defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
-      V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
-      INT_PTX_LDU_G_v2f64_ELE_64>;
+multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+  def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+                Requires<[hasLDG]>;
+  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+                !strconcat("ld.global.nc.", TyStr),
+                [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
 }
 
+defm INT_PTX_LDG_GLOBAL_i8
+  : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
+defm INT_PTX_LDG_GLOBAL_i16
+  : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i32
+  : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i64
+  : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_f32
+  : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_f64
+  : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_p32
+  : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
+defm INT_PTX_LDG_GLOBAL_p64
+  : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
+
+// vector
+
+// Elementized vector ldg
+multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
+  def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+                     (ins Int32Regs:$src),
+                     !strconcat("ld.global.nc.", TyStr), []>;
+  def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+                     (ins Int64Regs:$src),
+                     !strconcat("ld.global.nc.", TyStr), []>;
+}
+
+multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+  def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+                           regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
+                     !strconcat("ld.global.nc.", TyStr), []>;
+  def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+                           regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
+                     !strconcat("ld.global.nc.", TyStr), []>;
+}
+
+// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
+defm INT_PTX_LDG_G_v2i8_ELE
+  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i16_ELE
+  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i32_ELE
+  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v2f32_ELE
+  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
+defm INT_PTX_LDG_G_v2i64_ELE
+  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
+defm INT_PTX_LDG_G_v2f64_ELE
+  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
+defm INT_PTX_LDG_G_v4i8_ELE
+  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i16_ELE
+  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i32_ELE
+  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v4f32_ELE
+  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
+
 
 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
@@ -54,36 +54,6 @@ std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
|
|||||||
else if (RC == &NVPTX::SpecialRegsRegClass) {
|
else if (RC == &NVPTX::SpecialRegsRegClass) {
|
||||||
return "!Special!";
|
return "!Special!";
|
||||||
}
|
}
|
||||||
else if (RC == &NVPTX::V2F32RegsRegClass) {
|
|
||||||
return ".v2.f32";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V4F32RegsRegClass) {
|
|
||||||
return ".v4.f32";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V2I32RegsRegClass) {
|
|
||||||
return ".v2.s32";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V4I32RegsRegClass) {
|
|
||||||
return ".v4.s32";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V2F64RegsRegClass) {
|
|
||||||
return ".v2.f64";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V2I64RegsRegClass) {
|
|
||||||
return ".v2.s64";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V2I16RegsRegClass) {
|
|
||||||
return ".v2.s16";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V4I16RegsRegClass) {
|
|
||||||
return ".v4.s16";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V2I8RegsRegClass) {
|
|
||||||
return ".v2.s16";
|
|
||||||
}
|
|
||||||
else if (RC == &NVPTX::V4I8RegsRegClass) {
|
|
||||||
return ".v4.s16";
|
|
||||||
}
|
|
||||||
else {
|
else {
|
||||||
return "INTERNAL";
|
return "INTERNAL";
|
||||||
}
|
}
|
||||||
@@ -115,137 +85,11 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
   else if (RC == &NVPTX::SpecialRegsRegClass) {
     return "!Special!";
   }
-  else if (RC == &NVPTX::V2F32RegsRegClass) {
-    return "%v2f";
-  }
-  else if (RC == &NVPTX::V4F32RegsRegClass) {
-    return "%v4f";
-  }
-  else if (RC == &NVPTX::V2I32RegsRegClass) {
-    return "%v2r";
-  }
-  else if (RC == &NVPTX::V4I32RegsRegClass) {
-    return "%v4r";
-  }
-  else if (RC == &NVPTX::V2F64RegsRegClass) {
-    return "%v2fd";
-  }
-  else if (RC == &NVPTX::V2I64RegsRegClass) {
-    return "%v2rd";
-  }
-  else if (RC == &NVPTX::V2I16RegsRegClass) {
-    return "%v2s";
-  }
-  else if (RC == &NVPTX::V4I16RegsRegClass) {
-    return "%v4rs";
-  }
-  else if (RC == &NVPTX::V2I8RegsRegClass) {
-    return "%v2rc";
-  }
-  else if (RC == &NVPTX::V4I8RegsRegClass) {
-    return "%v4rc";
-  }
   else {
     return "INTERNAL";
   }
   return "";
 }
 
-bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
-  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
-    return true;
-  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
-    return true;
-  return false;
-}
-
-std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
-  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
-  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
-  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
-  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
-  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
-  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
-  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
-  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
-  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
-  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
-    return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
-  llvm_unreachable("Not a vector register class");
-}
-
-const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
-  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
-    return (&NVPTX::Float32RegsRegClass);
-  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
-    return (&NVPTX::Float64RegsRegClass);
-  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
-    return (&NVPTX::Int16RegsRegClass);
-  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
-    return (&NVPTX::Int32RegsRegClass);
-  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
-    return (&NVPTX::Int64RegsRegClass);
-  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
-    return (&NVPTX::Int8RegsRegClass);
-  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
-    return (&NVPTX::Float32RegsRegClass);
-  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
-    return (&NVPTX::Int16RegsRegClass);
-  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
-    return (&NVPTX::Int32RegsRegClass);
-  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
-    return (&NVPTX::Int8RegsRegClass);
-  llvm_unreachable("Not a vector register class");
-}
-
-int getNVPTXVectorSize(TargetRegisterClass const *RC) {
-  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
-    return 2;
-  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
-    return 2;
-  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
-    return 2;
-  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
-    return 2;
-  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
-    return 2;
-  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
-    return 2;
-  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
-    return 4;
-  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
-    return 4;
-  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
-    return 4;
-  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
-    return 4;
-  llvm_unreachable("Not a vector register class");
-}
 }
 
 NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
@@ -81,10 +81,6 @@ public:
 
 std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
 std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
-bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
-std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
-int getNVPTXVectorSize (const TargetRegisterClass *RC);
-const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
 
 } // end namespace llvm
 
@@ -37,9 +37,6 @@ foreach i = 0-395 in {
   def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
   def F#i : NVPTXReg<"%f"#i>; // 32-bit float
   def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
-  // Vectors
-  foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in
-    def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>;
 
   // Arguments
   def ia#i : NVPTXReg<"%ia"#i>;
@@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
 
 // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
 def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
-                       NVPTXRegClass sClass,
-                       int e,
-                       string n>
-  : NVPTXRegClass<regTypes, alignment, regList>
-{
-  NVPTXRegClass scalarClass=sClass;
-  int elems=e;
-  string name=n;
-}
-def V2F32Regs
-  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
-                     Float32Regs, 2, ".v2.f32">;
-def V4F32Regs
-  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
-                     Float32Regs, 4, ".v4.f32">;
-def V2I32Regs
-  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
-                     Int32Regs, 2, ".v2.u32">;
-def V4I32Regs
-  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
-                     Int32Regs, 4, ".v4.u32">;
-def V2F64Regs
-  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
-                     Float64Regs, 2, ".v2.f64">;
-def V2I64Regs
-  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
-                     Int64Regs, 2, ".v2.u64">;
-def V2I16Regs
-  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
-                     Int16Regs, 2, ".v2.u16">;
-def V4I16Regs
-  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
-                     Int16Regs, 4, ".v4.u16">;
-def V2I8Regs
-  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
-                     Int8Regs, 2, ".v2.u8">;
-def V4I8Regs
-  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
-                     Int8Regs, 4, ".v4.u8">;
@@ -57,6 +57,7 @@ public:
   bool hasF32FTZ() const { return SmVersion >= 20; }
   bool hasFMAF32() const { return SmVersion >= 20; }
   bool hasFMAF64() const { return SmVersion >= 13; }
+  bool hasLDG() const { return SmVersion >= 32; }
   bool hasLDU() const { return SmVersion >= 20; }
   bool hasGenericLdSt() const { return SmVersion >= 20; }
   inline bool hasHWROT32() const { return false; }
@@ -123,7 +123,6 @@ bool NVPTXPassConfig::addInstSelector() {
   addPass(createSplitBBatBarPass());
   addPass(createAllocaHoisting());
   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
-  addPass(createVectorElementizePass(getNVPTXTargetMachine()));
   return false;
 }
File diff suppressed because it is too large.
@@ -1,202 +0,0 @@ (entire generator script deleted)
#!/usr/bin/env python

num_regs = 396

outFile = open('NVPTXRegisterInfo.td', 'w')

outFile.write('''
//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Declarations that describe the PTX register file
//===----------------------------------------------------------------------===//

class NVPTXReg<string n> : Register<n> {
  let Namespace = "NVPTX";
}

class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
  : RegisterClass <"NVPTX", regTypes, alignment, regList>;

//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//

// Special Registers used as stack pointer
def VRFrame : NVPTXReg<"%SP">;
def VRFrameLocal : NVPTXReg<"%SPL">;

// Special Registers used as the stack
def VRDepot : NVPTXReg<"%Depot">;
''')

# Predicates
outFile.write('''
//===--- Predicate --------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))

# Int8
outFile.write('''
//===--- 8-bit ------------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))

# Int16
outFile.write('''
//===--- 16-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))

# Int32
outFile.write('''
//===--- 32-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))

# Int64
outFile.write('''
//===--- 64-bit -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))

# F32
outFile.write('''
//===--- 32-bit float -----------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))

# F64
outFile.write('''
//===--- 64-bit float -----------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))

# Vector registers
outFile.write('''
//===--- Vector -----------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))

for i in range(0, num_regs):
    outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))

# Argument registers
outFile.write('''
//===--- Arguments --------------------------------------------------------===//
''')
for i in range(0, num_regs):
    outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
for i in range(0, num_regs):
    outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))

outFile.write('''
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
''')

outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))

outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))

outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))

outFile.write('''
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
''')

outFile.write('''
class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
                       NVPTXRegClass sClass,
                       int e,
                       string n>
  : NVPTXRegClass<regTypes, alignment, regList>
{
  NVPTXRegClass scalarClass=sClass;
  int elems=e;
  string name=n;
}
''')

outFile.write('def V2F32Regs\n  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n                     Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
outFile.write('def V4F32Regs\n  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n                     Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))

outFile.write('def V2I32Regs\n  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n                     Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
outFile.write('def V4I32Regs\n  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n                     Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))

outFile.write('def V2F64Regs\n  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n                     Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
outFile.write('def V2I64Regs\n  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n                     Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))

outFile.write('def V2I16Regs\n  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n                     Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
outFile.write('def V4I16Regs\n  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n                     Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))

outFile.write('def V2I8Regs\n  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n                     Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
outFile.write('def V4I8Regs\n  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n                     Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))

outFile.close()

outFile = open('NVPTXNumRegisters.h', 'w')
outFile.write('''
//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H

namespace llvm {

const unsigned NVPTXNumRegisters = %d;

}

#endif
''' % num_regs)

outFile.close()
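A note on the deleted generator above: it relies on Python %-formatting throughout, and the doubled "%%" survives formatting as a literal "%", which is how the emitted register names keep their PTX-style "%" prefix. A minimal standalone sketch (not part of the commit, for illustration only):

    # Sketch: how the generator's '%%' escape expands under %-formatting.
    # '%%' becomes a literal '%', so the emitted TableGen line contains
    # the PTX-style register name "%p0".
    line = 'def P%d : NVPTXReg<"%%p%d">;\n' % (0, 0)
    print(line)  # prints: def P0 : NVPTXReg<"%p0">;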
test/CodeGen/NVPTX/vector-loads.ll (new file, 66 lines)
@@ -0,0 +1,66 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

; Even though general vector types are not supported in PTX, we can still
; optimize loads/stores with pseudo-vector instructions of the form:
;
; ld.v2.f32 {%f0, %f1}, [%r0]
;
; which will load two floats at once into scalar registers.

define void @foo(<2 x float>* %a) {
; CHECK: .func foo
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
  %t1 = load <2 x float>* %a
  %t2 = fmul <2 x float> %t1, %t1
  store <2 x float> %t2, <2 x float>* %a
  ret void
}

define void @foo2(<4 x float>* %a) {
; CHECK: .func foo2
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
  %t1 = load <4 x float>* %a
  %t2 = fmul <4 x float> %t1, %t1
  store <4 x float> %t2, <4 x float>* %a
  ret void
}

define void @foo3(<8 x float>* %a) {
; CHECK: .func foo3
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16];
  %t1 = load <8 x float>* %a
  %t2 = fmul <8 x float> %t1, %t1
  store <8 x float> %t2, <8 x float>* %a
  ret void
}


define void @foo4(<2 x i32>* %a) {
; CHECK: .func foo4
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
  %t1 = load <2 x i32>* %a
  %t2 = mul <2 x i32> %t1, %t1
  store <2 x i32> %t2, <2 x i32>* %a
  ret void
}

define void @foo5(<4 x i32>* %a) {
; CHECK: .func foo5
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
  %t1 = load <4 x i32>* %a
  %t2 = mul <4 x i32> %t1, %t1
  store <4 x i32> %t2, <4 x i32>* %a
  ret void
}

define void @foo6(<8 x i32>* %a) {
; CHECK: .func foo6
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16];
  %t1 = load <8 x i32>* %a
  %t2 = mul <8 x i32> %t1, %t1
  store <8 x i32> %t2, <8 x i32>* %a
  ret void
}
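For readers unfamiliar with LLVM's lit harness: the "; RUN:" line above is a shell command template in which %s is substituted with the test file's path, so the IR is fed to llc and the resulting PTX is verified by FileCheck. A rough standalone Python equivalent (a sketch for illustration only; lit itself handles substitutions, pipelines, and result reporting):

    # Approximate what lit does for the RUN line in vector-loads.ll:
    # compile the IR with llc and pipe the emitted PTX into FileCheck.
    import subprocess

    with open("vector-loads.ll") as f:
        llc = subprocess.run(
            ["llc", "-march=nvptx", "-mcpu=sm_20"],
            stdin=f, capture_output=True, text=True, check=True)
    subprocess.run(["FileCheck", "vector-loads.ll"],
                   input=llc.stdout, text=True, check=True)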