diff --git a/include/llvm/IntrinsicsPTX.td b/include/llvm/IntrinsicsPTX.td
index cbcd56e5f2c..01241fe4d48 100644
--- a/include/llvm/IntrinsicsPTX.td
+++ b/include/llvm/IntrinsicsPTX.td
@@ -12,14 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 let TargetPrefix = "ptx" in {
-  // FIXME Since PTX 2.0, special registers are redefined as v4i32 type
-  multiclass PTXReadSpecialRegisterIntrinsic_v4i16 {
-    def _r64 : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
-    def _v4i16 : Intrinsic<[llvm_v4i16_ty], [], [IntrNoMem]>;
-    def _x : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>;
-    def _y : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>;
-    def _z : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>;
-    def _w : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>;
+  multiclass PTXReadSpecialRegisterIntrinsic_v4i32 {
+// FIXME: Do we need the 128-bit integer type version?
+//    def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
+
+// FIXME: Enable this once v4i32 support is enabled in back-end.
+//    def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
+
+    def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+    def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+    def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+    def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
   }
 
   class PTXReadSpecialRegisterIntrinsic_r32
@@ -29,15 +32,15 @@ let TargetPrefix = "ptx" in {
     : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
 }
 
-defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i16;
-defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i16;
+defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32;
+defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32;
 
 def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32;
 def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32;
 def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32;
 
-defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i16;
-defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i16;
+defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32;
+defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32;
 
 def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32;
 def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32;
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 12febcb13de..dbc6f579a29 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -29,17 +29,18 @@ def Feature64Bit : SubtargetFeature<"64bit", "Use64BitAddresses", "true",
 
 //===- PTX Version --------------------------------------------------------===//
 
-def FeaturePTX14 : SubtargetFeature<"ptx14", "PTXVersion", "PTX_VERSION_1_4",
-                                    "Use PTX Language Version 1.4">;
-
 def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
                                     "Use PTX Language Version 2.0",
-                                    [FeaturePTX14]>;
+                                    []>;
 
 def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
                                     "Use PTX Language Version 2.1",
                                     [FeaturePTX20]>;
 
+def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
+                                    "Use PTX Language Version 2.2",
+                                    [FeaturePTX21]>;
+
 //===- PTX Shader Model ---------------------------------------------------===//
 
 def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 5c198ca4826..dc96914ba6e 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -32,10 +32,11 @@ def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
 def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
 
 // PTX Version Support
-def SupportsPTX20 : Predicate<"getSubtarget().supportsPTX20()">;
-def DoesNotSupportPTX20 : Predicate<"!getSubtarget().supportsPTX20()">;
 def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
 def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
+def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
+
 
 //===----------------------------------------------------------------------===//
 // Instruction Pattern Stuff
@@ -253,6 +254,33 @@ multiclass INT3 {
                      [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
 }
 
+multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
+  def rr16 : InstPTX<(outs RRegu16:$d),
+                     (ins RRegu16:$a, RRegu16:$b),
+                     !strconcat(opcstr, ".b16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+  def ri16 : InstPTX<(outs RRegu16:$d),
+                     (ins RRegu16:$a, i16imm:$b),
+                     !strconcat(opcstr, ".b16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+  def rr32 : InstPTX<(outs RRegu32:$d),
+                     (ins RRegu32:$a, RRegu32:$b),
+                     !strconcat(opcstr, ".b32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+  def ri32 : InstPTX<(outs RRegu32:$d),
+                     (ins RRegu32:$a, i32imm:$b),
+                     !strconcat(opcstr, ".b32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+  def rr64 : InstPTX<(outs RRegu64:$d),
+                     (ins RRegu64:$a, RRegu64:$b),
+                     !strconcat(opcstr, ".b64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+  def ri64 : InstPTX<(outs RRegu64:$d),
+                     (ins RRegu64:$a, i64imm:$b),
+                     !strconcat(opcstr, ".b64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+}
+
 // no %type directive, non-communtable
 multiclass INT3ntnc {
   def rr : InstPTX<(outs RRegu32:$d),
@@ -359,6 +387,7 @@ multiclass PTX_ST_ALL {
 
 defm ADD : INT3<"add", add>;
 defm SUB : INT3<"sub", sub>;
+defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
 
 ///===- Floating-Point Arithmetic Instructions ----------------------------===//
 
@@ -462,6 +491,10 @@ defm SHL : INT3ntnc<"shl.b32", PTXshl>;
 defm SRL : INT3ntnc<"shr.u32", PTXsrl>;
 defm SRA : INT3ntnc<"shr.s32", PTXsra>;
 
+defm AND : PTX_LOGIC<"and", and>;
+defm OR : PTX_LOGIC<"or", or>;
+defm XOR : PTX_LOGIC<"xor", xor>;
+
 ///===- Data Movement and Conversion Instructions -------------------------===//
 
 let neverHasSideEffects = 1 in {
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 2f2578cb5c9..320934a2228 100644
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -23,40 +23,35 @@ class PTX_READ_SPECIAL_REGISTER_R32
             !strconcat("mov.u32\t$d, %", regname),
             [(set RRegu32:$d, (intop))]>;
 
-class PTX_READ_SPECIAL_SUB_REGISTER<string regname, Intrinsic intop>
-  : InstPTX<(outs RRegu16:$d), (ins),
-            !strconcat("mov.u16\t$d, %", regname),
-            [(set RRegu16:$d, (intop))]>;
-
 // TODO Add read vector-version of special registers
 
-def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
-def PTX_READ_TID_X : PTX_READ_SPECIAL_SUB_REGISTER<"tid.x", int_ptx_read_tid_x>;
-def PTX_READ_TID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"tid.y", int_ptx_read_tid_y>;
-def PTX_READ_TID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"tid.z", int_ptx_read_tid_z>;
-def PTX_READ_TID_W : PTX_READ_SPECIAL_SUB_REGISTER<"tid.w", int_ptx_read_tid_w>;
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
 
-def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
-def PTX_READ_NTID_X : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.x", int_ptx_read_ntid_x>;
-def PTX_READ_NTID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.y", int_ptx_read_ntid_y>;
-def PTX_READ_NTID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.z", int_ptx_read_ntid_z>;
-def PTX_READ_NTID_W : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.w", int_ptx_read_ntid_w>;
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
 
 def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
 def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
 def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
 
-def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
-def PTX_READ_CTAID_X : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.x", int_ptx_read_ctaid_x>;
-def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.y", int_ptx_read_ctaid_y>;
-def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.z", int_ptx_read_ctaid_z>;
-def PTX_READ_CTAID_W : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.w", int_ptx_read_ctaid_w>;
+//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
 
-def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
-def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.x", int_ptx_read_nctaid_x>;
-def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.y", int_ptx_read_nctaid_y>;
-def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.z", int_ptx_read_nctaid_z>;
-def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.w", int_ptx_read_nctaid_w>;
+//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
 
 def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
 def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index ef4060d6f01..527622d0c78 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
 
 PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS)
   : PTXShaderModel(PTX_SM_1_0),
-    PTXVersion(PTX_VERSION_1_4),
+    PTXVersion(PTX_VERSION_2_0),
     SupportsDouble(false),
     Use64BitAddresses(false) {
   std::string TARGET = "generic";
@@ -37,9 +37,9 @@ std::string PTXSubtarget::getTargetString() const {
 std::string PTXSubtarget::getPTXVersionString() const {
   switch(PTXVersion) {
     default: llvm_unreachable("Unknown PTX version");
-    case PTX_VERSION_1_4: return "1.4";
     case PTX_VERSION_2_0: return "2.0";
     case PTX_VERSION_2_1: return "2.1";
+    case PTX_VERSION_2_2: return "2.2";
   }
 }
 
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index 19a870d4ce8..57cd43da476 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -19,16 +19,25 @@ namespace llvm {
 
   class PTXSubtarget : public TargetSubtarget {
     private:
+
+      /**
+       * Enumeration of Shader Models supported by the back-end.
+       */
       enum PTXShaderModelEnum {
-        PTX_SM_1_0,
-        PTX_SM_1_3,
-        PTX_SM_2_0
+        PTX_SM_1_0, /**< Shader Model 1.0 */
+        PTX_SM_1_3, /**< Shader Model 1.3 */
+        PTX_SM_2_0  /**< Shader Model 2.0 */
       };
 
+      /**
+       * Enumeration of PTX versions supported by the back-end.
+       *
+       * Currently, PTX 2.0 is the minimum supported version.
+       */
       enum PTXVersionEnum {
-        PTX_VERSION_1_4,
-        PTX_VERSION_2_0,
-        PTX_VERSION_2_1
+        PTX_VERSION_2_0, /**< PTX Version 2.0 */
+        PTX_VERSION_2_1, /**< PTX Version 2.1 */
+        PTX_VERSION_2_2  /**< PTX Version 2.2 */
       };
 
       /// Shader Model supported on the target GPU.
@@ -58,10 +67,10 @@ namespace llvm {
 
       bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
 
-      bool supportsPTX20() const { return PTXVersion >= PTX_VERSION_2_0; }
-
       bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
 
+      bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
+
       std::string ParseSubtargetFeatures(const std::string &FS,
                                          const std::string &CPU);
   }; // class PTXSubtarget
diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll
index 139e29ee05a..7405dd6f5e5 100644
--- a/test/CodeGen/PTX/intrinsic.ll
+++ b/test/CodeGen/PTX/intrinsic.ll
@@ -1,59 +1,59 @@
 ; RUN: llc < %s -march=ptx -mattr=+ptx20,+sm20 | FileCheck %s
 
-define ptx_device i16 @test_tid_x() {
-; CHECK: mov.u16 rh0, %tid.x;
+define ptx_device i32 @test_tid_x() {
+; CHECK: mov.u32 r0, %tid.x;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.tid.x()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.tid.x()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_tid_y() {
-; CHECK: mov.u16 rh0, %tid.y;
+define ptx_device i32 @test_tid_y() {
+; CHECK: mov.u32 r0, %tid.y;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.tid.y()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.tid.y()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_tid_z() {
-; CHECK: mov.u16 rh0, %tid.z;
+define ptx_device i32 @test_tid_z() {
+; CHECK: mov.u32 r0, %tid.z;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.tid.z()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.tid.z()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_tid_w() {
-; CHECK: mov.u16 rh0, %tid.w;
+define ptx_device i32 @test_tid_w() {
+; CHECK: mov.u32 r0, %tid.w;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.tid.w()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.tid.w()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_ntid_x() {
-; CHECK: mov.u16 rh0, %ntid.x;
+define ptx_device i32 @test_ntid_x() {
+; CHECK: mov.u32 r0, %ntid.x;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.ntid.x()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.ntid.x()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_ntid_y() {
-; CHECK: mov.u16 rh0, %ntid.y;
+define ptx_device i32 @test_ntid_y() {
+; CHECK: mov.u32 r0, %ntid.y;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.ntid.y()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.ntid.y()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_ntid_z() {
-; CHECK: mov.u16 rh0, %ntid.z;
+define ptx_device i32 @test_ntid_z() {
+; CHECK: mov.u32 r0, %ntid.z;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.ntid.z()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.ntid.z()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_ntid_w() {
-; CHECK: mov.u16 rh0, %ntid.w;
+define ptx_device i32 @test_ntid_w() {
+; CHECK: mov.u32 r0, %ntid.w;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.ntid.w()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.ntid.w()
+  ret i32 %x
 }
 
 define ptx_device i32 @test_laneid() {
@@ -77,60 +77,60 @@ define ptx_device i32 @test_nwarpid() {
   ret i32 %x
 }
 
-define ptx_device i16 @test_ctaid_x() {
-; CHECK: mov.u16 rh0, %ctaid.x;
+define ptx_device i32 @test_ctaid_x() {
+; CHECK: mov.u32 r0, %ctaid.x;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.ctaid.x()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.ctaid.x()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_ctaid_y() {
-; CHECK: mov.u16 rh0, %ctaid.y;
+define ptx_device i32 @test_ctaid_y() {
+; CHECK: mov.u32 r0, %ctaid.y;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.ctaid.y()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.ctaid.y()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_ctaid_z() {
-; CHECK: mov.u16 rh0, %ctaid.z;
+define ptx_device i32 @test_ctaid_z() {
+; CHECK: mov.u32 r0, %ctaid.z;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.ctaid.z()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.ctaid.z()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_ctaid_w() {
-; CHECK: mov.u16 rh0, %ctaid.w;
+define ptx_device i32 @test_ctaid_w() {
+; CHECK: mov.u32 r0, %ctaid.w;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.ctaid.w()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.ctaid.w()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_nctaid_x() {
-; CHECK: mov.u16 rh0, %nctaid.x;
+define ptx_device i32 @test_nctaid_x() {
+; CHECK: mov.u32 r0, %nctaid.x;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.nctaid.x()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.nctaid.x()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_nctaid_y() {
-; CHECK: mov.u16 rh0, %nctaid.y;
+define ptx_device i32 @test_nctaid_y() {
+; CHECK: mov.u32 r0, %nctaid.y;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.nctaid.y()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.nctaid.y()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_nctaid_z() {
-; CHECK: mov.u16 rh0, %nctaid.z;
+define ptx_device i32 @test_nctaid_z() {
+; CHECK: mov.u32 r0, %nctaid.z;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.nctaid.z()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.nctaid.z()
+  ret i32 %x
 }
 
-define ptx_device i16 @test_nctaid_w() {
-; CHECK: mov.u16 rh0, %nctaid.w;
+define ptx_device i32 @test_nctaid_w() {
+; CHECK: mov.u32 r0, %nctaid.w;
 ; CHECK-NEXT: ret;
-  %x = call i16 @llvm.ptx.read.nctaid.w()
-  ret i16 %x
+  %x = call i32 @llvm.ptx.read.nctaid.w()
+  ret i32 %x
 }
 
 define ptx_device i32 @test_smid() {
@@ -238,27 +238,27 @@ define ptx_device void @test_bar_sync() {
   ret void
 }
 
-declare i16 @llvm.ptx.read.tid.x()
-declare i16 @llvm.ptx.read.tid.y()
-declare i16 @llvm.ptx.read.tid.z()
-declare i16 @llvm.ptx.read.tid.w()
-declare i16 @llvm.ptx.read.ntid.x()
-declare i16 @llvm.ptx.read.ntid.y()
-declare i16 @llvm.ptx.read.ntid.z()
-declare i16 @llvm.ptx.read.ntid.w()
+declare i32 @llvm.ptx.read.tid.x()
+declare i32 @llvm.ptx.read.tid.y()
+declare i32 @llvm.ptx.read.tid.z()
+declare i32 @llvm.ptx.read.tid.w()
+declare i32 @llvm.ptx.read.ntid.x()
+declare i32 @llvm.ptx.read.ntid.y()
+declare i32 @llvm.ptx.read.ntid.z()
+declare i32 @llvm.ptx.read.ntid.w()
 
 declare i32 @llvm.ptx.read.laneid()
 declare i32 @llvm.ptx.read.warpid()
 declare i32 @llvm.ptx.read.nwarpid()
 
-declare i16 @llvm.ptx.read.ctaid.x()
-declare i16 @llvm.ptx.read.ctaid.y()
-declare i16 @llvm.ptx.read.ctaid.z()
-declare i16 @llvm.ptx.read.ctaid.w()
-declare i16 @llvm.ptx.read.nctaid.x()
-declare i16 @llvm.ptx.read.nctaid.y()
-declare i16 @llvm.ptx.read.nctaid.z()
-declare i16 @llvm.ptx.read.nctaid.w()
+declare i32 @llvm.ptx.read.ctaid.x()
+declare i32 @llvm.ptx.read.ctaid.y()
+declare i32 @llvm.ptx.read.ctaid.z()
+declare i32 @llvm.ptx.read.ctaid.w()
+declare i32 @llvm.ptx.read.nctaid.x()
+declare i32 @llvm.ptx.read.nctaid.y()
+declare i32 @llvm.ptx.read.nctaid.z()
+declare i32 @llvm.ptx.read.nctaid.w()
 
 declare i32 @llvm.ptx.read.smid()
 declare i32 @llvm.ptx.read.nsmid()
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
index 1435537e007..6576a6d8bbb 100644
--- a/test/CodeGen/PTX/options.ll
+++ b/test/CodeGen/PTX/options.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=ptx -mattr=ptx14 | grep ".version 1.4"
 ; RUN: llc < %s -march=ptx -mattr=ptx20 | grep ".version 2.0"
 ; RUN: llc < %s -march=ptx -mattr=ptx21 | grep ".version 2.1"
-; RUN: llc < %s -march=ptx -mattr=sm20 | grep ".target sm_20"
+; RUN: llc < %s -march=ptx -mattr=ptx22 | grep ".version 2.2"
+; RUN: llc < %s -march=ptx -mattr=sm10 | grep ".target sm_10"
 ; RUN: llc < %s -march=ptx -mattr=sm13 | grep ".target sm_13"
+; RUN: llc < %s -march=ptx -mattr=sm20 | grep ".target sm_20"
 
 define ptx_device void @t1() {
   ret void