PTX: Reverting implementation of i8.

The .b8 operations in PTX are far more limited than I first thought. Even the mov operation is not supported for .b8, so there is no way to convert a .pred value into a .b8 value without going via .b16,
which is not sensible. An improved implementation should instead rely on the fact that loads and stores automatically extend and truncate, and use that to implement EXTLOAD and TRUNCSTORE support so that
boolean values are handled correctly.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133873 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Bailey 2011-06-25 18:16:28 +00:00
parent 25b15777df
commit 84149460d5
11 changed files with 73 additions and 546 deletions

View File

@ -92,7 +92,6 @@ static const char *getRegisterTypeName(unsigned RegNo) {
#define TEST_REGCLS(cls, clsstr) \
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
TEST_REGCLS(RegPred, pred);
TEST_REGCLS(RegI8, b8);
TEST_REGCLS(RegI16, b16);
TEST_REGCLS(RegI32, b32);
TEST_REGCLS(RegI64, b64);
@ -125,7 +124,6 @@ static const char *getTypeName(const Type* type) {
case Type::IntegerTyID:
switch (type->getPrimitiveSizeInBits()) {
default: llvm_unreachable("Unknown integer bit-width");
case 8: return ".u8";
case 16: return ".u16";
case 32: return ".u32";
case 64: return ".u64";

View File

@ -15,7 +15,6 @@
// PTX Formal Parameter Calling Convention
def CC_PTX : CallingConv<[
CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
CCIfType<[i8], CCAssignToReg<[RQ12, RQ13, RQ14, RQ15, RQ16, RQ17, RQ18, RQ19, RQ20, RQ21, RQ22, RQ23, RQ24, RQ25, RQ26, RQ27, RQ28, RQ29, RQ30, RQ31, RQ32, RQ33, RQ34, RQ35, RQ36, RQ37, RQ38, RQ39, RQ40, RQ41, RQ42, RQ43, RQ44, RQ45, RQ46, RQ47, RQ48, RQ49, RQ50, RQ51, RQ52, RQ53, RQ54, RQ55, RQ56, RQ57, RQ58, RQ59, RQ60, RQ61, RQ62, RQ63, RQ64, RQ65, RQ66, RQ67, RQ68, RQ69, RQ70, RQ71, RQ72, RQ73, RQ74, RQ75, RQ76, RQ77, RQ78, RQ79, RQ80, RQ81, RQ82, RQ83, RQ84, RQ85, RQ86, RQ87, RQ88, RQ89, RQ90, RQ91, RQ92, RQ93, RQ94, RQ95, RQ96, RQ97, RQ98, RQ99, RQ100, RQ101, RQ102, RQ103, RQ104, RQ105, RQ106, RQ107, RQ108, RQ109, RQ110, RQ111, RQ112, RQ113, RQ114, RQ115, RQ116, RQ117, RQ118, RQ119, RQ120, RQ121, RQ122, RQ123, RQ124, RQ125, RQ126, RQ127]>>,
CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
@ -24,7 +23,6 @@ def CC_PTX : CallingConv<[
// PTX Return Value Calling Convention
def RetCC_PTX : CallingConv<[
CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
CCIfType<[i8], CCAssignToReg<[RQ0, RQ1, RQ2, RQ3, RQ4, RQ5, RQ6, RQ7, RQ8, RQ9, RQ10, RQ11]>>,
CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>

View File

@ -40,7 +40,6 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
addRegisterClass(MVT::i8, PTX::RegI8RegisterClass);
addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
@ -48,48 +47,59 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
setBooleanContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
setMinFunctionAlignment(2);
// Promote i1 type
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
////////////////////////////////////
/////////// Expansion //////////////
////////////////////////////////////
setTruncStoreAction(MVT::i8, MVT::i1, Promote);
// (any/zero/sign) extload => load + (any/zero/sign) extend
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// Turn i16 (z)extload into load + (z)extend
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
// Turn f32 extload into load + fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
// Turn f64 truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// Expand BR_CC into BRCOND
// f32 extload => load + fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
// f64 truncstore => trunc + store
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// sign_extend_inreg => sign_extend
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// br_cc => brcond
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
// Expand SELECT_CC into SETCC
// select_cc => setcc
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
// need to lower SETCC of RegPred into bitwise logic
////////////////////////////////////
//////////// Legal /////////////////
////////////////////////////////////
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
////////////////////////////////////
//////////// Custom ////////////////
////////////////////////////////////
// customise setcc to use bitwise logic if possible
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setMinFunctionAlignment(2);
// customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// Compute derived properties from the register classes
computeRegisterProperties();
@ -187,7 +197,6 @@ struct argmap_entry {
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
} argmap[] = {
argmap_entry(MVT::i1, PTX::RegPredRegisterClass),
argmap_entry(MVT::i8, PTX::RegI8RegisterClass),
argmap_entry(MVT::i16, PTX::RegI16RegisterClass),
argmap_entry(MVT::i32, PTX::RegI32RegisterClass),
argmap_entry(MVT::i64, PTX::RegI64RegisterClass),
@ -264,9 +273,6 @@ SDValue PTXTargetLowering::
if (RegVT == MVT::i1) {
TRC = PTX::RegPredRegisterClass;
}
else if (RegVT == MVT::i8) {
TRC = PTX::RegI8RegisterClass;
}
else if (RegVT == MVT::i16) {
TRC = PTX::RegI16RegisterClass;
}

View File

@ -33,7 +33,6 @@ static const struct map_entry {
const TargetRegisterClass *cls;
const int opcode;
} map[] = {
{ &PTX::RegI8RegClass, PTX::MOVU8rr },
{ &PTX::RegI16RegClass, PTX::MOVU16rr },
{ &PTX::RegI32RegClass, PTX::MOVU32rr },
{ &PTX::RegI64RegClass, PTX::MOVU64rr },
@ -303,9 +302,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
int OpCode;
// Select the appropriate opcode based on the register class
if (RC == PTX::RegI8RegisterClass) {
OpCode = PTX::STACKSTOREI8;
} else if (RC == PTX::RegI16RegisterClass) {
if (RC == PTX::RegI16RegisterClass) {
OpCode = PTX::STACKSTOREI16;
} else if (RC == PTX::RegI32RegisterClass) {
OpCode = PTX::STACKSTOREI32;
@ -340,9 +337,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
int OpCode;
// Select the appropriate opcode based on the register class
if (RC == PTX::RegI8RegisterClass) {
OpCode = PTX::STACKLOADI8;
} else if (RC == PTX::RegI16RegisterClass) {
if (RC == PTX::RegI16RegisterClass) {
OpCode = PTX::STACKLOADI16;
} else if (RC == PTX::RegI32RegisterClass) {
OpCode = PTX::STACKLOADI32;

View File

@ -537,7 +537,6 @@ multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_lo
}
multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
defm u8 : PTX_LD<opstr, ".u8", RegI8, pat_load>;
defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
@ -573,7 +572,6 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_st
}
multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
defm u8 : PTX_ST<opstr, ".u8", RegI8, pat_store>;
defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
@ -785,27 +783,22 @@ defm XOR : PTX_LOGIC<"xor", xor>;
let neverHasSideEffects = 1 in {
def MOVPREDrr
: InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
def MOVU8rr
: InstPTX<(outs RegI8:$d), (ins RegI8:$a), "mov.u8\t$d, $a", []>;
def MOVU16rr
: InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
: InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
def MOVU32rr
: InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
: InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
def MOVU64rr
: InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
: InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
def MOVF32rr
: InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
: InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
def MOVF64rr
: InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
: InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOVPREDri
: InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
[(set RegPred:$d, imm:$a)]>;
def MOVU8ri
: InstPTX<(outs RegI8:$d), (ins i8imm:$a), "mov.u8\t$d, $a",
[(set RegI8:$d, imm:$a)]>;
def MOVU16ri
: InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
[(set RegI16:$d, imm:$a)]>;
@ -845,9 +838,6 @@ let hasSideEffects = 1 in {
def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
"ld.param.pred\t$d, [$a]",
[(set RegPred:$d, (PTXloadparam timm:$a))]>;
def LDpiU8 : InstPTX<(outs RegI8:$d), (ins MEMpi:$a),
"ld.param.u8\t$d, [$a]",
[(set RegI8:$d, (PTXloadparam timm:$a))]>;
def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
"ld.param.u16\t$d, [$a]",
[(set RegI16:$d, (PTXloadparam timm:$a))]>;
@ -867,9 +857,6 @@ let hasSideEffects = 1 in {
def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
"st.param.pred\t[$d], $a",
[(PTXstoreparam timm:$d, RegPred:$a)]>;
def STpiU8 : InstPTX<(outs), (ins MEMret:$d, RegI8:$a),
"st.param.u8\t[$d], $a",
[(PTXstoreparam timm:$d, RegI8:$a)]>;
def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
"st.param.u16\t[$d], $a",
[(PTXstoreparam timm:$d, RegI16:$a)]>;
@ -900,62 +887,34 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
// PTX does not directly support converting to a predicate type, so we fake it
// by performing a greater-than test between the value and zero. This follows
// the C convention that any non-zero value is equivalent to 'true'.
def CVT_pred_u8
: InstPTX<(outs RegPred:$d), (ins RegI8:$a), "setp.gt.b8\t$d, $a, 0",
[(set RegPred:$d, (trunc RegI8:$a))]>;
def CVT_pred_u16
: InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.b16\t$d, $a, 0",
: InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
[(set RegPred:$d, (trunc RegI16:$a))]>;
def CVT_pred_u32
: InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.b32\t$d, $a, 0",
: InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
[(set RegPred:$d, (trunc RegI32:$a))]>;
def CVT_pred_u64
: InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.b64\t$d, $a, 0",
: InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
[(set RegPred:$d, (trunc RegI64:$a))]>;
def CVT_pred_f32
: InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.b32\t$d, $a, 0",
: InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
[(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
def CVT_pred_f64
: InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.b64\t$d, $a, 0",
: InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
[(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to u8
// PTX does not directly support converting a predicate to a value, so we
// use a select instruction to select either 0 or 1 (integer or fp) based
// on the truth value of the predicate.
def CVT_u8_pred
: InstPTX<(outs RegI8:$d), (ins RegPred:$a), "selp.u8\t$d, 1, 0, $a",
[(set RegI8:$d, (zext RegPred:$a))]>;
def CVT_u8_preds
: InstPTX<(outs RegI8:$d), (ins RegPred:$a), "selp.u8\t$d, 1, 0, $a",
[(set RegI8:$d, (sext RegPred:$a))]>;
def CVT_u8_u32
: InstPTX<(outs RegI8:$d), (ins RegI32:$a), "cvt.u8.u32\t$d, $a",
[(set RegI8:$d, (trunc RegI32:$a))]>;
def CVT_u8_u64
: InstPTX<(outs RegI8:$d), (ins RegI64:$a), "cvt.u8.u64\t$d, $a",
[(set RegI8:$d, (trunc RegI64:$a))]>;
def CVT_u8_f32
: InstPTX<(outs RegI8:$d), (ins RegF32:$a), "cvt.rzi.u8.f32\t$d, $a",
[(set RegI8:$d, (fp_to_uint RegF32:$a))]>;
def CVT_u8_f64
: InstPTX<(outs RegI8:$d), (ins RegF64:$a), "cvt.rzi.u8.f64\t$d, $a",
[(set RegI8:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to u16
// PTX does not directly support converting a predicate to a value, so we
// use a select instruction to select either 0 or 1 (integer or fp) based
// on the truth value of the predicate.
def CVT_u16_preda
: InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
[(set RegI16:$d, (anyext RegPred:$a))]>;
def CVT_u16_pred
: InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
[(set RegI16:$d, (zext RegPred:$a))]>;
@ -964,14 +923,6 @@ def CVT_u16_preds
: InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
[(set RegI16:$d, (sext RegPred:$a))]>;
def CVT_u16_u8
: InstPTX<(outs RegI16:$d), (ins RegI8:$a), "cvt.u16.u8\t$d, $a",
[(set RegI16:$d, (zext RegI8:$a))]>;
def CVT_u16_s8
: InstPTX<(outs RegI16:$d), (ins RegI8:$a), "cvt.u16.s8\t$d, $a",
[(set RegI16:$d, (sext RegI8:$a))]>;
def CVT_u16_u32
: InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
[(set RegI16:$d, (trunc RegI32:$a))]>;
@ -994,9 +945,9 @@ def CVT_u32_pred
: InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
[(set RegI32:$d, (zext RegPred:$a))]>;
def CVT_u32_u8
: InstPTX<(outs RegI32:$d), (ins RegI8:$a), "cvt.u32.u8\t$d, $a",
[(set RegI32:$d, (zext RegI8:$a))]>;
def CVT_u32_b16
: InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
[(set RegI32:$d, (anyext RegI16:$a))]>;
def CVT_u32_u16
: InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
@ -1006,10 +957,6 @@ def CVT_u32_preds
: InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
[(set RegI32:$d, (sext RegPred:$a))]>;
// Sign-extending i8 -> i32 conversion. The emitted instruction reads its source
// as .s8, so the selection pattern must be sext; zext of an i8 is already
// matched by CVT_u32_u8 (cvt.u32.u8).
def CVT_u32_s8
: InstPTX<(outs RegI32:$d), (ins RegI8:$a), "cvt.u32.s8\t$d, $a",
[(set RegI32:$d, (sext RegI8:$a))]>;
def CVT_u32_s16
: InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
[(set RegI32:$d, (sext RegI16:$a))]>;
@ -1032,30 +979,22 @@ def CVT_u64_pred
: InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
[(set RegI64:$d, (zext RegPred:$a))]>;
def CVT_u64_u8
: InstPTX<(outs RegI64:$d), (ins RegI8:$a), "cvt.u64.u8\t$d, $a",
[(set RegI64:$d, (zext RegI8:$a))]>;
def CVT_u64_preds
: InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
[(set RegI64:$d, (sext RegPred:$a))]>;
def CVT_u64_u16
: InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
[(set RegI64:$d, (zext RegI16:$a))]>;
def CVT_u64_u32
: InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
[(set RegI64:$d, (zext RegI32:$a))]>;
def CVT_u64_preds
: InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
[(set RegI64:$d, (sext RegPred:$a))]>;
// Sign-extending i8 -> i64 conversion. The emitted instruction reads its source
// as .s8, so the selection pattern must be sext; zext of an i8 is already
// matched by CVT_u64_u8 (cvt.u64.u8).
def CVT_u64_s8
: InstPTX<(outs RegI64:$d), (ins RegI8:$a), "cvt.u64.s8\t$d, $a",
[(set RegI64:$d, (sext RegI8:$a))]>;
def CVT_u64_s16
: InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
[(set RegI64:$d, (sext RegI16:$a))]>;
def CVT_u64_u32
: InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
[(set RegI64:$d, (zext RegI32:$a))]>;
def CVT_u64_s32
: InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
[(set RegI64:$d, (sext RegI32:$a))]>;
@ -1075,10 +1014,6 @@ def CVT_f32_pred
"selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
[(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
def CVT_f32_u8
: InstPTX<(outs RegF32:$d), (ins RegI8:$a), "cvt.rn.f32.u8\t$d, $a",
[(set RegF32:$d, (uint_to_fp RegI8:$a))]>;
def CVT_f32_u16
: InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
[(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
@ -1102,10 +1037,6 @@ def CVT_f64_pred
"selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
[(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
def CVT_f64_u8
: InstPTX<(outs RegF64:$d), (ins RegI8:$a), "cvt.rn.f64.u8\t$d, $a",
[(set RegF64:$d, (uint_to_fp RegI8:$a))]>;
def CVT_f64_u16
: InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
[(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
@ -1144,8 +1075,6 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
///===- Spill Instructions ------------------------------------------------===//
// Special instructions used for stack spilling
def STACKSTOREI8 : InstPTX<(outs), (ins i32imm:$d, RegI8:$a),
"mov.u8\ts$d, $a", []>;
def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
"mov.u16\ts$d, $a", []>;
def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
@ -1157,8 +1086,6 @@ def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
"mov.f64\ts$d, $a", []>;
def STACKLOADI8 : InstPTX<(outs), (ins RegI8:$d, i32imm:$a),
"mov.u8\t$d, s$a", []>;
def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
"mov.u16\t$d, s$a", []>;
def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),

View File

@ -151,137 +151,6 @@ def P125 : PTXReg<"p125">;
def P126 : PTXReg<"p126">;
def P127 : PTXReg<"p127">;
///===- 8-Bit Registers --------------------------------------------------===//
def RQ0 : PTXReg<"rq0">;
def RQ1 : PTXReg<"rq1">;
def RQ2 : PTXReg<"rq2">;
def RQ3 : PTXReg<"rq3">;
def RQ4 : PTXReg<"rq4">;
def RQ5 : PTXReg<"rq5">;
def RQ6 : PTXReg<"rq6">;
def RQ7 : PTXReg<"rq7">;
def RQ8 : PTXReg<"rq8">;
def RQ9 : PTXReg<"rq9">;
def RQ10 : PTXReg<"rq10">;
def RQ11 : PTXReg<"rq11">;
def RQ12 : PTXReg<"rq12">;
def RQ13 : PTXReg<"rq13">;
def RQ14 : PTXReg<"rq14">;
def RQ15 : PTXReg<"rq15">;
def RQ16 : PTXReg<"rq16">;
def RQ17 : PTXReg<"rq17">;
def RQ18 : PTXReg<"rq18">;
def RQ19 : PTXReg<"rq19">;
def RQ20 : PTXReg<"rq20">;
def RQ21 : PTXReg<"rq21">;
def RQ22 : PTXReg<"rq22">;
def RQ23 : PTXReg<"rq23">;
def RQ24 : PTXReg<"rq24">;
def RQ25 : PTXReg<"rq25">;
def RQ26 : PTXReg<"rq26">;
def RQ27 : PTXReg<"rq27">;
def RQ28 : PTXReg<"rq28">;
def RQ29 : PTXReg<"rq29">;
def RQ30 : PTXReg<"rq30">;
def RQ31 : PTXReg<"rq31">;
def RQ32 : PTXReg<"rq32">;
def RQ33 : PTXReg<"rq33">;
def RQ34 : PTXReg<"rq34">;
def RQ35 : PTXReg<"rq35">;
def RQ36 : PTXReg<"rq36">;
def RQ37 : PTXReg<"rq37">;
def RQ38 : PTXReg<"rq38">;
def RQ39 : PTXReg<"rq39">;
def RQ40 : PTXReg<"rq40">;
def RQ41 : PTXReg<"rq41">;
def RQ42 : PTXReg<"rq42">;
def RQ43 : PTXReg<"rq43">;
def RQ44 : PTXReg<"rq44">;
def RQ45 : PTXReg<"rq45">;
def RQ46 : PTXReg<"rq46">;
def RQ47 : PTXReg<"rq47">;
def RQ48 : PTXReg<"rq48">;
def RQ49 : PTXReg<"rq49">;
def RQ50 : PTXReg<"rq50">;
def RQ51 : PTXReg<"rq51">;
def RQ52 : PTXReg<"rq52">;
def RQ53 : PTXReg<"rq53">;
def RQ54 : PTXReg<"rq54">;
def RQ55 : PTXReg<"rq55">;
def RQ56 : PTXReg<"rq56">;
def RQ57 : PTXReg<"rq57">;
def RQ58 : PTXReg<"rq58">;
def RQ59 : PTXReg<"rq59">;
def RQ60 : PTXReg<"rq60">;
def RQ61 : PTXReg<"rq61">;
def RQ62 : PTXReg<"rq62">;
def RQ63 : PTXReg<"rq63">;
def RQ64 : PTXReg<"rq64">;
def RQ65 : PTXReg<"rq65">;
def RQ66 : PTXReg<"rq66">;
def RQ67 : PTXReg<"rq67">;
def RQ68 : PTXReg<"rq68">;
def RQ69 : PTXReg<"rq69">;
def RQ70 : PTXReg<"rq70">;
def RQ71 : PTXReg<"rq71">;
def RQ72 : PTXReg<"rq72">;
def RQ73 : PTXReg<"rq73">;
def RQ74 : PTXReg<"rq74">;
def RQ75 : PTXReg<"rq75">;
def RQ76 : PTXReg<"rq76">;
def RQ77 : PTXReg<"rq77">;
def RQ78 : PTXReg<"rq78">;
def RQ79 : PTXReg<"rq79">;
def RQ80 : PTXReg<"rq80">;
def RQ81 : PTXReg<"rq81">;
def RQ82 : PTXReg<"rq82">;
def RQ83 : PTXReg<"rq83">;
def RQ84 : PTXReg<"rq84">;
def RQ85 : PTXReg<"rq85">;
def RQ86 : PTXReg<"rq86">;
def RQ87 : PTXReg<"rq87">;
def RQ88 : PTXReg<"rq88">;
def RQ89 : PTXReg<"rq89">;
def RQ90 : PTXReg<"rq90">;
def RQ91 : PTXReg<"rq91">;
def RQ92 : PTXReg<"rq92">;
def RQ93 : PTXReg<"rq93">;
def RQ94 : PTXReg<"rq94">;
def RQ95 : PTXReg<"rq95">;
def RQ96 : PTXReg<"rq96">;
def RQ97 : PTXReg<"rq97">;
def RQ98 : PTXReg<"rq98">;
def RQ99 : PTXReg<"rq99">;
def RQ100 : PTXReg<"rq100">;
def RQ101 : PTXReg<"rq101">;
def RQ102 : PTXReg<"rq102">;
def RQ103 : PTXReg<"rq103">;
def RQ104 : PTXReg<"rq104">;
def RQ105 : PTXReg<"rq105">;
def RQ106 : PTXReg<"rq106">;
def RQ107 : PTXReg<"rq107">;
def RQ108 : PTXReg<"rq108">;
def RQ109 : PTXReg<"rq109">;
def RQ110 : PTXReg<"rq110">;
def RQ111 : PTXReg<"rq111">;
def RQ112 : PTXReg<"rq112">;
def RQ113 : PTXReg<"rq113">;
def RQ114 : PTXReg<"rq114">;
def RQ115 : PTXReg<"rq115">;
def RQ116 : PTXReg<"rq116">;
def RQ117 : PTXReg<"rq117">;
def RQ118 : PTXReg<"rq118">;
def RQ119 : PTXReg<"rq119">;
def RQ120 : PTXReg<"rq120">;
def RQ121 : PTXReg<"rq121">;
def RQ122 : PTXReg<"rq122">;
def RQ123 : PTXReg<"rq123">;
def RQ124 : PTXReg<"rq124">;
def RQ125 : PTXReg<"rq125">;
def RQ126 : PTXReg<"rq126">;
def RQ127 : PTXReg<"rq127">;
///===- 16-Bit Registers --------------------------------------------------===//
def RH0 : PTXReg<"rh0">;
@ -679,7 +548,6 @@ def RD127 : PTXReg<"rd127">;
// Register classes
//===----------------------------------------------------------------------===//
def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
def RegI8 : RegisterClass<"PTX", [i8], 8, (sequence "RQ%u", 0, 127)>;
def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;

View File

@ -15,16 +15,15 @@
from sys import argv, exit, stdout
if len(argv) != 6:
print('Usage: generate-register-td.py <num_preds> <num_8> <num_16> <num_32> <num_64>')
if len(argv) != 5:
print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
exit(1)
try:
num_pred = int(argv[1])
num_8bit = int(argv[2])
num_16bit = int(argv[3])
num_32bit = int(argv[4])
num_64bit = int(argv[5])
num_16bit = int(argv[2])
num_32bit = int(argv[3])
num_64bit = int(argv[4])
except:
print('ERROR: Invalid integer parameter')
exit(1)
@ -61,11 +60,6 @@ td_file.write('\n///===- Predicate Registers -----------------------------------
for r in range(0, num_pred):
td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
# Print 8-bit registers
td_file.write('\n///===- 8-Bit Registers --------------------------------------------------===//\n\n')
for r in range(0, num_8bit):
td_file.write('def RQ%d : PTXReg<"rq%d">;\n' % (r, r))
# Print 16-bit registers
td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
for r in range(0, num_16bit):
@ -92,7 +86,6 @@ td_file.write('''
# Print register classes
td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
td_file.write('def RegI8 : RegisterClass<"PTX", [i8], 8, (sequence "RQ%%u", 0, %d)>;\n' % (num_8bit-1))
td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
@ -108,20 +101,16 @@ td_file = open('PTXCallingConv.td', 'w')
# Reserve 10% of the available registers for return values, and the other 90%
# for parameters
num_ret_pred = int(0.1 * num_pred)
num_ret_8bit = int(0.1 * num_8bit)
num_ret_16bit = int(0.1 * num_16bit)
num_ret_32bit = int(0.1 * num_32bit)
num_ret_64bit = int(0.1 * num_64bit)
num_param_pred = num_pred - num_ret_pred
num_param_8bit = num_8bit - num_ret_8bit
num_param_16bit = num_16bit - num_ret_16bit
num_param_32bit = num_32bit - num_ret_32bit
num_param_64bit = num_64bit - num_ret_64bit
param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
param_regs_8bit = [('RQ%d' % (i+num_ret_8bit)) for i in range(0, num_param_8bit)]
ret_regs_8bit = ['RQ%d' % i for i in range(0, num_ret_8bit)]
param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
@ -131,8 +120,6 @@ ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred)
ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred)
param_list_8bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_8bit)
ret_list_8bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_8bit)
param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit)
ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit)
param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit)
@ -157,7 +144,6 @@ td_file.write('''
// PTX Formal Parameter Calling Convention
def CC_PTX : CallingConv<[
CCIfType<[i1], CCAssignToReg<[%s]>>,
CCIfType<[i8], CCAssignToReg<[%s]>>,
CCIfType<[i16], CCAssignToReg<[%s]>>,
CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
CCIfType<[i64,f64], CCAssignToReg<[%s]>>
@ -166,13 +152,12 @@ def CC_PTX : CallingConv<[
// PTX Return Value Calling Convention
def RetCC_PTX : CallingConv<[
CCIfType<[i1], CCAssignToReg<[%s]>>,
CCIfType<[i8], CCAssignToReg<[%s]>>,
CCIfType<[i16], CCAssignToReg<[%s]>>,
CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
CCIfType<[i64,f64], CCAssignToReg<[%s]>>
]>;
''' % (param_list_pred, param_list_8bit, param_list_16bit, param_list_32bit, param_list_64bit,
ret_list_pred, ret_list_8bit, ret_list_16bit, ret_list_32bit, ret_list_64bit))
''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
td_file.close()

View File

@ -3,17 +3,6 @@
; preds
; (note: we convert back to i32 to return)
define ptx_device i32 @cvt_pred_i8(i8 %x, i1 %y) {
; CHECK: setp.gt.b8 p[[P0:[0-9]+]], rq{{[0-9]+}}, 0
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
; CHECK-NEXT: ret;
%a = trunc i8 %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
ret i32 %c
}
define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
; CHECK: setp.gt.b16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
@ -69,43 +58,6 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
ret i32 %c
}
; i8
define ptx_device i8 @cvt_i8_preds(i1 %x) {
; CHECK: selp.u8 rq{{[0-9]+}}, 1, 0, p{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i1 %x to i8
ret i8 %a
}
define ptx_device i8 @cvt_i8_i32(i32 %x) {
; CHECK: cvt.u8.u32 rq{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = trunc i32 %x to i8
ret i8 %a
}
define ptx_device i8 @cvt_i8_i64(i64 %x) {
; CHECK: cvt.u8.u64 rq{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = trunc i64 %x to i8
ret i8 %a
}
define ptx_device i8 @cvt_i8_f32(float %x) {
; CHECK: cvt.rzi.u8.f32 rq{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fptoui float %x to i8
ret i8 %a
}
define ptx_device i8 @cvt_i8_f64(double %x) {
; CHECK: cvt.rzi.u8.f64 rq{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fptoui double %x to i8
ret i8 %a
}
; i16
define ptx_device i16 @cvt_i16_preds(i1 %x) {
@ -115,13 +67,6 @@ define ptx_device i16 @cvt_i16_preds(i1 %x) {
ret i16 %a
}
define ptx_device i16 @cvt_i16_i8(i8 %x) {
; CHECK: cvt.u16.u8 rh{{[0-9]+}}, rq{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i8 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_i32(i32 %x) {
; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
@ -159,13 +104,6 @@ define ptx_device i32 @cvt_i32_preds(i1 %x) {
ret i32 %a
}
define ptx_device i32 @cvt_i32_i8(i8 %x) {
; CHECK: cvt.u32.u8 r{{[0-9]+}}, rq{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i8 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_i16(i16 %x) {
; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
@ -203,13 +141,6 @@ define ptx_device i64 @cvt_i64_preds(i1 %x) {
ret i64 %a
}
define ptx_device i64 @cvt_i64_i8(i8 %x) {
; CHECK: cvt.u64.u8 rd{{[0-9]+}}, rq{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i8 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_i16(i16 %x) {
; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
@ -247,13 +178,6 @@ define ptx_device float @cvt_f32_preds(i1 %x) {
ret float %a
}
define ptx_device float @cvt_f32_i8(i8 %x) {
; CHECK: cvt.rn.f32.u8 r{{[0-9]+}}, rq{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i8 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i16(i16 %x) {
; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
@ -291,13 +215,6 @@ define ptx_device double @cvt_f64_preds(i1 %x) {
ret double %a
}
; Converts the i8 argument, treated as unsigned (uitofp), to double.
; Expected PTX: one cvt carrying the .rn modifier (cvt.rn.f64.u8) from an
; rq<N> register into an rd<N> register, with ret on the next output line.
define ptx_device double @cvt_f64_i8(i8 %x) {
; CHECK: cvt.rn.f64.u8 rd{{[0-9]+}}, rq{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i8 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i16(i16 %x) {
; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;

View File

@ -1,17 +1,5 @@
; Load tests: this file is piped through llc for the ptx32 target and the
; emitted assembly is verified by FileCheck against the patterns below.
; RUN: llc < %s -march=ptx32 | FileCheck %s

; External arrays used by the load tests. Each declaration is paired with the
; .extern directive expected in the output: the default address space is
; expected as .global, addrspace(1) as .const, addrspace(2) as .local and
; addrspace(4) as .shared. The expected element type is always .b8, with the
; array length given in bytes (e.g. 10 x i16 is expected as [20]).
;CHECK: .extern .global .b8 array_i8[10];
@array_i8 = external global [10 x i8]
;CHECK: .extern .const .b8 array_constant_i8[10];
@array_constant_i8 = external addrspace(1) constant [10 x i8]
;CHECK: .extern .local .b8 array_local_i8[10];
@array_local_i8 = external addrspace(2) global [10 x i8]
;CHECK: .extern .shared .b8 array_shared_i8[10];
@array_shared_i8 = external addrspace(4) global [10 x i8]
;CHECK: .extern .global .b8 array_i16[20];
@array_i16 = external global [10 x i16]
@ -72,13 +60,6 @@
;CHECK: .extern .shared .b8 array_shared_double[80];
@array_shared_double = external addrspace(4) global [10 x double]
; Loads an i8 directly through the pointer argument and returns it.
; Expected PTX: ld.global.u8 into an rq<N> register from the address held in
; an r<N> register (no offset), with ret on the next output line.
define ptx_device i8 @t1_u8(i8* %p) {
entry:
;CHECK: ld.global.u8 rq{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load i8* %p
ret i8 %x
}
define ptx_device i16 @t1_u16(i16* %p) {
entry:
@ -120,15 +101,6 @@ entry:
ret double %x
}
; Loads the i8 one element past the pointer argument and returns it.
; Expected PTX: the constant index is folded into the address operand as
; [r<N>+1] of a single ld.global.u8, with ret on the next output line.
define ptx_device i8 @t2_u8(i8* %p) {
entry:
;CHECK: ld.global.u8 rq{{[0-9]+}}, [r{{[0-9]+}}+1];
;CHECK-NEXT: ret;
%i = getelementptr i8* %p, i32 1
%x = load i8* %i
ret i8 %x
}
define ptx_device i16 @t2_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2];
@ -174,15 +146,6 @@ entry:
ret double %x
}
; Loads the i8 at a run-time index %q from the pointer argument.
; Expected PTX: an add.u32 of two r<N> registers whose result register is
; captured as R0, then ld.global.u8 from [R0] on the next output line.
; Unlike the neighbouring tests, no ret line is checked here.
define ptx_device i8 @t3_u8(i8* %p, i32 %q) {
entry:
;CHECK: add.u32 r[[R0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]];
%i = getelementptr i8* %p, i32 %q
%x = load i8* %i
ret i8 %x
}
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
entry:
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
@ -233,16 +196,6 @@ entry:
ret double %x
}
; Loads element 0 of the external global array @array_i8 and returns it.
; Expected PTX: mov.u32 of the symbol array_i8 into a register captured as R0,
; then ld.global.u8 from [R0], then ret, on consecutive output lines.
define ptx_device i8 @t4_global_u8() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i8]* @array_i8, i32 0, i32 0
%x = load i8* %i
ret i8 %x
}
define ptx_device i16 @t4_global_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
@ -343,16 +296,6 @@ entry:
ret double %x
}
; Loads element 0 of @array_local_i8, which lives in addrspace(2).
; Expected PTX: mov.u32 of the symbol array_local_i8 into a register captured
; as R0, then ld.local.u8 from [R0], then ret, on consecutive output lines.
define ptx_device i8 @t4_local_u8() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i8;
;CHECK-NEXT: ld.local.u8 rq{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i8] addrspace(2)* @array_local_i8, i32 0, i32 0
%x = load i8 addrspace(2)* %i
ret i8 %x
}
define ptx_device i16 @t4_local_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
@ -403,16 +346,6 @@ entry:
ret double %x
}
; Loads element 0 of @array_shared_i8, which lives in addrspace(4).
; Expected PTX: mov.u32 of the symbol array_shared_i8 into a register captured
; as R0, then ld.shared.u8 from [R0], then ret, on consecutive output lines.
define ptx_device i8 @t4_shared_u8() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i8;
;CHECK-NEXT: ld.shared.u8 rq{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i8] addrspace(4)* @array_shared_i8, i32 0, i32 0
%x = load i8 addrspace(4)* %i
ret i8 %x
}
define ptx_device i16 @t4_shared_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
@ -463,16 +396,6 @@ entry:
ret double %x
}
; Loads element 1 of the external global array @array_i8 and returns it.
; Expected PTX: mov.u32 of the symbol array_i8 into a register captured as R0,
; then ld.global.u8 from [R0+1] (constant index folded into the address
; operand), then ret, on consecutive output lines.
define ptx_device i8 @t5_u8() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]+1];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i8]* @array_i8, i32 0, i32 1
%x = load i8* %i
ret i8 %x
}
define ptx_device i16 @t5_u16() {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;

View File

@ -1,11 +1,5 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Returns the i8 constant 0.
; Expected PTX: mov.u8 of the immediate 0 into an rq<N> register, followed
; somewhere later by ret (the ret pattern is not tied to the next line).
define ptx_device i8 @t1_u8() {
; CHECK: mov.u8 rq{{[0-9]+}}, 0;
; CHECK: ret;
ret i8 0
}
define ptx_device i16 @t1_u16() {
; CHECK: mov.u16 rh{{[0-9]+}}, 0;
; CHECK: ret;
@ -36,12 +30,6 @@ define ptx_device double @t1_f64() {
ret double 0.0
}
; Returns its i8 argument unchanged.
; Expected PTX: a register-to-register mov.u8 between two rq<N> registers,
; followed somewhere later by ret (the ret pattern is not tied to the next line).
define ptx_device i8 @t2_u8(i8 %x) {
; CHECK: mov.u8 rq{{[0-9]+}}, rq{{[0-9]+}};
; CHECK: ret;
ret i8 %x
}
define ptx_device i16 @t2_u16(i16 %x) {
; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}};
; CHECK: ret;

View File

@ -1,17 +1,5 @@
; Store tests: this file is piped through llc for the ptx32 target and the
; emitted assembly is verified by FileCheck against the patterns below.
; RUN: llc < %s -march=ptx32 | FileCheck %s

; External arrays used by the store tests. Each declaration is paired with the
; .extern directive expected in the output: the default address space is
; expected as .global, addrspace(1) as .const, addrspace(2) as .local and
; addrspace(4) as .shared. The expected element type is always .b8, with the
; array length given in bytes (e.g. 10 x i16 is expected as [20]).
;CHECK: .extern .global .b8 array_i8[10];
@array_i8 = external global [10 x i8]
;CHECK: .extern .const .b8 array_constant_i8[10];
@array_constant_i8 = external addrspace(1) constant [10 x i8]
;CHECK: .extern .local .b8 array_local_i8[10];
@array_local_i8 = external addrspace(2) global [10 x i8]
;CHECK: .extern .shared .b8 array_shared_i8[10];
@array_shared_i8 = external addrspace(4) global [10 x i8]
;CHECK: .extern .global .b8 array_i16[20];
@array_i16 = external global [10 x i16]
@ -72,13 +60,6 @@
;CHECK: .extern .shared .b8 array_shared_double[80];
@array_shared_double = external addrspace(4) global [10 x double]
; Stores the i8 argument %x directly through the pointer argument %p.
; Expected PTX: st.global.u8 of an rq<N> register to the address held in an
; r<N> register (no offset), with ret on the next output line.
define ptx_device void @t1_u8(i8* %p, i8 %x) {
entry:
;CHECK: st.global.u8 [r{{[0-9]+}}], rq{{[0-9]+}};
;CHECK-NEXT: ret;
store i8 %x, i8* %p
ret void
}
define ptx_device void @t1_u16(i16* %p, i16 %x) {
entry:
@ -120,15 +101,6 @@ entry:
ret void
}
; Stores the i8 argument %x one element past the pointer argument %p.
; Expected PTX: the constant index is folded into the address operand as
; [r<N>+1] of a single st.global.u8, with ret on the next output line.
define ptx_device void @t2_u8(i8* %p, i8 %x) {
entry:
;CHECK: st.global.u8 [r{{[0-9]+}}+1], rq{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i8* %p, i32 1
store i8 %x, i8* %i
ret void
}
define ptx_device void @t2_u16(i16* %p, i16 %x) {
entry:
;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}};
@ -174,16 +146,6 @@ entry:
ret void
}
; Stores the i8 argument %x at a run-time index %q from the pointer %p.
; Expected PTX: an add.u32 of two r<N> registers whose result register is
; captured as R0, then st.global.u8 to [R0], then ret, on consecutive
; output lines.
define ptx_device void @t3_u8(i8* %p, i32 %q, i8 %x) {
entry:
;CHECK: add.u32 r[[R0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
;CHECK-NEXT: st.global.u8 [r[[R0]]], rq{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i8* %p, i32 %q
store i8 %x, i8* %i
ret void
}
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
entry:
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
@ -239,16 +201,6 @@ entry:
ret void
}
; Stores the i8 argument %x into element 0 of the external global @array_i8.
; Expected PTX: mov.u32 of the symbol array_i8 into a register captured as R0,
; then st.global.u8 to [R0], then ret, on consecutive output lines.
define ptx_device void @t4_global_u8(i8 %x) {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
;CHECK-NEXT: st.global.u8 [r[[R0]]], rq{{[0-9]+}};
;CHECK-NEXT: ret;
; Index with i32 constants like the other array tests; the i8-typed zero
; indices used previously addressed the same element.
%i = getelementptr [10 x i8]* @array_i8, i32 0, i32 0
store i8 %x, i8* %i
ret void
}
define ptx_device void @t4_global_u16(i16 %x) {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
@ -299,16 +251,6 @@ entry:
ret void
}
; Stores the i8 argument %x into element 0 of @array_local_i8, which lives in
; addrspace(2).
; Expected PTX: mov.u32 of the symbol array_local_i8 into a register captured
; as R0, then st.local.u8 to [R0], then ret, on consecutive output lines.
define ptx_device void @t4_local_u8(i8 %x) {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i8;
;CHECK-NEXT: st.local.u8 [r[[R0]]], rq{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i8] addrspace(2)* @array_local_i8, i32 0, i32 0
store i8 %x, i8 addrspace(2)* %i
ret void
}
define ptx_device void @t4_local_u16(i16 %x) {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
@ -359,16 +301,6 @@ entry:
ret void
}
; Stores the i8 argument %x into element 0 of @array_shared_i8, which lives in
; addrspace(4).
; Expected PTX: mov.u32 of the symbol array_shared_i8 into a register captured
; as R0, then st.shared.u8 to [R0], then ret, on consecutive output lines.
define ptx_device void @t4_shared_u8(i8 %x) {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i8;
;CHECK-NEXT: st.shared.u8 [r[[R0]]], rq{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i8] addrspace(4)* @array_shared_i8, i32 0, i32 0
store i8 %x, i8 addrspace(4)* %i
ret void
}
define ptx_device void @t4_shared_u16(i16 %x) {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
@ -419,16 +351,6 @@ entry:
ret void
}
; Stores the i8 argument %x into element 1 of the external global @array_i8.
; Expected PTX: mov.u32 of the symbol array_i8 into a register captured as R0,
; then st.global.u8 to [R0+1] (constant index folded into the address
; operand), then ret, on consecutive output lines.
define ptx_device void @t5_u8(i8 %x) {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
;CHECK-NEXT: st.global.u8 [r[[R0]]+1], rq{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i8]* @array_i8, i32 0, i32 1
store i8 %x, i8* %i
ret void
}
define ptx_device void @t5_u16(i16 %x) {
entry:
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;