mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-30 17:33:24 +00:00
[NVPTX] Clean up comparison/select/convert patterns and factor out PTX instructions from their patterns
Test case: no breakage.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185175 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1c07dae9fc
commit
ef0ccc9320
@ -131,6 +131,53 @@ enum VecType {
|
|||||||
V4 = 4
|
V4 = 4
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// PTXCvtMode - Encoding of the conversion-mode immediate operand.
///
/// A mode value is one base mode (extracted with BASE_MASK) optionally
/// combined with the FTZ_FLAG and SAT_FLAG bits.
namespace PTXCvtMode {
enum CvtMode {
  // Base modes. They are numbered consecutively from zero and must all fit
  // inside BASE_MASK.
  NONE = 0,
  RNI = 1,
  RZI = 2,
  RMI = 3,
  RPI = 4,
  RN = 5,
  RZ = 6,
  RM = 7,
  RP = 8,

  // Bits of the operand that hold the base mode.
  BASE_MASK = 0x0F,

  // Flag bits that live above the base mode.
  FTZ_FLAG = 0x10,
  SAT_FLAG = 0x20
};
}
|
||||||
|
|
||||||
|
/// PTXCmpMode - Encoding of the comparison-mode immediate operand.
///
/// A mode value is one base comparison (extracted with BASE_MASK) optionally
/// combined with the FTZ_FLAG bit.
namespace PTXCmpMode {
enum CmpMode {
  // Base comparisons, numbered consecutively from zero.
  EQ = 0,
  NE = 1,
  LT = 2,
  LE = 3,
  GT = 4,
  GE = 5,
  LO = 6,
  LS = 7,
  HI = 8,
  HS = 9,
  EQU = 10,
  NEU = 11,
  LTU = 12,
  LEU = 13,
  GTU = 14,
  GEU = 15,
  NUM = 16,
  // Spelled out in full because NAN is a preprocessor macro.
  NotANumber = 17,

  // Bits of the operand that hold the base comparison.
  BASE_MASK = 0xFF,

  // Flag bit that lives above the base comparison.
  FTZ_FLAG = 0x100
};
}
|
||||||
}
|
}
|
||||||
} // end namespace llvm;
|
} // end namespace llvm;
|
||||||
|
|
||||||
|
@ -693,6 +693,130 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
|
|||||||
llvm_unreachable("Empty Modifier");
|
llvm_unreachable("Empty Modifier");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void NVPTXAsmPrinter::printCvtMode(const MachineInstr *MI, int OpNum,
|
||||||
|
raw_ostream &O, const char *Modifier) {
|
||||||
|
const MachineOperand &MO = MI->getOperand(OpNum);
|
||||||
|
int64_t Imm = MO.getImm();
|
||||||
|
|
||||||
|
if (strcmp(Modifier, "ftz") == 0) {
|
||||||
|
// FTZ flag
|
||||||
|
if (Imm & NVPTX::PTXCvtMode::FTZ_FLAG)
|
||||||
|
O << ".ftz";
|
||||||
|
} else if (strcmp(Modifier, "sat") == 0) {
|
||||||
|
// SAT flag
|
||||||
|
if (Imm & NVPTX::PTXCvtMode::SAT_FLAG)
|
||||||
|
O << ".sat";
|
||||||
|
} else if (strcmp(Modifier, "base") == 0) {
|
||||||
|
// Default operand
|
||||||
|
switch (Imm & NVPTX::PTXCvtMode::BASE_MASK) {
|
||||||
|
default:
|
||||||
|
return;
|
||||||
|
case NVPTX::PTXCvtMode::NONE:
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCvtMode::RNI:
|
||||||
|
O << ".rni";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCvtMode::RZI:
|
||||||
|
O << ".rzi";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCvtMode::RMI:
|
||||||
|
O << ".rmi";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCvtMode::RPI:
|
||||||
|
O << ".rpi";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCvtMode::RN:
|
||||||
|
O << ".rn";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCvtMode::RZ:
|
||||||
|
O << ".rz";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCvtMode::RM:
|
||||||
|
O << ".rm";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCvtMode::RP:
|
||||||
|
O << ".rp";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
llvm_unreachable("Invalid conversion modifier");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void NVPTXAsmPrinter::printCmpMode(const MachineInstr *MI, int OpNum,
|
||||||
|
raw_ostream &O, const char *Modifier) {
|
||||||
|
const MachineOperand &MO = MI->getOperand(OpNum);
|
||||||
|
int64_t Imm = MO.getImm();
|
||||||
|
|
||||||
|
if (strcmp(Modifier, "ftz") == 0) {
|
||||||
|
// FTZ flag
|
||||||
|
if (Imm & NVPTX::PTXCmpMode::FTZ_FLAG)
|
||||||
|
O << ".ftz";
|
||||||
|
} else if (strcmp(Modifier, "base") == 0) {
|
||||||
|
switch (Imm & NVPTX::PTXCmpMode::BASE_MASK) {
|
||||||
|
default:
|
||||||
|
return;
|
||||||
|
case NVPTX::PTXCmpMode::EQ:
|
||||||
|
O << ".eq";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::NE:
|
||||||
|
O << ".ne";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::LT:
|
||||||
|
O << ".lt";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::LE:
|
||||||
|
O << ".le";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::GT:
|
||||||
|
O << ".gt";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::GE:
|
||||||
|
O << ".ge";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::LO:
|
||||||
|
O << ".lo";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::LS:
|
||||||
|
O << ".ls";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::HI:
|
||||||
|
O << ".hi";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::HS:
|
||||||
|
O << ".hs";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::EQU:
|
||||||
|
O << ".equ";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::NEU:
|
||||||
|
O << ".neu";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::LTU:
|
||||||
|
O << ".ltu";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::LEU:
|
||||||
|
O << ".leu";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::GTU:
|
||||||
|
O << ".gtu";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::GEU:
|
||||||
|
O << ".geu";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::NUM:
|
||||||
|
O << ".num";
|
||||||
|
break;
|
||||||
|
case NVPTX::PTXCmpMode::NotANumber:
|
||||||
|
O << ".nan";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
llvm_unreachable("Empty Modifier");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
|
void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
|
||||||
|
|
||||||
emitLinkageDirective(F, O);
|
emitLinkageDirective(F, O);
|
||||||
@ -2033,10 +2157,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
|
|||||||
case NVPTX::StoreParamI32:
|
case NVPTX::StoreParamI32:
|
||||||
case NVPTX::StoreParamI64:
|
case NVPTX::StoreParamI64:
|
||||||
case NVPTX::StoreParamI8:
|
case NVPTX::StoreParamI8:
|
||||||
case NVPTX::StoreParamS32I8:
|
|
||||||
case NVPTX::StoreParamU32I8:
|
|
||||||
case NVPTX::StoreParamS32I16:
|
|
||||||
case NVPTX::StoreParamU32I16:
|
|
||||||
case NVPTX::StoreRetvalF32:
|
case NVPTX::StoreRetvalF32:
|
||||||
case NVPTX::StoreRetvalF64:
|
case NVPTX::StoreRetvalF64:
|
||||||
case NVPTX::StoreRetvalI16:
|
case NVPTX::StoreRetvalI16:
|
||||||
@ -2056,11 +2176,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
|
|||||||
case NVPTX::LoadParamMemI32:
|
case NVPTX::LoadParamMemI32:
|
||||||
case NVPTX::LoadParamMemI64:
|
case NVPTX::LoadParamMemI64:
|
||||||
case NVPTX::LoadParamMemI8:
|
case NVPTX::LoadParamMemI8:
|
||||||
case NVPTX::LoadParamRegF32:
|
|
||||||
case NVPTX::LoadParamRegF64:
|
|
||||||
case NVPTX::LoadParamRegI16:
|
|
||||||
case NVPTX::LoadParamRegI32:
|
|
||||||
case NVPTX::LoadParamRegI64:
|
|
||||||
case NVPTX::PrototypeInst:
|
case NVPTX::PrototypeInst:
|
||||||
case NVPTX::DBG_VALUE:
|
case NVPTX::DBG_VALUE:
|
||||||
return true;
|
return true;
|
||||||
|
@ -198,6 +198,10 @@ private:
|
|||||||
const char *Modifier = 0);
|
const char *Modifier = 0);
|
||||||
void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
|
void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
|
||||||
const char *Modifier = 0);
|
const char *Modifier = 0);
|
||||||
|
// Prints one component of the conversion-mode immediate at operand OpNum of
// MI to O. Modifier selects the component: "ftz", "sat" or "base".
void printCvtMode(const MachineInstr *MI, int OpNum, raw_ostream &O,
|
||||||
|
const char *Modifier = 0);
|
||||||
|
// Prints one component of the comparison-mode immediate at operand OpNum of
// MI to O. Modifier selects the component: "ftz" or "base".
void printCmpMode(const MachineInstr *MI, int OpNum, raw_ostream &O,
|
||||||
|
const char *Modifier = 0);
|
||||||
void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
|
void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
|
||||||
raw_ostream &O);
|
raw_ostream &O);
|
||||||
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
|
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
|
||||||
|
@ -1965,13 +1965,28 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case NVPTXISD::StoreParamU32:
|
// Special case: if we have a sign-extend/zero-extend node, insert the
|
||||||
Opcode = NVPTX::StoreParamU32I16;
|
// conversion instruction first, and use that as the value operand to
|
||||||
|
// the selected StoreParam node.
|
||||||
|
case NVPTXISD::StoreParamU32: {
|
||||||
|
Opcode = NVPTX::StoreParamI32;
|
||||||
|
SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
|
||||||
|
MVT::i32);
|
||||||
|
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
|
||||||
|
MVT::i32, Ops[0], CvtNone);
|
||||||
|
Ops[0] = SDValue(Cvt, 0);
|
||||||
break;
|
break;
|
||||||
case NVPTXISD::StoreParamS32:
|
}
|
||||||
Opcode = NVPTX::StoreParamS32I16;
|
case NVPTXISD::StoreParamS32: {
|
||||||
|
Opcode = NVPTX::StoreParamI32;
|
||||||
|
SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
|
||||||
|
MVT::i32);
|
||||||
|
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
|
||||||
|
MVT::i32, Ops[0], CvtNone);
|
||||||
|
Ops[0] = SDValue(Cvt, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
SDNode *Ret =
|
SDNode *Ret =
|
||||||
CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
|
CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
|
||||||
|
@ -259,8 +259,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
return "NVPTXISD::StoreParamS32";
|
return "NVPTXISD::StoreParamS32";
|
||||||
case NVPTXISD::StoreParamU32:
|
case NVPTXISD::StoreParamU32:
|
||||||
return "NVPTXISD::StoreParamU32";
|
return "NVPTXISD::StoreParamU32";
|
||||||
case NVPTXISD::MoveToParam:
|
|
||||||
return "NVPTXISD::MoveToParam";
|
|
||||||
case NVPTXISD::CallArgBegin:
|
case NVPTXISD::CallArgBegin:
|
||||||
return "NVPTXISD::CallArgBegin";
|
return "NVPTXISD::CallArgBegin";
|
||||||
case NVPTXISD::CallArg:
|
case NVPTXISD::CallArg:
|
||||||
@ -279,10 +277,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
return "NVPTXISD::Prototype";
|
return "NVPTXISD::Prototype";
|
||||||
case NVPTXISD::MoveParam:
|
case NVPTXISD::MoveParam:
|
||||||
return "NVPTXISD::MoveParam";
|
return "NVPTXISD::MoveParam";
|
||||||
case NVPTXISD::MoveRetval:
|
|
||||||
return "NVPTXISD::MoveRetval";
|
|
||||||
case NVPTXISD::MoveToRetval:
|
|
||||||
return "NVPTXISD::MoveToRetval";
|
|
||||||
case NVPTXISD::StoreRetval:
|
case NVPTXISD::StoreRetval:
|
||||||
return "NVPTXISD::StoreRetval";
|
return "NVPTXISD::StoreRetval";
|
||||||
case NVPTXISD::StoreRetvalV2:
|
case NVPTXISD::StoreRetvalV2:
|
||||||
|
@ -35,7 +35,6 @@ enum NodeType {
|
|||||||
DeclareRetParam,
|
DeclareRetParam,
|
||||||
DeclareRet,
|
DeclareRet,
|
||||||
DeclareScalarRet,
|
DeclareScalarRet,
|
||||||
MoveToParam,
|
|
||||||
PrintCall,
|
PrintCall,
|
||||||
PrintCallUni,
|
PrintCallUni,
|
||||||
CallArgBegin,
|
CallArgBegin,
|
||||||
@ -47,8 +46,6 @@ enum NodeType {
|
|||||||
CallSymbol,
|
CallSymbol,
|
||||||
Prototype,
|
Prototype,
|
||||||
MoveParam,
|
MoveParam,
|
||||||
MoveRetval,
|
|
||||||
MoveToRetval,
|
|
||||||
PseudoUseParam,
|
PseudoUseParam,
|
||||||
RETURN,
|
RETURN,
|
||||||
CallSeqBegin,
|
CallSeqBegin,
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -82,49 +82,36 @@ def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
|
|||||||
//-----------------------------------
|
//-----------------------------------
|
||||||
|
|
||||||
// Map min(1.0, max(0.0, x)) to sat(x)
|
// Map min(1.0, max(0.0, x)) to sat(x)
|
||||||
multiclass SAT<NVPTXRegClass regclass, Operand fimm, Intrinsic IntMinOp,
|
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
|
||||||
Intrinsic IntMaxOp, PatLeaf f0, PatLeaf f1, string OpStr> {
|
// NaN
|
||||||
|
|
||||||
// fmin(1.0, fmax(0.0, x)) => sat(x)
|
|
||||||
def SAT11 : NVPTXInst<(outs regclass:$dst),
|
|
||||||
(ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
|
|
||||||
OpStr,
|
|
||||||
[(set regclass:$dst, (IntMinOp f1:$srcf0 ,
|
|
||||||
(IntMaxOp f0:$srcf1, regclass:$src)))]>;
|
|
||||||
|
|
||||||
// fmin(1.0, fmax(x, 0.0)) => sat(x)
|
|
||||||
def SAT12 : NVPTXInst<(outs regclass:$dst),
|
|
||||||
(ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
|
|
||||||
OpStr,
|
|
||||||
[(set regclass:$dst, (IntMinOp f1:$srcf0 ,
|
|
||||||
(IntMaxOp regclass:$src, f0:$srcf1)))]>;
|
|
||||||
|
|
||||||
// fmin(fmax(0.0, x), 1.0) => sat(x)
|
|
||||||
def SAT13 : NVPTXInst<(outs regclass:$dst),
|
|
||||||
(ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
|
|
||||||
OpStr,
|
|
||||||
[(set regclass:$dst, (IntMinOp
|
|
||||||
(IntMaxOp f0:$srcf0, regclass:$src), f1:$srcf1))]>;
|
|
||||||
|
|
||||||
// fmin(fmax(x, 0.0), 1.0) => sat(x)
|
|
||||||
def SAT14 : NVPTXInst<(outs regclass:$dst),
|
|
||||||
(ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
|
|
||||||
OpStr,
|
|
||||||
[(set regclass:$dst, (IntMinOp
|
|
||||||
(IntMaxOp regclass:$src, f0:$srcf0), f1:$srcf1))]>;
|
|
||||||
|
|
||||||
}
|
|
||||||
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x
|
|
||||||
// is NaN
|
|
||||||
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
|
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
|
||||||
// Same story for fmax, fmin.
|
// Same story for fmax, fmin.
|
||||||
|
|
||||||
defm SAT_fmin_fmax_f : SAT<Float32Regs, f32imm, int_nvvm_fmin_f,
|
def : Pat<(int_nvvm_fmin_f immFloat1,
|
||||||
int_nvvm_fmax_f, immFloat0, immFloat1,
|
(int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
|
||||||
"cvt.sat.f32.f32 \t$dst, $src; \n">;
|
(CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
|
||||||
defm SAT_fmin_fmax_d : SAT<Float64Regs, f64imm, int_nvvm_fmin_d,
|
def : Pat<(int_nvvm_fmin_f immFloat1,
|
||||||
int_nvvm_fmax_d, immDouble0, immDouble1,
|
(int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
|
||||||
"cvt.sat.f64.f64 \t$dst, $src; \n">;
|
(CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
|
||||||
|
def : Pat<(int_nvvm_fmin_f
|
||||||
|
(int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
|
||||||
|
(CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
|
||||||
|
def : Pat<(int_nvvm_fmin_f
|
||||||
|
(int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
|
||||||
|
(CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
|
||||||
|
|
||||||
|
def : Pat<(int_nvvm_fmin_d immDouble1,
|
||||||
|
(int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
|
||||||
|
(CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
|
||||||
|
def : Pat<(int_nvvm_fmin_d immDouble1,
|
||||||
|
(int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
|
||||||
|
(CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
|
||||||
|
def : Pat<(int_nvvm_fmin_d
|
||||||
|
(int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
|
||||||
|
(CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
|
||||||
|
def : Pat<(int_nvvm_fmin_d
|
||||||
|
(int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
|
||||||
|
(CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
|
||||||
|
|
||||||
|
|
||||||
// We need a full string for OpcStr here because we need to deal with case like
|
// We need a full string for OpcStr here because we need to deal with case like
|
||||||
@ -312,19 +299,19 @@ def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
|
|||||||
// Floor Ceil
|
// Floor Ceil
|
||||||
//
|
//
|
||||||
|
|
||||||
def INT_NVVM_FLOOR_FTZ_F : F_MATH_1<"cvt.rmi.ftz.f32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
|
||||||
Float32Regs, Float32Regs, int_nvvm_floor_ftz_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
|
||||||
def INT_NVVM_FLOOR_F : F_MATH_1<"cvt.rmi.f32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
|
||||||
Float32Regs, Float32Regs, int_nvvm_floor_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_FLOOR_D : F_MATH_1<"cvt.rmi.f64.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
|
||||||
Float64Regs, Float64Regs, int_nvvm_floor_d>;
|
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
|
||||||
|
|
||||||
def INT_NVVM_CEIL_FTZ_F : F_MATH_1<"cvt.rpi.ftz.f32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
|
||||||
Float32Regs, Float32Regs, int_nvvm_ceil_ftz_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
|
||||||
def INT_NVVM_CEIL_F : F_MATH_1<"cvt.rpi.f32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
|
||||||
Float32Regs, Float32Regs, int_nvvm_ceil_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
|
||||||
def INT_NVVM_CEIL_D : F_MATH_1<"cvt.rpi.f64.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
|
||||||
Float64Regs, Float64Regs, int_nvvm_ceil_d>;
|
(CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Abs
|
// Abs
|
||||||
@ -347,37 +334,34 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
|
|||||||
// Round
|
// Round
|
||||||
//
|
//
|
||||||
|
|
||||||
def INT_NVVM_ROUND_FTZ_F : F_MATH_1<"cvt.rni.ftz.f32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
|
||||||
Float32Regs, Float32Regs, int_nvvm_round_ftz_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
|
||||||
def INT_NVVM_ROUND_F : F_MATH_1<"cvt.rni.f32.f32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_round_f Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_round_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
|
||||||
|
def : Pat<(int_nvvm_round_d Float64Regs:$a),
|
||||||
def INT_NVVM_ROUND_D : F_MATH_1<"cvt.rni.f64.f64 \t$dst, $src0;", Float64Regs,
|
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
|
||||||
Float64Regs, int_nvvm_round_d>;
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Trunc
|
// Trunc
|
||||||
//
|
//
|
||||||
|
|
||||||
def INT_NVVM_TRUNC_FTZ_F : F_MATH_1<"cvt.rzi.ftz.f32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
|
||||||
Float32Regs, Float32Regs, int_nvvm_trunc_ftz_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
|
||||||
def INT_NVVM_TRUNC_F : F_MATH_1<"cvt.rzi.f32.f32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_trunc_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
|
||||||
|
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
|
||||||
def INT_NVVM_TRUNC_D : F_MATH_1<"cvt.rzi.f64.f64 \t$dst, $src0;", Float64Regs,
|
(CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
|
||||||
Float64Regs, int_nvvm_trunc_d>;
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Saturate
|
// Saturate
|
||||||
//
|
//
|
||||||
|
|
||||||
def INT_NVVM_SATURATE_FTZ_F : F_MATH_1<"cvt.sat.ftz.f32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
|
||||||
Float32Regs, Float32Regs, int_nvvm_saturate_ftz_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
|
||||||
def INT_NVVM_SATURATE_F : F_MATH_1<"cvt.sat.f32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
|
||||||
Float32Regs, Float32Regs, int_nvvm_saturate_f>;
|
(CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
|
||||||
|
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
|
||||||
def INT_NVVM_SATURATE_D : F_MATH_1<"cvt.sat.f64.f64 \t$dst, $src0;",
|
(CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
|
||||||
Float64Regs, Float64Regs, int_nvvm_saturate_d>;
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Exp2 Log2
|
// Exp2 Log2
|
||||||
@ -568,110 +552,110 @@ def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
|
|||||||
// Convert
|
// Convert
|
||||||
//
|
//
|
||||||
|
|
||||||
def INT_NVVM_D2F_RN_FTZ : F_MATH_1<"cvt.rn.ftz.f32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
|
||||||
Float32Regs, Float64Regs, int_nvvm_d2f_rn_ftz>;
|
(CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
|
||||||
def INT_NVVM_D2F_RN : F_MATH_1<"cvt.rn.f32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
|
||||||
Float32Regs, Float64Regs, int_nvvm_d2f_rn>;
|
(CVT_f32_f64 Float64Regs:$a, CvtRN)>;
|
||||||
def INT_NVVM_D2F_RZ_FTZ : F_MATH_1<"cvt.rz.ftz.f32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
|
||||||
Float32Regs, Float64Regs, int_nvvm_d2f_rz_ftz>;
|
(CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
|
||||||
def INT_NVVM_D2F_RZ : F_MATH_1<"cvt.rz.f32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
|
||||||
Float32Regs, Float64Regs, int_nvvm_d2f_rz>;
|
(CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
|
||||||
def INT_NVVM_D2F_RM_FTZ : F_MATH_1<"cvt.rm.ftz.f32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
|
||||||
Float32Regs, Float64Regs, int_nvvm_d2f_rm_ftz>;
|
(CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
|
||||||
def INT_NVVM_D2F_RM : F_MATH_1<"cvt.rm.f32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
|
||||||
Float32Regs, Float64Regs, int_nvvm_d2f_rm>;
|
(CVT_f32_f64 Float64Regs:$a, CvtRM)>;
|
||||||
def INT_NVVM_D2F_RP_FTZ : F_MATH_1<"cvt.rp.ftz.f32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
|
||||||
Float32Regs, Float64Regs, int_nvvm_d2f_rp_ftz>;
|
(CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
|
||||||
def INT_NVVM_D2F_RP : F_MATH_1<"cvt.rp.f32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
|
||||||
Float32Regs, Float64Regs, int_nvvm_d2f_rp>;
|
(CVT_f32_f64 Float64Regs:$a, CvtRP)>;
|
||||||
|
|
||||||
def INT_NVVM_D2I_RN : F_MATH_1<"cvt.rni.s32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2i_rn>;
|
(CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
|
||||||
def INT_NVVM_D2I_RZ : F_MATH_1<"cvt.rzi.s32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2i_rz>;
|
(CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
|
||||||
def INT_NVVM_D2I_RM : F_MATH_1<"cvt.rmi.s32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2i_rm>;
|
(CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_D2I_RP : F_MATH_1<"cvt.rpi.s32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2i_rp>;
|
(CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
def INT_NVVM_D2UI_RN : F_MATH_1<"cvt.rni.u32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2ui_rn>;
|
(CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
|
||||||
def INT_NVVM_D2UI_RZ : F_MATH_1<"cvt.rzi.u32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2ui_rz>;
|
(CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
|
||||||
def INT_NVVM_D2UI_RM : F_MATH_1<"cvt.rmi.u32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2ui_rm>;
|
(CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_D2UI_RP : F_MATH_1<"cvt.rpi.u32.f64 \t$dst, $src0;",
|
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2ui_rp>;
|
(CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
def INT_NVVM_I2D_RN : F_MATH_1<"cvt.rn.f64.s32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
|
||||||
Float64Regs, Int32Regs, int_nvvm_i2d_rn>;
|
(CVT_f64_s32 Int32Regs:$a, CvtRN)>;
|
||||||
def INT_NVVM_I2D_RZ : F_MATH_1<"cvt.rz.f64.s32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
|
||||||
Float64Regs, Int32Regs, int_nvvm_i2d_rz>;
|
(CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
|
||||||
def INT_NVVM_I2D_RM : F_MATH_1<"cvt.rm.f64.s32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
|
||||||
Float64Regs, Int32Regs, int_nvvm_i2d_rm>;
|
(CVT_f64_s32 Int32Regs:$a, CvtRM)>;
|
||||||
def INT_NVVM_I2D_RP : F_MATH_1<"cvt.rp.f64.s32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
|
||||||
Float64Regs, Int32Regs, int_nvvm_i2d_rp>;
|
(CVT_f64_s32 Int32Regs:$a, CvtRP)>;
|
||||||
|
|
||||||
def INT_NVVM_UI2D_RN : F_MATH_1<"cvt.rn.f64.u32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
|
||||||
Float64Regs, Int32Regs, int_nvvm_ui2d_rn>;
|
(CVT_f64_u32 Int32Regs:$a, CvtRN)>;
|
||||||
def INT_NVVM_UI2D_RZ : F_MATH_1<"cvt.rz.f64.u32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
|
||||||
Float64Regs, Int32Regs, int_nvvm_ui2d_rz>;
|
(CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
|
||||||
def INT_NVVM_UI2D_RM : F_MATH_1<"cvt.rm.f64.u32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
|
||||||
Float64Regs, Int32Regs, int_nvvm_ui2d_rm>;
|
(CVT_f64_u32 Int32Regs:$a, CvtRM)>;
|
||||||
def INT_NVVM_UI2D_RP : F_MATH_1<"cvt.rp.f64.u32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
|
||||||
Float64Regs, Int32Regs, int_nvvm_ui2d_rp>;
|
(CVT_f64_u32 Int32Regs:$a, CvtRP)>;
|
||||||
|
|
||||||
def INT_NVVM_F2I_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
|
||||||
Int32Regs, Float32Regs, int_nvvm_f2i_rn_ftz>;
|
(CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
|
||||||
def INT_NVVM_F2I_RN : F_MATH_1<"cvt.rni.s32.f32 \t$dst, $src0;", Int32Regs,
|
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2i_rn>;
|
(CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
|
||||||
def INT_NVVM_F2I_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
|
||||||
Int32Regs, Float32Regs, int_nvvm_f2i_rz_ftz>;
|
(CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
|
||||||
def INT_NVVM_F2I_RZ : F_MATH_1<"cvt.rzi.s32.f32 \t$dst, $src0;", Int32Regs,
|
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2i_rz>;
|
(CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
|
||||||
def INT_NVVM_F2I_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
|
||||||
Int32Regs, Float32Regs, int_nvvm_f2i_rm_ftz>;
|
(CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
|
||||||
def INT_NVVM_F2I_RM : F_MATH_1<"cvt.rmi.s32.f32 \t$dst, $src0;", Int32Regs,
|
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2i_rm>;
|
(CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_F2I_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
|
||||||
Int32Regs, Float32Regs, int_nvvm_f2i_rp_ftz>;
|
(CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
|
||||||
def INT_NVVM_F2I_RP : F_MATH_1<"cvt.rpi.s32.f32 \t$dst, $src0;", Int32Regs,
|
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2i_rp>;
|
(CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
def INT_NVVM_F2UI_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
|
||||||
Int32Regs, Float32Regs, int_nvvm_f2ui_rn_ftz>;
|
(CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
|
||||||
def INT_NVVM_F2UI_RN : F_MATH_1<"cvt.rni.u32.f32 \t$dst, $src0;", Int32Regs,
|
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ui_rn>;
|
(CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
|
||||||
def INT_NVVM_F2UI_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
|
||||||
Int32Regs, Float32Regs, int_nvvm_f2ui_rz_ftz>;
|
(CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
|
||||||
def INT_NVVM_F2UI_RZ : F_MATH_1<"cvt.rzi.u32.f32 \t$dst, $src0;", Int32Regs,
|
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ui_rz>;
|
(CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
|
||||||
def INT_NVVM_F2UI_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
|
||||||
Int32Regs, Float32Regs, int_nvvm_f2ui_rm_ftz>;
|
(CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
|
||||||
def INT_NVVM_F2UI_RM : F_MATH_1<"cvt.rmi.u32.f32 \t$dst, $src0;", Int32Regs,
|
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ui_rm>;
|
(CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_F2UI_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u32.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
|
||||||
Int32Regs, Float32Regs, int_nvvm_f2ui_rp_ftz>;
|
(CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
|
||||||
def INT_NVVM_F2UI_RP : F_MATH_1<"cvt.rpi.u32.f32 \t$dst, $src0;", Int32Regs,
|
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ui_rp>;
|
(CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
def INT_NVVM_I2F_RN : F_MATH_1<"cvt.rn.f32.s32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
|
||||||
Int32Regs, int_nvvm_i2f_rn>;
|
(CVT_f32_s32 Int32Regs:$a, CvtRN)>;
|
||||||
def INT_NVVM_I2F_RZ : F_MATH_1<"cvt.rz.f32.s32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
|
||||||
Int32Regs, int_nvvm_i2f_rz>;
|
(CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
|
||||||
def INT_NVVM_I2F_RM : F_MATH_1<"cvt.rm.f32.s32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
|
||||||
Int32Regs, int_nvvm_i2f_rm>;
|
(CVT_f32_s32 Int32Regs:$a, CvtRM)>;
|
||||||
def INT_NVVM_I2F_RP : F_MATH_1<"cvt.rp.f32.s32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
|
||||||
Int32Regs, int_nvvm_i2f_rp>;
|
(CVT_f32_s32 Int32Regs:$a, CvtRP)>;
|
||||||
|
|
||||||
def INT_NVVM_UI2F_RN : F_MATH_1<"cvt.rn.f32.u32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
|
||||||
Int32Regs, int_nvvm_ui2f_rn>;
|
(CVT_f32_u32 Int32Regs:$a, CvtRN)>;
|
||||||
def INT_NVVM_UI2F_RZ : F_MATH_1<"cvt.rz.f32.u32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
|
||||||
Int32Regs, int_nvvm_ui2f_rz>;
|
(CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
|
||||||
def INT_NVVM_UI2F_RM : F_MATH_1<"cvt.rm.f32.u32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
|
||||||
Int32Regs, int_nvvm_ui2f_rm>;
|
(CVT_f32_u32 Int32Regs:$a, CvtRM)>;
|
||||||
def INT_NVVM_UI2F_RP : F_MATH_1<"cvt.rp.f32.u32 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
|
||||||
Int32Regs, int_nvvm_ui2f_rp>;
|
(CVT_f32_u32 Int32Regs:$a, CvtRP)>;
|
||||||
|
|
||||||
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
|
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
|
||||||
Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
|
Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
|
||||||
@ -687,91 +671,106 @@ def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
|
|||||||
"}}"))),
|
"}}"))),
|
||||||
Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
|
Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
|
||||||
|
|
||||||
def INT_NVVM_F2LL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s64.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
|
||||||
Int64Regs, Float32Regs, int_nvvm_f2ll_rn_ftz>;
|
(CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
|
||||||
def INT_NVVM_F2LL_RN : F_MATH_1<"cvt.rni.s64.f32 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ll_rn>;
|
(CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
|
||||||
def INT_NVVM_F2LL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s64.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
|
||||||
Int64Regs, Float32Regs, int_nvvm_f2ll_rz_ftz>;
|
(CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
|
||||||
def INT_NVVM_F2LL_RZ : F_MATH_1<"cvt.rzi.s64.f32 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ll_rz>;
|
(CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
|
||||||
def INT_NVVM_F2LL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s64.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
|
||||||
Int64Regs, Float32Regs, int_nvvm_f2ll_rm_ftz>;
|
(CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
|
||||||
def INT_NVVM_F2LL_RM : F_MATH_1<"cvt.rmi.s64.f32 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ll_rm>;
|
(CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_F2LL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s64.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
|
||||||
Int64Regs, Float32Regs, int_nvvm_f2ll_rp_ftz>;
|
(CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
|
||||||
def INT_NVVM_F2LL_RP : F_MATH_1<"cvt.rpi.s64.f32 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ll_rp>;
|
(CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
def INT_NVVM_F2ULL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u64.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
|
||||||
Int64Regs, Float32Regs, int_nvvm_f2ull_rn_ftz>;
|
(CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
|
||||||
def INT_NVVM_F2ULL_RN : F_MATH_1<"cvt.rni.u64.f32 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ull_rn>;
|
(CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
|
||||||
def INT_NVVM_F2ULL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u64.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
|
||||||
Int64Regs, Float32Regs, int_nvvm_f2ull_rz_ftz>;
|
(CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
|
||||||
def INT_NVVM_F2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f32 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ull_rz>;
|
(CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
|
||||||
def INT_NVVM_F2ULL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u64.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
|
||||||
Int64Regs, Float32Regs, int_nvvm_f2ull_rm_ftz>;
|
(CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
|
||||||
def INT_NVVM_F2ULL_RM : F_MATH_1<"cvt.rmi.u64.f32 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ull_rm>;
|
(CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_F2ULL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u64.f32 \t$dst, $src0;",
|
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
|
||||||
Int64Regs, Float32Regs, int_nvvm_f2ull_rp_ftz>;
|
(CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
|
||||||
def INT_NVVM_F2ULL_RP : F_MATH_1<"cvt.rpi.u64.f32 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
|
||||||
Float32Regs, int_nvvm_f2ull_rp>;
|
(CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
def INT_NVVM_D2LL_RN : F_MATH_1<"cvt.rni.s64.f64 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
|
||||||
Float64Regs, int_nvvm_d2ll_rn>;
|
(CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
|
||||||
def INT_NVVM_D2LL_RZ : F_MATH_1<"cvt.rzi.s64.f64 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
|
||||||
Float64Regs, int_nvvm_d2ll_rz>;
|
(CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
|
||||||
def INT_NVVM_D2LL_RM : F_MATH_1<"cvt.rmi.s64.f64 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
|
||||||
Float64Regs, int_nvvm_d2ll_rm>;
|
(CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_D2LL_RP : F_MATH_1<"cvt.rpi.s64.f64 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
|
||||||
Float64Regs, int_nvvm_d2ll_rp>;
|
(CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
def INT_NVVM_D2ULL_RN : F_MATH_1<"cvt.rni.u64.f64 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
|
||||||
Float64Regs, int_nvvm_d2ull_rn>;
|
(CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
|
||||||
def INT_NVVM_D2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f64 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
|
||||||
Float64Regs, int_nvvm_d2ull_rz>;
|
(CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
|
||||||
def INT_NVVM_D2ULL_RM : F_MATH_1<"cvt.rmi.u64.f64 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
|
||||||
Float64Regs, int_nvvm_d2ull_rm>;
|
(CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
|
||||||
def INT_NVVM_D2ULL_RP : F_MATH_1<"cvt.rpi.u64.f64 \t$dst, $src0;", Int64Regs,
|
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
|
||||||
Float64Regs, int_nvvm_d2ull_rp>;
|
(CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
|
||||||
|
|
||||||
def INT_NVVM_LL2F_RN : F_MATH_1<"cvt.rn.f32.s64 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
|
||||||
Int64Regs, int_nvvm_ll2f_rn>;
|
(CVT_f32_s64 Int64Regs:$a, CvtRN)>;
|
||||||
def INT_NVVM_LL2F_RZ : F_MATH_1<"cvt.rz.f32.s64 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
|
||||||
Int64Regs, int_nvvm_ll2f_rz>;
|
(CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
|
||||||
def INT_NVVM_LL2F_RM : F_MATH_1<"cvt.rm.f32.s64 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
|
||||||
Int64Regs, int_nvvm_ll2f_rm>;
|
(CVT_f32_s64 Int64Regs:$a, CvtRM)>;
|
||||||
def INT_NVVM_LL2F_RP : F_MATH_1<"cvt.rp.f32.s64 \t$dst, $src0;", Float32Regs,
|
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
|
||||||
Int64Regs, int_nvvm_ll2f_rp>;
|
(CVT_f32_s64 Int64Regs:$a, CvtRP)>;
|
||||||
def INT_NVVM_ULL2F_RN : F_MATH_1<"cvt.rn.f32.u64 \t$dst, $src0;", Float32Regs,
|
|
||||||
Int64Regs, int_nvvm_ull2f_rn>;
|
|
||||||
def INT_NVVM_ULL2F_RZ : F_MATH_1<"cvt.rz.f32.u64 \t$dst, $src0;", Float32Regs,
|
|
||||||
Int64Regs, int_nvvm_ull2f_rz>;
|
|
||||||
def INT_NVVM_ULL2F_RM : F_MATH_1<"cvt.rm.f32.u64 \t$dst, $src0;", Float32Regs,
|
|
||||||
Int64Regs, int_nvvm_ull2f_rm>;
|
|
||||||
def INT_NVVM_ULL2F_RP : F_MATH_1<"cvt.rp.f32.u64 \t$dst, $src0;", Float32Regs,
|
|
||||||
Int64Regs, int_nvvm_ull2f_rp>;
|
|
||||||
|
|
||||||
def INT_NVVM_LL2D_RN : F_MATH_1<"cvt.rn.f64.s64 \t$dst, $src0;", Float64Regs,
|
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
|
||||||
Int64Regs, int_nvvm_ll2d_rn>;
|
(CVT_f32_u64 Int64Regs:$a, CvtRN)>;
|
||||||
def INT_NVVM_LL2D_RZ : F_MATH_1<"cvt.rz.f64.s64 \t$dst, $src0;", Float64Regs,
|
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
|
||||||
Int64Regs, int_nvvm_ll2d_rz>;
|
(CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
|
||||||
def INT_NVVM_LL2D_RM : F_MATH_1<"cvt.rm.f64.s64 \t$dst, $src0;", Float64Regs,
|
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
|
||||||
Int64Regs, int_nvvm_ll2d_rm>;
|
(CVT_f32_u64 Int64Regs:$a, CvtRM)>;
|
||||||
def INT_NVVM_LL2D_RP : F_MATH_1<"cvt.rp.f64.s64 \t$dst, $src0;", Float64Regs,
|
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
|
||||||
Int64Regs, int_nvvm_ll2d_rp>;
|
(CVT_f32_u64 Int64Regs:$a, CvtRP)>;
|
||||||
def INT_NVVM_ULL2D_RN : F_MATH_1<"cvt.rn.f64.u64 \t$dst, $src0;", Float64Regs,
|
|
||||||
Int64Regs, int_nvvm_ull2d_rn>;
|
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
|
||||||
def INT_NVVM_ULL2D_RZ : F_MATH_1<"cvt.rz.f64.u64 \t$dst, $src0;", Float64Regs,
|
(CVT_f64_s64 Int64Regs:$a, CvtRN)>;
|
||||||
Int64Regs, int_nvvm_ull2d_rz>;
|
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
|
||||||
def INT_NVVM_ULL2D_RM : F_MATH_1<"cvt.rm.f64.u64 \t$dst, $src0;", Float64Regs,
|
(CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
|
||||||
Int64Regs, int_nvvm_ull2d_rm>;
|
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
|
||||||
def INT_NVVM_ULL2D_RP : F_MATH_1<"cvt.rp.f64.u64 \t$dst, $src0;", Float64Regs,
|
(CVT_f64_s64 Int64Regs:$a, CvtRM)>;
|
||||||
Int64Regs, int_nvvm_ull2d_rp>;
|
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
|
||||||
|
(CVT_f64_s64 Int64Regs:$a, CvtRP)>;
|
||||||
|
|
||||||
|
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
|
||||||
|
(CVT_f64_u64 Int64Regs:$a, CvtRN)>;
|
||||||
|
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
|
||||||
|
(CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
|
||||||
|
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
|
||||||
|
(CVT_f64_u64 Int64Regs:$a, CvtRM)>;
|
||||||
|
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
|
||||||
|
(CVT_f64_u64 Int64Regs:$a, CvtRP)>;
|
||||||
|
|
||||||
|
|
||||||
|
// FIXME: Ideally, we could use these patterns instead of the scope-creating
|
||||||
|
// patterns, but ptxas does not like these since .s16 is not compatible with
|
||||||
|
// .f16. The solution is to use .bXX for all integer register types, but we
|
||||||
|
// are not there yet.
|
||||||
|
//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
|
||||||
|
// (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
|
||||||
|
//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
|
||||||
|
// (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
|
||||||
|
//
|
||||||
|
//def : Pat<(int_nvvm_h2f Int16Regs:$a),
|
||||||
|
// (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
|
||||||
|
|
||||||
def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
|
def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
|
||||||
!strconcat(".reg .b16 %temp;\n\t",
|
!strconcat(".reg .b16 %temp;\n\t",
|
||||||
@ -793,6 +792,13 @@ def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
|
|||||||
"}}")))),
|
"}}")))),
|
||||||
Float32Regs, Int16Regs, int_nvvm_h2f>;
|
Float32Regs, Int16Regs, int_nvvm_h2f>;
|
||||||
|
|
||||||
|
def : Pat<(f32 (f16_to_f32 Int16Regs:$a)),
|
||||||
|
(CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
|
||||||
|
def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
|
||||||
|
(CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
|
||||||
|
def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
|
||||||
|
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Bitcast
|
// Bitcast
|
||||||
//
|
//
|
||||||
|
@ -13,11 +13,11 @@ define ptx_kernel void @t1(i1* %a) {
|
|||||||
|
|
||||||
define ptx_kernel void @t2(i1* %a, i8* %b) {
|
define ptx_kernel void @t2(i1* %a, i8* %b) {
|
||||||
; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
|
; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||||
; PTX32: and.b16 temp, %rs{{[0-9]+}}, 1;
|
; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
||||||
; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1;
|
; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
||||||
; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
|
; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||||
; PTX64: and.b16 temp, %rs{{[0-9]+}}, 1;
|
; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
||||||
; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1;
|
; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
|
||||||
|
|
||||||
%t1 = load i1* %a
|
%t1 = load i1* %a
|
||||||
%t2 = select i1 %t1, i8 1, i8 2
|
%t2 = select i1 %t1, i8 1, i8 2
|
||||||
|
Loading…
Reference in New Issue
Block a user