PTX: Add programmable rounding mode specifier for int <-> fp conversion instrs.

Also take this opportunity to clean up the rounding mode pass.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140854 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski 2011-09-30 13:46:52 +00:00
parent 0ad7b6e773
commit 8c1dac54f2
2 changed files with 174 additions and 124 deletions

View File

@ -16,6 +16,7 @@
#include "PTX.h"
#include "PTXTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@ -33,12 +34,18 @@ namespace llvm {
class PTXFPRoundingModePass : public MachineFunctionPass {
private:
static char ID;
typedef std::pair<unsigned, unsigned> RndModeDesc;
PTXTargetMachine& TargetMachine;
DenseMap<unsigned, RndModeDesc> Instrs;
public:
PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
: MachineFunctionPass(ID),
TargetMachine(TM) {}
TargetMachine(TM) {
initializeMap();
}
virtual bool runOnMachineFunction(MachineFunction &MF);
@ -48,6 +55,7 @@ namespace llvm {
private:
void initializeMap();
void processInstruction(MachineInstr &MI);
}; // class PTXFPRoundingModePass
} // namespace llvm
@ -57,7 +65,6 @@ using namespace llvm;
char PTXFPRoundingModePass::ID = 0;
bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
// Look at each basic block
for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
++bbi) {
@ -72,79 +79,96 @@ bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
return false;
}
void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
// If the instruction has a rounding mode set to RndDefault, then assign an
// appropriate rounding mode based on the target device.
void PTXFPRoundingModePass::initializeMap() {
using namespace PTXRoundingMode;
const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
switch (MI.getOpcode()) {
case PTX::FADDrr32:
case PTX::FADDri32:
case PTX::FADDrr64:
case PTX::FADDri64:
case PTX::FSUBrr32:
case PTX::FSUBri32:
case PTX::FSUBrr64:
case PTX::FSUBri64:
case PTX::FMULrr32:
case PTX::FMULri32:
case PTX::FMULrr64:
case PTX::FMULri64:
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven);
// Build a map of default rounding mode for all instructions that need a
// rounding mode.
Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
}
void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
// Is this an instruction that needs a rounding mode?
if (Instrs.count(MI.getOpcode())) {
const RndModeDesc &Desc = Instrs[MI.getOpcode()];
// Get the rounding mode operand
MachineOperand &Op = MI.getOperand(Desc.first);
// Update the rounding mode if needed
if (Op.getImm() == PTXRoundingMode::RndDefault) {
Op.setImm(Desc.second);
}
break;
case PTX::FNEGrr32:
case PTX::FNEGri32:
case PTX::FNEGrr64:
case PTX::FNEGri64:
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
MI.getOperand(1).setImm(PTXRoundingMode::RndNone);
}
break;
case PTX::FDIVrr32:
case PTX::FDIVri32:
case PTX::FDIVrr64:
case PTX::FDIVri64:
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
if (ST.fdivNeedsRoundingMode())
MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven);
else
MI.getOperand(1).setImm(PTXRoundingMode::RndNone);
}
break;
case PTX::FMADrrr32:
case PTX::FMADrri32:
case PTX::FMADrii32:
case PTX::FMADrrr64:
case PTX::FMADrri64:
case PTX::FMADrii64:
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
if (ST.fmadNeedsRoundingMode())
MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven);
else
MI.getOperand(1).setImm(PTXRoundingMode::RndNone);
}
break;
case PTX::FSQRTrr32:
case PTX::FSQRTri32:
case PTX::FSQRTrr64:
case PTX::FSQRTri64:
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven);
}
break;
case PTX::FSINrr32:
case PTX::FSINri32:
case PTX::FSINrr64:
case PTX::FSINri64:
case PTX::FCOSrr32:
case PTX::FCOSri32:
case PTX::FCOSrr64:
case PTX::FCOSri64:
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
MI.getOperand(1).setImm(PTXRoundingMode::RndApprox);
}
break;
}
}

View File

@ -655,7 +655,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// PTX cvt instructions
// Not all of these may actually be used, we just define all possible patterns
// here.
// here (that make sense).
// FIXME: Can we collapse this somehow into a multiclass def?
// To i16
def CVTu16u32
@ -663,13 +664,17 @@ def CVTu16u32
def CVTu16u64
: InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>;
def CVTu16f32
: InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a", []>;
: InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
"cvt$r.u16.f32\t$d, $a", []>;
def CVTs16f32
: InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.s16.f32\t$d, $a", []>;
: InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
"cvt$r.s16.f32\t$d, $a", []>;
def CVTu16f64
: InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a", []>;
: InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
"cvt$r.u16.f64\t$d, $a", []>;
def CVTs16f64
: InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.s16.f64\t$d, $a", []>;
: InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
"cvt$r.s16.f64\t$d, $a", []>;
// To i32
def CVTu32u16
@ -679,13 +684,17 @@ def CVTs32s16
def CVTu32u64
: InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>;
def CVTu32f32
: InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a", []>;
: InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
"cvt$r.u32.f32\t$d, $a", []>;
def CVTs32f32
: InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.s32.f32\t$d, $a", []>;
: InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
"cvt$r.s32.f32\t$d, $a", []>;
def CVTu32f64
: InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a", []>;
: InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
"cvt$r.u32.f64\t$d, $a", []>;
def CVTs32f64
: InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.s32.f64\t$d, $a", []>;
: InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
"cvt$r.s32.f64\t$d, $a", []>;
// To i64
def CVTu64u16
@ -697,43 +706,60 @@ def CVTu64u32
def CVTs64s32
: InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>;
def CVTu64f32
: InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", []>;
: InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
"cvt$r.u64.f32\t$d, $a", []>;
def CVTs64f32
: InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.s64.f32\t$d, $a", []>;
: InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
"cvt$r.s64.f32\t$d, $a", []>;
def CVTu64f64
: InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a", []>;
: InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
"cvt$r.u64.f64\t$d, $a", []>;
def CVTs64f64
: InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.s64.f64\t$d, $a", []>;
: InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
"cvt$r.s64.f64\t$d, $a", []>;
// To f32
def CVTf32u16
: InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a", []>;
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
"cvt$r.f32.u16\t$d, $a", []>;
def CVTf32s16
: InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.s16\t$d, $a", []>;
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
"cvt$r.f32.s16\t$d, $a", []>;
def CVTf32u32
: InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a", []>;
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
"cvt$r.f32.u32\t$d, $a", []>;
def CVTf32s32
: InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.s32\t$d, $a", []>;
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
"cvt$r.f32.s32\t$d, $a", []>;
def CVTf32u64
: InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a", []>;
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
"cvt$r.f32.u64\t$d, $a", []>;
def CVTf32s64
: InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.s64\t$d, $a", []>;
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
"cvt$r.f32.s64\t$d, $a", []>;
def CVTf32f64
: InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a", []>;
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a),
"cvt$r.f32.f64\t$d, $a", []>;
// To f64
def CVTf64u16
: InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a", []>;
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
"cvt$r.f64.u16\t$d, $a", []>;
def CVTf64s16
: InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.s16\t$d, $a", []>;
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
"cvt$r.f64.s16\t$d, $a", []>;
def CVTf64u32
: InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a", []>;
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
"cvt$r.f64.u32\t$d, $a", []>;
def CVTf64s32
: InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.s32\t$d, $a", []>;
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
"cvt$r.f64.s32\t$d, $a", []>;
def CVTf64u64
: InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a", []>;
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
"cvt$r.f64.u64\t$d, $a", []>;
def CVTf64s64
: InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.s64\t$d, $a", []>;
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
"cvt$r.f64.s64\t$d, $a", []>;
def CVTf64f32
: InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
@ -889,10 +915,10 @@ def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RegF32:$a)>;
def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RegF32:$a)>;
def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RegF64:$a)>;
def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RegF64:$a)>;
def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
// Conversion to u32
def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
@ -902,10 +928,10 @@ def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>;
def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RegF32:$a)>;
def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RegF32:$a)>;
def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RegF64:$a)>;
def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RegF64:$a)>;
def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
// Conversion to u64
@ -919,33 +945,33 @@ def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>;
def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RegF32:$a)>;
def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RegF32:$a)>;
def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RegF64:$a)>;
def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RegF64:$a)>;
def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
// Conversion to f32
// Predicate -> f32: select between two immediates moved into f32 registers.
// NOTE(review): 0x3F800000 is the IEEE-754 single-precision encoding of 1.0,
// so this presumably yields 1.0 for a true predicate and 0.0 otherwise;
// this relies on MOVf32i32 copying raw bits -- confirm.
def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
(MOVf32i32 0x3F800000), (MOVf32i32 0))>;
def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RegI16:$a)>;
def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RegI16:$a)>;
def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RegI32:$a)>;
def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RegI32:$a)>;
def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RegI64:$a)>;
def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RegI64:$a)>;
def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RegF64:$a)>;
def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
// Conversion to f64
// Predicate -> f64: select between the raw bit images of 1.0 and 0.0.
// MOVf64i64 is the same instruction used for the i64 -> f64 bitconvert
// pattern, so the immediate is an IEEE-754 double-precision bit pattern.
// 1.0 as a double is 0x3FF0000000000000 (biased exponent 0x3FF, zero
// mantissa). The single-precision pattern 0x3F800000 widened to
// 0x3F80000000000000 would instead decode to 2^-7.
def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
(MOVf64i64 0x3FF0000000000000), (MOVf64i64 0))>;
def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RegI16:$a)>;
def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RegI16:$a)>;
def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RegI32:$a)>;
def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RegI32:$a)>;
def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RegI64:$a)>;
def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RegI64:$a)>;
def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;