diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp index 7fa435c6101..0b653e04b3b 100644 --- a/lib/Target/PTX/PTXFPRoundingModePass.cpp +++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp @@ -16,6 +16,7 @@ #include "PTX.h" #include "PTXTargetMachine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -33,12 +34,18 @@ namespace llvm { class PTXFPRoundingModePass : public MachineFunctionPass { private: static char ID; + + typedef std::pair<unsigned, unsigned> RndModeDesc; + PTXTargetMachine& TargetMachine; + DenseMap<unsigned, RndModeDesc> Instrs; public: PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) : MachineFunctionPass(ID), - TargetMachine(TM) {} + TargetMachine(TM) { + initializeMap(); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -48,6 +55,7 @@ namespace llvm { private: + void initializeMap(); void processInstruction(MachineInstr &MI); }; // class PTXFPRoundingModePass } // namespace llvm @@ -57,7 +65,6 @@ using namespace llvm; char PTXFPRoundingModePass::ID = 0; bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { - // Look at each basic block for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; ++bbi) { @@ -72,79 +79,96 @@ bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { return false; } -void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { - // If the instruction has a rounding mode set to RndDefault, then assign an - // appropriate rounding mode based on the target device.
+void PTXFPRoundingModePass::initializeMap() { + using namespace PTXRoundingMode; const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>(); - switch (MI.getOpcode()) { - case PTX::FADDrr32: - case PTX::FADDri32: - case PTX::FADDrr64: - case PTX::FADDri64: - case PTX::FSUBrr32: - case PTX::FSUBri32: - case PTX::FSUBrr64: - case PTX::FSUBri64: - case PTX::FMULrr32: - case PTX::FMULri32: - case PTX::FMULrr64: - case PTX::FMULri64: - if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) { - MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven); + + // Build a map of default rounding mode for all instructions that need a + // rounding mode. + Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); + + unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ?
RndNearestEven : RndNone; + Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); + + unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); + + Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); + + Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + 
Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + + Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); +} + +void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { + // Is this an instruction that needs a rounding mode? 
+ if (Instrs.count(MI.getOpcode())) { + const RndModeDesc &Desc = Instrs[MI.getOpcode()]; + // Get the rounding mode operand + MachineOperand &Op = MI.getOperand(Desc.first); + // Update the rounding mode if needed + if (Op.getImm() == PTXRoundingMode::RndDefault) { + Op.setImm(Desc.second); } - break; - case PTX::FNEGrr32: - case PTX::FNEGri32: - case PTX::FNEGrr64: - case PTX::FNEGri64: - if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) { - MI.getOperand(1).setImm(PTXRoundingMode::RndNone); - } - break; - case PTX::FDIVrr32: - case PTX::FDIVri32: - case PTX::FDIVrr64: - case PTX::FDIVri64: - if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) { - if (ST.fdivNeedsRoundingMode()) - MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven); - else - MI.getOperand(1).setImm(PTXRoundingMode::RndNone); - } - break; - case PTX::FMADrrr32: - case PTX::FMADrri32: - case PTX::FMADrii32: - case PTX::FMADrrr64: - case PTX::FMADrri64: - case PTX::FMADrii64: - if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) { - if (ST.fmadNeedsRoundingMode()) - MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven); - else - MI.getOperand(1).setImm(PTXRoundingMode::RndNone); - } - break; - case PTX::FSQRTrr32: - case PTX::FSQRTri32: - case PTX::FSQRTrr64: - case PTX::FSQRTri64: - if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) { - MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven); - } - break; - case PTX::FSINrr32: - case PTX::FSINri32: - case PTX::FSINrr64: - case PTX::FSINri64: - case PTX::FCOSrr32: - case PTX::FCOSri32: - case PTX::FCOSrr64: - case PTX::FCOSri64: - if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) { - MI.getOperand(1).setImm(PTXRoundingMode::RndApprox); - } - break; } } diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 0a7900f1525..1b1b92cdfe2 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -655,7 +655,8 @@ let isReMaterializable = 1, 
isAsCheapAsAMove = 1 in { // PTX cvt instructions // Note all of these may actually be used, we just define all possible patterns -// here. +// here (that make sense). +// FIXME: Can we collapse this somehow into a multiclass def? // To i16 def CVTu16u32 @@ -663,13 +664,17 @@ def CVTu16u32 def CVTu16u64 : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>; def CVTu16f32 - : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a", []>; + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u16.f32\t$d, $a", []>; def CVTs16f32 - : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.s16.f32\t$d, $a", []>; + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s16.f32\t$d, $a", []>; def CVTu16f64 - : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a", []>; + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u16.f64\t$d, $a", []>; def CVTs16f64 - : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.s16.f64\t$d, $a", []>; + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s16.f64\t$d, $a", []>; // To i32 def CVTu32u16 @@ -679,13 +684,17 @@ def CVTs32s16 def CVTu32u64 : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>; def CVTu32f32 - : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a", []>; + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u32.f32\t$d, $a", []>; def CVTs32f32 - : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.s32.f32\t$d, $a", []>; + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s32.f32\t$d, $a", []>; def CVTu32f64 - : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a", []>; + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u32.f64\t$d, $a", []>; def CVTs32f64 - : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.s32.f64\t$d, $a", []>; + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s32.f64\t$d, $a", []>; // 
To i64 def CVTu64u16 @@ -697,43 +706,60 @@ def CVTu64u32 def CVTs64s32 : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>; def CVTu64f32 - : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", []>; + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u64.f32\t$d, $a", []>; def CVTs64f32 - : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.s64.f32\t$d, $a", []>; + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s64.f32\t$d, $a", []>; def CVTu64f64 - : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a", []>; + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u64.f64\t$d, $a", []>; def CVTs64f64 - : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.s64.f64\t$d, $a", []>; + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s64.f64\t$d, $a", []>; // To f32 def CVTf32u16 - : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.u16\t$d, $a", []>; def CVTf32s16 - : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.s16\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.s16\t$d, $a", []>; def CVTf32u32 - : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.u32\t$d, $a", []>; def CVTf32s32 - : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.s32\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.s32\t$d, $a", []>; def CVTf32u64 - : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.u64\t$d, $a", []>; def CVTf32s64 - : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.s64\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.s64\t$d, $a", []>; def 
CVTf32f64 - : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.f32.f64\t$d, $a", []>; // To f64 def CVTf64u16 - : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a", []>; + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.u16\t$d, $a", []>; def CVTf64s16 - : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.s16\t$d, $a", []>; + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.s16\t$d, $a", []>; def CVTf64u32 - : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a", []>; + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.u32\t$d, $a", []>; def CVTf64s32 - : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.s32\t$d, $a", []>; + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.s32\t$d, $a", []>; def CVTf64u64 - : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a", []>; + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.u64\t$d, $a", []>; def CVTf64s64 - : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.s64\t$d, $a", []>; + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.s64\t$d, $a", []>; def CVTf64f32 : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>; @@ -889,10 +915,10 @@ def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>; def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>; def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>; -def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RegF32:$a)>; -def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RegF32:$a)>; -def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RegF64:$a)>; -def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RegF64:$a)>; +def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>; +def : 
Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>; +def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>; // Conversion to u32 def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; @@ -902,10 +928,10 @@ def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>; def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>; def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>; def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>; -def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RegF32:$a)>; -def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RegF32:$a)>; -def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RegF64:$a)>; -def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RegF64:$a)>; +def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>; def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>; // Conversion to u64 @@ -919,33 +945,33 @@ def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>; def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>; def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>; def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>; -def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RegF32:$a)>; -def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RegF32:$a)>; -def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RegF64:$a)>; -def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RegF64:$a)>; +def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (fp_to_sint 
RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>; def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>; // Conversion to f32 def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a, (MOVf32i32 0x3F800000), (MOVf32i32 0))>; -def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RegI16:$a)>; -def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RegI16:$a)>; -def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RegI32:$a)>; -def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RegI32:$a)>; -def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RegI64:$a)>; -def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RegI64:$a)>; -def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RegF64:$a)>; +def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>; +def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>; +def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>; +def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>; +def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>; +def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>; +def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>; def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>; // Conversion to f64 def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a, (MOVf64i64 0x3F80000000000000), (MOVf64i64 0))>; -def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RegI16:$a)>; -def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RegI16:$a)>; -def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RegI32:$a)>; -def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RegI32:$a)>; -def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RegI64:$a)>; -def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RegI64:$a)>; +def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>; +def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>; +def : Pat<(f64 (uint_to_fp 
RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>; +def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>; +def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>; +def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>; def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>; def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;