mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 16:33:28 +00:00
PTX: Add programmable rounding mode specifier for int <-> fp conversion instrs.
Also take this opportunity to clean up the rounding mode pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140854 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0ad7b6e773
commit
8c1dac54f2
@ -16,6 +16,7 @@
|
||||
|
||||
#include "PTX.h"
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
@ -33,12 +34,18 @@ namespace llvm {
|
||||
class PTXFPRoundingModePass : public MachineFunctionPass {
|
||||
private:
|
||||
static char ID;
|
||||
|
||||
typedef std::pair<unsigned, unsigned> RndModeDesc;
|
||||
|
||||
PTXTargetMachine& TargetMachine;
|
||||
DenseMap<unsigned, RndModeDesc> Instrs;
|
||||
|
||||
public:
|
||||
PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
|
||||
: MachineFunctionPass(ID),
|
||||
TargetMachine(TM) {}
|
||||
TargetMachine(TM) {
|
||||
initializeMap();
|
||||
}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
@ -48,6 +55,7 @@ namespace llvm {
|
||||
|
||||
private:
|
||||
|
||||
void initializeMap();
|
||||
void processInstruction(MachineInstr &MI);
|
||||
}; // class PTXFPRoundingModePass
|
||||
} // namespace llvm
|
||||
@ -57,7 +65,6 @@ using namespace llvm;
|
||||
char PTXFPRoundingModePass::ID = 0;
|
||||
|
||||
bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
// Look at each basic block
|
||||
for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
|
||||
++bbi) {
|
||||
@ -72,79 +79,96 @@ bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
|
||||
// If the instruction has a rounding mode set to RndDefault, then assign an
|
||||
// appropriate rounding mode based on the target device.
|
||||
void PTXFPRoundingModePass::initializeMap() {
|
||||
using namespace PTXRoundingMode;
|
||||
const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
|
||||
switch (MI.getOpcode()) {
|
||||
case PTX::FADDrr32:
|
||||
case PTX::FADDri32:
|
||||
case PTX::FADDrr64:
|
||||
case PTX::FADDri64:
|
||||
case PTX::FSUBrr32:
|
||||
case PTX::FSUBri32:
|
||||
case PTX::FSUBrr64:
|
||||
case PTX::FSUBri64:
|
||||
case PTX::FMULrr32:
|
||||
case PTX::FMULri32:
|
||||
case PTX::FMULrr64:
|
||||
case PTX::FMULri64:
|
||||
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
|
||||
MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven);
|
||||
|
||||
// Build a map of default rounding mode for all instructions that need a
|
||||
// rounding mode.
|
||||
Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
|
||||
Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
|
||||
Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
|
||||
Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
|
||||
Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
|
||||
|
||||
unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
|
||||
Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
|
||||
Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
|
||||
Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
|
||||
Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
|
||||
|
||||
unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
|
||||
Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
|
||||
|
||||
Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
|
||||
Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
|
||||
Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
|
||||
Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
}
|
||||
|
||||
void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
|
||||
// Is this an instruction that needs a rounding mode?
|
||||
if (Instrs.count(MI.getOpcode())) {
|
||||
const RndModeDesc &Desc = Instrs[MI.getOpcode()];
|
||||
// Get the rounding mode operand
|
||||
MachineOperand &Op = MI.getOperand(Desc.first);
|
||||
// Update the rounding mode if needed
|
||||
if (Op.getImm() == PTXRoundingMode::RndDefault) {
|
||||
Op.setImm(Desc.second);
|
||||
}
|
||||
break;
|
||||
case PTX::FNEGrr32:
|
||||
case PTX::FNEGri32:
|
||||
case PTX::FNEGrr64:
|
||||
case PTX::FNEGri64:
|
||||
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
|
||||
MI.getOperand(1).setImm(PTXRoundingMode::RndNone);
|
||||
}
|
||||
break;
|
||||
case PTX::FDIVrr32:
|
||||
case PTX::FDIVri32:
|
||||
case PTX::FDIVrr64:
|
||||
case PTX::FDIVri64:
|
||||
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
|
||||
if (ST.fdivNeedsRoundingMode())
|
||||
MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven);
|
||||
else
|
||||
MI.getOperand(1).setImm(PTXRoundingMode::RndNone);
|
||||
}
|
||||
break;
|
||||
case PTX::FMADrrr32:
|
||||
case PTX::FMADrri32:
|
||||
case PTX::FMADrii32:
|
||||
case PTX::FMADrrr64:
|
||||
case PTX::FMADrri64:
|
||||
case PTX::FMADrii64:
|
||||
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
|
||||
if (ST.fmadNeedsRoundingMode())
|
||||
MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven);
|
||||
else
|
||||
MI.getOperand(1).setImm(PTXRoundingMode::RndNone);
|
||||
}
|
||||
break;
|
||||
case PTX::FSQRTrr32:
|
||||
case PTX::FSQRTri32:
|
||||
case PTX::FSQRTrr64:
|
||||
case PTX::FSQRTri64:
|
||||
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
|
||||
MI.getOperand(1).setImm(PTXRoundingMode::RndNearestEven);
|
||||
}
|
||||
break;
|
||||
case PTX::FSINrr32:
|
||||
case PTX::FSINri32:
|
||||
case PTX::FSINrr64:
|
||||
case PTX::FSINri64:
|
||||
case PTX::FCOSrr32:
|
||||
case PTX::FCOSri32:
|
||||
case PTX::FCOSrr64:
|
||||
case PTX::FCOSri64:
|
||||
if (MI.getOperand(1).getImm() == PTXRoundingMode::RndDefault) {
|
||||
MI.getOperand(1).setImm(PTXRoundingMode::RndApprox);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -655,7 +655,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
|
||||
// PTX cvt instructions
|
||||
// Not all of these may actually be used; we just define all possible patterns
|
||||
// here.
|
||||
// here (that make sense).
|
||||
// FIXME: Can we collapse this somehow into a multiclass def?
|
||||
|
||||
// To i16
|
||||
def CVTu16u32
|
||||
@ -663,13 +664,17 @@ def CVTu16u32
|
||||
def CVTu16u64
|
||||
: InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>;
|
||||
def CVTu16f32
|
||||
: InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
|
||||
"cvt$r.u16.f32\t$d, $a", []>;
|
||||
def CVTs16f32
|
||||
: InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.s16.f32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
|
||||
"cvt$r.s16.f32\t$d, $a", []>;
|
||||
def CVTu16f64
|
||||
: InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
|
||||
"cvt$r.u16.f64\t$d, $a", []>;
|
||||
def CVTs16f64
|
||||
: InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.s16.f64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
|
||||
"cvt$r.s16.f64\t$d, $a", []>;
|
||||
|
||||
// To i32
|
||||
def CVTu32u16
|
||||
@ -679,13 +684,17 @@ def CVTs32s16
|
||||
def CVTu32u64
|
||||
: InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>;
|
||||
def CVTu32f32
|
||||
: InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
|
||||
"cvt$r.u32.f32\t$d, $a", []>;
|
||||
def CVTs32f32
|
||||
: InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.s32.f32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
|
||||
"cvt$r.s32.f32\t$d, $a", []>;
|
||||
def CVTu32f64
|
||||
: InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
|
||||
"cvt$r.u32.f64\t$d, $a", []>;
|
||||
def CVTs32f64
|
||||
: InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.s32.f64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
|
||||
"cvt$r.s32.f64\t$d, $a", []>;
|
||||
|
||||
// To i64
|
||||
def CVTu64u16
|
||||
@ -697,43 +706,60 @@ def CVTu64u32
|
||||
def CVTs64s32
|
||||
: InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>;
|
||||
def CVTu64f32
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
|
||||
"cvt$r.u64.f32\t$d, $a", []>;
|
||||
def CVTs64f32
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.s64.f32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
|
||||
"cvt$r.s64.f32\t$d, $a", []>;
|
||||
def CVTu64f64
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
|
||||
"cvt$r.u64.f64\t$d, $a", []>;
|
||||
def CVTs64f64
|
||||
: InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.s64.f64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
|
||||
"cvt$r.s64.f64\t$d, $a", []>;
|
||||
|
||||
// To f32
|
||||
def CVTf32u16
|
||||
: InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
|
||||
"cvt$r.f32.u16\t$d, $a", []>;
|
||||
def CVTf32s16
|
||||
: InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.s16\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
|
||||
"cvt$r.f32.s16\t$d, $a", []>;
|
||||
def CVTf32u32
|
||||
: InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
|
||||
"cvt$r.f32.u32\t$d, $a", []>;
|
||||
def CVTf32s32
|
||||
: InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.s32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
|
||||
"cvt$r.f32.s32\t$d, $a", []>;
|
||||
def CVTf32u64
|
||||
: InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
|
||||
"cvt$r.f32.u64\t$d, $a", []>;
|
||||
def CVTf32s64
|
||||
: InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.s64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
|
||||
"cvt$r.f32.s64\t$d, $a", []>;
|
||||
def CVTf32f64
|
||||
: InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a),
|
||||
"cvt$r.f32.f64\t$d, $a", []>;
|
||||
|
||||
// To f64
|
||||
def CVTf64u16
|
||||
: InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
|
||||
"cvt$r.f64.u16\t$d, $a", []>;
|
||||
def CVTf64s16
|
||||
: InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.s16\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
|
||||
"cvt$r.f64.s16\t$d, $a", []>;
|
||||
def CVTf64u32
|
||||
: InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
|
||||
"cvt$r.f64.u32\t$d, $a", []>;
|
||||
def CVTf64s32
|
||||
: InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.s32\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
|
||||
"cvt$r.f64.s32\t$d, $a", []>;
|
||||
def CVTf64u64
|
||||
: InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
|
||||
"cvt$r.f64.u64\t$d, $a", []>;
|
||||
def CVTf64s64
|
||||
: InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.s64\t$d, $a", []>;
|
||||
: InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
|
||||
"cvt$r.f64.s64\t$d, $a", []>;
|
||||
def CVTf64f32
|
||||
: InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
|
||||
|
||||
@ -889,10 +915,10 @@ def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
|
||||
def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
|
||||
def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
|
||||
def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
|
||||
def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RegF32:$a)>;
|
||||
def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RegF32:$a)>;
|
||||
def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RegF64:$a)>;
|
||||
def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RegF64:$a)>;
|
||||
def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
|
||||
def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
|
||||
def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
|
||||
def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
|
||||
|
||||
// Conversion to u32
|
||||
def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
|
||||
@ -902,10 +928,10 @@ def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
|
||||
def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>;
|
||||
def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
|
||||
def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
|
||||
def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RegF32:$a)>;
|
||||
def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RegF32:$a)>;
|
||||
def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RegF64:$a)>;
|
||||
def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RegF64:$a)>;
|
||||
def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
|
||||
def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
|
||||
def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
|
||||
def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
|
||||
def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
|
||||
|
||||
// Conversion to u64
|
||||
@ -919,33 +945,33 @@ def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
|
||||
def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
|
||||
def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>;
|
||||
def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
|
||||
def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RegF32:$a)>;
|
||||
def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RegF32:$a)>;
|
||||
def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RegF64:$a)>;
|
||||
def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RegF64:$a)>;
|
||||
def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
|
||||
def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
|
||||
def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
|
||||
def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
|
||||
def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
|
||||
|
||||
// Conversion to f32
|
||||
def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
|
||||
(MOVf32i32 0x3F800000), (MOVf32i32 0))>;
|
||||
def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RegI16:$a)>;
|
||||
def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RegI16:$a)>;
|
||||
def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RegI32:$a)>;
|
||||
def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RegI32:$a)>;
|
||||
def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RegI64:$a)>;
|
||||
def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RegI64:$a)>;
|
||||
def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RegF64:$a)>;
|
||||
def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
|
||||
def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
|
||||
def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
|
||||
def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
|
||||
def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
|
||||
def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
|
||||
def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
|
||||
def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
|
||||
|
||||
// Conversion to f64
|
||||
// Predicate -> f64: select between the raw bit patterns of 1.0 and 0.0.
// 0x3FF0000000000000 is the IEEE-754 binary64 encoding of 1.0 (sign 0,
// biased exponent 0x3FF, zero mantissa). The previous constant,
// 0x3F80000000000000, was the f32 pattern 0x3F800000 padded with zeros and
// decodes to 2^-7 (0.0078125) as a double.
def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
            (MOVf64i64 0x3FF0000000000000), (MOVf64i64 0))>;
|
||||
def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RegI16:$a)>;
|
||||
def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RegI16:$a)>;
|
||||
def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RegI32:$a)>;
|
||||
def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RegI32:$a)>;
|
||||
def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RegI64:$a)>;
|
||||
def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RegI64:$a)>;
|
||||
def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
|
||||
def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
|
||||
def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
|
||||
def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
|
||||
def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
|
||||
def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
|
||||
def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
|
||||
def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user