From 9bc94276e796d644cb425a7c7d38cc44dbf4e9c1 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 30 Aug 2013 15:18:11 +0000 Subject: [PATCH] [PowerPC] Add handling for conversions to fast-isel. Yet another chunk of fast-isel code. This one handles various conversions involving floating-point. (It also includes some miscellaneous handling throughout the back end for LWA_32 and LWAX_32 that should have been part of the load-store patch.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189677 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/InstPrinter/PPCInstPrinter.cpp | 12 + lib/Target/PowerPC/PPCAsmPrinter.cpp | 1 + lib/Target/PowerPC/PPCFastISel.cpp | 273 ++++++++++++++++ lib/Target/PowerPC/PPCRegisterInfo.cpp | 2 + test/CodeGen/PowerPC/fast-isel-conversion.ll | 305 ++++++++++++++++++ 5 files changed, 593 insertions(+) create mode 100644 test/CodeGen/PowerPC/fast-isel-conversion.ll diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 08d7665065c..79af2ef1e67 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOpcodes.h" using namespace llvm; #include "PPCGenAsmWriter.inc" @@ -78,6 +79,17 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } + // For fast-isel, a COPY_TO_REGCLASS may survive this long. This is + // used when converting a 32-bit float to a 64-bit float as part of + // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()), + // as otherwise we have problems with incorrect register classes + // in machine instruction verification. For now, just avoid trying + // to print it as such an instruction has no effect (a 32-bit float + // in a register is already in 64-bit form, just with lower + // precision). 
FIXME: Is there a better solution? + if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS) + return; + printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index bbfad87f240..e4a631a9cf3 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -704,6 +704,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { break; case PPC::LD: case PPC::STD: + case PPC::LWA_32: case PPC::LWA: { // Verify alignment is legal, so we don't create relocations // that can't be supported. diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index aeda78bf6b9..3bceed418a1 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -109,6 +109,10 @@ class PPCFastISel : public FastISel { bool SelectBranch(const Instruction *I); bool SelectIndirectBr(const Instruction *I); bool SelectCmp(const Instruction *I); + bool SelectFPExt(const Instruction *I); + bool SelectFPTrunc(const Instruction *I); + bool SelectIToFP(const Instruction *I, bool IsSigned); + bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); bool SelectRet(const Instruction *I); bool SelectIntExt(const Instruction *I); @@ -135,6 +139,9 @@ class PPCFastISel : public FastISel { const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC); + unsigned PPCMoveToIntReg(const Instruction *I, MVT VT, + unsigned SrcReg, bool IsSigned); + unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned); // Call handling routines. private: @@ -786,6 +793,260 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return true; } +// Attempt to fast-select a floating-point extend instruction. 
+bool PPCFastISel::SelectFPExt(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::f32 || DestVT != MVT::f64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // No code is generated for a FP extend: a 32-bit float in a register + // is already in 64-bit form (just with lower precision), so the source + // register is simply recorded as the value of this instruction. + UpdateValueMap(I, SrcReg); + return true; +} + +// Attempt to fast-select a floating-point truncate instruction. +// Only f64 -> f32 is handled; returns false for any other type pair or +// when no register can be obtained for the operand. +bool PPCFastISel::SelectFPTrunc(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::f64 || DestVT != MVT::f32) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // Round the result to single precision with FRSP, placing it in a + // newly created F4RC result register. + unsigned DestReg = createResultReg(&PPC::F4RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg) + .addReg(SrcReg); + + UpdateValueMap(I, DestReg); + return true; +} + +// Move an i32 or i64 value in a GPR to an f64 value in an FPR. +// The value travels through an 8-byte stack slot (store from the GPR, +// then load into the FPR). Returns the FPR, or zero on failure. +// FIXME: When direct register moves are implemented (see PowerISA 2.07), +// those should be used instead of moving via a stack slot when the +// subtarget permits. +// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte +// stack slot and 4-byte store/load sequence. Or just sext the 4-byte +// case to 8 bytes which produces tighter code but wastes stack space. +unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, + bool IsSigned) { + + // If necessary, extend 32-bit int to 64-bit. + // NOTE(review): the last argument looks like an "is zero-extend" flag + // (set for the unsigned case) -- confirm against PPCEmitIntExt. + if (SrcVT == MVT::i32) { + unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); + if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned)) + return 0; + SrcReg = TmpReg; + } + + // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ Address Addr; + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = MFI.CreateStackObject(8, 8, false); + + // Store the value from the GPR. + if (!PPCEmitStore(MVT::i64, SrcReg, Addr)) + return 0; + + // Load the integer value into an FPR. The kind of load used depends + // on a number of conditions; the default is an 8-byte LFD from the + // start of the slot. + unsigned LoadOpc = PPC::LFD; + + // An i32 source can instead be reloaded with a 4-byte load of the + // word at offset 4 of the doubleword just stored. Adjust the offset + // only when such a word load is actually selected: for a signed i32 + // without LFIWAX we keep LFD, and an 8-byte load from offset 4 would + // read past the end of the 8-byte slot. The value has already been + // extended to 64 bits, so loading the whole doubleword is correct. + if (SrcVT == MVT::i32) { + if (!IsSigned) { + LoadOpc = PPC::LFIWZX; + Addr.Offset = 4; + } else if (PPCSubTarget.hasLFIWAX()) { + LoadOpc = PPC::LFIWAX; + Addr.Offset = 4; + } + } + + const TargetRegisterClass *RC = &PPC::F8RCRegClass; + unsigned ResultReg = 0; + if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc)) + return 0; + + return ResultReg; +} + +// Attempt to fast-select an integer-to-floating-point conversion. +// Handles i8/i16/i32/i64 sources and f32/f64 results; returns false +// when the conversion is not handled here. +bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { + MVT DstVT; + Type *DstTy = I->getType(); + if (!isTypeLegal(DstTy, DstVT)) + return false; + + if (DstVT != MVT::f32 && DstVT != MVT::f64) + return false; + + Value *Src = I->getOperand(0); + EVT SrcEVT = TLI.getValueType(Src->getType(), true); + if (!SrcEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + + if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && + SrcVT != MVT::i32 && SrcVT != MVT::i64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) + return false; + + // We can only lower an unsigned convert if we have the newer + // floating-point conversion operations. + if (!IsSigned && !PPCSubTarget.hasFPCVT()) + return false; + + // FIXME: For now we require the newer floating-point conversion operations + // (which are present only on P7 and A2 server models) when converting + // to single-precision float. Otherwise we have to generate a lot of + // fiddly code to avoid double rounding. If necessary, the fiddly code + // can be found in PPCTargetLowering::LowerINT_TO_FP(). + if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT()) + return false; + + // Extend the input if necessary.
+ if (SrcVT == MVT::i8 || SrcVT == MVT::i16) { + unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned)) + return false; + SrcVT = MVT::i64; + SrcReg = TmpReg; + } + + // Move the integer value to an FPR; a zero result means the move could + // not be emitted. + unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned); + if (FPReg == 0) + return false; + + // Determine the opcode for the conversion: FCFIDS/FCFIDUS are chosen + // for an f32 result, FCFID/FCFIDU for f64, with the U variants used + // for an unsigned source. + const TargetRegisterClass *RC = &PPC::F8RCRegClass; + unsigned DestReg = createResultReg(RC); + unsigned Opc; + + if (DstVT == MVT::f32) + Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS; + else + Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU; + + // Generate the convert. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addReg(FPReg); + + UpdateValueMap(I, DestReg); + return true; +} + +// Move the floating-point value in SrcReg into an integer destination +// register, and return the register (or zero if we can't handle it). +// FIXME: When direct register moves are implemented (see PowerISA 2.07), +// those should be used instead of moving via a stack slot when the +// subtarget permits. +unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, + unsigned SrcReg, bool IsSigned) { + // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary. + // Note that if we have STFIWX available, we could use a 4-byte stack + // slot for i32, but this being fast-isel we'll just go with the + // easiest code gen possible. + Address Addr; + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = MFI.CreateStackObject(8, 8, false); + + // Store the value from the FPR. + if (!PPCEmitStore(MVT::f64, SrcReg, Addr)) + return 0; + + // Reload it into a GPR. If we want an i32, modify the address + // to have a 4-byte offset so we load from the right place. + // NOTE(review): offset 4 is the low-order word only for a big-endian + // layout -- confirm no little-endian subtarget reaches this code. + if (VT == MVT::i32) + Addr.Offset = 4; + + // Look at the currently assigned register for this instruction + // to determine the required register class.
+ unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + + // Reload from the slot into a GPR; zero is returned if the load cannot + // be emitted. + unsigned ResultReg = 0; + if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) + return 0; + + return ResultReg; +} + +// Attempt to fast-select a floating-point-to-integer conversion. +// Handles f32/f64 sources and i32/i64 results; returns false when the +// conversion is not handled here. +bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { + MVT DstVT, SrcVT; + Type *DstTy = I->getType(); + if (!isTypeLegal(DstTy, DstVT)) + return false; + + if (DstVT != MVT::i32 && DstVT != MVT::i64) + return false; + + // An unsigned 64-bit result needs FCTIDUZ, which belongs to the newer + // floating-point conversion operations. Without them there is no + // fallback opcode here, so decline instead of emitting an instruction + // the subtarget does not have. + if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget.hasFPCVT()) + return false; + + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + if (!isTypeLegal(SrcTy, SrcVT)) + return false; + + if (SrcVT != MVT::f32 && SrcVT != MVT::f64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) + return false; + + // Convert f32 to f64 if necessary. This is just a meaningless copy + // to get the register class right. COPY_TO_REGCLASS is needed since + // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream. + const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); + if (InRC == &PPC::F4RCRegClass) { + unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg) + .addReg(SrcReg).addImm(PPC::F8RCRegClassID); + SrcReg = TmpReg; + } + + // Determine the opcode for the conversion, which takes place + // entirely within FPRs. For an unsigned i32 result without the newer + // conversion operations, a signed 64-bit convert is used instead; only + // the word at offset 4 of the stored result is reloaded afterwards. + unsigned DestReg = createResultReg(&PPC::F8RCRegClass); + unsigned Opc; + + if (DstVT == MVT::i32) { + if (IsSigned) + Opc = PPC::FCTIWZ; + else + Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; + } else { + Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; + } + + // Generate the convert. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addReg(SrcReg); + + // Now move the integer value from a float register to an integer register.
+ unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned); + if (IntReg == 0) + return false; + + UpdateValueMap(I, IntReg); + return true; +} + // Attempt to fast-select a binary integer operation that isn't already // handled automatically. bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { @@ -1135,6 +1396,18 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { return SelectBranch(I); case Instruction::IndirectBr: return SelectIndirectBr(I); + case Instruction::FPExt: + return SelectFPExt(I); + case Instruction::FPTrunc: + return SelectFPTrunc(I); + case Instruction::SIToFP: + return SelectIToFP(I, /*IsSigned*/ true); + case Instruction::UIToFP: + return SelectIToFP(I, /*IsSigned*/ false); + case Instruction::FPToSI: + return SelectFPToI(I, /*IsSigned*/ true); + case Instruction::FPToUI: + return SelectFPToI(I, /*IsSigned*/ false); case Instruction::Add: return SelectBinaryIntOp(I, ISD::ADD); case Instruction::Or: diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index adba613d6e9..aee28302d61 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -69,6 +69,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST) ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX; ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX; ImmToIdxMap[PPC::ADDI] = PPC::ADD4; + ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32; // 64-bit ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8; @@ -532,6 +533,7 @@ static bool usesIXAddr(const MachineInstr &MI) { default: return false; case PPC::LWA: + case PPC::LWA_32: case PPC::LD: case PPC::STD: return true; diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll new file mode 100644 index 00000000000..a31c31210c3 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll @@ -0,0 +1,305 @@ +; 
RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +; Test sitofp + +define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp { +entry: +; ELF64: sitofp_single_i64 + %b.addr = alloca float, align 4 + %conv = sitofp i64 %a to float +; ELF64: std +; ELF64: lfd +; ELF64: fcfids + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp { +entry: +; ELF64: sitofp_single_i32 + %b.addr = alloca float, align 4 + %conv = sitofp i32 %a to float +; ELF64: std +; ELF64: lfiwax +; ELF64: fcfids + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp { +entry: +; ELF64: sitofp_single_i16 + %b.addr = alloca float, align 4 + %conv = sitofp i16 %a to float +; ELF64: extsh +; ELF64: std +; ELF64: lfd +; ELF64: fcfids + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_single_i8(i8 %a) nounwind ssp { +entry: +; ELF64: sitofp_single_i8 + %b.addr = alloca float, align 4 + %conv = sitofp i8 %a to float +; ELF64: extsb +; ELF64: std +; ELF64: lfd +; ELF64: fcfids + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i32 + %b.addr = alloca double, align 8 + %conv = sitofp i32 %a to double +; ELF64: std +; ELF64: lfiwax +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i64 + %b.addr = alloca double, align 8 + %conv = sitofp i64 %a to double +; ELF64: std +; ELF64: lfd +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i16 + %b.addr = alloca double, align 8 + %conv = sitofp i16 %a to 
double +; ELF64: extsh +; ELF64: std +; ELF64: lfd +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i8 + %b.addr = alloca double, align 8 + %conv = sitofp i8 %a to double +; ELF64: extsb +; ELF64: std +; ELF64: lfd +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +; Test uitofp + +define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp { +entry: +; ELF64: uitofp_single_i64 + %b.addr = alloca float, align 4 + %conv = uitofp i64 %a to float +; ELF64: std +; ELF64: lfd +; ELF64: fcfidus + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { +entry: +; ELF64: uitofp_single_i32 + %b.addr = alloca float, align 4 + %conv = uitofp i32 %a to float +; ELF64: std +; ELF64: lfiwzx +; ELF64: fcfidus + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { +entry: +; ELF64: uitofp_single_i16 + %b.addr = alloca float, align 4 + %conv = uitofp i16 %a to float +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64: std +; ELF64: lfd +; ELF64: fcfidus + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i8(i8 %a) nounwind ssp { +entry: +; ELF64: uitofp_single_i8 + %b.addr = alloca float, align 4 + %conv = uitofp i8 %a to float +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64: std +; ELF64: lfd +; ELF64: fcfidus + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp { +entry: +; ELF64: uitofp_double_i64 + %b.addr = alloca double, align 8 + %conv = uitofp i64 %a to double +; ELF64: std +; ELF64: lfd +; ELF64: fcfidu + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ELF64: 
uitofp_double_i32 + %b.addr = alloca double, align 8 + %conv = uitofp i32 %a to double +; ELF64: std +; ELF64: lfiwzx +; ELF64: fcfidu + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ELF64: uitofp_double_i16 + %b.addr = alloca double, align 8 + %conv = uitofp i16 %a to double +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64: std +; ELF64: lfd +; ELF64: fcfidu + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ELF64: uitofp_double_i8 + %b.addr = alloca double, align 8 + %conv = uitofp i8 %a to double +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64: std +; ELF64: lfd +; ELF64: fcfidu + store double %conv, double* %b.addr, align 8 + ret void +} + +; Test fptosi + +define void @fptosi_float_i32(float %a) nounwind ssp { +entry: +; ELF64: fptosi_float_i32 + %b.addr = alloca i32, align 4 + %conv = fptosi float %a to i32 +; ELF64: fctiwz +; ELF64: stfd +; ELF64: lwa + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptosi_float_i64(float %a) nounwind ssp { +entry: +; ELF64: fptosi_float_i64 + %b.addr = alloca i64, align 4 + %conv = fptosi float %a to i64 +; ELF64: fctidz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 4 + ret void +} + +define void @fptosi_double_i32(double %a) nounwind ssp { +entry: +; ELF64: fptosi_double_i32 + %b.addr = alloca i32, align 8 + %conv = fptosi double %a to i32 +; ELF64: fctiwz +; ELF64: stfd +; ELF64: lwa + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +define void @fptosi_double_i64(double %a) nounwind ssp { +entry: +; ELF64: fptosi_double_i64 + %b.addr = alloca i64, align 8 + %conv = fptosi double %a to i64 +; ELF64: fctidz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 8 + ret void +} + +; Test fptoui + +define void @fptoui_float_i32(float %a) nounwind ssp { +entry: +; ELF64: 
fptoui_float_i32 + %b.addr = alloca i32, align 4 + %conv = fptoui float %a to i32 +; ELF64: fctiwuz +; ELF64: stfd +; ELF64: lwz + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptoui_float_i64(float %a) nounwind ssp { +entry: +; ELF64: fptoui_float_i64 + %b.addr = alloca i64, align 4 + %conv = fptoui float %a to i64 +; ELF64: fctiduz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 4 + ret void +} + +define void @fptoui_double_i32(double %a) nounwind ssp { +entry: +; ELF64: fptoui_double_i32 + %b.addr = alloca i32, align 8 + %conv = fptoui double %a to i32 +; ELF64: fctiwuz +; ELF64: stfd +; ELF64: lwz + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +define void @fptoui_double_i64(double %a) nounwind ssp { +entry: +; ELF64: fptoui_double_i64 + %b.addr = alloca i64, align 8 + %conv = fptoui double %a to i64 +; ELF64: fctiduz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 8 + ret void +}