From c4ce78e261582d08475c6bbea334c1c9dbea494b Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 14 Jul 2014 13:08:14 +0000 Subject: [PATCH] [mips] For the FP64A ABI, odd-numbered double-precision moves must not use mtc1/mfc1. Summary: This is because the FP64A the hardware will redirect 32-bit reads/writes from/to odd-numbered registers to the upper 32-bits of the corresponding even register. In effect, simulating FR=0 mode when FR=0 mode is not available. Unfortunately, we have to make the decision to avoid mfc1/mtc1 before register allocation so we currently do this for even registers too. FPXX has a similar requirement on 32-bit architectures that lack mfhc1/mthc1 so this patch also handles the affected moves from the FPU for FPXX too. Moves to the FPU were supported by an earlier commit. Differential Revision: http://reviews.llvm.org/D4484 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212938 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMachineFunction.cpp | 10 +- lib/Target/Mips/MipsMachineFunction.h | 6 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 98 ++++++++++-- lib/Target/Mips/MipsSEInstrInfo.cpp | 24 ++- lib/Target/Mips/MipsSubtarget.cpp | 4 +- test/CodeGen/Mips/fp64a.ll | 197 ++++++++++++++++++++++++ test/CodeGen/Mips/fpxx.ll | 175 +++++++++++++++------ 7 files changed, 443 insertions(+), 71 deletions(-) create mode 100644 test/CodeGen/Mips/fp64a.ll diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index a3306686fc4..bc896be4e1d 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -137,12 +137,12 @@ MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) { return MachinePointerInfo(E); } -int MipsFunctionInfo::getBuildPairF64_FI(const TargetRegisterClass *RC) { - if (BuildPairF64_FI == -1) { - BuildPairF64_FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment(), false); +int 
MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { + if (MoveF64ViaSpillFI == -1) { + MoveF64ViaSpillFI = MF.getFrameInfo()->CreateStackObject( + RC->getSize(), RC->getAlignment(), false); } - return BuildPairF64_FI; + return MoveF64ViaSpillFI; } void MipsFunctionInfo::anchor() { } diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index a667d43724c..61260e57815 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -55,7 +55,7 @@ public: MipsFunctionInfo(MachineFunction &MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0), VarArgsFrameIndex(0), CallsEhReturn(false), SaveS2(false), - BuildPairF64_FI(-1) {} + MoveF64ViaSpillFI(-1) {} ~MipsFunctionInfo(); @@ -97,7 +97,7 @@ public: void setSaveS2() { SaveS2 = true; } bool hasSaveS2() const { return SaveS2; } - int getBuildPairF64_FI(const TargetRegisterClass *RC); + int getMoveF64ViaSpillFI(const TargetRegisterClass *RC); std::map StubsNeeded; @@ -141,7 +141,7 @@ private: /// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the /// O32 FPXX ABI is enabled. -1 is used to denote invalid index. - int BuildPairF64_FI; + int MoveF64ViaSpillFI; /// MipsCallEntry maps. 
StringMap ExternalCallEntries; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index f2276f19afa..d0a17cd834a 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -66,6 +66,8 @@ private: unsigned MFLoOpc); bool expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const; + bool expandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, bool FP64) const; MachineFunction &MF; MachineRegisterInfo &MRI; @@ -118,6 +120,14 @@ bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { if (expandBuildPairF64(MBB, I, true)) MBB.erase(I); return false; + case Mips::ExtractElementF64: + if (expandExtractElementF64(MBB, I, false)) + MBB.erase(I); + return false; + case Mips::ExtractElementF64_64: + if (expandExtractElementF64(MBB, I, true)) + MBB.erase(I); + return false; case TargetOpcode::COPY: if (!expandCopy(MBB, I)) return false; @@ -269,9 +279,10 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, } /// This method expands the same instruction that MipsSEInstrInfo:: -/// expandBuildPairF64 does, for the case when ABI is fpxx and mthc1 is -/// not available. It is implemented here because frame indexes are -/// eliminated before MipsSEInstrInfo::expandBuildPairF64 is called. +/// expandBuildPairF64 does, for the case when ABI is fpxx and mthc1 is not +/// available and the case where the ABI is FP64A. It is implemented here +/// because frame indexes are eliminated before MipsSEInstrInfo:: +/// expandBuildPairF64 is called. bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const { @@ -280,10 +291,18 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, // // The case where dmtc1 is available doesn't need to be handled here // because it never creates a BuildPairF64 node. 
+ // + // The FP64A ABI (fp64 with nooddspreg) must also use a spill/reload sequence + // for odd-numbered double precision values (because the lower 32 bits are + // transferred with mtc1 which is redirected to the upper half of the even + // register). Unfortunately, we have to make this decision before register + // allocation so for now we use a spill/reload sequence for all + // double-precision values regardless of whether the register is odd or even. const TargetMachine &TM = MF.getTarget(); - if (TM.getSubtarget().isABI_FPXX() - && !TM.getSubtarget().hasMTHC1()) { + const MipsSubtarget &Subtarget = TM.getSubtarget(); + if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) || + (FP64 && !Subtarget.useOddSPReg())) { const MipsSEInstrInfo &TII = *static_cast(TM.getInstrInfo()); const MipsRegisterInfo &TRI = @@ -294,13 +313,18 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, unsigned HiReg = I->getOperand(2).getReg(); // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are - // the cases where mthc1 is not available). - assert(!TM.getSubtarget().isFP64bit()); + // the cases where mthc1 is not available). 64-bit architectures and + // MIPS32r2 or later can use FGR64 though. + assert(Subtarget.isGP64bit() || Subtarget.hasMTHC1() || + !Subtarget.isFP64bit()); const TargetRegisterClass *RC = &Mips::GPR32RegClass; - const TargetRegisterClass *RC2 = &Mips::AFGR64RegClass; + const TargetRegisterClass *RC2 = + FP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass; - int FI = MF.getInfo()->getBuildPairF64_FI(RC2); + // We re-use the same spill slot each time so that the stack frame doesn't + // grow too much in functions with a large number of moves. 
+ int FI = MF.getInfo()->getMoveF64ViaSpillFI(RC2); TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, &TRI, 0); TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, &TRI, @@ -312,6 +336,62 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, return false; } +/// This method expands the same instruction that MipsSEInstrInfo:: +/// expandExtractElementF64 does, for the case when ABI is fpxx and mfhc1 is not +/// available and the case where the ABI is FP64A. It is implemented here +/// because frame indexes are eliminated before MipsSEInstrInfo:: +/// expandExtractElementF64 is called. +bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + bool FP64) const { + // For fpxx and when mfhc1 is not available, use: + // spill via sdc1 + reload via lw + // + // The case where dmfc1 is available doesn't need to be handled here + // because it never creates an ExtractElementF64 node. + // + // The FP64A ABI (fp64 with nooddspreg) must also use a spill/reload sequence + // for odd-numbered double precision values (because the lower 32 bits are + // transferred with mfc1 which is redirected to the upper half of the even + // register). Unfortunately, we have to make this decision before register + // allocation so for now we use a spill/reload sequence for all + // double-precision values regardless of whether the register is odd or even. 
+ + const TargetMachine &TM = MF.getTarget(); + const MipsSubtarget &Subtarget = TM.getSubtarget(); + if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) || + (FP64 && !Subtarget.useOddSPReg())) { + const MipsSEInstrInfo &TII = + *static_cast(TM.getInstrInfo()); + const MipsRegisterInfo &TRI = + *static_cast(TM.getRegisterInfo()); + + unsigned DstReg = I->getOperand(0).getReg(); + unsigned SrcReg = I->getOperand(1).getReg(); + unsigned N = I->getOperand(2).getImm(); + + // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are + // the cases where mfhc1 is not available). 64-bit architectures and + // MIPS32r2 or later can use FGR64 though. + assert(Subtarget.isGP64bit() || Subtarget.hasMTHC1() || + !Subtarget.isFP64bit()); + + const TargetRegisterClass *RC = + FP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass; + const TargetRegisterClass *RC2 = &Mips::GPR32RegClass; + + // We re-use the same spill slot each time so that the stack frame doesn't + // grow too much in functions with a large number of moves. 
+ int FI = MF.getInfo()->getMoveF64ViaSpillFI(RC); + TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC, &TRI, + 0); + TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, N * 4); + return true; + } + + return false; +} + MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI) : MipsFrameLowering(STI, STI.stackAlignment()) {} diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index d242659d076..26764611c50 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -512,6 +512,7 @@ void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const { + const MipsSubtarget &Subtarget = TM.getSubtarget(); unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); unsigned N = I->getOperand(2).getImm(); @@ -521,7 +522,15 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo; unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); - if (SubIdx == Mips::sub_hi && TM.getSubtarget().hasMTHC1()) { + // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload + // in MipsSEFrameLowering.cpp. + assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2())); + + // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload + // in MipsSEFrameLowering.cpp. + assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg())); + + if (SubIdx == Mips::sub_hi && Subtarget.hasMTHC1()) { // FIXME: Strictly speaking MFHC1 only reads the top 32-bits however, we // claim to read the whole 64-bits as part of a white lie used to // temporarily work around a widespread bug in the -mfp64 support. 
@@ -543,6 +552,7 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const { + const MipsSubtarget &Subtarget = TM.getSubtarget(); unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); @@ -564,10 +574,18 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, // The case where dmtc1 is available doesn't need to be handled here // because it never creates a BuildPairF64 node. + // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload + // in MipsSEFrameLowering.cpp. + assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2())); + + // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload + // in MipsSEFrameLowering.cpp. + assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg())); + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo)) .addReg(LoReg); - if (TM.getSubtarget().hasMTHC1()) { + if (Subtarget.hasMTHC1()) { // FIXME: The .addReg(DstReg) is a white lie used to temporarily work // around a widespread bug in the -mfp64 support. // The problem is that none of the 32-bit fpu ops mention the fact @@ -582,7 +600,7 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, BuildMI(MBB, I, dl, get(FP64 ? 
Mips::MTHC1_D64 : Mips::MTHC1_D32), DstReg) .addReg(DstReg) .addReg(HiReg); - } else if (TM.getSubtarget().isABI_FPXX()) + } else if (Subtarget.isABI_FPXX()) llvm_unreachable("BuildPairF64 not expanded in frame lowering code!"); else BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi)) diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 0254d4da4d2..902735d7810 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -153,9 +153,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, false); if (!isABI_O32() && !useOddSPReg()) - report_fatal_error("-mattr=+nooddspreg is not currently permitted for a " - "the O32 ABI.", - false); + report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.", false); if (IsFPXX && (isABI_N32() || isABI_N64())) report_fatal_error("FPXX is not permitted for the N32/N64 ABI's.", false); diff --git a/test/CodeGen/Mips/fp64a.ll b/test/CodeGen/Mips/fp64a.ll new file mode 100644 index 00000000000..5c2c87373a3 --- /dev/null +++ b/test/CodeGen/Mips/fp64a.ll @@ -0,0 +1,197 @@ +; Test that the FP64A ABI performs double precision moves via a spill/reload. +; The requirement is really that odd-numbered double precision registers do not +; use mfc1/mtc1 to move the bottom 32-bits (because the hardware will redirect +; this to the top 32-bits of the even register) but we have to make the decision +; before register allocation so we do this for all double-precision values. + +; We don't test MIPS32r1 since support for 64-bit coprocessors (such as a 64-bit +; FPU) on a 32-bit architecture was added in MIPS32r2. +; FIXME: We currently don't test that attempting to use FP64 on MIPS32r1 is an +; error either. This is because a large number of CodeGen tests are +; incorrectly using this case. We should fix those test cases then add +; this check here. 
+ +; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-BE +; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-BE +; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-LE +; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-LE + +; RUN: llc -march=mips64 -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A +; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A +; RUN: llc -march=mips64el -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A +; RUN: not llc -march=mips64el -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A + +; 64-FP64A: LLVM ERROR: -mattr=+nooddspreg requires the O32 ABI. + +declare double @dbl(); + +define double @call1(double %d, ...) 
{ + ret double %d + +; ALL-LABEL: call1: + +; 32R2-NO-FP64A-LE-NOT: addiu $sp, $sp +; 32R2-NO-FP64A-LE: mtc1 $4, $f0 +; 32R2-NO-FP64A-LE: mthc1 $5, $f0 + +; 32R2-NO-FP64A-BE-NOT: addiu $sp, $sp +; 32R2-NO-FP64A-BE: mtc1 $5, $f0 +; 32R2-NO-FP64A-BE: mthc1 $4, $f0 + +; 32R2-FP64A-LE: addiu $sp, $sp, -8 +; 32R2-FP64A-LE: sw $4, 0($sp) +; 32R2-FP64A-LE: sw $5, 4($sp) +; 32R2-FP64A-LE: ldc1 $f0, 0($sp) + +; 32R2-FP64A-BE: addiu $sp, $sp, -8 +; 32R2-FP64A-BE: sw $5, 0($sp) +; 32R2-FP64A-BE: sw $4, 4($sp) +; 32R2-FP64A-BE: ldc1 $f0, 0($sp) + +; 64-NO-FP64A: daddiu $sp, $sp, -64 +; 64-NO-FP64A: mov.d $f0, $f12 +} + +define double @call2(i32 %i, double %d) { + ret double %d + +; ALL-LABEL: call2: + +; 32R2-NO-FP64A-LE: mtc1 $6, $f0 +; 32R2-NO-FP64A-LE: mthc1 $7, $f0 + +; 32R2-NO-FP64A-BE: mtc1 $7, $f0 +; 32R2-NO-FP64A-BE: mthc1 $6, $f0 + +; 32R2-FP64A-LE: addiu $sp, $sp, -8 +; 32R2-FP64A-LE: sw $6, 0($sp) +; 32R2-FP64A-LE: sw $7, 4($sp) +; 32R2-FP64A-LE: ldc1 $f0, 0($sp) + +; 32R2-FP64A-BE: addiu $sp, $sp, -8 +; 32R2-FP64A-BE: sw $7, 0($sp) +; 32R2-FP64A-BE: sw $6, 4($sp) +; 32R2-FP64A-BE: ldc1 $f0, 0($sp) + +; 64-NO-FP64A-NOT: daddiu $sp, $sp +; 64-NO-FP64A: mov.d $f0, $f13 +} + +define double @call3(float %f1, float %f2, double %d) { + ret double %d + +; ALL-LABEL: call3: + +; 32R2-NO-FP64A-LE: mtc1 $6, $f0 +; 32R2-NO-FP64A-LE: mthc1 $7, $f0 + +; 32R2-NO-FP64A-BE: mtc1 $7, $f0 +; 32R2-NO-FP64A-BE: mthc1 $6, $f0 + +; 32R2-FP64A-LE: addiu $sp, $sp, -8 +; 32R2-FP64A-LE: sw $6, 0($sp) +; 32R2-FP64A-LE: sw $7, 4($sp) +; 32R2-FP64A-LE: ldc1 $f0, 0($sp) + +; 32R2-FP64A-BE: addiu $sp, $sp, -8 +; 32R2-FP64A-BE: sw $7, 0($sp) +; 32R2-FP64A-BE: sw $6, 4($sp) +; 32R2-FP64A-BE: ldc1 $f0, 0($sp) + +; 64-NO-FP64A-NOT: daddiu $sp, $sp +; 64-NO-FP64A: mov.d $f0, $f14 +} + +define double @call4(float %f, double %d, ...) 
{ + ret double %d + +; ALL-LABEL: call4: + +; 32R2-NO-FP64A-LE: mtc1 $6, $f0 +; 32R2-NO-FP64A-LE: mthc1 $7, $f0 + +; 32R2-NO-FP64A-BE: mtc1 $7, $f0 +; 32R2-NO-FP64A-BE: mthc1 $6, $f0 + +; 32R2-FP64A-LE: addiu $sp, $sp, -8 +; 32R2-FP64A-LE: sw $6, 0($sp) +; 32R2-FP64A-LE: sw $7, 4($sp) +; 32R2-FP64A-LE: ldc1 $f0, 0($sp) + +; 32R2-FP64A-BE: addiu $sp, $sp, -8 +; 32R2-FP64A-BE: sw $7, 0($sp) +; 32R2-FP64A-BE: sw $6, 4($sp) +; 32R2-FP64A-BE: ldc1 $f0, 0($sp) + +; 64-NO-FP64A: daddiu $sp, $sp, -48 +; 64-NO-FP64A: mov.d $f0, $f13 +} + +define double @call5(double %a, double %b, ...) { + %1 = fsub double %a, %b + ret double %1 + +; ALL-LABEL: call5: + +; 32R2-NO-FP64A-LE-DAG: mtc1 $4, $[[T0:f[0-9]+]] +; 32R2-NO-FP64A-LE-DAG: mthc1 $5, $[[T0:f[0-9]+]] +; 32R2-NO-FP64A-LE-DAG: mtc1 $6, $[[T1:f[0-9]+]] +; 32R2-NO-FP64A-LE-DAG: mthc1 $7, $[[T1:f[0-9]+]] +; 32R2-NO-FP64A-LE: sub.d $f0, $[[T0]], $[[T1]] + +; 32R2-NO-FP64A-BE-DAG: mtc1 $5, $[[T0:f[0-9]+]] +; 32R2-NO-FP64A-BE-DAG: mthc1 $4, $[[T0:f[0-9]+]] +; 32R2-NO-FP64A-BE-DAG: mtc1 $7, $[[T1:f[0-9]+]] +; 32R2-NO-FP64A-BE-DAG: mthc1 $6, $[[T1:f[0-9]+]] +; 32R2-NO-FP64A-BE: sub.d $f0, $[[T0]], $[[T1]] + +; 32R2-FP64A-LE: addiu $sp, $sp, -8 +; 32R2-FP64A-LE: sw $6, 0($sp) +; 32R2-FP64A-LE: sw $7, 4($sp) +; 32R2-FP64A-LE: ldc1 $[[T1:f[0-9]+]], 0($sp) +; 32R2-FP64A-LE: sw $4, 0($sp) +; 32R2-FP64A-LE: sw $5, 4($sp) +; 32R2-FP64A-LE: ldc1 $[[T0:f[0-9]+]], 0($sp) +; 32R2-FP64A-LE: sub.d $f0, $[[T0]], $[[T1]] + +; 32R2-FP64A-BE: addiu $sp, $sp, -8 +; 32R2-FP64A-BE: sw $7, 0($sp) +; 32R2-FP64A-BE: sw $6, 4($sp) +; 32R2-FP64A-BE: ldc1 $[[T1:f[0-9]+]], 0($sp) +; 32R2-FP64A-BE: sw $5, 0($sp) +; 32R2-FP64A-BE: sw $4, 4($sp) +; 32R2-FP64A-BE: ldc1 $[[T0:f[0-9]+]], 0($sp) +; 32R2-FP64A-BE: sub.d $f0, $[[T0]], $[[T1]] + +; 64-NO-FP64A: sub.d $f0, $f12, $f13 +} + +define double @move_from(double %d) { + %1 = call double @dbl() + %2 = call double @call2(i32 0, double %1) + ret double %2 + +; ALL-LABEL: move_from: + +; 32R2-NO-FP64A-LE-DAG: mfc1 
$6, $f0 +; 32R2-NO-FP64A-LE-DAG: mfhc1 $7, $f0 + +; 32R2-NO-FP64A-BE-DAG: mfc1 $7, $f0 +; 32R2-NO-FP64A-BE-DAG: mfhc1 $6, $f0 + +; 32R2-FP64A-LE: addiu $sp, $sp, -32 +; 32R2-FP64A-LE: sdc1 $f0, 16($sp) +; 32R2-FP64A-LE: lw $6, 16($sp) +; FIXME: This store is redundant +; 32R2-FP64A-LE: sdc1 $f0, 16($sp) +; 32R2-FP64A-LE: lw $7, 20($sp) + +; 32R2-FP64A-BE: addiu $sp, $sp, -32 +; 32R2-FP64A-BE: sdc1 $f0, 16($sp) +; 32R2-FP64A-BE: lw $6, 20($sp) +; FIXME: This store is redundant +; 32R2-FP64A-BE: sdc1 $f0, 16($sp) +; 32R2-FP64A-BE: lw $7, 16($sp) + +; 64-NO-FP64A: mov.d $f13, $f0 +} diff --git a/test/CodeGen/Mips/fpxx.ll b/test/CodeGen/Mips/fpxx.ll index fb75e360597..7e2ed22e2d8 100644 --- a/test/CodeGen/Mips/fpxx.ll +++ b/test/CodeGen/Mips/fpxx.ll @@ -11,38 +11,39 @@ ; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=64-FPXX ; RUN-TODO: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-NOFPXX -; RUN-TOOD: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-FPXX +; RUN-TODO: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-FPXX ; RUN-TODO: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-NOFPXX -; RUN-TOOD: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-FPXX +; RUN-TODO: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-FPXX +declare double @dbl(); -; 4-FPXX: LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's. -; 64-FPXX: LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's. +; 4-FPXX: LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's. +; 64-FPXX: LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's. 
define double @test1(double %d, ...) { ret double %d ; ALL-LABEL: test1: -; 32-NOFPXX: mtc1 $4, $f0 -; 32-NOFPXX: mtc1 $5, $f1 +; 32-NOFPXX: mtc1 $4, $f0 +; 32-NOFPXX: mtc1 $5, $f1 ; 32-FPXX: addiu $sp, $sp, -8 ; 32-FPXX: sw $4, 0($sp) ; 32-FPXX: sw $5, 4($sp) ; 32-FPXX: ldc1 $f0, 0($sp) -; 32R2-NOFPXX: mtc1 $4, $f0 -; 32R2-NOFPXX: mthc1 $5, $f0 +; 32R2-NOFPXX: mtc1 $4, $f0 +; 32R2-NOFPXX: mthc1 $5, $f0 -; 32R2-FPXX: mtc1 $4, $f0 -; 32R2-FPXX: mthc1 $5, $f0 +; 32R2-FPXX: mtc1 $4, $f0 +; 32R2-FPXX: mthc1 $5, $f0 ; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used. -; 4-NOFPXX: mov.d $f0, $f12 +; 4-NOFPXX: mov.d $f0, $f12 -; 64-NOFPXX: mov.d $f0, $f12 +; 64-NOFPXX: mov.d $f0, $f12 } define double @test2(i32 %i, double %d) { @@ -50,23 +51,23 @@ define double @test2(i32 %i, double %d) { ; ALL-LABEL: test2: -; 32-NOFPXX: mtc1 $6, $f0 -; 32-NOFPXX: mtc1 $7, $f1 +; 32-NOFPXX: mtc1 $6, $f0 +; 32-NOFPXX: mtc1 $7, $f1 ; 32-FPXX: addiu $sp, $sp, -8 ; 32-FPXX: sw $6, 0($sp) ; 32-FPXX: sw $7, 4($sp) ; 32-FPXX: ldc1 $f0, 0($sp) -; 32R2-NOFPXX: mtc1 $6, $f0 -; 32R2-NOFPXX: mthc1 $7, $f0 +; 32R2-NOFPXX: mtc1 $6, $f0 +; 32R2-NOFPXX: mthc1 $7, $f0 -; 32R2-FPXX: mtc1 $6, $f0 -; 32R2-FPXX: mthc1 $7, $f0 +; 32R2-FPXX: mtc1 $6, $f0 +; 32R2-FPXX: mthc1 $7, $f0 -; 4-NOFPXX: mov.d $f0, $f13 +; 4-NOFPXX: mov.d $f0, $f13 -; 64-NOFPXX: mov.d $f0, $f13 +; 64-NOFPXX: mov.d $f0, $f13 } define double @test3(float %f1, float %f2, double %d) { @@ -74,23 +75,23 @@ define double @test3(float %f1, float %f2, double %d) { ; ALL-LABEL: test3: -; 32-NOFPXX: mtc1 $6, $f0 -; 32-NOFPXX: mtc1 $7, $f1 +; 32-NOFPXX: mtc1 $6, $f0 +; 32-NOFPXX: mtc1 $7, $f1 ; 32-FPXX: addiu $sp, $sp, -8 ; 32-FPXX: sw $6, 0($sp) ; 32-FPXX: sw $7, 4($sp) ; 32-FPXX: ldc1 $f0, 0($sp) -; 32R2-NOFPXX: mtc1 $6, $f0 -; 32R2-NOFPXX: mthc1 $7, $f0 +; 32R2-NOFPXX: mtc1 $6, $f0 +; 32R2-NOFPXX: mthc1 $7, $f0 -; 32R2-FPXX: mtc1 $6, $f0 -; 32R2-FPXX: mthc1 $7, $f0 +; 32R2-FPXX: mtc1 $6, $f0 +; 32R2-FPXX: mthc1 
$7, $f0 -; 4-NOFPXX: mov.d $f0, $f14 +; 4-NOFPXX: mov.d $f0, $f14 -; 64-NOFPXX: mov.d $f0, $f14 +; 64-NOFPXX: mov.d $f0, $f14 } define double @test4(float %f, double %d, ...) { @@ -98,23 +99,23 @@ define double @test4(float %f, double %d, ...) { ; ALL-LABEL: test4: -; 32-NOFPXX: mtc1 $6, $f0 -; 32-NOFPXX: mtc1 $7, $f1 +; 32-NOFPXX: mtc1 $6, $f0 +; 32-NOFPXX: mtc1 $7, $f1 ; 32-FPXX: addiu $sp, $sp, -8 ; 32-FPXX: sw $6, 0($sp) ; 32-FPXX: sw $7, 4($sp) ; 32-FPXX: ldc1 $f0, 0($sp) -; 32R2-NOFPXX: mtc1 $6, $f0 -; 32R2-NOFPXX: mthc1 $7, $f0 +; 32R2-NOFPXX: mtc1 $6, $f0 +; 32R2-NOFPXX: mthc1 $7, $f0 -; 32R2-FPXX: mtc1 $6, $f0 -; 32R2-FPXX: mthc1 $7, $f0 +; 32R2-FPXX: mtc1 $6, $f0 +; 32R2-FPXX: mthc1 $7, $f0 -; 4-NOFPXX: mov.d $f0, $f13 +; 4-NOFPXX: mov.d $f0, $f13 -; 64-NOFPXX: mov.d $f0, $f13 +; 64-NOFPXX: mov.d $f0, $f13 } define double @test5() { @@ -122,21 +123,99 @@ define double @test5() { ; ALL-LABEL: test5: -; 32-NOFPXX: mtc1 $zero, $f0 -; 32-NOFPXX: mtc1 $zero, $f1 +; 32-NOFPXX: mtc1 $zero, $f0 +; 32-NOFPXX: mtc1 $zero, $f1 -; 32-FPXX: addiu $sp, $sp, -8 -; 32-FPXX: sw $zero, 0($sp) -; 32-FPXX: sw $zero, 4($sp) -; 32-FPXX: ldc1 $f0, 0($sp) +; 32-FPXX: addiu $sp, $sp, -8 +; 32-FPXX: sw $zero, 0($sp) +; 32-FPXX: sw $zero, 4($sp) +; 32-FPXX: ldc1 $f0, 0($sp) -; 32R2-NOFPXX: mtc1 $zero, $f0 -; 32R2-NOFPXX: mthc1 $zero, $f0 +; 32R2-NOFPXX: mtc1 $zero, $f0 +; 32R2-NOFPXX: mthc1 $zero, $f0 -; 32R2-FPXX: mtc1 $zero, $f0 -; 32R2-FPXX: mthc1 $zero, $f0 +; 32R2-FPXX: mtc1 $zero, $f0 +; 32R2-FPXX: mthc1 $zero, $f0 -; 4-NOFPXX: dmtc1 $zero, $f0 +; 4-NOFPXX: dmtc1 $zero, $f0 -; 64-NOFPXX: dmtc1 $zero, $f0 +; 64-NOFPXX: dmtc1 $zero, $f0 +} + +define double @test6(double %a, double %b, ...) 
{ + %1 = fsub double %a, %b + ret double %1 + +; ALL-LABEL: test6: + +; 32-NOFPXX-DAG: mtc1 $4, $[[T0:f[0-9]+]] +; 32-NOFPXX-DAG: mtc1 $5, ${{f[0-9]*[13579]}} +; 32-NOFPXX-DAG: mtc1 $6, $[[T1:f[0-9]+]] +; 32-NOFPXX-DAG: mtc1 $7, ${{f[0-9]*[13579]}} +; 32-NOFPXX: sub.d $f0, $[[T0]], $[[T1]] + +; 32-FPXX: addiu $sp, $sp, -8 +; 32-FPXX: sw $6, 0($sp) +; 32-FPXX: sw $7, 4($sp) +; 32-FPXX: ldc1 $[[T1:f[0-9]+]], 0($sp) +; 32-FPXX: sw $4, 0($sp) +; 32-FPXX: sw $5, 4($sp) +; 32-FPXX: ldc1 $[[T0:f[0-9]+]], 0($sp) +; 32-FPXX: sub.d $f0, $[[T0]], $[[T1]] + +; 32R2-NOFPXX-DAG: mtc1 $4, $[[T0:f[0-9]+]] +; 32R2-NOFPXX-DAG: mthc1 $5, $[[T0]] +; 32R2-NOFPXX-DAG: mtc1 $6, $[[T1:f[0-9]+]] +; 32R2-NOFPXX-DAG: mthc1 $7, $[[T1]] +; 32R2-NOFPXX: sub.d $f0, $[[T0]], $[[T1]] + +; 32R2-FPXX-DAG: mtc1 $4, $[[T0:f[0-9]+]] +; 32R2-FPXX-DAG: mthc1 $5, $[[T0]] +; 32R2-FPXX-DAG: mtc1 $6, $[[T1:f[0-9]+]] +; 32R2-FPXX-DAG: mthc1 $7, $[[T1]] +; 32R2-FPXX: sub.d $f0, $[[T0]], $[[T1]] + +; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used. +; 4-NOFPXX: sub.d $f0, $f12, $f13 + +; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used. +; 64-NOFPXX: sub.d $f0, $f12, $f13 +} + +define double @move_from1(double %d) { + %1 = call double @dbl() + %2 = call double @test2(i32 0, double %1) + ret double %2 + +; ALL-LABEL: move_from1: + +; 32-NOFPXX-DAG: mfc1 $6, $f0 +; 32-NOFPXX-DAG: mfc1 $7, $f1 + +; 32-FPXX: addiu $sp, $sp, -32 +; 32-FPXX: sdc1 $f0, 16($sp) +; 32-FPXX: lw $6, 16($sp) +; FIXME: This store is redundant +; 32-FPXX: sdc1 $f0, 16($sp) +; 32-FPXX: lw $7, 20($sp) + +; 32R2-NOFPXX-DAG: mfc1 $6, $f0 +; 32R2-NOFPXX-DAG: mfhc1 $7, $f0 + +; 32R2-FPXX-DAG: mfc1 $6, $f0 +; 32R2-FPXX-DAG: mfhc1 $7, $f0 + +; floats/doubles are not passed in integer registers for n64, so dmfc1 is not used. 
+; We can't use inline assembly to force a copy either because trying to force +; a copy to a GPR this way fails with "couldn't allocate input reg for +; constraint 'r'". It therefore seems impossible to test the generation of dmfc1 +; in a simple test. +; 4-NOFPXX: mov.d $f13, $f0 + +; floats/doubles are not passed in integer registers for n64, so dmfc1 is not used. +; We can't use inline assembly to force a copy either because trying to force +; a copy to a GPR this way fails with "couldn't allocate input reg for +; constraint 'r'". It therefore seems impossible to test the generation of dmfc1 +; in a simple test. +; 64-NOFPXX: mov.d $f13, $f0 }