diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index b101751e707..4ae6407b1f4 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "delay-slot-filler" #include "Sparc.h" +#include "SparcSubtarget.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -39,10 +40,13 @@ namespace { /// layout, etc. /// TargetMachine &TM; + const SparcSubtarget *Subtarget; static char ID; Filler(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) { } + : MachineFunctionPass(ID), TM(tm), + Subtarget(&TM.getSubtarget()) { + } virtual const char *getPassName() const { return "SPARC Delay Slot Filler"; @@ -102,6 +106,8 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; + const TargetInstrInfo *TII = TM.getInstrInfo(); + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { MachineBasicBlock::iterator MI = I; ++I; @@ -114,6 +120,14 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { continue; } + if (!Subtarget->isV9() && + (MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD + || MI->getOpcode() == SP::FCMPQ)) { + BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP)); + Changed = true; + continue; + } + // If MI has no delay slot, skip. if (!MI->hasDelaySlot()) continue; @@ -126,7 +140,6 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { ++FilledSlots; Changed = true; - const TargetInstrInfo *TII = TM.getInstrInfo(); if (D == MBB.end()) BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP)); else diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 470de70b457..e6a9cf53d18 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -807,20 +807,22 @@ def FDIVQ : F3_3<2, 0b110100, 0b001001111, // Floating-point Compare Instructions, p. 148 // Note: the 2nd template arg is different for these guys. // Note 2: the result of a FCMP is not available until the 2nd cycle -// after the instr is retired, but there is no interlock. This behavior -// is modelled with a forced noop after the instruction. +// after the instr is retired, but there is no interlock in Sparc V8. +// This behavior is modeled with a forced noop after the instruction in +// DelaySlotFiller. + let Defs = [FCC] in { def FCMPS : F3_3c<2, 0b110101, 0b001010001, (outs), (ins FPRegs:$src1, FPRegs:$src2), - "fcmps $src1, $src2\n\tnop", + "fcmps $src1, $src2", [(SPcmpfcc f32:$src1, f32:$src2)]>; def FCMPD : F3_3c<2, 0b110101, 0b001010010, (outs), (ins DFPRegs:$src1, DFPRegs:$src2), - "fcmpd $src1, $src2\n\tnop", + "fcmpd $src1, $src2", [(SPcmpfcc f64:$src1, f64:$src2)]>; def FCMPQ : F3_3c<2, 0b110101, 0b001010011, (outs), (ins QFPRegs:$src1, QFPRegs:$src2), - "fcmpq $src1, $src2\n\tnop", + "fcmpq $src1, $src2", [(SPcmpfcc f128:$src1, f128:$src2)]>, Requires<[HasHardQuad]>; } diff --git a/test/CodeGen/SPARC/2011-01-11-CC.ll b/test/CodeGen/SPARC/2011-01-11-CC.ll index 0cb864a7d5d..50f3a65ff9a 100644 --- a/test/CodeGen/SPARC/2011-01-11-CC.ll +++ b/test/CodeGen/SPARC/2011-01-11-CC.ll @@ -66,9 +66,11 @@ define i32 @test_select_int_fcc(float %f, i32 %a, i32 %b) nounwind readnone noin entry: ;V8-LABEL: test_select_int_fcc: ;V8: fcmps +;V8-NEXT: nop ;V8: {{fbe|fbne}} ;V9-LABEL: test_select_int_fcc: ;V9: fcmps +;V9-NEXT-NOT: nop ;V9-NOT: {{fbe|fbne}} ;V9: mov{{e|ne}} %fcc0 %0 = fcmp une float %f, 0.000000e+00 @@ -95,9 +97,11 @@ define double @test_select_dfp_fcc(double %f, double %f1, double %f2) nounwind r entry: ;V8-LABEL: test_select_dfp_fcc: ;V8: fcmpd +;V8-NEXT: nop ;V8: {{fbne|fbe}} ;V9-LABEL: test_select_dfp_fcc: ;V9: fcmpd +;V9-NEXT-NOT: nop ;V9-NOT: {{fbne|fbe}} ;V9: fmovd{{e|ne}} %fcc0 %0 = fcmp une double %f, 0.000000e+00 diff --git a/test/CodeGen/SPARC/fp128.ll b/test/CodeGen/SPARC/fp128.ll index 789e074d917..7820b766388 100644 --- a/test/CodeGen/SPARC/fp128.ll +++ b/test/CodeGen/SPARC/fp128.ll @@ -64,6 +64,7 @@ entry: ; HARD-LABEL: f128_compare ; HARD: fcmpq +; HARD-NEXT: nop ; SOFT-LABEL: f128_compare ; SOFT: _Q_cmp