Set the precision mode for the X86 FPU to 64-bits instead of 80-bits. We
don't support long double anyway, and this gives us FP results closer to
other targets.

This also speeds up 179.art from 41.4s to 18.32s, by eliminating a problem
with extra precision that causes an FP == comparison to fail (leading to
extra loop iterations).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@18895 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2004-12-13 17:23:11 +00:00
parent 05f7e7991b
commit c0354c904b

View File

@ -115,6 +115,10 @@ namespace {
// Copy incoming arguments off of the stack...
LoadArgumentsToVirtualRegs(Fn);
// If this is main, emit special code.
if (Fn.hasExternalLinkage() && Fn.getName() == "main")
EmitSpecialCodeForMain();
// Instruction select everything except PHI nodes
visit(Fn);
@ -136,6 +140,10 @@ namespace {
return "X86 Simple Instruction Selection";
}
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.  This is invoked during instruction selection when the
/// function being compiled has external linkage and is named "main", after
/// its incoming arguments have been copied off the stack and before the rest
/// of the function is visited.
void EmitSpecialCodeForMain();
/// visitBasicBlock - This method is called when we are visiting a new basic
/// block. This simply creates a new MachineBasicBlock to emit code into
/// and adds it to the current MachineFunction. Subsequent visit* for
@ -650,6 +658,20 @@ void X86ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
}
/// EmitSpecialCodeForMain - Emit the code that must run only on entry to the
/// program's main function.  At present this is a three-instruction sequence
/// that drops the x87 FPU from its default extended precision to 64-bit
/// precision, which is both faster and closer to what other targets compute:
///
///   1. store the current FPU control word to a scratch stack slot,
///   2. overwrite the high byte of that slot with the 64-bit precision
///      setting,
///   3. load the patched control word back into the FPU.
///
/// The instructions are appended to the current basic block (BB) in exactly
/// that order.
void X86ISel::EmitSpecialCodeForMain() {
  // Scratch slot that holds the 16-bit control word while it is patched.
  // NOTE(review): the (2, 2) arguments are presumably size and alignment in
  // bytes -- confirm against MachineFrameInfo::CreateStackObject.
  const int ControlWordSlot = F->getFrameInfo()->CreateStackObject(2, 2);

  // Byte offset, within the slot, of the control word's high byte.
  const int HighByteOffset = 1;

  // (1) Spill the live control word into the scratch slot.
  addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), ControlWordSlot);

  // (2) Store the immediate 2 into the high byte; this is the value that
  //     selects 64-bit precision.
  addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
                    ControlWordSlot, HighByteOffset).addImm(2);

  // (3) Make the patched control word take effect.
  addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), ControlWordSlot);
}
/// SelectPHINodes - Insert machine code to generate phis. This is tricky
/// because we have to generate our sources into the source basic blocks, not