From f9abd7e33ea6e8f57176d0069d61595c1347a5ff Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 11 Mar 2009 22:30:01 +0000 Subject: [PATCH] Add a -no-implicit-float flag. This acts like -soft-float, but may generate floating point instructions that are explicitly specified by the user. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@66719 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetOptions.h | 6 ++ lib/Target/TargetMachine.cpp | 122 ++++++++++++++-------------- lib/Target/X86/X86ISelLowering.cpp | 25 +++--- 3 files changed, 78 insertions(+), 75 deletions(-) diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index d53e399b859..54468f440d5 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -65,6 +65,12 @@ namespace llvm { /// target FP instructions. extern bool UseSoftFloat; + /// NoImplicitFloat - This flag is enabled when the -no-implicit-float flag is + /// specified on the command line. When this flag is on, the code generator + /// won't generate any implicit floating point instructions. I.e., no XMM or + /// x87 or vectorized memcpy/memmove instructions. This is for X86 only. + extern bool NoImplicitFloat; + /// NoZerosInBSS - By default some codegens place zero-initialized data to /// .bss section. This flag disables such behaviour (necessary, e.g. for /// crt*.o compiling). 
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 49f1e4dfaae..2aed0833412 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -29,6 +29,7 @@ namespace llvm { bool FiniteOnlyFPMathOption; bool HonorSignDependentRoundingFPMathOption; bool UseSoftFloat; + bool NoImplicitFloat; bool NoZerosInBSS; bool ExceptionHandling; bool UnwindTablesMandatory; @@ -43,61 +44,64 @@ namespace llvm { bool DisableRedZone; } -static cl::opt PrintCode("print-machineinstrs", +static cl::opt +PrintCode("print-machineinstrs", cl::desc("Print generated machine code"), cl::location(PrintMachineCode), cl::init(false)); - static cl::opt DisableFPElim("disable-fp-elim", - cl::desc("Disable frame pointer elimination optimization"), - cl::location(NoFramePointerElim), - cl::init(false)); + cl::desc("Disable frame pointer elimination optimization"), + cl::location(NoFramePointerElim), + cl::init(false)); static cl::opt DisableExcessPrecision("disable-excess-fp-precision", - cl::desc("Disable optimizations that may increase FP precision"), - cl::location(NoExcessFPPrecision), - cl::init(false)); + cl::desc("Disable optimizations that may increase FP precision"), + cl::location(NoExcessFPPrecision), + cl::init(false)); static cl::opt EnableUnsafeFPMath("enable-unsafe-fp-math", - cl::desc("Enable optimizations that may decrease FP precision"), - cl::location(UnsafeFPMath), - cl::init(false)); + cl::desc("Enable optimizations that may decrease FP precision"), + cl::location(UnsafeFPMath), + cl::init(false)); static cl::opt EnableFiniteOnlyFPMath("enable-finite-only-fp-math", - cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"), - cl::location(FiniteOnlyFPMathOption), - cl::init(false)); + cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"), + cl::location(FiniteOnlyFPMathOption), + cl::init(false)); static cl::opt -EnableHonorSignDependentRoundingFPMath(cl::Hidden, - "enable-sign-dependent-rounding-fp-math", - 
cl::desc("Force codegen to assume rounding mode can change dynamically"), - cl::location(HonorSignDependentRoundingFPMathOption), - cl::init(false)); - +EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", + cl::Hidden, + cl::desc("Force codegen to assume rounding mode can change dynamically"), + cl::location(HonorSignDependentRoundingFPMathOption), + cl::init(false)); static cl::opt GenerateSoftFloatCalls("soft-float", - cl::desc("Generate software floating point library calls"), - cl::location(UseSoftFloat), - cl::init(false)); + cl::desc("Generate software floating point library calls"), + cl::location(UseSoftFloat), + cl::init(false)); +static cl::opt +GenerateNoImplicitFloats("no-implicit-float", + cl::desc("Don't generate implicit floating point instructions (x86-only)"), + cl::location(NoImplicitFloat), + cl::init(false)); static cl::opt DontPlaceZerosInBSS("nozero-initialized-in-bss", - cl::desc("Don't place zero-initialized symbols into bss section"), - cl::location(NoZerosInBSS), - cl::init(false)); + cl::desc("Don't place zero-initialized symbols into bss section"), + cl::location(NoZerosInBSS), + cl::init(false)); static cl::opt EnableExceptionHandling("enable-eh", - cl::desc("Emit DWARF exception handling (default if target supports)"), - cl::location(ExceptionHandling), - cl::init(false)); + cl::desc("Emit DWARF exception handling (default if target supports)"), + cl::location(ExceptionHandling), + cl::init(false)); static cl::opt EnableUnwindTables("unwind-tables", - cl::desc("Generate unwinding tables for all functions"), - cl::location(UnwindTablesMandatory), - cl::init(false)); + cl::desc("Generate unwinding tables for all functions"), + cl::location(UnwindTablesMandatory), + cl::init(false)); static cl::opt -DefRelocationModel( - "relocation-model", +DefRelocationModel("relocation-model", cl::desc("Choose relocation model"), cl::location(RelocationModel), cl::init(Reloc::Default), @@ -112,8 +116,7 @@ 
DefRelocationModel( "Relocatable external references, non-relocatable code"), clEnumValEnd)); static cl::opt -DefCodeModel( - "code-model", +DefCodeModel("code-model", cl::desc("Choose code model"), cl::location(CMModel), cl::init(CodeModel::Default), @@ -129,47 +132,40 @@ DefCodeModel( clEnumValN(CodeModel::Large, "large", "Large code model"), clEnumValEnd)); - static cl::opt EnablePerformTailCallOpt("tailcallopt", - cl::desc("Turn on tail call optimization."), - cl::location(PerformTailCallOpt), - cl::init(false)); - + cl::desc("Turn on tail call optimization."), + cl::location(PerformTailCallOpt), + cl::init(false)); static cl::opt OverrideStackAlignment("stack-alignment", - cl::desc("Override default stack alignment"), - cl::location(StackAlignment), - cl::init(0)); - + cl::desc("Override default stack alignment"), + cl::location(StackAlignment), + cl::init(0)); static cl::opt EnableRealignStack("realign-stack", - cl::desc("Realign stack if needed"), - cl::location(RealignStack), - cl::init(true)); - + cl::desc("Realign stack if needed"), + cl::location(RealignStack), + cl::init(true)); static cl::opt AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), - cl::location(VerboseAsm), - cl::init(false)); - + cl::location(VerboseAsm), + cl::init(false)); static cl::opt DisableSwitchTables(cl::Hidden, "disable-jump-tables", - cl::desc("Do not generate jump tables."), - cl::location(DisableJumpTables), - cl::init(false)); - + cl::desc("Do not generate jump tables."), + cl::location(DisableJumpTables), + cl::init(false)); static cl::opt EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", - cl::desc("Use strong PHI elimination."), - cl::location(StrongPHIElim), - cl::init(false)); - + cl::desc("Use strong PHI elimination."), + cl::location(StrongPHIElim), + cl::init(false)); static cl::opt DisableRedZoneOption("disable-red-zone", - cl::desc("Do not emit code that uses the red zone."), - cl::location(DisableRedZone), - cl::init(false)); + cl::desc("Do not 
emit code that uses the red zone."), + cl::location(DisableRedZone), + cl::init(false)); //--------------------------------------------------------------------------- // TargetMachine Class diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1c10df52e5c..2de73778793 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -493,7 +493,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // Long double always uses X87. - if (!UseSoftFloat) { + if (!UseSoftFloat && !NoImplicitFloat) { addRegisterClass(MVT::f80, X86::RFP80RegisterClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -582,7 +582,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. - if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { + if (!UseSoftFloat && !NoImplicitFloat && !DisableMMX && Subtarget->hasMMX()) { addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); @@ -654,7 +654,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand); + setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand); setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand); setOperationAction(ISD::SELECT, MVT::v8i8, Promote); setOperationAction(ISD::SELECT, MVT::v4i16, Promote); @@ -662,7 +662,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v1i64, Custom); } - if (!UseSoftFloat && Subtarget->hasSSE1()) { + if (!UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); setOperationAction(ISD::FADD, MVT::v4f32, 
Legal); @@ -679,11 +679,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v4f32, Custom); } - if (!UseSoftFloat && Subtarget->hasSSE2()) { + if (!UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2()) { addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); - // FIXME: Unfortunately -soft-float means XMM registers cannot be used even - // for integer operations. + // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM + // registers cannot be used even for integer operations. addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); @@ -727,12 +727,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); + if (Subtarget->is64Bit()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); @@ -888,7 +890,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. 
- if (Subtarget->getStackAlignment() >= 16) { + if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) { if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16) return MVT::v4i32; if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16) @@ -899,7 +901,6 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, return MVT::i32; } - /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC /// jumptable. SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, @@ -1434,13 +1435,13 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); - assert(!(NumXMMRegs && UseSoftFloat) && + assert(!(NumXMMRegs && (UseSoftFloat || NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"); - if (UseSoftFloat || !Subtarget->hasSSE1()) { + if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1()) // Kernel mode asks for SSE to be disabled, so don't push them // on the stack. TotalNumXMMRegs = 0; - } + // For X86-64, if there are vararg parameters that are passed via // registers, then we must store them to their spots on the stack so they // may be loaded by deferencing the result of va_next.