diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index ec7daeeba76..fda2bddf841 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -24,7 +24,6 @@ #include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRecip.h" #include using namespace llvm; @@ -153,12 +152,6 @@ FuseFPOps("fp-contract", "Only fuse FP ops when the result won't be effected."), clEnumValEnd)); -cl::list -ReciprocalOps("recip", - cl::CommaSeparated, - cl::desc("Choose reciprocal operation types and parameters."), - cl::value_desc("all,none,default,divf,!vec-sqrtd,vec-divd:0,sqrt:9...")); - cl::opt DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::desc("Don't place zero-initialized symbols into bss section"), @@ -238,7 +231,6 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.LessPreciseFPMADOption = EnableFPMAD; Options.NoFramePointerElim = DisableFPElim; Options.AllowFPOpFusion = FuseFPOps; - Options.Reciprocals = ReciprocalOps; Options.UnsafeFPMath = EnableUnsafeFPMath; Options.NoInfsFPMath = EnableNoInfsFPMath; Options.NoNaNsFPMath = EnableNoNaNsFPMath; diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 0576d94d01e..d70fe94a9bb 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -15,7 +15,6 @@ #ifndef LLVM_TARGET_TARGETOPTIONS_H #define LLVM_TARGET_TARGETOPTIONS_H -#include "llvm/Target/TargetRecip.h" #include "llvm/MC/MCTargetOptions.h" #include @@ -72,8 +71,7 @@ namespace llvm { CompressDebugSections(false), FunctionSections(false), DataSections(false), UniqueSectionNames(true), TrapUnreachable(false), TrapFuncName(), FloatABIType(FloatABI::Default), - AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(), - JTType(JumpTable::Single), + AllowFPOpFusion(FPOpFusion::Standard), JTType(JumpTable::Single), ThreadModel(ThreadModel::POSIX) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs @@ -212,9 +210,6 @@ namespace llvm { /// the value of this option. FPOpFusion::FPOpFusionMode AllowFPOpFusion; - /// This class encapsulates options for reciprocal-estimate code generation. - TargetRecip Reciprocals; - /// JTType - This flag specifies the type of jump-instruction table to /// create for functions that have the jumptable attribute. JumpTable::JumpTableType JTType; @@ -249,7 +244,6 @@ inline bool operator==(const TargetOptions &LHS, ARE_EQUAL(TrapFuncName) && ARE_EQUAL(FloatABIType) && ARE_EQUAL(AllowFPOpFusion) && - ARE_EQUAL(Reciprocals) && ARE_EQUAL(JTType) && ARE_EQUAL(ThreadModel) && ARE_EQUAL(MCOptions); diff --git a/include/llvm/Target/TargetRecip.h b/include/llvm/Target/TargetRecip.h deleted file mode 100644 index d66f66ec6a7..00000000000 --- a/include/llvm/Target/TargetRecip.h +++ /dev/null @@ -1,72 +0,0 @@ -//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class is used to customize machine-specific reciprocal estimate code -// generation in a target-independent way. -// If a target does not support operations in this specification, then code -// generation will default to using supported operations. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_TARGETRECIP_H -#define LLVM_TARGET_TARGETRECIP_H - -#include -#include -#include - -namespace llvm { - -struct TargetRecip { -public: - TargetRecip(); - - /// Initialize all or part of the operations from command-line options or - /// a front end. - TargetRecip(const std::vector &Args); - - /// Set whether a particular reciprocal operation is enabled and how many - /// refinement steps are needed when using it. Use "all" to set enablement - /// and refinement steps for all operations. - void setDefaults(const StringRef &Key, bool Enable, unsigned RefSteps); - - /// Return true if the reciprocal operation has been enabled by default or - /// from the command-line. Return false if the operation has been disabled - /// by default or from the command-line. - bool isEnabled(const StringRef &Key) const; - - /// Return the number of iterations necessary to refine the - /// the result of a machine instruction for the given reciprocal operation. - unsigned getRefinementSteps(const StringRef &Key) const; - - bool operator==(const TargetRecip &Other) const; - -private: - enum { - Uninitialized = -1 - }; - - struct RecipParams { - int8_t Enabled; - int8_t RefinementSteps; - - RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {} - }; - - std::map RecipMap; - typedef std::map::iterator RecipIter; - typedef std::map::const_iterator ConstRecipIter; - - bool parseGlobalParams(const std::string &Arg); - void parseIndividualParams(const std::vector &Args); -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index e6d0199952f..1805437b12f 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -6,7 +6,6 @@ add_llvm_library(LLVMTarget TargetLoweringObjectFile.cpp TargetMachine.cpp TargetMachineC.cpp - TargetRecip.cpp TargetSubtargetInfo.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Target/TargetRecip.cpp b/lib/Target/TargetRecip.cpp deleted file mode 100644 index 42bc487fe6d..00000000000 --- a/lib/Target/TargetRecip.cpp +++ /dev/null @@ -1,225 +0,0 @@ -//===-------------------------- TargetRecip.cpp ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class is used to customize machine-specific reciprocal estimate code -// generation in a target-independent way. -// If a target does not support operations in this specification, then code -// generation will default to using supported operations. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetRecip.h" -#include - -using namespace llvm; - -// These are the names of the individual reciprocal operations. These are -// the key strings for queries and command-line inputs. -// In addition, the command-line interface recognizes the global parameters -// "all", "none", and "default". -static const char *RecipOps[] = { - "divd", - "divf", - "vec-divd", - "vec-divf", - "sqrtd", - "sqrtf", - "vec-sqrtd", - "vec-sqrtf", -}; - -// The uninitialized state is needed for the enabled settings and refinement -// steps because custom settings may arrive via the command-line before target -// defaults are set. -TargetRecip::TargetRecip() { - unsigned NumStrings = llvm::array_lengthof(RecipOps); - for (unsigned i = 0; i < NumStrings; ++i) - RecipMap.insert(std::make_pair(RecipOps[i], RecipParams())); -} - -static bool parseRefinementStep(const StringRef &In, size_t &Position, - uint8_t &Value) { - const char RefStepToken = ':'; - Position = In.find(RefStepToken); - if (Position == StringRef::npos) - return false; - - StringRef RefStepString = In.substr(Position + 1); - // Allow exactly one numeric character for the additional refinement - // step parameter. - if (RefStepString.size() == 1) { - char RefStepChar = RefStepString[0]; - if (RefStepChar >= '0' && RefStepChar <= '9') { - Value = RefStepChar - '0'; - return true; - } - } - report_fatal_error("Invalid refinement step for -recip."); -} - -bool TargetRecip::parseGlobalParams(const std::string &Arg) { - StringRef ArgSub = Arg; - - // Look for an optional setting of the number of refinement steps needed - // for this type of reciprocal operation. - size_t RefPos; - uint8_t RefSteps; - StringRef RefStepString; - if (parseRefinementStep(ArgSub, RefPos, RefSteps)) { - // Split the string for further processing. - RefStepString = ArgSub.substr(RefPos + 1); - ArgSub = ArgSub.substr(0, RefPos); - } - bool Enable; - bool UseDefaults; - if (ArgSub == "all") { - UseDefaults = false; - Enable = true; - } else if (ArgSub == "none") { - UseDefaults = false; - Enable = false; - } else if (ArgSub == "default") { - UseDefaults = true; - } else { - // Any other string is invalid or an individual setting. - return false; - } - - // All enable values will be initialized to target defaults if 'default' was - // specified. - if (!UseDefaults) - for (auto &KV : RecipMap) - KV.second.Enabled = Enable; - - // Custom refinement count was specified with all, none, or default. - if (!RefStepString.empty()) - for (auto &KV : RecipMap) - KV.second.RefinementSteps = RefSteps; - - return true; -} - -void TargetRecip::parseIndividualParams(const std::vector &Args) { - static const char DisabledPrefix = '!'; - unsigned NumArgs = Args.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - StringRef Val = Args[i]; - - bool IsDisabled = Val[0] == DisabledPrefix; - // Ignore the disablement token for string matching. - if (IsDisabled) - Val = Val.substr(1); - - size_t RefPos; - uint8_t RefSteps; - StringRef RefStepString; - if (parseRefinementStep(Val, RefPos, RefSteps)) { - // Split the string for further processing. - RefStepString = Val.substr(RefPos + 1); - Val = Val.substr(0, RefPos); - } - - RecipIter Iter = RecipMap.find(Val); - if (Iter == RecipMap.end()) { - // Try again specifying float suffix. - Iter = RecipMap.find(Val.str() + 'f'); - if (Iter == RecipMap.end()) { - Iter = RecipMap.find(Val.str() + 'd'); - assert(Iter == RecipMap.end() && "Float entry missing from map"); - report_fatal_error("Invalid option for -recip."); - } - - // The option was specified without a float or double suffix. - if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) { - // Make sure that the double entry was not already specified. - // The float entry will be checked below. - report_fatal_error("Duplicate option for -recip."); - } - } - - if (Iter->second.Enabled != Uninitialized) - report_fatal_error("Duplicate option for -recip."); - - // Mark the matched option as found. Do not allow duplicate specifiers. - Iter->second.Enabled = !IsDisabled; - if (!RefStepString.empty()) - Iter->second.RefinementSteps = RefSteps; - - // If the precision was not specified, the double entry is also initialized. - if (Val.back() != 'f' && Val.back() != 'd') { - RecipMap[Val.str() + 'd'].Enabled = !IsDisabled; - if (!RefStepString.empty()) - RecipMap[Val.str() + 'd'].RefinementSteps = RefSteps; - } - } -} - -TargetRecip::TargetRecip(const std::vector &Args) : - TargetRecip() { - unsigned NumArgs = Args.size(); - - // Check if "all", "default", or "none" was specified. - if (NumArgs == 1 && parseGlobalParams(Args[0])) - return; - - parseIndividualParams(Args); -} - -bool TargetRecip::isEnabled(const StringRef &Key) const { - ConstRecipIter Iter = RecipMap.find(Key); - assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); - assert(Iter->second.Enabled != Uninitialized && - "Enablement setting was not initialized"); - return Iter->second.Enabled; -} - -unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const { - ConstRecipIter Iter = RecipMap.find(Key); - assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); - assert(Iter->second.RefinementSteps != Uninitialized && - "Refinement step setting was not initialized"); - return Iter->second.RefinementSteps; -} - -/// Custom settings (previously initialized values) override target defaults. -void TargetRecip::setDefaults(const StringRef &Key, bool Enable, - unsigned RefSteps) { - if (Key == "all") { - for (auto &KV : RecipMap) { - RecipParams &RP = KV.second; - if (RP.Enabled == Uninitialized) - RP.Enabled = Enable; - if (RP.RefinementSteps == Uninitialized) - RP.RefinementSteps = RefSteps; - } - } else { - RecipParams &RP = RecipMap[Key]; - if (RP.Enabled == Uninitialized) - RP.Enabled = Enable; - if (RP.RefinementSteps == Uninitialized) - RP.RefinementSteps = RefSteps; - } -} - -bool TargetRecip::operator==(const TargetRecip &Other) const { - for (const auto &KV : RecipMap) { - const StringRef &Op = KV.first; - const RecipParams &RP = KV.second; - const RecipParams &OtherRP = Other.RecipMap.find(Op)->second; - if (RP.RefinementSteps != OtherRP.RefinementSteps) - return false; - if (RP.Enabled != OtherRP.Enabled) - return false; - } - return true; -} diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7001910a61e..c70e2e95463 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -188,6 +188,10 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; +def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true", + "Use RSQRT* to optimize square root calculations">; +def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst", + "true", "Use RCP* to optimize division calculations">; def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software floating point features.">; @@ -440,7 +444,7 @@ def : ProcessorModel<"btver2", BtVer2Model, FeaturePRFCHW, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C, FeatureMOVBE, FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem, - FeatureSlowSHLD]>; + FeatureSlowSHLD, FeatureUseSqrtEst, FeatureUseRecipEst]>; // TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 609973f8971..e850e500db6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -67,6 +67,12 @@ static cl::opt ExperimentalVectorWideningLegalization( "rather than promotion."), cl::Hidden); +static cl::opt ReciprocalEstimateRefinementSteps( + "x86-recip-refinement-steps", cl::init(1), + cl::desc("Specify the number of Newton-Raphson iterations applied to the " + "result of the hardware reciprocal estimate instruction."), + cl::NotHidden); + // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -12895,31 +12901,29 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const { - EVT VT = Op.getValueType(); - const char *RecipOp; + // FIXME: We should use instruction latency models to calculate the cost of + // each potential sequence, but this is very hard to do reliably because + // at least Intel's Core* chips have variable timing based on the number of + // significant digits in the divisor and/or sqrt operand. + if (!Subtarget->useSqrtEst()) + return SDValue(); - // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps. + EVT VT = Op.getValueType(); + + // SSE1 has rsqrtss and rsqrtps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision // rsqrt estimate with refinement on x86 prior to FMA requires at least 16 // instructions: convert to single, rsqrtss, convert back to double, refine // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if (VT == MVT::f32 && Subtarget->hasSSE1()) - RecipOp = "sqrtf"; - else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) || - (VT == MVT::v8f32 && Subtarget->hasAVX())) - RecipOp = "vec-sqrtf"; - else - return SDValue(); - - TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; - if (!Recips.isEnabled(RecipOp)) - return SDValue(); - - RefinementSteps = Recips.getRefinementSteps(RecipOp); - UseOneConstNR = false; - return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || + (Subtarget->hasAVX() && VT == MVT::v8f32)) { + RefinementSteps = 1; + UseOneConstNR = false; + return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + } + return SDValue(); } /// The minimum architected relative accuracy is 2^-12. We need one @@ -12927,9 +12931,15 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, SDValue X86TargetLowering::getRecipEstimate(SDValue Op, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const { + // FIXME: We should use instruction latency models to calculate the cost of + // each potential sequence, but this is very hard to do reliably because + // at least Intel's Core* chips have variable timing based on the number of + // significant digits in the divisor. + if (!Subtarget->useReciprocalEst()) + return SDValue(); + EVT VT = Op.getValueType(); - const char *RecipOp; - + // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision @@ -12937,20 +12947,12 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, // 15 instructions: convert to single, rcpss, convert back to double, refine // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if (VT == MVT::f32 && Subtarget->hasSSE1()) - RecipOp = "divf"; - else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) || - (VT == MVT::v8f32 && Subtarget->hasAVX())) - RecipOp = "vec-divf"; - else - return SDValue(); - - TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; - if (!Recips.isEnabled(RecipOp)) - return SDValue(); - - RefinementSteps = Recips.getRefinementSteps(RecipOp); - return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); + if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || + (Subtarget->hasAVX() && VT == MVT::v8f32)) { + RefinementSteps = ReciprocalEstimateRefinementSteps; + return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); + } + return SDValue(); } /// If we have at least two divisions that use the same divisor, convert to diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 65e702e7f35..1cdab14e034 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -273,6 +273,8 @@ void X86Subtarget::initializeEnvironment() { LEAUsesAG = false; SlowLEA = false; SlowIncDec = false; + UseSqrtEst = false; + UseReciprocalEst = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 27429d050bd..455dd7744d7 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -190,6 +190,16 @@ protected: /// True if INC and DEC instructions are slow when writing to flags bool SlowIncDec; + /// Use the RSQRT* instructions to optimize square root calculations. + /// For this to be profitable, the cost of FSQRT and FDIV must be + /// substantially higher than normal FP ops like FADD and FMUL. + bool UseSqrtEst; + + /// Use the RCP* instructions to optimize FP division calculations. + /// For this to be profitable, the cost of FDIV must be + /// substantially higher than normal FP ops like FADD and FMUL. + bool UseReciprocalEst; + /// Processor has AVX-512 PreFetch Instructions bool HasPFI; @@ -367,6 +377,8 @@ public: bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } bool slowIncDec() const { return SlowIncDec; } + bool useSqrtEst() const { return UseSqrtEst; } + bool useReciprocalEst() const { return UseReciprocalEst; } bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 33576f1771f..3e5f1d82202 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -105,13 +105,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, if (Subtarget.isTargetWin64()) this->Options.TrapUnreachable = true; - // TODO: By default, all reciprocal estimate operations are off because - // that matches the behavior before TargetRecip was added (except for btver2 - // which used subtarget features to enable this type of codegen). - // We should change this to match GCC behavior where everything but - // scalar division estimates are turned on by default with -ffast-math. - this->Options.Reciprocals.setDefaults("all", false, 1); - initAsmInfo(); } diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll index 7f1521a83bc..fcd077092da 100644 --- a/test/CodeGen/X86/recip-fastmath.ll +++ b/test/CodeGen/X86/recip-fastmath.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est | FileCheck %s --check-prefix=RECIP +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE ; If the target's divss/divps instructions are substantially ; slower than rcpss/rcpps with a Newton-Raphson refinement, diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll index 373fa53c970..4c6b521156e 100644 --- a/test/CodeGen/X86/sqrt-fastmath.ll +++ b/test/CodeGen/X86/sqrt-fastmath.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-sqrt-est | FileCheck %s --check-prefix=ESTIMATE declare double @__sqrt_finite(double) #0 declare float @__sqrtf_finite(float) #0