diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h index 9111cb1ff0b..46eaef2871b 100644 --- a/include/llvm/Target/TargetLibraryInfo.h +++ b/include/llvm/Target/TargetLibraryInfo.h @@ -46,6 +46,10 @@ namespace llvm { Znwm, /// void *new(unsigned long, nothrow); ZnwmRKSt9nothrow_t, + /// double __cospi(double x); + cospi, + /// float __cospif(float x); + cospif, /// int __cxa_atexit(void (*f)(void *), void *p, void *d); cxa_atexit, /// void __cxa_guard_abort(guard_t *guard); @@ -61,6 +65,14 @@ namespace llvm { dunder_isoc99_sscanf, /// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); memcpy_chk, + /// double __sincospi_stret(double x); + sincospi_stret, + /// float __sincospi_stretf(float x); + sincospi_stretf, + /// double __sinpi(double x); + sinpi, + /// float __sinpif(float x); + sinpif, /// double __sqrt_finite(double x); sqrt_finite, /// float __sqrt_finite(float x); diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 972f7cb8f2f..3e68fe16d4a 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -38,6 +38,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "_ZnwjRKSt9nothrow_t", "_Znwm", "_ZnwmRKSt9nothrow_t", + "__cospi", + "__cospif", "__cxa_atexit", "__cxa_guard_abort", "__cxa_guard_acquire", @@ -45,6 +47,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "__isoc99_scanf", "__isoc99_sscanf", "__memcpy_chk", + "__sincospi_stret", + "__sincospi_stretf", + "__sinpi", + "__sinpif", "__sqrt_finite", "__sqrtf_finite", "__sqrtl_finite", @@ -331,6 +337,24 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "write" }; +static bool hasSinCosPiStret(const Triple &T) { + // Only Darwin variants have _stret versions of combined trig functions. + if (!T.isMacOSX() && T.getOS() != Triple::IOS) + return false; + + // The ABI is rather complicated on x86, so don't do anything special there. + if (T.getArch() == Triple::x86) + return false; + + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9)) + return false; + + if (T.getOS() == Triple::IOS && T.isOSVersionLT(7, 0)) + return false; + + return true; +} + /// initialize - Initialize the set of available library functions based on the /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. @@ -357,6 +381,15 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::memset_pattern16); } + if (!hasSinCosPiStret(T)) { + TLI.setUnavailable(LibFunc::sinpi); + TLI.setUnavailable(LibFunc::sinpif); + TLI.setUnavailable(LibFunc::cospi); + TLI.setUnavailable(LibFunc::cospif); + TLI.setUnavailable(LibFunc::sincospi_stret); + TLI.setUnavailable(LibFunc::sincospi_stretf); + } + if (T.isMacOSX() && T.getArch() == Triple::x86 && !T.isMacOSXVersionLT(10, 7)) { // x86-32 OSX has a scheme where fwrite and fputs (and some other functions diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index cbdd070f1dc..d8388518671 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -1252,6 +1253,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { } }; +struct SinCosPiOpt : public LibCallOptimization { + SinCosPiOpt() {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Make sure the prototype is as expected, otherwise the rest of the + // function is probably invalid and likely to abort. + if (!isTrigLibCall(CI)) + return 0; + + Value *Arg = CI->getArgOperand(0); + SmallVector SinCalls; + SmallVector CosCalls; + SmallVector SinCosCalls; + + bool IsFloat = Arg->getType()->isFloatTy(); + + // Look for all compatible sinpi, cospi and sincospi calls with the same + // argument. If there are enough (in some sense) we can make the + // substitution. + for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) + classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls, + SinCosCalls); + + // It's only worthwhile if both sinpi and cospi are actually used. + if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) + return 0; + + Value *Sin, *Cos, *SinCos; + insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, + SinCos); + + replaceTrigInsts(SinCalls, Sin); + replaceTrigInsts(CosCalls, Cos); + replaceTrigInsts(SinCosCalls, SinCos); + + return 0; + } + + bool isTrigLibCall(CallInst *CI) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. + bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) && + CI->hasFnAttr(Attribute::ReadNone); + + // Other than that we need float(float) or double(double) + return AttributesSafe && FT->getNumParams() == 1 && + FT->getReturnType() == FT->getParamType(0) && + (FT->getParamType(0)->isFloatTy() || + FT->getParamType(0)->isDoubleTy()); + } + + void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, + SmallVectorImpl &SinCalls, + SmallVectorImpl &CosCalls, + SmallVectorImpl &SinCosCalls) { + CallInst *CI = dyn_cast(Val); + + if (!CI) + return; + + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + LibFunc::Func Func; + if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) + return; + + if (IsFloat) { + if (Func == LibFunc::sinpif) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospif) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stretf) + SinCosCalls.push_back(CI); + } else { + if (Func == LibFunc::sinpi) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospi) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stret) + SinCosCalls.push_back(CI); + } + } + + void replaceTrigInsts(SmallVectorImpl &Calls, Value *Res) { + for (SmallVectorImpl::iterator I = Calls.begin(), + E = Calls.end(); + I != E; ++I) { + LCS->replaceAllUsesWith(*I, Res); + } + } + + void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, + bool UseFloat, Value *&Sin, Value *&Cos, + Value *&SinCos) { + Type *ArgTy = Arg->getType(); + Type *ResTy; + StringRef Name; + + Triple T(OrigCallee->getParent()->getTargetTriple()); + if (UseFloat) { + Name = "__sincospi_stretf"; + + assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); + // x86_64 can't use {float, float} since that would be returned in both + // xmm0 and xmm1, which isn't what a real struct would do. + ResTy = T.getArch() == Triple::x86_64 + ? static_cast(VectorType::get(ArgTy, 2)) + : static_cast(StructType::get(ArgTy, ArgTy, NULL)); + } else { + Name = "__sincospi_stret"; + ResTy = StructType::get(ArgTy, ArgTy, NULL); + } + + Module *M = OrigCallee->getParent(); + Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), + ResTy, ArgTy, NULL); + + if (Instruction *ArgInst = dyn_cast(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. + BasicBlock::iterator Loc = ArgInst; + B.SetInsertPoint(ArgInst->getParent(), ++Loc); + } else { + // Otherwise (e.g. for a constant) the beginning of the function is as + // good a place as any. + BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); + B.SetInsertPoint(&EntryBB, EntryBB.begin()); + } + + SinCos = B.CreateCall(Callee, Arg, "sincospi"); + + if (SinCos->getType()->isStructTy()) { + Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); + Cos = B.CreateExtractValue(SinCos, 1, "cospi"); + } else { + Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), + "sinpi"); + Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), + "cospi"); + } + } + +}; + //===----------------------------------------------------------------------===// // Integer Library Call Optimizations //===----------------------------------------------------------------------===// @@ -1764,6 +1914,7 @@ static MemSetOpt MemSet; // Math library call optimizations. static UnaryDoubleFPOpt UnaryDoubleFP(false); static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); +static SinCosPiOpt SinCosPi; // Integer library call optimizations. static FFSOpt FFS; @@ -1848,6 +1999,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::cos: case LibFunc::cosl: return &Cos; + case LibFunc::sinpif: + case LibFunc::sinpi: + case LibFunc::cospif: + case LibFunc::cospi: + return &SinCosPi; case LibFunc::powf: case LibFunc::pow: case LibFunc::powl: diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll new file mode 100644 index 00000000000..0d1a6027a00 --- /dev/null +++ b/test/Transforms/InstCombine/sincospi.ll @@ -0,0 +1,91 @@ +; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s +; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS +; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS + + +attributes #0 = { readnone nounwind } + +declare float @__sinpif(float %x) #0 +declare float @__cospif(float %x) #0 + +declare double @__sinpi(double %x) #0 +declare double @__cospi(double %x) #0 + +@var32 = global float 0.0 +@var64 = global double 0.0 + +define float @test_instbased_f32() { + %val = load float* @var32 + %sin = call float @__sinpif(float %val) #0 + %cos = call float @__cospif(float %val) #0 + %res = fadd float %sin, %cos + ret float %res +; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32 +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]]) +; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0 +; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1 + +; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32 +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]]) +; CHECK: extractvalue { float, float } [[SINCOS]], 0 +; CHECK: extractvalue { float, float } [[SINCOS]], 1 + +; CHECK-NO-SINCOS: call float @__sinpif +; CHECK-NO-SINCOS: call float @__cospif +} + +define float @test_constant_f32() { + %sin = call float @__sinpif(float 1.0) #0 + %cos = call float @__cospif(float 1.0) #0 + %res = fadd float %sin, %cos + ret float %res +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00) +; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0 +; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1 + +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00) +; CHECK: extractvalue { float, float } [[SINCOS]], 0 +; CHECK: extractvalue { float, float } [[SINCOS]], 1 + +; CHECK-NO-SINCOS: call float @__sinpif +; CHECK-NO-SINCOS: call float @__cospif +} + +define double @test_instbased_f64() { + %val = load double* @var64 + %sin = call double @__sinpi(double %val) #0 + %cos = call double @__cospi(double %val) #0 + %res = fadd double %sin, %cos + ret double %res +; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double* @var64 +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]]) +; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0 +; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1 + +; CHECK: [[VAL:%[a-z0-9]+]] = load double* @var64 +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]]) +; CHECK: extractvalue { double, double } [[SINCOS]], 0 +; CHECK: extractvalue { double, double } [[SINCOS]], 1 + +; CHECK-NO-SINCOS: call double @__sinpi +; CHECK-NO-SINCOS: call double @__cospi +} + +define double @test_constant_f64() { + %sin = call double @__sinpi(double 1.0) #0 + %cos = call double @__cospi(double 1.0) #0 + %res = fadd double %sin, %cos + ret double %res +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00) +; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0 +; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1 + +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00) +; CHECK: extractvalue { double, double } [[SINCOS]], 0 +; CHECK: extractvalue { double, double } [[SINCOS]], 1 + +; CHECK-NO-SINCOS: call double @__sinpi +; CHECK-NO-SINCOS: call double @__cospi +}