diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 8df138d8d7a..4d2636484f9 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -55,6 +55,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions">; +def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", + "Enable AVX instructions">; +def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", + "Enable three-operand fused multiple-add">; +def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", + "Enable four-operand fused multiple-add">; + //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// @@ -82,6 +89,9 @@ def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; +// Sandy Bridge does not have FMA +def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 201367eabfb..a6b0880dac0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -236,6 +236,10 @@ def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; +def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; +def HasAVX : Predicate<"Subtarget->hasAVX()">; +def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; +def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 56983ce7f16..8506fa66a64 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -207,6 +207,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; + + HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); + HasAVX = ((ECX >> 28) & 0x1); + if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. unsigned Family = 0; @@ -217,6 +221,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); HasX86_64 = (EDX >> 29) & 0x1; HasSSE4A = IsAMD && ((ECX >> 6) & 0x1); + HasFMA4 = IsAMD && ((ECX >> 16) & 0x1); } } @@ -342,6 +347,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) , HasX86_64(false) + , HasSSE4A(false) + , HasAVX(false) + , HasFMA3(false) + , HasFMA4(false) , IsBTMemSlow(false) , DarwinVers(0) , IsLinux(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 694b0ebac86..f4f6cce7688 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -64,12 +64,21 @@ protected: /// bool HasX86_64; - /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. - bool IsBTMemSlow; - /// HasSSE4A - True if the processor supports SSE4A instructions. bool HasSSE4A; + /// HasAVX - Target has AVX instructions + bool HasAVX; + + /// HasFMA3 - Target has 3-operand fused multiply-add + bool HasFMA3; + + /// HasFMA4 - Target has 4-operand fused multiply-add + bool HasFMA4; + + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. + bool IsBTMemSlow; + /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform. @@ -133,6 +142,9 @@ public: bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } + bool hasAVX() const { return hasAVX(); } + bool hasFMA3() const { return HasFMA3; } + bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; }