From 559806f575297866609c7bef0e5c1084dcdda9a5 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 27 Jan 2006 08:10:46 +0000 Subject: [PATCH] x86 CPU detection and proper subtarget support git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25679 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.h | 6 -- lib/Target/X86/X86.td | 50 +++++++------ lib/Target/X86/X86ISelDAGToDAG.cpp | 2 +- lib/Target/X86/X86ISelLowering.cpp | 7 +- lib/Target/X86/X86ISelLowering.h | 8 ++ lib/Target/X86/X86ISelPattern.cpp | 4 + lib/Target/X86/X86InstrInfo.td | 8 +- lib/Target/X86/X86Subtarget.cpp | 111 +++++++++++++++++----------- lib/Target/X86/X86Subtarget.h | 37 ++++++---- lib/Target/X86/X86TargetMachine.cpp | 37 +--------- 10 files changed, 138 insertions(+), 132 deletions(-) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index ff109c94269..3f5332368c5 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -25,12 +25,6 @@ class FunctionPass; class IntrinsicLowering; class MachineCodeEmitter; -enum X86VectorEnum { - AutoDetect, NoSSE, SSE, SSE2, SSE3 -}; - -extern X86VectorEnum X86Vector; -extern bool X86ScalarSSE; extern bool X86DAGIsel; /// createX86ISelPattern - This pass converts an LLVM function into a diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index f341949e00b..6b7b131bb40 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -20,19 +20,19 @@ include "../Target.td" // X86 Subtarget features. 
// -def Feature64Bit : SubtargetFeature<"64bit", "bool", "Is64Bit", - "Enable 64-bit instructions">; -def FeatureMMX : SubtargetFeature<"mmx", "bool", "HasMMX", +def Feature64Bit : SubtargetFeature<"64bit", "Is64Bit", "true", + "Enable 64-bit instructions">; +def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; -def FeatureSSE : SubtargetFeature<"sse", "bool", "HasSSE", +def FeatureSSE : SubtargetFeature<"sse", "X86SSELevel", "SSE", "Enable SSE instructions">; -def FeatureSSE2 : SubtargetFeature<"sse2", "bool", "HasSSE2", +def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions">; -def FeatureSSE3 : SubtargetFeature<"sse3", "bool", "HasSSE3", +def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3", "Enable SSE3 instructions">; -def Feature3DNow : SubtargetFeature<"3dnow", "bool", "Has3DNow", +def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions">; -def Feature3DNowA : SubtargetFeature<"3dnowa", "bool", "Has3DNowA", +def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", "Enable 3DNow! 
Athlon instructions">; //===----------------------------------------------------------------------===// @@ -45,21 +45,20 @@ class Proc Features> def : Proc<"generic", []>; def : Proc<"i386", []>; def : Proc<"i486", []>; -def : Proc<"i586", []>; def : Proc<"pentium", []>; def : Proc<"pentium-mmx", [FeatureMMX]>; def : Proc<"i686", []>; def : Proc<"pentiumpro", []>; def : Proc<"pentium2", [FeatureMMX]>; -def : Proc<"celeron", [FeatureMMX]>; def : Proc<"pentium3", [FeatureMMX, FeatureSSE]>; -def : Proc<"pentium3m", [FeatureMMX, FeatureSSE]>; def : Proc<"pentium-m", [FeatureMMX, FeatureSSE, FeatureSSE2]>; def : Proc<"pentium4", [FeatureMMX, FeatureSSE, FeatureSSE2]>; -def : Proc<"pentium4m", [FeatureMMX, FeatureSSE, FeatureSSE2]>; +def : Proc<"x86-64", [FeatureMMX, FeatureSSE, FeatureSSE2, + Feature64Bit]>; +def : Proc<"yonah", [FeatureMMX, FeatureSSE, FeatureSSE2, + FeatureSSE3]>; def : Proc<"prescott", [FeatureMMX, FeatureSSE, FeatureSSE2, FeatureSSE3]>; -def : Proc<"x86-64", [FeatureMMX, FeatureSSE, FeatureSSE2, Feature64Bit]>; def : Proc<"nocona", [FeatureMMX, FeatureSSE, FeatureSSE2, FeatureSSE3, Feature64Bit]>; @@ -68,17 +67,20 @@ def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>; def : Proc<"athlon", [FeatureMMX, Feature3DNow, Feature3DNowA]>; def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNow, Feature3DNowA]>; -def : Proc<"athlon-4", [FeatureMMX, FeatureSSE, Feature3DNow, Feature3DNowA]>; -def : Proc<"athlon-xp", [FeatureMMX, FeatureSSE, Feature3DNow, Feature3DNowA]>; -def : Proc<"athlon-mp", [FeatureMMX, FeatureSSE, Feature3DNow, Feature3DNowA]>; -def : Proc<"k8", [FeatureMMX, FeatureSSE, FeatureSSE2, Feature3DNow, - Feature3DNowA, Feature64Bit]>; -def : Proc<"opteron", [FeatureMMX, FeatureSSE, FeatureSSE2, Feature3DNow, - Feature3DNowA, Feature64Bit]>; -def : Proc<"athlon64", [FeatureMMX, FeatureSSE, FeatureSSE2, Feature3DNow, - Feature3DNowA, Feature64Bit]>; -def : Proc<"athlon-fx", [FeatureMMX, FeatureSSE, 
FeatureSSE2, Feature3DNow, - Feature3DNowA, Feature64Bit]>; +def : Proc<"athlon-4", [FeatureMMX, FeatureSSE, Feature3DNow, + Feature3DNowA]>; +def : Proc<"athlon-xp", [FeatureMMX, FeatureSSE, Feature3DNow, + Feature3DNowA]>; +def : Proc<"athlon-mp", [FeatureMMX, FeatureSSE, Feature3DNow, + Feature3DNowA]>; +def : Proc<"k8", [FeatureMMX, FeatureSSE, FeatureSSE2, + Feature3DNow, Feature3DNowA, Feature64Bit]>; +def : Proc<"opteron", [FeatureMMX, FeatureSSE, FeatureSSE2, + Feature3DNow, Feature3DNowA, Feature64Bit]>; +def : Proc<"athlon64", [FeatureMMX, FeatureSSE, FeatureSSE2, + Feature3DNow, Feature3DNowA, Feature64Bit]>; +def : Proc<"athlon-fx", [FeatureMMX, FeatureSSE, FeatureSSE2, + Feature3DNow, Feature3DNowA, Feature64Bit]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index ea1146b213b..43fcd71e4ac 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -161,7 +161,7 @@ void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { // If we are emitting FP stack code, scan the basic block to determine if this // block defines any FP values. If so, put an FP_REG_KILL instruction before // the terminator of the block. - if (X86Vector < SSE2) { + if (!Subtarget->hasSSE2()) { // Note that FP stack instructions *are* used in SSE code when returning // values, but these are not live out of the basic block, so we don't need // an FP_REG_KILL in this case either. 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 067b88c9114..cd014aa3319 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -33,6 +33,9 @@ static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden, X86TargetLowering::X86TargetLowering(TargetMachine &TM) : TargetLowering(TM) { + Subtarget = &TM.getSubtarget<X86Subtarget>(); + X86ScalarSSE = Subtarget->hasSSE2(); + // Set up the TargetLowering object. // X86 is weird, it always uses i8 for shift amounts and setcc results. @@ -1657,8 +1660,8 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::SELECT: { MVT::ValueType VT = Op.getValueType(); bool isFP = MVT::isFloatingPoint(VT); - bool isFPStack = isFP && (X86Vector < SSE2); - bool isFPSSE = isFP && (X86Vector >= SSE2); + bool isFPStack = isFP && !X86ScalarSSE; + bool isFPSSE = isFP && X86ScalarSSE; bool addTest = false; SDOperand Op0 = Op.getOperand(0); SDOperand Cond, CC; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9c61bc8fb07..5787962ba0c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -15,6 +15,7 @@ #ifndef X86ISELLOWERING_H #define X86ISELLOWERING_H +#include "X86Subtarget.h" #include "llvm/Target/TargetLowering.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -227,6 +228,13 @@ namespace llvm { std::pair<SDOperand, SDOperand> LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, bool isTailCall, SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG); + + /// Subtarget - Keep a pointer to the X86Subtarget around so that we can + /// make the right decision when generating code for different targets. + const X86Subtarget *Subtarget; + + /// X86ScalarSSE - Select between SSE2 or x87 floating point ops. 
+ bool X86ScalarSSE; }; } diff --git a/lib/Target/X86/X86ISelPattern.cpp b/lib/Target/X86/X86ISelPattern.cpp index 46b0e8dc18d..611d2a9bd70 100644 --- a/lib/Target/X86/X86ISelPattern.cpp +++ b/lib/Target/X86/X86ISelPattern.cpp @@ -101,9 +101,13 @@ namespace { /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; + + /// X86ScalarSSE - Select between SSE2 or x87 floating point ops. + bool X86ScalarSSE; public: ISel(TargetMachine &TM) : SelectionDAGISel(X86Lowering), X86Lowering(TM) { Subtarget = &TM.getSubtarget<X86Subtarget>(); + X86ScalarSSE = Subtarget->hasSSE2(); } virtual const char *getPassName() const { diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 37e5c957a10..bfb8fabd020 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -189,10 +189,10 @@ def MRM6m : Format<30>; def MRM7m : Format<31>; //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. -def HasSSE1 : Predicate<"X86Vector >= SSE">; -def HasSSE2 : Predicate<"X86Vector >= SSE2">; -def HasSSE3 : Predicate<"X86Vector >= SSE3">; -def FPStack : Predicate<"X86Vector < SSE2">; +def HasSSE1 : Predicate<"Subtarget->hasSSE()">; +def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; +def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; +def FPStack : Predicate<"!Subtarget->hasSSE2()">; //===----------------------------------------------------------------------===// // X86 specific pattern fragments. diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e523f1448e0..3826c12f3c1 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -16,57 +16,78 @@ #include "X86GenSubtarget.inc" using namespace llvm; -#if defined(__APPLE__) -#include <mach/mach.h> -#include <mach/mach_host.h> -#include <mach/host_info.h> -#include <mach/machine.h> - -/// GetCurrentX86CPU - Returns the current CPUs features. 
-static const char *GetCurrentX86CPU() { - host_basic_info_data_t hostInfo; - mach_msg_type_number_t infoCount; - - infoCount = HOST_BASIC_INFO_COUNT; - host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, - &infoCount); - - if (hostInfo.cpu_type != CPU_TYPE_I386) return "generic"; - - switch(hostInfo.cpu_subtype) { - case CPU_SUBTYPE_386: return "i386"; - case CPU_SUBTYPE_486: - case CPU_SUBTYPE_486SX: return "i486"; - case CPU_SUBTYPE_PENT: return "pentium"; - case CPU_SUBTYPE_PENTPRO: return "pentiumpro"; - case CPU_SUBTYPE_PENTII_M3: return "pentium2"; - case CPU_SUBTYPE_PENTII_M5: return "pentium2"; - case CPU_SUBTYPE_CELERON: - case CPU_SUBTYPE_CELERON_MOBILE: return "celeron"; - case CPU_SUBTYPE_PENTIUM_3: return "pentium3"; - case CPU_SUBTYPE_PENTIUM_3_M: return "pentium3m"; - case CPU_SUBTYPE_PENTIUM_3_XEON: return "pentium3"; // FIXME: not sure. - case CPU_SUBTYPE_PENTIUM_M: return "pentium-m"; - case CPU_SUBTYPE_PENTIUM_4: return "pentium4"; - case CPU_SUBTYPE_PENTIUM_4_M: return "pentium4m"; - // FIXME: prescott, yonah? Check CPU_THREADTYPE_INTEL_HTT? 
- case CPU_SUBTYPE_XEON: - case CPU_SUBTYPE_XEON_MP: return "nocona"; - default: ; - } - - return "generic"; -} +static void GetCpuIDAndInfo(unsigned value, unsigned *EAX, unsigned *EBX, + unsigned *ECX, unsigned *EDX) { +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) +#if defined(__GNUC__) + asm ("pushl\t%%ebx\n\t" + "cpuid\n\t" + "popl\t%%ebx" + : "=a" (*EAX), +#if !defined(__DYNAMIC__) // This works around a gcc -fPIC bug + "=b" (*EBX), #endif + "=c" (*ECX), + "=d" (*EDX) + : "a" (value)); +#endif +#endif +} + +static const char *GetCurrentX86CPU() { + unsigned EAX = 0, DUMMY = 0, ECX = 0, EDX = 0; + GetCpuIDAndInfo(0x1, &EAX, &DUMMY, &ECX, &EDX); + unsigned Family = (EAX & (0xffffffff >> (32 - 4)) << 8) >> 8; // Bits 8 - 11 + unsigned Model = (EAX & (0xffffffff >> (32 - 4)) << 4) >> 4; // Bits 4 - 7 + GetCpuIDAndInfo(0x80000001, &EAX, &DUMMY, &ECX, &EDX); + bool Em64T = EDX & (1 << 29); + + switch (Family) { + case 3: + return "i386"; + case 4: + return "i486"; + case 5: + switch (Model) { + case 4: return "pentium-mmx"; + default: return "pentium"; + } + break; + case 6: + switch (Model) { + case 1: return "pentiumpro"; + case 3: + case 5: + case 6: return "pentium2"; + case 7: + case 8: + case 10: + case 11: return "pentium3"; + case 9: + case 13: return "pentium-m"; + case 14: return "yonah"; + default: return "i686"; + } + case 15: { + switch (Model) { + case 3: + case 4: + return (Em64T) ? "nocona" : "prescott"; + default: + return (Em64T) ? "x86-64" : "pentium4"; + } + } + + default: + return "generic"; + } +} X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) : stackAlignment(8), indirectExternAndWeakGlobals(false) { // Determine default and user specified characteristics - std::string CPU = "generic"; -#if defined(__APPLE__) - CPU = GetCurrentX86CPU(); -#endif + std::string CPU = GetCurrentX86CPU(); // Parse features string. 
ParseSubtargetFeatures(FS, CPU); diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 072b54ede20..c874675445e 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -23,6 +23,23 @@ class Module; class X86Subtarget : public TargetSubtarget { protected: + enum X86SSEEnum { + NoMMXSSE, MMX, SSE, SSE2, SSE3 + }; + + enum X863DNowEnum { + NoThreeDNow, ThreeDNow, ThreeDNowA + }; + + /// X86SSELevel - MMX, SSE, SSE2, SSE3, or none supported. + X86SSEEnum X86SSELevel; + + /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported. + X863DNowEnum X863DNowLevel; + + /// Is64Bit - True if the processor supports Em64T. + bool Is64Bit; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -30,14 +47,6 @@ protected: /// Used by instruction selector bool indirectExternAndWeakGlobals; - /// Arch. features used by isel. 
- bool Is64Bit; - bool HasMMX; - bool HasSSE; - bool HasSSE2; - bool HasSSE3; - bool Has3DNow; - bool Has3DNowA; public: enum { isELF, isCygwin, isDarwin, isWindows @@ -66,12 +75,12 @@ public: bool is64Bit() const { return Is64Bit; } - bool hasMMX() const { return HasMMX; } - bool hasSSE() const { return HasSSE; } - bool hasSSE2() const { return HasSSE2; } - bool hasSSE3() const { return HasSSE3; } - bool has3DNow() const { return Has3DNow; } - bool has3DNowA() const { return Has3DNowA; } + bool hasMMX() const { return X86SSELevel >= MMX; } + bool hasSSE() const { return X86SSELevel >= SSE; } + bool hasSSE2() const { return X86SSELevel >= SSE2; } + bool hasSSE3() const { return X86SSELevel >= SSE3; } + bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } + bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 3e501fffe88..3ac55131e3d 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -26,8 +26,6 @@ #include using namespace llvm; -X86VectorEnum llvm::X86Vector = AutoDetect; -bool llvm::X86ScalarSSE = false; bool llvm::X86DAGIsel = false; /// X86TargetMachineModule - Note that this is used on hosts that cannot link @@ -41,28 +39,11 @@ namespace { cl::opt DisableOutput("disable-x86-llc-output", cl::Hidden, cl::desc("Disable the X86 asm printer, for use " "when profiling the code generator.")); - cl::opt EnableSSEFP("enable-sse-scalar-fp", - cl::desc("Perform FP math in SSE regs instead of the FP stack"), - cl::location(X86ScalarSSE), - cl::init(false)); - cl::opt EnableX86DAGDAG("enable-x86-dag-isel", cl::Hidden, cl::desc("Enable DAG-to-DAG isel for X86"), cl::location(X86DAGIsel), cl::init(false)); - // FIXME: This should eventually be handled with target triples and - // subtarget support! 
- cl::opt - SSEArg( - cl::desc("Enable SSE support in the X86 target:"), - cl::values( - clEnumValN(SSE, "sse", " Enable SSE support"), - clEnumValN(SSE2, "sse2", " Enable SSE and SSE2 support"), - clEnumValN(SSE3, "sse3", " Enable SSE, SSE2, and SSE3 support"), - clEnumValEnd), - cl::location(X86Vector), cl::init(AutoDetect)); - // Register the target. RegisterTarget X("x86", " IA-32 (Pentium and above)"); } @@ -101,23 +82,7 @@ X86TargetMachine::X86TargetMachine(const Module &M, Subtarget(M, FS), FrameInfo(TargetFrameInfo::StackGrowsDown, Subtarget.getStackAlignment(), -4), - JITInfo(*this) { - if (X86Vector == AutoDetect) { - X86Vector = NoSSE; - if (Subtarget.hasSSE()) - X86Vector = SSE; - if (Subtarget.hasSSE2()) - X86Vector = SSE2; - if (Subtarget.hasSSE3()) - X86Vector = SSE3; - } - - // Scalar SSE FP requires at least SSE2 - X86ScalarSSE &= X86Vector >= SSE2; - - // Ignore -enable-sse-scalar-fp if -enable-x86-dag-isel. - X86ScalarSSE |= (X86DAGIsel && X86Vector >= SSE2); -} + JITInfo(*this) {} // addPassesToEmitFile - We currently use all of the same passes as the JIT