x86 CPU detection and proper subtarget support

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25679 91177308-0d34-0410-b5e6-96231b3b80d8
2025-01-14 00:32:55 +00:00 · 2006-01-27 08:10:46 +00:00 · 2006-01-27 08:10:46 +00:00 · 559806f575
commit 559806f575
parent 19c9550744
10 changed files with 138 additions and 132 deletions
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@ -25,12 +25,6 @@ class FunctionPass;
 class IntrinsicLowering;
 class MachineCodeEmitter;

-enum X86VectorEnum {
-  AutoDetect, NoSSE, SSE, SSE2, SSE3
-};
-
-extern X86VectorEnum X86Vector;
-extern bool X86ScalarSSE;
 extern bool X86DAGIsel;

 /// createX86ISelPattern - This pass converts an LLVM function into a 
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@ -20,19 +20,19 @@ include "../Target.td"
 // X86 Subtarget features.
 //
 
-def Feature64Bit     : SubtargetFeature<"64bit", "bool", "Is64Bit",
-                                        "Enable 64-bit instructions">;
-def FeatureMMX       : SubtargetFeature<"mmx", "bool", "HasMMX",
+def Feature64Bit     : SubtargetFeature<"64bit", "Is64Bit", "true",
+                                        "Enable 64-bit instructions">;
+def FeatureMMX       : SubtargetFeature<"mmx","X86SSELevel", "MMX",
                                        "Enable MMX instructions">;
-def FeatureSSE       : SubtargetFeature<"sse", "bool", "HasSSE",
+def FeatureSSE       : SubtargetFeature<"sse", "X86SSELevel", "SSE",
                                        "Enable SSE instructions">;
-def FeatureSSE2      : SubtargetFeature<"sse2", "bool", "HasSSE2",
+def FeatureSSE2      : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                                        "Enable SSE2 instructions">;
-def FeatureSSE3      : SubtargetFeature<"sse3", "bool", "HasSSE3",
+def FeatureSSE3      : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                                        "Enable SSE3 instructions">;
-def Feature3DNow     : SubtargetFeature<"3dnow", "bool", "Has3DNow",
+def Feature3DNow     : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                                        "Enable 3DNow! instructions">;
-def Feature3DNowA    : SubtargetFeature<"3dnowa", "bool", "Has3DNowA",
+def Feature3DNowA    : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                                        "Enable 3DNow! Athlon instructions">;

 //===----------------------------------------------------------------------===//
@ -45,21 +45,20 @@ class Proc<string Name, list<SubtargetFeature> Features>
 def : Proc<"generic",         []>;
 def : Proc<"i386",            []>;
 def : Proc<"i486",            []>;
-def : Proc<"i586",            []>;
 def : Proc<"pentium",         []>;
 def : Proc<"pentium-mmx",     [FeatureMMX]>;
 def : Proc<"i686",            []>;
 def : Proc<"pentiumpro",      []>;
 def : Proc<"pentium2",        [FeatureMMX]>;
-def : Proc<"celeron",         [FeatureMMX]>;
 def : Proc<"pentium3",        [FeatureMMX, FeatureSSE]>;
-def : Proc<"pentium3m",       [FeatureMMX, FeatureSSE]>;
 def : Proc<"pentium-m",       [FeatureMMX, FeatureSSE, FeatureSSE2]>;
 def : Proc<"pentium4",        [FeatureMMX, FeatureSSE, FeatureSSE2]>;
-def : Proc<"pentium4m",       [FeatureMMX, FeatureSSE, FeatureSSE2]>;
+def : Proc<"x86-64",          [FeatureMMX, FeatureSSE, FeatureSSE2,
+                               Feature64Bit]>;
+def : Proc<"yonah",           [FeatureMMX, FeatureSSE, FeatureSSE2,
+                               FeatureSSE3]>;
 def : Proc<"prescott",        [FeatureMMX, FeatureSSE, FeatureSSE2,
                               FeatureSSE3]>;
-def : Proc<"x86-64",          [FeatureMMX, FeatureSSE, FeatureSSE2, Feature64Bit]>;
 def : Proc<"nocona",          [FeatureMMX, FeatureSSE, FeatureSSE2,
                               FeatureSSE3, Feature64Bit]>;

@ -68,17 +67,20 @@ def : Proc<"k6-2",            [FeatureMMX, Feature3DNow]>;
 def : Proc<"k6-3",            [FeatureMMX, Feature3DNow]>;
 def : Proc<"athlon",          [FeatureMMX, Feature3DNow, Feature3DNowA]>;
 def : Proc<"athlon-tbird",    [FeatureMMX, Feature3DNow, Feature3DNowA]>;
-def : Proc<"athlon-4",        [FeatureMMX, FeatureSSE, Feature3DNow, Feature3DNowA]>;
-def : Proc<"athlon-xp",       [FeatureMMX, FeatureSSE, Feature3DNow, Feature3DNowA]>;
-def : Proc<"athlon-mp",       [FeatureMMX, FeatureSSE, Feature3DNow, Feature3DNowA]>;
-def : Proc<"k8",              [FeatureMMX, FeatureSSE, FeatureSSE2, Feature3DNow,
-                               Feature3DNowA, Feature64Bit]>;
-def : Proc<"opteron",         [FeatureMMX, FeatureSSE, FeatureSSE2, Feature3DNow,
-                               Feature3DNowA, Feature64Bit]>;
-def : Proc<"athlon64",        [FeatureMMX, FeatureSSE, FeatureSSE2, Feature3DNow,
-                               Feature3DNowA, Feature64Bit]>;
-def : Proc<"athlon-fx",       [FeatureMMX, FeatureSSE, FeatureSSE2, Feature3DNow,
-                               Feature3DNowA, Feature64Bit]>;
+def : Proc<"athlon-4",        [FeatureMMX, FeatureSSE, Feature3DNow,
+                               Feature3DNowA]>;
+def : Proc<"athlon-xp",       [FeatureMMX, FeatureSSE, Feature3DNow,
+                               Feature3DNowA]>;
+def : Proc<"athlon-mp",       [FeatureMMX, FeatureSSE, Feature3DNow,
+                               Feature3DNowA]>;
+def : Proc<"k8",              [FeatureMMX, FeatureSSE, FeatureSSE2,
+                               Feature3DNow, Feature3DNowA, Feature64Bit]>;
+def : Proc<"opteron",         [FeatureMMX, FeatureSSE, FeatureSSE2,
+                               Feature3DNow, Feature3DNowA, Feature64Bit]>;
+def : Proc<"athlon64",        [FeatureMMX, FeatureSSE, FeatureSSE2,
+                               Feature3DNow, Feature3DNowA, Feature64Bit]>;
+def : Proc<"athlon-fx",       [FeatureMMX, FeatureSSE, FeatureSSE2,
+                               Feature3DNow, Feature3DNowA, Feature64Bit]>;

 def : Proc<"winchip-c6",      [FeatureMMX]>;
 def : Proc<"winchip2",        [FeatureMMX, Feature3DNow]>;
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -161,7 +161,7 @@ void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
  // If we are emitting FP stack code, scan the basic block to determine if this
  // block defines any FP values.  If so, put an FP_REG_KILL instruction before
  // the terminator of the block.
-  if (X86Vector < SSE2) {
+  if (!Subtarget->hasSSE2()) {
    // Note that FP stack instructions *are* used in SSE code when returning
    // values, but these are not live out of the basic block, so we don't need
    // an FP_REG_KILL in this case either.
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -33,6 +33,9 @@ static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,

 X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
+  Subtarget = &TM.getSubtarget<X86Subtarget>();
+  X86ScalarSSE = Subtarget->hasSSE2();
+
  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
@ -1657,8 +1660,8 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  case ISD::SELECT: {
    MVT::ValueType VT = Op.getValueType();
    bool isFP      = MVT::isFloatingPoint(VT);
-    bool isFPStack = isFP && (X86Vector < SSE2);
-    bool isFPSSE   = isFP && (X86Vector >= SSE2);
+    bool isFPStack = isFP && !X86ScalarSSE;
+    bool isFPSSE   = isFP && X86ScalarSSE;
    bool addTest   = false;
    SDOperand Op0 = Op.getOperand(0);
    SDOperand Cond, CC;
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@ -15,6 +15,7 @@
 #ifndef X86ISELLOWERING_H
 #define X86ISELLOWERING_H

+#include "X86Subtarget.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/CodeGen/SelectionDAG.h"

@ -227,6 +228,13 @@ namespace llvm {
    std::pair<SDOperand, SDOperand>
    LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, bool isTailCall,
                      SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG);
+
+    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+    /// make the right decision when generating code for different targets.
+    const X86Subtarget *Subtarget;
+
+    /// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
+    bool X86ScalarSSE;
  };
 }

--- a/lib/Target/X86/X86ISelPattern.cpp
+++ b/lib/Target/X86/X86ISelPattern.cpp
@ -101,9 +101,13 @@ namespace {
    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;
+
+    /// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
+    bool X86ScalarSSE;
  public:
    ISel(TargetMachine &TM) : SelectionDAGISel(X86Lowering), X86Lowering(TM) {
      Subtarget = &TM.getSubtarget<X86Subtarget>();
+      X86ScalarSSE = Subtarget->hasSSE2();
    }

    virtual const char *getPassName() const {
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@ -189,10 +189,10 @@ def MRM6m  : Format<30>; def MRM7m  : Format<31>;

 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
-def HasSSE1 : Predicate<"X86Vector >= SSE">;
-def HasSSE2 : Predicate<"X86Vector >= SSE2">;
-def HasSSE3 : Predicate<"X86Vector >= SSE3">;
-def FPStack : Predicate<"X86Vector < SSE2">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def FPStack : Predicate<"!Subtarget->hasSSE2()">;

 //===----------------------------------------------------------------------===//
 // X86 specific pattern fragments.
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@ -16,57 +16,78 @@
 #include "X86GenSubtarget.inc"
 using namespace llvm;

-#if defined(__APPLE__)
-#include <mach/mach.h>
-#include <mach/mach_host.h>
-#include <mach/host_info.h>
-#include <mach/machine.h>
-
-/// GetCurrentX86CPU - Returns the current CPUs features.
-static const char *GetCurrentX86CPU() {
-  host_basic_info_data_t hostInfo;
-  mach_msg_type_number_t infoCount;
-
-  infoCount = HOST_BASIC_INFO_COUNT;
-  host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, 
-            &infoCount);
-            
-  if (hostInfo.cpu_type != CPU_TYPE_I386) return "generic";
-
-  switch(hostInfo.cpu_subtype) {
-  case CPU_SUBTYPE_386:            return "i386";
-  case CPU_SUBTYPE_486:
-  case CPU_SUBTYPE_486SX:          return "i486";
-  case CPU_SUBTYPE_PENT:           return "pentium";
-  case CPU_SUBTYPE_PENTPRO:        return "pentiumpro";
-  case CPU_SUBTYPE_PENTII_M3:      return "pentium2";
-  case CPU_SUBTYPE_PENTII_M5:      return "pentium2";
-  case CPU_SUBTYPE_CELERON:
-  case CPU_SUBTYPE_CELERON_MOBILE: return "celeron";
-  case CPU_SUBTYPE_PENTIUM_3:      return "pentium3";
-  case CPU_SUBTYPE_PENTIUM_3_M:    return "pentium3m";
-  case CPU_SUBTYPE_PENTIUM_3_XEON: return "pentium3";   // FIXME: not sure.
-  case CPU_SUBTYPE_PENTIUM_M:      return "pentium-m";
-  case CPU_SUBTYPE_PENTIUM_4:      return "pentium4";
-  case CPU_SUBTYPE_PENTIUM_4_M:    return "pentium4m";
-  // FIXME: prescott, yonah? Check CPU_THREADTYPE_INTEL_HTT?
-  case CPU_SUBTYPE_XEON:
-  case CPU_SUBTYPE_XEON_MP:        return "nocona";
-  default: ;
-  }
-  
-  return "generic";
-}
+static void GetCpuIDAndInfo(unsigned value, unsigned *EAX, unsigned *EBX,
+                            unsigned *ECX, unsigned *EDX) {
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+#if defined(__GNUC__)
+  asm ("pushl\t%%ebx\n\t"
+       "cpuid\n\t"
+       "popl\t%%ebx"
+       : "=a" (*EAX),
+#if !defined(__DYNAMIC__)  // This works around a gcc -fPIC bug
+         "=b" (*EBX),
 #endif
+         "=c" (*ECX),
+         "=d" (*EDX)
+       :  "a" (value));
+#endif
+#endif
+}
+
+static const char *GetCurrentX86CPU() {
+  unsigned EAX = 0, DUMMY = 0, ECX = 0, EDX = 0;
+  GetCpuIDAndInfo(0x1, &EAX, &DUMMY, &ECX, &EDX);
+  unsigned Family  = (EAX & (0xffffffff >> (32 - 4)) << 8) >> 8; // Bits 8 - 11
+  unsigned Model   = (EAX & (0xffffffff >> (32 - 4)) << 4) >> 4; // Bits 4 - 7
+  GetCpuIDAndInfo(0x80000001, &EAX, &DUMMY, &ECX, &EDX);
+  bool Em64T = EDX & (1 << 29);
+
+  switch (Family) {
+    case 3:
+      return "i386";
+    case 4:
+      return "i486";
+    case 5:
+      switch (Model) {
+      case 4:  return "pentium-mmx";
+      default: return "pentium";
+      }
+      break;
+    case 6:
+      switch (Model) {
+      case 1:  return "pentiumpro";
+      case 3:
+      case 5:
+      case 6:  return "pentium2";
+      case 7:
+      case 8:
+      case 10:
+      case 11: return "pentium3";
+      case 9:
+      case 13: return "pentium-m";
+      case 14: return "yonah";
+      default: return "i686";
+      }
+    case 15: {
+      switch (Model) {
+      case 3:  
+      case 4:
+        return (Em64T) ? "nocona" : "prescott";
+      default:
+        return (Em64T) ? "x86-64" : "pentium4";
+      }
+    }
+      
+  default:
+    return "generic";
+  }
+}

 X86Subtarget::X86Subtarget(const Module &M, const std::string &FS)
  : stackAlignment(8), indirectExternAndWeakGlobals(false) {
      
  // Determine default and user specified characteristics
-  std::string CPU = "generic";
-#if defined(__APPLE__)
-  CPU = GetCurrentX86CPU();
-#endif
+  std::string CPU = GetCurrentX86CPU();

  // Parse features string.
  ParseSubtargetFeatures(FS, CPU);
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@ -23,6 +23,23 @@ class Module;

 class X86Subtarget : public TargetSubtarget {
 protected:
+  enum X86SSEEnum {
+    NoMMXSSE, MMX, SSE, SSE2, SSE3
+  };
+
+  enum X863DNowEnum {
+    NoThreeDNow, ThreeDNow, ThreeDNowA
+  };
+
+  /// X86SSELevel - MMX, SSE, SSE2, SSE3, or none supported.
+  X86SSEEnum X86SSELevel;
+
+  /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
+  X863DNowEnum X863DNowLevel;
+
+  /// Is64Bit - True if the processor supports Em64T.
+  bool Is64Bit;
+
  /// stackAlignment - The minimum alignment known to hold of the stack frame on
  /// entry to the function and which must be maintained by every function.
  unsigned stackAlignment;
@ -30,14 +47,6 @@ protected:
  /// Used by instruction selector
  bool indirectExternAndWeakGlobals;

-  /// Arch. features used by isel.
-  bool Is64Bit;
-  bool HasMMX;
-  bool HasSSE;
-  bool HasSSE2;
-  bool HasSSE3;
-  bool Has3DNow;
-  bool Has3DNowA;
 public:
  enum {
    isELF, isCygwin, isDarwin, isWindows
@ -66,12 +75,12 @@ public:

  bool is64Bit() const { return Is64Bit; }

-  bool hasMMX() const { return HasMMX; }
-  bool hasSSE() const { return HasSSE; }
-  bool hasSSE2() const { return HasSSE2; }
-  bool hasSSE3() const { return HasSSE3; }
-  bool has3DNow() const { return Has3DNow; }
-  bool has3DNowA() const { return Has3DNowA; }
+  bool hasMMX() const { return X86SSELevel >= MMX; }
+  bool hasSSE() const { return X86SSELevel >= SSE; }
+  bool hasSSE2() const { return X86SSELevel >= SSE2; }
+  bool hasSSE3() const { return X86SSELevel >= SSE3; }
+  bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+  bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
 };
 } // End llvm namespace

--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@ -26,8 +26,6 @@
 #include <iostream>
 using namespace llvm;

-X86VectorEnum llvm::X86Vector = AutoDetect;
-bool llvm::X86ScalarSSE = false;
 bool llvm::X86DAGIsel = false;

 /// X86TargetMachineModule - Note that this is used on hosts that cannot link
@ -41,28 +39,11 @@ namespace {
  cl::opt<bool> DisableOutput("disable-x86-llc-output", cl::Hidden,
                              cl::desc("Disable the X86 asm printer, for use "
                                       "when profiling the code generator."));
-  cl::opt<bool, true> EnableSSEFP("enable-sse-scalar-fp",
-                cl::desc("Perform FP math in SSE regs instead of the FP stack"),
-                cl::location(X86ScalarSSE),
-                cl::init(false));
-
  cl::opt<bool, true> EnableX86DAGDAG("enable-x86-dag-isel", cl::Hidden,
                      cl::desc("Enable DAG-to-DAG isel for X86"),
                      cl::location(X86DAGIsel),
                      cl::init(false));
  
-  // FIXME: This should eventually be handled with target triples and
-  // subtarget support!
-  cl::opt<X86VectorEnum, true>
-  SSEArg(
-    cl::desc("Enable SSE support in the X86 target:"),
-    cl::values(
-       clEnumValN(SSE,  "sse", "  Enable SSE support"),
-       clEnumValN(SSE2, "sse2", "  Enable SSE and SSE2 support"),
-       clEnumValN(SSE3, "sse3", "  Enable SSE, SSE2, and SSE3 support"),
-       clEnumValEnd),
-    cl::location(X86Vector), cl::init(AutoDetect));
-
  // Register the target.
  RegisterTarget<X86TargetMachine> X("x86", "  IA-32 (Pentium and above)");
 }
@ -101,23 +82,7 @@ X86TargetMachine::X86TargetMachine(const Module &M,
    Subtarget(M, FS),
    FrameInfo(TargetFrameInfo::StackGrowsDown,
              Subtarget.getStackAlignment(), -4),
-    JITInfo(*this) {
-  if (X86Vector == AutoDetect) {
-      X86Vector = NoSSE;
-    if (Subtarget.hasSSE())
-      X86Vector = SSE;
-    if (Subtarget.hasSSE2())
-      X86Vector = SSE2;
-    if (Subtarget.hasSSE3())
-      X86Vector = SSE3;
-  }
-
-  // Scalar SSE FP requires at least SSE2
-  X86ScalarSSE &= X86Vector >= SSE2;
-
-  // Ignore -enable-sse-scalar-fp if -enable-x86-dag-isel.
-  X86ScalarSSE |= (X86DAGIsel && X86Vector >= SSE2);
-}
+    JITInfo(*this) {}


 // addPassesToEmitFile - We currently use all of the same passes as the JIT