X86: enable AVX2 under Haswell native compilation

Patch by Adam Strzelecki

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195632 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2013-11-25 09:52:59 +00:00
parent dfc615f284
commit 8a6c627fd0
2 changed files with 96 additions and 7 deletions

View File

@ -95,6 +95,75 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
#endif
}
/// GetX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
/// 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
#if defined(__GNUC__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
asm ("movq\t%%rbx, %%rsi\n\t"
"cpuid\n\t"
"xchgq\t%%rbx, %%rsi\n\t"
: "=a" (*rEAX),
"=S" (*rEBX),
"=c" (*rECX),
"=d" (*rEDX)
: "a" (value),
"c" (subleaf));
return false;
#elif defined(_MSC_VER)
// __cpuidex was added in MSVC++ 9.0 SP1
#if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
int registers[4];
__cpuidex(registers, value, subleaf);
*rEAX = registers[0];
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
return false;
#else
return true;
#endif
#else
return true;
#endif
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__GNUC__)
asm ("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a" (*rEAX),
"=S" (*rEBX),
"=c" (*rECX),
"=d" (*rEDX)
: "a" (value),
"c" (subleaf));
return false;
#elif defined(_MSC_VER)
__asm {
mov eax,value
mov ecx,subleaf
cpuid
mov esi,rEAX
mov dword ptr [esi],eax
mov esi,rEBX
mov dword ptr [esi],ebx
mov esi,rECX
mov dword ptr [esi],ecx
mov esi,rEDX
mov dword ptr [esi],edx
}
return false;
#else
return true;
#endif
#else
return true;
#endif
}
static bool OSHasAVXSupport() {
#if defined(__GNUC__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
@ -131,6 +200,14 @@ std::string sys::getHostCPUName() {
unsigned Model = 0;
DetectX86FamilyModel(EAX, Family, Model);
union {
unsigned u[3];
char c[12];
} text;
GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
unsigned MaxLeaf = EAX;
bool HasSSE3 = (ECX & 0x1);
bool HasSSE41 = (ECX & 0x80000);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
@ -138,15 +215,12 @@ std::string sys::getHostCPUName() {
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 &&
!GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) &&
(EBX & 0x20);
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
union {
unsigned u[3];
char c[12];
} text;
GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
switch (Family) {
case 3:
@ -254,10 +328,20 @@ std::string sys::getHostCPUName() {
// Ivy Bridge:
case 58:
case 62: // Ivy Bridge EP
// Not all Ivy Bridge processors support AVX (such as the Pentium
// versions instead of the i7 versions).
return HasAVX ? "core-avx-i" : "corei7";
// Haswell:
case 60:
case 63:
case 69:
case 70:
// Not all Haswell processors support AVX too (such as the Pentium
// versions instead of the i7 versions).
return HasAVX2 ? "core-avx2" : "corei7";
case 28: // Most 45 nm Intel Atom processors
case 38: // 45 nm Atom Lincroft
case 39: // 32 nm Atom Medfield

View File

@ -285,7 +285,12 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
(Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
(Family == 6 && Model == 0x2A) || // SandyBridge
(Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
(Family == 6 && Model == 0x3A))) {// IvyBridge
(Family == 6 && Model == 0x3A) || // IvyBridge
(Family == 6 && Model == 0x3E) || // IvyBridge EP
(Family == 6 && Model == 0x3C) || // Haswell
(Family == 6 && Model == 0x3F) || // ...
(Family == 6 && Model == 0x45) || // ...
(Family == 6 && Model == 0x46))) { // ...
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}