From cdb9bd7eb9620fab864959efa8f3cfcca06b10de Mon Sep 17 00:00:00 2001 From: Yunzhong Gao Date: Wed, 16 Oct 2013 19:04:11 +0000 Subject: [PATCH] Enabling 3DNow! prefetch instruction for a few AMD processors: bobcat, jaguar, bulldozer and piledriver. Support for the instruction itself seems to have already been added in r178040. Differential Revision: http://llvm-reviews.chandlerc.com/D1933 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192828 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 12 ++++++------ lib/Target/X86/X86InstrInfo.td | 2 +- test/CodeGen/X86/prefetch.ll | 3 +++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 784a2642639..b41a9c96082 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -292,19 +292,19 @@ def : Proc<"amdfam10", [FeatureSSE4A, FeaturePOPCNT, FeatureSlowBTMem]>; // Bobcat def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, - FeatureLZCNT, FeaturePOPCNT]>; + FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT]>; // Jaguar def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, - FeatureAES, FeaturePCLMUL, FeatureBMI, - FeatureF16C, FeatureMOVBE, FeatureLZCNT, - FeaturePOPCNT]>; + FeaturePRFCHW, FeatureAES, FeaturePCLMUL, + FeatureBMI, FeatureF16C, FeatureMOVBE, + FeatureLZCNT, FeaturePOPCNT]>; // Bulldozer def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePCLMUL, + FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureLZCNT, FeaturePOPCNT]>; // Piledriver def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePCLMUL, + FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureBMI, FeatureTBM, FeatureFMA]>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d65471e2755..6e5d54349fa 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -687,7 +687,7 @@ def HasADX : Predicate<"Subtarget->hasADX()">; def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; -def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">; +def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll index 6157c39c815..d6571acbbb7 100644 --- a/test/CodeGen/X86/prefetch.ll +++ b/test/CodeGen/X86/prefetch.ll @@ -2,6 +2,8 @@ ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW ; RUN: llc < %s -march=x86 -mcpu=slm | FileCheck %s -check-prefix=SLM +; RUN: llc < %s -march=x86 -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHW +; RUN: llc < %s -march=x86 -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=NOPRFCHW ; rdar://10538297 @@ -12,6 +14,7 @@ entry: ; CHECK: prefetcht0 ; CHECK: prefetchnta ; PRFCHW: prefetchw +; NOPRFCHW-NOT: prefetchw ; SLM: prefetchw tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )