From 8a4f11e3b69ce3e58b1d147d98a020b1d311f1f1 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 22 Jan 2014 22:20:54 +0000 Subject: [PATCH] Revert "R600: Add work-around for the CF stack entry HW bug" This reverts commit 35b8331cad6eb512a2506adbc394201181da94ba. The -debug-only flag for llc doesn't appear to be available in all build configurations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199845 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.td | 5 - lib/Target/R600/AMDGPUSubtarget.cpp | 6 - lib/Target/R600/AMDGPUSubtarget.h | 2 - lib/Target/R600/Processors.td | 14 +- lib/Target/R600/R600ControlFlowFinalizer.cpp | 43 +--- test/CodeGen/R600/cf-stack-bug.ll | 225 ------------------- 6 files changed, 7 insertions(+), 288 deletions(-) delete mode 100644 test/CodeGen/R600/cf-stack-bug.ll diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index d1e2cf5319c..c4e5efc8d6e 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -63,11 +63,6 @@ def FeatureCaymanISA : SubtargetFeature<"caymanISA", "true", "Use Cayman ISA">; -def FeatureCFALUBug : SubtargetFeature<"cfalubug", - "CFALUBug", - "true", - "GPU has CF_ALU bug">; - class SubtargetFeatureFetchLimit : SubtargetFeature <"fetch"#Value, "TexVTXClauseSize", diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index e77ab5e6d14..f36aa2071c7 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -39,7 +39,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : EnableIRStructurizer = true; EnableIfCvt = true; WavefrontSize = 0; - CFALUBug = false; ParseSubtargetFeatures(GPU, FS); DevName = GPU; } @@ -98,11 +97,6 @@ AMDGPUSubtarget::getStackEntrySize() const { } } bool -AMDGPUSubtarget::hasCFAluBug() const { - assert(getGeneration() <= NORTHERN_ISLANDS); - return CFALUBug; -} -bool AMDGPUSubtarget::isTargetELF() const { return false; } diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 7e7f4d0c004..68d853218ba 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -52,7 +52,6 @@ private: bool EnableIRStructurizer; bool EnableIfCvt; unsigned WavefrontSize; - bool CFALUBug; InstrItineraryData InstrItins; @@ -72,7 +71,6 @@ public: bool isIfCvtEnabled() const; unsigned getWavefrontSize() const; unsigned getStackEntrySize() const; - bool hasCFAluBug() const; virtual bool enableMachineScheduler() const { return getGeneration() <= NORTHERN_ISLANDS; diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index fde44814970..e601f353163 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -46,15 +46,13 @@ def : Proc<"rv770", R600_VLIW5_Itin, //===----------------------------------------------------------------------===// def : Proc<"cedar", R600_VLIW5_Itin, - [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32, - FeatureCFALUBug]>; + [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32]>; def : Proc<"redwood", R600_VLIW5_Itin, - [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64, - FeatureCFALUBug]>; + [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>; def : Proc<"sumo", R600_VLIW5_Itin, - [FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>; + [FeatureEvergreen, FeatureWavefrontSize64]>; def : Proc<"juniper", R600_VLIW5_Itin, [FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>; @@ -68,13 +66,13 @@ def : Proc<"cypress", R600_VLIW5_Itin, //===----------------------------------------------------------------------===// def : Proc<"barts", R600_VLIW5_Itin, - [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>; + [FeatureNorthernIslands, FeatureVertexCache]>; def : Proc<"turks", R600_VLIW5_Itin, - [FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>; + [FeatureNorthernIslands, FeatureVertexCache]>; def : Proc<"caicos", R600_VLIW5_Itin, - [FeatureNorthernIslands, FeatureCFALUBug]>; + [FeatureNorthernIslands]>; def : Proc<"cayman", R600_VLIW4_Itin, [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>; diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 470ff2e1079..6b42a7a9faf 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -73,44 +73,6 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) { return false; } -bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { - if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() && - getLoopDepth() > 1) - return true; - - if (!ST.hasCFAluBug()) - return false; - - switch(Opcode) { - default: return false; - case AMDGPU::CF_ALU_PUSH_BEFORE: - case AMDGPU::CF_ALU_ELSE_AFTER: - case AMDGPU::CF_ALU_BREAK: - case AMDGPU::CF_ALU_CONTINUE: - if (CurrentSubEntries == 0) - return false; - if (ST.getWavefrontSize() == 64) { - // We are being conservative here. We only require this work-around if - // CurrentSubEntries > 3 && - // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) - // - // We have to be conservative, because we don't know for certain that - // our stack allocation algorithm for Evergreen/NI is correct. Applying this - // work-around when CurrentSubEntries > 3 allows us to over-allocate stack - // resources without any problems. - return CurrentSubEntries > 3; - } else { - assert(ST.getWavefrontSize() == 32); - // We are being conservative here. We only require the work-around if - // CurrentSubEntries > 7 && - // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) - // See the comment on the wavefront size == 64 case for why we are - // being conservative. - return CurrentSubEntries > 7; - } - } -} - unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { switch(Item) { default: @@ -510,12 +472,9 @@ public: if (MI->getOpcode() == AMDGPU::CF_ALU) LastAlu.back() = MI; I++; - bool RequiresWorkAround = - CFStack.requiresWorkAroundForInst(MI->getOpcode()); switch (MI->getOpcode()) { case AMDGPU::CF_ALU_PUSH_BEFORE: - if (RequiresWorkAround) { - DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n"); + if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) { BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG)) .addImm(CfCount + 1) .addImm(1); diff --git a/test/CodeGen/R600/cf-stack-bug.ll b/test/CodeGen/R600/cf-stack-bug.ll deleted file mode 100644 index 7fa07b11eea..00000000000 --- a/test/CodeGen/R600/cf-stack-bug.ll +++ /dev/null @@ -1,225 +0,0 @@ -; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG32 --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC - -; We are currently allocating 2 extra sub-entries on Evergreen / NI for -; non-WQM push instructions if we change this to 1, then we will need to -; add one level of depth to each of these tests. - -; BUG64-NOT: Applying bug work-around -; BUG32-NOT: Applying bug work-around -; NOBUG-NOT: Applying bug work-around -; FUNC-LABEL: @nested3 -define void @nested3(i32 addrspace(1)* %out, i32 %cond) { -entry: - %0 = icmp sgt i32 %cond, 0 - br i1 %0, label %if.1, label %end - -if.1: - %1 = icmp sgt i32 %cond, 10 - br i1 %1, label %if.2, label %if.store.1 - -if.store.1: - store i32 1, i32 addrspace(1)* %out - br label %end - -if.2: - %2 = icmp sgt i32 %cond, 20 - br i1 %2, label %if.3, label %if.2.store - -if.2.store: - store i32 2, i32 addrspace(1)* %out - br label %end - -if.3: - store i32 3, i32 addrspace(1)* %out - br label %end - -end: - ret void -} - -; BUG64: Applying bug work-around -; BUG32-NOT: Applying bug work-around -; NOBUG-NOT: Applying bug work-around -; FUNC-LABEL: @nested4 -define void @nested4(i32 addrspace(1)* %out, i32 %cond) { -entry: - %0 = icmp sgt i32 %cond, 0 - br i1 %0, label %if.1, label %end - -if.1: - %1 = icmp sgt i32 %cond, 10 - br i1 %1, label %if.2, label %if.1.store - -if.1.store: - store i32 1, i32 addrspace(1)* %out - br label %end - -if.2: - %2 = icmp sgt i32 %cond, 20 - br i1 %2, label %if.3, label %if.2.store - -if.2.store: - store i32 2, i32 addrspace(1)* %out - br label %end - -if.3: - %3 = icmp sgt i32 %cond, 30 - br i1 %3, label %if.4, label %if.3.store - -if.3.store: - store i32 3, i32 addrspace(1)* %out - br label %end - -if.4: - store i32 4, i32 addrspace(1)* %out - br label %end - -end: - ret void -} - -; BUG64: Applying bug work-around -; BUG32-NOT: Applying bug work-around -; NOBUG-NOT: Applying bug work-around -; FUNC-LABEL: @nested7 -define void @nested7(i32 addrspace(1)* %out, i32 %cond) { -entry: - %0 = icmp sgt i32 %cond, 0 - br i1 %0, label %if.1, label %end - -if.1: - %1 = icmp sgt i32 %cond, 10 - br i1 %1, label %if.2, label %if.1.store - -if.1.store: - store i32 1, i32 addrspace(1)* %out - br label %end - -if.2: - %2 = icmp sgt i32 %cond, 20 - br i1 %2, label %if.3, label %if.2.store - -if.2.store: - store i32 2, i32 addrspace(1)* %out - br label %end - -if.3: - %3 = icmp sgt i32 %cond, 30 - br i1 %3, label %if.4, label %if.3.store - -if.3.store: - store i32 3, i32 addrspace(1)* %out - br label %end - -if.4: - %4 = icmp sgt i32 %cond, 40 - br i1 %4, label %if.5, label %if.4.store - -if.4.store: - store i32 4, i32 addrspace(1)* %out - br label %end - -if.5: - %5 = icmp sgt i32 %cond, 50 - br i1 %5, label %if.6, label %if.5.store - -if.5.store: - store i32 5, i32 addrspace(1)* %out - br label %end - -if.6: - %6 = icmp sgt i32 %cond, 60 - br i1 %6, label %if.7, label %if.6.store - -if.6.store: - store i32 6, i32 addrspace(1)* %out - br label %end - -if.7: - store i32 7, i32 addrspace(1)* %out - br label %end - -end: - ret void -} - -; BUG64: Applying bug work-around -; BUG32: Applying bug work-around -; NOBUG-NOT: Applying bug work-around -; FUNC-LABEL: @nested8 -define void @nested8(i32 addrspace(1)* %out, i32 %cond) { -entry: - %0 = icmp sgt i32 %cond, 0 - br i1 %0, label %if.1, label %end - -if.1: - %1 = icmp sgt i32 %cond, 10 - br i1 %1, label %if.2, label %if.1.store - -if.1.store: - store i32 1, i32 addrspace(1)* %out - br label %end - -if.2: - %2 = icmp sgt i32 %cond, 20 - br i1 %2, label %if.3, label %if.2.store - -if.2.store: - store i32 2, i32 addrspace(1)* %out - br label %end - -if.3: - %3 = icmp sgt i32 %cond, 30 - br i1 %3, label %if.4, label %if.3.store - -if.3.store: - store i32 3, i32 addrspace(1)* %out - br label %end - -if.4: - %4 = icmp sgt i32 %cond, 40 - br i1 %4, label %if.5, label %if.4.store - -if.4.store: - store i32 4, i32 addrspace(1)* %out - br label %end - -if.5: - %5 = icmp sgt i32 %cond, 50 - br i1 %5, label %if.6, label %if.5.store - -if.5.store: - store i32 5, i32 addrspace(1)* %out - br label %end - -if.6: - %6 = icmp sgt i32 %cond, 60 - br i1 %6, label %if.7, label %if.6.store - -if.6.store: - store i32 6, i32 addrspace(1)* %out - br label %end - -if.7: - %7 = icmp sgt i32 %cond, 70 - br i1 %7, label %if.8, label %if.7.store - -if.7.store: - store i32 7, i32 addrspace(1)* %out - br label %end - -if.8: - store i32 8, i32 addrspace(1)* %out - br label %end - -end: - ret void -}