llvm-6502/lib/Target/R600/AMDGPUTargetTransformInfo.cpp

//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the
// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

#define DEBUG_TYPE "AMDGPUtti"

// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializeAMDGPUTTIPass(PassRegistry &);
}

namespace {
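
// TargetTransformInfo implementation for the AMDGPU target, registered as an
// immutable pass in the TargetTransformInfo analysis group so that generic
// optimizations can query target-specific costs and preferences.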
class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
  const AMDGPUTargetMachine *TM;
  const AMDGPUSubtarget *ST;
  const AMDGPUTargetLowering *TLI;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

public:
  AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
    llvm_unreachable("This pass cannot be directly constructed");
  }

  AMDGPUTTI(const AMDGPUTargetMachine *TM)
      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
        TLI(TM->getTargetLowering()) {
    initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
  }

  void initializePass() override { pushTTIStack(this); }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    TargetTransformInfo::getAnalysisUsage(AU);
  }

  /// Pass identification.
  static char ID;

  /// Provide necessary pointer adjustments for the two base classes.
  void *getAdjustedAnalysisPointer(const void *ID) override {
    if (ID == &TargetTransformInfo::ID)
      return (TargetTransformInfo *)this;
    return this;
  }

  /// \name Scalar TTI Implementations
  /// @{

  bool hasBranchDivergence() const override;

  void getUnrollingPreferences(Loop *L,
                               UnrollingPreferences &UP) const override;

  /// @}
};

} // end anonymous namespace
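
// Register AMDGPUTTI as a member of the TargetTransformInfo analysis group.
// The trailing 'false' marks it as a non-default implementation of the group;
// it is selected when the AMDGPU target machine adds this pass.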
INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
                   "AMDGPU Target Transform Info", true, true, false)

char AMDGPUTTI::ID = 0;

ImmutablePass *
llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
  return new AMDGPUTTI(TM);
}
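
// Work-items in a wavefront execute in lockstep, so a branch whose condition
// varies across lanes diverges and both sides get executed under a mask.
// Reporting this lets divergence-aware optimizations account for it.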
bool AMDGPUTTI::hasBranchDivergence() const { return true; }

void AMDGPUTTI::getUnrollingPreferences(Loop *L,
                                        UnrollingPreferences &UP) const {
  for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
       BI != BE; ++BI) {
    BasicBlock *BB = *BI;
    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
      if (!GEP)
        continue;

      const Value *Ptr = GEP->getPointerOperand();
      const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
      if (Alloca) {
        // We want to do whatever we can to limit the number of alloca
        // instructions that make it through to the code generator. Allocas
        // require us to use indirect addressing, which is slow and prone to
        // compiler bugs. If this loop does an address calculation on an
        // alloca ptr, then we want to use a higher than normal loop unroll
        // threshold. This will give SROA a better chance to eliminate these
        // allocas.
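        //
        // For example (illustrative IR), a private array indexed as
        //   %ptr = getelementptr [8 x float]* %scratch, i32 0, i32 %i
        // uses a loop-variant index. Once the loop is fully unrolled every
        // index is a constant, and SROA can then split the alloca into
        // individual values instead of leaving it for indirect addressing.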
        //
        // Don't use the maximum allowed value here as it will make some
        // programs way too big.
        UP.Threshold = 500;
      }
    }
  }
}