mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-23 17:32:49 +00:00
436906ab3c
the stack of the analysis group because they are all immutable passes. This is made clear by Craig's recent work to use override systematically -- we weren't overriding anything for 'finalizePass' because there is no such thing. This is kind of a lame restriction on the API -- we can no longer push and pop things, we just set up the stack and run. However, I'm not invested in building some better solution on top of the existing (terrifying) immutable pass and legacy pass manager. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203437 91177308-0d34-0410-b5e6-96231b3b80d8
122 lines
4.3 KiB
C++
122 lines
4.3 KiB
C++
//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// \file
|
|
// This file implements a TargetTransformInfo analysis pass specific to the
|
|
// AMDGPU target machine. It uses the target's detailed information to provide
|
|
// more precise answers to certain TTI queries, while letting the target
|
|
// independent and default TTI implementations handle the rest.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#define DEBUG_TYPE "AMDGPUtti"
|
|
#include "AMDGPU.h"
|
|
#include "AMDGPUTargetMachine.h"
|
|
#include "llvm/Analysis/LoopInfo.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/Analysis/ValueTracking.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Target/CostTable.h"
|
|
#include "llvm/Target/TargetLowering.h"
|
|
using namespace llvm;
|
|
|
|
// Declare the pass initialization routine locally as target-specific passes
|
|
// don't have a target-wide initialization entry point, and so we rely on the
|
|
// pass constructor initialization.
|
|
namespace llvm {
|
|
void initializeAMDGPUTTIPass(PassRegistry &);
|
|
}
|
|
|
|
namespace {
|
|
|
|
class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
|
|
const AMDGPUTargetMachine *TM;
|
|
const AMDGPUSubtarget *ST;
|
|
const AMDGPUTargetLowering *TLI;
|
|
|
|
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
|
|
/// are set if the result needs to be inserted and/or extracted from vectors.
|
|
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
|
|
|
|
public:
|
|
AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
|
|
llvm_unreachable("This pass cannot be directly constructed");
|
|
}
|
|
|
|
AMDGPUTTI(const AMDGPUTargetMachine *TM)
|
|
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
|
|
TLI(TM->getTargetLowering()) {
|
|
initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
virtual void initializePass() override { pushTTIStack(this); }
|
|
|
|
virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
TargetTransformInfo::getAnalysisUsage(AU);
|
|
}
|
|
|
|
/// Pass identification.
|
|
static char ID;
|
|
|
|
/// Provide necessary pointer adjustments for the two base classes.
|
|
virtual void *getAdjustedAnalysisPointer(const void *ID) override {
|
|
if (ID == &TargetTransformInfo::ID)
|
|
return (TargetTransformInfo *)this;
|
|
return this;
|
|
}
|
|
|
|
virtual bool hasBranchDivergence() const override;
|
|
|
|
virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
|
|
|
|
/// @}
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
// Generate the pass initialization routine for AMDGPUTTI, naming
// TargetTransformInfo as its analysis group.
INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
                   "AMDGPU Target Transform Info", true, true, false)

// Storage for the pass identifier declared inside the class.
char AMDGPUTTI::ID = 0;
|
|
|
|
/// Create a new AMDGPUTTI pass bound to the target machine \p TM.
///
/// The pass is heap-allocated and returned as a raw pointer; presumably the
/// caller (or the pass manager it is handed to) takes ownership.
ImmutablePass *
llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
  AMDGPUTTI *const Pass = new AMDGPUTTI(TM);
  return Pass;
}
|
|
|
|
/// Unconditionally report that this target has branch divergence.
bool AMDGPUTTI::hasBranchDivergence() const {
  return true;
}
|
|
|
|
/// Raise the unroll threshold for loops that compute addresses into an alloca.
///
/// Every instruction of every block in \p L is inspected. If any of them is a
/// GetElementPtrInst whose pointer operand has an AllocaInst as its underlying
/// object, UP.Threshold is set to 500. Otherwise \p UP is left untouched.
///
/// \param L  The loop being considered for unrolling.
/// \param UP Unrolling preferences to adjust in place.
void AMDGPUTTI::getUnrollingPreferences(Loop *L,
                                        UnrollingPreferences &UP) const {
  for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
       BI != BE; ++BI) {
    BasicBlock *BB = *BI;
    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
      if (!GEP)
        continue;

      const Value *Ptr = GEP->getPointerOperand();
      if (!dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr)))
        continue;

      // We want to do whatever we can to limit the number of alloca
      // instructions that make it through to the code generator. allocas
      // require us to use indirect addressing, which is slow and prone to
      // compiler bugs. If this loop does an address calculation on an
      // alloca ptr, then we want to use a higher than normal loop unroll
      // threshold. This will give SROA a better chance to eliminate these
      // allocas.
      //
      // Don't use the maximum allowed value here as it will make some
      // programs way too big.
      UP.Threshold = 500;

      // Any further match would only store the same constant again, so there
      // is nothing left to learn from the remaining instructions.
      return;
    }
  }
}
|