diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 7831d1211e1..0d027006e67 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -280,6 +280,12 @@ protected: /// List of target independent CodeGen pass IDs. namespace llvm { + /// \brief Create a basic TargetTransformInfo analysis pass. + /// + /// This pass implements the target transform info analysis using the target + /// independent information available to the LLVM code generator. + ImmutablePass *createBasicTargetTransformInfoPass(const TargetLowering *TLI); + /// createUnreachableBlockEliminationPass - The LLVM code generator does not /// work well with unreachable basic blocks (what live ranges make sense for a /// block that cannot be reached?). As such, a code generator should either diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 9ee74dde1dd..79037f78d39 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -69,6 +69,7 @@ void initializeArgPromotionPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAliasAnalysisPass(PassRegistry&); void initializeBasicCallGraphPass(PassRegistry&); +void initializeBasicTTIPass(PassRegistry&); void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoPass(PassRegistry&); void initializeBlockPlacementPass(PassRegistry&); diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index ce6abc51700..8359a5c68c3 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -894,6 +894,18 @@ public: return 0; } + //===--------------------------------------------------------------------===// + /// \name Helpers for TargetTransformInfo implementations + /// @{ + + /// Get the ISD node that corresponds to the Instruction class opcode. 
+ int InstructionOpcodeToISD(unsigned Opcode) const; + + /// Estimate the cost of type-legalization and the legalized type. + std::pair<unsigned, MVT> getTypeLegalizationCost(Type *Ty) const; + + /// @} + //===--------------------------------------------------------------------===// // TargetLowering Optimization Methods // diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 5756f2c552a..aa049f02fc0 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -108,10 +108,6 @@ public: virtual const TargetLowering *getTargetLowering() const { return 0; } virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const{ return 0; } virtual const DataLayout *getDataLayout() const { return 0; } - virtual const ScalarTargetTransformInfo* - getScalarTargetTransformInfo() const { return 0; } - virtual const VectorTargetTransformInfo* - getVectorTargetTransformInfo() const { return 0; } /// getMCAsmInfo - Return target specific asm information. /// @@ -232,6 +228,9 @@ public: /// sections. static void setFunctionSections(bool); + /// \brief Register analysis passes for this target with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &) {} + /// CodeGenFileType - These enums are meant to be passed into /// addPassesToEmitFile to indicate what type of file to emit, and returned by /// it to indicate what type of file could actually be made. @@ -290,6 +289,11 @@ protected: // Can only create subclasses. CodeGenOpt::Level OL); public: + /// \brief Register analysis passes for this target with a pass manager. + /// + /// This registers target independent analysis passes. + virtual void addAnalysisPasses(PassManagerBase &PM); + /// createPassConfig - Create a pass configuration object to be used by /// addPassToEmitX methods for generating a pipeline of CodeGen passes.
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h deleted file mode 100644 index 20699276196..00000000000 --- a/include/llvm/Target/TargetTransformImpl.h +++ /dev/null @@ -1,104 +0,0 @@ -//=- llvm/Target/TargetTransformImpl.h - Target Loop Trans Info----*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the target-specific implementations of the -// TargetTransform interfaces. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H -#define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H - -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/TargetTransformInfo.h" - -namespace llvm { - -class TargetLowering; - -/// ScalarTargetTransformInfo - This is a default implementation for the -/// ScalarTargetTransformInfo interface. Different targets can implement -/// this interface differently. 
-class ScalarTargetTransformImpl : public ScalarTargetTransformInfo { -protected: - const TargetLowering *TLI; - -public: - /// Ctor - explicit ScalarTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {} - - virtual bool isLegalAddImmediate(int64_t imm) const; - - virtual bool isLegalICmpImmediate(int64_t imm) const; - - virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const; - - virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; - - virtual bool isTypeLegal(Type *Ty) const; - - virtual unsigned getJumpBufAlignment() const; - - virtual unsigned getJumpBufSize() const; - - virtual bool shouldBuildLookupTables() const; -}; - -class VectorTargetTransformImpl : public VectorTargetTransformInfo { -protected: - const TargetLowering *TLI; - - /// Estimate the cost of type-legalization and the legalized type. - std::pair getTypeLegalizationCost(Type *Ty) const; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - - // Get the ISD node that corresponds to the Instruction class opcode. 
- int InstructionOpcodeToISD(unsigned Opcode) const; - -public: - explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {} - - virtual ~VectorTargetTransformImpl() {} - - virtual unsigned getNumberOfRegisters(bool Vector) const; - - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; - - virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const; - - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; - - virtual unsigned getCFInstrCost(unsigned Opcode) const; - - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; - - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const; - - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const; - - virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, - ArrayRef Tys) const; - - virtual unsigned getNumberOfParts(Type *Tp) const; -}; - -} // end llvm namespace - -#endif diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h index 97d0304a38b..253960da27e 100644 --- a/include/llvm/TargetTransformInfo.h +++ b/include/llvm/TargetTransformInfo.h @@ -30,9 +30,6 @@ namespace llvm { -class ScalarTargetTransformInfo; -class VectorTargetTransformInfo; - /// TargetTransformInfo - This pass provides access to the codegen /// interfaces that are needed for IR-level transformations. class TargetTransformInfo { @@ -42,11 +39,26 @@ protected: /// This is used to implement the default behavior all of the methods which /// is to delegate up through the stack of TTIs until one can answer the /// query. - const TargetTransformInfo *PrevTTI; + TargetTransformInfo *PrevTTI; - /// Every subclass must initialize the base with the previous TTI in the - /// stack, or 0 if there is no previous TTI in the stack. 
- TargetTransformInfo(const TargetTransformInfo *PrevTTI) : PrevTTI(PrevTTI) {} + /// \brief The top of the stack of TTI analyses available. + /// + /// This is a convenience routine maintained as TTI analyses become available + /// that complements the PrevTTI delegation chain. When one part of an + /// analysis pass wants to query another part of the analysis pass it can use + /// this to start back at the top of the stack. + TargetTransformInfo *TopTTI; + + /// All pass subclasses must in their initializePass routine call + /// pushTTIStack with themselves to update the pointers tracking the previous + /// TTI instance in the analysis group's stack, and the top of the analysis + /// group's stack. + void pushTTIStack(Pass *P); + + /// All pass subclasses must in their finalizePass routine call popTTIStack + /// to update the pointers tracking the previous TTI instance in the analysis + /// group's stack, and the top of the analysis group's stack. + void popTTIStack(); /// All pass subclasses must call TargetTransformInfo::getAnalysisUsage. virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -184,177 +196,7 @@ public: /// This class provides the base case for the stack of TTI analyses. It doesn't /// delegate to anything and uses the STTI and VTTI objects passed in to /// satisfy the queries. -ImmutablePass *createNoTTIPass(const ScalarTargetTransformInfo *S, - const VectorTargetTransformInfo *V); - - -// ---------------------------------------------------------------------------// -// The classes below are inherited and implemented by target-specific classes -// in the codegen. -// ---------------------------------------------------------------------------// - -/// ScalarTargetTransformInfo - This interface is used by IR-level passes -/// that need target-dependent information for generic scalar transformations. -/// LSR, and LowerInvoke use this interface. 
-class ScalarTargetTransformInfo { -public: - /// PopcntHwSupport - Hardware support for population count. Compared to the - /// SW implementation, HW support is supposed to significantly boost the - /// performance when the population is dense, and it may or may not degrade - /// performance if the population is sparse. A HW support is considered as - /// "Fast" if it can outperform, or is on a par with, SW implementaion when - /// the population is sparse; otherwise, it is considered as "Slow". - enum PopcntHwSupport { - None, - Fast, - Slow - }; - - virtual ~ScalarTargetTransformInfo() {} - - /// isLegalAddImmediate - Return true if the specified immediate is legal - /// add immediate, that is the target has add instructions which can add - /// a register with the immediate without having to materialize the - /// immediate into a register. - virtual bool isLegalAddImmediate(int64_t) const { - return false; - } - /// isLegalICmpImmediate - Return true if the specified immediate is legal - /// icmp immediate, that is the target has icmp instructions which can compare - /// a register against the immediate without having to materialize the - /// immediate into a register. - virtual bool isLegalICmpImmediate(int64_t) const { - return false; - } - /// isLegalAddressingMode - Return true if the addressing mode represented by - /// AM is legal for this target, for a load/store of the specified type. - /// The type may be VoidTy, in which case only return true if the addressing - /// mode is legal for a load/store of any legal type. - /// TODO: Handle pre/postinc as well. - virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { - return false; - } - /// isTruncateFree - Return true if it's free to truncate a value of - /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in - /// register EAX to i16 by referencing its sub-register AX. 
- virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const { - return false; - } - /// Is this type legal. - virtual bool isTypeLegal(Type *Ty) const { - return false; - } - /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes - virtual unsigned getJumpBufAlignment() const { - return 0; - } - /// getJumpBufSize - returns the target's jmp_buf size in bytes. - virtual unsigned getJumpBufSize() const { - return 0; - } - /// shouldBuildLookupTables - Return true if switches should be turned into - /// lookup tables for the target. - virtual bool shouldBuildLookupTables() const { - return true; - } - /// getPopcntHwSupport - Return hardware support for population count. - virtual PopcntHwSupport getPopcntHwSupport(unsigned IntTyWidthInBit) const { - return None; - } - /// getIntImmCost - Return the expected cost of materializing the given - /// integer immediate of the specified type. - virtual unsigned getIntImmCost(const APInt&, Type*) const { - // The default assumption is that the immediate is cheap. - return 1; - } -}; - -/// VectorTargetTransformInfo - This interface is used by the vectorizers -/// to estimate the profitability of vectorization for different instructions. -/// This interface provides the cost of different IR instructions. The cost -/// is unit-less and represents the estimated throughput of the instruction -/// (not the latency!) assuming that all branches are predicted, cache is hit, -/// etc. -class VectorTargetTransformInfo { -public: - virtual ~VectorTargetTransformInfo() {} - - enum ShuffleKind { - Broadcast, // Broadcast element 0 to all other elements. - Reverse, // Reverse the order of the vector. - InsertSubvector, // InsertSubvector. Index indicates start offset. - ExtractSubvector // ExtractSubvector Index indicates start offset. - }; - - /// \return The number of scalar or vector registers that the target has. - /// If 'Vectors' is true, it returns the number of vector registers. 
If it is - /// set to false, it returns the number of scalar registers. - virtual unsigned getNumberOfRegisters(bool Vector) const { - return 8; - } - - /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { - return 1; - } - - /// \return The cost of a shuffle instruction of kind Kind and of type Tp. - /// The index and subtype parameters are used by the subvector insertion and - /// extraction shuffle kinds. - virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index = 0, Type *SubTp = 0) const { - return 1; - } - - /// \return The expected cost of cast instructions, such as bitcast, trunc, - /// zext, etc. - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - return 1; - } - - /// \return The expected cost of control-flow related instrutctions such as - /// Phi, Ret, Br. - virtual unsigned getCFInstrCost(unsigned Opcode) const { - return 1; - } - - /// \returns The expected cost of compare and select instructions. - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = 0) const { - return 1; - } - - /// \return The expected cost of vector Insert and Extract. - /// Use -1 to indicate that there is no information on the index value. - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index = -1) const { - return 1; - } - - /// \return The cost of Load and Store instructions. - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - return 1; - } - - /// \returns The cost of Intrinsic instructions. - virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, - Type *RetTy, - ArrayRef Tys) const { - return 1; - } - - /// \returns The number of pieces into which the provided type must be - /// split during legalization. Zero is returned when the answer is unknown. 
- virtual unsigned getNumberOfParts(Type *Tp) const { - return 0; - } -}; - +ImmutablePass *createNoTargetTransformInfoPass(); } // End llvm namespace diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp new file mode 100644 index 00000000000..bafe3da1d29 --- /dev/null +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -0,0 +1,383 @@ +//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the implementation of a basic TargetTransformInfo pass +/// predicated on the target abstractions present in the target independent +/// code generator. It uses these (primarily TargetLowering) to model as much +/// of the TTI query interface as possible. It is included by most targets so +/// that they can specialize only a small subset of the query space. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "basictti" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/TargetTransformInfo.h" +#include <utility> + +using namespace llvm; + +namespace { + +class BasicTTI : public ImmutablePass, public TargetTransformInfo { + const TargetLowering *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + BasicTTI() : ImmutablePass(ID), TLI(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + BasicTTI(const TargetLowering *TLI) : ImmutablePass(ID), TLI(TLI) { + initializeBasicTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + + virtual bool isLegalAddImmediate(int64_t imm) const; + virtual bool isLegalICmpImmediate(int64_t imm) const; + virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const; + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; + virtual bool isTypeLegal(Type *Ty) const; + virtual unsigned getJumpBufAlignment() const; + virtual unsigned getJumpBufSize() const; + virtual bool shouldBuildLookupTables() const; + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + virtual unsigned getCFInstrCost(unsigned Opcode) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + 
virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, + ArrayRef<Type *> Tys) const; + virtual unsigned getNumberOfParts(Type *Tp) const; + + /// @} +}; + +} + +INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", + "Target independent code generator's TTI", true, true, false) +char BasicTTI::ID = 0; + +ImmutablePass * +llvm::createBasicTargetTransformInfoPass(const TargetLowering *TLI) { + return new BasicTTI(TLI); +} + + +bool BasicTTI::isLegalAddImmediate(int64_t imm) const { + return TLI->isLegalAddImmediate(imm); +} + +bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { + return TLI->isLegalICmpImmediate(imm); +} + +bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const { + AddrMode AM; + AM.BaseGV = BaseGV; + AM.BaseOffs = BaseOffset; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Scale; + return TLI->isLegalAddressingMode(AM, Ty); +} + +bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { + return TLI->isTruncateFree(Ty1, Ty2); +} + +bool BasicTTI::isTypeLegal(Type *Ty) const { + EVT T = TLI->getValueType(Ty); + return TLI->isTypeLegal(T); +} + +unsigned BasicTTI::getJumpBufAlignment() const { + return TLI->getJumpBufAlignment(); +} + +unsigned BasicTTI::getJumpBufSize() const { + return TLI->getJumpBufSize(); +} + +bool BasicTTI::shouldBuildLookupTables() const { + return TLI->supportJumpTables() && + (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} + +//===----------------------------------------------------------------------===// +// +// Calls used by the vectorizers.
+// +//===----------------------------------------------------------------------===// + +unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert, + bool Extract) const { + assert (Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; +} + +unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { + return 1; +} + +unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + // Check if any of the operands are vector operands. + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that there is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + return LT.first * 2; + return LT.first * 1; + } + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // that the code is twice as expensive. + return LT.first * 2; + } + + // Else, assume that we need to scalarize this op. + if (Ty->isVectorTy()) { + unsigned Num = Ty->getVectorNumElements(); + unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType()); + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(Ty, true, true) + Num * Cost; + } + + // We don't know anything about this scalar instruction.
+ return 1; +} + +unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + return 1; +} + +unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst); + + // Handle scalar conversions. + if (!Src->isVectorTy() && !Dst->isVectorTy()) { + + // Scalar bitcasts are usually free. + if (Opcode == Instruction::BitCast) + return 0; + + if (Opcode == Instruction::Trunc && + TLI->isTruncateFree(SrcLT.second, DstLT.second)) + return 0; + + if (Opcode == Instruction::ZExt && + TLI->isZExtFree(SrcLT.second, DstLT.second)) + return 0; + + // Just check the op cost. If the operation is legal then assume it costs 1. + if (!TLI->isOperationExpand(ISD, DstLT.second)) + return 1; + + // Assume that illegal scalar instructions are expensive. + return 4; + } + + // Check vector-to-vector casts. + if (Dst->isVectorTy() && Src->isVectorTy()) { + + // If the cast is between same-sized registers, then the check is simple. + if (SrcLT.first == DstLT.first && + SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { + + // Bitcast between types that are legalized to the same type are free. + if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) + return 0; + + // Assume that Zext is done using AND. + if (Opcode == Instruction::ZExt) + return 1; + + // Assume that sext is done using SHL and SRA. + if (Opcode == Instruction::SExt) + return 2; + + // Just check the op cost. If the operation is legal then assume it costs + // 1 and multiply by the type-legalization overhead. + if (!TLI->isOperationExpand(ISD, DstLT.second)) + return SrcLT.first * 1; + } + + // If we are converting vectors and the operation is illegal, or + // if the vectors are legalized to different types, estimate the + // scalarization costs.
+ unsigned Num = Dst->getVectorNumElements(); + unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(), + Src->getScalarType()); + + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + return getScalarizationOverhead(Dst, true, true) + Num * Cost; + } + + // We already handled vector-to-vector and scalar-to-scalar conversions. This + // is where we handle bitcast between vectors and scalars. We need to assume + // that the conversion is scalarized in one way or another. + if (Opcode == Instruction::BitCast) + // Illegal bitcasts are done by storing and loading from a stack slot. + return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + + (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); + + llvm_unreachable("Unhandled cast"); + } + +unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { + // Branches are assumed to be predicted. + return 0; +} + +unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Selects on vectors are actually vector selects. + if (ISD == ISD::SELECT) { + assert(CondTy && "CondTy must exist"); + if (CondTy->isVectorTy()) + ISD = ISD::VSELECT; + } + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return LT.first * 1; + } + + // Otherwise, assume that the compare/select is scalarized. + if (ValTy->isVectorTy()) { + unsigned Num = ValTy->getVectorNumElements(); + if (CondTy) + CondTy = CondTy->getScalarType(); + unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(), + CondTy); + + // Return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values.
+ return getScalarizationOverhead(ValTy, true, false) + Num * Cost; + } + + // Unknown scalar opcode. + return 1; +} + +unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + return 1; +} + +unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + assert(!Src->isVoidTy() && "Invalid type"); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + + // Assume that all loads of legal types cost 1. + return LT.first; +} + +unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, + ArrayRef<Type *> Tys) const { + // assume that we need to scalarize this intrinsic. + unsigned ScalarizationCost = 0; + unsigned ScalarCalls = 1; + if (RetTy->isVectorTy()) { + ScalarizationCost = getScalarizationOverhead(RetTy, true, false); + ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); + } + for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { + if (Tys[i]->isVectorTy()) { + ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); + ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); + } + } + return ScalarCalls + ScalarizationCost; +} + +unsigned BasicTTI::getNumberOfParts(Type *Tp) const { + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + return LT.first; +} diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 1e08672183c..d5f3932c153 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp AllocationOrder.cpp Analysis.cpp + BasicTargetTransformInfo.cpp BranchFolding.cpp CalcSpillWeights.cpp CallingConvLower.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index a53f6f8d0f1..a33b6720449 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -19,6 +19,7 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCalculateSpillWeightsPass(Registry); initializeCodePlacementOptPass(Registry); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 1065614f4b2..12cd2d1c089 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -79,6 +79,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } +void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); +} + /// addPassesToX helper drives creation and initialization of TargetPassConfig. static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f63e226ff9c..77732e04a22 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1098,6 +1098,103 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; } +//===----------------------------------------------------------------------===// +// TargetTransformInfo Helpers +//===----------------------------------------------------------------------===// + +int TargetLowering::InstructionOpcodeToISD(unsigned Opcode) const { + enum InstructionOpcodes { +#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, +#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM +#include "llvm/IR/Instruction.def" + }; + switch (static_cast(Opcode)) { + case Ret: return 0; + case Br: return 0; + case Switch: return 0; + case IndirectBr: return 0; + case Invoke: return 0; + case Resume: return 0; + case Unreachable: return 0; + case Add: return ISD::ADD; + case FAdd: return ISD::FADD; + case Sub: return ISD::SUB; + case FSub: 
return ISD::FSUB; + case Mul: return ISD::MUL; + case FMul: return ISD::FMUL; + case UDiv: return ISD::UDIV; + case SDiv: return ISD::SDIV; + case FDiv: return ISD::FDIV; + case URem: return ISD::UREM; + case SRem: return ISD::SREM; + case FRem: return ISD::FREM; + case Shl: return ISD::SHL; + case LShr: return ISD::SRL; + case AShr: return ISD::SRA; + case And: return ISD::AND; + case Or: return ISD::OR; + case Xor: return ISD::XOR; + case Alloca: return 0; + case Load: return ISD::LOAD; + case Store: return ISD::STORE; + case GetElementPtr: return 0; + case Fence: return 0; + case AtomicCmpXchg: return 0; + case AtomicRMW: return 0; + case Trunc: return ISD::TRUNCATE; + case ZExt: return ISD::ZERO_EXTEND; + case SExt: return ISD::SIGN_EXTEND; + case FPToUI: return ISD::FP_TO_UINT; + case FPToSI: return ISD::FP_TO_SINT; + case UIToFP: return ISD::UINT_TO_FP; + case SIToFP: return ISD::SINT_TO_FP; + case FPTrunc: return ISD::FP_ROUND; + case FPExt: return ISD::FP_EXTEND; + case PtrToInt: return ISD::BITCAST; + case IntToPtr: return ISD::BITCAST; + case BitCast: return ISD::BITCAST; + case ICmp: return ISD::SETCC; + case FCmp: return ISD::SETCC; + case PHI: return 0; + case Call: return 0; + case Select: return ISD::SELECT; + case UserOp1: return 0; + case UserOp2: return 0; + case VAArg: return 0; + case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; + case InsertElement: return ISD::INSERT_VECTOR_ELT; + case ShuffleVector: return ISD::VECTOR_SHUFFLE; + case ExtractValue: return ISD::MERGE_VALUES; + case InsertValue: return ISD::MERGE_VALUES; + case LandingPad: return 0; + } + + llvm_unreachable("Unknown instruction type encountered!"); +} + +std::pair +TargetLowering::getTypeLegalizationCost(Type *Ty) const { + LLVMContext &C = Ty->getContext(); + EVT MTy = getValueType(Ty); + + unsigned Cost = 1; + // We keep legalizing the type until we find a legal kind. We assume that + // the only operation that costs anything is the split. 
After splitting + // we need to handle two types. + while (true) { + LegalizeKind LK = getTypeConversion(C, MTy); + + if (LK.first == TypeLegal) + return std::make_pair(Cost, MTy.getSimpleVT()); + + if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) + Cost *= 2; + + // Keep legalizing the type. + MTy = LK.second; + } +} + //===----------------------------------------------------------------------===// // Optimization Methods //===----------------------------------------------------------------------===// diff --git a/lib/IR/TargetTransformInfo.cpp b/lib/IR/TargetTransformInfo.cpp index 8d5fb0d40bd..8714aae8eaa 100644 --- a/lib/IR/TargetTransformInfo.cpp +++ b/lib/IR/TargetTransformInfo.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "tti" #include "llvm/TargetTransformInfo.h" #include "llvm/Support/ErrorHandling.h" @@ -19,6 +20,25 @@ char TargetTransformInfo::ID = 0; TargetTransformInfo::~TargetTransformInfo() { } +void TargetTransformInfo::pushTTIStack(Pass *P) { + TopTTI = this; + PrevTTI = &P->getAnalysis(); + + // Walk up the chain and update the top TTI pointer. + for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) + PTTI->TopTTI = this; +} + +void TargetTransformInfo::popTTIStack() { + TopTTI = 0; + + // Walk up the chain and update the top TTI pointer. + for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) + PTTI->TopTTI = PrevTTI; + + PrevTTI = 0; +} + void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); } @@ -122,26 +142,18 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { namespace { -class NoTTI : public ImmutablePass, public TargetTransformInfo { - const ScalarTargetTransformInfo *STTI; - const VectorTargetTransformInfo *VTTI; - -public: - // FIXME: This constructor doesn't work which breaks the use of NoTTI on the - // commandline. 
This has to be fixed for NoTTI to be fully usable as an - // analysis pass. - NoTTI() : ImmutablePass(ID), TargetTransformInfo(0) { - llvm_unreachable("Unsupported code path!"); - } - - NoTTI(const ScalarTargetTransformInfo *S, const VectorTargetTransformInfo *V) - : ImmutablePass(ID), - TargetTransformInfo(0), // NoTTI is special and doesn't delegate here. - STTI(S), VTTI(V) { +struct NoTTI : ImmutablePass, TargetTransformInfo { + NoTTI() : ImmutablePass(ID) { initializeNoTTIPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + virtual void initializePass() { + // Note that this subclass is special, and must *not* call initializeTTI as + // it does not chain. + PrevTTI = 0; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { // Note that this subclass is special, and must *not* call // TTI::getAnalysisUsage as it breaks the recursion. } @@ -157,107 +169,102 @@ public: } - // Delegate all predicates through the STTI or VTTI interface. 
- bool isLegalAddImmediate(int64_t Imm) const { - return STTI->isLegalAddImmediate(Imm); + return false; } bool isLegalICmpImmediate(int64_t Imm) const { - return STTI->isLegalICmpImmediate(Imm); + return false; } bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const { - return STTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale); + return false; } bool isTruncateFree(Type *Ty1, Type *Ty2) const { - return STTI->isTruncateFree(Ty1, Ty2); + return false; } bool isTypeLegal(Type *Ty) const { - return STTI->isTypeLegal(Ty); + return false; } unsigned getJumpBufAlignment() const { - return STTI->getJumpBufAlignment(); + return 0; } unsigned getJumpBufSize() const { - return STTI->getJumpBufSize(); + return 0; } bool shouldBuildLookupTables() const { - return STTI->shouldBuildLookupTables(); + return true; } PopcntHwSupport getPopcntHwSupport(unsigned IntTyWidthInBit) const { - return (PopcntHwSupport)STTI->getPopcntHwSupport(IntTyWidthInBit); + return None; } unsigned getIntImmCost(const APInt &Imm, Type *Ty) const { - return STTI->getIntImmCost(Imm, Ty); + return 1; } unsigned getNumberOfRegisters(bool Vector) const { - return VTTI->getNumberOfRegisters(Vector); + return 8; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { - return VTTI->getArithmeticInstrCost(Opcode, Ty); + return 1; } unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, Type *SubTp = 0) const { - return VTTI->getShuffleCost((VectorTargetTransformInfo::ShuffleKind)Kind, - Tp, Index, SubTp); + return 1; } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { - return VTTI->getCastInstrCost(Opcode, Dst, Src); + return 1; } unsigned getCFInstrCost(unsigned Opcode) const { - return VTTI->getCFInstrCost(Opcode); + return 1; } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = 0) const { - return VTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy); + return 
1; } unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const { - return VTTI->getVectorInstrCost(Opcode, Val, Index); + return 1; } unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - return VTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + return 1; } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys) const { - return VTTI->getIntrinsicInstrCost(ID, RetTy, Tys); + return 1; } unsigned getNumberOfParts(Type *Tp) const { - return VTTI->getNumberOfParts(Tp); + return 0; } }; } // end anonymous namespace -INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "no-tti", +INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti", "No target information", true, true, true) char NoTTI::ID = 0; -ImmutablePass *llvm::createNoTTIPass(const ScalarTargetTransformInfo *S, - const VectorTargetTransformInfo *V) { - return new NoTTI(S, V); +ImmutablePass *llvm::createNoTargetTransformInfoPass() { + return new NoTTI(); } diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 1446bbbb8e7..3e1329936a1 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -44,6 +44,9 @@ FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +/// \brief Creates an ARM-specific Target Transformation Info pass. 
+ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM); + void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index dcdbd962765..5b3e31f0b16 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -10344,35 +10344,3 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } -unsigned -ARMScalarTargetTransformImpl::getIntImmCost(const APInt &Imm, Type *Ty) const { - assert(Ty->isIntegerTy()); - - unsigned Bits = Ty->getPrimitiveSizeInBits(); - if (Bits == 0 || Bits > 32) - return 4; - - int32_t SImmVal = Imm.getSExtValue(); - uint32_t ZImmVal = Imm.getZExtValue(); - if (!Subtarget->isThumb()) { - if ((SImmVal >= 0 && SImmVal < 65536) || - (ARM_AM::getSOImmVal(ZImmVal) != -1) || - (ARM_AM::getSOImmVal(~ZImmVal) != -1)) - return 1; - return Subtarget->hasV6T2Ops() ? 2 : 3; - } else if (Subtarget->isThumb2()) { - if ((SImmVal >= 0 && SImmVal < 65536) || - (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || - (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) - return 1; - return Subtarget->hasV6T2Ops() ? 2 : 3; - } else /*Thumb1*/ { - if (SImmVal >= 0 && SImmVal < 256) - return 1; - if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) - return 2; - // Load from constantpool. 
- return 3; - } - return 2; -} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 61649a0d4dc..9ee17f0781b 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -22,7 +22,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetTransformImpl.h" #include namespace llvm { @@ -574,16 +573,6 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } - - class ARMScalarTargetTransformImpl : public ScalarTargetTransformImpl { - const ARMSubtarget *Subtarget; - public: - explicit ARMScalarTargetTransformImpl(const TargetLowering *TL) : - ScalarTargetTransformImpl(TL), - Subtarget(&TL->getTargetMachine().getSubtarget()) {}; - - virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; - }; } #endif // ARMISELLOWERING_H diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 0613f2d9398..dab7f178cd0 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -51,6 +51,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, this->Options.FloatABIType = FloatABI::Soft; } +void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our ARM pass. This + // allows the ARM pass to delegate to the target independent layer when + // appropriate. 
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createARMTargetTransformInfoPass(this)); +} + + void ARMTargetMachine::anchor() { } ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, @@ -70,8 +79,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "v128:64:128-v64:64:64-n32-S32")), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), - STTI(&TLInfo), VTTI(&TLInfo) { + FrameLowering(Subtarget) { if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -103,8 +111,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, TSInfo(*this), FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) - : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)), - STTI(&TLInfo), VTTI(&TLInfo) { + : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } namespace { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index bbc479501d0..a8b21b2152a 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -27,7 +27,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -51,6 +50,9 @@ public: return &InstrItins; } + /// \brief Register ARM analysis passes with a pass manager. 
+ virtual void addAnalysisPasses(PassManagerBase &PM); + // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); @@ -66,8 +68,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; - ARMScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -89,12 +89,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { virtual const ARMFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const DataLayout *getDataLayout() const { return &DL; } }; @@ -112,8 +106,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. 
OwningPtr FrameLowering; - ARMScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -142,12 +134,6 @@ public: virtual const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } }; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp new file mode 100644 index 00000000000..7a830dd7311 --- /dev/null +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -0,0 +1,124 @@ +//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// ARM target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "armtti" +#include "ARM.h" +#include "ARMTargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/TargetTransformInfo.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't have a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. 
+namespace llvm { +void initializeARMTTIPass(PassRegistry &); +} + +namespace { + +class ARMTTI : public ImmutablePass, public TargetTransformInfo { + const ARMBaseTargetMachine *TM; + const ARMSubtarget *ST; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + ARMTTI() : ImmutablePass(ID), TM(0), ST(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + ARMTTI(const ARMBaseTargetMachine *TM) + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) { + initializeARMTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. 
+ virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + + virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; + + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti", + "ARM Target Transform Info", true, true, false) +char ARMTTI::ID = 0; + +ImmutablePass * +llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) { + return new ARMTTI(TM); +} + + +unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned Bits = Ty->getPrimitiveSizeInBits(); + if (Bits == 0 || Bits > 32) + return 4; + + int32_t SImmVal = Imm.getSExtValue(); + uint32_t ZImmVal = Imm.getZExtValue(); + if (!ST->isThumb()) { + if ((SImmVal >= 0 && SImmVal < 65536) || + (ARM_AM::getSOImmVal(ZImmVal) != -1) || + (ARM_AM::getSOImmVal(~ZImmVal) != -1)) + return 1; + return ST->hasV6T2Ops() ? 2 : 3; + } else if (ST->isThumb2()) { + if ((SImmVal >= 0 && SImmVal < 65536) || + (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || + (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) + return 1; + return ST->hasV6T2Ops() ? 2 : 3; + } else /*Thumb1*/ { + if (SImmVal >= 0 && SImmVal < 256) + return 1; + if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) + return 2; + // Load from constantpool. 
+ return 3; + } + return 2; +} diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index e279e2d829e..586834cf730 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -37,6 +37,7 @@ add_llvm_target(ARMCodeGen ARMSubtarget.cpp ARMTargetMachine.cpp ARMTargetObjectFile.cpp + ARMTargetTransformInfo.cpp MLxExpansionPass.cpp Thumb1FrameLowering.cpp Thumb1InstrInfo.cpp diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index c87a33a8b99..02ac493b421 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -8,7 +8,6 @@ add_llvm_library(LLVMTarget TargetMachine.cpp TargetMachineC.cpp TargetSubtargetInfo.cpp - TargetTransformImpl.cpp ) foreach(t ${LLVM_TARGETS_TO_BUILD}) diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index a39bf69646a..287b3d615bb 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -74,8 +74,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), - InstrItins(&Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { + InstrItins(&Subtarget.getInstrItineraryData()) { setMCUseCFI(false); } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index a99396d5146..cf8f9aa3612 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -21,7 +21,6 @@ #include "HexagonSubtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -35,8 +34,6 @@ class HexagonTargetMachine : public LLVMTargetMachine { HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; const InstrItineraryData* InstrItins; - ScalarTargetTransformImpl STTI; - 
VectorTargetTransformImpl VTTI; public: HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, @@ -71,14 +68,6 @@ public: return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } - virtual const DataLayout *getDataLayout() const { return &DL; } static unsigned getModuleMatchQuality(const Module &M); diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 70eb9bac6be..bcdd32fed94 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -42,8 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, InstrInfo(*this), FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { + InstrItins(Subtarget.getInstrItineraryData()) { } namespace { diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 6101720a280..956794dddaf 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -24,7 +24,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -38,8 +37,6 @@ namespace llvm { MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: MBlazeTargetMachine(const Target &T, StringRef TT, @@ -75,11 +72,6 @@ namespace llvm { const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const - { return &STTI; } - virtual const VectorTargetTransformInfo 
*getVectorTargetTransformInfo()const - { return &VTTI; } - // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 062c119410e..164e351df95 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, // FIXME: Check DataLayout string. DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { } + FrameLowering(Subtarget) { } namespace { /// MSP430 Code Generator Pass Configuration Options. diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index d5424231ff4..be695a21110 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -24,7 +24,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -37,8 +36,6 @@ class MSP430TargetMachine : public LLVMTargetMachine { MSP430TargetLowering TLInfo; MSP430SelectionDAGInfo TSInfo; MSP430FrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: MSP430TargetMachine(const Target &T, StringRef TT, @@ -64,12 +61,6 @@ public: virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; // MSP430TargetMachine. 
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 5397efcd327..1b91e8b1644 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -54,8 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(*this), TSInfo(*this), JITInfo(), - STTI(&TLInfo), VTTI(&TLInfo) { + TLInfo(*this), TSInfo(*this), JITInfo() { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 74af0ad23a1..c4928c21ebe 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -24,7 +24,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -38,8 +37,6 @@ class MipsTargetMachine : public LLVMTargetMachine { MipsTargetLowering TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: MipsTargetMachine(const Target &T, StringRef TT, @@ -73,13 +70,6 @@ public: return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } - // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 3b243dc6009..b4e049ea3e5 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -71,8 +71,7 @@ 
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit), - STTI(&TLInfo), VTTI(&TLInfo) + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index c8b7f21e11c..1a732be1ade 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -25,7 +25,6 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -45,9 +44,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine { // Hold Strings that can be free'd all together with NVPTXTargetMachine ManagedStringPool ManagedStrPool; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; - //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); @@ -76,12 +72,6 @@ public: virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } //virtual bool addInstSelector(PassManagerBase &PM, // CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 22a78c5f1f2..b8b7882ac00 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayoutString()), InstrInfo(*this), 
FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { + InstrItins(Subtarget.getInstrItineraryData()) { // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index cf14b21719d..d917d99ded6 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -22,7 +22,6 @@ #include "PPCSubtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: PPCTargetMachine(const Target &T, StringRef TT, @@ -66,12 +63,6 @@ public: virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 7c628d7c555..60bceb708fb 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayout()), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { + FrameLowering(Subtarget) { } namespace { diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 780848d4dc5..081075de2dc 100644 
--- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -22,7 +22,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -33,8 +32,6 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -55,12 +52,6 @@ public: virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp deleted file mode 100644 index 63f34a8c909..00000000000 --- a/lib/Target/TargetTransformImpl.cpp +++ /dev/null @@ -1,388 +0,0 @@ -// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetTransformImpl.h" -#include "llvm/Target/TargetLowering.h" -#include - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// -// Calls used by scalar transformations. 
-// -//===----------------------------------------------------------------------===// - -bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const { - return TLI->isLegalAddImmediate(imm); -} - -bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const { - return TLI->isLegalICmpImmediate(imm); -} - -bool ScalarTargetTransformImpl::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { - AddrMode AM; - AM.BaseGV = BaseGV; - AM.BaseOffs = BaseOffset; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Scale; - return TLI->isLegalAddressingMode(AM, Ty); -} - -bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const { - return TLI->isTruncateFree(Ty1, Ty2); -} - -bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const { - EVT T = TLI->getValueType(Ty); - return TLI->isTypeLegal(T); -} - -unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const { - return TLI->getJumpBufAlignment(); -} - -unsigned ScalarTargetTransformImpl::getJumpBufSize() const { - return TLI->getJumpBufSize(); -} - -bool ScalarTargetTransformImpl::shouldBuildLookupTables() const { - return TLI->supportJumpTables() && - (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); -} - -//===----------------------------------------------------------------------===// -// -// Calls used by the vectorizers. 
-// -//===----------------------------------------------------------------------===// -int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const { - enum InstructionOpcodes { -#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, -#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM -#include "llvm/IR/Instruction.def" - }; - switch (static_cast(Opcode)) { - case Ret: return 0; - case Br: return 0; - case Switch: return 0; - case IndirectBr: return 0; - case Invoke: return 0; - case Resume: return 0; - case Unreachable: return 0; - case Add: return ISD::ADD; - case FAdd: return ISD::FADD; - case Sub: return ISD::SUB; - case FSub: return ISD::FSUB; - case Mul: return ISD::MUL; - case FMul: return ISD::FMUL; - case UDiv: return ISD::UDIV; - case SDiv: return ISD::UDIV; - case FDiv: return ISD::FDIV; - case URem: return ISD::UREM; - case SRem: return ISD::SREM; - case FRem: return ISD::FREM; - case Shl: return ISD::SHL; - case LShr: return ISD::SRL; - case AShr: return ISD::SRA; - case And: return ISD::AND; - case Or: return ISD::OR; - case Xor: return ISD::XOR; - case Alloca: return 0; - case Load: return ISD::LOAD; - case Store: return ISD::STORE; - case GetElementPtr: return 0; - case Fence: return 0; - case AtomicCmpXchg: return 0; - case AtomicRMW: return 0; - case Trunc: return ISD::TRUNCATE; - case ZExt: return ISD::ZERO_EXTEND; - case SExt: return ISD::SIGN_EXTEND; - case FPToUI: return ISD::FP_TO_UINT; - case FPToSI: return ISD::FP_TO_SINT; - case UIToFP: return ISD::UINT_TO_FP; - case SIToFP: return ISD::SINT_TO_FP; - case FPTrunc: return ISD::FP_ROUND; - case FPExt: return ISD::FP_EXTEND; - case PtrToInt: return ISD::BITCAST; - case IntToPtr: return ISD::BITCAST; - case BitCast: return ISD::BITCAST; - case ICmp: return ISD::SETCC; - case FCmp: return ISD::SETCC; - case PHI: return 0; - case Call: return 0; - case Select: return ISD::SELECT; - case UserOp1: return 0; - case UserOp2: return 0; - case VAArg: return 0; - case 
ExtractElement: return ISD::EXTRACT_VECTOR_ELT; - case InsertElement: return ISD::INSERT_VECTOR_ELT; - case ShuffleVector: return ISD::VECTOR_SHUFFLE; - case ExtractValue: return ISD::MERGE_VALUES; - case InsertValue: return ISD::MERGE_VALUES; - case LandingPad: return 0; - } - - llvm_unreachable("Unknown instruction type encountered!"); -} - -std::pair -VectorTargetTransformImpl::getTypeLegalizationCost(Type *Ty) const { - LLVMContext &C = Ty->getContext(); - EVT MTy = TLI->getValueType(Ty); - - unsigned Cost = 1; - // We keep legalizing the type until we find a legal kind. We assume that - // the only operation that costs anything is the split. After splitting - // we need to handle two types. - while (true) { - TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, MTy); - - if (LK.first == TargetLowering::TypeLegal) - return std::make_pair(Cost, MTy.getSimpleVT()); - - if (LK.first == TargetLowering::TypeSplitVector || - LK.first == TargetLowering::TypeExpandInteger) - Cost *= 2; - - // Keep legalizing the type. - MTy = LK.second; - } -} - -unsigned -VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty, - bool Insert, - bool Extract) const { - assert (Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; -} - -unsigned VectorTargetTransformImpl::getNumberOfRegisters(bool Vector) const { - return 1; -} - -unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode, - Type *Ty) const { - // Check if any of the operands are vector operands. - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair LT = getTypeLegalizationCost(Ty); - - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. 
- // If the type is split to multiple registers, assume that thre is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; - } - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2; - } - - // Else, assume that we need to scalarize this op. - if (Ty->isVectorTy()) { - unsigned Num = Ty->getVectorNumElements(); - unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); - // return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(Ty, true, true) + Num * Cost; - } - - // We don't know anything about this scalar instruction. - return 1; -} - -unsigned VectorTargetTransformImpl::getShuffleCost(ShuffleKind Kind, - Type *Tp, int Index, Type *SubTp) const { - return 1; -} - -unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair SrcLT = getTypeLegalizationCost(Src); - std::pair DstLT = getTypeLegalizationCost(Dst); - - // Handle scalar conversions. - if (!Src->isVectorTy() && !Dst->isVectorTy()) { - - // Scalar bitcasts are usually free. - if (Opcode == Instruction::BitCast) - return 0; - - if (Opcode == Instruction::Trunc && - TLI->isTruncateFree(SrcLT.second, DstLT.second)) - return 0; - - if (Opcode == Instruction::ZExt && - TLI->isZExtFree(SrcLT.second, DstLT.second)) - return 0; - - // Just check the op cost. If the operation is legal then assume it costs 1. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return 1; - - // Assume that illegal scalar instruction are expensive. - return 4; - } - - // Check vector-to-vector casts. 
- if (Dst->isVectorTy() && Src->isVectorTy()) { - - // If the cast is between same-sized registers, then the check is simple. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - - // Bitcast between types that are legalized to the same type are free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) - return 0; - - // Assume that Zext is done using AND. - if (Opcode == Instruction::ZExt) - return 1; - - // Assume that sext is done using SHL and SRA. - if (Opcode == Instruction::SExt) - return 2; - - // Just check the op cost. If the operation is legal then assume it costs - // 1 and multiply by the type-legalization overhead. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return SrcLT.first * 1; - } - - // If we are converting vectors and the operation is illegal, or - // if the vectors are legalized to different types, estimate the - // scalarization costs. - unsigned Num = Dst->getVectorNumElements(); - unsigned Cost = getCastInstrCost(Opcode, Dst->getScalarType(), - Src->getScalarType()); - - // Return the cost of multiple scalar invocation plus the cost of - // inserting and extracting the values. - return getScalarizationOverhead(Dst, true, true) + Num * Cost; - } - - // We already handled vector-to-vector and scalar-to-scalar conversions. This - // is where we handle bitcast between vectors and scalars. We need to assume - // that the conversion is scalarized in one way or another. - if (Opcode == Instruction::BitCast) - // Illegal bitcasts are done by storing and loading from a stack slot. - return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + - (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); - - llvm_unreachable("Unhandled cast"); - } - -unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const { - // Branches are assumed to be predicted. 
- return 0; -} - -unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode, - Type *ValTy, - Type *CondTy) const { - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - // Selects on vectors are actually vector selects. - if (ISD == ISD::SELECT) { - assert(CondTy && "CondTy must exist"); - if (CondTy->isVectorTy()) - ISD = ISD::VSELECT; - } - - std::pair LT = getTypeLegalizationCost(ValTy); - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. Multiply - // by the type-legalization overhead. - return LT.first * 1; - } - - // Otherwise, assume that the cast is scalarized. - if (ValTy->isVectorTy()) { - unsigned Num = ValTy->getVectorNumElements(); - if (CondTy) - CondTy = CondTy->getScalarType(); - unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(), - CondTy); - - // Return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(ValTy, true, false) + Num * Cost; - } - - // Unknown scalar opcode. - return 1; -} - -unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode, - Type *Val, - unsigned Index) const { - return 1; -} - -unsigned -VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - assert(!Src->isVoidTy() && "Invalid type"); - std::pair LT = getTypeLegalizationCost(Src); - - // Assume that all loads of legal types cost 1. - return LT.first; -} - -unsigned -VectorTargetTransformImpl::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, - ArrayRef Tys) const { - // assume that we need to scalarize this intrinsic. 
- unsigned ScalarizationCost = 0; - unsigned ScalarCalls = 1; - if (RetTy->isVectorTy()) { - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - } - return ScalarCalls + ScalarizationCost; -} - -unsigned -VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const { - std::pair LT = getTypeLegalizationCost(Tp); - return LT.first; -} diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 19912cc6bc8..95f1f22e04b 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -30,6 +30,7 @@ set(sources X86Subtarget.cpp X86TargetMachine.cpp X86TargetObjectFile.cpp + X86TargetTransformInfo.cpp X86VZeroUpper.cpp ) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 1e7b98d94f4..2bff7ab15ab 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -63,6 +63,9 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, /// FunctionPass *createEmitX86CodeToMemory(); +/// \brief Creates an X86-specific Target Transformation Info pass. +ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM); + } // End llvm namespace #endif diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f482ac98462..4b00b46e736 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18063,265 +18063,3 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return Res; } - -//===----------------------------------------------------------------------===// -// -// X86 cost model. 
-// -//===----------------------------------------------------------------------===// - -struct X86CostTblEntry { - int ISD; - MVT Type; - unsigned Cost; -}; - -static int -FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) - return i; - - // Could not find an entry. - return -1; -} - -struct X86TypeConversionCostTblEntry { - int ISD; - MVT Dst; - MVT Src; - unsigned Cost; -}; - -static int -FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, - int ISD, MVT Dst, MVT Src) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) - return i; - - // Could not find an entry. - return -1; -} - -ScalarTargetTransformInfo::PopcntHwSupport -X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const { - assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); - - // TODO: Currently the __builtin_popcount() implementation using SSE3 - // instructions is inefficient. Once the problem is fixed, we should - // call ST.hasSSE3() instead of ST.hasSSE4(). - return ST.hasSSE41() ? Fast : None; -} - -unsigned X86VectorTargetTransformInfo::getNumberOfRegisters(bool Vector) const { - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); - if (ST.is64Bit()) - return 16; - return 8; -} - -unsigned -X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, - Type *Ty) const { - // Legalize the type. - std::pair LT = getTypeLegalizationCost(Ty); - - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); - - static const X86CostTblEntry AVX1CostTable[] = { - // We don't have to scalarize unsupported ops. We can issue two half-sized - // operations and we only need to extract the upper YMM half. 
- // Two ops + 1 extract + 1 insert = 4. - { ISD::MUL, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v8i32, 4 }, - { ISD::ADD, MVT::v8i32, 4 }, - { ISD::MUL, MVT::v4i64, 4 }, - { ISD::SUB, MVT::v4i64, 4 }, - { ISD::ADD, MVT::v4i64, 4 }, - }; - - // Look for AVX1 lowering tricks. - if (ST.hasAVX()) { - int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, - LT.second); - if (Idx != -1) - return LT.first * AVX1CostTable[Idx].Cost; - } - // Fallback to the default implementation. - return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty); -} - -unsigned -X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - // Legalize the type. - std::pair LT = getTypeLegalizationCost(Src); - assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && - "Invalid Opcode"); - - const X86Subtarget &ST = - TLI->getTargetMachine().getSubtarget(); - - // Each load/store unit costs 1. - unsigned Cost = LT.first * 1; - - // On Sandybridge 256bit load/stores are double pumped - // (but not on Haswell). - if (LT.second.getSizeInBits() > 128 && !ST.hasAVX2()) - Cost*=2; - - return Cost; -} - -unsigned -X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - assert(Val->isVectorTy() && "This must be a vector type"); - - if (Index != -1U) { - // Legalize the type. - std::pair LT = getTypeLegalizationCost(Val); - - // This type is legalized to a scalar type. - if (!LT.second.isVector()) - return 0; - - // The type may be split. Normalize the index to the new type. - unsigned Width = LT.second.getVectorNumElements(); - Index = Index % Width; - - // Floating point scalars are already located in index #0. 
- if (Val->getScalarType()->isFloatingPointTy() && Index == 0) - return 0; - } - - return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index); -} - -unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, - Type *ValTy, - Type *CondTy) const { - // Legalize the type. - std::pair LT = getTypeLegalizationCost(ValTy); - - MVT MTy = LT.second; - - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - const X86Subtarget &ST = - TLI->getTargetMachine().getSubtarget(); - - static const X86CostTblEntry SSE42CostTbl[] = { - { ISD::SETCC, MVT::v2f64, 1 }, - { ISD::SETCC, MVT::v4f32, 1 }, - { ISD::SETCC, MVT::v2i64, 1 }, - { ISD::SETCC, MVT::v4i32, 1 }, - { ISD::SETCC, MVT::v8i16, 1 }, - { ISD::SETCC, MVT::v16i8, 1 }, - }; - - static const X86CostTblEntry AVX1CostTbl[] = { - { ISD::SETCC, MVT::v4f64, 1 }, - { ISD::SETCC, MVT::v8f32, 1 }, - // AVX1 does not support 8-wide integer compare. - { ISD::SETCC, MVT::v4i64, 4 }, - { ISD::SETCC, MVT::v8i32, 4 }, - { ISD::SETCC, MVT::v16i16, 4 }, - { ISD::SETCC, MVT::v32i8, 4 }, - }; - - static const X86CostTblEntry AVX2CostTbl[] = { - { ISD::SETCC, MVT::v4i64, 1 }, - { ISD::SETCC, MVT::v8i32, 1 }, - { ISD::SETCC, MVT::v16i16, 1 }, - { ISD::SETCC, MVT::v32i8, 1 }, - }; - - if (ST.hasAVX2()) { - int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * AVX2CostTbl[Idx].Cost; - } - - if (ST.hasAVX()) { - int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTbl[Idx].Cost; - } - - if (ST.hasSSE42()) { - int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTbl[Idx].Cost; - } - - return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy); -} - -unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode, - Type *Dst, - Type *Src) const { - int ISD = 
InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); - - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); - - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget(); - - static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, - }; - - if (ST.hasAVX()) { - int Idx = FindInConvertTable(AVXConversionTbl, - array_lengthof(AVXConversionTbl), - ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return AVXConversionTbl[Idx].Cost; - } - - return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); -} - - -unsigned X86VectorTargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, - Type *SubTp) const { - // We only estimate the cost of reverse shuffles. - if (Kind != Reverse) - return VectorTargetTransformImpl::getShuffleCost(Kind, Tp, Index, SubTp); - - std::pair LT = getTypeLegalizationCost(Tp); - unsigned Cost = 1; - if (LT.second.getSizeInBits() > 128) - Cost = 3; // Extract + insert + copy. - - // Multiple by the number of parts. 
- return Cost * LT.first; -} - diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 86b7764c136..16ce364cd52 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -23,7 +23,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { namespace X86ISD { @@ -945,40 +944,6 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } - - class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl { - public: - explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) : - ScalarTargetTransformImpl(TL) {}; - - virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const; - }; - - class X86VectorTargetTransformInfo : public VectorTargetTransformImpl { - public: - explicit X86VectorTargetTransformInfo(const TargetLowering *TL) : - VectorTargetTransformImpl(TL) {} - - virtual unsigned getNumberOfRegisters(bool Vector) const; - - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; - - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const; - - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const; - - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; - - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; - - unsigned getShuffleCost(ShuffleKind Kind, - Type *Tp, int Index, Type *SubTp) const; - }; } #endif // X86ISELLOWERING_H diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index ea99796f351..847e06b960a 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -48,8 +48,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, 
InstrInfo(*this), TLInfo(*this), TSInfo(*this), - JITInfo(*this), - STTI(&TLInfo), VTTI(&TLInfo) { + JITInfo(*this) { } void X86_64TargetMachine::anchor() { } @@ -65,8 +64,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, InstrInfo(*this), TLInfo(*this), TSInfo(*this), - JITInfo(*this), - STTI(&TLInfo), VTTI(&TLInfo){ + JITInfo(*this) { } /// X86TargetMachine ctor - Create an X86 target. @@ -120,6 +118,19 @@ static cl::opt X86EarlyIfConv("x86-early-ifcvt", cl::desc("Enable early if-conversion on X86")); +//===----------------------------------------------------------------------===// +// X86 Analysis Pass Setup +//===----------------------------------------------------------------------===// + +void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our X86 pass. This + // allows the X86 pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createX86TargetTransformInfoPass(this)); +} + + //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index a0749f08921..174d3918318 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -24,7 +24,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -65,6 +64,9 @@ public: return &InstrItins; } + /// \brief Register X86 analysis passes with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &PM); + // Set up the pass pipeline. 
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); @@ -81,8 +83,6 @@ class X86_32TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86SelectionDAGInfo TSInfo; X86JITInfo JITInfo; - ScalarTargetTransformImpl STTI; - X86VectorTargetTransformInfo VTTI; public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -101,12 +101,6 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } }; /// X86_64TargetMachine - X86 64-bit target machine. @@ -118,8 +112,6 @@ class X86_64TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86SelectionDAGInfo TSInfo; X86JITInfo JITInfo; - X86ScalarTargetTransformImpl STTI; - X86VectorTargetTransformInfo VTTI; public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -138,12 +130,6 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp new file mode 100644 index 00000000000..f5aa57740fc --- /dev/null +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -0,0 +1,355 @@ +//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// X86 target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86tti" +#include "X86.h" +#include "X86TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/TargetTransformInfo.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't have a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializeX86TTIPass(PassRegistry &); +} + +namespace { + +class X86TTI : public ImmutablePass, public TargetTransformInfo { + const X86TargetMachine *TM; + const X86Subtarget *ST; + const X86TargetLowering *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + X86TTI(const X86TargetMachine *TM) + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + TLI(TM->getTargetLowering()) { + initializeX86TTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification.
+ static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + + virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const; + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti", + "X86 Target Transform Info", true, true, false) +char X86TTI::ID = 0; + +ImmutablePass * +llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) { + return new X86TTI(TM); +} + + +//===----------------------------------------------------------------------===// +// +// X86 cost model. +// +//===----------------------------------------------------------------------===// + +namespace { +struct X86CostTblEntry { + int ISD; + MVT Type; + unsigned Cost; +}; +} + +static int +FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) + return i; + + // Could not find an entry. 
+ return -1; +} + +namespace { +struct X86TypeConversionCostTblEntry { + int ISD; + MVT Dst; + MVT Src; + unsigned Cost; +}; +} + +static int +FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, + int ISD, MVT Dst, MVT Src) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) + return i; + + // Could not find an entry. + return -1; +} + + +X86TTI::PopcntHwSupport X86TTI::getPopcntHwSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + // TODO: Currently the __builtin_popcount() implementation using SSE3 + // instructions is inefficient. Once the problem is fixed, we should + // call ST->hasSSE3() instead of ST->hasSSE41(). + return ST->hasSSE41() ? Fast : None; +} + +unsigned X86TTI::getNumberOfRegisters(bool Vector) const { + if (ST->is64Bit()) + return 16; + return 8; +} + +unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(Ty); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + static const X86CostTblEntry AVX1CostTable[] = { + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + { ISD::MUL, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v8i32, 4 }, + { ISD::ADD, MVT::v8i32, 4 }, + { ISD::MUL, MVT::v4i64, 4 }, + { ISD::SUB, MVT::v4i64, 4 }, + { ISD::ADD, MVT::v4i64, 4 }, + }; + + // Look for AVX1 lowering tricks. + if (ST->hasAVX()) { + int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, + LT.second); + if (Idx != -1) + return LT.first * AVX1CostTable[Idx].Cost; + } + // Fall back to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); +} + +unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + // We only estimate the cost of reverse shuffles. + if (Kind != Reverse) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + + std::pair LT = TLI->getTypeLegalizationCost(Tp); + unsigned Cost = 1; + if (LT.second.getSizeInBits() > 128) + Cost = 3; // Extract + insert + copy. + + // Multiply by the number of parts. + return Cost * LT.first; +} + +unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + + static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, + }; + + if (ST->hasAVX()) { + int Idx = FindInConvertTable(AVXConversionTbl, + array_lengthof(AVXConversionTbl), + ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return AVXConversionTbl[Idx].Cost; + } + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} +
+unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(ValTy); + + MVT MTy = LT.second; + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + static const X86CostTblEntry SSE42CostTbl[] = { + { ISD::SETCC, MVT::v2f64, 1 }, + { ISD::SETCC, MVT::v4f32, 1 }, + { ISD::SETCC, MVT::v2i64, 1 }, + { ISD::SETCC, MVT::v4i32, 1 }, + { ISD::SETCC, MVT::v8i16, 1 }, + { ISD::SETCC, MVT::v16i8, 1 }, + }; + + static const X86CostTblEntry AVX1CostTbl[] = { + { ISD::SETCC, MVT::v4f64, 1 }, + { ISD::SETCC, MVT::v8f32, 1 }, + // AVX1 does not support 256-bit integer compares. + { ISD::SETCC, MVT::v4i64, 4 }, + { ISD::SETCC, MVT::v8i32, 4 }, + { ISD::SETCC, MVT::v16i16, 4 }, + { ISD::SETCC, MVT::v32i8, 4 }, + }; + + static const X86CostTblEntry AVX2CostTbl[] = { + { ISD::SETCC, MVT::v4i64, 1 }, + { ISD::SETCC, MVT::v8i32, 1 }, + { ISD::SETCC, MVT::v16i16, 1 }, + { ISD::SETCC, MVT::v32i8, 1 }, + }; + + if (ST->hasAVX2()) { + int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX2CostTbl[Idx].Cost; + } + + if (ST->hasAVX()) { + int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX1CostTbl[Idx].Cost; + } + + if (ST->hasSSE42()) { + int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * SSE42CostTbl[Idx].Cost; + } + + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + if (Index != -1U) { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(Val); + + // This type is legalized to a scalar type. + if (!LT.second.isVector()) + return 0; + + // The type may be split.
Normalize the index to the new type. + unsigned Width = LT.second.getVectorNumElements(); + Index = Index % Width; + + // Floating point scalars are already located in index #0. + if (Val->getScalarType()->isFloatingPointTy() && Index == 0) + return 0; + } + + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); +} + +unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(Src); + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + + // Each load/store unit costs 1. + unsigned Cost = LT.first * 1; + + // On Sandy Bridge, 256-bit loads/stores are double pumped + // (but not on Haswell). + if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2()) + Cost*=2; + + return Cost; +} diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 6e218353087..28c3d12c05f 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, InstrInfo(), FrameLowering(Subtarget), TLInfo(*this), - TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) { + TSInfo(*this) { } namespace { diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 62ea5b66ad1..eb9a1aa420e 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -21,7 +21,6 @@ #include "XCoreSubtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -32,8 +31,6 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreFrameLowering FrameLowering; XCoreTargetLowering TLInfo; XCoreSelectionDAGInfo TSInfo; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -56,12 +53,6 @@ public: virtual const TargetRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index 68876ea22a2..aa652234731 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -38,7 +38,6 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/TargetTransformInfo.h" #include using namespace llvm; @@ -320,10 +319,8 @@ static int compileModule(char **argv, LLVMContext &Context) { TLI->disableAllFunctions(); PM.add(TLI); - if (target.get()) { - PM.add(createNoTTIPass(target->getScalarTargetTransformInfo(), - target->getVectorTargetTransformInfo())); - } + // Add internal analysis passes from the target machine. + Target.addAnalysisPasses(PM); // Add the target data from the target machine, if it exists, or the module. if (const DataLayout *TD = Target.getDataLayout()) diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index ca94698a702..e87b378106b 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -42,7 +42,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" using namespace llvm; @@ -372,8 +371,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, // Add an appropriate DataLayout instance for this module...
passes.add(new DataLayout(*_target->getDataLayout())); - passes.add(createNoTTIPass(_target->getScalarTargetTransformInfo(), - _target->getVectorTargetTransformInfo())); + _target->addAnalysisPasses(passes); // Enabling internalize here would use its AllButMain variant. It // keeps only main if it exists and does nothing for libraries. Instead diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index c2987f19edd..86edb3d877d 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -43,7 +43,6 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include #include @@ -657,10 +656,9 @@ int main(int argc, char **argv) { Machine = GetTargetMachine(Triple(ModuleTriple)); std::auto_ptr TM(Machine); - if (TM.get()) { - Passes.add(createNoTTIPass(TM->getScalarTargetTransformInfo(), - TM->getVectorTargetTransformInfo())); - } + // Add internal analysis passes from the target machine. + if (TM.get()) + TM->addAnalysisPasses(Passes); OwningPtr FPasses; if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {