diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index cd499ba5cb0..d5ff0584422 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -959,12 +959,6 @@ public:
     return false;
   }
 
-  /// Returns the maximal possible offset which can be used for loads / stores
-  /// from the global.
-  virtual unsigned getMaximalGlobalOffset() const {
-    return 0;
-  }
-
   /// Returns true if a cast between SrcAS and DestAS is a noop.
   virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
     return false;
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 108201f072f..558b81e023c 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -145,7 +145,7 @@ Pass *createLICMPass();
 //
 Pass *createLoopStrengthReducePass();
 
-Pass *createGlobalMergePass(const TargetMachine *TM = nullptr);
+Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset);
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 6da9766379e..32543b7723a 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -90,10 +90,16 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
      cl::desc("Enable global merge pass on external linkage"),
      cl::init(false));
 
-STATISTIC(NumMerged , "Number of globals merged");
+STATISTIC(NumMerged, "Number of globals merged");
 namespace {
   class GlobalMerge : public FunctionPass {
     const TargetMachine *TM;
+    const DataLayout *DL;
+    // FIXME: Infer the maximum possible offset depending on the actual users
+    // (these max offsets are different for the users inside Thumb or ARM
+    // functions), see the code that passes in the offset in the ARM backend
+    // for more information.
+    unsigned MaxOffset;
 
     bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
                  Module &M, bool isConst, unsigned AddrSpace) const;
@@ -117,8 +123,13 @@ namespace {
 
   public:
     static char ID; // Pass identification, replacement for typeid.
-    explicit GlobalMerge(const TargetMachine *TM = nullptr)
-      : FunctionPass(ID), TM(TM) {
+    /// \p TM defaults to nullptr, so its DataLayout is only queried when a
+    /// TargetMachine was actually supplied; DL stays null otherwise.
+    /// \p MaximalOffset is stored in MaxOffset.
+    explicit GlobalMerge(const TargetMachine *TM = nullptr,
+                         unsigned MaximalOffset = 0)
+        : FunctionPass(ID), TM(TM), DL(TM ? TM->getDataLayout() : nullptr),
+          MaxOffset(MaximalOffset) {
       initializeGlobalMergePass(*PassRegistry::getPassRegistry());
     }
 
@@ -138,22 +149,16 @@ namespace {
 } // end anonymous namespace
 
 char GlobalMerge::ID = 0;
-INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
-                   false, false)
+INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables",
+                      false, false)
+INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables",
+                    false, false)
 
 bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
                           Module &M, bool isConst, unsigned AddrSpace) const {
-  const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
-  const DataLayout *DL = TM->getDataLayout();
-
-  // FIXME: Infer the maximum possible offset depending on the actual users
-  // (these max offsets are different for the users inside Thumb or ARM
-  // functions)
-  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
-
   // FIXME: Find better heuristics
   std::stable_sort(Globals.begin(), Globals.end(),
-                   [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+                   [this](const GlobalVariable *GV1, const GlobalVariable *GV2) {
     Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
     Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
 
@@ -282,9 +287,6 @@ bool GlobalMerge::doInitialization(Module &M) {
 
   DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
                                                         BSSGlobals;
-  const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
-  const DataLayout *DL = TM->getDataLayout();
-  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
   bool Changed = false;
   setMustKeepGlobalVariables(M);
 
@@ -357,6 +359,6 @@ bool GlobalMerge::doFinalization(Module &M) {
   return false;
 }
 
-Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
-  return new GlobalMerge(TM);
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) {
+  return new GlobalMerge(TM, Offset);
 }
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index fb31d7d3376..f16ec9d0062 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -730,13 +730,6 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
   return MVT::i64;
 }
 
-unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
-  // FIXME: On AArch64, this depends on the type.
-  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
-  // and the offset has to be a multiple of the related size in bytes.
-  return 4095;
-}
-
 FastISel *
 AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                       const TargetLibraryInfo *libInfo) const {
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index db15538e43b..0a84294948f 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -246,10 +246,6 @@ public:
   /// getFunctionAlignment - Return the Log2 alignment of this function.
   unsigned getFunctionAlignment(const Function *F) const;
 
-  /// getMaximalGlobalOffset - Returns the maximal possible offset which can
-  /// be used for loads / stores from the global.
-  unsigned getMaximalGlobalOffset() const override;
-
   /// Returns true if a cast between SrcAS and DestAS is a noop.
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
     // Addrspacecasts are always noops.
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 53360428050..d73d0b3f8b7 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -236,8 +236,11 @@ bool AArch64PassConfig::addPreISel() {
   // get a chance to be merged
   if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
     addPass(createAArch64PromoteConstantPass());
+  // FIXME: On AArch64, this depends on the type.
+  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
+  // and the offset has to be a multiple of the related size in bytes.
   if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createGlobalMergePass(TM));
+    addPass(createGlobalMergePass(TM, 4095));
   if (TM->getOptLevel() != CodeGenOpt::None)
     addPass(createAArch64AddressTypePromotionPass());
 
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index a66d9ab00f4..7620cd0a463 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1170,12 +1170,6 @@ ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
   return ARM::createFastISel(funcInfo, libInfo);
 }
 
-/// getMaximalGlobalOffset - Returns the maximal possible offset which can
-/// be used for loads / stores from the global.
-unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
-  return (Subtarget->isThumb1Only() ? 127 : 4095);
-}
-
 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   unsigned NumVals = N->getNumValues();
   if (!NumVals)
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 2544be08234..12513370077 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -353,10 +353,6 @@ namespace llvm {
     /// specified value type.
     const TargetRegisterClass *getRegClassFor(MVT VT) const override;
 
-    /// getMaximalGlobalOffset - Returns the maximal possible offset which can
-    /// be used for loads / stores from the global.
-    unsigned getMaximalGlobalOffset() const override;
-
     /// Returns true if a cast between SrcAS and DestAS is a noop.
     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
       // Addrspacecasts are always noops.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 11fbb5c3e30..a97a058f2aa 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -326,7 +326,12 @@ void ARMPassConfig::addIRPasses() {
 
 bool ARMPassConfig::addPreISel() {
   if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createGlobalMergePass(TM));
+    // FIXME: This is using the thumb1 only constant value for
+    // maximal global offset for merging globals. We may want
+    // to look into using the old value for non-thumb1 code of
+    // 4095 based on the TargetMachine, but this starts to become
+    // tricky when doing code gen per function.
+    addPass(createGlobalMergePass(TM, 127));
 
   return false;
 }