diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 52227126316..ecf9be861e7 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -81,6 +81,13 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false)); +// FIXME: this could be a transitional option, and we probably need to remove +// it once we are sure this optimization always benefits all targets. +static cl::opt +EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, + cl::desc("Enable global merge pass on external linkage"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -129,9 +136,19 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_PASS(GlobalMerge, "global-merge", - "Global Merge", false, false) +static void *initializeGlobalMergePassOnce(PassRegistry &Registry) { + PassInfo *PI = new PassInfo( + "Merge global variables", "global-merge", &GlobalMerge::ID, + PassInfo::NormalCtor_t(callDefaultCtor), false, false, + PassInfo::TargetMachineCtor_t(callTargetMachineCtor)); + Registry.registerPass(*PI, true); + return PI; +} + +void llvm::initializeGlobalMergePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializeGlobalMergePassOnce) +} bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { @@ -154,11 +171,23 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Type *Int32Ty = Type::getInt32Ty(M.getContext()); + assert(Globals.size() > 1); + + // FIXME: This simple solution merges globals all together as much as + // possible. However, with this solution it would be hard to remove dead + // global symbols at link-time. 
An alternative solution could be checking + // global symbol references function by function, merging the symbols + // referred to in the same function; we would then probably need to + // introduce a heuristic algorithm to resolve the merge conflicts between + // different functions. for (size_t i = 0, e = Globals.size(); i != e; ) { size_t j = 0; uint64_t MergedSize = 0; std::vector Tys; std::vector Inits; + + bool HasExternal = false; + GlobalVariable *TheFirstExternal = nullptr; for (j = i; j != e; ++j) { Type *Ty = Globals[j]->getType()->getElementType(); MergedSize += DL->getTypeAllocSize(Ty); @@ -167,17 +196,37 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); + + if (Globals[j]->hasExternalLinkage() && !HasExternal) { + HasExternal = true; + TheFirstExternal = Globals[j]; + } } + // If none of the merged variables has external linkage, we needn't expose + // the symbol after merging. + GlobalValue::LinkageTypes Linkage = HasExternal + ? GlobalValue::ExternalLinkage + : GlobalValue::InternalLinkage; + + // If any merged variable has external linkage, suffix the merged symbol + // with the first such variable's name to avoid link-time name conflicts. + // Own the name in a std::string: a named Twine would dangle on getName(). + std::string MergedGVName = HasExternal + ? 
("_MergedGlobals_" + TheFirstExternal->getName()).str() + : std::string("_MergedGlobals"); + StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, - GlobalValue::InternalLinkage, - MergedInit, "_MergedGlobals", - nullptr, - GlobalVariable::NotThreadLocal, - AddrSpace); + + GlobalVariable *MergedGV = new GlobalVariable( + M, MergedTy, isConst, Linkage, MergedInit, MergedGVName, nullptr, + GlobalVariable::NotThreadLocal, AddrSpace); + for (size_t k = i; k < j; ++k) { + GlobalValue::LinkageTypes OldLinkage = Globals[k]->getLinkage(); + std::string Name = Globals[k]->getName(); + Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, k-i) @@ -185,6 +234,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); + + if (OldLinkage != GlobalValue::InternalLinkage) { + // Keep the erased global's name visible as an alias of its new slot. + auto *PTy = cast(GEP->getType()); + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + OldLinkage, Name, GEP, &M); + } + NumMerged++; } i = j; @@ -245,8 +302,12 @@ bool GlobalMerge::doInitialization(Module &M) { // Grab all non-const globals. 
for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - // Merge is safe for "normal" internal globals only - if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) + // Merge is safe for "normal" internal or external globals only + if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + continue; + + if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) && + !I->hasInternalLinkage()) continue; PointerType *PT = dyn_cast(I->getType()); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index edf012d8117..5c7db5b16e3 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -38,6 +38,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeDSEPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); + initializeGlobalMergePass(Registry); initializeIndVarSimplifyPass(Registry); initializeJumpThreadingPass(Registry); initializeLICMPass(Registry); diff --git a/test/CodeGen/AArch64/global-merge.ll b/test/CodeGen/AArch64/global-merge.ll new file mode 100644 index 00000000000..aed1dc4d1c7 --- /dev/null +++ b/test/CodeGen/AArch64/global-merge.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true | FileCheck --check-prefix=NO-MERGE %s + +; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE +; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE + +; FIXME: add O1/O2 test for aarch64-none-linux-gnu and aarch64-apple-ios + +@m = internal global i32 0, align 4 +@n = internal global i32 0, align 4 + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: f1: +; CHECK: adrp x{{[0-9]+}}, _MergedGlobals +; CHECK-NOT: adrp + +; CHECK-APPLE-IOS-LABEL: f1: +; 
CHECK-APPLE-IOS: adrp x{{[0-9]+}}, __MergedGlobals +; CHECK-APPLE-IOS-NOT: adrp + store i32 %a1, i32* @m, align 4 + store i32 %a2, i32* @n, align 4 + ret void +} + +; CHECK: .local _MergedGlobals +; CHECK: .comm _MergedGlobals,8,8 +; NO-MERGE-NOT: .local _MergedGlobals + +; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 +; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3 diff --git a/test/CodeGen/ARM/global-merge-1.ll b/test/CodeGen/ARM/global-merge-1.ll new file mode 100644 index 00000000000..341597e6188 --- /dev/null +++ b/test/CodeGen/ARM/global-merge-1.ll @@ -0,0 +1,85 @@ +; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s + +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 +; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 + +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios3.0.0" + +@bar = internal global [5 x i32] zeroinitializer, align 4 +@baz = internal global [5 x i32] zeroinitializer, align 4 
+@foo = internal global [5 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define internal void @initialize() #0 { + %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1 + %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1 + %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1 + %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1 + %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1 + %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1 + %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1 + %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1 + %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + ret void +} + +declare i32 @calc(...) 
#1 + +; Function Attrs: nounwind ssp +define internal void @calculate() #0 { + %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4 + %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4 + %3 = mul <4 x i32> %2, %1 + store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + %6 = mul nsw i32 %5, %4 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1 + ret void +} + +; Function Attrs: nounwind readnone ssp +define internal i32* @returnFoo() #2 { + ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0) +} + +attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"LLVM version 3.4 "} +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"int", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/GlobalMerge/AArch64/arm64.ll 
b/test/Transforms/GlobalMerge/AArch64/arm64.ll index 4715fd8de23..9174f725740 100644 --- a/test/Transforms/GlobalMerge/AArch64/arm64.ll +++ b/test/Transforms/GlobalMerge/AArch64/arm64.ll @@ -1,23 +1,6 @@ -; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s -; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s +; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -S -o - | FileCheck %s -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 -; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 - -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios7.0.0" diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll new file mode 100644 index 00000000000..f8854dd5456 --- /dev/null +++ b/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll @@ -0,0 +1,22 @@ +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -S -o - | FileCheck %s +; RUN: opt %s 
-mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -S -o - | FileCheck %s + +; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -global-merge-on-external -S -o - | FileCheck %s + +; RUN: opt %s -mtriple=aarch64-apple-ios -global-merge -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-apple-ios -global-merge -global-merge-on-external -S -o - | FileCheck %s + +@m = internal global i32 0, align 4 +@n = internal global i32 0, align 4 + +; CHECK: @_MergedGlobals = internal global { i32, i32 } zeroinitializer + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: @f1 +; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 0) +; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 1) + store i32 %a1, i32* @m, align 4 + store i32 %a2, i32* @n, align 4 + ret void +} diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll new file mode 100644 index 00000000000..1876483882e --- /dev/null +++ b/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll @@ -0,0 +1,30 @@ +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -global-merge-on-external -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-apple-ios -global-merge -global-merge-on-external -S -o - | FileCheck %s + +@x = global i32 0, align 4 +@y = global i32 0, align 4 +@z = global i32 0, align 4 + +; CHECK: @_MergedGlobals_x = global { i32, i32, i32 } zeroinitializer +; CHECK: @x = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0) +; CHECK: @y = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) +; CHECK: @z = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2) + +define void @f1(i32 %a1, i32 %a2) 
{ +; CHECK-LABEL: @f1 +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0) +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) + store i32 %a1, i32* @x, align 4 + store i32 %a2, i32* @y, align 4 + ret void +} + +define void @g1(i32 %a1, i32 %a2) { +; CHECK-LABEL: @g1 +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2) + store i32 %a1, i32* @y, align 4 + store i32 %a2, i32* @z, align 4 + ret void +} diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll new file mode 100644 index 00000000000..811c3eef13d --- /dev/null +++ b/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -global-merge-on-external -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-apple-ios -global-merge -global-merge-on-external -S -o - | FileCheck %s + +@x = global [1000 x i32] zeroinitializer, align 1 +@y = global [1000 x i32] zeroinitializer, align 1 +@z = internal global i32 1, align 4 + +; CHECK: @_MergedGlobals_x = global { i32, [1000 x i32] } { i32 1, [1000 x i32] zeroinitializer } +; CHECK: @_MergedGlobals_y = global { [1000 x i32] } zeroinitializer + +; CHECK: @x = alias getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 1) +; CHECK: @y = alias getelementptr inbounds ({ [1000 x i32] }* @_MergedGlobals_y, i32 0, i32 0) + +define void @f1(i32 %a1, i32 %a2, i32 %a3) { +; CHECK-LABEL: @f1 +; CHECK: %x3 = getelementptr inbounds [1000 x i32]* getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 1), i32 0, i64 3 +; CHECK: %y3 = getelementptr inbounds [1000 x i32]* getelementptr inbounds ({ [1000 x i32] }* 
@_MergedGlobals_y, i32 0, i32 0), i32 0, i64 3 +; CHECK: store i32 %a3, i32* getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 0), align 4 + + %x3 = getelementptr inbounds [1000 x i32]* @x, i32 0, i64 3 + %y3 = getelementptr inbounds [1000 x i32]* @y, i32 0, i64 3 + store i32 %a1, i32* %x3, align 4 + store i32 %a2, i32* %y3, align 4 + store i32 %a3, i32* @z, align 4 + ret void +} diff --git a/test/Transforms/GlobalMerge/ARM/arm.ll b/test/Transforms/GlobalMerge/ARM/arm.ll index 341597e6188..e7553e91835 100644 --- a/test/Transforms/GlobalMerge/ARM/arm.ll +++ b/test/Transforms/GlobalMerge/ARM/arm.ll @@ -1,23 +1,4 @@ -; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s -; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s - -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 -; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 - -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; RUN: opt %s -mtriple=arm-linux-gnuabi -global-merge -S -o - | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios3.0.0" @@ -26,28 +7,30 @@ 
target triple = "thumbv7-apple-ios3.0.0" @baz = internal global [5 x i32] zeroinitializer, align 4 @foo = internal global [5 x i32] zeroinitializer, align 4 +; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer + ; Function Attrs: nounwind ssp define internal void @initialize() #0 { %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4 %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4 %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4 %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4 %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4 %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4 %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, 
i32 0, i32 3), align 4 %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4 %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4 %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 - store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4 ret void } @@ -59,10 +42,10 @@ define internal void @calculate() #0 { %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4 %3 = mul <4 x i32> %2, %1 store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4 - %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 - %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4 %6 = mul nsw i32 %5, %4 - store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4 ret void } @@ -70,16 +53,3 @@ define internal void @calculate() #0 { define internal i32* @returnFoo() #2 { ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0) } - -attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 
= { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind } - -!llvm.ident = !{!0} - -!0 = metadata !{metadata !"LLVM version 3.4 "} -!1 = metadata !{metadata !2, metadata !2, i64 0} -!2 = metadata !{metadata !"int", metadata !3, i64 0} -!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} -!4 = metadata !{metadata !"Simple C/C++ TBAA"}