diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c485844aaeb..6ecb4c52c40 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -20,7 +20,154 @@
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
-STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
+STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
+
+/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable.  This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool pointsToConstantGlobal(Value *V) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+    return GV->isConstant();
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    if (CE->getOpcode() == Instruction::BitCast ||
+        CE->getOpcode() == Instruction::GetElementPtr)
+      return pointsToConstantGlobal(CE->getOperand(0));
+  return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
+/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with IsOffset) but otherwise
+/// traverse the uses.  If we see a memcpy/memmove that targets an unoffsetted
+/// pointer to the alloca, and if the source pointer is a pointer to a
+/// constant global, we can optimize this.
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+                               SmallVectorImpl<Instruction *> &ToDelete,
+                               bool IsOffset = false) {
+  // We track lifetime intrinsics as we encounter them.  If we decide to go
+  // ahead and replace the value with the global, this lets the caller quickly
+  // eliminate the markers.
+
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+    User *U = cast<User>(*UI);
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      // Ignore non-volatile loads, they are always ok.
+      if (!LI->isSimple()) return false;
+      continue;
+    }
+
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+      // If uses of the bitcast are ok, we are ok.
+      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+        return false;
+      continue;
+    }
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
+      // doesn't, it does.
+      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
+                                          IsOffset || !GEP->hasAllZeroIndices()))
+        return false;
+      continue;
+    }
+
+    if (CallSite CS = U) {
+      // If this is the function being called then we treat it like a load and
+      // ignore it.
+      if (CS.isCallee(UI))
+        continue;
+
+      // If this is a readonly/readnone call site, then we know it is just a
+      // load (but one that potentially returns the value itself), so we can
+      // ignore it if we know that the value isn't captured.
+      unsigned ArgNo = CS.getArgumentNo(UI);
+      if (CS.onlyReadsMemory() &&
+          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+        continue;
+
+      // If this is being passed as a byval argument, the caller is making a
+      // copy, so it is only a read of the alloca.
+      if (CS.isByValArgument(ArgNo))
+        continue;
+    }
+
+    // Lifetime intrinsics can be handled by the caller.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+          II->getIntrinsicID() == Intrinsic::lifetime_end) {
+        assert(II->use_empty() && "Lifetime markers have no result to use!");
+        ToDelete.push_back(II);
+        continue;
+      }
+    }
+
+    // If this isn't our memcpy/memmove, reject it as something we can't
+    // handle.
+    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+    if (MI == 0)
+      return false;
+
+    // If the transfer is using the alloca as a source of the transfer, then
+    // ignore it since it is a load (unless the transfer is volatile).
+    if (UI.getOperandNo() == 1) {
+      if (MI->isVolatile()) return false;
+      continue;
+    }
+
+    // If we already have seen a copy, reject the second one.
+    if (TheCopy) return false;
+
+    // If the pointer has been offset from the start of the alloca, we can't
+    // safely handle this.
+    if (IsOffset) return false;
+
+    // If the memintrinsic isn't using the alloca as the dest, reject it.
+    if (UI.getOperandNo() != 0) return false;
+
+    // If the source of the memcpy/move is not a constant global, reject it.
+    if (!pointsToConstantGlobal(MI->getSource()))
+      return false;
+
+    // Otherwise, the transform is safe.  Remember the copy instruction.
+    TheCopy = MI;
+  }
+  return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is
+/// only modified by a copy from a constant global.  If we can prove this, we
+/// can replace any uses of the alloca with uses of the global directly.
+static MemTransferInst *
+isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+                               SmallVectorImpl<Instruction *> &ToDelete) {
+  MemTransferInst *TheCopy = 0;
+  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+    return TheCopy;
+  return 0;
+}
+
+/// getPointeeAlignment - Compute the minimum alignment of the value pointed
+/// to by the given pointer.
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    if (CE->getOpcode() == Instruction::BitCast ||
+        (CE->getOpcode() == Instruction::GetElementPtr &&
+         cast<GEPOperator>(CE)->hasAllZeroIndices()))
+      return getPointeeAlignment(CE->getOperand(0), TD);
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+    if (!GV->isDeclaration())
+      return TD.getPreferredAlignment(GV);
+
+  if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
+    return TD.getABITypeAlignment(PT->getElementType());
+
+  return 0;
+}
 
 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
   // Ensure that the alloca array size argument has type intptr_t, so that
@@ -113,6 +260,29 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
     }
   }
 
+  // Check to see if this allocation is only modified by a memcpy/memmove from
+  // a constant global whose alignment is equal to or exceeds that of the
+  // allocation.  If this is the case, we can change all users to use
+  // the constant global instead.  This is commonly produced by the CFE by
+  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+  // is only subsequently read.
+  SmallVector<Instruction *, 4> ToDelete;
+  if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
+    if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+      DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
+      DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
+      for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+        EraseInstFromFunction(*ToDelete[i]);
+      Constant *TheSrc = cast<Constant>(Copy->getSource());
+      Instruction *NewI
+        = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
+                                                           AI.getType()));
+      EraseInstFromFunction(*Copy);
+      ++NumGlobalCopies;
+      return NewI;
+    }
+  }
+
   // At last, use the generic allocation site handler to aggressively remove
   // unused allocas.
   return visitAllocSite(AI);
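To make the new InstCombine path concrete, here is a minimal sketch of the transform it performs, written in the same typed-pointer IR dialect as the test file further down. The global @g0 and function @read_elt are invented for illustration and are not part of the patch:

; A local array initialized from a constant global and only read afterwards.
@g0 = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 16

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind

define i32 @read_elt(i64 %i) {
entry:
  %buf = alloca [4 x i32], align 4
  %p = bitcast [4 x i32]* %buf to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* bitcast ([4 x i32]* @g0 to i8*), i64 16, i32 4, i1 false)
  %elt = getelementptr inbounds [4 x i32]* %buf, i64 0, i64 %i
  %v = load i32* %elt
  ret i32 %v
}

; After visitAllocaInst fires, the alloca and the memcpy are gone and the
; read goes straight to the constant global:
;   %elt = getelementptr inbounds [4 x i32]* @g0, i64 0, i64 %i
;   %v = load i32* %elt

The alignment guard is what keeps this safe: the rewrite only fires when the global (align 16 here, via getPointeeAlignment) is at least as aligned as the alloca it replaces (align 4), since users of the alloca may rely on its alignment.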
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 6637126fcb7..8090fdf178c 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -56,7 +56,6 @@ STATISTIC(NumReplaced,  "Number of allocas broken up");
 STATISTIC(NumPromoted,  "Number of allocas promoted");
 STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
 STATISTIC(NumConverted, "Number of aggregates converted to scalar");
-STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
 
 namespace {
   struct SROA : public FunctionPass {
@@ -183,9 +182,6 @@ namespace {
     void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
     bool ShouldAttemptScalarRepl(AllocaInst *AI);
-
-    static MemTransferInst *isOnlyCopiedFromConstantGlobal(
-        AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
   };
 
   // SROA_DT - SROA that uses DominatorTree.
@@ -1465,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
   return false;
 }
 
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::BitCast ||
-        (CE->getOpcode() == Instruction::GetElementPtr &&
-         cast<GEPOperator>(CE)->hasAllZeroIndices()))
-      return getPointeeAlignment(CE->getOperand(0), TD);
-
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
-    if (!GV->isDeclaration())
-      return TD.getPreferredAlignment(GV);
-
-  if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
-    return TD.getABITypeAlignment(PT->getElementType());
-
-  return 0;
-}
-
-
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
 // which runs on all of the alloca instructions in the function, removing them
 // if they are only used by getelementptr instructions.
@@ -1516,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) {
     if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
       continue;
 
-    // Check to see if this allocation is only modified by a memcpy/memmove from
-    // a constant global whose alignment is equal to or exceeds that of the
-    // allocation.  If this is the case, we can change all users to use
-    // the constant global instead.  This is commonly produced by the CFE by
-    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
-    // is only subsequently read.
-    SmallVector<Instruction*, 4> ToDelete;
-    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
-      if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
-        DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
-        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
-        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
-          ToDelete[i]->eraseFromParent();
-        Constant *TheSrc = cast<Constant>(Copy->getSource());
-        AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
-        Copy->eraseFromParent();  // Don't mutate the global.
-        AI->eraseFromParent();
-        ++NumGlobals;
-        Changed = true;
-        continue;
-      }
-    }
-
     // Check to see if we can perform the core SROA transformation.  We cannot
     // transform the allocation instruction if it is an array allocation
     // (allocations OF arrays are ok though), and an allocation of a scalar
@@ -2656,134 +2609,3 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
 
   return true;
 }
-
-
-
-/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
-/// some part of a constant global variable.  This intentionally only accepts
-/// constant expressions because we don't can't rewrite arbitrary instructions.
-static bool PointsToConstantGlobal(Value *V) {
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
-    return GV->isConstant();
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::BitCast ||
-        CE->getOpcode() == Instruction::GetElementPtr)
-      return PointsToConstantGlobal(CE->getOperand(0));
-  return false;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
-/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
-/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
-/// track of whether it moves the pointer (with isOffset) but otherwise traverse
-/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
-/// can optimize this.
-static bool
-isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
-                               bool isOffset,
-                               SmallVector<Instruction*, 4> &LifetimeMarkers) {
-  // We track lifetime intrinsics as we encounter them.  If we decide to go
-  // ahead and replace the value with the global, this lets the caller quickly
-  // eliminate the markers.
-
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
-    User *U = cast<User>(*UI);
-
-    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      // Ignore non-volatile loads, they are always ok.
-      if (!LI->isSimple()) return false;
-      continue;
-    }
-
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      // If uses of the bitcast are ok, we are ok.
-      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
-                                          LifetimeMarkers))
-        return false;
-      continue;
-    }
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
-      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
-      // doesn't, it does.
-      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
-                                          isOffset || !GEP->hasAllZeroIndices(),
-                                          LifetimeMarkers))
-        return false;
-      continue;
-    }
-
-    if (CallSite CS = U) {
-      // If this is the function being called then we treat it like a load and
-      // ignore it.
-      if (CS.isCallee(UI))
-        continue;
-
-      // If this is a readonly/readnone call site, then we know it is just a
-      // load (but one that potentially returns the value itself), so we can
-      // ignore it if we know that the value isn't captured.
-      unsigned ArgNo = CS.getArgumentNo(UI);
-      if (CS.onlyReadsMemory() &&
-          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
-        continue;
-
-      // If this is being passed as a byval argument, the caller is making a
-      // copy, so it is only a read of the alloca.
-      if (CS.isByValArgument(ArgNo))
-        continue;
-    }
-
-    // Lifetime intrinsics can be handled by the caller.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-          II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        assert(II->use_empty() && "Lifetime markers have no result to use!");
-        LifetimeMarkers.push_back(II);
-        continue;
-      }
-    }
-
-    // If this is isn't our memcpy/memmove, reject it as something we can't
-    // handle.
-    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
-    if (MI == 0)
-      return false;
-
-    // If the transfer is using the alloca as a source of the transfer, then
-    // ignore it since it is a load (unless the transfer is volatile).
-    if (UI.getOperandNo() == 1) {
-      if (MI->isVolatile()) return false;
-      continue;
-    }
-
-    // If we already have seen a copy, reject the second one.
-    if (TheCopy) return false;
-
-    // If the pointer has been offset from the start of the alloca, we can't
-    // safely handle this.
-    if (isOffset) return false;
-
-    // If the memintrinsic isn't using the alloca as the dest, reject it.
-    if (UI.getOperandNo() != 0) return false;
-
-    // If the source of the memcpy/move is not a constant global, reject it.
-    if (!PointsToConstantGlobal(MI->getSource()))
-      return false;
-
-    // Otherwise, the transform is safe.  Remember the copy instruction.
-    TheCopy = MI;
-  }
-  return true;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
-/// modified by a copy from a constant global.  If we can prove this, we can
-/// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *
-SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
-                                     SmallVector<Instruction*, 4> &ToDelete) {
-  MemTransferInst *TheCopy = 0;
-  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
-    return TheCopy;
-  return 0;
-}
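To summarize the rules encoded in the walker deleted here (and re-added in InstCombine above): loads, bitcasts, all-zero-index GEPs, readonly/readnone call sites, byval arguments, and lifetime markers are tolerated; stores, volatile transfers, a second copy, or a copy into an offset pointer cause rejection. A hypothetical IR sketch with invented names (@GC, @use); only @ok qualifies for the transform:

@GC = internal constant [8 x i8] c"\01\02\03\04\05\06\07\08"

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
declare void @use(i8* nocapture) readonly

; Accepted: the alloca is written exactly once, by a copy from @GC through an
; all-zero-index GEP; every other use is a read.
define void @ok() {
  %A = alloca [8 x i8]
  %p = getelementptr inbounds [8 x i8]* %A, i64 0, i64 0
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* getelementptr inbounds ([8 x i8]* @GC, i64 0, i64 0), i64 8, i32 1, i1 false)
  call void @use(i8* %p)
  ret void
}

; Rejected: the store makes the alloca more than a plain copy of the global.
; (isOffset would likewise reject a memcpy into %q, which points past the
; start of the alloca.)
define void @not_ok() {
  %A = alloca [8 x i8]
  %p = getelementptr inbounds [8 x i8]* %A, i64 0, i64 0
  %q = getelementptr inbounds [8 x i8]* %A, i64 0, i64 4
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* getelementptr inbounds ([8 x i8]* @GC, i64 0, i64 0), i64 8, i32 1, i1 false)
  store i8 0, i8* %q
  ret void
}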
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
similarity index 90%
rename from test/Transforms/ScalarRepl/memcpy-from-global.ll
rename to test/Transforms/InstCombine/memcpy-from-global.ll
index 5557a8fd875..83c893e17dd 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 @C.0.1248 = internal constant [128 x float] [ float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 ], align 32		; <[128 x float]*> [#uses=1]
@@ -6,12 +6,10 @@ define float @test1(i32 %hash, float %x, float %y, float %z, float %w) {
 entry:
 	%lookupTable = alloca [128 x float], align 16		; <[128 x float]*> [#uses=5]
 	%lookupTable1 = bitcast [128 x float]* %lookupTable to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i32 512, i32 16 )
+	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i32 16, i1 false)
 
 ; CHECK: @test1
 ; CHECK-NOT: alloca
-; CHECK-NOT: call{{.*}}@llvm.memcpy
-; CHECK: %lookupTable1 = bitcast [128 x float]* @C.0.1248 to i8*
 ; CHECK-NOT: call{{.*}}@llvm.memcpy
 
 	%tmp3 = shl i32 %hash, 2		; <i32> [#uses=1]
@@ -38,10 +36,6 @@ entry:
 	ret float %tmp43
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-
-
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 %T = type { i8, [123 x i8] }
@@ -59,10 +53,11 @@ define void @test2() {
 ; CHECK: @test2
 
 ; %A alloca is deleted
-; CHECK-NEXT: %B = alloca %T
+; CHECK-NEXT: alloca [124 x i8]
+; CHECK-NEXT: getelementptr inbounds [124 x i8]*
 
 ; use @G instead of %A
-; CHECK-NEXT: %a = bitcast %T* @G to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* getelementptr inbounds (%T* @G, i64 0, i32 0)
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i32 4, i1 false)
 	call void @bar(i8* %b)
@@ -79,8 +74,7 @@ define void @test3() {
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
 	call void @bar(i8* %a) readonly
 ; CHECK: @test3
-; CHECK-NEXT: %a = bitcast %T* @G to i8*
-; CHECK-NEXT: call void @bar(i8* %a)
+; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T* @G, i64 0, i32 0))
 	ret void
 }
 
@@ -90,8 +84,7 @@ define void @test4() {
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
 	call void @baz(i8* byval %a)
 ; CHECK: @test4
-; CHECK-NEXT: %a = bitcast %T* @G to i8*
-; CHECK-NEXT: call void @baz(i8* byval %a)
+; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0))
 	ret void
 }
 
@@ -103,8 +96,7 @@ define void @test5() {
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
 	call void @baz(i8* byval %a)
 ; CHECK: @test5
-; CHECK-NEXT: %a = bitcast %T* @G to i8*
-; CHECK-NEXT: call void @baz(i8* byval %a)
+; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0))
 	ret void
 }
 
@@ -118,8 +110,7 @@ define void @test6() {
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false)
 	call void @bar(i8* %a) readonly
 ; CHECK: @test6
-; CHECK-NEXT: %a = bitcast
-; CHECK-NEXT: call void @bar(i8* %a)
+; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
 	ret void
 }
 
@@ -129,8 +120,7 @@ define void @test7() {
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false)
 	call void @bar(i8* %a) readonly
 ; CHECK: @test7
-; CHECK-NEXT: %a = bitcast
-; CHECK-NEXT: call void @bar(i8* %a)
+; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
 	ret void
 }
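A note on why the CHECK lines changed shape in this rename: scalarrepl rewrote the alloca by materializing a fresh instruction such as %a = bitcast %T* @G to i8*, whereas instcombine substitutes a constant expression that folds directly into each user, and bitcast of @G to i8* canonicalizes to a getelementptr constant expression because the first field of %T is an i8. For @test3, the expected post-instcombine output is therefore roughly the following sketch (illustrative, not verbatim opt output):

define void @test3() {
  call void @bar(i8* getelementptr inbounds (%T* @G, i64 0, i32 0))
  ret void
}

The same folding explains the byval operands in @test4 and @test5 and the bitcast constant expressions matched in @test6 and @test7.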