This implements the second half of the fix for PR3290, handling
loads from allocas that cover the entire aggregate.  This handles
some memcpy/byval cases that are produced by llvm-gcc.  This triggers
a few times in kc++ (with std::pair<std::_Rb_tree_const_iterator
<kc::impl_abstract_phylum*>,bool>) and once in 176.gcc (with %struct..0anon).




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61915 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2009-01-08 05:42:05 +00:00
parent 8bb5e99013
commit 5ffe6acd57
2 changed files with 125 additions and 2 deletions

View File

@ -122,6 +122,8 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial);
void ConvertToScalar(AllocationInst *AI, const Type *Ty);
@ -599,6 +601,18 @@ void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
continue;
}
return MarkUnsafe(Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
// If loading the entire alloca in one chunk through a bitcasted pointer
// to integer, we can transform it. This happens (for example) when you
// cast a {i32,i32}* to i64* and load through it. This is similar to the
// memcpy case and occurs in various "byval" cases and emulated memcpys.
if (isa<IntegerType>(LI->getType()) &&
TD->getABITypeSize(LI->getType()) ==
TD->getABITypeSize(AI->getType()->getElementType())) {
Info.isMemCpySrc = true;
continue;
}
return MarkUnsafe(Info);
} else {
return MarkUnsafe(Info);
}
@ -628,15 +642,21 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
}
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// This must be a store of the entire alloca from an integer.
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
continue;
}
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
continue;
}
// Otherwise it must be some other user of a gep of the first pointer. Just
// leave these alone.
continue;
}
}
}
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
@ -902,6 +922,83 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
SI->eraseFromParent();
}
/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer.  Load the individual pieces to form the aggregate value.
///
/// \param LI      The load being rewritten.  It is only touched when it loads
///                an integer whose ABI size equals that of AI's element type;
///                otherwise this function returns without changing anything.
/// \param AI      The struct or array allocation being scalarized.
/// \param NewElts The replacement allocas; NewElts[i] is treated as the
///                storage for element i of AI's aggregate type.
///
/// On success every use of LI is replaced by an integer built from per-element
/// loads (zero-extended, shifted to the element's bit offset and or'd
/// together), and LI is erased.
void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // Extract each element out of the NewElts according to its structure offset
  // and form the result value.
  const Type *AllocaEltTy = AI->getType()->getElementType();
  uint64_t AllocaSizeBits = TD->getABITypeSizeInBits(AllocaEltTy);

  // If this isn't a load of the whole alloca to an integer, it may be a load
  // of the first element.  Just ignore the load in this case and normal SROA
  // will handle it.
  if (!isa<IntegerType>(LI->getType()) ||
      TD->getABITypeSizeInBits(LI->getType()) != AllocaSizeBits)
    return;

  DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI;

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  const StructLayout *Layout = 0;
  uint64_t ArrayEltBitOffset = 0;
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    Layout = TD->getStructLayout(EltSTy);
  } else {
    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
    ArrayEltBitOffset = TD->getABITypeSizeInBits(ArrayEltTy);
  }

  // Start from zero and or each shifted element into place.
  Value *ResultVal = Constant::getNullValue(LI->getType());

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // Load the value from the alloca.  If the NewElt is an aggregate, cast
    // the pointer to an integer of the same size before doing the load.
    Value *SrcField = NewElts[i];
    const Type *FieldTy =
      cast<PointerType>(SrcField->getType())->getElementType();
    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

    // Ignore zero sized fields like {}: they contribute no bits to the
    // result, and there is no zero-width integer type to load them as.
    if (FieldSizeBits == 0)
      continue;

    const IntegerType *FieldIntTy = IntegerType::get(FieldSizeBits);

    if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
        !isa<VectorType>(FieldTy))
      SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy),
                                 "", LI);
    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);

    // If SrcField is a fp or vector of the right size but that isn't an
    // integer type, bitcast to an integer so we can shift it.
    if (SrcField->getType() != FieldIntTy)
      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);

    // Zero extend the field to be the same size as the final alloca so that
    // we can shift and insert it.
    if (SrcField->getType() != ResultVal->getType())
      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);

    // Determine the number of bits to shift SrcField.
    uint64_t Shift;
    if (Layout) // Struct case.
      Shift = Layout->getElementOffsetInBits(i);
    else  // Array case.
      Shift = i*ArrayEltBitOffset;

    // On big-endian targets the element at the lowest address occupies the
    // most significant bits, so measure the shift from the other end.
    if (TD->isBigEndian())
      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();

    if (Shift) {
      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
    }

    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
  }

  LI->replaceAllUsesWith(ResultVal);
  LI->eraseFromParent();
}
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.

View File

@ -29,3 +29,29 @@ define float @test2(i128 %V) nounwind {
ret float %c
}
;; Load of whole alloca struct as integer.
;; Both i32 fields of the nested struct are stored individually, then the
;; entire alloca is read back as one i64 through a bitcasted pointer.
define i64 @test3(i32 %a, i32 %b) nounwind {
%X = alloca {{i32, i32}}
; Address the two i32 fields of the inner struct.
%A = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 0
%B = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 1
store i32 %a, i32* %A
store i32 %b, i32* %B
; Reinterpret the struct pointer as i64* and load the whole aggregate at once.
%Y = bitcast {{i32,i32}}* %X to i64*
%Z = load i64* %Y
ret i64 %Z
}
;; Load of integer from whole struct/array alloca.
;; The alloca is a struct wrapping a [4 x float] array; the i128 load below
;; reads the whole thing through a bitcasted pointer.
define i128 @test4(float %a, float %b) nounwind {
%X = alloca {[4 x float]}
; Only array elements 0 and 3 are written; elements 1 and 2 are never stored.
%A = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 0
%B = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 3
store float %a, float* %A
store float %b, float* %B
; Reinterpret the struct pointer as i128* and load the whole aggregate at once.
%Y = bitcast {[4 x float]}* %X to i128*
%V = load i128* %Y
ret i128 %V
}