[InstCombine] Change LLVM To canonicalize toward the value type being

stored rather than the pointer type.

This change is analogous to r220138 which changed the canonicalization
for loads. The rationale is the same: memory does not have a type,
operations (and thus the values they produce) have a type. We should
match that type as closely as possible rather than reading some form of
semantics into the pointer type.

With this change, loads and stores should no longer be made with
nonsensical types for the values that tehy load and store. This is
particularly important when trying to match specific loaded and stored
types in the process of doing other instcombines, which is what led me
down this twisty maze of miscanonicalization.

I've put quite some effort into looking through IR to find places where
LLVM's optimizer was being unreasonably conservative in the face of
mismatched load and store types, however it is possible (let's say,
likely!) I have missed some. If you see regressions here, or from
r220138, the likely cause is some part of LLVM failing to cope with load
and store types differing. Test cases appreciated, it is important that
we root all of these out of LLVM.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222748 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth
2014-11-25 10:09:51 +00:00
parent 0ab565a275
commit 333d5c9f51
7 changed files with 95 additions and 126 deletions

View File

@@ -491,102 +491,81 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
return nullptr;
}
/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible. This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
User *CI = cast<User>(SI.getOperand(1));
Value *CastOp = CI->getOperand(0);
/// \brief Combine stores to match the type of value being stored.
///
/// The core idea here is that the memory does not have any intrinsic type and
/// where we can we should match the type of a store to the type of value being
/// stored.
///
/// However, this routine must never change the width of a store or the number of
/// stores as that would introduce a semantic change. This combine is expected to
/// be a semantic no-op which just allows stores to more closely model the types
/// of their incoming values.
///
/// Currently, we also refuse to change the precise type used for an atomic or
/// volatile store. This is debatable, and might be reasonable to change later.
/// However, it is risky in case some backend or other part of LLVM is relying
/// on the exact type stored to select appropriate atomic operations.
///
/// \returns true if the store was successfully combined away. This indicates
/// the caller must erase the store instruction. We have to let the caller erase
/// the store instruction sas otherwise there is no way to signal whether it was
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
// FIXME: We could probably with some care handle both volatile and atomic
// stores here but it isn't clear that this is important.
if (!SI.isSimple())
return false;
Type *DestPTy = CI->getType()->getPointerElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (!SrcTy) return nullptr;
Value *Ptr = SI.getPointerOperand();
Value *V = SI.getValueOperand();
unsigned AS = SI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
SI.getAllMetadata(MD);
Type *SrcPTy = SrcTy->getElementType();
// Fold away bit casts of the stored value by storing the original type.
if (auto *BC = dyn_cast<BitCastInst>(V)) {
V = BC->getOperand(0);
StoreInst *NewStore = IC.Builder->CreateAlignedStore(
V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
SI.getAlignment());
for (const auto &MDPair : MD) {
unsigned ID = MDPair.first;
MDNode *N = MDPair.second;
// Note, essentially every kind of metadata should be preserved here! This
// routine is supposed to clone a store instruction changing *only its
// type*. The only metadata it makes sense to drop is metadata which is
// invalidated when the pointer type changes. This should essentially
// never be the case in LLVM, but we explicitly switch over only known
// metadata to be conservatively correct. If you are adding metadata to
// LLVM which pertains to stores, you almost certainly want to add it
// here.
switch (ID) {
case LLVMContext::MD_dbg:
case LLVMContext::MD_tbaa:
case LLVMContext::MD_prof:
case LLVMContext::MD_fpmath:
case LLVMContext::MD_tbaa_struct:
case LLVMContext::MD_alias_scope:
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_mem_parallel_loop_access:
case LLVMContext::MD_nonnull:
// All of these directly apply.
NewStore->setMetadata(ID, N);
break;
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return nullptr;
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
/// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
/// on 32-bit hosts.
SmallVector<Value*, 4> NewGEPIndices;
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
while (1) {
if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
if (!STy->getNumElements()) /* Struct can be empty {} */
break;
NewGEPIndices.push_back(Zero);
SrcPTy = STy->getElementType(0);
} else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
NewGEPIndices.push_back(Zero);
SrcPTy = ATy->getElementType();
} else {
case LLVMContext::MD_invariant_load:
case LLVMContext::MD_range:
break;
}
}
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
return true;
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return nullptr;
// If the pointers point into different address spaces don't do the
// transformation.
if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace())
return nullptr;
// If the pointers point to values of different sizes don't do the
// transformation.
if (!IC.getDataLayout() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return nullptr;
// If the pointers point to pointers to different address spaces don't do the
// transformation. It is not safe to introduce an addrspacecast instruction in
// this case since, depending on the target, addrspacecast may not be a no-op
// cast.
if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
return nullptr;
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before
// the store, cast the value to be stored.
Value *NewCast;
Instruction::CastOps opcode = Instruction::BitCast;
Type* CastSrcTy = DestPTy;
Type* CastDstTy = SrcPTy;
if (CastDstTy->isPointerTy()) {
if (CastSrcTy->isIntegerTy())
opcode = Instruction::IntToPtr;
} else if (CastDstTy->isIntegerTy()) {
if (CastSrcTy->isPointerTy())
opcode = Instruction::PtrToInt;
}
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
Value *SIOp0 = SI.getOperand(0);
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
SI.setOperand(0, NewCast);
SI.setOperand(1, CastOp);
return &SI;
// FIXME: We should also canonicalize loads of vectors when their elements are
// cast to other types.
return false;
}
/// equivalentAddressValues - Test if A and B will obviously have the same
@@ -622,6 +601,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
Value *Val = SI.getOperand(0);
Value *Ptr = SI.getOperand(1);
// Try to canonicalize the stored type.
if (combineStoreToValueType(*this, SI))
return EraseInstFromFunction(SI);
// Attempt to improve the alignment.
if (DL) {
unsigned KnownAlign =
@@ -713,17 +696,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (isa<UndefValue>(Val))
return EraseInstFromFunction(SI);
// If the pointer destination is a cast, see if we can fold the cast into the
// source instead.
if (isa<CastInst>(Ptr))
if (Instruction *Res = InstCombineStoreToCast(*this, SI))
return Res;
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
if (CE->isCast())
if (Instruction *Res = InstCombineStoreToCast(*this, SI))
return Res;
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.

View File

@@ -50,7 +50,8 @@ define void @fu2(i32 %parm) nounwind ssp {
%7 = add i32 %6, 2048
; CHECK: alloca i8
%8 = alloca i8, i32 %7
; CHECK-NEXT: bitcast i8*
; CHECK-NEXT: bitcast double**
; CHECK-NEXT: store i8*
%9 = bitcast i8* %8 to double*
store double* %9, double** %ptr, align 4
br label %10

View File

@@ -94,7 +94,8 @@ entry:
; CHECK: load i32*
; CHECK-NOT: fptoui
; CHECK-NOT: uitofp
; CHECK: bitcast i32 %call to float
; CHECK: bitcast float* %dest to i32*
; CHECK: store i32
%tmp = load float* %source, align 8
%call = call float @alias_i32_to_f32(float %tmp) nounwind
store float %call, float* %dest, align 8
@@ -109,7 +110,8 @@ entry:
; CHECK: load <2 x i32>*
; CHECK-NOT: fptoui
; CHECK-NOT: uitofp
; CHECK: bitcast <2 x i32> %call to <2 x float>
; CHECK: bitcast <2 x float>* %dest to <2 x i32>*
; CHECK: store <2 x i32>
%tmp = load <2 x float>* %source, align 8
%call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind
store <2 x float> %call, <2 x float>* %dest, align 8
@@ -123,7 +125,8 @@ entry:
; CHECK: bitcast <2 x float>* %source to i64*
; CHECK: load i64*
; CHECK: %call = call i64 @func_i64
; CHECK: bitcast i64 %call to <2 x float>
; CHECK: bitcast <2 x float>* %dest to i64*
; CHECK: store i64
%tmp = load <2 x float>* %source, align 8
%call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind
store <2 x float> %call, <2 x float>* %dest, align 8
@@ -136,7 +139,8 @@ entry:
; CHECK: bitcast i64* %source to <2 x float>*
; CHECK: load <2 x float>*
; CHECK: call <2 x float> @func_v2f32
; CHECK: bitcast <2 x float> %call to i64
; CHECK: bitcast i64* %dest to <2 x float>*
; CHECK: store <2 x float>
%tmp = load i64* %source, align 8
%call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind
store i64 %call, i64* %dest, align 8
@@ -149,7 +153,8 @@ entry:
; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
; CHECK: load <2 x i32*>*
; CHECK: call <2 x i32*> @func_v2i32p
; CHECK: bitcast <2 x i32*> %call to <2 x i64*>
; CHECK: bitcast <2 x i64*>* %dest to <2 x i32*>*
; CHECK: store <2 x i32*>
%tmp = load <2 x i64*>* %source, align 8
%call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind
store <2 x i64*> %call, <2 x i64*>* %dest, align 8

View File

@@ -10,11 +10,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
@G = external constant [5 x i8*]
; CHECK-LABEL: @foo
; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0
define void @foo(%struct.A* %a) nounwind {
; CHECK: store i32 %x, i32* %{{.*}}, align 16, !noalias !0
define void @foo(i32 %x, float* %p) nounwind {
entry:
%0 = bitcast %struct.A* %a to i8***
store i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2), i8*** %0, align 16, !tag !0
%x.cast = bitcast i32 %x to float
store float %x.cast, float* %p, align 16, !noalias !0
ret void
}
@@ -32,4 +32,4 @@ entry:
ret void
}
!0 = metadata !{metadata !"hello"}
!0 = metadata !{metadata !0}

View File

@@ -199,15 +199,6 @@ define i1 @test24(i1 %C) {
; CHECK: ret i1 true
}
define void @test25(i32** %P) {
%c = bitcast i32** %P to float** ; <float**> [#uses=1]
;; Fold cast into null
store float* null, float** %c
ret void
; CHECK: store i32* null, i32** %P
; CHECK: ret void
}
define i32 @test26(float %F) {
;; no need to cast from float->double.
%c = fpext float %F to double ; <double> [#uses=1]

View File

@@ -602,8 +602,8 @@ entry:
%C = load i8** %B, align 8
ret i8* %C
; CHECK-LABEL: @test34(
; CHECK: %V.c = inttoptr i64 %V to i8*
; CHECK: ret i8* %V.c
; CHECK: %[[C:.*]] = inttoptr i64 %V to i8*
; CHECK: ret i8* %[[C]]
}
%t0 = type { i8*, [19 x i8] }

View File

@@ -1395,11 +1395,11 @@ define i8* @test83(i1 %flag) {
; CHECK-LABEL: @test83(
; CHECK: %[[X:.*]] = alloca i8*
; CHECK-NEXT: %[[Y:.*]] = alloca i8*
; CHECK: %[[V:.*]] = load i64* %[[X]]
; CHECK-NEXT: %[[C1:.*]] = inttoptr i64 %[[V]] to i8*
; CHECK-NEXT: store i8* %[[C1]], i8** %[[Y]]
; CHECK-NEXT: %[[C2:.*]] = inttoptr i64 %[[V]] to i8*
; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i8* %[[C2]], i8* %[[C1]]
; CHECK-DAG: %[[X2:.*]] = bitcast i8** %[[X]] to i64*
; CHECK-DAG: %[[Y2:.*]] = bitcast i8** %[[Y]] to i64*
; CHECK: %[[V:.*]] = load i64* %[[X2]]
; CHECK-NEXT: store i64 %[[V]], i64* %[[Y2]]
; CHECK-NEXT: %[[C:.*]] = inttoptr i64 %[[V]] to i8*
; CHECK-NEXT: ret i8* %[[S]]
entry:
%x = alloca i8*