Add the extracted constant offset using GEP

Fixed a TODO in r207783.

Add the extracted constant offset using a GEP instead of the ugly
ptrtoint+add+inttoptr sequence. Using a GEP simplifies future optimizations
and makes the IR easier to understand.

Updated all affected tests, and added a new test in split-gep.ll to cover a
corner case where emitting uglygep is necessary.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209537 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jingyue Wu
2014-05-23 18:39:40 +00:00
parent bb75e24528
commit 8d959dd563
3 changed files with 81 additions and 40 deletions

View File

@@ -487,7 +487,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
int64_t ConstantOffset = int64_t ConstantOffset =
ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP); ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
if (ConstantOffset != 0) { if (ConstantOffset != 0) {
assert(NewIdx && "ConstantOffset != 0 implies NewIdx is set"); assert(NewIdx != nullptr &&
"ConstantOffset != 0 implies NewIdx is set");
GEP->setOperand(I, NewIdx); GEP->setOperand(I, NewIdx);
// Clear the inbounds attribute because the new index may be off-bound. // Clear the inbounds attribute because the new index may be off-bound.
// e.g., // e.g.,
@@ -522,44 +523,67 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// => add the offset // => add the offset
// //
// %gep2 ; clone of %gep // %gep2 ; clone of %gep
// %0 = ptrtoint %gep2 // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
// %1 = add %0, <offset>
// %new.gep = inttoptr %1
// %gep ; will be removed // %gep ; will be removed
// ... %gep ... // ... %gep ...
// //
// => replace all uses of %gep with %new.gep and remove %gep // => replace all uses of %gep with %new.gep and remove %gep
// //
// %gep2 ; clone of %gep // %gep2 ; clone of %gep
// %0 = ptrtoint %gep2 // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
// %1 = add %0, <offset>
// %new.gep = inttoptr %1
// ... %new.gep ... // ... %new.gep ...
// //
// TODO(jingyue): Emit a GEP instead of an "uglygep" // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
// (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep) to make the IR // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
// prettier and more alias analysis friendly. One caveat: if the original GEP // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
// ends with a StructType, we need to split the GEP at the last // type of %gep.
// SequentialType. For instance, consider the following IR:
// //
// %struct.S = type { float, double } // %gep2 ; clone of %gep
// @array = global [1024 x %struct.S] // %0 = bitcast %gep2 to i8*
// %p = getelementptr %array, 0, %i + 5, 1 // %uglygep = gep %0, <offset>
// // %new.gep = bitcast %uglygep to <type of %gep>
// To separate the constant 5 from %p, we would need to split %p at the last // ... %new.gep ...
// array index so that we have:
//
// %addr = gep %array, 0, %i
// %p = gep %addr, 5, 1
Instruction *NewGEP = GEP->clone(); Instruction *NewGEP = GEP->clone();
NewGEP->insertBefore(GEP); NewGEP->insertBefore(GEP);
Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
Value *Addr = new PtrToIntInst(NewGEP, IntPtrTy, "", GEP);
Addr = BinaryOperator::CreateAdd(
Addr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "", GEP);
Addr = new IntToPtrInst(Addr, GEP->getType(), "", GEP);
GEP->replaceAllUsesWith(Addr); Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
uint64_t ElementTypeSizeOfGEP =
DL->getTypeAllocSize(GEP->getType()->getElementType());
if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
// Very likely. As long as %gep is naturally aligned, the byte offset we
// extracted should be a multiple of sizeof(*%gep).
// Per ANSI C standard, signed / unsigned = unsigned. Therefore, we
// cast ElementTypeSizeOfGEP to signed.
int64_t Index =
AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
NewGEP = GetElementPtrInst::Create(
NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
} else {
// Unlikely but possible. For example,
// #pragma pack(1)
// struct S {
// int a[3];
// int64 b[8];
// };
// #pragma pack()
//
// Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
// extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
// sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
// sizeof(int64).
//
// Emit an uglygep in this case.
Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
GEP->getPointerAddressSpace());
NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
NewGEP = GetElementPtrInst::Create(
NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
"uglygep", GEP);
if (GEP->getType() != I8PtrTy)
NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
}
GEP->replaceAllUsesWith(NewGEP);
GEP->eraseFromParent(); GEP->eraseFromParent();
return true; return true;

View File

@@ -54,7 +54,6 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array( ; IR-LABEL: @sum_of_array(
; IR: [[BASE_PTR:%[0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i32 %x, i32 %y ; IR: [[BASE_PTR:%[0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i32 %x, i32 %y
; IR: [[BASE_INT:%[0-9]+]] = ptrtoint float addrspace(3)* [[BASE_PTR]] to i64 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
; IR: %5 = add i64 [[BASE_INT]], 4 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
; IR: %10 = add i64 [[BASE_INT]], 128 ; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
; IR: %15 = add i64 [[BASE_INT]], 132

View File

@@ -39,7 +39,7 @@ entry:
} }
; CHECK-LABEL: @sext_zext ; CHECK-LABEL: @sext_zext
; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i32 %i, i32 %j ; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i32 %i, i32 %j
; CHECK: add i64 %{{[0-9]+}}, 136 ; CHECK: getelementptr float* %{{[0-9]+}}, i64 34
; We should be able to trace into sext/zext if it can be distributed to both ; We should be able to trace into sext/zext if it can be distributed to both
; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b) ; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
@@ -55,8 +55,7 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
} }
; CHECK-LABEL: @ext_add_no_overflow ; CHECK-LABEL: @ext_add_no_overflow
; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}} ; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}
; CHECK: [[BASE_INT:%[0-9]+]] = ptrtoint float* [[BASE_PTR]] to i64 ; CHECK: getelementptr float* [[BASE_PTR]], i64 33
; CHECK: add i64 [[BASE_INT]], 132
; We should treat "or" with no common bits (%k) as "add", and leave "or" with ; We should treat "or" with no common bits (%k) as "add", and leave "or" with
; potentially common bits (%l) as is. ; potentially common bits (%l) as is.
@@ -69,8 +68,8 @@ entry:
ret float* %p ret float* %p
} }
; CHECK-LABEL: @or ; CHECK-LABEL: @or
; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %j, i64 %l ; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %j, i64 %l
; CHECK: add i64 %{{[0-9]+}}, 384 ; CHECK: getelementptr float* [[BASE_PTR]], i64 96
; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b + ; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't ; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
@@ -84,8 +83,8 @@ entry:
ret float* %p ret float* %p
} }
; CHECK-LABEL: @expr ; CHECK-LABEL: @expr
; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %0, i64 0 ; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %0, i64 0
; CHECK: add i64 %{{[0-9]+}}, 640 ; CHECK: getelementptr float* [[BASE_PTR]], i64 160
; CHECK: store i64 %b5, i64* %out ; CHECK: store i64 %b5, i64* %out
; Verifies we handle "sub" correctly. ; Verifies we handle "sub" correctly.
@@ -97,5 +96,24 @@ define float* @sub(i64 %i, i64 %j) {
} }
; CHECK-LABEL: @sub ; CHECK-LABEL: @sub
; CHECK: %[[j2:[0-9]+]] = sub i64 0, %j ; CHECK: %[[j2:[0-9]+]] = sub i64 0, %j
; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]] ; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
; CHECK: add i64 %{{[0-9]+}}, -620 ; CHECK: getelementptr float* [[BASE_PTR]], i64 -155
%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
; Verifies we can emit correct uglygep if the address is not naturally aligned.
;
; The function computes the address of s[i + 1].<field 1>[j + 3], where each
; element of %s is the packed struct <{ [3 x i32], [8 x i64] }>. The constant
; part of that address is one whole struct plus three i64 elements, i.e.
; (3 * 4 + 8 * 8) + 3 * 8 = 100 bytes. 100 is not a multiple of the size of
; i64 (8 bytes), so the extracted offset cannot be expressed as a whole-number
; index on an i64* and has to be added through an i8* instead.
define i64* @packed_struct(i32 %i, i32 %j) {
entry:
; Local array of 1024 packed structs that the address is computed into.
%s = alloca [1024 x %struct.Packed], align 16
; Inner array index: j + 3, sign-extended to i64.
%add = add nsw i32 %j, 3
%idxprom = sext i32 %add to i64
; Outer array index: i + 1, sign-extended to i64.
%add1 = add nsw i32 %i, 1
%idxprom2 = sext i32 %add1 to i64
; Address of element %idxprom of field 1 (the [8 x i64] member) of struct
; number %idxprom2 in %s.
%arrayidx3 = getelementptr inbounds [1024 x %struct.Packed]* %s, i64 0, i64 %idxprom2, i32 1, i64 %idxprom
ret i64* %arrayidx3
}
; CHECK-LABEL: @packed_struct
; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [1024 x %struct.Packed]* %s, i64 0, i32 %i, i32 1, i32 %j
; CHECK: [[CASTED_PTR:%[0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
; CHECK: %uglygep = getelementptr i8* [[CASTED_PTR]], i64 100
; CHECK: bitcast i8* %uglygep to i64*