diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index f02a7108fd6..1cb4fa2c213 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -9795,7 +9795,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       }
     }
     // If we are using a wider index than needed for this platform, shrink it
-    // to what we need. If the incoming value needs a cast instruction,
+    // to what we need. If narrower, sign-extend it to what we need.
+    // If the incoming value needs a cast instruction,
     // insert it. This explicit cast can make subsequent optimizations more
     // obvious.
     Value *Op = *i;
@@ -9809,6 +9810,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         *i = Op;
         MadeChange = true;
       }
+    } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) {
+      if (Constant *C = dyn_cast<Constant>(Op)) {
+        *i = ConstantExpr::getSExt(C, TD->getIntPtrType());
+        MadeChange = true;
+      } else {
+        Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(),
+                              GEP);
+        *i = Op;
+        MadeChange = true;
+      }
     }
   }
 }
diff --git a/test/Transforms/InstCombine/getelementptr_promote.ll b/test/Transforms/InstCombine/getelementptr_promote.ll
new file mode 100644
index 00000000000..811c5a95916
--- /dev/null
+++ b/test/Transforms/InstCombine/getelementptr_promote.ll
@@ -0,0 +1,58 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep getelementptr | grep {, i64}
+
+; Instcombine should promote the getelementptr index up to the target's
+; pointer size, making the conversion explicit, which helps expose it to
+; other optimizations.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+
+define i64 @test(i64* %first, i32 %count) nounwind {
+entry:
+  %first_addr = alloca i64* ; [#uses=2]
+  %count_addr = alloca i32 ; [#uses=2]
+  %retval = alloca i64 ; [#uses=2]
+  %n = alloca i32 ; [#uses=5]
+  %result = alloca i64 ; [#uses=4]
+  %0 = alloca i64 ; [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
+  store i64* %first, i64** %first_addr
+  store i32 %count, i32* %count_addr
+  store i64 0, i64* %result, align 8
+  store i32 0, i32* %n, align 4
+  br label %bb1
+
+bb: ; preds = %bb1
+  %1 = load i64** %first_addr, align 8 ; [#uses=1]
+  %2 = load i32* %n, align 4 ; [#uses=1]
+  %3 = bitcast i32 %2 to i32 ; [#uses=1]
+  %4 = getelementptr i64* %1, i32 %3 ; [#uses=1]
+  %5 = load i64* %4, align 8 ; [#uses=1]
+  %6 = lshr i64 %5, 4 ; [#uses=1]
+  %7 = load i64* %result, align 8 ; [#uses=1]
+  %8 = add i64 %6, %7 ; [#uses=1]
+  store i64 %8, i64* %result, align 8
+  %9 = load i32* %n, align 4 ; [#uses=1]
+  %10 = add i32 %9, 1 ; [#uses=1]
+  store i32 %10, i32* %n, align 4
+  br label %bb1
+
+bb1: ; preds = %bb, %entry
+  %11 = load i32* %n, align 4 ; [#uses=1]
+  %12 = load i32* %count_addr, align 4 ; [#uses=1]
+  %13 = icmp slt i32 %11, %12 ; [#uses=1]
+  %14 = zext i1 %13 to i8 ; [#uses=1]
+  %toBool = icmp ne i8 %14, 0 ; [#uses=1]
+  br i1 %toBool, label %bb, label %bb2
+
+bb2: ; preds = %bb1
+  %15 = load i64* %result, align 8 ; [#uses=1]
+  store i64 %15, i64* %0, align 8
+  %16 = load i64* %0, align 8 ; [#uses=1]
+  store i64 %16, i64* %retval, align 8
+  br label %return
+
+return: ; preds = %bb2
+  %retval3 = load i64* %retval ; [#uses=1]
+  ret i64 %retval3
+}