diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index eb7628e741b..01f49d2ca53 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1252,6 +1252,89 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
   return commonPointerCastTransforms(CI);
 }
 
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type.  Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
+///
+/// The zext/trunc being replaced acts on the integer image of the vector, so it
+/// adds or drops the most significant bits.  Element 0 of a vector occupies the
+/// least significant bits on little-endian targets and the most significant
+/// bits on big-endian targets, so which end of the vector is resized depends on
+/// the target's byte order.
+///
+/// Returns a new, not yet inserted, shuffle instruction, or null if the
+/// transformation cannot be applied.
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+                                         InstCombiner &IC) {
+  // The byte order decides which elements the integer trunc/zext touches.  If
+  // we have no target data we cannot know it, so leave the casts alone.
+  const TargetData *TD = IC.getTargetData();
+  if (TD == 0)
+    return 0;
+  const bool IsBigEndian = TD->isBigEndian();
+
+  // We can only do this optimization if the output is a multiple of the input
+  // element size, or the input is a multiple of the output element size.
+  // Convert the input type to have the same element type as the output.
+  const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+  if (SrcTy->getElementType() != DestTy->getElementType()) {
+    // The input types don't need to be identical, but for now they must be the
+    // same size.  There is no specific reason we couldn't handle things like
+    // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+    // there yet.
+    if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+        DestTy->getElementType()->getPrimitiveSizeInBits())
+      return 0;
+
+    SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+    InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+  }
+
+  // Now that the element types match, get the shuffle mask and RHS of the
+  // shuffle to use, which depends on whether we're increasing or decreasing the
+  // size of the input.
+  SmallVector<Constant*, 16> ShuffleMask;
+  Value *V2;
+  const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+  const unsigned SrcElts = SrcTy->getNumElements();
+  const unsigned DestElts = DestTy->getNumElements();
+
+  if (SrcElts > DestElts) {
+    // If we're shrinking the number of elements, shuffle in the elements that
+    // hold the surviving low-order bits and use undef as the second shuffle
+    // input.  Those are the leading elements on little-endian targets and the
+    // trailing elements on big-endian targets.
+    V2 = UndefValue::get(SrcTy);
+    const unsigned FirstKept = IsBigEndian ? SrcElts - DestElts : 0;
+    for (unsigned i = 0; i != DestElts; ++i)
+      ShuffleMask.push_back(ConstantInt::get(Int32Ty, FirstKept + i));
+  } else {
+    // If we're increasing the number of elements, shuffle in all of the
+    // elements from InVal and fill the rest of the result elements with zeros
+    // from a constant zero.  The zeros are the new high-order bits, so they
+    // follow the input on little-endian targets and precede it on big-endian
+    // targets.
+    V2 = Constant::getNullValue(SrcTy);
+
+    // The excess elements reference the first element of the zero input.
+    Constant *ZeroElt = ConstantInt::get(Int32Ty, SrcElts);
+    const unsigned NumZeros = DestElts - SrcElts;
+    if (IsBigEndian)
+      ShuffleMask.append(NumZeros, ZeroElt);
+    for (unsigned i = 0; i != SrcElts; ++i)
+      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+    if (!IsBigEndian)
+      ShuffleMask.append(NumZeros, ZeroElt);
+  }
+
+  Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
+  return new ShuffleVectorInst(InVal, V2, Mask);
+}
+
+
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
   // otherwise just apply the common ones.
@@ -1310,6 +1393,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
                      Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
       // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
     }
+
+    // If this is a cast from an integer to vector, check to see if the input
+    // is a trunc or zext of a bitcast from vector.  If so, we can replace all
+    // the casts with a shuffle and (potentially) a bitcast.
+    if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
+      CastInst *SrcCast = cast<CastInst>(Src);
+      if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+        if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+          if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+                                               cast<VectorType>(DestTy), *this))
+            return I;
+    }
   }
 
   if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 878da6894f0..77fccdfa52d 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -605,3 +605,36 @@ define i64 @test59(i8 %A, i8 %B) nounwind {
 ; CHECK-NOT: i32
 ; CHECK: ret i64 %H
 }
+
+define <3 x i32> @test60(<4 x i32> %call4) nounwind {
+  %tmp11 = bitcast <4 x i32> %call4 to i128
+  %tmp9 = trunc i128 %tmp11 to i96
+  %tmp10 = bitcast i96 %tmp9 to <3 x i32>
+  ret <3 x i32> %tmp10
+
+; CHECK: @test60
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test61(<3 x i32> %call4) nounwind {
+  %tmp11 = bitcast <3 x i32> %call4 to i96
+  %tmp9 = zext i96 %tmp11 to i128
+  %tmp10 = bitcast i128 %tmp9 to <4 x i32>
+  ret <4 x i32> %tmp10
+; CHECK: @test61
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test62(<3 x float> %call4) nounwind {
+  %tmp11 = bitcast <3 x float> %call4 to i96
+  %tmp9 = zext i96 %tmp11 to i128
+  %tmp10 = bitcast i128 %tmp9 to <4 x i32>
+  ret <4 x i32> %tmp10
+; CHECK: @test62
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+}
+