diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 42246ff3216..0ae12aef82e 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -690,15 +690,45 @@ Value *ConvertToScalarInfo::
 ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
                            uint64_t Offset, IRBuilder<> &Builder) {
   // If the load is of the whole new alloca, no conversion is needed.
-  if (FromVal->getType() == ToType && Offset == 0)
+  const Type *FromType = FromVal->getType();
+  if (FromType == ToType && Offset == 0)
     return FromVal;
 
   // If the result alloca is a vector type, this is either an element
   // access or a bitcast to another vector type of the same size.
-  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
+  if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
     unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
-    if (ToTypeSize == AllocaSize)
-      return Builder.CreateBitCast(FromVal, ToType, "tmp");
+    if (ToTypeSize == AllocaSize) {
+      if (FromType->getPrimitiveSizeInBits() ==
+          ToType->getPrimitiveSizeInBits())
+        return Builder.CreateBitCast(FromVal, ToType, "tmp");
+      else {
+        // Vectors with the same element type can have the same allocation
+        // size but different primitive sizes (e.g., <3 x i32> and <4 x i32>).
+        // In this case, use a shuffle vector instead of a bit cast.
+        const VectorType *ToVTy = dyn_cast<VectorType>(ToType);
+        assert(ToVTy && (ToVTy->getElementType() == VTy->getElementType()) &&
+               "Vectors must have the same element type");
+        LLVMContext &Context = FromVal->getContext();
+        Value *UnV = UndefValue::get(FromType);
+        unsigned numEltsFrom = VTy->getNumElements();
+        unsigned numEltsTo = ToVTy->getNumElements();
+
+        SmallVector<Constant*, 4> Args;
+        unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+        unsigned i;
+        for (i=0; i != minNumElts; ++i)
+          Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), i));
+
+        if (i < numEltsTo) {
+          Constant* UnC = UndefValue::get(Type::getInt32Ty(Context));
+          for (; i != numEltsTo; ++i)
+            Args.push_back(UnC);
+        }
+        Constant *Mask = ConstantVector::get(Args);
+        return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
+      }
+    }
 
     if (ToType->isVectorTy()) {
       assert(isPowerOf2_64(AllocaSize / ToTypeSize) &&
@@ -837,8 +867,38 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
 
   // Changing the whole vector with memset or with an access of a different
   // vector type?
-  if (ValSize == VecSize)
-    return Builder.CreateBitCast(SV, AllocaType, "tmp");
+  if (ValSize == VecSize) {
+    if (VTy->getPrimitiveSizeInBits() ==
+        SV->getType()->getPrimitiveSizeInBits())
+      return Builder.CreateBitCast(SV, AllocaType, "tmp");
+    else {
+      // Vectors with the same element type can have the same allocation
+      // size but different primitive sizes (e.g., <3 x i32> and <4 x i32>).
+      // In this case, use a shuffle vector instead of a bit cast.
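+      // (The mask built below selects the first min(numEltsFrom, numEltsTo)
+      // source elements in order and fills any remaining lanes with undef.)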
+      const VectorType *SVVTy = dyn_cast<VectorType>(SV->getType());
+      assert(SVVTy && (SVVTy->getElementType() == VTy->getElementType()) &&
+             "Vectors must have the same element type");
+      Value *UnV = UndefValue::get(SVVTy);
+      unsigned numEltsFrom = SVVTy->getNumElements();
+      unsigned numEltsTo = VTy->getNumElements();
+
+      SmallVector<Constant*, 4> Args;
+      unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+      unsigned i;
+      for (i=0; i != minNumElts; ++i)
+        Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), i));
+
+      if (i < numEltsTo) {
+        Constant* UnC = UndefValue::get(Type::getInt32Ty(Context));
+        for (; i != numEltsTo; ++i)
+          Args.push_back(UnC);
+      }
+      Constant *Mask = ConstantVector::get(Args);
+      return Builder.CreateShuffleVector(SV, UnV, Mask, "tmpV");
+    }
+  }
 
   if (SV->getType()->isVectorTy() && isPowerOf2_64(VecSize / ValSize)) {
     assert(Offset == 0 && "Can't insert a value of a smaller vector type at "
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index ef701c621da..9c17a54294f 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -202,3 +202,49 @@ define float @test13(<4 x float> %x, <2 x i32> %y) {
 ; CHECK-NOT: alloca
 ; CHECK: bitcast <4 x float> %x to i128
 }
+
+define <3 x float> @test14(<3 x float> %x) {
+entry:
+  %x.addr = alloca <3 x float>, align 16
+  %r = alloca <3 x i32>, align 16
+  %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %storetmp = bitcast <3 x float>* %x.addr to <4 x float>*
+  store <4 x float> %extractVec, <4 x float>* %storetmp, align 16
+  %tmp = load <3 x float>* %x.addr, align 16
+  %cmp = fcmp une <3 x float> %tmp, zeroinitializer
+  %sext = sext <3 x i1> %cmp to <3 x i32>
+  %and = and <3 x i32> <i32 1065353216, i32 1065353216, i32 1065353216>, %sext
+  %extractVec1 = shufflevector <3 x i32> %and, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %storetmp2 = bitcast <3 x i32>* %r to <4 x i32>*
+  store <4 x i32> %extractVec1, <4 x i32>* %storetmp2, align 16
+  %tmp3 = load <3 x i32>* %r, align 16
+  %0 = bitcast <3 x i32> %tmp3 to <3 x float>
+  %tmp4 = load <3 x float>* %x.addr, align 16
+  ret <3 x float> %tmp4
+; CHECK: @test14
+; CHECK-NOT: alloca
+; CHECK: shufflevector <4 x i32> %extractVec1, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+}
+
+define void @test15(<3 x i64>* sret %agg.result, <3 x i64> %x, <3 x i64> %min) {
+entry:
+  %x.addr = alloca <3 x i64>, align 32
+  %min.addr = alloca <3 x i64>, align 32
+  %extractVec = shufflevector <3 x i64> %x, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %storetmp = bitcast <3 x i64>* %x.addr to <4 x i64>*
+  store <4 x i64> %extractVec, <4 x i64>* %storetmp, align 32
+  %extractVec1 = shufflevector <3 x i64> %min, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %storetmp2 = bitcast <3 x i64>* %min.addr to <4 x i64>*
+  store <4 x i64> %extractVec1, <4 x i64>* %storetmp2, align 32
+  %tmp = load <3 x i64>* %x.addr
+  %tmp5 = extractelement <3 x i64> %tmp, i32 0
+  %tmp11 = insertelement <3 x i64> %tmp, i64 %tmp5, i32 0
+  store <3 x i64> %tmp11, <3 x i64>* %x.addr
+  %tmp30 = load <3 x i64>* %x.addr, align 32
+  store <3 x i64> %tmp30, <3 x i64>* %agg.result
+  ret void
+; CHECK: @test15
+; CHECK-NOT: alloca
+; CHECK: shufflevector <4 x i64> %tmpV2, <4 x i64> undef, <3 x i32> <i32 0, i32 1, i32 2>
+}
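-- 
For illustration only (not part of the patch): a minimal sketch, in the
pre-3.0 IR syntax used by the tests above, of the narrowing shuffle the new
path emits in place of the old bitcast. The function and value names here
are hypothetical. A bitcast requires equal primitive bit widths, and
<3 x i32> is 96 bits while its 16-byte-aligned alloca is promoted to the
128-bit <4 x i32>, so a shuffle that drops the padding lane is used instead:

define <3 x i32> @narrow(<4 x i32>* %p) {
entry:
  ; Load the promoted 4-element vector, then drop the padding lane with a
  ; shufflevector rather than bitcasting between differently sized types.
  %v = load <4 x i32>* %p
  %tmpV = shufflevector <4 x i32> %v, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %tmpV
}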