From ff73d8fef9a94ceb2fc35d9371deeb0214c0eeee Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Mon, 30 Apr 2012 16:53:34 +0000 Subject: [PATCH] Don't introduce illegal types when creating vmull operations. ARM BUILD_VECTORs created after type legalization cannot use i8 or i16 operands, since those types are not legal. Instead use i32 operands, which will be implicitly truncated by the BUILD_VECTOR to match the element type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155824 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 4 +- test/CodeGen/ARM/vmul.ll | 74 ++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e193672ef7a..88da2632d79 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4792,7 +4792,9 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { for (unsigned i = 0; i != NumElts; ++i) { ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); const APInt &CInt = C->getAPIntValue(); - Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); + // Element types smaller than 32 bits are not legal, so use i32 elements. + // The values are implicitly truncated so sext vs. zext doesn't matter. + Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 61d89bbae83..74628f0c5ce 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -525,3 +525,77 @@ define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) { %3 = extractelement <8 x i16> %2, i32 0 ret i16 %3 } + +; A constant build_vector created for a vmull with half-width elements must +; not introduce illegal types. 
+define void @vmull_buildvector() nounwind optsize ssp align 2 { +; CHECK: vmull_buildvector +entry: + br i1 undef, label %for.end179, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.cond.loopexit: ; preds = %for.body33, %for.body + br i1 undef, label %for.end179, label %for.body + +for.body: ; preds = %for.cond.loopexit, %for.body.lr.ph + br i1 undef, label %for.cond.loopexit, label %for.body33.lr.ph + +for.body33.lr.ph: ; preds = %for.body + %.sub = select i1 undef, i32 0, i32 undef + br label %for.body33 + +for.body33: ; preds = %for.body33, %for.body33.lr.ph + %add45 = add i32 undef, undef + %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1) + %0 = load i32** undef, align 4 + %shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8> + %vmovl.i249 = zext <8 x i8> %1 to <8 x i16> + %shuffle.i246 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i240 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> + %2 = bitcast <1 x i64> %shuffle.i240 to <8 x i8> + %3 = bitcast <16 x i8> undef to <2 x i64> + %vmovl.i237 = zext <8 x i8> undef to <8 x i16> + %shuffle.i234 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i226 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %vmovl.i225 = zext <8 x i8> undef to <8 x i16> + %mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249 + %vshl_n = shl <8 x i16> %mul.i223, + %vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> , <8 x i16> %vshl_n) nounwind + %mul.i209 = mul <8 x i16> undef, + %vshr_n130 = lshr <8 x i16> undef, + %vshr_n134 = lshr <8 x i16> %mul.i209, + %sub.i205 = sub <8 x i16> , %vshr_n130 + %sub.i203 = sub <8 x i16> , %vshr_n134 + %add.i200 = add <8 x i16> %sub.i205, + %add.i198 = add <8 x i16> %add.i200, %sub.i203 + %mul.i194 = mul <8 x i16> %add.i198, 
%vmovl.i237 + %mul.i191 = mul <8 x i16> %vshr_n130, undef + %add.i192 = add <8 x i16> %mul.i191, %mul.i194 + %mul.i187 = mul <8 x i16> %vshr_n134, undef + %add.i188 = add <8 x i16> %mul.i187, %add.i192 + %mul.i185 = mul <8 x i16> undef, undef + %add.i186 = add <8 x i16> %mul.i185, undef + %vrshr_n160 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i188, <8 x i16> ) + %vrshr_n163 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i186, <8 x i16> ) + %mul.i184 = mul <8 x i16> undef, %vrshr_n160 + %mul.i181 = mul <8 x i16> undef, %vmovl.i225 + %add.i182 = add <8 x i16> %mul.i181, %mul.i184 + %vrshr_n170 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i182, <8 x i16> ) + %vqmovn1.i180 = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %vrshr_n170) nounwind + %4 = bitcast <8 x i8> %vqmovn1.i180 to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %4, <1 x i64> undef, <2 x i32> + %5 = bitcast <2 x i64> %shuffle.i to <16 x i8> + store <16 x i8> %5, <16 x i8>* undef, align 16 + %add177 = add nsw i32 undef, 16 + br i1 undef, label %for.body33, label %for.cond.loopexit + +for.end179: ; preds = %for.cond.loopexit, %entry + ret void +} + +declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone