From a9d9f7eae81857a172745520b8de48dcd157ce9d Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Tue, 16 Dec 2014 10:59:27 +0000 Subject: [PATCH] [ARM] Prevent PerformVCVTCombine from combining a vmul/vcvt with 8 lanes This would result in a crash since the vcvt used does not support v8i32 types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224332 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 8 +++++--- test/CodeGen/ARM/isel-v8i32-crash.ll | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/ARM/isel-v8i32-crash.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3fce38e2e3d..e908c42e975 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9355,16 +9355,18 @@ static SDValue PerformVCVTCombine(SDNode *N, MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); - if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 || + NumLanes > 4) { // These instructions only exist converting from f32 to i32. We can handle // smaller integers by generating an extra truncate, but larger ones would - // be lossy. + // be lossy. We also can't handle more than 4 lanes, since these instructions + // only support v2i32/v4i32 types. return SDValue(); } unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; - unsigned NumLanes = Op.getValueType().getVectorNumElements(); SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), NumLanes == 2 ? 
MVT::v2i32 : MVT::v4i32, DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, diff --git a/test/CodeGen/ARM/isel-v8i32-crash.ll b/test/CodeGen/ARM/isel-v8i32-crash.ll new file mode 100644 index 00000000000..0116fe8de7c --- /dev/null +++ b/test/CodeGen/ARM/isel-v8i32-crash.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=armv7-linux-gnu | FileCheck %s + +; Check we don't crash when trying to combine: +; (d1 = <float 8.000000e+00, float 8.000000e+00, ...>) (power of 2) +; vmul.f32 d0, d1, d0 +; vcvt.s32.f32 d0, d0 +; into: +; vcvt.s32.f32 d0, d0, #3 +; when we have a vector length of 8, due to use of v8i32 types. + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; CHECK: func: +; CHECK: vcvt.s32.f32 q[[R:[0-9]]], q[[R]], #3 +define void @func(i16* nocapture %pb, float* nocapture readonly %pf) #0 { +entry: + %0 = bitcast float* %pf to <8 x float>* + %1 = load <8 x float>* %0, align 4 + %2 = fmul <8 x float> %1, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> + %3 = fptosi <8 x float> %2 to <8 x i16> + %4 = bitcast i16* %pb to <8 x i16>* + store <8 x i16> %3, <8 x i16>* %4, align 2 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }