From bc92b2ca377145f4d39fa0283c41c69e3b42527b Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha
Date: Mon, 27 Apr 2015 21:01:20 +0000
Subject: [PATCH] [AArch64] Don't assert when combining (v3f32 select (setcc f64)).

When the setcc has f64 operands, we can't build a vector setcc mask
to feed a vselect, because f64 doesn't divide v3f32 evenly.
Just bail out when that happens.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235917 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64ISelLowering.cpp   |  6 ++++
 test/CodeGen/AArch64/arm64-neon-select_cc.ll | 32 +++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0607c4a3b91..e66b07ea2f4 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8668,6 +8668,12 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
   SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
   EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
 
+  // Also bail out if the vector CCVT isn't the same size as ResVT.
+  // This can happen if the SETCC operand size doesn't divide the ResVT size
+  // (e.g., f64 vs v3f32).
+  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
+    return SDValue();
+
   // First perform a vector comparison, where lane 0 is the one we're interested
   // in.
   SDLoc DL(N0);
diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index d334c0846ac..b98d2d9219b 100644
--- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
+; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
 
 define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
 ; CHECK-LABEL: test_select_cc_v8i8_i8:
@@ -219,3 +220,32 @@ define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b
   %e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
   ret <2 x i32> %e
 }
+
+; Also make sure we support irregular/non-power-of-2 types such as v3f32.
+define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
+; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
+; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
+; CHECK-NEXT: dup [[VMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK-NEXT: bsl [[RES:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[RES]].16b
+; CHECK-NEXT: ret
+  %cc = fcmp oeq float %c1, %c2
+  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
+  ret <3 x float> %r
+}
+
+define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
+; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
+; CHECK-NEXT: fcmp d2, d3
+; CHECK-NEXT: movn [[N0:w[0-9]+]], #0
+; CHECK-NEXT: csel [[MASK:w[0-9]+]], [[N0]], wzr, eq
+; CHECK-NEXT: dup [[VMASK:v[0-9]+]].4s, [[MASK]]
+; CHECK-NEXT: bsl [[RES:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[RES]].16b
+; CHECK-NEXT: ret
+  %cc = fcmp oeq double %c1, %c2
+  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
+  ret <3 x float> %r
+}
+
+attributes #0 = { nounwind}