From 14fe2e6948c394b0565787eddf59feec29a765da Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 1 Dec 2014 20:59:00 +0000 Subject: [PATCH] [AArch64] Don't combine "select (setcc i1 LHS, RHS), vL, vR". r208210 introduced an optimization that improves the vector select codegen by doing the setcc on vectors directly. This is a problem when the setcc operands are i1s, because the optimization would create vectors of i1, which aren't legal. Part of PR21549. Differential Revision: http://reviews.llvm.org/D6308 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223075 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 6 ++++++ test/CodeGen/AArch64/arm64-neon-select_cc.ll | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 16ad2f6e3b5..622b0e1d73d 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8479,6 +8479,12 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) { // largest real NEON comparison is 64-bits per lane, which means the result is // at most 32-bits and an illegal vector. Just bail out for now. EVT SrcVT = N0.getOperand(0).getValueType(); + + // Don't try to do this optimization when the setcc itself has i1 operands. + // There are no legal vectors of i1, so this would be pointless. 
+ if (SrcVT == MVT::i1) + return SDValue(); + int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits(); if (!ResVT.isVector() || NumMaskElts == 0) return SDValue(); diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll index 95c582a5348..d334c0846ac 100644 --- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll +++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll @@ -204,3 +204,18 @@ define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d ret <2 x double> %e } + +; Special case: when the select condition is an icmp with i1 operands, don't +; do the comparison on vectors. +; Part of PR21549. +define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: test_select_cc_v2i32_icmpi1: +; CHECK: tst w0, #0x1 +; CHECK: csetm [[MASK:w[0-9]+]], ne +; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]] +; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b +; CHECK: mov v0.16b, [[DUPMASK]].16b + %cmp = icmp ne i1 %cc, 0 + %e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %e +}