[AArch64 NEON] Lower SELECT_CC with vector operand.

When the scalar compare is between floating-point values and the selected
operands are vectors, we custom-lower SELECT_CC to use a NEON SIMD compare,
which generates fewer instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200365 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kevin Qin 2014-01-29 01:57:30 +00:00
parent e0f5a86671
commit 79c6a4f347
3 changed files with 362 additions and 56 deletions

View File

@ -458,6 +458,32 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
setOperationAction(ISD::SELECT, MVT::v16i8, Expand);
setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
setOperationAction(ISD::SELECT, MVT::v8i16, Expand);
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
setOperationAction(ISD::SELECT, MVT::v2i64, Expand);
setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT, MVT::v2f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom);
// Vector ExtLoad and TruncStore are expanded.
for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE;
I <= MVT::LAST_VECTOR_VALUETYPE; ++I) {
@ -2661,62 +2687,6 @@ AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
}
}
// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
//
// Lowers a generic SELECT_CC into AArch64ISD::SELECT_CC node(s). Integer
// compares are delegated to getSelectableIntSetCC; floating-point compares go
// through AArch64ISD::SETCC and may need two chained selects.
SDValue
AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue CmpLHS = Op.getOperand(0);
  SDValue CmpRHS = Op.getOperand(1);
  SDValue TrueVal = Op.getOperand(2);
  SDValue FalseVal = Op.getOperand(3);
  ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  EVT ResVT = Op.getValueType();

  if (CmpLHS.getValueType() == MVT::f128) {
    // An f128 comparison becomes a libcall; its result is then treated like
    // any other comparison below.
    softenSetCCOperands(DAG, MVT::f128, CmpLHS, CmpRHS, Cond, dl);

    // When softening leaves only a scalar result (no RHS), test that result
    // against zero to decide between the true and false values.
    if (!CmpRHS.getNode()) {
      CmpRHS = DAG.getConstant(0, CmpLHS.getValueType());
      Cond = ISD::SETNE;
    }
  }

  if (CmpLHS.getValueType().isInteger()) {
    // Integer compares live in their own helper: the immediate/test
    // combinations get hairy and may need tweaking.
    SDValue IntCC;
    SDValue IntCmp = getSelectableIntSetCC(CmpLHS, CmpRHS, Cond, IntCC, DAG, dl);
    return DAG.getNode(AArch64ISD::SELECT_CC, dl, ResVT, IntCmp, TrueVal,
                       FalseVal, IntCC);
  }

  // Some LLVM floating-point CondCodes cannot be expressed as one A64
  // condition, so FPCCToA64CC may hand back a second condition; the select is
  // taken if either one passes.
  A64CC::CondCodes SecondCC = A64CC::Invalid;
  A64CC::CondCodes FirstCC = FPCCToA64CC(Cond, SecondCC);

  SDValue FirstCCVal = DAG.getConstant(FirstCC, MVT::i32);
  SDValue Flags = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, CmpLHS, CmpRHS,
                              DAG.getCondCode(Cond));
  SDValue Result = DAG.getNode(AArch64ISD::SELECT_CC, dl, ResVT, Flags,
                               TrueVal, FalseVal, FirstCCVal);
  if (SecondCC == A64CC::Invalid)
    return Result;

  // Chain a second select: the alternative condition picks the true value,
  // otherwise fall back to whatever the first select produced.
  SDValue SecondCCVal = DAG.getConstant(SecondCC, MVT::i32);
  return DAG.getNode(AArch64ISD::SELECT_CC, dl, ResVT, Flags, TrueVal, Result,
                     SecondCCVal);
}
// (SELECT testbit, iftrue, iffalse)
SDValue
AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@ -3004,6 +2974,158 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return A64SELECT_CC;
}
/// Lower a SELECT_CC whose selected values (IfTrue/IfFalse) are vectors while
/// the compared operands (LHS/RHS) are scalars. A mask with the same layout as
/// the selected vectors is built from the scalar compare and used in a VSELECT.
///
/// \param Op  the SELECT_CC node: (lhs, rhs, iftrue, iffalse, condcode).
/// \param DAG the SelectionDAG in which the replacement nodes are created.
/// \returns a VSELECT of IfTrue/IfFalse controlled by the computed mask.
static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue IfTrue = Op.getOperand(2);
  SDValue IfFalse = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we will
  // use NEON compare.
  if (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) {
    // VT is the 128-bit vector of the compared scalar type; CVT is the
    // same-shaped integer vector that holds the compare result.
    EVT EltVT = LHS.getValueType();
    unsigned EltNum = 128 / EltVT.getSizeInBits();
    EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum);
    unsigned SubConstant =
        (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 : AArch64::sub_64;
    EVT CEltT = (LHS.getValueType() == MVT::f32) ? MVT::i32 : MVT::i64;
    EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum);

    // Wrap each scalar in a 128-bit vector-typed SUBREG_TO_REG node so that
    // the vector compare can be applied to it.
    LHS = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, VT,
                                     DAG.getTargetConstant(0, MVT::i32), LHS,
                                     DAG.getTargetConstant(SubConstant,
                                                           MVT::i32)),
                  0);
    RHS = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, VT,
                                     DAG.getTargetConstant(0, MVT::i32), RHS,
                                     DAG.getTargetConstant(SubConstant,
                                                           MVT::i32)),
                  0);

    SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC);
    SDValue ResCC = LowerVectorSETCC(VSetCC, DAG);

    // CastVT is the integer vector with the same element size and count as
    // IfTrue; the mask is bitcast to it before the VSELECT.
    EVT IfTrueVT = IfTrue.getValueType();
    EVT CastEltT =
        MVT::getIntegerVT(IfTrueVT.getVectorElementType().getSizeInBits());
    EVT CastVT = EVT::getVectorVT(*DAG.getContext(), CastEltT,
                                  IfTrueVT.getVectorNumElements());

    if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) {
      // One compare lane is narrower than the whole selected vector:
      // duplicate lane 0 of the compare result until it spans IfTrue's width.
      EVT DUPVT =
          EVT::getVectorVT(*DAG.getContext(), CEltT,
                           IfTrueVT.getSizeInBits() / CEltT.getSizeInBits());
      ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC,
                          DAG.getConstant(0, MVT::i64, false));
      ResCC = DAG.getNode(ISD::BITCAST, dl, CastVT, ResCC);
    } else {
      // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function
      // can't handle them and will hit this assert.
      assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() &&
             "Vector of IfTrue & IfFalse is too small.");
      // One compare lane is exactly as wide as the selected vector: take the
      // low subvector of the compare result that matches IfTrue's width.
      unsigned ExEltNum =
          EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits();
      EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum);
      ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC,
                          DAG.getConstant(0, MVT::i64, false));
      ResCC = DAG.getNode(ISD::BITCAST, dl, CastVT, ResCC);
    }

    SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
                                  ResCC, IfTrue, IfFalse);
    return VSelect;
  }

  // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are
  // vectors.
  // NOTE(review): the condition code is translated with FPCCToA64CC although
  // this path is described as the integer case; confirm that helper maps
  // integer (in particular unsigned) condition codes as intended.
  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
  CondCode = FPCCToA64CC(CC, Alternative);
  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
  SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
                              DAG.getCondCode(CC));

  // The scalar mask is i32 unless the vector elements are wider than 32 bits.
  EVT SEVT = MVT::i32;
  if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32)
    SEVT = MVT::i64;
  SDValue AllOne = DAG.getConstant(-1, SEVT);
  SDValue AllZero = DAG.getConstant(0, SEVT);
  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC,
                                     AllOne, AllZero, A64cc);

  if (Alternative != A64CC::Invalid) {
    // The chained select still produces the scalar mask, so it must have type
    // SEVT (not the vector result type of Op): both of its value operands are
    // SEVT and its result is fed to the scalar-to-vector broadcast below.
    A64cc = DAG.getConstant(Alternative, MVT::i32);
    A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC, AllOne,
                               A64SELECT_CC, A64cc);
  }

  // Spread the scalar mask over every lane of the selected vector type.
  SDValue VDup;
  if (IfTrue.getValueType().getVectorNumElements() == 1)
    VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, IfTrue.getValueType(),
                       A64SELECT_CC);
  else
    VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, IfTrue.getValueType(),
                       A64SELECT_CC);

  SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
                                VDup, IfTrue, IfFalse);
  return VSelect;
}
// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
//
// Lowers a generic SELECT_CC. Vector-valued selects are forwarded to
// LowerVectorSELECT_CC; scalar selects become AArch64ISD::SELECT_CC node(s).
SDValue
AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue CmpLHS = Op.getOperand(0);
  SDValue CmpRHS = Op.getOperand(1);
  SDValue TrueVal = Op.getOperand(2);
  SDValue FalseVal = Op.getOperand(3);
  ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  EVT ResVT = Op.getValueType();

  // Selecting between vectors is handled by the dedicated vector lowering.
  if (TrueVal.getValueType().isVector())
    return LowerVectorSELECT_CC(Op, DAG);

  if (CmpLHS.getValueType() == MVT::f128) {
    // An f128 comparison becomes a libcall; its result is then treated like
    // any other comparison below.
    softenSetCCOperands(DAG, MVT::f128, CmpLHS, CmpRHS, Cond, dl);

    // When softening leaves only a scalar result (no RHS), test that result
    // against zero to decide between the true and false values.
    if (!CmpRHS.getNode()) {
      CmpRHS = DAG.getConstant(0, CmpLHS.getValueType());
      Cond = ISD::SETNE;
    }
  }

  if (CmpLHS.getValueType().isInteger()) {
    // Integer compares live in their own helper: the immediate/test
    // combinations get hairy and may need tweaking.
    SDValue IntCC;
    SDValue IntCmp = getSelectableIntSetCC(CmpLHS, CmpRHS, Cond, IntCC, DAG, dl);
    return DAG.getNode(AArch64ISD::SELECT_CC, dl, ResVT, IntCmp, TrueVal,
                       FalseVal, IntCC);
  }

  // Some LLVM floating-point CondCodes cannot be expressed as one A64
  // condition, so FPCCToA64CC may hand back a second condition; the select is
  // taken if either one passes.
  A64CC::CondCodes SecondCC = A64CC::Invalid;
  A64CC::CondCodes FirstCC = FPCCToA64CC(Cond, SecondCC);

  SDValue FirstCCVal = DAG.getConstant(FirstCC, MVT::i32);
  SDValue Flags = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, CmpLHS, CmpRHS,
                              DAG.getCondCode(Cond));
  SDValue Result = DAG.getNode(AArch64ISD::SELECT_CC, dl, ResVT, Flags,
                               TrueVal, FalseVal, FirstCCVal);
  if (SecondCC == A64CC::Invalid)
    return Result;

  // Chain a second select: the alternative condition picks the true value,
  // otherwise fall back to whatever the first select produced.
  SDValue SecondCCVal = DAG.getConstant(SecondCC, MVT::i32);
  return DAG.getNode(AArch64ISD::SELECT_CC, dl, ResVT, Flags, TrueVal, Result,
                     SecondCCVal);
}
SDValue
AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();

View File

@ -461,10 +461,14 @@ multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
(INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),

View File

@ -0,0 +1,180 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Select between two <8 x i8> vectors on a scalar i8 equality compare.
; Expected code: compare the zero-extended bytes, turn the flag into an
; all-ones-or-zero word with csinv, broadcast it with dup (.8b) and pick the
; result with bsl.
define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
; CHECK: and w0, w0, #0xff
; CHECK-NEXT: cmp w0, w1, uxtb
; CHECK-NEXT: csinv w0, wzr, wzr, ne
; CHECK-NEXT: dup v{{[0-9]+}}.8b, w0
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
%cmp31 = icmp eq i8 %a, %b
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
ret <8x i8> %e
}
; Select between two <8 x i8> vectors on a scalar float ordered-equal compare.
; Expected code: vector fcmeq on .4s, lane 0 of the mask duplicated into a
; .2s vector, then bsl on the 64-bit operands.
define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_f32:
; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
ret <8x i8> %e
}
; Select between two <8 x i8> vectors on a scalar double ordered-equal compare.
; Expected code: vector fcmeq on .2d followed directly by bsl on the 64-bit
; operands (no dup in between).
define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_f64:
; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
%cmp31 = fcmp oeq double %a, %b
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
ret <8x i8> %e
}
; Select between two <16 x i8> vectors on a scalar i8 equality compare.
; Expected code: compare the zero-extended bytes, csinv to an
; all-ones-or-zero word, dup into .16b, then bsl on the 128-bit operands.
define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_i8:
; CHECK: and w0, w0, #0xff
; CHECK-NEXT: cmp w0, w1, uxtb
; CHECK-NEXT: csinv w0, wzr, wzr, ne
; CHECK-NEXT: dup v{{[0-9]+}}.16b, w0
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
%cmp31 = icmp eq i8 %a, %b
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
ret <16x i8> %e
}
; Select between two <16 x i8> vectors on a scalar float ordered-equal compare.
; Expected code: vector fcmeq on .4s, lane 0 of the mask duplicated across
; .4s, then bsl on the 128-bit operands.
define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_f32:
; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
ret <16x i8> %e
}
; Select between two <16 x i8> vectors on a scalar double ordered-equal compare.
; Expected code: vector fcmeq on .2d, lane 0 of the mask duplicated across
; .2d, then bsl on the 128-bit operands.
define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_f64:
; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
%cmp31 = fcmp oeq double %a, %b
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
ret <16x i8> %e
}
; Select between two <4 x i16> vectors on a scalar i16 equality compare.
; Expected code: compare the zero-extended halfwords, csinv to an
; all-ones-or-zero word, dup into .4h, then bsl on the 64-bit operands.
define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) {
; CHECK-LABEL: test_select_cc_v4i16:
; CHECK: and w0, w0, #0xffff
; CHECK-NEXT: cmp w0, w1, uxth
; CHECK-NEXT: csinv w0, wzr, wzr, ne
; CHECK-NEXT: dup v{{[0-9]+}}.4h, w0
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
%cmp31 = icmp eq i16 %a, %b
%e = select i1 %cmp31, <4x i16> %c, <4x i16> %d
ret <4x i16> %e
}
; Select between two <8 x i16> vectors on a scalar i16 equality compare.
; Expected code: compare the zero-extended halfwords, csinv to an
; all-ones-or-zero word, dup into .8h, then bsl on the 128-bit operands.
define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) {
; CHECK-LABEL: test_select_cc_v8i16:
; CHECK: and w0, w0, #0xffff
; CHECK-NEXT: cmp w0, w1, uxth
; CHECK-NEXT: csinv w0, wzr, wzr, ne
; CHECK-NEXT: dup v{{[0-9]+}}.8h, w0
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
%cmp31 = icmp eq i16 %a, %b
%e = select i1 %cmp31, <8x i16> %c, <8x i16> %d
ret <8x i16> %e
}
; Select between two <2 x i32> vectors on a scalar i32 equality compare.
; Expected code: scalar cmp, csinv to an all-ones-or-zero word, dup into
; .2s, then bsl on the 64-bit operands.
define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK: cmp w0, w1, uxtw
; CHECK-NEXT: csinv w0, wzr, wzr, ne
; CHECK-NEXT: dup v{{[0-9]+}}.2s, w0
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
%cmp31 = icmp eq i32 %a, %b
%e = select i1 %cmp31, <2x i32> %c, <2x i32> %d
ret <2x i32> %e
}
; Select between two <4 x i32> vectors on a scalar i32 equality compare.
; Expected code: scalar cmp, csinv to an all-ones-or-zero word, dup into
; .4s, then bsl on the 128-bit operands.
define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK: cmp w0, w1, uxtw
; CHECK-NEXT: csinv w0, wzr, wzr, ne
; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
%cmp31 = icmp eq i32 %a, %b
%e = select i1 %cmp31, <4x i32> %c, <4x i32> %d
ret <4x i32> %e
}
; Select between two <1 x i64> vectors on a scalar i64 equality compare.
; Expected code: scalar cmp, csinv to an all-ones-or-zero doubleword, move it
; into a d register with fmov (single element, so no dup), then bsl.
define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) {
; CHECK-LABEL: test_select_cc_v1i64:
; CHECK: cmp x0, x1
; CHECK-NEXT: csinv x0, xzr, xzr, ne
; CHECK-NEXT: fmov d{{[0-9]+}}, x0
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
%cmp31 = icmp eq i64 %a, %b
%e = select i1 %cmp31, <1x i64> %c, <1x i64> %d
ret <1x i64> %e
}
; Select between two <2 x i64> vectors on a scalar i64 equality compare.
; Expected code: scalar cmp, csinv to an all-ones-or-zero doubleword, dup
; into .2d, then bsl on the 128-bit operands.
define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK: cmp x0, x1
; CHECK-NEXT: csinv x0, xzr, xzr, ne
; CHECK-NEXT: dup v{{[0-9]+}}.2d, x0
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
%cmp31 = icmp eq i64 %a, %b
%e = select i1 %cmp31, <2x i64> %c, <2x i64> %d
ret <2x i64> %e
}
; Select between two <1 x float> values on a scalar float ordered-equal compare.
; Expected code: a plain scalar fcmp followed by fcsel, not the NEON
; compare-and-bsl sequence.
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, eq
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
ret <1 x float> %e
}
; Select between two <2 x float> vectors on a scalar float ordered-equal compare.
; Expected code: vector fcmeq on .4s, lane 0 of the mask duplicated into a
; .2s vector, then bsl on the 64-bit operands.
define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
; CHECK-LABEL: test_select_cc_v2f32:
; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
ret <2 x float> %e
}
; Select between two <4 x float> vectors on a scalar float ordered-equal compare.
; Expected code: vector fcmeq on .4s, lane 0 of the mask duplicated across
; .4s, then bsl on the 128-bit operands.
define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
; CHECK-LABEL: test_select_cc_v4f32:
; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <4x float> %c, <4x float> %d
ret <4x float> %e
}
; Select between two <1 x double> values on a scalar double ordered-equal
; compare. Expected code: vector fcmeq on .2d followed directly by bsl on the
; 64-bit operands (no dup in between).
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
; CHECK-LABEL: test_select_cc_v1f64:
; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
%cmp31 = fcmp oeq double %a, %b
%e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
ret <1 x double> %e
}
; Select between two <2 x double> vectors on a scalar double ordered-equal
; compare. Expected code: vector fcmeq on .2d, lane 0 of the mask duplicated
; across .2d, then bsl on the 128-bit operands.
define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
; CHECK-LABEL: test_select_cc_v2f64:
; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
%cmp31 = fcmp oeq double %a, %b
%e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
ret <2 x double> %e
}