mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-12 07:37:34 +00:00
[AArch64 NEON] Lower SELECT_CC with vector operand.
When the scalar comparison is between floating-point values and the selected operands are vectors, we custom-lower SELECT_CC to use a NEON SIMD compare, which generates fewer instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200365 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e0f5a86671
commit
79c6a4f347
@ -458,6 +458,32 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
|
||||
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
|
||||
|
||||
setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v16i8, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v8i16, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::v2f64, Expand);
|
||||
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom);
|
||||
|
||||
// Vector ExtLoad and TruncStore are expanded.
|
||||
for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE;
|
||||
I <= MVT::LAST_VECTOR_VALUETYPE; ++I) {
|
||||
@ -2661,62 +2687,6 @@ AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
}
|
||||
|
||||
// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
|
||||
SDValue
|
||||
AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
SDValue IfTrue = Op.getOperand(2);
|
||||
SDValue IfFalse = Op.getOperand(3);
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
|
||||
|
||||
if (LHS.getValueType() == MVT::f128) {
|
||||
// f128 comparisons are lowered to libcalls, but slot in nicely here
|
||||
// afterwards.
|
||||
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
|
||||
|
||||
// If softenSetCCOperands returned a scalar, we need to compare the result
|
||||
// against zero to select between true and false values.
|
||||
if (RHS.getNode() == 0) {
|
||||
RHS = DAG.getConstant(0, LHS.getValueType());
|
||||
CC = ISD::SETNE;
|
||||
}
|
||||
}
|
||||
|
||||
if (LHS.getValueType().isInteger()) {
|
||||
SDValue A64cc;
|
||||
|
||||
// Integers are handled in a separate function because the combinations of
|
||||
// immediates and tests can get hairy and we may want to fiddle things.
|
||||
SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
|
||||
|
||||
return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
|
||||
CmpOp, IfTrue, IfFalse, A64cc);
|
||||
}
|
||||
|
||||
// Note that some LLVM floating-point CondCodes can't be lowered to a single
|
||||
// conditional branch, hence FPCCToA64CC can set a second test, where either
|
||||
// passing is sufficient.
|
||||
A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
|
||||
CondCode = FPCCToA64CC(CC, Alternative);
|
||||
SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
|
||||
SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
|
||||
DAG.getCondCode(CC));
|
||||
SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
|
||||
Op.getValueType(),
|
||||
SetCC, IfTrue, IfFalse, A64cc);
|
||||
|
||||
if (Alternative != A64CC::Invalid) {
|
||||
A64cc = DAG.getConstant(Alternative, MVT::i32);
|
||||
A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
|
||||
SetCC, IfTrue, A64SELECT_CC, A64cc);
|
||||
|
||||
}
|
||||
|
||||
return A64SELECT_CC;
|
||||
}
|
||||
|
||||
// (SELECT testbit, iftrue, iffalse)
|
||||
SDValue
|
||||
AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
@ -3004,6 +2974,158 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
return A64SELECT_CC;
|
||||
}
|
||||
|
||||
// Lower a SELECT_CC whose selected values (operands 2 and 3) are vectors while
// the compared values (operands 0 and 1) are scalars.
//
// Operand layout: (SELECT_CC lhs, rhs, iftrue, iffalse, condcode).
//
// Both paths end in an ISD::VSELECT between IfTrue and IfFalse; they differ
// only in how the vector mask is produced:
//  * f32/f64 compare: the scalars are placed into 128-bit vector values and
//    compared via LowerVectorSETCC; the resulting mask is then reshaped to
//    the width of IfTrue.
//  * otherwise: a scalar all-ones/all-zero value is selected with
//    AArch64ISD::SELECT_CC and turned into a vector mask.
static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue IfTrue = Op.getOperand(2);
  SDValue IfFalse = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we will
  // use NEON compare.
  if (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) {
    EVT EltVT = LHS.getValueType();
    // Number of EltVT lanes in a 128-bit vector (4 for f32, 2 for f64).
    unsigned EltNum = 128 / EltVT.getSizeInBits();
    EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum);
    unsigned SubConstant =
        (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 : AArch64::sub_64;
    // Integer element/vector types of the same lane width, used for the
    // compare result.
    EVT CEltT = (LHS.getValueType() == MVT::f32) ? MVT::i32 : MVT::i64;
    EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum);

    // Wrap each scalar into a 128-bit vector value through SUBREG_TO_REG,
    // using the sub_32 / sub_64 subregister index chosen above.
    LHS = SDValue(DAG.getMachineNode(
                      TargetOpcode::SUBREG_TO_REG, dl, VT,
                      DAG.getTargetConstant(0, MVT::i32), LHS,
                      DAG.getTargetConstant(SubConstant, MVT::i32)),
                  0);
    RHS = SDValue(DAG.getMachineNode(
                      TargetOpcode::SUBREG_TO_REG, dl, VT,
                      DAG.getTargetConstant(0, MVT::i32), RHS,
                      DAG.getTargetConstant(SubConstant, MVT::i32)),
                  0);

    SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC);
    SDValue ResCC = LowerVectorSETCC(VSetCC, DAG);

    // Integer vector type with the same element size and count as IfTrue;
    // the mask is bitcast to this type before the VSELECT.
    EVT IfTrueVT = IfTrue.getValueType();
    EVT CastEltT =
        MVT::getIntegerVT(IfTrueVT.getVectorElementType().getSizeInBits());
    EVT CastVT = EVT::getVectorVT(*DAG.getContext(), CastEltT,
                                  IfTrueVT.getVectorNumElements());

    if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) {
      // IfTrue is wider than one compare lane: replicate lane 0 of the
      // compare result across a vector as wide as IfTrue.
      EVT DUPVT =
          EVT::getVectorVT(*DAG.getContext(), CEltT,
                           IfTrueVT.getSizeInBits() / CEltT.getSizeInBits());
      ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC,
                          DAG.getConstant(0, MVT::i64, false));

      ResCC = DAG.getNode(ISD::BITCAST, dl, CastVT, ResCC);
    } else {
      // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function
      // can't handle them and will hit this assert.
      assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() &&
             "Vector of IfTrue & IfFalse is too small.");

      // IfTrue is exactly one compare lane wide: take the low part of the
      // compare result, starting at element 0.
      unsigned ExEltNum =
          EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits();
      EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum);
      ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC,
                          DAG.getConstant(0, MVT::i64, false));
      ResCC = DAG.getNode(ISD::BITCAST, dl, CastVT, ResCC);
    }

    return DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(), ResCC, IfTrue,
                       IfFalse);
  }

  // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are
  // vectors.
  // NOTE(review): the condition code is translated with FPCCToA64CC even
  // though this is described as the integer case; confirm that it yields the
  // intended condition for every integer predicate (unsigned ones included).
  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
  CondCode = FPCCToA64CC(CC, Alternative);
  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
  SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
                              DAG.getCondCode(CC));

  // Scalar type used for the all-ones/all-zero mask value: i64 only when the
  // vector elements are wider than 32 bits, i32 otherwise.
  EVT SEVT = MVT::i32;
  if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32)
    SEVT = MVT::i64;
  SDValue AllOne = DAG.getConstant(-1, SEVT);
  SDValue AllZero = DAG.getConstant(0, SEVT);
  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC,
                                     AllOne, AllZero, A64cc);

  if (Alternative != A64CC::Invalid) {
    // The chained select picks between two scalars of type SEVT, so its
    // result type must be SEVT as well (not the vector type of Op); the
    // value is only turned into a vector below.
    A64cc = DAG.getConstant(Alternative, MVT::i32);
    A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC, AllOne,
                               A64SELECT_CC, A64cc);
  }

  // Turn the scalar mask into a vector of IfTrue's type: single-element
  // vectors use SCALAR_TO_VECTOR, everything else duplicates the scalar.
  SDValue VDup;
  if (IfTrue.getValueType().getVectorNumElements() == 1)
    VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, IfTrue.getValueType(),
                       A64SELECT_CC);
  else
    VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, IfTrue.getValueType(),
                       A64SELECT_CC);

  return DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(), VDup, IfTrue,
                     IfFalse);
}
|
||||
|
||||
// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
|
||||
SDValue
|
||||
AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
SDValue IfTrue = Op.getOperand(2);
|
||||
SDValue IfFalse = Op.getOperand(3);
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
|
||||
|
||||
if (IfTrue.getValueType().isVector())
|
||||
return LowerVectorSELECT_CC(Op, DAG);
|
||||
|
||||
if (LHS.getValueType() == MVT::f128) {
|
||||
// f128 comparisons are lowered to libcalls, but slot in nicely here
|
||||
// afterwards.
|
||||
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
|
||||
|
||||
// If softenSetCCOperands returned a scalar, we need to compare the result
|
||||
// against zero to select between true and false values.
|
||||
if (RHS.getNode() == 0) {
|
||||
RHS = DAG.getConstant(0, LHS.getValueType());
|
||||
CC = ISD::SETNE;
|
||||
}
|
||||
}
|
||||
|
||||
if (LHS.getValueType().isInteger()) {
|
||||
SDValue A64cc;
|
||||
|
||||
// Integers are handled in a separate function because the combinations of
|
||||
// immediates and tests can get hairy and we may want to fiddle things.
|
||||
SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
|
||||
|
||||
return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), CmpOp,
|
||||
IfTrue, IfFalse, A64cc);
|
||||
}
|
||||
|
||||
// Note that some LLVM floating-point CondCodes can't be lowered to a single
|
||||
// conditional branch, hence FPCCToA64CC can set a second test, where either
|
||||
// passing is sufficient.
|
||||
A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
|
||||
CondCode = FPCCToA64CC(CC, Alternative);
|
||||
SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
|
||||
SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
|
||||
DAG.getCondCode(CC));
|
||||
SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
|
||||
Op.getValueType(),
|
||||
SetCC, IfTrue, IfFalse, A64cc);
|
||||
|
||||
if (Alternative != A64CC::Invalid) {
|
||||
A64cc = DAG.getConstant(Alternative, MVT::i32);
|
||||
A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
|
||||
SetCC, IfTrue, A64SELECT_CC, A64cc);
|
||||
|
||||
}
|
||||
|
||||
return A64SELECT_CC;
|
||||
}
|
||||
|
||||
SDValue
|
||||
AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
|
||||
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
|
||||
|
@ -461,10 +461,14 @@ multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
|
||||
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
|
||||
def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
|
||||
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
|
||||
def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
|
||||
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
|
||||
def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
|
||||
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
|
||||
def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
|
||||
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
|
||||
def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
|
||||
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
|
||||
def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
|
||||
(INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
|
||||
def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
|
||||
|
180
test/CodeGen/AArch64/neon-select_cc.ll
Normal file
180
test/CodeGen/AArch64/neon-select_cc.ll
Normal file
@ -0,0 +1,180 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
|
||||
|
||||
define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v8i8_i8:
|
||||
; CHECK: and w0, w0, #0xff
|
||||
; CHECK-NEXT: cmp w0, w1, uxtb
|
||||
; CHECK-NEXT: csinv w0, wzr, wzr, ne
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.8b, w0
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
|
||||
%cmp31 = icmp eq i8 %a, %b
|
||||
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
|
||||
ret <8x i8> %e
|
||||
}
|
||||
|
||||
define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v8i8_f32:
|
||||
; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
|
||||
%cmp31 = fcmp oeq float %a, %b
|
||||
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
|
||||
ret <8x i8> %e
|
||||
}
|
||||
|
||||
define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v8i8_f64:
|
||||
; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
|
||||
%cmp31 = fcmp oeq double %a, %b
|
||||
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
|
||||
ret <8x i8> %e
|
||||
}
|
||||
|
||||
define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v16i8_i8:
|
||||
; CHECK: and w0, w0, #0xff
|
||||
; CHECK-NEXT: cmp w0, w1, uxtb
|
||||
; CHECK-NEXT: csinv w0, wzr, wzr, ne
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.16b, w0
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
|
||||
%cmp31 = icmp eq i8 %a, %b
|
||||
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
|
||||
ret <16x i8> %e
|
||||
}
|
||||
|
||||
define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v16i8_f32:
|
||||
; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
|
||||
%cmp31 = fcmp oeq float %a, %b
|
||||
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
|
||||
ret <16x i8> %e
|
||||
}
|
||||
|
||||
define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v16i8_f64:
|
||||
; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
|
||||
%cmp31 = fcmp oeq double %a, %b
|
||||
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
|
||||
ret <16x i8> %e
|
||||
}
|
||||
|
||||
define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v4i16:
|
||||
; CHECK: and w0, w0, #0xffff
|
||||
; CHECK-NEXT: cmp w0, w1, uxth
|
||||
; CHECK-NEXT: csinv w0, wzr, wzr, ne
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.4h, w0
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
|
||||
%cmp31 = icmp eq i16 %a, %b
|
||||
%e = select i1 %cmp31, <4x i16> %c, <4x i16> %d
|
||||
ret <4x i16> %e
|
||||
}
|
||||
|
||||
define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v8i16:
|
||||
; CHECK: and w0, w0, #0xffff
|
||||
; CHECK-NEXT: cmp w0, w1, uxth
|
||||
; CHECK-NEXT: csinv w0, wzr, wzr, ne
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.8h, w0
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
|
||||
%cmp31 = icmp eq i16 %a, %b
|
||||
%e = select i1 %cmp31, <8x i16> %c, <8x i16> %d
|
||||
ret <8x i16> %e
|
||||
}
|
||||
|
||||
define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v2i32:
|
||||
; CHECK: cmp w0, w1, uxtw
|
||||
; CHECK-NEXT: csinv w0, wzr, wzr, ne
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.2s, w0
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
|
||||
%cmp31 = icmp eq i32 %a, %b
|
||||
%e = select i1 %cmp31, <2x i32> %c, <2x i32> %d
|
||||
ret <2x i32> %e
|
||||
}
|
||||
|
||||
define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v4i32:
|
||||
; CHECK: cmp w0, w1, uxtw
|
||||
; CHECK-NEXT: csinv w0, wzr, wzr, ne
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
|
||||
%cmp31 = icmp eq i32 %a, %b
|
||||
%e = select i1 %cmp31, <4x i32> %c, <4x i32> %d
|
||||
ret <4x i32> %e
|
||||
}
|
||||
|
||||
define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v1i64:
|
||||
; CHECK: cmp x0, x1
|
||||
; CHECK-NEXT: csinv x0, xzr, xzr, ne
|
||||
; CHECK-NEXT: fmov d{{[0-9]+}}, x0
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
|
||||
%cmp31 = icmp eq i64 %a, %b
|
||||
%e = select i1 %cmp31, <1x i64> %c, <1x i64> %d
|
||||
ret <1x i64> %e
|
||||
}
|
||||
|
||||
define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v2i64:
|
||||
; CHECK: cmp x0, x1
|
||||
; CHECK-NEXT: csinv x0, xzr, xzr, ne
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.2d, x0
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
|
||||
%cmp31 = icmp eq i64 %a, %b
|
||||
%e = select i1 %cmp31, <2x i64> %c, <2x i64> %d
|
||||
ret <2x i64> %e
|
||||
}
|
||||
|
||||
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v1f32:
|
||||
; CHECK: fcmp s0, s1
|
||||
; CHECK-NEXT: fcsel s0, s2, s3, eq
|
||||
%cmp31 = fcmp oeq float %a, %b
|
||||
%e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
|
||||
ret <1 x float> %e
|
||||
}
|
||||
|
||||
define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v2f32:
|
||||
; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
|
||||
%cmp31 = fcmp oeq float %a, %b
|
||||
%e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
|
||||
ret <2 x float> %e
|
||||
}
|
||||
|
||||
define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v4f32:
|
||||
; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
|
||||
%cmp31 = fcmp oeq float %a, %b
|
||||
%e = select i1 %cmp31, <4x float> %c, <4x float> %d
|
||||
ret <4x float> %e
|
||||
}
|
||||
|
||||
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v1f64:
|
||||
; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
|
||||
%cmp31 = fcmp oeq double %a, %b
|
||||
%e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
|
||||
ret <1 x double> %e
|
||||
}
|
||||
|
||||
define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
|
||||
; CHECK-LABEL: test_select_cc_v2f64:
|
||||
; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
|
||||
; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
|
||||
; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
|
||||
%cmp31 = fcmp oeq double %a, %b
|
||||
%e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
|
||||
ret <2 x double> %e
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user