mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2024-12-26 05:32:25 +00:00)

AArch64: add initial NEON support

Patch by Ana Pazos.

- Completed implementation of instruction formats:
    AdvSIMD three same
    AdvSIMD modified immediate
    AdvSIMD scalar pairwise

- Completed implementation of instruction classes (some of the instructions
  in these classes belong to yet unfinished instruction formats):
    Vector Arithmetic
    Vector Immediate
    Vector Pairwise Arithmetic

- Initial implementation of instruction formats:
    AdvSIMD scalar two-reg misc
    AdvSIMD scalar three same

- Initial implementation of instruction class:
    Scalar Arithmetic

- Initial clang changes to support arm v8 intrinsics.
  Note: no clang changes for scalar intrinsics function name mangling yet.

- Comprehensive test cases for added instructions, to verify auto codegen,
  encoding, decoding, diagnosis, and intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187567 91177308-0d34-0410-b5e6-96231b3b80d8

This commit is contained in:
parent 691aa094da
commit 87773c318f
@@ -494,6 +494,7 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
 include "llvm/IR/IntrinsicsPowerPC.td"
 include "llvm/IR/IntrinsicsX86.td"
 include "llvm/IR/IntrinsicsARM.td"
+include "llvm/IR/IntrinsicsAArch64.td"
 include "llvm/IR/IntrinsicsXCore.td"
 include "llvm/IR/IntrinsicsHexagon.td"
 include "llvm/IR/IntrinsicsNVVM.td"
include/llvm/IR/IntrinsicsAArch64.td (new file, 41 lines)
@@ -0,0 +1,41 @@
+//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the AArch64-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON)
+
+let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
+
+// Vector Absolute Compare (Floating Point)
+def int_aarch64_neon_vacgeq : Intrinsic<[llvm_v2i64_ty],
+                                        [llvm_v2f64_ty, llvm_v2f64_ty],
+                                        [IntrNoMem]>;
+def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty],
+                                        [llvm_v2f64_ty, llvm_v2f64_ty],
+                                        [IntrNoMem]>;
+
+// Vector maxNum (Floating Point)
+def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
+
+// Vector minNum (Floating Point)
+def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;
+
+// Vector Pairwise maxNum (Floating Point)
+def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;
+
+// Vector Pairwise minNum (Floating Point)
+def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
+
+// Vector Multiply Extended (Floating Point)
+def int_aarch64_neon_vmulx : Neon_2Arg_Intrinsic;
+}
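As a usage sketch (not part of this patch), front-end or pass code could reach one of these intrinsics through IRBuilder; the enum name below is the one TableGen derives from int_aarch64_neon_vacgeq, and the helper name is purely illustrative:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Emit llvm.aarch64.neon.vacgeq: per-lane |LHS| >= |RHS| on <2 x double>,
// yielding a <2 x i64> all-ones/all-zeros mask (sketch only).
static llvm::Value *emitVacgeq(llvm::IRBuilder<> &Builder, llvm::Module *M,
                               llvm::Value *LHS, llvm::Value *RHS) {
  llvm::Function *Fn =
      llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::aarch64_neon_vacgeq);
  return Builder.CreateCall2(Fn, LHS, RHS);
}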
@@ -61,7 +61,7 @@ def CC_A64_APCS : CallingConv<[
   // Vectors and Floating-point types.
   CCIfType<[v2i8], CCBitConvertToType<f16>>,
   CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
-  CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
            CCBitConvertToType<f128>>,

@ -42,6 +42,8 @@ static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
|
||||
AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
: TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
|
||||
|
||||
const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
|
||||
|
||||
// SIMD compares set the entire lane's bits to 1
|
||||
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
|
||||
|
||||
@ -53,6 +55,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
|
||||
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
|
||||
|
||||
if (Subtarget->hasNEON()) {
|
||||
// And the vectors
|
||||
addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
|
||||
}
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
// We combine OR nodes for bitfield and NEON BSL operations.
|
||||
@ -251,6 +268,31 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
|
||||
setExceptionPointerRegister(AArch64::X0);
|
||||
setExceptionSelectorRegister(AArch64::X1);
|
||||
|
||||
if (Subtarget->hasNEON()) {
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
|
||||
|
||||
setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
|
||||
}
|
||||
}
|
||||
|
||||
EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
|
||||
@@ -777,7 +819,22 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
   case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
 
-  default: return NULL;
+  case AArch64ISD::NEON_BSL:
+    return "AArch64ISD::NEON_BSL";
+  case AArch64ISD::NEON_MOVIMM:
+    return "AArch64ISD::NEON_MOVIMM";
+  case AArch64ISD::NEON_MVNIMM:
+    return "AArch64ISD::NEON_MVNIMM";
+  case AArch64ISD::NEON_FMOVIMM:
+    return "AArch64ISD::NEON_FMOVIMM";
+  case AArch64ISD::NEON_CMP:
+    return "AArch64ISD::NEON_CMP";
+  case AArch64ISD::NEON_CMPZ:
+    return "AArch64ISD::NEON_CMPZ";
+  case AArch64ISD::NEON_TST:
+    return "AArch64ISD::NEON_TST";
+  default:
+    return NULL;
   }
 }
 
@ -2230,6 +2287,213 @@ AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
DAG.getConstant(A64CC::NE, MVT::i32));
|
||||
}
|
||||
|
||||
static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
|
||||
SDLoc DL(Op);
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
|
||||
EVT VT = Op.getValueType();
|
||||
bool Invert = false;
|
||||
SDValue Op0, Op1;
|
||||
unsigned Opcode;
|
||||
|
||||
if (LHS.getValueType().isInteger()) {
|
||||
|
||||
// Attempt to use Vector Integer Compare Mask Test instruction.
|
||||
// TST = icmp ne (and (op0, op1), zero).
|
||||
if (CC == ISD::SETNE) {
|
||||
if (((LHS.getOpcode() == ISD::AND) &&
|
||||
ISD::isBuildVectorAllZeros(RHS.getNode())) ||
|
||||
((RHS.getOpcode() == ISD::AND) &&
|
||||
ISD::isBuildVectorAllZeros(LHS.getNode()))) {
|
||||
|
||||
SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
|
||||
SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
|
||||
SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
|
||||
return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
|
||||
// Note: Compare against Zero does not support unsigned predicates.
|
||||
if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
|
||||
ISD::isBuildVectorAllZeros(LHS.getNode())) &&
|
||||
!isUnsignedIntSetCC(CC)) {
|
||||
|
||||
// If LHS is the zero value, swap operands and CondCode.
|
||||
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
|
||||
CC = getSetCCSwappedOperands(CC);
|
||||
Op0 = RHS;
|
||||
} else
|
||||
Op0 = LHS;
|
||||
|
||||
// Ensure valid CondCode for Compare Mask against Zero instruction:
|
||||
// EQ, GE, GT, LE, LT.
|
||||
if (ISD::SETNE == CC) {
|
||||
Invert = true;
|
||||
CC = ISD::SETEQ;
|
||||
}
|
||||
|
||||
// Using constant type to differentiate integer and FP compares with zero.
|
||||
Op1 = DAG.getConstant(0, MVT::i32);
|
||||
Opcode = AArch64ISD::NEON_CMPZ;
|
||||
|
||||
} else {
|
||||
// Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
|
||||
// Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
|
||||
bool Swap = false;
|
||||
switch (CC) {
|
||||
default:
|
||||
llvm_unreachable("Illegal integer comparison.");
|
||||
case ISD::SETEQ:
|
||||
case ISD::SETGT:
|
||||
case ISD::SETGE:
|
||||
case ISD::SETUGT:
|
||||
case ISD::SETUGE:
|
||||
break;
|
||||
case ISD::SETNE:
|
||||
Invert = true;
|
||||
CC = ISD::SETEQ;
|
||||
break;
|
||||
case ISD::SETULT:
|
||||
case ISD::SETULE:
|
||||
case ISD::SETLT:
|
||||
case ISD::SETLE:
|
||||
Swap = true;
|
||||
CC = getSetCCSwappedOperands(CC);
|
||||
}
|
||||
|
||||
if (Swap)
|
||||
std::swap(LHS, RHS);
|
||||
|
||||
Opcode = AArch64ISD::NEON_CMP;
|
||||
Op0 = LHS;
|
||||
Op1 = RHS;
|
||||
}
|
||||
|
||||
// Generate Compare Mask instr or Compare Mask against Zero instr.
|
||||
SDValue NeonCmp =
|
||||
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
|
||||
|
||||
if (Invert)
|
||||
NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
|
||||
|
||||
return NeonCmp;
|
||||
}
|
||||
|
||||
// Now handle Floating Point cases.
|
||||
// Attempt to use Vector Floating Point Compare Mask against Zero instruction.
|
||||
if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
|
||||
ISD::isBuildVectorAllZeros(LHS.getNode())) {
|
||||
|
||||
// If LHS is the zero value, swap operands and CondCode.
|
||||
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
|
||||
CC = getSetCCSwappedOperands(CC);
|
||||
Op0 = RHS;
|
||||
} else
|
||||
Op0 = LHS;
|
||||
|
||||
// Using constant type to differentiate integer and FP compares with zero.
|
||||
Op1 = DAG.getConstantFP(0, MVT::f32);
|
||||
Opcode = AArch64ISD::NEON_CMPZ;
|
||||
} else {
|
||||
// Attempt to use Vector Floating Point Compare Mask instruction.
|
||||
Op0 = LHS;
|
||||
Op1 = RHS;
|
||||
Opcode = AArch64ISD::NEON_CMP;
|
||||
}
|
||||
|
||||
SDValue NeonCmpAlt;
|
||||
// Some register compares have to be implemented with swapped CC and operands,
|
||||
// e.g.: OLT implemented as OGT with swapped operands.
|
||||
bool SwapIfRegArgs = false;
|
||||
|
||||
// Ensure valid CondCode for FP Compare Mask against Zero instruction:
|
||||
// EQ, GE, GT, LE, LT.
|
||||
// And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
|
||||
switch (CC) {
|
||||
default:
|
||||
llvm_unreachable("Illegal FP comparison");
|
||||
case ISD::SETUNE:
|
||||
case ISD::SETNE:
|
||||
Invert = true; // Fallthrough
|
||||
case ISD::SETOEQ:
|
||||
case ISD::SETEQ:
|
||||
CC = ISD::SETEQ;
|
||||
break;
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETLT:
|
||||
CC = ISD::SETLT;
|
||||
SwapIfRegArgs = true;
|
||||
break;
|
||||
case ISD::SETOGT:
|
||||
case ISD::SETGT:
|
||||
CC = ISD::SETGT;
|
||||
break;
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETLE:
|
||||
CC = ISD::SETLE;
|
||||
SwapIfRegArgs = true;
|
||||
break;
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETGE:
|
||||
CC = ISD::SETGE;
|
||||
break;
|
||||
case ISD::SETUGE:
|
||||
Invert = true;
|
||||
CC = ISD::SETLT;
|
||||
SwapIfRegArgs = true;
|
||||
break;
|
||||
case ISD::SETULE:
|
||||
Invert = true;
|
||||
CC = ISD::SETGT;
|
||||
break;
|
||||
case ISD::SETUGT:
|
||||
Invert = true;
|
||||
CC = ISD::SETLE;
|
||||
SwapIfRegArgs = true;
|
||||
break;
|
||||
case ISD::SETULT:
|
||||
Invert = true;
|
||||
CC = ISD::SETGE;
|
||||
break;
|
||||
case ISD::SETUEQ:
|
||||
Invert = true; // Fallthrough
|
||||
case ISD::SETONE:
|
||||
// Expand this to (OGT |OLT).
|
||||
NeonCmpAlt =
|
||||
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
|
||||
CC = ISD::SETLT;
|
||||
SwapIfRegArgs = true;
|
||||
break;
|
||||
case ISD::SETUO:
|
||||
Invert = true; // Fallthrough
|
||||
case ISD::SETO:
|
||||
// Expand this to (OGE | OLT).
|
||||
NeonCmpAlt =
|
||||
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
|
||||
CC = ISD::SETLT;
|
||||
SwapIfRegArgs = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
|
||||
CC = getSetCCSwappedOperands(CC);
|
||||
std::swap(Op0, Op1);
|
||||
}
|
||||
|
||||
// Generate FP Compare Mask instr or FP Compare Mask against Zero instr
|
||||
SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
|
||||
|
||||
if (NeonCmpAlt.getNode())
|
||||
NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
|
||||
|
||||
if (Invert)
|
||||
NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
|
||||
|
||||
return NeonCmp;
|
||||
}
|
||||
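Two per-lane identities the integer path above relies on, written out as a plain scalar C++ sketch (illustrative only, not the lowering code itself):

#include <cstdint>

// NEON_TST (CMTST): "icmp ne (and a, b), 0" -- a lane becomes all-ones when
// the two inputs share at least one set bit.
uint64_t cmtstLane(uint64_t a, uint64_t b) { return (a & b) != 0 ? ~0ULL : 0; }

// SETNE against a register operand is handled as "not (compare-equal)" via
// the Invert flag, since there is no direct vector compare-not-equal.
uint64_t cmneLane(uint64_t a, uint64_t b) { return ~(a == b ? ~0ULL : 0ULL); }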
|
||||
// (SETCC lhs, rhs, condcode)
|
||||
SDValue
|
||||
AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
@ -2239,6 +2503,9 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
if (VT.isVector())
|
||||
return LowerVectorSETCC(Op, DAG);
|
||||
|
||||
if (LHS.getValueType() == MVT::f128) {
|
||||
// f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
|
||||
// for the rest of the function (some i32 or i64 values).
|
||||
@ -2395,11 +2662,155 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SETCC: return LowerSETCC(Op, DAG);
|
||||
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
|
||||
case ISD::VASTART: return LowerVASTART(Op, DAG);
|
||||
case ISD::BUILD_VECTOR:
|
||||
return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Check if the specified splat value corresponds to a valid vector constant
|
||||
/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
|
||||
/// so, return the encoded 8-bit immediate and the OpCmode instruction fields
|
||||
/// values.
|
||||
static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
|
||||
unsigned SplatBitSize, SelectionDAG &DAG,
|
||||
bool is128Bits, NeonModImmType type, EVT &VT,
|
||||
unsigned &Imm, unsigned &OpCmode) {
|
||||
switch (SplatBitSize) {
|
||||
default:
|
||||
llvm_unreachable("unexpected size for isNeonModifiedImm");
|
||||
case 8: {
|
||||
if (type != Neon_Mov_Imm)
|
||||
return false;
|
||||
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
|
||||
// Neon movi per byte: Op=0, Cmode=1110.
|
||||
OpCmode = 0xe;
|
||||
Imm = SplatBits;
|
||||
VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
|
||||
break;
|
||||
}
|
||||
case 16: {
|
||||
// Neon move inst per halfword
|
||||
VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
|
||||
if ((SplatBits & ~0xff) == 0) {
|
||||
// Value = 0x00nn is 0x00nn LSL 0
|
||||
// movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
|
||||
// bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
|
||||
// Op=x, Cmode=100y
|
||||
Imm = SplatBits;
|
||||
OpCmode = 0x8;
|
||||
break;
|
||||
}
|
||||
if ((SplatBits & ~0xff00) == 0) {
|
||||
// Value = 0xnn00 is 0x00nn LSL 8
|
||||
// movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
|
||||
// bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
|
||||
// Op=x, Cmode=101x
|
||||
Imm = SplatBits >> 8;
|
||||
OpCmode = 0xa;
|
||||
break;
|
||||
}
|
||||
// can't handle any other
|
||||
return false;
|
||||
}
|
||||
|
||||
case 32: {
|
||||
// First the LSL variants (MSL is unusable by some interested instructions).
|
||||
|
||||
// Neon move instr per word, shift zeros
|
||||
VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
|
||||
if ((SplatBits & ~0xff) == 0) {
|
||||
// Value = 0x000000nn is 0x000000nn LSL 0
|
||||
// movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
|
||||
// bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
|
||||
// Op=x, Cmode=000x
|
||||
Imm = SplatBits;
|
||||
OpCmode = 0;
|
||||
break;
|
||||
}
|
||||
if ((SplatBits & ~0xff00) == 0) {
|
||||
// Value = 0x0000nn00 is 0x000000nn LSL 8
|
||||
// movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
|
||||
// bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
|
||||
// Op=x, Cmode=001x
|
||||
Imm = SplatBits >> 8;
|
||||
OpCmode = 0x2;
|
||||
break;
|
||||
}
|
||||
if ((SplatBits & ~0xff0000) == 0) {
|
||||
// Value = 0x00nn0000 is 0x000000nn LSL 16
|
||||
// movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
|
||||
// bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
|
||||
// Op=x, Cmode=010x
|
||||
Imm = SplatBits >> 16;
|
||||
OpCmode = 0x4;
|
||||
break;
|
||||
}
|
||||
if ((SplatBits & ~0xff000000) == 0) {
|
||||
// Value = 0xnn000000 is 0x000000nn LSL 24
|
||||
// movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
|
||||
// bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
|
||||
// Op=x, Cmode=011x
|
||||
Imm = SplatBits >> 24;
|
||||
OpCmode = 0x6;
|
||||
break;
|
||||
}
|
||||
|
||||
// Now the MSL immediates.
|
||||
|
||||
// Neon move instr per word, shift ones
|
||||
if ((SplatBits & ~0xffff) == 0 &&
|
||||
((SplatBits | SplatUndef) & 0xff) == 0xff) {
|
||||
// Value = 0x0000nnff is 0x000000nn MSL 8
|
||||
// movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
|
||||
// Op=x, Cmode=1100
|
||||
Imm = SplatBits >> 8;
|
||||
OpCmode = 0xc;
|
||||
break;
|
||||
}
|
||||
if ((SplatBits & ~0xffffff) == 0 &&
|
||||
((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
|
||||
// Value = 0x00nnffff is 0x000000nn MSL 16
|
||||
// movi: Op=1, Cmode= 1101; mvni: Op=1, Cmode= 1101
|
||||
// Op=x, Cmode=1101
|
||||
Imm = SplatBits >> 16;
|
||||
OpCmode = 0xd;
|
||||
break;
|
||||
}
|
||||
// can't handle any other
|
||||
return false;
|
||||
}
|
||||
|
||||
case 64: {
|
||||
if (type != Neon_Mov_Imm)
|
||||
return false;
|
||||
// Neon move instr bytemask, where each byte is either 0x00 or 0xff.
|
||||
// movi Op=1, Cmode=1110.
|
||||
OpCmode = 0x1e;
|
||||
uint64_t BitMask = 0xff;
|
||||
uint64_t Val = 0;
|
||||
unsigned ImmMask = 1;
|
||||
Imm = 0;
|
||||
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
|
||||
if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
|
||||
Val |= BitMask;
|
||||
Imm |= ImmMask;
|
||||
} else if ((SplatBits & BitMask) != 0) {
|
||||
return false;
|
||||
}
|
||||
BitMask <<= 8;
|
||||
ImmMask <<= 1;
|
||||
}
|
||||
SplatBits = Val;
|
||||
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
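A self-contained sketch of the 32-bit LSL cases above, assuming a splat of the form 0x000000nn shifted by 0, 8, 16 or 24 bits (the helper name and return convention are made up for illustration):

#include <cstdint>
#include <utility>

// Returns {Imm, OpCmode} for a 32-bit MOVI-able splat, or {0, ~0u} when the
// value needs more than one non-zero byte. Cmode is 000x/001x/010x/011x for
// LSL #0/#8/#16/#24, matching the comment table in isNeonModifiedImm.
std::pair<unsigned, unsigned> encodeMovi32(uint32_t SplatBits) {
  for (unsigned Shift = 0; Shift < 32; Shift += 8) {
    uint32_t Mask = 0xffu << Shift;
    if ((SplatBits & ~Mask) == 0)
      return std::make_pair(SplatBits >> Shift, Shift / 8 * 2);
  }
  return std::make_pair(0u, ~0u);
}
// e.g. encodeMovi32(0x00004300) == {0x43, 0x2}: "movi vD.4s, #0x43, lsl #8".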
|
||||
static SDValue PerformANDCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
|
||||
@ -2725,6 +3136,7 @@ static SDValue PerformORCombine(SDNode *N,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
SDLoc DL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
|
||||
@ -2745,6 +3157,44 @@ static SDValue PerformORCombine(SDNode *N,
|
||||
if (Res.getNode())
|
||||
return Res;
|
||||
|
||||
if (!Subtarget->hasNEON())
|
||||
return SDValue();
|
||||
|
||||
// Attempt to use vector immediate-form BSL
|
||||
// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
|
||||
|
||||
SDValue N0 = N->getOperand(0);
|
||||
if (N0.getOpcode() != ISD::AND)
|
||||
return SDValue();
|
||||
|
||||
SDValue N1 = N->getOperand(1);
|
||||
if (N1.getOpcode() != ISD::AND)
|
||||
return SDValue();
|
||||
|
||||
if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
|
||||
APInt SplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
|
||||
APInt SplatBits0;
|
||||
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
|
||||
HasAnyUndefs) &&
|
||||
!HasAnyUndefs) {
|
||||
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
|
||||
APInt SplatBits1;
|
||||
if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
|
||||
HasAnyUndefs) &&
|
||||
!HasAnyUndefs && SplatBits0 == ~SplatBits1) {
|
||||
// Canonicalize the vector type to make instruction selection simpler.
|
||||
EVT CanonicalVT = VT.is128BitVector() ? MVT::v16i8 : MVT::v8i8;
|
||||
SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT,
|
||||
N0->getOperand(1), N0->getOperand(0),
|
||||
N1->getOperand(0));
|
||||
return DAG.getNode(ISD::BITCAST, DL, VT, Result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
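The combine above rests on the bitwise-select identity; a one-line scalar illustration:

#include <cstdint>

// (B & A) | (C & ~A) picks, bit by bit, B where the mask A is 1 and C where
// it is 0 -- exactly what NEON BSL computes with the mask in the destination.
uint64_t bsl(uint64_t A, uint64_t B, uint64_t C) { return (B & A) | (C & ~A); }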
|
||||
@ -2819,6 +3269,76 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If this is a case we can't handle, return null and let the default
|
||||
// expansion code take care of it.
|
||||
SDValue
|
||||
AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
const AArch64Subtarget *ST) const {
|
||||
|
||||
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
|
||||
SDLoc DL(Op);
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
APInt SplatBits, SplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
|
||||
// Note we favor lowering MOVI over MVNI.
|
||||
// This has implications on the definition of patterns in TableGen to select
|
||||
// BIC immediate instructions but not ORR immediate instructions.
|
||||
// If this lowering order is changed, TableGen patterns for BIC immediate and
|
||||
// ORR immediate instructions have to be updated.
|
||||
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
|
||||
if (SplatBitSize <= 64) {
|
||||
// First attempt to use vector immediate-form MOVI
|
||||
EVT NeonMovVT;
|
||||
unsigned Imm = 0;
|
||||
unsigned OpCmode = 0;
|
||||
|
||||
if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
|
||||
SplatBitSize, DAG, VT.is128BitVector(),
|
||||
Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
|
||||
SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
|
||||
SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
|
||||
|
||||
if (ImmVal.getNode() && OpCmodeVal.getNode()) {
|
||||
SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
|
||||
ImmVal, OpCmodeVal);
|
||||
return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
|
||||
}
|
||||
}
|
||||
|
||||
// Then attempt to use vector immediate-form MVNI
|
||||
uint64_t NegatedImm = (~SplatBits).getZExtValue();
|
||||
if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
|
||||
DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
|
||||
Imm, OpCmode)) {
|
||||
SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
|
||||
SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
|
||||
if (ImmVal.getNode() && OpCmodeVal.getNode()) {
|
||||
SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
|
||||
ImmVal, OpCmodeVal);
|
||||
return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to use vector immediate-form FMOV
|
||||
if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
|
||||
(VT == MVT::v2f64 && SplatBitSize == 64)) {
|
||||
APFloat RealVal(
|
||||
SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
|
||||
SplatBits);
|
||||
uint32_t ImmVal;
|
||||
if (A64Imms::isFPImm(RealVal, ImmVal)) {
|
||||
SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
|
||||
return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
AArch64TargetLowering::ConstraintType
|
||||
AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
|
||||
if (Constraint.size() == 1) {
|
||||
|
@ -111,7 +111,28 @@ namespace AArch64ISD {
|
||||
// created using the small memory model style: i.e. adrp/add or
|
||||
// adrp/mem-op. This exists to prevent bare TargetAddresses which may never
|
||||
// get selected.
|
||||
WrapperSmall
|
||||
WrapperSmall,
|
||||
|
||||
// Vector bitwise select
|
||||
NEON_BSL,
|
||||
|
||||
// Vector move immediate
|
||||
NEON_MOVIMM,
|
||||
|
||||
// Vector Move Inverted Immediate
|
||||
NEON_MVNIMM,
|
||||
|
||||
// Vector FP move immediate
|
||||
NEON_FMOVIMM,
|
||||
|
||||
// Vector compare
|
||||
NEON_CMP,
|
||||
|
||||
// Vector compare zero
|
||||
NEON_CMPZ,
|
||||
|
||||
// Vector compare bitwise test
|
||||
NEON_TST
|
||||
};
|
||||
}
|
||||
|
||||
@ -148,9 +169,11 @@ public:
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
|
||||
SDLoc DL, SDValue &Chain) const;
|
||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
const AArch64Subtarget *ST) const;
|
||||
|
||||
void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
|
||||
SDValue &Chain) const;
|
||||
|
||||
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
|
||||
/// for tail call optimization. Targets which want to do tail call
|
||||
@ -253,6 +276,10 @@ private:
|
||||
return &getTargetMachine().getSubtarget<AArch64Subtarget>();
|
||||
}
|
||||
};
|
||||
enum NeonModImmType {
|
||||
Neon_Mov_Imm,
|
||||
Neon_Mvn_Imm
|
||||
};
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
|
||||
|
@ -959,3 +959,96 @@ class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
|
||||
let Inst{4-0} = op4;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Neon Instruction Format Definitions.
|
||||
//
|
||||
|
||||
let Predicates = [HasNEON] in {
|
||||
|
||||
class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
|
||||
: InstAlias<Asm, Result, Emit> {
|
||||
}
|
||||
|
||||
// Format AdvSIMD 3 vector registers with same vector type
|
||||
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
|
||||
dag outs, dag ins, string asmstr,
|
||||
list<dag> patterns, InstrItinClass itin>
|
||||
: A64InstRdnm<outs, ins, asmstr, patterns, itin>
|
||||
{
|
||||
let Inst{31} = 0b0;
|
||||
let Inst{30} = q;
|
||||
let Inst{29} = u;
|
||||
let Inst{28-24} = 0b01110;
|
||||
let Inst{23-22} = size;
|
||||
let Inst{21} = 0b1;
|
||||
// Inherit Rm in 20-16
|
||||
let Inst{15-11} = opcode;
|
||||
let Inst{10} = 0b1;
|
||||
// Inherit Rn in 9-5
|
||||
// Inherit Rd in 4-0
|
||||
}
|
||||
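For reference, the bit assignments above compose into a 32-bit word as follows; this standalone sketch is purely illustrative (the real encoder is generated by TableGen from the format class):

#include <cstdint>

// Assemble an AdvSIMD "three same" encoding from its fields. Rm/Rn/Rd are
// 5-bit register numbers, Opcode is 5 bits, Size is 2 bits, Q/U single bits.
uint32_t encode3VSame(unsigned Q, unsigned U, unsigned Size, unsigned Opcode,
                      unsigned Rm, unsigned Rn, unsigned Rd) {
  return (Q << 30) | (U << 29) | (0x0Eu << 24) /* bits 28-24 = 0b01110 */ |
         (Size << 22) | (1u << 21) | (Rm << 16) | (Opcode << 11) |
         (1u << 10) | (Rn << 5) | Rd;
}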
|
||||
// Format AdvSIMD 1 vector register with modified immediate
|
||||
class NeonI_1VModImm<bit q, bit op,
|
||||
dag outs, dag ins, string asmstr,
|
||||
list<dag> patterns, InstrItinClass itin>
|
||||
: A64InstRd<outs,ins, asmstr, patterns, itin>
|
||||
{
|
||||
bits<8> Imm;
|
||||
bits<4> cmode;
|
||||
let Inst{31} = 0b0;
|
||||
let Inst{30} = q;
|
||||
let Inst{29} = op;
|
||||
let Inst{28-19} = 0b0111100000;
|
||||
let Inst{15-12} = cmode;
|
||||
let Inst{11} = 0b0; // o2
|
||||
let Inst{10} = 1;
|
||||
// Inherit Rd in 4-0
|
||||
let Inst{18-16} = Imm{7-5}; // imm a:b:c
|
||||
let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h
|
||||
}
|
||||
|
||||
// Format AdvSIMD 3 scalar registers with same type
|
||||
|
||||
class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
|
||||
dag outs, dag ins, string asmstr,
|
||||
list<dag> patterns, InstrItinClass itin>
|
||||
: A64InstRdnm<outs, ins, asmstr, patterns, itin>
|
||||
{
|
||||
let Inst{31} = 0b0;
|
||||
let Inst{30} = 0b1;
|
||||
let Inst{29} = u;
|
||||
let Inst{28-24} = 0b11110;
|
||||
let Inst{23-22} = size;
|
||||
let Inst{21} = 0b1;
|
||||
// Inherit Rm in 20-16
|
||||
let Inst{15-11} = opcode;
|
||||
let Inst{10} = 0b1;
|
||||
// Inherit Rn in 9-5
|
||||
// Inherit Rd in 4-0
|
||||
}
|
||||
|
||||
|
||||
// Format AdvSIMD 2 vector registers miscellaneous
|
||||
class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
|
||||
dag outs, dag ins, string asmstr,
|
||||
list<dag> patterns, InstrItinClass itin>
|
||||
: A64InstRdn<outs, ins, asmstr, patterns, itin>
|
||||
{
|
||||
let Inst{31} = 0b0;
|
||||
let Inst{30} = q;
|
||||
let Inst{29} = u;
|
||||
let Inst{28-24} = 0b01110;
|
||||
let Inst{23-22} = size;
|
||||
let Inst{21-17} = 0b10000;
|
||||
let Inst{16-12} = opcode;
|
||||
let Inst{11-10} = 0b10;
|
||||
|
||||
// Inherit Rn in 9-5
|
||||
// Inherit Rd in 4-0
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,17 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM Instruction Predicate Definitions.
|
||||
//
|
||||
def HasNEON : Predicate<"Subtarget->hasNEON()">,
|
||||
AssemblerPredicate<"FeatureNEON", "neon">;
|
||||
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
|
||||
AssemblerPredicate<"FeatureCrypto","crypto">;
|
||||
|
||||
// Use fused MAC if more precision in FP computation is allowed.
|
||||
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
|
||||
" FPOpFusion::Fast)">;
|
||||
include "AArch64InstrFormats.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2173,6 +2184,29 @@ def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
|
||||
def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
|
||||
def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
|
||||
|
||||
// Extra patterns for when we're allowed to optimise separate multiplication and
|
||||
// addition.
|
||||
let Predicates = [UseFusedMAC] in {
|
||||
def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
|
||||
(FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
|
||||
def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
|
||||
(FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
|
||||
def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
|
||||
(FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
|
||||
def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
|
||||
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
|
||||
|
||||
def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
|
||||
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
|
||||
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra),
|
||||
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)),
|
||||
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Floating-point <-> fixed-point conversion instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -5123,3 +5157,9 @@ defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
|
||||
|
||||
defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
|
||||
(i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Advanced SIMD (NEON) Support
|
||||
//
|
||||
|
||||
include "AArch64InstrNEON.td"
|
lib/Target/AArch64/AArch64InstrNEON.td (new file, 1634 lines; diff suppressed because it is too large)
@@ -109,6 +109,11 @@ bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
   case MachineOperand::MO_Immediate:
     MCOp = MCOperand::CreateImm(MO.getImm());
     break;
+  case MachineOperand::MO_FPImmediate: {
+    assert(MO.getFPImm()->isZero() && "Only fp imm 0.0 is supported");
+    MCOp = MCOperand::CreateFPImm(0.0);
+    break;
+  }
   case MachineOperand::MO_BlockAddress:
     MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
     break;
@@ -185,7 +185,7 @@ foreach Index = 0-31 in {
 // These two classes contain the same registers, which should be reasonably
 // sensible for MC and allocation purposes, but allows them to be treated
 // separately for things like stack spilling.
-def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64,
                           (sequence "V%u", 0, 31)>;
 
 def VPR128 : RegisterClass<"AArch64",
|
||||
using namespace llvm;
|
||||
|
||||
AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
|
||||
: AArch64GenSubtargetInfo(TT, CPU, FS)
|
||||
, HasNEON(true)
|
||||
, HasCrypto(true)
|
||||
, TargetTriple(TT) {
|
||||
: AArch64GenSubtargetInfo(TT, CPU, FS), HasNEON(false), HasCrypto(false),
|
||||
TargetTriple(TT) {
|
||||
|
||||
ParseSubtargetFeatures(CPU, FS);
|
||||
}
|
||||
|
@@ -48,6 +48,9 @@ public:
   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
   bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
 
+  bool hasNEON() const { return HasNEON; }
+
+  bool hasCrypto() const { return HasCrypto; }
 };
 } // End llvm namespace
 
@ -664,8 +664,42 @@ public:
|
||||
return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
|
||||
}
|
||||
|
||||
template<int MemSize> bool isSImm7Scaled() const {
|
||||
if (!isImm()) return false;
|
||||
bool isNeonMovImmShiftLSL() const {
|
||||
if (!isShiftOrExtend())
|
||||
return false;
|
||||
|
||||
if (ShiftExtend.ShiftType != A64SE::LSL)
|
||||
return false;
|
||||
|
||||
// Valid shift amount is 0, 8, 16 and 24.
|
||||
return ShiftExtend.Amount % 8 == 0 && ShiftExtend.Amount <= 24;
|
||||
}
|
||||
|
||||
bool isNeonMovImmShiftLSLH() const {
|
||||
if (!isShiftOrExtend())
|
||||
return false;
|
||||
|
||||
if (ShiftExtend.ShiftType != A64SE::LSL)
|
||||
return false;
|
||||
|
||||
// Valid shift amount is 0 and 8.
|
||||
return ShiftExtend.Amount == 0 || ShiftExtend.Amount == 8;
|
||||
}
|
||||
|
||||
bool isNeonMovImmShiftMSL() const {
|
||||
if (!isShiftOrExtend())
|
||||
return false;
|
||||
|
||||
if (ShiftExtend.ShiftType != A64SE::MSL)
|
||||
return false;
|
||||
|
||||
// Valid shift amount is 8 and 16.
|
||||
return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16;
|
||||
}
|
||||
|
||||
template <int MemSize> bool isSImm7Scaled() const {
|
||||
if (!isImm())
|
||||
return false;
|
||||
|
||||
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
|
||||
if (!CE) return false;
|
||||
@ -705,10 +739,27 @@ public:
|
||||
return isa<MCConstantExpr>(getImm());
|
||||
}
|
||||
|
||||
bool isNeonUImm64Mask() const {
|
||||
if (!isImm())
|
||||
return false;
|
||||
|
||||
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
|
||||
if (!CE)
|
||||
return false;
|
||||
|
||||
uint64_t Value = CE->getValue();
|
||||
|
||||
// i64 value with each byte being either 0x00 or 0xff.
|
||||
for (unsigned i = 0; i < 8; ++i, Value >>= 8)
|
||||
if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
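The check above, together with addNeonUImm64MaskOperands further down, amounts to the following standalone sketch (illustrative helper, not part of the patch):

#include <cstdint>

// A 64-bit "bytemask" immediate is valid only if every byte is 0x00 or 0xff;
// it is then encoded as one bit per byte (bit i set when byte i is 0xff).
bool encodeBytemask(uint64_t Value, unsigned &Imm) {
  Imm = 0;
  for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
    unsigned Byte = Value & 0xff;
    if (Byte != 0x00 && Byte != 0xff)
      return false;
    if (Byte == 0xff)
      Imm |= 1u << i;
  }
  return true;
}
// e.g. 0x00ff00ff00ff00ff encodes as Imm == 0x55.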
|
||||
static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
|
||||
unsigned ShiftAmount,
|
||||
bool ImplicitAmount,
|
||||
SMLoc S, SMLoc E) {
|
||||
SMLoc S,SMLoc E) {
|
||||
AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
|
||||
Op->ImmWithLSL.Val = Val;
|
||||
Op->ImmWithLSL.ShiftAmount = ShiftAmount;
|
||||
@ -1026,6 +1077,40 @@ public:
|
||||
Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
|
||||
}
|
||||
|
||||
// For Vector Immediates shifted imm operands.
|
||||
void addNeonMovImmShiftLSLOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(N == 1 && "Invalid number of operands!");
|
||||
|
||||
if (ShiftExtend.Amount % 8 != 0 || ShiftExtend.Amount > 24)
|
||||
llvm_unreachable("Invalid shift amount for vector immediate inst.");
|
||||
|
||||
// Encode LSL shift amount 0, 8, 16, 24 as 0, 1, 2, 3.
|
||||
int64_t Imm = ShiftExtend.Amount / 8;
|
||||
Inst.addOperand(MCOperand::CreateImm(Imm));
|
||||
}
|
||||
|
||||
void addNeonMovImmShiftLSLHOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(N == 1 && "Invalid number of operands!");
|
||||
|
||||
if (ShiftExtend.Amount != 0 && ShiftExtend.Amount != 8)
|
||||
llvm_unreachable("Invalid shift amount for vector immediate inst.");
|
||||
|
||||
// Encode LSLH shift amount 0, 8 as 0, 1.
|
||||
int64_t Imm = ShiftExtend.Amount / 8;
|
||||
Inst.addOperand(MCOperand::CreateImm(Imm));
|
||||
}
|
||||
|
||||
void addNeonMovImmShiftMSLOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(N == 1 && "Invalid number of operands!");
|
||||
|
||||
if (ShiftExtend.Amount != 8 && ShiftExtend.Amount != 16)
|
||||
llvm_unreachable("Invalid shift amount for vector immediate inst.");
|
||||
|
||||
// Encode MSL shift amount 8, 16 as 0, 1.
|
||||
int64_t Imm = ShiftExtend.Amount / 8 - 1;
|
||||
Inst.addOperand(MCOperand::CreateImm(Imm));
|
||||
}
|
||||
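The three addNeonMovImmShift* methods above apply the following encodings; a compact sketch (the helper and its -1 error convention are made up for illustration):

// LSL per word: 0/8/16/24 -> 0..3; LSL per halfword: 0/8 -> 0..1;
// MSL: 8/16 -> 0..1. Returns -1 for an amount the form cannot represent.
int encodeMoviShift(bool IsMSL, bool IsHalf, unsigned Amount) {
  if (IsMSL)
    return (Amount == 8 || Amount == 16) ? int(Amount / 8 - 1) : -1;
  unsigned Max = IsHalf ? 8 : 24;
  return (Amount % 8 == 0 && Amount <= Max) ? int(Amount / 8) : -1;
}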
|
||||
// For the extend in load-store (register offset) instructions.
|
||||
template<unsigned MemSize>
|
||||
void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const {
|
||||
@ -1065,6 +1150,20 @@ public:
|
||||
|
||||
Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
|
||||
}
|
||||
|
||||
void addNeonUImm64MaskOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(N == 1 && "Invalid number of operands!");
|
||||
|
||||
// A bit from each byte in the constant forms the encoded immediate
|
||||
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
|
||||
uint64_t Value = CE->getValue();
|
||||
|
||||
unsigned Imm = 0;
|
||||
for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
|
||||
Imm |= (Value & 1) << i;
|
||||
}
|
||||
Inst.addOperand(MCOperand::CreateImm(Imm));
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace.
|
||||
@ -1660,20 +1759,21 @@ AArch64AsmParser::ParseShiftExtend(
|
||||
std::string LowerID = IDVal.lower();
|
||||
|
||||
A64SE::ShiftExtSpecifiers Spec =
|
||||
StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
|
||||
.Case("lsl", A64SE::LSL)
|
||||
.Case("lsr", A64SE::LSR)
|
||||
.Case("asr", A64SE::ASR)
|
||||
.Case("ror", A64SE::ROR)
|
||||
.Case("uxtb", A64SE::UXTB)
|
||||
.Case("uxth", A64SE::UXTH)
|
||||
.Case("uxtw", A64SE::UXTW)
|
||||
.Case("uxtx", A64SE::UXTX)
|
||||
.Case("sxtb", A64SE::SXTB)
|
||||
.Case("sxth", A64SE::SXTH)
|
||||
.Case("sxtw", A64SE::SXTW)
|
||||
.Case("sxtx", A64SE::SXTX)
|
||||
.Default(A64SE::Invalid);
|
||||
StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
|
||||
.Case("lsl", A64SE::LSL)
|
||||
.Case("msl", A64SE::MSL)
|
||||
.Case("lsr", A64SE::LSR)
|
||||
.Case("asr", A64SE::ASR)
|
||||
.Case("ror", A64SE::ROR)
|
||||
.Case("uxtb", A64SE::UXTB)
|
||||
.Case("uxth", A64SE::UXTH)
|
||||
.Case("uxtw", A64SE::UXTW)
|
||||
.Case("uxtx", A64SE::UXTX)
|
||||
.Case("sxtb", A64SE::SXTB)
|
||||
.Case("sxth", A64SE::SXTH)
|
||||
.Case("sxtw", A64SE::SXTW)
|
||||
.Case("sxtx", A64SE::SXTX)
|
||||
.Default(A64SE::Invalid);
|
||||
|
||||
if (Spec == A64SE::Invalid)
|
||||
return MatchOperand_NoMatch;
|
||||
@ -1683,8 +1783,8 @@ AArch64AsmParser::ParseShiftExtend(
|
||||
S = Parser.getTok().getLoc();
|
||||
Parser.Lex();
|
||||
|
||||
if (Spec != A64SE::LSL && Spec != A64SE::LSR &&
|
||||
Spec != A64SE::ASR && Spec != A64SE::ROR) {
|
||||
if (Spec != A64SE::LSL && Spec != A64SE::LSR && Spec != A64SE::ASR &&
|
||||
Spec != A64SE::ROR && Spec != A64SE::MSL) {
|
||||
// The shift amount can be omitted for the extending versions, but not real
|
||||
// shifts:
|
||||
// add x0, x0, x0, uxtb
|
||||
@ -2019,7 +2119,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
"expected compatible register or floating-point constant");
|
||||
case Match_FPZero:
|
||||
return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
|
||||
"expected floating-point constant #0.0");
|
||||
"expected floating-point constant #0.0 or invalid register type");
|
||||
case Match_Label:
|
||||
return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
|
||||
"expected label or encodable integer pc offset");
|
||||
|
@ -85,6 +85,9 @@ static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
|
||||
unsigned RegNo, uint64_t Address,
|
||||
const void *Decoder);
|
||||
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
uint64_t Address,
|
||||
const void *Decoder);
|
||||
static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
|
||||
unsigned RegNo, uint64_t Address,
|
||||
const void *Decoder);
|
||||
@ -126,6 +129,10 @@ static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
|
||||
unsigned ShiftAmount,
|
||||
uint64_t Address,
|
||||
const void *Decoder);
|
||||
template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
|
||||
static DecodeStatus
|
||||
DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
|
||||
uint64_t Address, const void *Decoder);
|
||||
|
||||
static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
|
||||
unsigned ShiftAmount,
|
||||
@ -336,9 +343,20 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
return MCDisassembler::Success;
|
||||
}
|
||||
|
||||
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
uint64_t Address,
|
||||
const void *Decoder) {
|
||||
if (RegNo > 31)
|
||||
return MCDisassembler::Fail;
|
||||
|
||||
uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
|
||||
Inst.addOperand(MCOperand::CreateReg(Register));
|
||||
return MCDisassembler::Success;
|
||||
}
|
||||
|
||||
static DecodeStatus
|
||||
DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
uint64_t Address, const void *Decoder) {
|
||||
uint64_t Address, const void *Decoder) {
|
||||
if (RegNo > 31)
|
||||
return MCDisassembler::Fail;
|
||||
|
||||
@ -799,4 +817,24 @@ extern "C" void LLVMInitializeAArch64Disassembler() {
|
||||
createAArch64Disassembler);
|
||||
}
|
||||
|
||||
template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
|
||||
static DecodeStatus
|
||||
DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
|
||||
uint64_t Address, const void *Decoder) {
|
||||
bool IsLSL = false;
|
||||
if (Ext == A64SE::LSL)
|
||||
IsLSL = true;
|
||||
else if (Ext != A64SE::MSL)
|
||||
return MCDisassembler::Fail;
|
||||
|
||||
// MSL and LSLH accepts encoded shift amount 0 or 1.
|
||||
if ((!IsLSL || (IsLSL && IsHalf)) && ShiftAmount != 0 && ShiftAmount != 1)
|
||||
return MCDisassembler::Fail;
|
||||
|
||||
// LSL accepts encoded shift amount 0, 1, 2 or 3.
|
||||
if (IsLSL && ShiftAmount > 3)
|
||||
return MCDisassembler::Fail;
|
||||
|
||||
Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
|
||||
return MCDisassembler::Success;
|
||||
}
|
||||
|
@ -406,3 +406,84 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
|
||||
|
||||
printAnnotation(O, Annot);
|
||||
}
|
||||
|
||||
template <A64SE::ShiftExtSpecifiers Ext, bool isHalf>
|
||||
void AArch64InstPrinter::printNeonMovImmShiftOperand(const MCInst *MI,
|
||||
unsigned OpNum,
|
||||
raw_ostream &O) {
|
||||
const MCOperand &MO = MI->getOperand(OpNum);
|
||||
|
||||
assert(MO.isImm() &&
|
||||
"Immediate operand required for Neon vector immediate inst.");
|
||||
|
||||
bool IsLSL = false;
|
||||
if (Ext == A64SE::LSL)
|
||||
IsLSL = true;
|
||||
else if (Ext != A64SE::MSL)
|
||||
llvm_unreachable("Invalid shift specifier in movi instruction");
|
||||
|
||||
int64_t Imm = MO.getImm();
|
||||
|
||||
// MSL and LSLH accepts encoded shift amount 0 or 1.
|
||||
if ((!IsLSL || (IsLSL && isHalf)) && Imm != 0 && Imm != 1)
|
||||
llvm_unreachable("Invalid shift amount in movi instruction");
|
||||
|
||||
// LSH accepts encoded shift amount 0, 1, 2 or 3.
|
||||
if (IsLSL && (Imm < 0 || Imm > 3))
|
||||
llvm_unreachable("Invalid shift amount in movi instruction");
|
||||
|
||||
// Print shift amount as multiple of 8 with MSL encoded shift amount
|
||||
// 0 and 1 printed as 8 and 16.
|
||||
if (!IsLSL)
|
||||
Imm++;
|
||||
Imm *= 8;
|
||||
|
||||
// LSL #0 is not printed
|
||||
if (IsLSL) {
|
||||
if (Imm == 0)
|
||||
return;
|
||||
O << ", lsl";
|
||||
} else
|
||||
O << ", msl";
|
||||
|
||||
O << " #" << Imm;
|
||||
}
|
||||
|
||||
void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &o) {
|
||||
o << "#0x0";
|
||||
}
|
||||
|
||||
void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O) {
|
||||
const MCOperand &MOUImm = MI->getOperand(OpNum);
|
||||
|
||||
assert(MOUImm.isImm() &&
|
||||
"Immediate operand required for Neon vector immediate inst.");
|
||||
|
||||
unsigned Imm = MOUImm.getImm();
|
||||
|
||||
O << "#0x";
|
||||
O.write_hex(Imm);
|
||||
}
|
||||
|
||||
void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
|
||||
unsigned OpNum,
|
||||
raw_ostream &O) {
|
||||
const MCOperand &MOUImm8 = MI->getOperand(OpNum);
|
||||
|
||||
assert(MOUImm8.isImm() &&
|
||||
"Immediate operand required for Neon vector immediate bytemask inst.");
|
||||
|
||||
uint32_t UImm8 = MOUImm8.getImm();
|
||||
uint64_t Mask = 0;
|
||||
|
||||
// Replicates 0x00 or 0xff byte in a 64-bit vector
|
||||
for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
|
||||
if ((UImm8 >> ByteNum) & 1)
|
||||
Mask |= (uint64_t)0xff << (8 * ByteNum);
|
||||
}
|
||||
|
||||
O << "#0x";
|
||||
O.write_hex(Mask);
|
||||
}
|
||||
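The expansion the printer performs is the inverse of the parser-side bytemask encoding; as a standalone sketch:

#include <cstdint>

// Expand the 8-bit bytemask operand back to the 64-bit constant the
// instruction materializes (bit i set -> byte i becomes 0xff).
uint64_t expandBytemask(unsigned Imm8) {
  uint64_t Mask = 0;
  for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum)
    if ((Imm8 >> ByteNum) & 1)
      Mask |= uint64_t(0xff) << (8 * ByteNum);
  return Mask;
}
// expandBytemask(0x55) == 0x00ff00ff00ff00ff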
|
@ -164,9 +164,14 @@ public:
|
||||
return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
|
||||
}
|
||||
|
||||
|
||||
template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
|
||||
void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O);
|
||||
void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -40,7 +40,7 @@ MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
                                                           StringRef CPU,
                                                           StringRef FS) {
   MCSubtargetInfo *X = new MCSubtargetInfo();
-  InitAArch64MCSubtargetInfo(X, TT, CPU, "");
+  InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
   return X;
 }
 
@ -1105,3 +1105,69 @@ bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
|
||||
|
||||
return isMOVNImm(RegWidth, Value, UImm16, Shift);
|
||||
}
|
||||
|
||||
// decodeNeonModShiftImm - Decode a Neon OpCmode value into the
|
||||
// the shift amount and the shift type (shift zeros or ones in) and
|
||||
// returns whether the OpCmode value implies a shift operation.
|
||||
bool A64Imms::decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
|
||||
unsigned &ShiftOnesIn) {
|
||||
ShiftImm = 0;
|
||||
ShiftOnesIn = false;
|
||||
bool HasShift = true;
|
||||
|
||||
if (OpCmode == 0xe) {
|
||||
// movi byte
|
||||
HasShift = false;
|
||||
} else if (OpCmode == 0x1e) {
|
||||
// movi 64-bit bytemask
|
||||
HasShift = false;
|
||||
} else if ((OpCmode & 0xc) == 0x8) {
|
||||
// shift zeros, per halfword
|
||||
ShiftImm = ((OpCmode & 0x2) >> 1);
|
||||
} else if ((OpCmode & 0x8) == 0) {
|
||||
// shift zeros, per word
|
||||
ShiftImm = ((OpCmode & 0x6) >> 1);
|
||||
} else if ((OpCmode & 0xe) == 0xc) {
|
||||
// shift ones, per word
|
||||
ShiftOnesIn = true;
|
||||
ShiftImm = (OpCmode & 0x1);
|
||||
} else {
|
||||
// per byte, per bytemask
|
||||
llvm_unreachable("Unsupported Neon modified immediate");
|
||||
}
|
||||
|
||||
return HasShift;
|
||||
}
|
||||
|
||||
// decodeNeonModImm - Decode a NEON modified immediate and OpCmode values
|
||||
// into the element value and the element size in bits.
|
||||
uint64_t A64Imms::decodeNeonModImm(unsigned Val, unsigned OpCmode,
|
||||
unsigned &EltBits) {
|
||||
uint64_t DecodedVal = Val;
|
||||
EltBits = 0;
|
||||
|
||||
if (OpCmode == 0xe) {
|
||||
// movi byte
|
||||
EltBits = 8;
|
||||
} else if (OpCmode == 0x1e) {
|
||||
// movi 64-bit bytemask
|
||||
DecodedVal = 0;
|
||||
for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
|
||||
if ((Val >> ByteNum) & 1)
|
||||
DecodedVal |= (uint64_t)0xff << (8 * ByteNum);
|
||||
}
|
||||
EltBits = 64;
|
||||
} else if ((OpCmode & 0xc) == 0x8) {
|
||||
// shift zeros, per halfword
|
||||
EltBits = 16;
|
||||
} else if ((OpCmode & 0x8) == 0) {
|
||||
// shift zeros, per word
|
||||
EltBits = 32;
|
||||
} else if ((OpCmode & 0xe) == 0xc) {
|
||||
// shift ones, per word
|
||||
EltBits = 32;
|
||||
} else {
|
||||
llvm_unreachable("Unsupported Neon modified immediate");
|
||||
}
|
||||
return DecodedVal;
|
||||
}
|
||||
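Putting the two helpers together, the full per-lane constant can be reconstructed as in this sketch (assumes the A64Imms declarations added to AArch64BaseInfo.h below are in scope; the wrapper itself is not part of the patch):

#include <cstdint>

uint64_t decodeLaneConstant(unsigned Val, unsigned OpCmode, unsigned &EltBits) {
  uint64_t V = llvm::A64Imms::decodeNeonModImm(Val, OpCmode, EltBits);
  unsigned ShiftImm, ShiftOnesIn;
  if (llvm::A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn)) {
    // LSL slots encode multiples of 8; MSL slots 0/1 mean #8/#16 and shift
    // ones into the low bits.
    unsigned Amount = ShiftOnesIn ? 8 * (ShiftImm + 1) : 8 * ShiftImm;
    V <<= Amount;
    if (ShiftOnesIn)
      V |= (1ULL << Amount) - 1;
  }
  return V;
}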
|
@@ -289,6 +289,7 @@ namespace A64SE {
   enum ShiftExtSpecifiers {
     Invalid = -1,
     LSL,
+    MSL,
     LSR,
     ASR,
     ROR,
@@ -1068,7 +1069,10 @@ namespace A64Imms {
   // MOVN but *not* with a MOVZ (because that would take priority).
   bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
 
-}
+  uint64_t decodeNeonModImm(unsigned Val, unsigned OpCmode, unsigned &EltBits);
+  bool decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
+                             unsigned &ShiftOnesIn);
+}
 
 } // end namespace llvm;
 
test/CodeGen/AArch64/complex-copy-noneon.ll (new file, 21 lines)
@@ -0,0 +1,21 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s
|
||||
|
||||
; The DAG combiner decided to use a vector load/store for this struct copy
|
||||
; previously. This probably shouldn't happen without NEON, but the most
|
||||
; important thing is that it compiles.
|
||||
|
||||
define void @store_combine() nounwind {
|
||||
%src = alloca { double, double }, align 8
|
||||
%dst = alloca { double, double }, align 8
|
||||
|
||||
%src.realp = getelementptr inbounds { double, double }* %src, i32 0, i32 0
|
||||
%src.real = load double* %src.realp
|
||||
%src.imagp = getelementptr inbounds { double, double }* %src, i32 0, i32 1
|
||||
%src.imag = load double* %src.imagp
|
||||
|
||||
%dst.realp = getelementptr inbounds { double, double }* %dst, i32 0, i32 0
|
||||
%dst.imagp = getelementptr inbounds { double, double }* %dst, i32 0, i32 1
|
||||
store double %src.real, double* %dst.realp
|
||||
store double %src.imag, double* %dst.imagp
|
||||
ret void
|
||||
}
|
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
define i64 @test_inline_constraint_r(i64 %base, i32 %offset) {
|
||||
; CHECK-LABEL: test_inline_constraint_r:
|
||||
@ -44,6 +44,26 @@ define i32 @test_inline_constraint_Q(i32 *%ptr) {
|
||||
|
||||
@dump = global fp128 zeroinitializer
|
||||
|
||||
define void @test_inline_constraint_w(<8 x i8> %vec64, <4 x float> %vec128, half %hlf, float %flt, double %dbl, fp128 %quad) {
|
||||
; CHECK: test_inline_constraint_w:
|
||||
call <8 x i8> asm sideeffect "add $0.8b, $1.8b, $1.8b", "=w,w"(<8 x i8> %vec64)
|
||||
call <8 x i8> asm sideeffect "fadd $0.4s, $1.4s, $1.4s", "=w,w"(<4 x float> %vec128)
|
||||
; CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
; CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
|
||||
; Arguably semantically dodgy to output "vN", but it's what GCC does
|
||||
; so purely for compatibility we want vector registers to be output.
|
||||
call float asm sideeffect "fcvt ${0:s}, ${1:h}", "=w,w"(half undef)
|
||||
call float asm sideeffect "fadd $0.2s, $0.2s, $0.2s", "=w,w"(float %flt)
|
||||
call double asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(double %dbl)
|
||||
call fp128 asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(fp128 %quad)
|
||||
; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
|
||||
; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_inline_constraint_I() {
|
||||
; CHECK-LABEL: test_inline_constraint_I:
|
||||
call void asm sideeffect "add x0, x0, $0", "I"(i32 0)
|
||||
|
test/CodeGen/AArch64/neon-aba-abd.ll (new file, 226 lines)
@@ -0,0 +1,226 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_uabd_v8i8:
|
||||
%abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: uabd v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %abd
|
||||
}
|
||||
|
||||
define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_uaba_v8i8:
|
||||
%abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
%aba = add <8 x i8> %lhs, %abd
|
||||
; CHECK: uaba v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %aba
|
||||
}
|
||||
|
||||
define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_sabd_v8i8:
|
||||
%abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: sabd v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %abd
|
||||
}
|
||||
|
||||
define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_saba_v8i8:
|
||||
%abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
%aba = add <8 x i8> %lhs, %abd
|
||||
; CHECK: saba v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %aba
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uabd_v16i8:
|
||||
%abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: uabd v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %abd
|
||||
}
|
||||
|
||||
define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uaba_v16i8:
|
||||
%abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
%aba = add <16 x i8> %lhs, %abd
|
||||
; CHECK: uaba v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %aba
|
||||
}
|
||||
|
||||
define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_sabd_v16i8:
|
||||
%abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: sabd v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %abd
|
||||
}
|
||||
|
||||
define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_saba_v16i8:
|
||||
%abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
%aba = add <16 x i8> %lhs, %abd
|
||||
; CHECK: saba v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %aba
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uabd_v4i16:
|
||||
%abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: uabd v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %abd
|
||||
}
|
||||
|
||||
define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uaba_v4i16:
|
||||
%abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
%aba = add <4 x i16> %lhs, %abd
|
||||
; CHECK: uaba v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %aba
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sabd_v4i16:
|
||||
%abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sabd v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %abd
|
||||
}
|
||||
|
||||
define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_saba_v4i16:
|
||||
%abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
%aba = add <4 x i16> %lhs, %abd
|
||||
; CHECK: saba v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %aba
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uabd_v8i16:
|
||||
%abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: uabd v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %abd
|
||||
}
|
||||
|
||||
define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uaba_v8i16:
|
||||
%abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
%aba = add <8 x i16> %lhs, %abd
|
||||
; CHECK: uaba v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %aba
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sabd_v8i16:
|
||||
%abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sabd v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %abd
|
||||
}
|
||||
|
||||
define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_saba_v8i16:
|
||||
%abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
%aba = add <8 x i16> %lhs, %abd
|
||||
; CHECK: saba v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %aba
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uabd_v2i32:
|
||||
%abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: uabd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %abd
|
||||
}
|
||||
|
||||
define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uaba_v2i32:
|
||||
%abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
%aba = add <2 x i32> %lhs, %abd
|
||||
; CHECK: uaba v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %aba
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sabd_v2i32:
|
||||
%abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sabd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %abd
|
||||
}
|
||||
|
||||
define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_saba_v2i32:
|
||||
%abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
%aba = add <2 x i32> %lhs, %abd
|
||||
; CHECK: saba v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %aba
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uabd_v4i32:
|
||||
%abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: uabd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %abd
|
||||
}
|
||||
|
||||
define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uaba_v4i32:
|
||||
%abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
%aba = add <4 x i32> %lhs, %abd
|
||||
; CHECK: uaba v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %aba
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sabd_v4i32:
|
||||
%abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sabd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %abd
|
||||
}
|
||||
|
||||
define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_saba_v4i32:
|
||||
%abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
%aba = add <4 x i32> %lhs, %abd
|
||||
; CHECK: saba v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %aba
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>)
|
||||
|
||||
define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fabd_v2f32:
|
||||
%abd = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fabd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %abd
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>)
|
||||
|
||||
define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fabd_v4f32:
|
||||
%abd = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fabd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %abd
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fabd_v2f64:
|
||||
%abd = call <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fabd v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %abd
|
||||
}
92
test/CodeGen/AArch64/neon-add-pairwise.ll
Normal file
@@ -0,0 +1,92 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 is possible, but would be odd.
; CHECK: test_addp_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: addp v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_addp_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: addp v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_addp_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: addp v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

declare <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_addp_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: addp v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_addp_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: addp v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_addp_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: addp v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}


declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)

define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_addp_v2i64:
  %val = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: addp v0.2d, v0.2d, v1.2d
  ret <2 x i64> %val
}

declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_faddp_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: faddp v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_faddp_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: faddp v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_faddp_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: faddp v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}
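; ----------------------------------------------------------------------------
; Editor's note (not part of the committed test file): a short sketch of how
; the pairwise-add intrinsic exercised above can be chained into a horizontal
; reduction. It assumes llvm.arm.neon.vpadd keeps ADDP's semantics, i.e. the
; two operands are concatenated and adjacent pairs are summed, so two
; applications leave the sum of all four lanes in lane 0. The function name
; @horizontal_add_v4i32 is hypothetical; the declaration simply mirrors the
; one in the tests above, and codegen for this composite is not something the
; committed tests verify.
declare <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32>, <4 x i32>)

define i32 @horizontal_add_v4i32(<4 x i32> %v) {
  ; [a, b, c, d] -> [a+b, c+d, a+b, c+d]
  %p1 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %v, <4 x i32> %v)
  ; -> [a+b+c+d, ...] in every lane
  %p2 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %p1, <4 x i32> %p1)
  %sum = extractelement <4 x i32> %p2, i32 0
  ret i32 %sum
}
; ----------------------------------------------------------------------------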
132
test/CodeGen/AArch64/neon-add-sub.ll
Normal file
@@ -0,0 +1,132 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
;CHECK: add {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
  %tmp3 = add <8 x i8> %A, %B;
  ret <8 x i8> %tmp3
}

define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
;CHECK: add {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
  %tmp3 = add <16 x i8> %A, %B;
  ret <16 x i8> %tmp3
}

define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
;CHECK: add {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
  %tmp3 = add <4 x i16> %A, %B;
  ret <4 x i16> %tmp3
}

define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
;CHECK: add {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
  %tmp3 = add <8 x i16> %A, %B;
  ret <8 x i16> %tmp3
}

define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
;CHECK: add {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
  %tmp3 = add <2 x i32> %A, %B;
  ret <2 x i32> %tmp3
}

define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
;CHECK: add {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
  %tmp3 = add <4 x i32> %A, %B;
  ret <4 x i32> %tmp3
}

define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
;CHECK: add {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
  %tmp3 = add <2 x i64> %A, %B;
  ret <2 x i64> %tmp3
}

define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
;CHECK: fadd {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
  %tmp3 = fadd <2 x float> %A, %B;
  ret <2 x float> %tmp3
}

define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
;CHECK: fadd {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
  %tmp3 = fadd <4 x float> %A, %B;
  ret <4 x float> %tmp3
}
define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
;CHECK: fadd {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
  %tmp3 = fadd <2 x double> %A, %B;
  ret <2 x double> %tmp3
}

define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
;CHECK: sub {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
  %tmp3 = sub <8 x i8> %A, %B;
  ret <8 x i8> %tmp3
}

define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
;CHECK: sub {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
  %tmp3 = sub <16 x i8> %A, %B;
  ret <16 x i8> %tmp3
}

define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
;CHECK: sub {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
  %tmp3 = sub <4 x i16> %A, %B;
  ret <4 x i16> %tmp3
}

define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
;CHECK: sub {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
  %tmp3 = sub <8 x i16> %A, %B;
  ret <8 x i16> %tmp3
}

define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
;CHECK: sub {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
  %tmp3 = sub <2 x i32> %A, %B;
  ret <2 x i32> %tmp3
}

define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) {
;CHECK: sub {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
  %tmp3 = sub <4 x i32> %A, %B;
  ret <4 x i32> %tmp3
}

define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
;CHECK: sub {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
  %tmp3 = sub <2 x i64> %A, %B;
  ret <2 x i64> %tmp3
}

define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
;CHECK: fsub {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
  %tmp3 = fsub <2 x float> %A, %B;
  ret <2 x float> %tmp3
}

define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
;CHECK: fsub {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
  %tmp3 = fsub <4 x float> %A, %B;
  ret <4 x float> %tmp3
}
define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
;CHECK: fsub {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
  %tmp3 = fsub <2 x double> %A, %B;
  ret <2 x double> %tmp3
}

define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
  %tmp3 = add <1 x i64> %A, %B;
  ret <1 x i64> %tmp3
}

define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
  %tmp3 = sub <1 x i64> %A, %B;
  ret <1 x i64> %tmp3
}
574
test/CodeGen/AArch64/neon-bitcast.ll
Normal file
@@ -0,0 +1,574 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; From <8 x i8>
|
||||
|
||||
define <1 x i64> @test_v8i8_to_v1i64(<8 x i8> %in) nounwind {
|
||||
; CHECK: test_v8i8_to_v1i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i8> %in to <1 x i64>
|
||||
ret <1 x i64> %val
|
||||
}
|
||||
|
||||
define <2 x i32> @test_v8i8_to_v2i32(<8 x i8> %in) nounwind {
|
||||
; CHECK: test_v8i8_to_v2i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i8> %in to <2 x i32>
|
||||
ret <2 x i32> %val
|
||||
}
|
||||
|
||||
define <2 x float> @test_v8i8_to_v1f32(<8 x i8> %in) nounwind{
|
||||
; CHECK: test_v8i8_to_v1f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i8> %in to <2 x float>
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x i16> @test_v8i8_to_v4i16(<8 x i8> %in) nounwind{
|
||||
; CHECK: test_v8i8_to_v4i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i8> %in to <4 x i16>
|
||||
ret <4 x i16> %val
|
||||
}
|
||||
|
||||
define <8 x i8> @test_v8i8_to_v8i8(<8 x i8> %in) nounwind{
|
||||
; CHECK: test_v8i8_to_v8i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i8> %in to <8 x i8>
|
||||
ret <8 x i8> %val
|
||||
}
|
||||
|
||||
; From <4 x i16>
|
||||
|
||||
define <1 x i64> @test_v4i16_to_v1i64(<4 x i16> %in) nounwind {
|
||||
; CHECK: test_v4i16_to_v1i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i16> %in to <1 x i64>
|
||||
ret <1 x i64> %val
|
||||
}
|
||||
|
||||
define <2 x i32> @test_v4i16_to_v2i32(<4 x i16> %in) nounwind {
|
||||
; CHECK: test_v4i16_to_v2i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i16> %in to <2 x i32>
|
||||
ret <2 x i32> %val
|
||||
}
|
||||
|
||||
define <2 x float> @test_v4i16_to_v1f32(<4 x i16> %in) nounwind{
|
||||
; CHECK: test_v4i16_to_v1f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i16> %in to <2 x float>
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x i16> @test_v4i16_to_v4i16(<4 x i16> %in) nounwind{
|
||||
; CHECK: test_v4i16_to_v4i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i16> %in to <4 x i16>
|
||||
ret <4 x i16> %val
|
||||
}
|
||||
|
||||
define <8 x i8> @test_v4i16_to_v8i8(<4 x i16> %in) nounwind{
|
||||
; CHECK: test_v4i16_to_v8i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i16> %in to <8 x i8>
|
||||
ret <8 x i8> %val
|
||||
}
|
||||
|
||||
; From <2 x i32>
|
||||
|
||||
define <1 x i64> @test_v2i32_to_v1i64(<2 x i32> %in) nounwind {
|
||||
; CHECK: test_v2i32_to_v1i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i32> %in to <1 x i64>
|
||||
ret <1 x i64> %val
|
||||
}
|
||||
|
||||
define <2 x i32> @test_v2i32_to_v2i32(<2 x i32> %in) nounwind {
|
||||
; CHECK: test_v2i32_to_v2i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i32> %in to <2 x i32>
|
||||
ret <2 x i32> %val
|
||||
}
|
||||
|
||||
define <2 x float> @test_v2i32_to_v1f32(<2 x i32> %in) nounwind{
|
||||
; CHECK: test_v2i32_to_v1f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i32> %in to <2 x float>
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x i16> @test_v2i32_to_v4i16(<2 x i32> %in) nounwind{
|
||||
; CHECK: test_v2i32_to_v4i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i32> %in to <4 x i16>
|
||||
ret <4 x i16> %val
|
||||
}
|
||||
|
||||
define <8 x i8> @test_v2i32_to_v8i8(<2 x i32> %in) nounwind{
|
||||
; CHECK: test_v2i32_to_v8i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i32> %in to <8 x i8>
|
||||
ret <8 x i8> %val
|
||||
}
|
||||
|
||||
; From <2 x float>
|
||||
|
||||
define <1 x i64> @test_v2f32_to_v1i64(<2 x float> %in) nounwind {
|
||||
; CHECK: test_v2f32_to_v1i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x float> %in to <1 x i64>
|
||||
ret <1 x i64> %val
|
||||
}
|
||||
|
||||
define <2 x i32> @test_v2f32_to_v2i32(<2 x float> %in) nounwind {
|
||||
; CHECK: test_v2f32_to_v2i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x float> %in to <2 x i32>
|
||||
ret <2 x i32> %val
|
||||
}
|
||||
|
||||
define <2 x float> @test_v2f32_to_v2f32(<2 x float> %in) nounwind{
|
||||
; CHECK: test_v2f32_to_v2f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x float> %in to <2 x float>
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x i16> @test_v2f32_to_v4i16(<2 x float> %in) nounwind{
|
||||
; CHECK: test_v2f32_to_v4i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x float> %in to <4 x i16>
|
||||
ret <4 x i16> %val
|
||||
}
|
||||
|
||||
define <8 x i8> @test_v2f32_to_v8i8(<2 x float> %in) nounwind{
|
||||
; CHECK: test_v2f32_to_v8i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x float> %in to <8 x i8>
|
||||
ret <8 x i8> %val
|
||||
}
|
||||
|
||||
; From <1 x i64>
|
||||
|
||||
define <1 x i64> @test_v1i64_to_v1i64(<1 x i64> %in) nounwind {
|
||||
; CHECK: test_v1i64_to_v1i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <1 x i64> %in to <1 x i64>
|
||||
ret <1 x i64> %val
|
||||
}
|
||||
|
||||
define <2 x i32> @test_v1i64_to_v2i32(<1 x i64> %in) nounwind {
|
||||
; CHECK: test_v1i64_to_v2i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <1 x i64> %in to <2 x i32>
|
||||
ret <2 x i32> %val
|
||||
}
|
||||
|
||||
define <2 x float> @test_v1i64_to_v2f32(<1 x i64> %in) nounwind{
|
||||
; CHECK: test_v1i64_to_v2f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <1 x i64> %in to <2 x float>
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x i16> @test_v1i64_to_v4i16(<1 x i64> %in) nounwind{
|
||||
; CHECK: test_v1i64_to_v4i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <1 x i64> %in to <4 x i16>
|
||||
ret <4 x i16> %val
|
||||
}
|
||||
|
||||
define <8 x i8> @test_v1i64_to_v8i8(<1 x i64> %in) nounwind{
|
||||
; CHECK: test_v1i64_to_v8i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <1 x i64> %in to <8 x i8>
|
||||
ret <8 x i8> %val
|
||||
}
|
||||
|
||||
|
||||
; From <16 x i8>
|
||||
|
||||
define <2 x double> @test_v16i8_to_v2f64(<16 x i8> %in) nounwind {
|
||||
; CHECK: test_v16i8_to_v2f64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <16 x i8> %in to <2 x double>
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
define <2 x i64> @test_v16i8_to_v2i64(<16 x i8> %in) nounwind {
|
||||
; CHECK: test_v16i8_to_v2i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <16 x i8> %in to <2 x i64>
|
||||
ret <2 x i64> %val
|
||||
}
|
||||
|
||||
define <4 x i32> @test_v16i8_to_v4i32(<16 x i8> %in) nounwind {
|
||||
; CHECK: test_v16i8_to_v4i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <16 x i8> %in to <4 x i32>
|
||||
ret <4 x i32> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_v16i8_to_v2f32(<16 x i8> %in) nounwind{
|
||||
; CHECK: test_v16i8_to_v2f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <16 x i8> %in to <4 x float>
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <8 x i16> @test_v16i8_to_v8i16(<16 x i8> %in) nounwind{
|
||||
; CHECK: test_v16i8_to_v8i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <16 x i8> %in to <8 x i16>
|
||||
ret <8 x i16> %val
|
||||
}
|
||||
|
||||
define <16 x i8> @test_v16i8_to_v16i8(<16 x i8> %in) nounwind{
|
||||
; CHECK: test_v16i8_to_v16i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <16 x i8> %in to <16 x i8>
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
; From <8 x i16>
|
||||
|
||||
define <2 x double> @test_v8i16_to_v2f64(<8 x i16> %in) nounwind {
|
||||
; CHECK: test_v8i16_to_v2f64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i16> %in to <2 x double>
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
define <2 x i64> @test_v8i16_to_v2i64(<8 x i16> %in) nounwind {
|
||||
; CHECK: test_v8i16_to_v2i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i16> %in to <2 x i64>
|
||||
ret <2 x i64> %val
|
||||
}
|
||||
|
||||
define <4 x i32> @test_v8i16_to_v4i32(<8 x i16> %in) nounwind {
|
||||
; CHECK: test_v8i16_to_v4i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i16> %in to <4 x i32>
|
||||
ret <4 x i32> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_v8i16_to_v2f32(<8 x i16> %in) nounwind{
|
||||
; CHECK: test_v8i16_to_v2f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i16> %in to <4 x float>
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <8 x i16> @test_v8i16_to_v8i16(<8 x i16> %in) nounwind{
|
||||
; CHECK: test_v8i16_to_v8i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i16> %in to <8 x i16>
|
||||
ret <8 x i16> %val
|
||||
}
|
||||
|
||||
define <16 x i8> @test_v8i16_to_v16i8(<8 x i16> %in) nounwind{
|
||||
; CHECK: test_v8i16_to_v16i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <8 x i16> %in to <16 x i8>
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
; From <4 x i32>
|
||||
|
||||
define <2 x double> @test_v4i32_to_v2f64(<4 x i32> %in) nounwind {
|
||||
; CHECK: test_v4i32_to_v2f64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i32> %in to <2 x double>
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
define <2 x i64> @test_v4i32_to_v2i64(<4 x i32> %in) nounwind {
|
||||
; CHECK: test_v4i32_to_v2i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i32> %in to <2 x i64>
|
||||
ret <2 x i64> %val
|
||||
}
|
||||
|
||||
define <4 x i32> @test_v4i32_to_v4i32(<4 x i32> %in) nounwind {
|
||||
; CHECK: test_v4i32_to_v4i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i32> %in to <4 x i32>
|
||||
ret <4 x i32> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_v4i32_to_v2f32(<4 x i32> %in) nounwind{
|
||||
; CHECK: test_v4i32_to_v2f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i32> %in to <4 x float>
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <8 x i16> @test_v4i32_to_v8i16(<4 x i32> %in) nounwind{
|
||||
; CHECK: test_v4i32_to_v8i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i32> %in to <8 x i16>
|
||||
ret <8 x i16> %val
|
||||
}
|
||||
|
||||
define <16 x i8> @test_v4i32_to_v16i8(<4 x i32> %in) nounwind{
|
||||
; CHECK: test_v4i32_to_v16i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x i32> %in to <16 x i8>
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
; From <4 x float>
|
||||
|
||||
define <2 x double> @test_v4f32_to_v2f64(<4 x float> %in) nounwind {
|
||||
; CHECK: test_v4f32_to_v2f64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x float> %in to <2 x double>
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
define <2 x i64> @test_v4f32_to_v2i64(<4 x float> %in) nounwind {
|
||||
; CHECK: test_v4f32_to_v2i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x float> %in to <2 x i64>
|
||||
ret <2 x i64> %val
|
||||
}
|
||||
|
||||
define <4 x i32> @test_v4f32_to_v4i32(<4 x float> %in) nounwind {
|
||||
; CHECK: test_v4f32_to_v4i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x float> %in to <4 x i32>
|
||||
ret <4 x i32> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_v4f32_to_v4f32(<4 x float> %in) nounwind{
|
||||
; CHECK: test_v4f32_to_v4f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x float> %in to <4 x float>
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <8 x i16> @test_v4f32_to_v8i16(<4 x float> %in) nounwind{
|
||||
; CHECK: test_v4f32_to_v8i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x float> %in to <8 x i16>
|
||||
ret <8 x i16> %val
|
||||
}
|
||||
|
||||
define <16 x i8> @test_v4f32_to_v16i8(<4 x float> %in) nounwind{
|
||||
; CHECK: test_v4f32_to_v16i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <4 x float> %in to <16 x i8>
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
; From <2 x i64>
|
||||
|
||||
define <2 x double> @test_v2i64_to_v2f64(<2 x i64> %in) nounwind {
|
||||
; CHECK: test_v2i64_to_v2f64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i64> %in to <2 x double>
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
define <2 x i64> @test_v2i64_to_v2i64(<2 x i64> %in) nounwind {
|
||||
; CHECK: test_v2i64_to_v2i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i64> %in to <2 x i64>
|
||||
ret <2 x i64> %val
|
||||
}
|
||||
|
||||
define <4 x i32> @test_v2i64_to_v4i32(<2 x i64> %in) nounwind {
|
||||
; CHECK: test_v2i64_to_v4i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i64> %in to <4 x i32>
|
||||
ret <4 x i32> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_v2i64_to_v4f32(<2 x i64> %in) nounwind{
|
||||
; CHECK: test_v2i64_to_v4f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i64> %in to <4 x float>
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <8 x i16> @test_v2i64_to_v8i16(<2 x i64> %in) nounwind{
|
||||
; CHECK: test_v2i64_to_v8i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i64> %in to <8 x i16>
|
||||
ret <8 x i16> %val
|
||||
}
|
||||
|
||||
define <16 x i8> @test_v2i64_to_v16i8(<2 x i64> %in) nounwind{
|
||||
; CHECK: test_v2i64_to_v16i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x i64> %in to <16 x i8>
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
; From <2 x double>
|
||||
|
||||
define <2 x double> @test_v2f64_to_v2f64(<2 x double> %in) nounwind {
|
||||
; CHECK: test_v2f64_to_v2f64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x double> %in to <2 x double>
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
define <2 x i64> @test_v2f64_to_v2i64(<2 x double> %in) nounwind {
|
||||
; CHECK: test_v2f64_to_v2i64:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x double> %in to <2 x i64>
|
||||
ret <2 x i64> %val
|
||||
}
|
||||
|
||||
define <4 x i32> @test_v2f64_to_v4i32(<2 x double> %in) nounwind {
|
||||
; CHECK: test_v2f64_to_v4i32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x double> %in to <4 x i32>
|
||||
ret <4 x i32> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_v2f64_to_v4f32(<2 x double> %in) nounwind{
|
||||
; CHECK: test_v2f64_to_v4f32:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x double> %in to <4 x float>
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <8 x i16> @test_v2f64_to_v8i16(<2 x double> %in) nounwind{
|
||||
; CHECK: test_v2f64_to_v8i16:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x double> %in to <8 x i16>
|
||||
ret <8 x i16> %val
|
||||
}
|
||||
|
||||
define <16 x i8> @test_v2f64_to_v16i8(<2 x double> %in) nounwind{
|
||||
; CHECK: test_v2f64_to_v16i8:
|
||||
; CHECK-NEXT: // BB#0:
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%val = bitcast <2 x double> %in to <16 x i8>
|
||||
ret <16 x i8> %val
|
||||
}
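; ----------------------------------------------------------------------------
; Editor's note (not part of the committed test file): the bitcasts checked
; above are expected to be free because source and destination types occupy
; the same 64-bit or 128-bit FP/SIMD register, which is why every test only
; checks for an immediate "ret". A hypothetical composite such as the one
; below should therefore lower to a single add; the cast itself emits no
; instruction. The function name is illustrative and is not covered by the
; committed tests.
define <4 x i16> @bitcast_then_add(<8 x i8> %in, <4 x i16> %acc) {
  %cast = bitcast <8 x i8> %in to <4 x i16>
  %sum = add <4 x i16> %cast, %acc
  ret <4 x i16> %sum
}
; ----------------------------------------------------------------------------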
594
test/CodeGen/AArch64/neon-bitwise-instructions.ll
Normal file
@@ -0,0 +1,594 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||
|
||||
|
||||
define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) {
|
||||
;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = and <8 x i8> %a, %b;
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @and16xi8(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = and <16 x i8> %a, %b;
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
|
||||
define <8 x i8> @orr8xi8(<8 x i8> %a, <8 x i8> %b) {
|
||||
;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = or <8 x i8> %a, %b;
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @orr16xi8(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = or <16 x i8> %a, %b;
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
|
||||
define <8 x i8> @xor8xi8(<8 x i8> %a, <8 x i8> %b) {
|
||||
;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <8 x i8> %a, %b;
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <16 x i8> %a, %b;
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
|
||||
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
|
||||
%tmp3 = or <8 x i8> %tmp1, %tmp2
|
||||
ret <8 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
|
||||
%tmp3 = or <16 x i8> %tmp1, %tmp2
|
||||
ret <16 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <8 x i8> @orn8xi8(<8 x i8> %a, <8 x i8> %b) {
|
||||
;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp2 = or <8 x i8> %a, %tmp1
|
||||
ret <8 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @orn16xi8(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp2 = or <16 x i8> %a, %tmp1
|
||||
ret <16 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i8> @bic8xi8(<8 x i8> %a, <8 x i8> %b) {
|
||||
;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp2 = and <8 x i8> %a, %tmp1
|
||||
ret <8 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @bic16xi8(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
|
||||
%tmp2 = and <16 x i8> %a, %tmp1
|
||||
ret <16 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i32> @orrimm2s_lsl0(<2 x i32> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.2s, #0xff
|
||||
%tmp1 = or <2 x i32> %a, < i32 255, i32 255>
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @orrimm2s_lsl8(<2 x i32> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #8
|
||||
%tmp1 = or <2 x i32> %a, < i32 65280, i32 65280>
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @orrimm2s_lsl16(<2 x i32> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #16
|
||||
%tmp1 = or <2 x i32> %a, < i32 16711680, i32 16711680>
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @orrimm2s_lsl24(<2 x i32> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #24
|
||||
%tmp1 = or <2 x i32> %a, < i32 4278190080, i32 4278190080>
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @orrimm4s_lsl0(<4 x i32> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.4s, #0xff
|
||||
%tmp1 = or <4 x i32> %a, < i32 255, i32 255, i32 255, i32 255>
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @orrimm4s_lsl8(<4 x i32> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #8
|
||||
%tmp1 = or <4 x i32> %a, < i32 65280, i32 65280, i32 65280, i32 65280>
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @orrimm4s_lsl16(<4 x i32> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #16
|
||||
%tmp1 = or <4 x i32> %a, < i32 16711680, i32 16711680, i32 16711680, i32 16711680>
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @orrimm4s_lsl24(<4 x i32> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #24
|
||||
%tmp1 = or <4 x i32> %a, < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080>
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @orrimm4h_lsl0(<4 x i16> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.4h, #0xff
|
||||
%tmp1 = or <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255 >
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @orrimm4h_lsl8(<4 x i16> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.4h, #0xff, lsl #8
|
||||
%tmp1 = or <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 >
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @orrimm8h_lsl0(<8 x i16> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.8h, #0xff
|
||||
%tmp1 = or <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @orrimm8h_lsl8(<8 x i16> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.8h, #0xff, lsl #8
|
||||
%tmp1 = or <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @bicimm2s_lsl0(<2 x i32> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.2s, #0x10
|
||||
%tmp1 = and <2 x i32> %a, < i32 4294967279, i32 4294967279 >
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #8
|
||||
%tmp1 = and <2 x i32> %a, < i32 18446744073709547519, i32 18446744073709547519 >
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @bicimm2s_lsl16(<2 x i32> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #16
|
||||
%tmp1 = and <2 x i32> %a, < i32 18446744073708503039, i32 18446744073708503039 >
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @bicimm2s_lsl124(<2 x i32> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #24
|
||||
%tmp1 = and <2 x i32> %a, < i32 18446744073441116159, i32 18446744073441116159>
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @bicimm4s_lsl0(<4 x i32> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.4s, #0x10
|
||||
%tmp1 = and <4 x i32> %a, < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 >
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #8
|
||||
%tmp1 = and <4 x i32> %a, < i32 18446744073709547519, i32 18446744073709547519, i32 18446744073709547519, i32 18446744073709547519 >
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #16
|
||||
%tmp1 = and <4 x i32> %a, < i32 18446744073708503039, i32 18446744073708503039, i32 18446744073708503039, i32 18446744073708503039 >
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @bicimm4s_lsl124(<4 x i32> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #24
|
||||
%tmp1 = and <4 x i32> %a, < i32 18446744073441116159, i32 18446744073441116159, i32 18446744073441116159, i32 18446744073441116159>
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.4h, #0x10
|
||||
%tmp1 = and <4 x i16> %a, < i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599 >
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @bicimm4h_lsl0_b(<4 x i16> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.4h, #0x0
|
||||
%tmp1 = and <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 >
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.4h, #0x10, lsl #8
|
||||
%tmp1 = and <4 x i16> %a, < i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519>
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.4h, #0x0, lsl #8
|
||||
%tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255>
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.8h, #0x10
|
||||
%tmp1 = and <8 x i16> %a, < i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599,
|
||||
i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599 >
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.8h, #0x0
|
||||
%tmp1 = and <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.8h, #0x10, lsl #8
|
||||
%tmp1 = and <8 x i16> %a, < i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519,
|
||||
i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519>
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @bicimm8h_lsl8_b(<8 x i16> %a) {
|
||||
;CHECK: bic {{v[0-31]+}}.8h, #0x0, lsl #8
|
||||
%tmp1 = and <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @and2xi32(<2 x i32> %a, <2 x i32> %b) {
|
||||
;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = and <2 x i32> %a, %b;
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @and4xi16(<4 x i16> %a, <4 x i16> %b) {
|
||||
;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = and <4 x i16> %a, %b;
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @and1xi64(<1 x i64> %a, <1 x i64> %b) {
|
||||
;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = and <1 x i64> %a, %b;
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @and4xi32(<4 x i32> %a, <4 x i32> %b) {
|
||||
;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = and <4 x i32> %a, %b;
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @and8xi16(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = and <8 x i16> %a, %b;
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @and2xi64(<2 x i64> %a, <2 x i64> %b) {
|
||||
;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = and <2 x i64> %a, %b;
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @orr2xi32(<2 x i32> %a, <2 x i32> %b) {
|
||||
;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = or <2 x i32> %a, %b;
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @orr4xi16(<4 x i16> %a, <4 x i16> %b) {
|
||||
;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = or <4 x i16> %a, %b;
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @orr1xi64(<1 x i64> %a, <1 x i64> %b) {
|
||||
;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = or <1 x i64> %a, %b;
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @orr4xi32(<4 x i32> %a, <4 x i32> %b) {
|
||||
;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = or <4 x i32> %a, %b;
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @orr8xi16(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = or <8 x i16> %a, %b;
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @orr2xi64(<2 x i64> %a, <2 x i64> %b) {
|
||||
;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = or <2 x i64> %a, %b;
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @eor2xi32(<2 x i32> %a, <2 x i32> %b) {
|
||||
;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <2 x i32> %a, %b;
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @eor4xi16(<4 x i16> %a, <4 x i16> %b) {
|
||||
;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <4 x i16> %a, %b;
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @eor1xi64(<1 x i64> %a, <1 x i64> %b) {
|
||||
;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <1 x i64> %a, %b;
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @eor4xi32(<4 x i32> %a, <4 x i32> %b) {
|
||||
;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <4 x i32> %a, %b;
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @eor8xi16(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <8 x i16> %a, %b;
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @eor2xi64(<2 x i64> %a, <2 x i64> %b) {
|
||||
;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <2 x i64> %a, %b;
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
|
||||
define <2 x i32> @bic2xi32(<2 x i32> %a, <2 x i32> %b) {
|
||||
;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 >
|
||||
%tmp2 = and <2 x i32> %a, %tmp1
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i16> @bic4xi16(<4 x i16> %a, <4 x i16> %b) {
|
||||
;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1 >
|
||||
%tmp2 = and <4 x i16> %a, %tmp1
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <1 x i64> @bic1xi64(<1 x i64> %a, <1 x i64> %b) {
|
||||
;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <1 x i64> %b, < i64 -1>
|
||||
%tmp2 = and <1 x i64> %a, %tmp1
|
||||
ret <1 x i64> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i32> @bic4xi32(<4 x i32> %a, <4 x i32> %b) {
|
||||
;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%tmp2 = and <4 x i32> %a, %tmp1
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i16> @bic8xi16(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1, i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
%tmp2 = and <8 x i16> %a, %tmp1
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i64> @bic2xi64(<2 x i64> %a, <2 x i64> %b) {
|
||||
;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1>
|
||||
%tmp2 = and <2 x i64> %a, %tmp1
|
||||
ret <2 x i64> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i32> @orn2xi32(<2 x i32> %a, <2 x i32> %b) {
|
||||
;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 >
|
||||
%tmp2 = or <2 x i32> %a, %tmp1
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i16> @orn4xi16(<4 x i16> %a, <4 x i16> %b) {
|
||||
;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1 >
|
||||
%tmp2 = or <4 x i16> %a, %tmp1
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <1 x i64> @orn1xi64(<1 x i64> %a, <1 x i64> %b) {
|
||||
;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = xor <1 x i64> %b, < i64 -1>
|
||||
%tmp2 = or <1 x i64> %a, %tmp1
|
||||
ret <1 x i64> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i32> @orn4xi32(<4 x i32> %a, <4 x i32> %b) {
|
||||
;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%tmp2 = or <4 x i32> %a, %tmp1
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i16> @orn8xi16(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1, i16 -1, i16 -1, i16 -1, i16 -1 >
|
||||
%tmp2 = or <8 x i16> %a, %tmp1
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) {
|
||||
;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1>
|
||||
%tmp2 = or <2 x i64> %a, %tmp1
|
||||
ret <2 x i64> %tmp2
|
||||
}
|
||||
define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
|
||||
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = and <2 x i32> %a, < i32 -1, i32 -1 >
|
||||
%tmp2 = and <2 x i32> %b, < i32 0, i32 0 >
|
||||
%tmp3 = or <2 x i32> %tmp1, %tmp2
|
||||
ret <2 x i32> %tmp3
|
||||
}
|
||||
|
||||
|
||||
define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
|
||||
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = and <4 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1 >
|
||||
%tmp2 = and <4 x i16> %b, < i16 0, i16 0,i16 0, i16 0 >
|
||||
%tmp3 = or <4 x i16> %tmp1, %tmp2
|
||||
ret <4 x i16> %tmp3
|
||||
}
|
||||
|
||||
define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
|
||||
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = and <1 x i64> %a, < i64 -1 >
|
||||
%tmp2 = and <1 x i64> %b, < i64 0 >
|
||||
%tmp3 = or <1 x i64> %tmp1, %tmp2
|
||||
ret <1 x i64> %tmp3
|
||||
}
|
||||
|
||||
define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
|
||||
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = and <4 x i32> %a, < i32 -1, i32 -1, i32 -1, i32 -1 >
|
||||
%tmp2 = and <4 x i32> %b, < i32 0, i32 0, i32 0, i32 0 >
|
||||
%tmp3 = or <4 x i32> %tmp1, %tmp2
|
||||
ret <4 x i32> %tmp3
|
||||
}
|
||||
|
||||
define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1, i16 -1, i16 -1, i16 -1,i16 -1 >
|
||||
%tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0 >
|
||||
%tmp3 = or <8 x i16> %tmp1, %tmp2
|
||||
ret <8 x i16> %tmp3
|
||||
}
|
||||
|
||||
define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
|
||||
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = and <2 x i64> %a, < i64 -1, i64 -1 >
|
||||
%tmp2 = and <2 x i64> %b, < i64 0, i64 0 >
|
||||
%tmp3 = or <2 x i64> %tmp1, %tmp2
|
||||
ret <2 x i64> %tmp3
|
||||
}
|
||||
|
||||
|
||||
define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
|
||||
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%1 = and <8 x i8> %v1, %v2
|
||||
%2 = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||
%3 = and <8 x i8> %2, %v3
|
||||
%4 = or <8 x i8> %1, %3
|
||||
ret <8 x i8> %4
|
||||
}
|
||||
|
||||
define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
|
||||
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%1 = and <4 x i16> %v1, %v2
|
||||
%2 = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
%3 = and <4 x i16> %2, %v3
|
||||
%4 = or <4 x i16> %1, %3
|
||||
ret <4 x i16> %4
|
||||
}
|
||||
|
||||
define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
|
||||
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%1 = and <2 x i32> %v1, %v2
|
||||
%2 = xor <2 x i32> %v1, <i32 -1, i32 -1>
|
||||
%3 = and <2 x i32> %2, %v3
|
||||
%4 = or <2 x i32> %1, %3
|
||||
ret <2 x i32> %4
|
||||
}
|
||||
|
||||
define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
|
||||
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%1 = and <1 x i64> %v1, %v2
|
||||
%2 = xor <1 x i64> %v1, <i64 -1>
|
||||
%3 = and <1 x i64> %2, %v3
|
||||
%4 = or <1 x i64> %1, %3
|
||||
ret <1 x i64> %4
|
||||
}
|
||||
|
||||
define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
|
||||
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%1 = and <16 x i8> %v1, %v2
|
||||
%2 = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||
%3 = and <16 x i8> %2, %v3
|
||||
%4 = or <16 x i8> %1, %3
|
||||
ret <16 x i8> %4
|
||||
}
|
||||
|
||||
define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
|
||||
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%1 = and <8 x i16> %v1, %v2
|
||||
%2 = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
%3 = and <8 x i16> %2, %v3
|
||||
%4 = or <8 x i16> %1, %3
|
||||
ret <8 x i16> %4
|
||||
}
|
||||
|
||||
define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
|
||||
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%1 = and <4 x i32> %v1, %v2
|
||||
%2 = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%3 = and <4 x i32> %2, %v3
|
||||
%4 = or <4 x i32> %1, %3
|
||||
ret <4 x i32> %4
|
||||
}
|
||||
|
||||
define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
|
||||
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%1 = and <2 x i64> %v1, %v2
|
||||
%2 = xor <2 x i64> %v1, <i64 -1, i64 -1>
|
||||
%3 = and <2 x i64> %2, %v3
|
||||
%4 = or <2 x i64> %1, %3
|
||||
ret <2 x i64> %4
|
||||
}
|
||||
|
||||
define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.4h, #0xff
|
||||
%val = or <8 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
|
||||
ret <8 x i8> %val
|
||||
}
|
||||
|
||||
define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.4h, #0xff, lsl #8
|
||||
%val = or <8 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
|
||||
ret <8 x i8> %val
|
||||
}
|
||||
|
||||
define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.8h, #0xff
|
||||
%val = or <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) {
|
||||
;CHECK: orr {{v[0-31]+}}.8h, #0xff, lsl #8
|
||||
%val = or <16 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
1982
test/CodeGen/AArch64/neon-compare-instructions.ll
Normal file
File diff suppressed because it is too large
56
test/CodeGen/AArch64/neon-facge-facgt.ll
Normal file
@@ -0,0 +1,56 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>)
declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>)
declare <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double>, <2 x double>)

define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facge_from_intr_v2i32:
  %val = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %A, <2 x float> %B)
; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  ret <2 x i32> %val
}
define <4 x i32> @facge_from_intr_v4i32(<4 x float> %A, <4 x float> %B) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facge_from_intr_v4i32:
  %val = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %A, <4 x float> %B)
; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  ret <4 x i32> %val
}

define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facge_from_intr_v2i64:
  %val = call <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double> %A, <2 x double> %B)
; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  ret <2 x i64> %val
}

declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>)
declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>)
declare <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double>, <2 x double>)

define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facgt_from_intr_v2i32:
  %val = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %A, <2 x float> %B)
; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  ret <2 x i32> %val
}
define <4 x i32> @facgt_from_intr_v4i32(<4 x float> %A, <4 x float> %B) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facgt_from_intr_v4i32:
  %val = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %A, <4 x float> %B)
; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  ret <4 x i32> %val
}

define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facgt_from_intr_v2i64:
  %val = call <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double> %A, <2 x double> %B)
; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  ret <2 x i64> %val
}
112
test/CodeGen/AArch64/neon-fma.ll
Normal file
112
test/CodeGen/AArch64/neon-fma.ll
Normal file
@ -0,0 +1,112 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
|
||||
|
||||
define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%tmp1 = fmul <2 x float> %A, %B;
|
||||
%tmp2 = fadd <2 x float> %C, %tmp1;
|
||||
ret <2 x float> %tmp2
|
||||
}
|
||||
|
||||
define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%tmp1 = fmul <4 x float> %A, %B;
|
||||
%tmp2 = fadd <4 x float> %C, %tmp1;
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
|
||||
%tmp1 = fmul <2 x double> %A, %B;
|
||||
%tmp2 = fadd <2 x double> %C, %tmp1;
|
||||
ret <2 x double> %tmp2
|
||||
}
|
||||
|
||||
|
||||
define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
;CHECK: fmls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%tmp1 = fmul <2 x float> %A, %B;
|
||||
%tmp2 = fsub <2 x float> %C, %tmp1;
|
||||
ret <2 x float> %tmp2
|
||||
}
|
||||
|
||||
define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
|
||||
;CHECK: fmls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%tmp1 = fmul <4 x float> %A, %B;
|
||||
%tmp2 = fsub <4 x float> %C, %tmp1;
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
||||
define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
|
||||
;CHECK: fmls {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
|
||||
%tmp1 = fmul <2 x double> %A, %B;
|
||||
%tmp2 = fsub <2 x double> %C, %tmp1;
|
||||
ret <2 x double> %tmp2
|
||||
}
|
||||
|
||||
|
||||
; Another set of tests for when the intrinsic is used.
|
||||
|
||||
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
|
||||
%val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
;CHECK: fmls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%negA = fsub <2 x float> <float -0.0, float -0.0>, %A
|
||||
%val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C)
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
|
||||
;CHECK: fmls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%negA = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A
|
||||
%val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C)
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
|
||||
;CHECK: fmls {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
|
||||
%negA = fsub <2 x double> <double -0.0, double -0.0>, %A
|
||||
%val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C)
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
|
||||
;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
|
||||
%val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
|
||||
ret <2 x double> %val
|
||||
}
|
54
test/CodeGen/AArch64/neon-frsqrt-frecp.ll
Normal file
54
test/CodeGen/AArch64/neon-frsqrt-frecp.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||
|
||||
; Set of tests for when the intrinsic is used.
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @frsqrts_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: frsqrts v0.2s, v0.2s, v1.2s
|
||||
%val = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @frsqrts_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: frsqrts v0.4s, v0.4s, v1.4s
|
||||
%val = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @frsqrts_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: frsqrts v0.2d, v0.2d, v1.2d
|
||||
%val = call <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @frecps_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: frecps v0.2s, v0.2s, v1.2s
|
||||
%val = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @frecps_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: frecps v0.4s, v0.4s, v1.4s
|
||||
%val = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @frecps_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: frecps v0.2d, v0.2d, v1.2d
|
||||
%val = call <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
207
test/CodeGen/AArch64/neon-halving-add-sub.ll
Normal file
207
test/CodeGen/AArch64/neon-halving-add-sub.ll
Normal file
@ -0,0 +1,207 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_uhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_uhadd_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: uhadd v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_shadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_shadd_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: shadd v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_uhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uhadd_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: uhadd v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_shadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_shadd_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: shadd v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_uhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uhadd_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: uhadd v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_shadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_shadd_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: shadd v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_uhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uhadd_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: uhadd v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_shadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_shadd_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: shadd v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_uhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uhadd_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: uhadd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_shadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_shadd_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: shadd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_uhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uhadd_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: uhadd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_shadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_shadd_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: shadd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_uhsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_uhsub_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: uhsub v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_shsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_shsub_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: shsub v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_uhsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uhsub_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: uhsub v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_shsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_shsub_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: shsub v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_uhsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uhsub_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: uhsub v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_shsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_shsub_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: shsub v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_uhsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uhsub_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: uhsub v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_shsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_shsub_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: shsub v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_uhsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uhsub_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: uhsub v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_shsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_shsub_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: shsub v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_uhsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uhsub_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: uhsub v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_shsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_shsub_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: shsub v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
310
test/CodeGen/AArch64/neon-max-min-pairwise.ll
Normal file
310
test/CodeGen/AArch64/neon-max-min-pairwise.ll
Normal file
@ -0,0 +1,310 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: test_smaxp_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: smaxp v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: umaxp v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_smaxp_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: smaxp v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_umaxp_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: umaxp v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_smaxp_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: smaxp v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_umaxp_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: umaxp v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_smaxp_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: smaxp v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_umaxp_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: umaxp v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_smaxp_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: smaxp v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_umaxp_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: umaxp v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_smaxp_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: smaxp v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_umaxp_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: umaxp v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: test_sminp_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: sminp v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: uminp v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_sminp_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: sminp v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uminp_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: uminp v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sminp_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sminp v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uminp_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: uminp v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sminp_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sminp v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uminp_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: uminp v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sminp_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sminp v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uminp_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: uminp v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sminp_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sminp v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uminp_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: uminp v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fmaxp_v2f32:
|
||||
%val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fmaxp v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fmaxp_v4f32:
|
||||
%val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fmaxp v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fmaxp_v2f64:
|
||||
%val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fmaxp v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fminp_v2f32:
|
||||
%val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fminp v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fminp_v4f32:
|
||||
%val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fminp v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fminp_v2f64:
|
||||
%val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fminp v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fmaxnmp_v2f32:
|
||||
%val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fmaxnmp_v4f32:
|
||||
%val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fmaxnmp_v2f64:
|
||||
%val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fminnmp_v2f32:
|
||||
%val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fminnmp v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fminnmp_v4f32:
|
||||
%val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fminnmp v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fminnmp_v2f64:
|
||||
%val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fminnmp v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
310
test/CodeGen/AArch64/neon-max-min.ll
Normal file
310
test/CodeGen/AArch64/neon-max-min.ll
Normal file
@ -0,0 +1,310 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: test_smax_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: smax v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: umax v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_smax_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: smax v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_umax_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: umax v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_smax_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: smax v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_umax_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: umax v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_smax_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: smax v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_umax_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: umax v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_smax_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: smax v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_umax_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: umax v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_smax_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: smax v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_umax_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: umax v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; Using registers other than v0, v1 are possible, but would be odd.
|
||||
; CHECK: test_smin_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: smin v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: umin v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_smin_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: smin v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_umin_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: umin v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_smin_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: smin v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_umin_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: umin v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_smin_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: smin v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_umin_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: umin v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_smin_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: smin v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_umin_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: umin v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_smin_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: smin v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_umin_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: umin v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fmax_v2f32:
|
||||
%val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fmax v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fmax_v4f32:
|
||||
%val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fmax v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fmax_v2f64:
|
||||
%val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fmax v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fmin_v2f32:
|
||||
%val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fmin v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fmin_v4f32:
|
||||
%val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fmin v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fmin_v2f64:
|
||||
%val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fmin v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
|
||||
declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fmaxnm_v2f32:
|
||||
%val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fmaxnm_v4f32:
|
||||
%val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fmaxnm_v2f64:
|
||||
%val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %val
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; CHECK: test_fminnm_v2f32:
|
||||
%val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
; CHECK: fminnm v0.2s, v0.2s, v1.2s
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; CHECK: test_fminnm_v4f32:
|
||||
%val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
; CHECK: fminnm v0.4s, v0.4s, v1.4s
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; CHECK: test_fminnm_v2f64:
|
||||
%val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
; CHECK: fminnm v0.2d, v0.2d, v1.2d
|
||||
ret <2 x double> %val
|
||||
}
|
88
test/CodeGen/AArch64/neon-mla-mls.ll
Normal file
88
test/CodeGen/AArch64/neon-mla-mls.ll
Normal file
@ -0,0 +1,88 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||
|
||||
|
||||
define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
|
||||
;CHECK: mla {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = mul <8 x i8> %A, %B;
|
||||
%tmp2 = add <8 x i8> %C, %tmp1;
|
||||
ret <8 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @mla16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
|
||||
;CHECK: mla {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = mul <16 x i8> %A, %B;
|
||||
%tmp2 = add <16 x i8> %C, %tmp1;
|
||||
ret <16 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
|
||||
;CHECK: mla {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
|
||||
%tmp1 = mul <4 x i16> %A, %B;
|
||||
%tmp2 = add <4 x i16> %C, %tmp1;
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i16> @mla8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
|
||||
;CHECK: mla {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
|
||||
%tmp1 = mul <8 x i16> %A, %B;
|
||||
%tmp2 = add <8 x i16> %C, %tmp1;
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
|
||||
;CHECK: mla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%tmp1 = mul <2 x i32> %A, %B;
|
||||
%tmp2 = add <2 x i32> %C, %tmp1;
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i32> @mla4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
|
||||
;CHECK: mla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%tmp1 = mul <4 x i32> %A, %B;
|
||||
%tmp2 = add <4 x i32> %C, %tmp1;
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
|
||||
;CHECK: mls {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp1 = mul <8 x i8> %A, %B;
|
||||
%tmp2 = sub <8 x i8> %C, %tmp1;
|
||||
ret <8 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <16 x i8> @mls16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
|
||||
;CHECK: mls {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp1 = mul <16 x i8> %A, %B;
|
||||
%tmp2 = sub <16 x i8> %C, %tmp1;
|
||||
ret <16 x i8> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
|
||||
;CHECK: mls {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
|
||||
%tmp1 = mul <4 x i16> %A, %B;
|
||||
%tmp2 = sub <4 x i16> %C, %tmp1;
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <8 x i16> @mls8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
|
||||
;CHECK: mls {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
|
||||
%tmp1 = mul <8 x i16> %A, %B;
|
||||
%tmp2 = sub <8 x i16> %C, %tmp1;
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
|
||||
;CHECK: mls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%tmp1 = mul <2 x i32> %A, %B;
|
||||
%tmp2 = sub <2 x i32> %C, %tmp1;
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
define <4 x i32> @mls4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
|
||||
;CHECK: mls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%tmp1 = mul <4 x i32> %A, %B;
|
||||
%tmp2 = sub <4 x i32> %C, %tmp1;
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
|
205
test/CodeGen/AArch64/neon-mov.ll
Normal file
205
test/CodeGen/AArch64/neon-mov.ll
Normal file
@ -0,0 +1,205 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||
|
||||
define <8 x i8> @movi8b() {
|
||||
;CHECK: movi {{v[0-31]+}}.8b, #0x8
|
||||
ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
|
||||
}
|
||||
|
||||
define <16 x i8> @movi16b() {
|
||||
;CHECK: movi {{v[0-31]+}}.16b, #0x8
|
||||
ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
|
||||
}
|
||||
|
||||
define <2 x i32> @movi2s_lsl0() {
|
||||
;CHECK: movi {{v[0-31]+}}.2s, #0xff
|
||||
ret <2 x i32> < i32 255, i32 255 >
|
||||
}
|
||||
|
||||
define <2 x i32> @movi2s_lsl8() {
|
||||
;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #8
|
||||
ret <2 x i32> < i32 65280, i32 65280 >
|
||||
}
|
||||
|
||||
define <2 x i32> @movi2s_lsl16() {
|
||||
;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #16
|
||||
ret <2 x i32> < i32 16711680, i32 16711680 >
|
||||
|
||||
}
|
||||
|
||||
define <2 x i32> @movi2s_lsl24() {
|
||||
;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #24
|
||||
ret <2 x i32> < i32 4278190080, i32 4278190080 >
|
||||
}
|
||||
|
||||
define <4 x i32> @movi4s_lsl0() {
|
||||
;CHECK: movi {{v[0-31]+}}.4s, #0xff
|
||||
ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 >
|
||||
}
|
||||
|
||||
define <4 x i32> @movi4s_lsl8() {
|
||||
;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #8
|
||||
ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 >
|
||||
}
|
||||
|
||||
define <4 x i32> @movi4s_lsl16() {
|
||||
;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #16
|
||||
ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 >
|
||||
|
||||
}
|
||||
|
||||
define <4 x i32> @movi4s_lsl24() {
|
||||
;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #24
|
||||
ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 >
|
||||
}
|
||||
|
||||
define <4 x i16> @movi4h_lsl0() {
|
||||
;CHECK: movi {{v[0-31]+}}.4h, #0xff
|
||||
ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 >
|
||||
}
|
||||
|
||||
define <4 x i16> @movi4h_lsl8() {
|
||||
;CHECK: movi {{v[0-31]+}}.4h, #0xff, lsl #8
|
||||
ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 >
|
||||
}
|
||||
|
||||
define <8 x i16> @movi8h_lsl0() {
|
||||
;CHECK: movi {{v[0-31]+}}.8h, #0xff
|
||||
ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
|
||||
}
|
||||
|
||||
define <8 x i16> @movi8h_lsl8() {
|
||||
;CHECK: movi {{v[0-31]+}}.8h, #0xff, lsl #8
|
||||
ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
}

define <2 x i32> @mvni2s_lsl0() {
;CHECK: mvni {{v[0-31]+}}.2s, #0x10
ret <2 x i32> < i32 4294967279, i32 4294967279 >
}

define <2 x i32> @mvni2s_lsl8() {
;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #8
ret <2 x i32> < i32 4294963199, i32 4294963199 >
}

define <2 x i32> @mvni2s_lsl16() {
;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #16
ret <2 x i32> < i32 4293918719, i32 4293918719 >
}

define <2 x i32> @mvni2s_lsl24() {
;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #24
ret <2 x i32> < i32 4026531839, i32 4026531839 >
}

define <4 x i32> @mvni4s_lsl0() {
;CHECK: mvni {{v[0-31]+}}.4s, #0x10
ret <4 x i32> < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 >
}

define <4 x i32> @mvni4s_lsl8() {
;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #8
ret <4 x i32> < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 >
}

define <4 x i32> @mvni4s_lsl16() {
;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #16
ret <4 x i32> < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 >
}

define <4 x i32> @mvni4s_lsl24() {
;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #24
ret <4 x i32> < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839 >
}

define <4 x i16> @mvni4h_lsl0() {
;CHECK: mvni {{v[0-31]+}}.4h, #0x10
ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
}

define <4 x i16> @mvni4h_lsl8() {
;CHECK: mvni {{v[0-31]+}}.4h, #0x10, lsl #8
ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
}

define <8 x i16> @mvni8h_lsl0() {
;CHECK: mvni {{v[0-31]+}}.8h, #0x10
ret <8 x i16> < i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519 >
}

define <8 x i16> @mvni8h_lsl8() {
;CHECK: mvni {{v[0-31]+}}.8h, #0x10, lsl #8
ret <8 x i16> < i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439 >
}

define <2 x i32> @movi2s_msl8(<2 x i32> %a) {
;CHECK: movi {{v[0-31]+}}.2s, #0xff, msl #8
ret <2 x i32> < i32 65535, i32 65535 >
}

define <2 x i32> @movi2s_msl16() {
;CHECK: movi {{v[0-31]+}}.2s, #0xff, msl #16
ret <2 x i32> < i32 16777215, i32 16777215 >
}

define <4 x i32> @movi4s_msl8() {
;CHECK: movi {{v[0-31]+}}.4s, #0xff, msl #8
ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 >
}

define <4 x i32> @movi4s_msl16() {
;CHECK: movi {{v[0-31]+}}.4s, #0xff, msl #16
ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 >
}

define <2 x i32> @mvni2s_msl8() {
;CHECK: mvni {{v[0-31]+}}.2s, #0x10, msl #8
ret <2 x i32> < i32 18446744073709547264, i32 18446744073709547264>
}

define <2 x i32> @mvni2s_msl16() {
;CHECK: mvni {{v[0-31]+}}.2s, #0x10, msl #16
ret <2 x i32> < i32 18446744073708437504, i32 18446744073708437504>
}

define <4 x i32> @mvni4s_msl8() {
;CHECK: mvni {{v[0-31]+}}.4s, #0x10, msl #8
ret <4 x i32> < i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264>
}

define <4 x i32> @mvni4s_msl16() {
;CHECK: mvni {{v[0-31]+}}.4s, #0x10, msl #16
ret <4 x i32> < i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504>
}

define <2 x i64> @movi2d() {
;CHECK: movi {{v[0-31]+}}.2d, #0xff0000ff0000ffff
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}

define <1 x i64> @movid() {
;CHECK: movi {{d[0-31]+}}, #0xff0000ff0000ffff
ret <1 x i64> < i64 18374687574888349695 >
}

define <2 x float> @fmov2s() {
;CHECK: fmov {{v[0-31]+}}.2s, #-12.00000000
ret <2 x float> < float -1.2e1, float -1.2e1>
}

define <4 x float> @fmov4s() {
;CHECK: fmov {{v[0-31]+}}.4s, #-12.00000000
ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1>
}

define <2 x double> @fmov2d() {
;CHECK: fmov {{v[0-31]+}}.2d, #-12.00000000
ret <2 x double> < double -1.2e1, double -1.2e1>
}
181
test/CodeGen/AArch64/neon-mul-div.ll
Normal file
@ -0,0 +1,181 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||
|
||||
|
||||
define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
|
||||
;CHECK: mul {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
|
||||
%tmp3 = mul <8 x i8> %A, %B;
|
||||
ret <8 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
|
||||
;CHECK: mul {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
|
||||
%tmp3 = mul <16 x i8> %A, %B;
|
||||
ret <16 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
|
||||
;CHECK: mul {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
|
||||
%tmp3 = mul <4 x i16> %A, %B;
|
||||
ret <4 x i16> %tmp3
|
||||
}
|
||||
|
||||
define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
|
||||
;CHECK: mul {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
|
||||
%tmp3 = mul <8 x i16> %A, %B;
|
||||
ret <8 x i16> %tmp3
|
||||
}
|
||||
|
||||
define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
|
||||
;CHECK: mul {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%tmp3 = mul <2 x i32> %A, %B;
|
||||
ret <2 x i32> %tmp3
|
||||
}
|
||||
|
||||
define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
|
||||
;CHECK: mul {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%tmp3 = mul <4 x i32> %A, %B;
|
||||
ret <4 x i32> %tmp3
|
||||
}
|
||||
|
||||
define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
|
||||
;CHECK: fmul {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%tmp3 = fmul <2 x float> %A, %B;
|
||||
ret <2 x float> %tmp3
|
||||
}
|
||||
|
||||
define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
|
||||
;CHECK: fmul {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%tmp3 = fmul <4 x float> %A, %B;
|
||||
ret <4 x float> %tmp3
|
||||
}
|
||||
define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
|
||||
;CHECK: fmul {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
|
||||
%tmp3 = fmul <2 x double> %A, %B;
|
||||
ret <2 x double> %tmp3
|
||||
}
|
||||
|
||||
|
||||
define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
|
||||
;CHECK: fdiv {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
|
||||
%tmp3 = fdiv <2 x float> %A, %B;
|
||||
ret <2 x float> %tmp3
|
||||
}
|
||||
|
||||
define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
|
||||
;CHECK: fdiv {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
|
||||
%tmp3 = fdiv <4 x float> %A, %B;
|
||||
ret <4 x float> %tmp3
|
||||
}
|
||||
define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
|
||||
;CHECK: fdiv {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
|
||||
%tmp3 = fdiv <2 x double> %A, %B;
|
||||
ret <2 x double> %tmp3
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: poly_mulv8i8:
|
||||
%prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: pmul v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %prod
|
||||
}
|
||||
|
||||
define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: poly_mulv16i8:
|
||||
%prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: pmul v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %prod
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sqdmulh_v4i16:
|
||||
%prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %prod
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sqdmulh_v8i16:
|
||||
%prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %prod
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sqdmulh_v2i32:
|
||||
%prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %prod
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sqdmulh_v4i32:
|
||||
%prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %prod
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sqrdmulh_v4i16:
|
||||
%prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %prod
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sqrdmulh_v8i16:
|
||||
%prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %prod
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sqrdmulh_v2i32:
|
||||
%prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %prod
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sqrdmulh_v4i32:
|
||||
%prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %prod
|
||||
}
|
||||
|
||||
declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
|
||||
declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
|
||||
declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
|
||||
|
||||
define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
|
||||
; Using registers other than v0, v1 and v2 is possible, but would be odd.
|
||||
; CHECK: fmulx v0.2s, v0.2s, v1.2s
|
||||
%val = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
|
||||
ret <2 x float> %val
|
||||
}
|
||||
|
||||
define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
|
||||
; Using registers other than v0, v1 and v2 is possible, but would be odd.
|
||||
; CHECK: fmulx v0.4s, v0.4s, v1.4s
|
||||
%val = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
|
||||
ret <4 x float> %val
|
||||
}
|
||||
|
||||
define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
|
||||
; Using registers other than v0, v1 and v2 is possible, but would be odd.
|
||||
; CHECK: fmulx v0.2d, v0.2d, v1.2d
|
||||
%val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
|
||||
ret <2 x double> %val
|
||||
}
|
105
test/CodeGen/AArch64/neon-rounding-halving-add.ll
Normal file
@ -0,0 +1,105 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_urhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_urhadd_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: urhadd v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_srhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_srhadd_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: srhadd v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_urhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_urhadd_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: urhadd v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_srhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_srhadd_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: srhadd v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_urhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_urhadd_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: urhadd v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_srhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_srhadd_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: srhadd v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_urhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_urhadd_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: urhadd v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_srhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_srhadd_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: srhadd v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_urhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_urhadd_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: urhadd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_srhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_srhadd_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: srhadd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_urhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_urhadd_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: urhadd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_srhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_srhadd_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: srhadd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
|
138
test/CodeGen/AArch64/neon-rounding-shift.ll
Normal file
@ -0,0 +1,138 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_urshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_urshl_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: urshl v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_srshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_srshl_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: srshl v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_urshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_urshl_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: urshl v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_srshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_srshl_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: srshl v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_urshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_urshl_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: urshl v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_srshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_srshl_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: srshl v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_urshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_urshl_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: urshl v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_srshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_srshl_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: srshl v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_urshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_urshl_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: urshl v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_srshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_srshl_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: srshl v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_urshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_urshl_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: urshl v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_srshl_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: srshl v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
|
||||
declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
|
||||
|
||||
define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_urshl_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: urshl d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_srshl_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: srshl d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)
|
||||
|
||||
define <2 x i64> @test_urshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_urshl_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: urshl v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @test_srshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_srshl_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: srshl v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
274
test/CodeGen/AArch64/neon-saturating-add-sub.ll
Normal file
@ -0,0 +1,274 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_uqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_uqadd_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: uqadd v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_sqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_sqadd_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: sqadd v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_uqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uqadd_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: uqadd v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_sqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_sqadd_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: sqadd v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_uqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uqadd_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: uqadd v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sqadd_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sqadd v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_uqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uqadd_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: uqadd v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sqadd_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sqadd v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_uqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uqadd_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: uqadd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sqadd_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sqadd v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_uqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uqadd_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: uqadd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sqadd_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sqadd v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
|
||||
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
|
||||
|
||||
define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_uqadd_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: uqadd d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_sqadd_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: sqadd d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
|
||||
|
||||
define <2 x i64> @test_uqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_uqadd_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: uqadd v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @test_sqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_sqadd_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: sqadd v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_uqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_uqsub_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: uqsub v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_sqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_sqsub_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: sqsub v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_uqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uqsub_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: uqsub v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_sqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_sqsub_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: sqsub v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_uqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uqsub_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: uqsub v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sqsub_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sqsub v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_uqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uqsub_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: uqsub v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sqsub_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sqsub v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_uqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uqsub_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: uqsub v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sqsub_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sqsub v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_uqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uqsub_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: uqsub v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sqsub_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sqsub v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
|
||||
|
||||
define <2 x i64> @test_uqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_uqsub_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: uqsub v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_sqsub_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: sqsub v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
|
||||
declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
|
||||
|
||||
define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_uqsub_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: uqsub d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_sqsub_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: sqsub d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
138
test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
Normal file
@ -0,0 +1,138 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_uqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_uqrshl_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: uqrshl v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_sqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_sqrshl_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: sqrshl v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_uqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uqrshl_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: uqrshl v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_sqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_sqrshl_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: sqrshl v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_uqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uqrshl_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: uqrshl v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sqrshl_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sqrshl v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_uqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uqrshl_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: uqrshl v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sqrshl_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sqrshl v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_uqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uqrshl_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: uqrshl v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sqrshl_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sqrshl v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_uqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uqrshl_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: uqrshl v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sqrshl_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sqrshl v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
|
||||
declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
|
||||
|
||||
define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_uqrshl_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: uqrshl d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_sqrshl_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: sqrshl d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)
|
||||
|
||||
define <2 x i64> @test_uqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_uqrshl_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: uqrshl v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @test_sqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_sqrshl_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: sqrshl v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
138
test/CodeGen/AArch64/neon-saturating-shift.ll
Normal file
@ -0,0 +1,138 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_uqshl_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: uqshl v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_sqshl_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: sqshl v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_uqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_uqshl_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: uqshl v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_sqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_sqshl_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: sqshl v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_uqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_uqshl_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: uqshl v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sqshl_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sqshl v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_uqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_uqshl_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: uqshl v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sqshl_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sqshl v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_uqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_uqshl_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: uqshl v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sqshl_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sqshl v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_uqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_uqshl_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: uqshl v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sqshl_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sqshl v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
|
||||
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
|
||||
|
||||
define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_uqshl_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: uqshl d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_sqshl_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: sqshl d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
|
||||
|
||||
define <2 x i64> @test_uqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_uqshl_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: uqshl v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @test_sqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_sqshl_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: sqshl v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
140
test/CodeGen/AArch64/neon-shift.ll
Normal file
@ -0,0 +1,140 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>)
|
||||
declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_ushl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_ushl_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: ushl v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i8> @test_sshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK: test_sshl_v8i8:
|
||||
%tmp1 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
|
||||
; CHECK: sshl v0.8b, v0.8b, v1.8b
|
||||
ret <8 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_ushl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_ushl_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: ushl v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
define <16 x i8> @test_sshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
|
||||
; CHECK: test_sshl_v16i8:
|
||||
%tmp1 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
|
||||
; CHECK: sshl v0.16b, v0.16b, v1.16b
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>)
|
||||
declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>)
|
||||
|
||||
define <4 x i16> @test_ushl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_ushl_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: ushl v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||
; CHECK: test_sshl_v4i16:
|
||||
%tmp1 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
|
||||
; CHECK: sshl v0.4h, v0.4h, v1.4h
|
||||
ret <4 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_ushl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_ushl_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: ushl v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_sshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK: test_sshl_v8i16:
|
||||
%tmp1 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
|
||||
; CHECK: sshl v0.8h, v0.8h, v1.8h
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>)
|
||||
declare <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_ushl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_ushl_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: ushl v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||
; CHECK: test_sshl_v2i32:
|
||||
%tmp1 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
|
||||
; CHECK: sshl v0.2s, v0.2s, v1.2s
|
||||
ret <2 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_ushl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_ushl_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: ushl v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK: test_sshl_v4i32:
|
||||
%tmp1 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
|
||||
; CHECK: sshl v0.4s, v0.4s, v1.4s
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
|
||||
declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
|
||||
|
||||
define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_ushl_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: ushl d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
|
||||
; CHECK: test_sshl_v1i64:
|
||||
%tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
|
||||
; CHECK: sshl d0, d0, d1
|
||||
ret <1 x i64> %tmp1
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)
|
||||
|
||||
define <2 x i64> @test_ushl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_ushl_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: ushl v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
||||
|
||||
define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
|
||||
; CHECK: test_sshl_v2i64:
|
||||
%tmp1 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
|
||||
; CHECK: sshl v0.2d, v0.2d, v1.2d
|
||||
ret <2 x i64> %tmp1
|
||||
}

@ -1,4 +1,4 @@
// RUN: not llvm-mc -triple=aarch64 < %s 2> %t
// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s

//------------------------------------------------------------------------------
@ -2892,13 +2892,13 @@
        movi wzr, #0x44444444
        movi w3, #0xffff
        movi x9, #0x0000ffff00000000
// CHECK-ERROR: error: invalid instruction
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: movi wzr, #0x44444444
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR: error: invalid instruction
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: movi w3, #0xffff
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR: error: invalid instruction
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: movi x9, #0x0000ffff00000000
// CHECK-ERROR-NEXT: ^

@ -1,4 +1,4 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding < %s | FileCheck %s
// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
.globl _func

// Check that the assembler can handle the documented syntax from the ARM ARM.
78
test/MC/AArch64/neon-aba-abd.s
Normal file
@ -0,0 +1,78 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Absolute Difference and Accumulate (Signed, Unsigned)
|
||||
//----------------------------------------------------------------------
|
||||
uaba v0.8b, v1.8b, v2.8b
|
||||
uaba v0.16b, v1.16b, v2.16b
|
||||
uaba v0.4h, v1.4h, v2.4h
|
||||
uaba v0.8h, v1.8h, v2.8h
|
||||
uaba v0.2s, v1.2s, v2.2s
|
||||
uaba v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: uaba v0.8b, v1.8b, v2.8b // encoding: [0x20,0x7c,0x22,0x2e]
|
||||
// CHECK: uaba v0.16b, v1.16b, v2.16b // encoding: [0x20,0x7c,0x22,0x6e]
|
||||
// CHECK: uaba v0.4h, v1.4h, v2.4h // encoding: [0x20,0x7c,0x62,0x2e]
|
||||
// CHECK: uaba v0.8h, v1.8h, v2.8h // encoding: [0x20,0x7c,0x62,0x6e]
|
||||
// CHECK: uaba v0.2s, v1.2s, v2.2s // encoding: [0x20,0x7c,0xa2,0x2e]
|
||||
// CHECK: uaba v0.4s, v1.4s, v2.4s // encoding: [0x20,0x7c,0xa2,0x6e]
|
||||
|
||||
|
||||
saba v0.8b, v1.8b, v2.8b
|
||||
saba v0.16b, v1.16b, v2.16b
|
||||
saba v0.4h, v1.4h, v2.4h
|
||||
saba v0.8h, v1.8h, v2.8h
|
||||
saba v0.2s, v1.2s, v2.2s
|
||||
saba v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: saba v0.8b, v1.8b, v2.8b // encoding: [0x20,0x7c,0x22,0x0e]
|
||||
// CHECK: saba v0.16b, v1.16b, v2.16b // encoding: [0x20,0x7c,0x22,0x4e]
|
||||
// CHECK: saba v0.4h, v1.4h, v2.4h // encoding: [0x20,0x7c,0x62,0x0e]
|
||||
// CHECK: saba v0.8h, v1.8h, v2.8h // encoding: [0x20,0x7c,0x62,0x4e]
|
||||
// CHECK: saba v0.2s, v1.2s, v2.2s // encoding: [0x20,0x7c,0xa2,0x0e]
|
||||
// CHECK: saba v0.4s, v1.4s, v2.4s // encoding: [0x20,0x7c,0xa2,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Absolute Difference (Signed, Unsigned)
|
||||
//----------------------------------------------------------------------
|
||||
uabd v0.8b, v1.8b, v2.8b
|
||||
uabd v0.16b, v1.16b, v2.16b
|
||||
uabd v0.4h, v1.4h, v2.4h
|
||||
uabd v0.8h, v1.8h, v2.8h
|
||||
uabd v0.2s, v1.2s, v2.2s
|
||||
uabd v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: uabd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x74,0x22,0x2e]
|
||||
// CHECK: uabd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x74,0x22,0x6e]
|
||||
// CHECK: uabd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x74,0x62,0x2e]
|
||||
// CHECK: uabd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x74,0x62,0x6e]
|
||||
// CHECK: uabd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x74,0xa2,0x2e]
|
||||
// CHECK: uabd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x74,0xa2,0x6e]
|
||||
|
||||
sabd v0.8b, v1.8b, v2.8b
|
||||
sabd v0.16b, v1.16b, v2.16b
|
||||
sabd v0.4h, v1.4h, v2.4h
|
||||
sabd v0.8h, v1.8h, v2.8h
|
||||
sabd v0.2s, v1.2s, v2.2s
|
||||
sabd v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: sabd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x74,0x22,0x0e]
|
||||
// CHECK: sabd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x74,0x22,0x4e]
|
||||
// CHECK: sabd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x74,0x62,0x0e]
|
||||
// CHECK: sabd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x74,0x62,0x4e]
|
||||
// CHECK: sabd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x74,0xa2,0x0e]
|
||||
// CHECK: sabd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x74,0xa2,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Absolute Difference (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fabd v0.2s, v1.2s, v2.2s
|
||||
fabd v31.4s, v15.4s, v16.4s
|
||||
fabd v7.2d, v8.2d, v25.2d
|
||||
|
||||
// CHECK: fabd v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0xa2,0x2e]
|
||||
// CHECK: fabd v31.4s, v15.4s, v16.4s // encoding: [0xff,0xd5,0xb0,0x6e]
|
||||
// CHECK: fabd v7.2d, v8.2d, v25.2d // encoding: [0x07,0xd5,0xf9,0x6e]
|
||||
|
35
test/MC/AArch64/neon-add-pairwise.s
Normal file
@ -0,0 +1,35 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Add Pairwise (Integer)
|
||||
//------------------------------------------------------------------------------
|
||||
addp v0.8b, v1.8b, v2.8b
|
||||
addp v0.16b, v1.16b, v2.16b
|
||||
addp v0.4h, v1.4h, v2.4h
|
||||
addp v0.8h, v1.8h, v2.8h
|
||||
addp v0.2s, v1.2s, v2.2s
|
||||
addp v0.4s, v1.4s, v2.4s
|
||||
addp v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: addp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xbc,0x22,0x0e]
|
||||
// CHECK: addp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xbc,0x22,0x4e]
|
||||
// CHECK: addp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xbc,0x62,0x0e]
|
||||
// CHECK: addp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xbc,0x62,0x4e]
|
||||
// CHECK: addp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xbc,0xa2,0x0e]
|
||||
// CHECK: addp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xbc,0xa2,0x4e]
|
||||
// CHECK: addp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xbc,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Add Pairwise (Floating Point
|
||||
//------------------------------------------------------------------------------
|
||||
faddp v0.2s, v1.2s, v2.2s
|
||||
faddp v0.4s, v1.4s, v2.4s
|
||||
faddp v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e]
|
||||
// CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e]
|
||||
// CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e]
82
test/MC/AArch64/neon-add-sub-instructions.s
Normal file
@ -0,0 +1,82 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
|
||||
// Vector Integer Add
|
||||
//------------------------------------------------------------------------------
|
||||
add v0.8b, v1.8b, v2.8b
|
||||
add v0.16b, v1.16b, v2.16b
|
||||
add v0.4h, v1.4h, v2.4h
|
||||
add v0.8h, v1.8h, v2.8h
|
||||
add v0.2s, v1.2s, v2.2s
|
||||
add v0.4s, v1.4s, v2.4s
|
||||
add v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: add v0.8b, v1.8b, v2.8b // encoding: [0x20,0x84,0x22,0x0e]
|
||||
// CHECK: add v0.16b, v1.16b, v2.16b // encoding: [0x20,0x84,0x22,0x4e]
|
||||
// CHECK: add v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x62,0x0e]
|
||||
// CHECK: add v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x62,0x4e]
|
||||
// CHECK: add v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0xa2,0x0e]
|
||||
// CHECK: add v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0xa2,0x4e]
|
||||
// CHECK: add v0.2d, v1.2d, v2.2d // encoding: [0x20,0x84,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Sub
|
||||
//------------------------------------------------------------------------------
|
||||
sub v0.8b, v1.8b, v2.8b
|
||||
sub v0.16b, v1.16b, v2.16b
|
||||
sub v0.4h, v1.4h, v2.4h
|
||||
sub v0.8h, v1.8h, v2.8h
|
||||
sub v0.2s, v1.2s, v2.2s
|
||||
sub v0.4s, v1.4s, v2.4s
|
||||
sub v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: sub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x84,0x22,0x2e]
|
||||
// CHECK: sub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x84,0x22,0x6e]
|
||||
// CHECK: sub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x62,0x2e]
|
||||
// CHECK: sub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x62,0x6e]
|
||||
// CHECK: sub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0xa2,0x2e]
|
||||
// CHECK: sub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0xa2,0x6e]
|
||||
// CHECK: sub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x84,0xe2,0x6e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Floating-Point Add
|
||||
//------------------------------------------------------------------------------
|
||||
fadd v0.2s, v1.2s, v2.2s
|
||||
fadd v0.4s, v1.4s, v2.4s
|
||||
fadd v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: fadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x0e]
|
||||
// CHECK: fadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x4e]
|
||||
// CHECK: fadd v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x4e]
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Floating-Point Sub
|
||||
//------------------------------------------------------------------------------
|
||||
fsub v0.2s, v1.2s, v2.2s
|
||||
fsub v0.4s, v1.4s, v2.4s
|
||||
fsub v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: fsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0xa2,0x0e]
|
||||
// CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e]
|
||||
// CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Add
|
||||
//------------------------------------------------------------------------------
|
||||
add d31, d0, d16
|
||||
|
||||
// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Sub
|
||||
//------------------------------------------------------------------------------
|
||||
sub d1, d7, d8
|
||||
|
||||
// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e]
60
test/MC/AArch64/neon-bitwise-instructions.s
Normal file
@ -0,0 +1,60 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector And
|
||||
//------------------------------------------------------------------------------
|
||||
and v0.8b, v1.8b, v2.8b
|
||||
and v0.16b, v1.16b, v2.16b
|
||||
|
||||
// CHECK: and v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0x22,0x0e]
|
||||
// CHECK: and v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0x22,0x4e]
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Orr
|
||||
//------------------------------------------------------------------------------
|
||||
orr v0.8b, v1.8b, v2.8b
|
||||
orr v0.16b, v1.16b, v2.16b
|
||||
|
||||
// CHECK: orr v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0xa2,0x0e]
|
||||
// CHECK: orr v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0xa2,0x4e]
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Eor
|
||||
//------------------------------------------------------------------------------
|
||||
eor v0.8b, v1.8b, v2.8b
|
||||
eor v0.16b, v1.16b, v2.16b
|
||||
|
||||
// CHECK: eor v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0x22,0x2e]
|
||||
// CHECK: eor v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0x22,0x6e]
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Bitwise Insert/Select (BIT, BIF, BSL), ORN and BIC
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
bit v0.8b, v1.8b, v2.8b
|
||||
bit v0.16b, v1.16b, v2.16b
|
||||
bif v0.8b, v1.8b, v2.8b
|
||||
bif v0.16b, v1.16b, v2.16b
|
||||
bsl v0.8b, v1.8b, v2.8b
|
||||
bsl v0.16b, v1.16b, v2.16b
|
||||
orn v0.8b, v1.8b, v2.8b
|
||||
orn v0.16b, v1.16b, v2.16b
|
||||
bic v0.8b, v1.8b, v2.8b
|
||||
bic v0.16b, v1.16b, v2.16b
|
||||
|
||||
// CHECK: bit v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0xa2,0x2e]
|
||||
// CHECK: bit v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0xa2,0x6e]
|
||||
// CHECK: bif v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0xe2,0x2e]
|
||||
// CHECK: bif v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0xe2,0x6e]
|
||||
// CHECK: bsl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0x62,0x2e]
|
||||
// CHECK: bsl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0x62,0x6e]
|
||||
// CHECK: orn v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0xe2,0x0e]
|
||||
// CHECK: orn v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0xe2,0x4e]
|
||||
// CHECK: bic v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0x62,0x0e]
|
||||
// CHECK: bic v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0x62,0x4e]
405
test/MC/AArch64/neon-compare-instructions.s
Normal file
@ -0,0 +1,405 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Equal (Integer)
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
cmeq v0.8b, v15.8b, v17.8b
|
||||
cmeq v1.16b, v31.16b, v8.16b
|
||||
cmeq v15.4h, v16.4h, v17.4h
|
||||
cmeq v5.8h, v6.8h, v7.8h
|
||||
cmeq v29.2s, v27.2s, v28.2s
|
||||
cmeq v9.4s, v7.4s, v8.4s
|
||||
cmeq v3.2d, v31.2d, v21.2d
|
||||
|
||||
// CHECK: cmeq v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x8d,0x31,0x2e]
|
||||
// CHECK: cmeq v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x8f,0x28,0x6e]
|
||||
// CHECK: cmeq v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x8e,0x71,0x2e]
|
||||
// CHECK: cmeq v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x8c,0x67,0x6e]
|
||||
// CHECK: cmeq v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x8f,0xbc,0x2e]
|
||||
// CHECK: cmeq v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x8c,0xa8,0x6e]
|
||||
// CHECK: cmeq v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x8f,0xf5,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Higher or Same (Unsigned Integer)
|
||||
// Vector Compare Mask Lower or Same (Unsigned Integer)
|
||||
// CMLS is an alias for CMHS with the operands reversed.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
cmhs v0.8b, v15.8b, v17.8b
|
||||
cmhs v1.16b, v31.16b, v8.16b
|
||||
cmhs v15.4h, v16.4h, v17.4h
|
||||
cmhs v5.8h, v6.8h, v7.8h
|
||||
cmhs v29.2s, v27.2s, v28.2s
|
||||
cmhs v9.4s, v7.4s, v8.4s
|
||||
cmhs v3.2d, v31.2d, v21.2d
|
||||
|
||||
cmls v0.8b, v17.8b, v15.8b
|
||||
cmls v1.16b, v8.16b, v31.16b
|
||||
cmls v15.4h, v17.4h, v16.4h
|
||||
cmls v5.8h, v7.8h, v6.8h
|
||||
cmls v29.2s, v28.2s, v27.2s
|
||||
cmls v9.4s, v8.4s, v7.4s
|
||||
cmls v3.2d, v21.2d, v31.2d
|
||||
|
||||
// CHECK: cmhs v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x3d,0x31,0x2e]
|
||||
// CHECK: cmhs v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x6e]
|
||||
// CHECK: cmhs v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x3e,0x71,0x2e]
|
||||
// CHECK: cmhs v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x3c,0x67,0x6e]
|
||||
// CHECK: cmhs v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x3f,0xbc,0x2e]
|
||||
// CHECK: cmhs v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x3c,0xa8,0x6e]
|
||||
// CHECK: cmhs v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x3f,0xf5,0x6e]
|
||||
// CHECK: cmhs v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x3d,0x31,0x2e]
|
||||
// CHECK: cmhs v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x6e]
|
||||
// CHECK: cmhs v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x3e,0x71,0x2e]
|
||||
// CHECK: cmhs v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x3c,0x67,0x6e]
|
||||
// CHECK: cmhs v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x3f,0xbc,0x2e]
|
||||
// CHECK: cmhs v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x3c,0xa8,0x6e]
|
||||
// CHECK: cmhs v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x3f,0xf5,0x6e]
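
The CMLS lines above assemble to the corresponding CMHS encodings with the two source operands swapped. As a rough illustration of how an assembler can resolve such an alias before encoding (a minimal sketch with made-up helper names, not code from this patch):

def canonicalize_cmls(mnemonic, operands):
    # CMLS Vd, Vn, Vm is accepted and emitted as CMHS Vd, Vm, Vn,
    # so both spellings end up with the same encoding.
    if mnemonic == "cmls":
        vd, vn, vm = operands
        return "cmhs", [vd, vm, vn]
    return mnemonic, operands

print(canonicalize_cmls("cmls", ["v0.8b", "v17.8b", "v15.8b"]))
# -> ('cmhs', ['v0.8b', 'v15.8b', 'v17.8b'])

The same pattern covers the CMLE/CMGE, CMLO/CMHI and CMLT/CMGT pairs tested below.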
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Greater Than or Equal (Integer)
|
||||
// Vector Compare Mask Less Than or Equal (Integer)
|
||||
// CMLE is an alias for CMGE with the operands reversed.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
cmge v0.8b, v15.8b, v17.8b
|
||||
cmge v1.16b, v31.16b, v8.16b
|
||||
cmge v15.4h, v16.4h, v17.4h
|
||||
cmge v5.8h, v6.8h, v7.8h
|
||||
cmge v29.2s, v27.2s, v28.2s
|
||||
cmge v9.4s, v7.4s, v8.4s
|
||||
cmge v3.2d, v31.2d, v21.2d
|
||||
|
||||
cmle v0.8b, v17.8b, v15.8b
|
||||
cmle v1.16b, v8.16b, v31.16b
|
||||
cmle v15.4h, v17.4h, v16.4h
|
||||
cmle v5.8h, v7.8h, v6.8h
|
||||
cmle v29.2s, v28.2s, v27.2s
|
||||
cmle v9.4s, v8.4s, v7.4s
|
||||
cmle v3.2d, v21.2d, v31.2d
|
||||
|
||||
// CHECK: cmge v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x3d,0x31,0x0e]
|
||||
// CHECK: cmge v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x4e]
|
||||
// CHECK: cmge v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x3e,0x71,0x0e]
|
||||
// CHECK: cmge v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x3c,0x67,0x4e]
|
||||
// CHECK: cmge v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x3f,0xbc,0x0e]
|
||||
// CHECK: cmge v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x3c,0xa8,0x4e]
|
||||
// CHECK: cmge v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x3f,0xf5,0x4e]
|
||||
// CHECK: cmge v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x3d,0x31,0x0e]
|
||||
// CHECK: cmge v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x4e]
|
||||
// CHECK: cmge v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x3e,0x71,0x0e]
|
||||
// CHECK: cmge v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x3c,0x67,0x4e]
|
||||
// CHECK: cmge v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x3f,0xbc,0x0e]
|
||||
// CHECK: cmge v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x3c,0xa8,0x4e]
|
||||
// CHECK: cmge v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x3f,0xf5,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Higher (Unsigned Integer)
|
||||
// Vector Compare Mask Lower (Unsigned Integer)
|
||||
// CMLO is an alias for CMHI with the operands reversed.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
cmhi v0.8b, v15.8b, v17.8b
|
||||
cmhi v1.16b, v31.16b, v8.16b
|
||||
cmhi v15.4h, v16.4h, v17.4h
|
||||
cmhi v5.8h, v6.8h, v7.8h
|
||||
cmhi v29.2s, v27.2s, v28.2s
|
||||
cmhi v9.4s, v7.4s, v8.4s
|
||||
cmhi v3.2d, v31.2d, v21.2d
|
||||
|
||||
cmlo v0.8b, v17.8b, v15.8b
|
||||
cmlo v1.16b, v8.16b, v31.16b
|
||||
cmlo v15.4h, v17.4h, v16.4h
|
||||
cmlo v5.8h, v7.8h, v6.8h
|
||||
cmlo v29.2s, v28.2s, v27.2s
|
||||
cmlo v9.4s, v8.4s, v7.4s
|
||||
cmlo v3.2d, v21.2d, v31.2d
|
||||
|
||||
// CHECK: cmhi v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x35,0x31,0x2e]
|
||||
// CHECK: cmhi v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x37,0x28,0x6e]
|
||||
// CHECK: cmhi v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x36,0x71,0x2e]
|
||||
// CHECK: cmhi v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x34,0x67,0x6e]
|
||||
// CHECK: cmhi v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x37,0xbc,0x2e]
|
||||
// CHECK: cmhi v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x34,0xa8,0x6e]
|
||||
// CHECK: cmhi v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x37,0xf5,0x6e]
|
||||
// CHECK: cmhi v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x35,0x31,0x2e]
|
||||
// CHECK: cmhi v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x37,0x28,0x6e]
|
||||
// CHECK: cmhi v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x36,0x71,0x2e]
|
||||
// CHECK: cmhi v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x34,0x67,0x6e]
|
||||
// CHECK: cmhi v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x37,0xbc,0x2e]
|
||||
// CHECK: cmhi v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x34,0xa8,0x6e]
|
||||
// CHECK: cmhi v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x37,0xf5,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Greater Than (Integer)
|
||||
// Vector Compare Mask Less Than (Integer)
|
||||
// CMLT is an alias for CMGT with the operands reversed.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
cmgt v0.8b, v15.8b, v17.8b
|
||||
cmgt v1.16b, v31.16b, v8.16b
|
||||
cmgt v15.4h, v16.4h, v17.4h
|
||||
cmgt v5.8h, v6.8h, v7.8h
|
||||
cmgt v29.2s, v27.2s, v28.2s
|
||||
cmgt v9.4s, v7.4s, v8.4s
|
||||
cmgt v3.2d, v31.2d, v21.2d
|
||||
|
||||
cmlt v0.8b, v17.8b, v15.8b
|
||||
cmlt v1.16b, v8.16b, v31.16b
|
||||
cmlt v15.4h, v17.4h, v16.4h
|
||||
cmlt v5.8h, v7.8h, v6.8h
|
||||
cmlt v29.2s, v28.2s, v27.2s
|
||||
cmlt v9.4s, v8.4s, v7.4s
|
||||
cmlt v3.2d, v21.2d, v31.2d
|
||||
|
||||
// CHECK: cmgt v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x35,0x31,0x0e]
|
||||
// CHECK: cmgt v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x37,0x28,0x4e]
|
||||
// CHECK: cmgt v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x36,0x71,0x0e]
|
||||
// CHECK: cmgt v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x34,0x67,0x4e]
|
||||
// CHECK: cmgt v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x37,0xbc,0x0e]
|
||||
// CHECK: cmgt v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x34,0xa8,0x4e]
|
||||
// CHECK: cmgt v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x37,0xf5,0x4e]
|
||||
// CHECK: cmgt v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x35,0x31,0x0e]
|
||||
// CHECK: cmgt v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x37,0x28,0x4e]
|
||||
// CHECK: cmgt v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x36,0x71,0x0e]
|
||||
// CHECK: cmgt v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x34,0x67,0x4e]
|
||||
// CHECK: cmgt v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x37,0xbc,0x0e]
|
||||
// CHECK: cmgt v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x34,0xa8,0x4e]
|
||||
// CHECK: cmgt v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x37,0xf5,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Bitwise Test (Integer)
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
cmtst v0.8b, v15.8b, v17.8b
|
||||
cmtst v1.16b, v31.16b, v8.16b
|
||||
cmtst v15.4h, v16.4h, v17.4h
|
||||
cmtst v5.8h, v6.8h, v7.8h
|
||||
cmtst v29.2s, v27.2s, v28.2s
|
||||
cmtst v9.4s, v7.4s, v8.4s
|
||||
cmtst v3.2d, v31.2d, v21.2d
|
||||
|
||||
// CHECK: cmtst v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x8d,0x31,0x0e]
|
||||
// CHECK: cmtst v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x8f,0x28,0x4e]
|
||||
// CHECK: cmtst v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x8e,0x71,0x0e]
|
||||
// CHECK: cmtst v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x8c,0x67,0x4e]
|
||||
// CHECK: cmtst v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x8f,0xbc,0x0e]
|
||||
// CHECK: cmtst v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x8c,0xa8,0x4e]
|
||||
// CHECK: cmtst v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x8f,0xf5,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Equal (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
fcmeq v0.2s, v31.2s, v16.2s
|
||||
fcmeq v4.4s, v7.4s, v15.4s
|
||||
fcmeq v29.2d, v2.2d, v5.2d
|
||||
|
||||
// CHECK: fcmeq v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0x30,0x0e]
|
||||
// CHECK: fcmeq v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0x2f,0x4e]
|
||||
// CHECK: fcmeq v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0x65,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Greater Than Or Equal (Floating Point)
|
||||
// Vector Compare Mask Less Than Or Equal (Floating Point)
|
||||
// FCMLE is an alias for FCMGE with the operands reversed.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
fcmge v31.4s, v29.4s, v28.4s
|
||||
fcmge v3.2s, v8.2s, v12.2s
|
||||
fcmge v17.2d, v15.2d, v13.2d
|
||||
fcmle v31.4s, v28.4s, v29.4s
|
||||
fcmle v3.2s, v12.2s, v8.2s
|
||||
fcmle v17.2d, v13.2d, v15.2d
|
||||
|
||||
// CHECK: fcmge v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xe7,0x3c,0x6e]
|
||||
// CHECK: fcmge v3.2s, v8.2s, v12.2s // encoding: [0x03,0xe5,0x2c,0x2e]
|
||||
// CHECK: fcmge v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xe5,0x6d,0x6e]
|
||||
// CHECK: fcmge v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xe7,0x3c,0x6e]
|
||||
// CHECK: fcmge v3.2s, v8.2s, v12.2s // encoding: [0x03,0xe5,0x2c,0x2e]
|
||||
// CHECK: fcmge v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xe5,0x6d,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Greater Than (Floating Point)
|
||||
// Vector Compare Mask Less Than (Floating Point)
|
||||
// FCMLT is an alias for FCMGT with the operands reversed.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
fcmgt v0.2s, v31.2s, v16.2s
|
||||
fcmgt v4.4s, v7.4s, v15.4s
|
||||
fcmgt v29.2d, v2.2d, v5.2d
|
||||
fcmlt v0.2s, v16.2s, v31.2s
|
||||
fcmlt v4.4s, v15.4s, v7.4s
|
||||
fcmlt v29.2d, v5.2d, v2.2d
|
||||
|
||||
// CHECK: fcmgt v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0xb0,0x2e]
|
||||
// CHECK: fcmgt v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0xaf,0x6e]
|
||||
// CHECK: fcmgt v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0xe5,0x6e]
|
||||
// CHECK: fcmgt v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0xb0,0x2e]
|
||||
// CHECK: fcmgt v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0xaf,0x6e]
|
||||
// CHECK: fcmgt v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0xe5,0x6e]
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Equal to Zero (Integer)
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
cmeq v0.8b, v15.8b, #0
|
||||
cmeq v1.16b, v31.16b, #0
|
||||
cmeq v15.4h, v16.4h, #0
|
||||
cmeq v5.8h, v6.8h, #0
|
||||
cmeq v29.2s, v27.2s, #0
|
||||
cmeq v9.4s, v7.4s, #0
|
||||
cmeq v3.2d, v31.2d, #0
|
||||
|
||||
// CHECK: cmeq v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x99,0x20,0x0e]
|
||||
// CHECK: cmeq v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x9b,0x20,0x4e]
|
||||
// CHECK: cmeq v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x9a,0x60,0x0e]
|
||||
// CHECK: cmeq v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x98,0x60,0x4e]
|
||||
// CHECK: cmeq v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x9b,0xa0,0x0e]
|
||||
// CHECK: cmeq v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x98,0xa0,0x4e]
|
||||
// CHECK: cmeq v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x9b,0xe0,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
|
||||
//----------------------------------------------------------------------
|
||||
cmge v0.8b, v15.8b, #0
|
||||
cmge v1.16b, v31.16b, #0
|
||||
cmge v15.4h, v16.4h, #0
|
||||
cmge v5.8h, v6.8h, #0
|
||||
cmge v29.2s, v27.2s, #0
|
||||
cmge v17.4s, v20.4s, #0
|
||||
cmge v3.2d, v31.2d, #0
|
||||
|
||||
// CHECK: cmge v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x89,0x20,0x2e]
|
||||
// CHECK: cmge v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x8b,0x20,0x6e]
|
||||
// CHECK: cmge v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x8a,0x60,0x2e]
|
||||
// CHECK: cmge v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x88,0x60,0x6e]
|
||||
// CHECK: cmge v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x8b,0xa0,0x2e]
|
||||
// CHECK: cmge v17.4s, v20.4s, #0x0 // encoding: [0x91,0x8a,0xa0,0x6e]
|
||||
// CHECK: cmge v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x8b,0xe0,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Greater Than Zero (Signed Integer)
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
cmgt v0.8b, v15.8b, #0
|
||||
cmgt v1.16b, v31.16b, #0
|
||||
cmgt v15.4h, v16.4h, #0
|
||||
cmgt v5.8h, v6.8h, #0
|
||||
cmgt v29.2s, v27.2s, #0
|
||||
cmgt v9.4s, v7.4s, #0
|
||||
cmgt v3.2d, v31.2d, #0
|
||||
|
||||
// CHECK: cmgt v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x89,0x20,0x0e]
|
||||
// CHECK: cmgt v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x8b,0x20,0x4e]
|
||||
// CHECK: cmgt v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x8a,0x60,0x0e]
|
||||
// CHECK: cmgt v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x88,0x60,0x4e]
|
||||
// CHECK: cmgt v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x8b,0xa0,0x0e]
|
||||
// CHECK: cmgt v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x88,0xa0,0x4e]
|
||||
// CHECK: cmgt v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x8b,0xe0,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
|
||||
//----------------------------------------------------------------------
|
||||
cmle v0.8b, v15.8b, #0
|
||||
cmle v1.16b, v31.16b, #0
|
||||
cmle v15.4h, v16.4h, #0
|
||||
cmle v5.8h, v6.8h, #0
|
||||
cmle v29.2s, v27.2s, #0
|
||||
cmle v9.4s, v7.4s, #0
|
||||
cmle v3.2d, v31.2d, #0
|
||||
|
||||
// CHECK: cmle v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x99,0x20,0x2e]
|
||||
// CHECK: cmle v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x9b,0x20,0x6e]
|
||||
// CHECK: cmle v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x9a,0x60,0x2e]
|
||||
// CHECK: cmle v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x98,0x60,0x6e]
|
||||
// CHECK: cmle v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x9b,0xa0,0x2e]
|
||||
// CHECK: cmle v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x98,0xa0,0x6e]
|
||||
// CHECK: cmle v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x9b,0xe0,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Less Than Zero (Signed Integer)
|
||||
//----------------------------------------------------------------------
|
||||
cmlt v0.8b, v15.8b, #0
|
||||
cmlt v1.16b, v31.16b, #0
|
||||
cmlt v15.4h, v16.4h, #0
|
||||
cmlt v5.8h, v6.8h, #0
|
||||
cmlt v29.2s, v27.2s, #0
|
||||
cmlt v9.4s, v7.4s, #0
|
||||
cmlt v3.2d, v31.2d, #0
|
||||
|
||||
// CHECK: cmlt v0.8b, v15.8b, #0x0 // encoding: [0xe0,0xa9,0x20,0x0e]
|
||||
// CHECK: cmlt v1.16b, v31.16b, #0x0 // encoding: [0xe1,0xab,0x20,0x4e]
|
||||
// CHECK: cmlt v15.4h, v16.4h, #0x0 // encoding: [0x0f,0xaa,0x60,0x0e]
|
||||
// CHECK: cmlt v5.8h, v6.8h, #0x0 // encoding: [0xc5,0xa8,0x60,0x4e]
|
||||
// CHECK: cmlt v29.2s, v27.2s, #0x0 // encoding: [0x7d,0xab,0xa0,0x0e]
|
||||
// CHECK: cmlt v9.4s, v7.4s, #0x0 // encoding: [0xe9,0xa8,0xa0,0x4e]
|
||||
// CHECK: cmlt v3.2d, v31.2d, #0x0 // encoding: [0xe3,0xab,0xe0,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Equal to Zero (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fcmeq v0.2s, v31.2s, #0.0
|
||||
fcmeq v4.4s, v7.4s, #0.0
|
||||
fcmeq v29.2d, v2.2d, #0.0
|
||||
|
||||
// CHECK: fcmeq v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xdb,0xa0,0x0e]
|
||||
// CHECK: fcmeq v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xd8,0xa0,0x4e]
|
||||
// CHECK: fcmeq v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xd8,0xe0,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fcmge v31.4s, v29.4s, #0.0
|
||||
fcmge v3.2s, v8.2s, #0.0
|
||||
fcmge v17.2d, v15.2d, #0.0
|
||||
|
||||
// CHECK: fcmge v31.4s, v29.4s, #0.0 // encoding: [0xbf,0xcb,0xa0,0x6e]
|
||||
// CHECK: fcmge v3.2s, v8.2s, #0.0 // encoding: [0x03,0xc9,0xa0,0x2e]
|
||||
// CHECK: fcmge v17.2d, v15.2d, #0.0 // encoding: [0xf1,0xc9,0xe0,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Greater Than Zero (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fcmgt v0.2s, v31.2s, #0.0
|
||||
fcmgt v4.4s, v7.4s, #0.0
|
||||
fcmgt v29.2d, v2.2d, #0.0
|
||||
|
||||
// CHECK: fcmgt v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xcb,0xa0,0x0e]
|
||||
// CHECK: fcmgt v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xc8,0xa0,0x4e]
|
||||
// CHECK: fcmgt v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xc8,0xe0,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fcmle v1.4s, v8.4s, #0.0
|
||||
fcmle v3.2s, v20.2s, #0.0
|
||||
fcmle v7.2d, v13.2d, #0.0
|
||||
|
||||
// CHECK: fcmle v1.4s, v8.4s, #0.0 // encoding: [0x01,0xd9,0xa0,0x6e]
|
||||
// CHECK: fcmle v3.2s, v20.2s, #0.0 // encoding: [0x83,0xda,0xa0,0x2e]
|
||||
// CHECK: fcmle v7.2d, v13.2d, #0.0 // encoding: [0xa7,0xd9,0xe0,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Compare Mask Less Than Zero (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fcmlt v16.2s, v2.2s, #0.0
|
||||
fcmlt v15.4s, v4.4s, #0.0
|
||||
fcmlt v5.2d, v29.2d, #0.0
|
||||
|
||||
// CHECK: fcmlt v16.2s, v2.2s, #0.0 // encoding: [0x50,0xe8,0xa0,0x0e]
|
||||
// CHECK: fcmlt v15.4s, v4.4s, #0.0 // encoding: [0x8f,0xe8,0xa0,0x4e]
|
||||
// CHECK: fcmlt v5.2d, v29.2d, #0.0 // encoding: [0xa5,0xeb,0xe0,0x4e]
1207
test/MC/AArch64/neon-diagnostics.s
Normal file
File diff suppressed because it is too large
41
test/MC/AArch64/neon-facge-facgt.s
Normal file
@ -0,0 +1,41 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
|
||||
// FACLE is an alias for FACGE with the operands reversed.
|
||||
//----------------------------------------------------------------------
|
||||
facge v0.2s, v31.2s, v16.2s
|
||||
facge v4.4s, v7.4s, v15.4s
|
||||
facge v29.2d, v2.2d, v5.2d
|
||||
facle v0.2s, v16.2s, v31.2s
|
||||
facle v4.4s, v15.4s, v7.4s
|
||||
facle v29.2d, v5.2d, v2.2d
|
||||
|
||||
// CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
|
||||
// CHECK: facge v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xec,0x2f,0x6e]
|
||||
// CHECK: facge v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xec,0x65,0x6e]
|
||||
// CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
|
||||
// CHECK: facge v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xec,0x2f,0x6e]
|
||||
// CHECK: facge v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xec,0x65,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Absolute Compare Mask Less Than (Floating Point)
|
||||
// FACLT is an alias for FACGT with the operands reversed.
|
||||
//----------------------------------------------------------------------
|
||||
facgt v31.4s, v29.4s, v28.4s
|
||||
facgt v3.2s, v8.2s, v12.2s
|
||||
facgt v17.2d, v15.2d, v13.2d
|
||||
faclt v31.4s, v28.4s, v29.4s
|
||||
faclt v3.2s, v12.2s, v8.2s
|
||||
faclt v17.2d, v13.2d, v15.2d
|
||||
|
||||
// CHECK: facgt v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xef,0xbc,0x6e]
|
||||
// CHECK: facgt v3.2s, v8.2s, v12.2s // encoding: [0x03,0xed,0xac,0x2e]
|
||||
// CHECK: facgt v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xed,0xed,0x6e]
|
||||
// CHECK: facgt v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xef,0xbc,0x6e]
|
||||
// CHECK: facgt v3.2s, v8.2s, v12.2s // encoding: [0x03,0xed,0xac,0x2e]
|
||||
// CHECK: facgt v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xed,0xed,0x6e]
|
27
test/MC/AArch64/neon-frsqrt-frecp.s
Normal file
@ -0,0 +1,27 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Reciprocal Square Root Step (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
frsqrts v0.2s, v31.2s, v16.2s
|
||||
frsqrts v4.4s, v7.4s, v15.4s
|
||||
frsqrts v29.2d, v2.2d, v5.2d
|
||||
|
||||
// CHECK: frsqrts v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xff,0xb0,0x0e]
|
||||
// CHECK: frsqrts v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xfc,0xaf,0x4e]
|
||||
// CHECK: frsqrts v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xfc,0xe5,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Reciprocal Step (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
frecps v31.4s, v29.4s, v28.4s
|
||||
frecps v3.2s, v8.2s, v12.2s
|
||||
frecps v17.2d, v15.2d, v13.2d
|
||||
|
||||
// CHECK: frecps v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xff,0x3c,0x4e]
|
||||
// CHECK: frecps v3.2s, v8.2s, v12.2s // encoding: [0x03,0xfd,0x2c,0x0e]
|
||||
// CHECK: frecps v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xfd,0x6d,0x4e]
|
74
test/MC/AArch64/neon-halving-add-sub.s
Normal file
@ -0,0 +1,74 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
|
||||
// Vector Integer Halving Add (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
shadd v0.8b, v1.8b, v2.8b
|
||||
shadd v0.16b, v1.16b, v2.16b
|
||||
shadd v0.4h, v1.4h, v2.4h
|
||||
shadd v0.8h, v1.8h, v2.8h
|
||||
shadd v0.2s, v1.2s, v2.2s
|
||||
shadd v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: shadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x04,0x22,0x0e]
|
||||
// CHECK: shadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x04,0x22,0x4e]
|
||||
// CHECK: shadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x04,0x62,0x0e]
|
||||
// CHECK: shadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x04,0x62,0x4e]
|
||||
// CHECK: shadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x04,0xa2,0x0e]
|
||||
// CHECK: shadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x04,0xa2,0x4e]
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Halving Add (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uhadd v0.8b, v1.8b, v2.8b
|
||||
uhadd v0.16b, v1.16b, v2.16b
|
||||
uhadd v0.4h, v1.4h, v2.4h
|
||||
uhadd v0.8h, v1.8h, v2.8h
|
||||
uhadd v0.2s, v1.2s, v2.2s
|
||||
uhadd v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: uhadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x04,0x22,0x2e]
|
||||
// CHECK: uhadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x04,0x22,0x6e]
|
||||
// CHECK: uhadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x04,0x62,0x2e]
|
||||
// CHECK: uhadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x04,0x62,0x6e]
|
||||
// CHECK: uhadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x04,0xa2,0x2e]
|
||||
// CHECK: uhadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x04,0xa2,0x6e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Halving Sub (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
shsub v0.8b, v1.8b, v2.8b
|
||||
shsub v0.16b, v1.16b, v2.16b
|
||||
shsub v0.4h, v1.4h, v2.4h
|
||||
shsub v0.8h, v1.8h, v2.8h
|
||||
shsub v0.2s, v1.2s, v2.2s
|
||||
shsub v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: shsub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x24,0x22,0x0e]
|
||||
// CHECK: shsub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x24,0x22,0x4e]
|
||||
// CHECK: shsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x24,0x62,0x0e]
|
||||
// CHECK: shsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x24,0x62,0x4e]
|
||||
// CHECK: shsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x24,0xa2,0x0e]
|
||||
// CHECK: shsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x24,0xa2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Halving Sub (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uhsub v0.8b, v1.8b, v2.8b
|
||||
uhsub v0.16b, v1.16b, v2.16b
|
||||
uhsub v0.4h, v1.4h, v2.4h
|
||||
uhsub v0.8h, v1.8h, v2.8h
|
||||
uhsub v0.2s, v1.2s, v2.2s
|
||||
uhsub v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: uhsub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x24,0x22,0x2e]
|
||||
// CHECK: uhsub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x24,0x22,0x6e]
|
||||
// CHECK: uhsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x24,0x62,0x2e]
|
||||
// CHECK: uhsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x24,0x62,0x6e]
|
||||
// CHECK: uhsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x24,0xa2,0x2e]
|
||||
// CHECK: uhsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x24,0xa2,0x6e]
110
test/MC/AArch64/neon-max-min-pairwise.s
Normal file
@ -0,0 +1,110 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Maximum Pairwise (Signed and Unsigned Integer)
|
||||
//----------------------------------------------------------------------
|
||||
smaxp v0.8b, v1.8b, v2.8b
|
||||
smaxp v0.16b, v1.16b, v2.16b
|
||||
smaxp v0.4h, v1.4h, v2.4h
|
||||
smaxp v0.8h, v1.8h, v2.8h
|
||||
smaxp v0.2s, v1.2s, v2.2s
|
||||
smaxp v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: smaxp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xa4,0x22,0x0e]
|
||||
// CHECK: smaxp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xa4,0x22,0x4e]
|
||||
// CHECK: smaxp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xa4,0x62,0x0e]
|
||||
// CHECK: smaxp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xa4,0x62,0x4e]
|
||||
// CHECK: smaxp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xa4,0xa2,0x0e]
|
||||
// CHECK: smaxp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xa4,0xa2,0x4e]
|
||||
|
||||
umaxp v0.8b, v1.8b, v2.8b
|
||||
umaxp v0.16b, v1.16b, v2.16b
|
||||
umaxp v0.4h, v1.4h, v2.4h
|
||||
umaxp v0.8h, v1.8h, v2.8h
|
||||
umaxp v0.2s, v1.2s, v2.2s
|
||||
umaxp v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: umaxp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xa4,0x22,0x2e]
|
||||
// CHECK: umaxp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xa4,0x22,0x6e]
|
||||
// CHECK: umaxp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xa4,0x62,0x2e]
|
||||
// CHECK: umaxp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xa4,0x62,0x6e]
|
||||
// CHECK: umaxp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xa4,0xa2,0x2e]
|
||||
// CHECK: umaxp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xa4,0xa2,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Minimum Pairwise (Signed and Unsigned Integer)
|
||||
//----------------------------------------------------------------------
|
||||
sminp v0.8b, v1.8b, v2.8b
|
||||
sminp v0.16b, v1.16b, v2.16b
|
||||
sminp v0.4h, v1.4h, v2.4h
|
||||
sminp v0.8h, v1.8h, v2.8h
|
||||
sminp v0.2s, v1.2s, v2.2s
|
||||
sminp v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: sminp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xac,0x22,0x0e]
|
||||
// CHECK: sminp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xac,0x22,0x4e]
|
||||
// CHECK: sminp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xac,0x62,0x0e]
|
||||
// CHECK: sminp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xac,0x62,0x4e]
|
||||
// CHECK: sminp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xac,0xa2,0x0e]
|
||||
// CHECK: sminp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xac,0xa2,0x4e]
|
||||
|
||||
uminp v0.8b, v1.8b, v2.8b
|
||||
uminp v0.16b, v1.16b, v2.16b
|
||||
uminp v0.4h, v1.4h, v2.4h
|
||||
uminp v0.8h, v1.8h, v2.8h
|
||||
uminp v0.2s, v1.2s, v2.2s
|
||||
uminp v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: uminp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xac,0x22,0x2e]
|
||||
// CHECK: uminp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xac,0x22,0x6e]
|
||||
// CHECK: uminp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xac,0x62,0x2e]
|
||||
// CHECK: uminp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xac,0x62,0x6e]
|
||||
// CHECK: uminp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xac,0xa2,0x2e]
|
||||
// CHECK: uminp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xac,0xa2,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Maximum Pairwise (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fmaxp v0.2s, v1.2s, v2.2s
|
||||
fmaxp v31.4s, v15.4s, v16.4s
|
||||
fmaxp v7.2d, v8.2d, v25.2d
|
||||
|
||||
// CHECK: fmaxp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xf4,0x22,0x2e]
|
||||
// CHECK: fmaxp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x6e]
|
||||
// CHECK: fmaxp v7.2d, v8.2d, v25.2d // encoding: [0x07,0xf5,0x79,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Minimum Pairwise (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fminp v10.2s, v15.2s, v22.2s
|
||||
fminp v3.4s, v5.4s, v6.4s
|
||||
fminp v17.2d, v13.2d, v2.2d
|
||||
|
||||
// CHECK: fminp v10.2s, v15.2s, v22.2s // encoding: [0xea,0xf5,0xb6,0x2e]
|
||||
// CHECK: fminp v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xf4,0xa6,0x6e]
|
||||
// CHECK: fminp v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xf5,0xe2,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector maxNum Pairwise (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fmaxnmp v0.2s, v1.2s, v2.2s
|
||||
fmaxnmp v31.4s, v15.4s, v16.4s
|
||||
fmaxnmp v7.2d, v8.2d, v25.2d
|
||||
|
||||
// CHECK: fmaxnmp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xc4,0x22,0x2e]
|
||||
// CHECK: fmaxnmp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x6e]
|
||||
// CHECK: fmaxnmp v7.2d, v8.2d, v25.2d // encoding: [0x07,0xc5,0x79,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector minNum Pairwise (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fminnmp v10.2s, v15.2s, v22.2s
|
||||
fminnmp v3.4s, v5.4s, v6.4s
|
||||
fminnmp v17.2d, v13.2d, v2.2d
|
||||
|
||||
// CHECK: fminnmp v10.2s, v15.2s, v22.2s // encoding: [0xea,0xc5,0xb6,0x2e]
|
||||
// CHECK: fminnmp v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xc4,0xa6,0x6e]
|
||||
// CHECK: fminnmp v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xc5,0xe2,0x6e]
110
test/MC/AArch64/neon-max-min.s
Normal file
@ -0,0 +1,110 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Maximum (Signed and Unsigned Integer)
|
||||
//----------------------------------------------------------------------
|
||||
smax v0.8b, v1.8b, v2.8b
|
||||
smax v0.16b, v1.16b, v2.16b
|
||||
smax v0.4h, v1.4h, v2.4h
|
||||
smax v0.8h, v1.8h, v2.8h
|
||||
smax v0.2s, v1.2s, v2.2s
|
||||
smax v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: smax v0.8b, v1.8b, v2.8b // encoding: [0x20,0x64,0x22,0x0e]
|
||||
// CHECK: smax v0.16b, v1.16b, v2.16b // encoding: [0x20,0x64,0x22,0x4e]
|
||||
// CHECK: smax v0.4h, v1.4h, v2.4h // encoding: [0x20,0x64,0x62,0x0e]
|
||||
// CHECK: smax v0.8h, v1.8h, v2.8h // encoding: [0x20,0x64,0x62,0x4e]
|
||||
// CHECK: smax v0.2s, v1.2s, v2.2s // encoding: [0x20,0x64,0xa2,0x0e]
|
||||
// CHECK: smax v0.4s, v1.4s, v2.4s // encoding: [0x20,0x64,0xa2,0x4e]
|
||||
|
||||
umax v0.8b, v1.8b, v2.8b
|
||||
umax v0.16b, v1.16b, v2.16b
|
||||
umax v0.4h, v1.4h, v2.4h
|
||||
umax v0.8h, v1.8h, v2.8h
|
||||
umax v0.2s, v1.2s, v2.2s
|
||||
umax v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: umax v0.8b, v1.8b, v2.8b // encoding: [0x20,0x64,0x22,0x2e]
|
||||
// CHECK: umax v0.16b, v1.16b, v2.16b // encoding: [0x20,0x64,0x22,0x6e]
|
||||
// CHECK: umax v0.4h, v1.4h, v2.4h // encoding: [0x20,0x64,0x62,0x2e]
|
||||
// CHECK: umax v0.8h, v1.8h, v2.8h // encoding: [0x20,0x64,0x62,0x6e]
|
||||
// CHECK: umax v0.2s, v1.2s, v2.2s // encoding: [0x20,0x64,0xa2,0x2e]
|
||||
// CHECK: umax v0.4s, v1.4s, v2.4s // encoding: [0x20,0x64,0xa2,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Minimum (Signed and Unsigned Integer)
|
||||
//----------------------------------------------------------------------
|
||||
smin v0.8b, v1.8b, v2.8b
|
||||
smin v0.16b, v1.16b, v2.16b
|
||||
smin v0.4h, v1.4h, v2.4h
|
||||
smin v0.8h, v1.8h, v2.8h
|
||||
smin v0.2s, v1.2s, v2.2s
|
||||
smin v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: smin v0.8b, v1.8b, v2.8b // encoding: [0x20,0x6c,0x22,0x0e]
|
||||
// CHECK: smin v0.16b, v1.16b, v2.16b // encoding: [0x20,0x6c,0x22,0x4e]
|
||||
// CHECK: smin v0.4h, v1.4h, v2.4h // encoding: [0x20,0x6c,0x62,0x0e]
|
||||
// CHECK: smin v0.8h, v1.8h, v2.8h // encoding: [0x20,0x6c,0x62,0x4e]
|
||||
// CHECK: smin v0.2s, v1.2s, v2.2s // encoding: [0x20,0x6c,0xa2,0x0e]
|
||||
// CHECK: smin v0.4s, v1.4s, v2.4s // encoding: [0x20,0x6c,0xa2,0x4e]
|
||||
|
||||
umin v0.8b, v1.8b, v2.8b
|
||||
umin v0.16b, v1.16b, v2.16b
|
||||
umin v0.4h, v1.4h, v2.4h
|
||||
umin v0.8h, v1.8h, v2.8h
|
||||
umin v0.2s, v1.2s, v2.2s
|
||||
umin v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: umin v0.8b, v1.8b, v2.8b // encoding: [0x20,0x6c,0x22,0x2e]
|
||||
// CHECK: umin v0.16b, v1.16b, v2.16b // encoding: [0x20,0x6c,0x22,0x6e]
|
||||
// CHECK: umin v0.4h, v1.4h, v2.4h // encoding: [0x20,0x6c,0x62,0x2e]
|
||||
// CHECK: umin v0.8h, v1.8h, v2.8h // encoding: [0x20,0x6c,0x62,0x6e]
|
||||
// CHECK: umin v0.2s, v1.2s, v2.2s // encoding: [0x20,0x6c,0xa2,0x2e]
|
||||
// CHECK: umin v0.4s, v1.4s, v2.4s // encoding: [0x20,0x6c,0xa2,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Maximum (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fmax v0.2s, v1.2s, v2.2s
|
||||
fmax v31.4s, v15.4s, v16.4s
|
||||
fmax v7.2d, v8.2d, v25.2d
|
||||
|
||||
// CHECK: fmax v0.2s, v1.2s, v2.2s // encoding: [0x20,0xf4,0x22,0x0e]
|
||||
// CHECK: fmax v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x4e]
|
||||
// CHECK: fmax v7.2d, v8.2d, v25.2d // encoding: [0x07,0xf5,0x79,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Minimum (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fmin v10.2s, v15.2s, v22.2s
|
||||
fmin v3.4s, v5.4s, v6.4s
|
||||
fmin v17.2d, v13.2d, v2.2d
|
||||
|
||||
// CHECK: fmin v10.2s, v15.2s, v22.2s // encoding: [0xea,0xf5,0xb6,0x0e]
|
||||
// CHECK: fmin v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xf4,0xa6,0x4e]
|
||||
// CHECK: fmin v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xf5,0xe2,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector maxNum (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fmaxnm v0.2s, v1.2s, v2.2s
|
||||
fmaxnm v31.4s, v15.4s, v16.4s
|
||||
fmaxnm v7.2d, v8.2d, v25.2d
|
||||
|
||||
// CHECK: fmaxnm v0.2s, v1.2s, v2.2s // encoding: [0x20,0xc4,0x22,0x0e]
|
||||
// CHECK: fmaxnm v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x4e]
|
||||
// CHECK: fmaxnm v7.2d, v8.2d, v25.2d // encoding: [0x07,0xc5,0x79,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector minNum (Floating Point)
|
||||
//----------------------------------------------------------------------
|
||||
fminnm v10.2s, v15.2s, v22.2s
|
||||
fminnm v3.4s, v5.4s, v6.4s
|
||||
fminnm v17.2d, v13.2d, v2.2d
|
||||
|
||||
// CHECK: fminnm v10.2s, v15.2s, v22.2s // encoding: [0xea,0xc5,0xb6,0x0e]
|
||||
// CHECK: fminnm v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xc4,0xa6,0x4e]
|
||||
// CHECK: fminnm v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xc5,0xe2,0x4e]
61
test/MC/AArch64/neon-mla-mls-instructions.s
Normal file
@ -0,0 +1,61 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Integer Multiply-accumulate
|
||||
//----------------------------------------------------------------------
|
||||
mla v0.8b, v1.8b, v2.8b
|
||||
mla v0.16b, v1.16b, v2.16b
|
||||
mla v0.4h, v1.4h, v2.4h
|
||||
mla v0.8h, v1.8h, v2.8h
|
||||
mla v0.2s, v1.2s, v2.2s
|
||||
mla v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: mla v0.8b, v1.8b, v2.8b // encoding: [0x20,0x94,0x22,0x0e]
|
||||
// CHECK: mla v0.16b, v1.16b, v2.16b // encoding: [0x20,0x94,0x22,0x4e]
|
||||
// CHECK: mla v0.4h, v1.4h, v2.4h // encoding: [0x20,0x94,0x62,0x0e]
|
||||
// CHECK: mla v0.8h, v1.8h, v2.8h // encoding: [0x20,0x94,0x62,0x4e]
|
||||
// CHECK: mla v0.2s, v1.2s, v2.2s // encoding: [0x20,0x94,0xa2,0x0e]
|
||||
// CHECK: mla v0.4s, v1.4s, v2.4s // encoding: [0x20,0x94,0xa2,0x4e]
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Integer Multiply-subtract
|
||||
//----------------------------------------------------------------------
|
||||
mls v0.8b, v1.8b, v2.8b
|
||||
mls v0.16b, v1.16b, v2.16b
|
||||
mls v0.4h, v1.4h, v2.4h
|
||||
mls v0.8h, v1.8h, v2.8h
|
||||
mls v0.2s, v1.2s, v2.2s
|
||||
mls v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: mls v0.8b, v1.8b, v2.8b // encoding: [0x20,0x94,0x22,0x2e]
|
||||
// CHECK: mls v0.16b, v1.16b, v2.16b // encoding: [0x20,0x94,0x22,0x6e]
|
||||
// CHECK: mls v0.4h, v1.4h, v2.4h // encoding: [0x20,0x94,0x62,0x2e]
|
||||
// CHECK: mls v0.8h, v1.8h, v2.8h // encoding: [0x20,0x94,0x62,0x6e]
|
||||
// CHECK: mls v0.2s, v1.2s, v2.2s // encoding: [0x20,0x94,0xa2,0x2e]
|
||||
// CHECK: mls v0.4s, v1.4s, v2.4s // encoding: [0x20,0x94,0xa2,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Floating-Point Multiply-accumulate
|
||||
//----------------------------------------------------------------------
|
||||
fmla v0.2s, v1.2s, v2.2s
|
||||
fmla v0.4s, v1.4s, v2.4s
|
||||
fmla v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: fmla v0.2s, v1.2s, v2.2s // encoding: [0x20,0xcc,0x22,0x0e]
|
||||
// CHECK: fmla v0.4s, v1.4s, v2.4s // encoding: [0x20,0xcc,0x22,0x4e]
|
||||
// CHECK: fmla v0.2d, v1.2d, v2.2d // encoding: [0x20,0xcc,0x62,0x4e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Floating-Point Multiply-subtract
|
||||
//----------------------------------------------------------------------
|
||||
fmls v0.2s, v1.2s, v2.2s
|
||||
fmls v0.4s, v1.4s, v2.4s
|
||||
fmls v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: fmls v0.2s, v1.2s, v2.2s // encoding: [0x20,0xcc,0xa2,0x0e]
|
||||
// CHECK: fmls v0.4s, v1.4s, v2.4s // encoding: [0x20,0xcc,0xa2,0x4e]
|
||||
// CHECK: fmls v0.2d, v1.2d, v2.2d // encoding: [0x20,0xcc,0xe2,0x4e]
207
test/MC/AArch64/neon-mov.s
Normal file
@ -0,0 +1,207 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
|
||||
// Vector Move Immediate Shifted
|
||||
//----------------------------------------------------------------------
|
||||
movi v0.2s, #1
|
||||
movi v1.2s, #0
|
||||
movi v15.2s, #1, lsl #8
|
||||
movi v16.2s, #1, lsl #16
|
||||
movi v31.2s, #1, lsl #24
|
||||
movi v0.4s, #1
|
||||
movi v0.4s, #1, lsl #8
|
||||
movi v0.4s, #1, lsl #16
|
||||
movi v0.4s, #1, lsl #24
|
||||
movi v0.4h, #1
|
||||
movi v0.4h, #1, lsl #8
|
||||
movi v0.8h, #1
|
||||
movi v0.8h, #1, lsl #8
|
||||
|
||||
// CHECK: movi v0.2s, #0x1 // encoding: [0x20,0x04,0x00,0x0f]
|
||||
// CHECK: movi v1.2s, #0x0 // encoding: [0x01,0x04,0x00,0x0f]
|
||||
// CHECK: movi v15.2s, #0x1, lsl #8 // encoding: [0x2f,0x24,0x00,0x0f]
|
||||
// CHECK: movi v16.2s, #0x1, lsl #16 // encoding: [0x30,0x44,0x00,0x0f]
|
||||
// CHECK: movi v31.2s, #0x1, lsl #24 // encoding: [0x3f,0x64,0x00,0x0f]
|
||||
// CHECK: movi v0.4s, #0x1 // encoding: [0x20,0x04,0x00,0x4f]
|
||||
// CHECK: movi v0.4s, #0x1, lsl #8 // encoding: [0x20,0x24,0x00,0x4f]
|
||||
// CHECK: movi v0.4s, #0x1, lsl #16 // encoding: [0x20,0x44,0x00,0x4f]
|
||||
// CHECK: movi v0.4s, #0x1, lsl #24 // encoding: [0x20,0x64,0x00,0x4f]
|
||||
// CHECK: movi v0.4h, #0x1 // encoding: [0x20,0x84,0x00,0x0f]
|
||||
// CHECK: movi v0.4h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x0f]
|
||||
// CHECK: movi v0.8h, #0x1 // encoding: [0x20,0x84,0x00,0x4f]
|
||||
// CHECK: movi v0.8h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x4f]
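
The shifted forms above place the 8-bit immediate at the requested byte position and replicate it into every element of the destination. A minimal sketch of that expansion, assuming the usual MOVI (shifted immediate) semantics (the helper name is made up for illustration):

def movi_shifted(imm8, shift, elem_bits, num_elems):
    # Shift the 8-bit immediate into place, then replicate it per element.
    elem = (imm8 << shift) & ((1 << elem_bits) - 1)
    return [elem] * num_elems

print([hex(e) for e in movi_shifted(0x1, 16, 32, 2)])  # movi v16.2s, #1, lsl #16
# -> ['0x10000', '0x10000']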
//----------------------------------------------------------------------
|
||||
// Vector Move Inverted Immediate Shifted
|
||||
//----------------------------------------------------------------------
|
||||
mvni v0.2s, #1
|
||||
mvni v1.2s, #0
|
||||
mvni v0.2s, #1, lsl #8
|
||||
mvni v0.2s, #1, lsl #16
|
||||
mvni v0.2s, #1, lsl #24
|
||||
mvni v0.4s, #1
|
||||
mvni v15.4s, #1, lsl #8
|
||||
mvni v16.4s, #1, lsl #16
|
||||
mvni v31.4s, #1, lsl #24
|
||||
mvni v0.4h, #1
|
||||
mvni v0.4h, #1, lsl #8
|
||||
mvni v0.8h, #1
|
||||
mvni v0.8h, #1, lsl #8
|
||||
|
||||
// CHECK: mvni v0.2s, #0x1 // encoding: [0x20,0x04,0x00,0x2f]
|
||||
// CHECK: mvni v1.2s, #0x0 // encoding: [0x01,0x04,0x00,0x2f]
|
||||
// CHECK: mvni v0.2s, #0x1, lsl #8 // encoding: [0x20,0x24,0x00,0x2f]
|
||||
// CHECK: mvni v0.2s, #0x1, lsl #16 // encoding: [0x20,0x44,0x00,0x2f]
|
||||
// CHECK: mvni v0.2s, #0x1, lsl #24 // encoding: [0x20,0x64,0x00,0x2f]
|
||||
// CHECK: mvni v0.4s, #0x1 // encoding: [0x20,0x04,0x00,0x6f]
|
||||
// CHECK: mvni v15.4s, #0x1, lsl #8 // encoding: [0x2f,0x24,0x00,0x6f]
|
||||
// CHECK: mvni v16.4s, #0x1, lsl #16 // encoding: [0x30,0x44,0x00,0x6f]
|
||||
// CHECK: mvni v31.4s, #0x1, lsl #24 // encoding: [0x3f,0x64,0x00,0x6f]
|
||||
// CHECK: mvni v0.4h, #0x1 // encoding: [0x20,0x84,0x00,0x2f]
|
||||
// CHECK: mvni v0.4h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x2f]
|
||||
// CHECK: mvni v0.8h, #0x1 // encoding: [0x20,0x84,0x00,0x6f]
|
||||
// CHECK: mvni v0.8h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x6f]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Bitwise Bit Clear (AND NOT) - immediate
|
||||
//----------------------------------------------------------------------
|
||||
bic v0.2s, #1
|
||||
bic v1.2s, #0
|
||||
bic v0.2s, #1, lsl #8
|
||||
bic v0.2s, #1, lsl #16
|
||||
bic v0.2s, #1, lsl #24
|
||||
bic v0.4s, #1
|
||||
bic v0.4s, #1, lsl #8
|
||||
bic v0.4s, #1, lsl #16
|
||||
bic v0.4s, #1, lsl #24
|
||||
bic v15.4h, #1
|
||||
bic v16.4h, #1, lsl #8
|
||||
bic v0.8h, #1
|
||||
bic v31.8h, #1, lsl #8
|
||||
|
||||
// CHECK: bic v0.2s, #0x1 // encoding: [0x20,0x14,0x00,0x2f]
|
||||
// CHECK: bic v1.2s, #0x0 // encoding: [0x01,0x14,0x00,0x2f]
|
||||
// CHECK: bic v0.2s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x2f]
|
||||
// CHECK: bic v0.2s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x2f]
|
||||
// CHECK: bic v0.2s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x2f]
|
||||
// CHECK: bic v0.4s, #0x1 // encoding: [0x20,0x14,0x00,0x6f]
|
||||
// CHECK: bic v0.4s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x6f]
|
||||
// CHECK: bic v0.4s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x6f]
|
||||
// CHECK: bic v0.4s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x6f]
|
||||
// CHECK: bic v15.4h, #0x1 // encoding: [0x2f,0x94,0x00,0x2f]
|
||||
// CHECK: bic v16.4h, #0x1, lsl #8 // encoding: [0x30,0xb4,0x00,0x2f]
|
||||
// CHECK: bic v0.8h, #0x1 // encoding: [0x20,0x94,0x00,0x6f]
|
||||
// CHECK: bic v31.8h, #0x1, lsl #8 // encoding: [0x3f,0xb4,0x00,0x6f]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Bitwise OR - immediate
|
||||
//----------------------------------------------------------------------
|
||||
orr v0.2s, #1
|
||||
orr v1.2s, #0
|
||||
orr v0.2s, #1, lsl #8
|
||||
orr v0.2s, #1, lsl #16
|
||||
orr v0.2s, #1, lsl #24
|
||||
orr v0.4s, #1
|
||||
orr v0.4s, #1, lsl #8
|
||||
orr v0.4s, #1, lsl #16
|
||||
orr v0.4s, #1, lsl #24
|
||||
orr v31.4h, #1
|
||||
orr v15.4h, #1, lsl #8
|
||||
orr v0.8h, #1
|
||||
orr v16.8h, #1, lsl #8
|
||||
|
||||
// CHECK: orr v0.2s, #0x1 // encoding: [0x20,0x14,0x00,0x0f]
|
||||
// CHECK: orr v1.2s, #0x0 // encoding: [0x01,0x14,0x00,0x0f]
|
||||
// CHECK: orr v0.2s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x0f]
|
||||
// CHECK: orr v0.2s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x0f]
|
||||
// CHECK: orr v0.2s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x0f]
|
||||
// CHECK: orr v0.4s, #0x1 // encoding: [0x20,0x14,0x00,0x4f]
|
||||
// CHECK: orr v0.4s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x4f]
|
||||
// CHECK: orr v0.4s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x4f]
|
||||
// CHECK: orr v0.4s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x4f]
|
||||
// CHECK: orr v31.4h, #0x1 // encoding: [0x3f,0x94,0x00,0x0f]
|
||||
// CHECK: orr v15.4h, #0x1, lsl #8 // encoding: [0x2f,0xb4,0x00,0x0f]
|
||||
// CHECK: orr v0.8h, #0x1 // encoding: [0x20,0x94,0x00,0x4f]
|
||||
// CHECK: orr v16.8h, #0x1, lsl #8 // encoding: [0x30,0xb4,0x00,0x4f]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Move Immediate Masked
|
||||
//----------------------------------------------------------------------
|
||||
movi v0.2s, #1, msl #8
|
||||
movi v1.2s, #1, msl #16
|
||||
movi v0.4s, #1, msl #8
|
||||
movi v31.4s, #1, msl #16
|
||||
|
||||
// CHECK: movi v0.2s, #0x1, msl #8 // encoding: [0x20,0xc4,0x00,0x0f]
|
||||
// CHECK: movi v1.2s, #0x1, msl #16 // encoding: [0x21,0xd4,0x00,0x0f]
|
||||
// CHECK: movi v0.4s, #0x1, msl #8 // encoding: [0x20,0xc4,0x00,0x4f]
|
||||
// CHECK: movi v31.4s, #0x1, msl #16 // encoding: [0x3f,0xd4,0x00,0x4f]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Move Inverted Immediate Masked
|
||||
//----------------------------------------------------------------------
|
||||
mvni v1.2s, #0x1, msl #8
|
||||
mvni v0.2s, #0x1, msl #16
|
||||
mvni v31.4s, #0x1, msl #8
|
||||
mvni v0.4s, #0x1, msl #16
|
||||
|
||||
// CHECK: mvni v1.2s, #0x1, msl #8 // encoding: [0x21,0xc4,0x00,0x2f]
|
||||
// CHECK: mvni v0.2s, #0x1, msl #16 // encoding: [0x20,0xd4,0x00,0x2f]
|
||||
// CHECK: mvni v31.4s, #0x1, msl #8 // encoding: [0x3f,0xc4,0x00,0x6f]
|
||||
// CHECK: mvni v0.4s, #0x1, msl #16 // encoding: [0x20,0xd4,0x00,0x6f]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Immediate - per byte
|
||||
//----------------------------------------------------------------------
|
||||
movi v0.8b, #0
|
||||
movi v31.8b, #0xff
|
||||
movi v15.16b, #0xf
|
||||
movi v31.16b, #0x1f
|
||||
|
||||
// CHECK: movi v0.8b, #0x0 // encoding: [0x00,0xe4,0x00,0x0f]
|
||||
// CHECK: movi v31.8b, #0xff // encoding: [0xff,0xe7,0x07,0x0f]
|
||||
// CHECK: movi v15.16b, #0xf // encoding: [0xef,0xe5,0x00,0x4f]
|
||||
// CHECK: movi v31.16b, #0x1f // encoding: [0xff,0xe7,0x00,0x4f]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Move Immediate - bytemask, per doubleword
|
||||
//---------------------------------------------------------------------
|
||||
movi v0.2d, #0xff00ff00ff00ff00
|
||||
|
||||
// CHECK: movi v0.2d, #0xff00ff00ff00ff00 // encoding: [0x40,0xe5,0x05,0x6f]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Move Immediate - bytemask, one doubleword
|
||||
//----------------------------------------------------------------------
|
||||
movi d0, #0xff00ff00ff00ff00
|
||||
|
||||
// CHECK: movi d0, #0xff00ff00ff00ff00 // encoding: [0x40,0xe5,0x05,0x2f]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Floating Point Move Immediate
|
||||
//----------------------------------------------------------------------
|
||||
fmov v1.2s, #1.0
|
||||
fmov v15.4s, #1.0
|
||||
fmov v31.2d, #1.0
|
||||
|
||||
// CHECK: fmov v1.2s, #1.00000000 // encoding: [0x01,0xf6,0x03,0x0f]
|
||||
// CHECK: fmov v15.4s, #1.00000000 // encoding: [0x0f,0xf6,0x03,0x4f]
|
||||
// CHECK: fmov v31.2d, #1.00000000 // encoding: [0x1f,0xf6,0x03,0x6f]
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Move - register
|
||||
//----------------------------------------------------------------------
|
||||
mov v0.8b, v31.8b
|
||||
mov v15.16b, v16.16b
|
||||
orr v0.8b, v31.8b, v31.8b
|
||||
orr v15.16b, v16.16b, v16.16b
|
||||
|
||||
// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
|
||||
// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
|
||||
// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
|
||||
// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
|
||||
|
86
test/MC/AArch64/neon-mul-div-instructions.s
Normal file
@ -0,0 +1,86 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Integer Mul
|
||||
//----------------------------------------------------------------------
|
||||
mul v0.8b, v1.8b, v2.8b
|
||||
mul v0.16b, v1.16b, v2.16b
|
||||
mul v0.4h, v1.4h, v2.4h
|
||||
mul v0.8h, v1.8h, v2.8h
|
||||
mul v0.2s, v1.2s, v2.2s
|
||||
mul v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: mul v0.8b, v1.8b, v2.8b // encoding: [0x20,0x9c,0x22,0x0e]
|
||||
// CHECK: mul v0.16b, v1.16b, v2.16b // encoding: [0x20,0x9c,0x22,0x4e]
|
||||
// CHECK: mul v0.4h, v1.4h, v2.4h // encoding: [0x20,0x9c,0x62,0x0e]
|
||||
// CHECK: mul v0.8h, v1.8h, v2.8h // encoding: [0x20,0x9c,0x62,0x4e]
|
||||
// CHECK: mul v0.2s, v1.2s, v2.2s // encoding: [0x20,0x9c,0xa2,0x0e]
|
||||
// CHECK: mul v0.4s, v1.4s, v2.4s // encoding: [0x20,0x9c,0xa2,0x4e]
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Floating-Point Mul
|
||||
//----------------------------------------------------------------------
|
||||
fmul v0.2s, v1.2s, v2.2s
|
||||
fmul v0.4s, v1.4s, v2.4s
|
||||
fmul v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: fmul v0.2s, v1.2s, v2.2s // encoding: [0x20,0xdc,0x22,0x2e]
|
||||
// CHECK: fmul v0.4s, v1.4s, v2.4s // encoding: [0x20,0xdc,0x22,0x6e]
|
||||
// CHECK: fmul v0.2d, v1.2d, v2.2d // encoding: [0x20,0xdc,0x62,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Floating-Point Div
|
||||
//----------------------------------------------------------------------
|
||||
fdiv v0.2s, v1.2s, v2.2s
|
||||
fdiv v0.4s, v1.4s, v2.4s
|
||||
fdiv v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: fdiv v0.2s, v1.2s, v2.2s // encoding: [0x20,0xfc,0x22,0x2e]
|
||||
// CHECK: fdiv v0.4s, v1.4s, v2.4s // encoding: [0x20,0xfc,0x22,0x6e]
|
||||
// CHECK: fdiv v0.2d, v1.2d, v2.2d // encoding: [0x20,0xfc,0x62,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Multiply (Polynomial)
|
||||
//----------------------------------------------------------------------
|
||||
pmul v17.8b, v31.8b, v16.8b
|
||||
pmul v0.16b, v1.16b, v2.16b
|
||||
|
||||
// CHECK: pmul v17.8b, v31.8b, v16.8b // encoding: [0xf1,0x9f,0x30,0x2e]
|
||||
// CHECK: pmul v0.16b, v1.16b, v2.16b // encoding: [0x20,0x9c,0x22,0x6e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Saturating Doubling Multiply High
|
||||
//----------------------------------------------------------------------
|
||||
sqdmulh v2.4h, v25.4h, v3.4h
|
||||
sqdmulh v12.8h, v5.8h, v13.8h
|
||||
sqdmulh v3.2s, v1.2s, v30.2s
|
||||
|
||||
// CHECK: sqdmulh v2.4h, v25.4h, v3.4h // encoding: [0x22,0xb7,0x63,0x0e]
|
||||
// CHECK: sqdmulh v12.8h, v5.8h, v13.8h // encoding: [0xac,0xb4,0x6d,0x4e]
|
||||
// CHECK: sqdmulh v3.2s, v1.2s, v30.2s // encoding: [0x23,0xb4,0xbe,0x0e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Saturating Rounding Doubling Multiply High
|
||||
//----------------------------------------------------------------------
|
||||
sqrdmulh v2.4h, v25.4h, v3.4h
|
||||
sqrdmulh v12.8h, v5.8h, v13.8h
|
||||
sqrdmulh v3.2s, v1.2s, v30.2s
|
||||
|
||||
// CHECK: sqrdmulh v2.4h, v25.4h, v3.4h // encoding: [0x22,0xb7,0x63,0x2e]
|
||||
// CHECK: sqrdmulh v12.8h, v5.8h, v13.8h // encoding: [0xac,0xb4,0x6d,0x6e]
|
||||
// CHECK: sqrdmulh v3.2s, v1.2s, v30.2s // encoding: [0x23,0xb4,0xbe,0x2e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Multiply Extended
|
||||
//----------------------------------------------------------------------
|
||||
fmulx v21.2s, v5.2s, v13.2s
|
||||
fmulx v1.4s, v25.4s, v3.4s
|
||||
fmulx v31.2d, v22.2d, v2.2d
|
||||
|
||||
// CHECK: fmulx v21.2s, v5.2s, v13.2s // encoding: [0xb5,0xdc,0x2d,0x0e]
|
||||
// CHECK: fmulx v1.4s, v25.4s, v3.4s // encoding: [0x21,0xdf,0x23,0x4e]
|
||||
// CHECK: fmulx v31.2d, v22.2d, v2.2d // encoding: [0xdf,0xde,0x62,0x4e]
|
||||
|
39
test/MC/AArch64/neon-rounding-halving-add.s
Normal file
@ -0,0 +1,39 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Rounding Halving Add (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
srhadd v0.8b, v1.8b, v2.8b
|
||||
srhadd v0.16b, v1.16b, v2.16b
|
||||
srhadd v0.4h, v1.4h, v2.4h
|
||||
srhadd v0.8h, v1.8h, v2.8h
|
||||
srhadd v0.2s, v1.2s, v2.2s
|
||||
srhadd v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: srhadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x14,0x22,0x0e]
|
||||
// CHECK: srhadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x14,0x22,0x4e]
|
||||
// CHECK: srhadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0x62,0x0e]
|
||||
// CHECK: srhadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0x62,0x4e]
|
||||
// CHECK: srhadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x14,0xa2,0x0e]
|
||||
// CHECK: srhadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x14,0xa2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Rounding Halving Add (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
urhadd v0.8b, v1.8b, v2.8b
|
||||
urhadd v0.16b, v1.16b, v2.16b
|
||||
urhadd v0.4h, v1.4h, v2.4h
|
||||
urhadd v0.8h, v1.8h, v2.8h
|
||||
urhadd v0.2s, v1.2s, v2.2s
|
||||
urhadd v0.4s, v1.4s, v2.4s
|
||||
|
||||
// CHECK: urhadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x14,0x22,0x2e]
|
||||
// CHECK: urhadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x14,0x22,0x6e]
|
||||
// CHECK: urhadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0x62,0x2e]
|
||||
// CHECK: urhadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0x62,0x6e]
|
||||
// CHECK: urhadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x14,0xa2,0x2e]
|
||||
// CHECK: urhadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x14,0xa2,0x6e]
|
||||
|
57
test/MC/AArch64/neon-rounding-shift.s
Normal file
@ -0,0 +1,57 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Rounding Shift Left (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
srshl v0.8b, v1.8b, v2.8b
|
||||
srshl v0.16b, v1.16b, v2.16b
|
||||
srshl v0.4h, v1.4h, v2.4h
|
||||
srshl v0.8h, v1.8h, v2.8h
|
||||
srshl v0.2s, v1.2s, v2.2s
|
||||
srshl v0.4s, v1.4s, v2.4s
|
||||
srshl v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: srshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x54,0x22,0x0e]
|
||||
// CHECK: srshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x54,0x22,0x4e]
|
||||
// CHECK: srshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x54,0x62,0x0e]
|
||||
// CHECK: srshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x54,0x62,0x4e]
|
||||
// CHECK: srshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x54,0xa2,0x0e]
|
||||
// CHECK: srshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x4e]
|
||||
// CHECK: srshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Rounding Shift Left (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
urshl v0.8b, v1.8b, v2.8b
|
||||
urshl v0.16b, v1.16b, v2.16b
|
||||
urshl v0.4h, v1.4h, v2.4h
|
||||
urshl v0.8h, v1.8h, v2.8h
|
||||
urshl v0.2s, v1.2s, v2.2s
|
||||
urshl v0.4s, v1.4s, v2.4s
|
||||
urshl v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: urshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x54,0x22,0x2e]
|
||||
// CHECK: urshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x54,0x22,0x6e]
|
||||
// CHECK: urshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x54,0x62,0x2e]
|
||||
// CHECK: urshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x54,0x62,0x6e]
|
||||
// CHECK: urshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x54,0xa2,0x2e]
|
||||
// CHECK: urshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x6e]
|
||||
// CHECK: urshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x6e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Rounding Shift Left (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
srshl d17, d31, d8
|
||||
|
||||
// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Rounding Shift Left (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
urshl d17, d31, d8
|
||||
|
||||
// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e]
|
||||
|
133
test/MC/AArch64/neon-saturating-add-sub.s
Normal file
@ -0,0 +1,133 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Saturating Add (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sqadd v0.8b, v1.8b, v2.8b
|
||||
sqadd v0.16b, v1.16b, v2.16b
|
||||
sqadd v0.4h, v1.4h, v2.4h
|
||||
sqadd v0.8h, v1.8h, v2.8h
|
||||
sqadd v0.2s, v1.2s, v2.2s
|
||||
sqadd v0.4s, v1.4s, v2.4s
|
||||
sqadd v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: sqadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x0c,0x22,0x0e]
|
||||
// CHECK: sqadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x0c,0x22,0x4e]
|
||||
// CHECK: sqadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x0c,0x62,0x0e]
|
||||
// CHECK: sqadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x0c,0x62,0x4e]
|
||||
// CHECK: sqadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x0c,0xa2,0x0e]
|
||||
// CHECK: sqadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x0c,0xa2,0x4e]
|
||||
// CHECK: sqadd v0.2d, v1.2d, v2.2d // encoding: [0x20,0x0c,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Saturating Add (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uqadd v0.8b, v1.8b, v2.8b
|
||||
uqadd v0.16b, v1.16b, v2.16b
|
||||
uqadd v0.4h, v1.4h, v2.4h
|
||||
uqadd v0.8h, v1.8h, v2.8h
|
||||
uqadd v0.2s, v1.2s, v2.2s
|
||||
uqadd v0.4s, v1.4s, v2.4s
|
||||
uqadd v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: uqadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x0c,0x22,0x2e]
|
||||
// CHECK: uqadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x0c,0x22,0x6e]
|
||||
// CHECK: uqadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x0c,0x62,0x2e]
|
||||
// CHECK: uqadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x0c,0x62,0x6e]
|
||||
// CHECK: uqadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x0c,0xa2,0x2e]
|
||||
// CHECK: uqadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x0c,0xa2,0x6e]
|
||||
// CHECK: uqadd v0.2d, v1.2d, v2.2d // encoding: [0x20,0x0c,0xe2,0x6e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Saturating Sub (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sqsub v0.8b, v1.8b, v2.8b
|
||||
sqsub v0.16b, v1.16b, v2.16b
|
||||
sqsub v0.4h, v1.4h, v2.4h
|
||||
sqsub v0.8h, v1.8h, v2.8h
|
||||
sqsub v0.2s, v1.2s, v2.2s
|
||||
sqsub v0.4s, v1.4s, v2.4s
|
||||
sqsub v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: sqsub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x2c,0x22,0x0e]
|
||||
// CHECK: sqsub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x2c,0x22,0x4e]
|
||||
// CHECK: sqsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x2c,0x62,0x0e]
|
||||
// CHECK: sqsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x2c,0x62,0x4e]
|
||||
// CHECK: sqsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x2c,0xa2,0x0e]
|
||||
// CHECK: sqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x4e]
|
||||
// CHECK: sqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Saturating Sub (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uqsub v0.8b, v1.8b, v2.8b
|
||||
uqsub v0.16b, v1.16b, v2.16b
|
||||
uqsub v0.4h, v1.4h, v2.4h
|
||||
uqsub v0.8h, v1.8h, v2.8h
|
||||
uqsub v0.2s, v1.2s, v2.2s
|
||||
uqsub v0.4s, v1.4s, v2.4s
|
||||
uqsub v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: uqsub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x2c,0x22,0x2e]
|
||||
// CHECK: uqsub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x2c,0x22,0x6e]
|
||||
// CHECK: uqsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x2c,0x62,0x2e]
|
||||
// CHECK: uqsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x2c,0x62,0x6e]
|
||||
// CHECK: uqsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x2c,0xa2,0x2e]
|
||||
// CHECK: uqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x6e]
|
||||
// CHECK: uqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x6e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Add (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sqadd b0, b1, b2
|
||||
sqadd h10, h11, h12
|
||||
sqadd s20, s21, s2
|
||||
sqadd d17, d31, d8
|
||||
|
||||
// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e]
|
||||
// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e]
|
||||
// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e]
|
||||
// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Add (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uqadd b0, b1, b2
|
||||
uqadd h10, h11, h12
|
||||
uqadd s20, s21, s2
|
||||
uqadd d17, d31, d8
|
||||
|
||||
// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e]
|
||||
// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e]
|
||||
// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e]
|
||||
// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Sub (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sqsub b0, b1, b2
|
||||
sqsub h10, h11, h12
|
||||
sqsub s20, s21, s2
|
||||
sqsub d17, d31, d8
|
||||
|
||||
// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e]
|
||||
// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e]
|
||||
// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e]
|
||||
// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Sub (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uqsub b0, b1, b2
|
||||
uqsub h10, h11, h12
|
||||
uqsub s20, s21, s2
|
||||
uqsub d17, d31, d8
|
||||
|
||||
// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e]
|
||||
// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e]
|
||||
// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
|
||||
// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]
|
||||
|
70
test/MC/AArch64/neon-saturating-rounding-shift.s
Normal file
@ -0,0 +1,70 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Saturating Rounding Shift Left (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sqrshl v0.8b, v1.8b, v2.8b
|
||||
sqrshl v0.16b, v1.16b, v2.16b
|
||||
sqrshl v0.4h, v1.4h, v2.4h
|
||||
sqrshl v0.8h, v1.8h, v2.8h
|
||||
sqrshl v0.2s, v1.2s, v2.2s
|
||||
sqrshl v0.4s, v1.4s, v2.4s
|
||||
sqrshl v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: sqrshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x5c,0x22,0x0e]
|
||||
// CHECK: sqrshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x5c,0x22,0x4e]
|
||||
// CHECK: sqrshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x5c,0x62,0x0e]
|
||||
// CHECK: sqrshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x5c,0x62,0x4e]
|
||||
// CHECK: sqrshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x5c,0xa2,0x0e]
|
||||
// CHECK: sqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x4e]
|
||||
// CHECK: sqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Saturating Rounding Shift Left (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uqrshl v0.8b, v1.8b, v2.8b
|
||||
uqrshl v0.16b, v1.16b, v2.16b
|
||||
uqrshl v0.4h, v1.4h, v2.4h
|
||||
uqrshl v0.8h, v1.8h, v2.8h
|
||||
uqrshl v0.2s, v1.2s, v2.2s
|
||||
uqrshl v0.4s, v1.4s, v2.4s
|
||||
uqrshl v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: uqrshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x5c,0x22,0x2e]
|
||||
// CHECK: uqrshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x5c,0x22,0x6e]
|
||||
// CHECK: uqrshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x5c,0x62,0x2e]
|
||||
// CHECK: uqrshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x5c,0x62,0x6e]
|
||||
// CHECK: uqrshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x5c,0xa2,0x2e]
|
||||
// CHECK: uqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x6e]
|
||||
// CHECK: uqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x6e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Rounding Shift Left (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sqrshl b0, b1, b2
|
||||
sqrshl h10, h11, h12
|
||||
sqrshl s20, s21, s2
|
||||
sqrshl d17, d31, d8
|
||||
|
||||
// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e]
|
||||
// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e]
|
||||
// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e]
|
||||
// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Rounding Shift Left (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uqrshl b0, b1, b2
|
||||
uqrshl h10, h11, h12
|
||||
uqrshl s20, s21, s2
|
||||
uqrshl d17, d31, d8
|
||||
|
||||
// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e]
|
||||
// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e]
|
||||
// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e]
|
||||
// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e]
|
||||
|
||||
|
69
test/MC/AArch64/neon-saturating-shift.s
Normal file
@ -0,0 +1,69 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Saturating Shift Left (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sqshl v0.8b, v1.8b, v2.8b
|
||||
sqshl v0.16b, v1.16b, v2.16b
|
||||
sqshl v0.4h, v1.4h, v2.4h
|
||||
sqshl v0.8h, v1.8h, v2.8h
|
||||
sqshl v0.2s, v1.2s, v2.2s
|
||||
sqshl v0.4s, v1.4s, v2.4s
|
||||
sqshl v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: sqshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x4c,0x22,0x0e]
|
||||
// CHECK: sqshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x4c,0x22,0x4e]
|
||||
// CHECK: sqshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x4c,0x62,0x0e]
|
||||
// CHECK: sqshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x4c,0x62,0x4e]
|
||||
// CHECK: sqshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x4c,0xa2,0x0e]
|
||||
// CHECK: sqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x4e]
|
||||
// CHECK: sqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Saturating Shift Left (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uqshl v0.8b, v1.8b, v2.8b
|
||||
uqshl v0.16b, v1.16b, v2.16b
|
||||
uqshl v0.4h, v1.4h, v2.4h
|
||||
uqshl v0.8h, v1.8h, v2.8h
|
||||
uqshl v0.2s, v1.2s, v2.2s
|
||||
uqshl v0.4s, v1.4s, v2.4s
|
||||
uqshl v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: uqshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x4c,0x22,0x2e]
|
||||
// CHECK: uqshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x4c,0x22,0x6e]
|
||||
// CHECK: uqshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x4c,0x62,0x2e]
|
||||
// CHECK: uqshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x4c,0x62,0x6e]
|
||||
// CHECK: uqshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x4c,0xa2,0x2e]
|
||||
// CHECK: uqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x6e]
|
||||
// CHECK: uqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x6e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Shift Left (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sqshl b0, b1, b2
|
||||
sqshl h10, h11, h12
|
||||
sqshl s20, s21, s2
|
||||
sqshl d17, d31, d8
|
||||
|
||||
// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e]
|
||||
// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e]
|
||||
// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e]
|
||||
// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Shift Left (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
uqshl b0, b1, b2
|
||||
uqshl h10, h11, h12
|
||||
uqshl s20, s21, s2
|
||||
uqshl d17, d31, d8
|
||||
|
||||
// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e]
|
||||
// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e]
|
||||
// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e]
|
||||
// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e]
|
||||
|
57
test/MC/AArch64/neon-shift.s
Normal file
@ -0,0 +1,57 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Shift Left (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sshl v0.8b, v1.8b, v2.8b
|
||||
sshl v0.16b, v1.16b, v2.16b
|
||||
sshl v0.4h, v1.4h, v2.4h
|
||||
sshl v0.8h, v1.8h, v2.8h
|
||||
sshl v0.2s, v1.2s, v2.2s
|
||||
sshl v0.4s, v1.4s, v2.4s
|
||||
sshl v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: sshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x44,0x22,0x0e]
|
||||
// CHECK: sshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x44,0x22,0x4e]
|
||||
// CHECK: sshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x44,0x62,0x0e]
|
||||
// CHECK: sshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x44,0x62,0x4e]
|
||||
// CHECK: sshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x44,0xa2,0x0e]
|
||||
// CHECK: sshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x44,0xa2,0x4e]
|
||||
// CHECK: sshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x4e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vector Integer Shift Left (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
ushl v0.8b, v1.8b, v2.8b
|
||||
ushl v0.16b, v1.16b, v2.16b
|
||||
ushl v0.4h, v1.4h, v2.4h
|
||||
ushl v0.8h, v1.8h, v2.8h
|
||||
ushl v0.2s, v1.2s, v2.2s
|
||||
ushl v0.4s, v1.4s, v2.4s
|
||||
ushl v0.2d, v1.2d, v2.2d
|
||||
|
||||
// CHECK: ushl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x44,0x22,0x2e]
|
||||
// CHECK: ushl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x44,0x22,0x6e]
|
||||
// CHECK: ushl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x44,0x62,0x2e]
|
||||
// CHECK: ushl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x44,0x62,0x6e]
|
||||
// CHECK: ushl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x44,0xa2,0x2e]
|
||||
// CHECK: ushl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x44,0xa2,0x6e]
|
||||
// CHECK: ushl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x6e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Shift Left (Signed)
|
||||
//------------------------------------------------------------------------------
|
||||
sshl d17, d31, d8
|
||||
|
||||
// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e]
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Scalar Integer Shift Left (Unsigned)
|
||||
//------------------------------------------------------------------------------
|
||||
ushl d17, d31, d8
|
||||
|
||||
// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e]
|
||||
|
28
test/MC/AArch64/noneon-diagnostics.s
Normal file
@ -0,0 +1,28 @@
|
||||
// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-neon < %s 2> %t
|
||||
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
|
||||
|
||||
fmla v3.4s, v12.4s, v17.4s
|
||||
fmla v1.2d, v30.2d, v20.2d
|
||||
fmla v9.2s, v9.2s, v0.2s
|
||||
// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled
|
||||
// CHECK-ERROR-NEXT: fmla v3.4s, v12.4s, v17.4s
|
||||
// CHECK-ERROR-NEXT: ^
|
||||
// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
|
||||
// CHECK-ERROR-NEXT: fmla v1.2d, v30.2d, v20.2d
|
||||
// CHECK-ERROR-NEXT: ^
|
||||
// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
|
||||
// CHECK-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s
|
||||
// CHECK-ERROR-NEXT: ^
|
||||
|
||||
fmls v3.4s, v12.4s, v17.4s
|
||||
fmls v1.2d, v30.2d, v20.2d
|
||||
fmls v9.2s, v9.2s, v0.2s
|
||||
// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled
|
||||
// CHECK-ERROR-NEXT: fmls v3.4s, v12.4s, v17.4s
|
||||
// CHECK-ERROR-NEXT: ^
|
||||
// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
|
||||
// CHECK-ERROR-NEXT: fmls v1.2d, v30.2d, v20.2d
|
||||
// CHECK-ERROR-NEXT: ^
|
||||
// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
|
||||
// CHECK-ERROR-NEXT: fmls v9.2s, v9.2s, v0.2s
|
||||
// CHECK-ERROR-NEXT: ^
|
673
test/MC/Disassembler/AArch64/neon-instructions.txt
Normal file
@ -0,0 +1,673 @@
|
||||
# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Integer Add/Sub
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: add v31.8b, v31.8b, v31.8b
|
||||
# CHECK: sub v0.2d, v0.2d, v0.2d
|
||||
0xff 0x87 0x3f 0x0e
|
||||
0x00 0x84 0xe0 0x6e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Floating-Point Add/Sub
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
# CHECK: fadd v0.4s, v0.4s, v0.4s
|
||||
# CHECK: fsub v31.2s, v31.2s, v31.2s
|
||||
0x00 0xd4 0x20 0x4e
|
||||
0xff 0xd7 0xbf 0x0e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Integer Mul
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: mul v0.8b, v1.8b, v2.8b
|
||||
0x20 0x9c 0x22 0x0e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Floating-Point Mul/Div
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: fmul v0.2s, v1.2s, v2.2s
|
||||
# CHECK: fdiv v31.2s, v31.2s, v31.2s
|
||||
0x20 0xdc 0x22 0x2e
|
||||
0xff 0xff 0x3f 0x2e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Polynomial Multiply
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: pmul v0.8b, v15.8b, v16.8b
|
||||
# CHECK: pmul v31.16b, v7.16b, v8.16b
|
||||
0xe0 0x9d 0x30 0x2e
|
||||
0xff 0x9c 0x28 0x6e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector And, Orr, Eor, Orn, Bic
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: and v2.8b, v2.8b, v2.8b
|
||||
# CHECK: orr v31.16b, v31.16b, v30.16b
|
||||
# CHECK: eor v0.16b, v1.16b, v2.16b
|
||||
# CHECK: orn v9.16b, v10.16b, v11.16b
|
||||
# CHECK: bic v31.8b, v30.8b, v29.8b
|
||||
0x42 0x1c 0x22 0x0e
|
||||
0xff 0x1f 0xbe 0x4e
|
||||
0x20 0x1c 0x22 0x6e
|
||||
0x49 0x1d 0xeb 0x4e
|
||||
0xdf 0x1f 0x7d 0x0e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Bsl, Bit, Bif
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: bsl v0.8b, v1.8b, v2.8b
|
||||
# CHECK: bit v31.16b, v31.16b, v31.16b
|
||||
# CHECK: bif v0.16b, v1.16b, v2.16b
|
||||
0x20 0x1c 0x62 0x2e
|
||||
0xff 0x1f 0xbf 0x6e
|
||||
0x20 0x1c 0xe2 0x6e
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Integer Multiply-accumulate and Multiply-subtract
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: mla v0.8b, v1.8b, v2.8b
|
||||
# CHECK: mls v31.4h, v31.4h, v31.4h
|
||||
0x20 0x94 0x22 0x0e
|
||||
0xff 0x97 0x7f 0x2e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Floating-Point Multiply-accumulate and Multiply-subtract
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: fmla v0.2s, v1.2s, v2.2s
|
||||
# CHECK: fmls v31.2s, v31.2s, v31.2s
|
||||
0x20 0xcc 0x22 0x0e
|
||||
0xff 0xcf 0xbf 0x0e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Move Immediate Shifted
|
||||
# Vector Move Inverted Immediate Shifted
|
||||
# Vector Bitwise Bit Clear (AND NOT) - immediate
|
||||
# Vector Bitwise OR - immediate
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: movi v31.4s, #0xff, lsl #24
|
||||
# CHECK: mvni v0.2s, #0x0
|
||||
# CHECK: bic v15.4h, #0xf, lsl #8
|
||||
# CHECK: orr v16.8h, #0x1f
|
||||
0xff 0x67 0x07 0x4f
|
||||
0x00 0x04 0x00 0x2f
|
||||
0xef 0xb5 0x00 0x2f
|
||||
0xf0 0x97 0x00 0x4f
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Move Immediate Masked
|
||||
# Vector Move Inverted Immediate Masked
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: movi v8.2s, #0x8, msl #8
|
||||
# CHECK: mvni v16.4s, #0x10, msl #16
|
||||
0x08 0xc5 0x00 0x0f
|
||||
0x10 0xd6 0x00 0x6f
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Immediate - per byte
|
||||
# Vector Move Immediate - bytemask, per doubleword
|
||||
# Vector Move Immediate - bytemask, one doubleword
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: movi v16.8b, #0xff
|
||||
# CHECK: movi v31.16b, #0x1f
|
||||
# CHECK: movi d15, #0xff00ff00ff00ff
|
||||
# CHECK: movi v31.2d, #0xff0000ff0000ffff
|
||||
0xf0 0xe7 0x07 0x0f
|
||||
0xff 0xe7 0x00 0x4f
|
||||
0xaf 0xe6 0x02 0x2f
|
||||
0x7f 0xe6 0x04 0x6f
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Floating Point Move Immediate
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: fmov v0.2s, #13.0
|
||||
# CHECK: fmov v15.4s, #1.0
|
||||
# CHECK: fmov v31.2d, #-1.25
|
||||
0x40 0xf5 0x01 0x0f
|
||||
0x0f 0xf6 0x03 0x4f
|
||||
0x9f 0xf6 0x07 0x6f
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Move - register
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: mov v1.16b, v15.16b
|
||||
# CHECK: mov v25.8b, v4.8b
|
||||
0xe1 0x1d 0xaf 0x4e
|
||||
0x99 0x1c 0xa4 0x0e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Absolute Difference and Accumulate (Signed, Unsigned)
|
||||
# Vector Absolute Difference (Signed, Unsigned)
|
||||
# Vector Absolute Difference (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
|
||||
# CHECK: uaba v0.8b, v1.8b, v2.8b
|
||||
# CHECK: saba v31.16b, v30.16b, v29.16b
|
||||
# CHECK: uabd v15.4h, v16.4h, v17.4h
|
||||
# CHECK: sabd v5.4h, v4.4h, v6.4h
|
||||
# CHECK: fabd v1.4s, v31.4s, v16.4s
|
||||
0x20 0x7c 0x22 0x2e
|
||||
0xdf 0x7f 0x3d 0x4e
|
||||
0x0f 0x76 0x71 0x2e
|
||||
0x85 0x74 0x66 0x0e
|
||||
0xe1 0xd7 0xb0 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Integer Add
|
||||
# Scalar Integer Sub
|
||||
#----------------------------------------------------------------------
|
||||
|
||||
# CHECK: add d17, d31, d29
|
||||
# CHECK: sub d15, d5, d16
|
||||
0xf1 0x87 0xfd 0x5e
|
||||
0xaf 0x84 0xf0 0x7e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Reciprocal Square Root Step (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: frsqrts v31.2d, v15.2d, v8.2d
|
||||
0xff 0xfd 0xe8 0x4e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Reciprocal Step (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: frecps v5.4s, v7.4s, v16.4s
|
||||
0xe5 0xfc 0x30 0x4e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: facge v0.4s, v31.4s, v16.4s
|
||||
0xe0 0xef 0x30 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Absolute Compare Mask Less Than (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: facgt v31.2d, v29.2d, v28.2d
|
||||
0xbf 0xef 0xfc 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Equal (Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmeq v5.16b, v15.16b, v31.16b
|
||||
0xe5 0x8d 0x3f 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Higher or Same (Unsigned Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmhs v1.8b, v16.8b, v30.8b
|
||||
0x01 0x3e 0x3e 0x2e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Greater Than or Equal (Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmge v20.4h, v11.4h, v23.4h
|
||||
0x74 0x3d 0x77 0x0e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Higher (Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: cmhi v13.8h, v3.8h, v27.8h
|
||||
0x6d 0x34 0x7b 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Greater Than (Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmgt v9.4s, v4.4s, v28.4s
|
||||
0x89 0x34 0xbc 0x4e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Bitwise Test (Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmtst v21.2s, v19.2s, v18.2s
|
||||
0x75 0x8e 0xb2 0x0e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Equal (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fcmeq v0.2s, v15.2s, v16.2s
|
||||
0xe0 0xe5 0x30 0x0e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Greater Than Or Equal (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fcmge v31.4s, v7.4s, v29.4s
|
||||
0xff 0xe4 0x3d 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Greater Than (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fcmgt v17.4s, v8.4s, v25.4s
|
||||
0x11 0xe5 0xb9 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Equal to Zero (Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmeq v31.16b, v15.16b, #0x0
|
||||
0xff 0x99 0x20 0x4e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmge v3.8b, v15.8b, #0x0
|
||||
0xe3 0x89 0x20 0x2e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Greater Than Zero (Signed Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmgt v22.2s, v9.2s, #0x0
|
||||
0x36 0x89 0xa0 0x0e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmle v5.2d, v14.2d, #0x0
|
||||
0xc5 0x99 0xe0 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Less Than Zero (Signed Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: cmlt v13.8h, v11.8h, #0x0
|
||||
0x6d 0xa9 0x60 0x4e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Equal to Zero (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fcmeq v15.2s, v21.2s, #0.0
|
||||
0xaf 0xda 0xa0 0x0e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fcmge v14.2d, v13.2d, #0.0
|
||||
0xae 0xc9 0xe0 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Greater Than Zero (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fcmgt v9.4s, v23.4s, #0.0
|
||||
0xe9 0xca 0xa0 0x4e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Less Than or Equal To Zero (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fcmle v11.2d, v6.2d, #0.0
|
||||
0xcb 0xd8 0xe0 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Compare Mask Less Than Zero (Floating Point)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fcmlt v12.4s, v25.4s, #0.0
|
||||
0x2c 0xeb 0xa0 0x4e
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Integer Halving Add (Signed)
|
||||
# Vector Integer Halving Add (Unsigned)
|
||||
# Vector Integer Halving Sub (Signed)
|
||||
# Vector Integer Halving Sub (Unsigned)
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: shadd v0.8b, v31.8b, v29.8b
|
||||
# CHECK: uhadd v15.16b, v16.16b, v17.16b
|
||||
# CHECK: shsub v0.4h, v1.4h, v2.4h
|
||||
# CHECK: uhadd v5.8h, v7.8h, v8.8h
|
||||
# CHECK: shsub v9.2s, v11.2s, v21.2s
|
||||
# CHECK: uhsub v22.4s, v30.4s, v19.4s
|
||||
0xe0 0x07 0x3d 0x0e
|
||||
0x0f 0x06 0x31 0x6e
|
||||
0x20 0x24 0x62 0x0e
|
||||
0xe5 0x04 0x68 0x6e
|
||||
0x69 0x25 0xb5 0x0e
|
||||
0xd6 0x27 0xb3 0x6e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Integer Rounding Halving Add (Signed)
|
||||
# Vector Integer Rounding Halving Add (Unsigned)
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: srhadd v3.8b, v5.8b, v7.8b
|
||||
# CHECK: urhadd v7.16b, v17.16b, v27.16b
|
||||
# CHECK: srhadd v10.4h, v11.4h, v13.4h
|
||||
# CHECK: urhadd v1.8h, v2.8h, v3.8h
|
||||
# CHECK: srhadd v4.2s, v5.2s, v6.2s
|
||||
# CHECK: urhadd v7.4s, v7.4s, v7.4s
|
||||
0xa3 0x14 0x27 0x0e
|
||||
0x27 0x16 0x3b 0x6e
|
||||
0x6a 0x15 0x6d 0x0e
|
||||
0x41 0x14 0x63 0x6e
|
||||
0xa4 0x14 0xa6 0x0e
|
||||
0xe7 0x14 0xa7 0x6e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Integer Saturating Add (Signed)
|
||||
# Vector Integer Saturating Add (Unsigned)
|
||||
# Vector Integer Saturating Sub (Signed)
|
||||
# Vector Integer Saturating Sub (Unsigned)
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: sqsub v0.8b, v1.8b, v2.8b
|
||||
# CHECK: sqadd v0.16b, v1.16b, v2.16b
|
||||
# CHECK: uqsub v0.4h, v1.4h, v2.4h
|
||||
# CHECK: uqadd v0.8h, v1.8h, v2.8h
|
||||
# CHECK: sqadd v0.2s, v1.2s, v2.2s
|
||||
# CHECK: sqsub v0.4s, v1.4s, v2.4s
|
||||
# CHECK: sqsub v0.2d, v1.2d, v2.2d
|
||||
0x20 0x2c 0x22 0x0e
|
||||
0x20 0x0c 0x22 0x4e
|
||||
0x20 0x2c 0x62 0x2e
|
||||
0x20 0x0c 0x62 0x6e
|
||||
0x20 0x0c 0xa2 0x0e
|
||||
0x20 0x2c 0xa2 0x4e
|
||||
0x20 0x2c 0xe2 0x4e
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Scalar Integer Saturating Add (Signed)
|
||||
# Scalar Integer Saturating Add (Unsigned)
|
||||
# Scalar Integer Saturating Sub (Signed)
|
||||
# Scalar Integer Saturating Sub (Unsigned)
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: sqadd b20, b11, b15
|
||||
# CHECK: uqadd h0, h1, h5
|
||||
# CHECK: sqsub s20, s10, s7
|
||||
# CHECK: uqsub d16, d16, d16
|
||||
0x74 0x0d 0x2f 0x5e
|
||||
0x20 0x0c 0x65 0x7e
|
||||
0x54 0x2d 0xa7 0x5e
|
||||
0x10 0x2e 0xf0 0x7e
|
||||
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Shift Left (Signed and Unsigned Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: sshl v10.8b, v15.8b, v22.8b
|
||||
# CHECK: ushl v10.16b, v5.16b, v2.16b
|
||||
# CHECK: sshl v10.4h, v15.4h, v22.4h
|
||||
# CHECK: ushl v10.8h, v5.8h, v2.8h
|
||||
# CHECK: sshl v10.2s, v15.2s, v22.2s
|
||||
# CHECK: ushl v10.4s, v5.4s, v2.4s
|
||||
# CHECK: sshl v0.2d, v1.2d, v2.2d
|
||||
0xea 0x45 0x36 0x0e
|
||||
0xaa 0x44 0x22 0x6e
|
||||
0xea 0x45 0x76 0x0e
|
||||
0xaa 0x44 0x62 0x6e
|
||||
0xea 0x45 0xb6 0x0e
|
||||
0xaa 0x44 0xa2 0x6e
|
||||
0x20 0x44 0xe2 0x4e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Saturating Shift Left (Signed and Unsigned Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: sqshl v1.8b, v15.8b, v22.8b
|
||||
# CHECK: uqshl v2.16b, v14.16b, v23.16b
|
||||
# CHECK: sqshl v3.4h, v13.4h, v24.4h
|
||||
# CHECK: uqshl v4.8h, v12.8h, v25.8h
|
||||
# CHECK: sqshl v5.2s, v11.2s, v26.2s
|
||||
# CHECK: uqshl v6.4s, v10.4s, v27.4s
|
||||
# CHECK: uqshl v0.2d, v1.2d, v2.2d
|
||||
0xe1 0x4d 0x36 0x0e
|
||||
0xc2 0x4d 0x37 0x6e
|
||||
0xa3 0x4d 0x78 0x0e
|
||||
0x84 0x4d 0x79 0x6e
|
||||
0x65 0x4d 0xba 0x0e
|
||||
0x46 0x4d 0xbb 0x6e
|
||||
0x20 0x4c 0xe2 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Rounding Shift Left (Signed and Unsigned Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: srshl v10.8b, v5.8b, v22.8b
|
||||
# CHECK: urshl v10.16b, v5.16b, v2.16b
|
||||
# CHECK: srshl v1.4h, v5.4h, v31.4h
|
||||
# CHECK: urshl v1.8h, v5.8h, v2.8h
|
||||
# CHECK: srshl v10.2s, v15.2s, v2.2s
|
||||
# CHECK: urshl v1.4s, v5.4s, v2.4s
|
||||
# CHECK: urshl v0.2d, v1.2d, v2.2d
|
||||
0xaa 0x54 0x36 0x0e
|
||||
0xaa 0x54 0x22 0x6e
|
||||
0xa1 0x54 0x7f 0x0e
|
||||
0xa1 0x54 0x62 0x6e
|
||||
0xea 0x55 0xa2 0x0e
|
||||
0xa1 0x54 0xa2 0x6e
|
||||
0x20 0x54 0xe2 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: sqrshl v1.8b, v15.8b, v22.8b
|
||||
# CHECK: uqrshl v2.16b, v14.16b, v23.16b
|
||||
# CHECK: sqrshl v3.4h, v13.4h, v24.4h
|
||||
# CHECK: uqrshl v4.8h, v12.8h, v25.8h
|
||||
# CHECK: sqrshl v5.2s, v11.2s, v26.2s
|
||||
# CHECK: uqrshl v6.4s, v10.4s, v27.4s
|
||||
# CHECK: uqrshl v6.4s, v10.4s, v27.4s
|
||||
0xe1 0x5d 0x36 0x0e
|
||||
0xc2 0x5d 0x37 0x6e
|
||||
0xa3 0x5d 0x78 0x0e
|
||||
0x84 0x5d 0x79 0x6e
|
||||
0x65 0x5d 0xba 0x0e
|
||||
0x46 0x5d 0xbb 0x6e
|
||||
0x46 0x5d 0xbb 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Integer Shift Left (Signed, Unsigned)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: sshl d31, d31, d31
|
||||
# CHECK: ushl d0, d0, d0
|
||||
0xff 0x47 0xff 0x5e
|
||||
0x00 0x44 0xe0 0x7e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Integer Saturating Shift Left (Signed, Unsigned)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: sqshl d31, d31, d31
|
||||
# CHECK: uqshl s23, s20, s16
|
||||
# CHECK: sqshl h3, h4, h15
|
||||
# CHECK: uqshl b11, b20, b30
|
||||
0xff 0x4f 0xff 0x5e
|
||||
0x97 0x4e 0xb0 0x7e
|
||||
0x83 0x4c 0x6f 0x5e
|
||||
0x8b 0x4e 0x3e 0x7e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Integer Rounding Shift Left (Signed, Unsigned)
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: srshl d16, d16, d16
|
||||
# CHECK: urshl d8, d7, d4
|
||||
0x10 0x56 0xf0 0x5e
|
||||
0xe8 0x54 0xe4 0x7e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
|
#----------------------------------------------------------------------
# CHECK: sqrshl d31, d31, d31
# CHECK: uqrshl s23, s20, s16
# CHECK: sqrshl h3, h4, h15
# CHECK: uqrshl b11, b20, b30
0xff 0x5f 0xff 0x5e
0x97 0x5e 0xb0 0x7e
0x83 0x5c 0x6f 0x5e
0x8b 0x5e 0x3e 0x7e

#----------------------------------------------------------------------
# Vector Maximum (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: smax v1.8b, v15.8b, v22.8b
# CHECK: umax v2.16b, v14.16b, v23.16b
# CHECK: smax v3.4h, v13.4h, v24.4h
# CHECK: umax v4.8h, v12.8h, v25.8h
# CHECK: smax v5.2s, v11.2s, v26.2s
# CHECK: umax v6.4s, v10.4s, v27.4s
0xe1 0x65 0x36 0x0e
0xc2 0x65 0x37 0x6e
0xa3 0x65 0x78 0x0e
0x84 0x65 0x79 0x6e
0x65 0x65 0xba 0x0e
0x46 0x65 0xbb 0x6e

#----------------------------------------------------------------------
# Vector Minimum (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: umin v1.8b, v15.8b, v22.8b
# CHECK: smin v2.16b, v14.16b, v23.16b
# CHECK: umin v3.4h, v13.4h, v24.4h
# CHECK: smin v4.8h, v12.8h, v25.8h
# CHECK: umin v5.2s, v11.2s, v26.2s
# CHECK: smin v6.4s, v10.4s, v27.4s
0xe1 0x6d 0x36 0x2e
0xc2 0x6d 0x37 0x4e
0xa3 0x6d 0x78 0x2e
0x84 0x6d 0x79 0x4e
0x65 0x6d 0xba 0x2e
0x46 0x6d 0xbb 0x4e

#----------------------------------------------------------------------
# Vector Maximum (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmax v29.2s, v28.2s, v25.2s
# CHECK: fmax v9.4s, v8.4s, v5.4s
# CHECK: fmax v11.2d, v10.2d, v7.2d
0x9d 0xf7 0x39 0x0e
0x09 0xf5 0x25 0x4e
0x4b 0xf5 0x67 0x4e

#----------------------------------------------------------------------
# Vector Minimum (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmin v29.2s, v28.2s, v25.2s
# CHECK: fmin v9.4s, v8.4s, v5.4s
# CHECK: fmin v11.2d, v10.2d, v7.2d
0x9d 0xf7 0xb9 0x0e
0x09 0xf5 0xa5 0x4e
0x4b 0xf5 0xe7 0x4e

#----------------------------------------------------------------------
# Vector maxNum (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmaxnm v9.2s, v8.2s, v5.2s
# CHECK: fmaxnm v9.4s, v8.4s, v5.4s
# CHECK: fmaxnm v11.2d, v10.2d, v7.2d
0x09 0xc5 0x25 0x0e
0x09 0xc5 0x25 0x4e
0x4b 0xc5 0x67 0x4e

#----------------------------------------------------------------------
# Vector minNum (Floating Point)
#----------------------------------------------------------------------
# CHECK: fminnm v2.2s, v8.2s, v25.2s
# CHECK: fminnm v9.4s, v8.4s, v5.4s
# CHECK: fminnm v11.2d, v10.2d, v7.2d
0x02 0xc5 0xb9 0x0e
0x09 0xc5 0xa5 0x4e
0x4b 0xc5 0xe7 0x4e


#----------------------------------------------------------------------
# Vector Maximum Pairwise (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: smaxp v1.8b, v15.8b, v22.8b
# CHECK: umaxp v2.16b, v14.16b, v23.16b
# CHECK: smaxp v3.4h, v13.4h, v24.4h
# CHECK: umaxp v4.8h, v12.8h, v25.8h
# CHECK: smaxp v5.2s, v11.2s, v26.2s
# CHECK: umaxp v6.4s, v10.4s, v27.4s
0xe1 0xa5 0x36 0x0e
0xc2 0xa5 0x37 0x6e
0xa3 0xa5 0x78 0x0e
0x84 0xa5 0x79 0x6e
0x65 0xa5 0xba 0x0e
0x46 0xa5 0xbb 0x6e

#----------------------------------------------------------------------
# Vector Minimum Pairwise (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: uminp v1.8b, v15.8b, v22.8b
# CHECK: sminp v2.16b, v14.16b, v23.16b
# CHECK: uminp v3.4h, v13.4h, v24.4h
# CHECK: sminp v4.8h, v12.8h, v25.8h
# CHECK: uminp v5.2s, v11.2s, v26.2s
# CHECK: sminp v6.4s, v10.4s, v27.4s
0xe1 0xad 0x36 0x2e
0xc2 0xad 0x37 0x4e
0xa3 0xad 0x78 0x2e
0x84 0xad 0x79 0x4e
0x65 0xad 0xba 0x2e
0x46 0xad 0xbb 0x4e

#----------------------------------------------------------------------
# Vector Maximum Pairwise (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmaxp v29.2s, v28.2s, v25.2s
# CHECK: fmaxp v9.4s, v8.4s, v5.4s
# CHECK: fmaxp v11.2d, v10.2d, v7.2d
0x9d 0xf7 0x39 0x2e
0x09 0xf5 0x25 0x6e
0x4b 0xf5 0x67 0x6e

#----------------------------------------------------------------------
# Vector Minimum Pairwise (Floating Point)
#----------------------------------------------------------------------
# CHECK: fminp v29.2s, v28.2s, v25.2s
# CHECK: fminp v9.4s, v8.4s, v5.4s
# CHECK: fminp v11.2d, v10.2d, v7.2d
0x9d 0xf7 0xb9 0x2e
0x09 0xf5 0xa5 0x6e
0x4b 0xf5 0xe7 0x6e

#----------------------------------------------------------------------
# Vector maxNum Pairwise (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmaxnmp v9.2s, v8.2s, v5.2s
# CHECK: fmaxnmp v9.4s, v8.4s, v5.4s
# CHECK: fmaxnmp v11.2d, v10.2d, v7.2d
0x09 0xc5 0x25 0x2e
0x09 0xc5 0x25 0x6e
0x4b 0xc5 0x67 0x6e

#----------------------------------------------------------------------
# Vector minNum Pairwise (Floating Point)
#----------------------------------------------------------------------
# CHECK: fminnmp v2.2s, v8.2s, v25.2s
# CHECK: fminnmp v9.4s, v8.4s, v5.4s
# CHECK: fminnmp v11.2d, v10.2d, v7.2d
0x02 0xc5 0xb9 0x2e
0x09 0xc5 0xa5 0x6e
0x4b 0xc5 0xe7 0x6e

#------------------------------------------------------------------------------
# Vector Add Pairwise (Integer)
#------------------------------------------------------------------------------
# CHECK: addp v31.8b, v31.8b, v31.8b
# CHECK: addp v0.2d, v0.2d, v0.2d
0xff 0xbf 0x3f 0x0e
0x00 0xbc 0xe0 0x4e

#------------------------------------------------------------------------------
# Vector Add Pairwise (Floating Point)
#------------------------------------------------------------------------------
# CHECK: faddp v0.4s, v0.4s, v0.4s
# CHECK: faddp v31.2s, v31.2s, v31.2s
0x00 0xd4 0x20 0x6e
0xff 0xd7 0x3f 0x2e


#------------------------------------------------------------------------------
# Vector Saturating Doubling Multiply High
# Vector Saturating Rounding Doubling Multiply High
#------------------------------------------------------------------------------
# CHECK: sqdmulh v31.2s, v31.2s, v31.2s
# CHECK: sqdmulh v5.4s, v7.4s, v9.4s
# CHECK: sqrdmulh v31.4h, v3.4h, v13.4h
# CHECK: sqrdmulh v0.8h, v10.8h, v20.8h
0xff 0xb7 0xbf 0x0e
0xe5 0xb4 0xa9 0x4e
0x7f 0xb4 0x6d 0x2e
0x40 0xb5 0x74 0x6e

#------------------------------------------------------------------------------
# Vector Multiply Extended
#------------------------------------------------------------------------------
# CHECK: fmulx v1.2s, v22.2s, v2.2s
# CHECK: fmulx v21.4s, v15.4s, v3.4s
# CHECK: fmulx v11.2d, v5.2d, v23.2d
0xc1 0xde 0x22 0x0e
0xf5 0xdd 0x23 0x4e
0xab 0xdc 0x77 0x4e