AArch64: add initial NEON support

Patch by Ana Pazos.

- Completed implementation of instruction formats:
AdvSIMD three same
AdvSIMD modified immediate
AdvSIMD scalar pairwise

- Completed implementation of instruction classes
(some of the instructions in these classes
belong to yet unfinished instruction formats):
Vector Arithmetic
Vector Immediate
Vector Pairwise Arithmetic

- Initial implementation of instruction formats:
AdvSIMD scalar two-reg misc
AdvSIMD scalar three same

- Initial implementation of instruction class:
Scalar Arithmetic

- Initial clang changes to support ARMv8 intrinsics (a usage sketch follows
after this list).
Note: no clang changes for scalar intrinsic function-name mangling yet.

- Comprehensive test cases for the added instructions,
verifying codegen, encoding, decoding, diagnostics, and intrinsics.
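
As a rough illustration of the user-facing surface this work targets, the
sketch below shows an ACLE-style call that the clang changes are intended to
route through the new llvm.aarch64.neon.vmaxnm intrinsic. The header and the
builtin name (vmaxnmq_f32) are the usual <arm_neon.h> spellings and are
assumptions here rather than something this commit pins down:

    // Sketch only: assumes the standard <arm_neon.h> ACLE intrinsic names.
    #include <arm_neon.h>

    float32x4_t pick_larger(float32x4_t a, float32x4_t b) {
      // Expected to lower via llvm.aarch64.neon.vmaxnm to the AdvSIMD
      // FMAXNM (vector) instruction when targeting AArch64 with NEON.
      return vmaxnmq_f32(a, b);
    }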

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187567 91177308-0d34-0410-b5e6-96231b3b80d8
Committed by Tim Northover, 2013-08-01 09:20:35 +00:00
commit 87773c318f, parent 691aa094da
66 changed files with 12503 additions and 41 deletions


@ -494,6 +494,7 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
include "llvm/IR/IntrinsicsPowerPC.td"
include "llvm/IR/IntrinsicsX86.td"
include "llvm/IR/IntrinsicsARM.td"
include "llvm/IR/IntrinsicsAArch64.td"
include "llvm/IR/IntrinsicsXCore.td"
include "llvm/IR/IntrinsicsHexagon.td"
include "llvm/IR/IntrinsicsNVVM.td"


@ -0,0 +1,41 @@
//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the AArch64-specific intrinsics.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON)
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
// Vector Absolute Compare (Floating Point)
def int_aarch64_neon_vacgeq : Intrinsic<[llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
// Vector maxNum (Floating Point)
def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
// Vector minNum (Floating Point)
def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;
// Vector Pairwise maxNum (Floating Point)
def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;
// Vector Pairwise minNum (Floating Point)
def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
// Vector Multiply Extended (Floating Point)
def int_aarch64_neon_vmulx : Neon_2Arg_Intrinsic;
}


@ -61,7 +61,7 @@ def CC_A64_APCS : CallingConv<[
// Vectors and Floating-point types.
CCIfType<[v2i8], CCBitConvertToType<f16>>,
CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCBitConvertToType<f128>>,


@ -42,6 +42,8 @@ static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
// SIMD compares set the entire lane's bits to 1
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@ -53,6 +55,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
if (Subtarget->hasNEON()) {
// And the vectors
addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
}
computeRegisterProperties();
// We combine OR nodes for bitfield and NEON BSL operations.
@ -251,6 +268,31 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setExceptionPointerRegister(AArch64::X0);
setExceptionSelectorRegister(AArch64::X1);
if (Subtarget->hasNEON()) {
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
}
}
EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
@ -777,7 +819,22 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
default: return NULL;
case AArch64ISD::NEON_BSL:
return "AArch64ISD::NEON_BSL";
case AArch64ISD::NEON_MOVIMM:
return "AArch64ISD::NEON_MOVIMM";
case AArch64ISD::NEON_MVNIMM:
return "AArch64ISD::NEON_MVNIMM";
case AArch64ISD::NEON_FMOVIMM:
return "AArch64ISD::NEON_FMOVIMM";
case AArch64ISD::NEON_CMP:
return "AArch64ISD::NEON_CMP";
case AArch64ISD::NEON_CMPZ:
return "AArch64ISD::NEON_CMPZ";
case AArch64ISD::NEON_TST:
return "AArch64ISD::NEON_TST";
default:
return NULL;
}
}
@ -2230,6 +2287,213 @@ AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(A64CC::NE, MVT::i32));
}
static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
EVT VT = Op.getValueType();
bool Invert = false;
SDValue Op0, Op1;
unsigned Opcode;
if (LHS.getValueType().isInteger()) {
// Attempt to use Vector Integer Compare Mask Test instruction.
// TST = icmp ne (and (op0, op1), zero).
if (CC == ISD::SETNE) {
if (((LHS.getOpcode() == ISD::AND) &&
ISD::isBuildVectorAllZeros(RHS.getNode())) ||
((RHS.getOpcode() == ISD::AND) &&
ISD::isBuildVectorAllZeros(LHS.getNode()))) {
SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
}
}
// Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
// Note: Compare against Zero does not support unsigned predicates.
if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
ISD::isBuildVectorAllZeros(LHS.getNode())) &&
!isUnsignedIntSetCC(CC)) {
// If LHS is the zero value, swap operands and CondCode.
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
CC = getSetCCSwappedOperands(CC);
Op0 = RHS;
} else
Op0 = LHS;
// Ensure valid CondCode for Compare Mask against Zero instruction:
// EQ, GE, GT, LE, LT.
if (ISD::SETNE == CC) {
Invert = true;
CC = ISD::SETEQ;
}
// Using constant type to differentiate integer and FP compares with zero.
Op1 = DAG.getConstant(0, MVT::i32);
Opcode = AArch64ISD::NEON_CMPZ;
} else {
// Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
// Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
bool Swap = false;
switch (CC) {
default:
llvm_unreachable("Illegal integer comparison.");
case ISD::SETEQ:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE:
break;
case ISD::SETNE:
Invert = true;
CC = ISD::SETEQ;
break;
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETLT:
case ISD::SETLE:
Swap = true;
CC = getSetCCSwappedOperands(CC);
}
if (Swap)
std::swap(LHS, RHS);
Opcode = AArch64ISD::NEON_CMP;
Op0 = LHS;
Op1 = RHS;
}
// Generate Compare Mask instr or Compare Mask against Zero instr.
SDValue NeonCmp =
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
if (Invert)
NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
return NeonCmp;
}
// Now handle Floating Point cases.
// Attempt to use Vector Floating Point Compare Mask against Zero instruction.
if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
ISD::isBuildVectorAllZeros(LHS.getNode())) {
// If LHS is the zero value, swap operands and CondCode.
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
CC = getSetCCSwappedOperands(CC);
Op0 = RHS;
} else
Op0 = LHS;
// Using constant type to differentiate integer and FP compares with zero.
Op1 = DAG.getConstantFP(0, MVT::f32);
Opcode = AArch64ISD::NEON_CMPZ;
} else {
// Attempt to use Vector Floating Point Compare Mask instruction.
Op0 = LHS;
Op1 = RHS;
Opcode = AArch64ISD::NEON_CMP;
}
SDValue NeonCmpAlt;
// Some register compares have to be implemented with swapped CC and operands,
// e.g.: OLT implemented as OGT with swapped operands.
bool SwapIfRegArgs = false;
// Ensure valid CondCode for FP Compare Mask against Zero instruction:
// EQ, GE, GT, LE, LT.
// And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
switch (CC) {
default:
llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
case ISD::SETNE:
Invert = true; // Fallthrough
case ISD::SETOEQ:
case ISD::SETEQ:
CC = ISD::SETEQ;
break;
case ISD::SETOLT:
case ISD::SETLT:
CC = ISD::SETLT;
SwapIfRegArgs = true;
break;
case ISD::SETOGT:
case ISD::SETGT:
CC = ISD::SETGT;
break;
case ISD::SETOLE:
case ISD::SETLE:
CC = ISD::SETLE;
SwapIfRegArgs = true;
break;
case ISD::SETOGE:
case ISD::SETGE:
CC = ISD::SETGE;
break;
case ISD::SETUGE:
Invert = true;
CC = ISD::SETLT;
SwapIfRegArgs = true;
break;
case ISD::SETULE:
Invert = true;
CC = ISD::SETGT;
break;
case ISD::SETUGT:
Invert = true;
CC = ISD::SETLE;
SwapIfRegArgs = true;
break;
case ISD::SETULT:
Invert = true;
CC = ISD::SETGE;
break;
case ISD::SETUEQ:
Invert = true; // Fallthrough
case ISD::SETONE:
// Expand this to (OGT | OLT).
NeonCmpAlt =
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
CC = ISD::SETLT;
SwapIfRegArgs = true;
break;
case ISD::SETUO:
Invert = true; // Fallthrough
case ISD::SETO:
// Expand this to (OGE | OLT).
NeonCmpAlt =
DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
CC = ISD::SETLT;
SwapIfRegArgs = true;
break;
}
if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
CC = getSetCCSwappedOperands(CC);
std::swap(Op0, Op1);
}
// Generate FP Compare Mask instr or FP Compare Mask against Zero instr
SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
if (NeonCmpAlt.getNode())
NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
if (Invert)
NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
return NeonCmp;
}
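// Worked example: an integer "setcc ule %a, %b" on v4i32 hits the ISD::SETULE
// case above, which swaps the operands and condition, producing
// NEON_CMP(%b, %a, setuge); an FP "setcc une" sets Invert and falls through to
// SETEQ, so it is emitted as an equality compare whose result is then inverted.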
// (SETCC lhs, rhs, condcode)
SDValue
AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@ -2239,6 +2503,9 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorSETCC(Op, DAG);
if (LHS.getValueType() == MVT::f128) {
// f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
// for the rest of the function (some i32 or i64 values).
@ -2395,11 +2662,155 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
}
return SDValue();
}
/// Check if the specified splat value corresponds to a valid vector constant
/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
/// so, return the encoded 8-bit immediate and the OpCmode instruction field
/// values.
static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
bool is128Bits, NeonModImmType type, EVT &VT,
unsigned &Imm, unsigned &OpCmode) {
switch (SplatBitSize) {
default:
llvm_unreachable("unexpected size for isNeonModifiedImm");
case 8: {
if (type != Neon_Mov_Imm)
return false;
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
// Neon movi per byte: Op=0, Cmode=1110.
OpCmode = 0xe;
Imm = SplatBits;
VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
break;
}
case 16: {
// Neon move inst per halfword
VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x00nn is 0x00nn LSL 0
// movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
// bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
// Op=x, Cmode=100y
Imm = SplatBits;
OpCmode = 0x8;
break;
}
if ((SplatBits & ~0xff00) == 0) {
// Value = 0xnn00 is 0x00nn LSL 8
// movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
// bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
// Op=x, Cmode=101x
Imm = SplatBits >> 8;
OpCmode = 0xa;
break;
}
// can't handle any other
return false;
}
case 32: {
// First the LSL variants (MSL is unusable by some interested instructions).
// Neon move instr per word, shift zeros
VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x000000nn is 0x000000nn LSL 0
// movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
// bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
// Op=x, Cmode=000x
Imm = SplatBits;
OpCmode = 0;
break;
}
if ((SplatBits & ~0xff00) == 0) {
// Value = 0x0000nn00 is 0x000000nn LSL 8
// movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
// bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
// Op=x, Cmode=001x
Imm = SplatBits >> 8;
OpCmode = 0x2;
break;
}
if ((SplatBits & ~0xff0000) == 0) {
// Value = 0x00nn0000 is 0x000000nn LSL 16
// movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
// bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
// Op=x, Cmode=010x
Imm = SplatBits >> 16;
OpCmode = 0x4;
break;
}
if ((SplatBits & ~0xff000000) == 0) {
// Value = 0xnn000000 is 0x000000nn LSL 24
// movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
// bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
// Op=x, Cmode=011x
Imm = SplatBits >> 24;
OpCmode = 0x6;
break;
}
// Now the MSL immediates.
// Neon move instr per word, shift ones
if ((SplatBits & ~0xffff) == 0 &&
((SplatBits | SplatUndef) & 0xff) == 0xff) {
// Value = 0x0000nnff is 0x000000nn MSL 8
// movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
// Op=x, Cmode=1100
Imm = SplatBits >> 8;
OpCmode = 0xc;
break;
}
if ((SplatBits & ~0xffffff) == 0 &&
((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
// Value = 0x00nnffff is 0x000000nn MSL 16
// movi: Op=0, Cmode= 1101; mvni: Op=1, Cmode= 1101
// Op=x, Cmode=1101
Imm = SplatBits >> 16;
OpCmode = 0xd;
break;
}
// can't handle any other
return false;
}
case 64: {
if (type != Neon_Mov_Imm)
return false;
// Neon move instr bytemask, where each byte is either 0x00 or 0xff.
// movi Op=1, Cmode=1110.
OpCmode = 0x1e;
uint64_t BitMask = 0xff;
uint64_t Val = 0;
unsigned ImmMask = 1;
Imm = 0;
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
Val |= BitMask;
Imm |= ImmMask;
} else if ((SplatBits & BitMask) != 0) {
return false;
}
BitMask <<= 8;
ImmMask <<= 1;
}
SplatBits = Val;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
break;
}
}
return true;
}
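// Worked examples: a v4i32 splat of 0x0000ab00 matches the second 32-bit LSL
// case above, giving Imm = 0xab and OpCmode = 0x2 (0xab LSL 8); a v2i64 splat
// of 0x00ff00ff00ff00ff matches the 64-bit bytemask case, giving Imm = 0x55
// (one bit per 0xff byte) and OpCmode = 0x1e.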
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
@ -2725,6 +3136,7 @@ static SDValue PerformORCombine(SDNode *N,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@ -2745,6 +3157,44 @@ static SDValue PerformORCombine(SDNode *N,
if (Res.getNode())
return Res;
if (!Subtarget->hasNEON())
return SDValue();
// Attempt to use vector immediate-form BSL
// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != ISD::AND)
return SDValue();
if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
APInt SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
APInt SplatBits0;
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
HasAnyUndefs) &&
!HasAnyUndefs) {
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
APInt SplatBits1;
if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
HasAnyUndefs) &&
!HasAnyUndefs && SplatBits0 == ~SplatBits1) {
// Canonicalize the vector type to make instruction selection simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v16i8 : MVT::v8i8;
SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT,
N0->getOperand(1), N0->getOperand(0),
N1->getOperand(0));
return DAG.getNode(ISD::BITCAST, DL, VT, Result);
}
}
}
return SDValue();
}
@ -2819,6 +3269,76 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue
AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc DL(Op);
EVT VT = Op.getValueType();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
// Note we favor lowering MOVI over MVNI.
// This has implications on the definition of patterns in TableGen to select
// BIC immediate instructions but not ORR immediate instructions.
// If this lowering order is changed, TableGen patterns for BIC immediate and
// ORR immediate instructions have to be updated.
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
// First attempt to use vector immediate-form MOVI
EVT NeonMovVT;
unsigned Imm = 0;
unsigned OpCmode = 0;
if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
SplatBitSize, DAG, VT.is128BitVector(),
Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
if (ImmVal.getNode() && OpCmodeVal.getNode()) {
SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
ImmVal, OpCmodeVal);
return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
}
}
// Then attempt to use vector immediate-form MVNI
uint64_t NegatedImm = (~SplatBits).getZExtValue();
if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
Imm, OpCmode)) {
SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
if (ImmVal.getNode() && OpCmodeVal.getNode()) {
SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
ImmVal, OpCmodeVal);
return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
}
}
// Attempt to use vector immediate-form FMOV
if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
(VT == MVT::v2f64 && SplatBitSize == 64)) {
APFloat RealVal(
SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
SplatBits);
uint32_t ImmVal;
if (A64Imms::isFPImm(RealVal, ImmVal)) {
SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
}
}
}
}
return SDValue();
}
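// Example of the MOVI-over-MVNI ordering noted above: a v4i32 splat of
// 0xffffff00 has no MOVI encoding, but its complement 0x000000ff does, so the
// node is lowered as NEON_MVNIMM with Imm = 0xff and OpCmode = 0x0.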
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {


@ -111,7 +111,28 @@ namespace AArch64ISD {
// created using the small memory model style: i.e. adrp/add or
// adrp/mem-op. This exists to prevent bare TargetAddresses which may never
// get selected.
WrapperSmall
WrapperSmall,
// Vector bitwise select
NEON_BSL,
// Vector move immediate
NEON_MOVIMM,
// Vector Move Inverted Immediate
NEON_MVNIMM,
// Vector FP move immediate
NEON_FMOVIMM,
// Vector compare
NEON_CMP,
// Vector compare zero
NEON_CMPZ,
// Vector compare bitwise test
NEON_TST
};
}
@ -148,9 +169,11 @@ public:
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDLoc DL, SDValue &Chain) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const;
void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
SDValue &Chain) const;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
@ -253,6 +276,10 @@ private:
return &getTargetMachine().getSubtarget<AArch64Subtarget>();
}
};
enum NeonModImmType {
Neon_Mov_Imm,
Neon_Mvn_Imm
};
} // namespace llvm
#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H


@ -959,3 +959,96 @@ class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
let Inst{4-0} = op4;
}
//===----------------------------------------------------------------------===//
//
// Neon Instruction Format Definitions.
//
let Predicates = [HasNEON] in {
class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit> {
}
// Format AdvSIMD 3 vector registers with same vector type
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: A64InstRdnm<outs, ins, asmstr, patterns, itin>
{
let Inst{31} = 0b0;
let Inst{30} = q;
let Inst{29} = u;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
let Inst{21} = 0b1;
// Inherit Rm in 20-16
let Inst{15-11} = opcode;
let Inst{10} = 0b1;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
// Format AdvSIMD 1 vector register with modified immediate
class NeonI_1VModImm<bit q, bit op,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: A64InstRd<outs,ins, asmstr, patterns, itin>
{
bits<8> Imm;
bits<4> cmode;
let Inst{31} = 0b0;
let Inst{30} = q;
let Inst{29} = op;
let Inst{28-19} = 0b0111100000;
let Inst{15-12} = cmode;
let Inst{11} = 0b0; // o2
let Inst{10} = 1;
// Inherit Rd in 4-0
let Inst{18-16} = Imm{7-5}; // imm a:b:c
let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h
}
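// For example, an 8-bit immediate of 0xab (0b10101011) is encoded with
// Inst{18-16} = 0b101 (a:b:c) and Inst{9-5} = 0b01011 (d:e:f:g:h).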
// Format AdvSIMD 3 scalar registers with same type
class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: A64InstRdnm<outs, ins, asmstr, patterns, itin>
{
let Inst{31} = 0b0;
let Inst{30} = 0b1;
let Inst{29} = u;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
let Inst{21} = 0b1;
// Inherit Rm in 20-16
let Inst{15-11} = opcode;
let Inst{10} = 0b1;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
// Format AdvSIMD 2 vector registers miscellaneous
class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: A64InstRdn<outs, ins, asmstr, patterns, itin>
{
let Inst{31} = 0b0;
let Inst{30} = q;
let Inst{29} = u;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
let Inst{21-17} = 0b10000;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
}


@ -11,6 +11,17 @@
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto","crypto">;
// Use fused MAC if more precision in FP computation is allowed.
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast)">;
include "AArch64InstrFormats.td"
//===----------------------------------------------------------------------===//
@ -2173,6 +2184,29 @@ def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
// Extra patterns for when we're allowed to optimise separate multiplication and
// addition.
let Predicates = [UseFusedMAC] in {
def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
(FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
(FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
(FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra),
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)),
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
}
//===----------------------------------------------------------------------===//
// Floating-point <-> fixed-point conversion instructions
//===----------------------------------------------------------------------===//
@ -5123,3 +5157,9 @@ defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
(i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON) Support
//
include "AArch64InstrNEON.td"

File diff suppressed because it is too large.


@ -109,6 +109,11 @@ bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
break;
case MachineOperand::MO_FPImmediate: {
assert(MO.getFPImm()->isZero() && "Only fp imm 0.0 is supported");
MCOp = MCOperand::CreateFPImm(0.0);
break;
}
case MachineOperand::MO_BlockAddress:
MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
break;


@ -185,7 +185,7 @@ foreach Index = 0-31 in {
// These two classes contain the same registers, which should be reasonably
// sensible for MC and allocation purposes, but allows them to be treated
// separately for things like stack spilling.
def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64,
(sequence "V%u", 0, 31)>;
def VPR128 : RegisterClass<"AArch64",


@ -26,10 +26,8 @@
using namespace llvm;
AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
: AArch64GenSubtargetInfo(TT, CPU, FS)
, HasNEON(true)
, HasCrypto(true)
, TargetTriple(TT) {
: AArch64GenSubtargetInfo(TT, CPU, FS), HasNEON(false), HasCrypto(false),
TargetTriple(TT) {
ParseSubtargetFeatures(CPU, FS);
}


@ -48,6 +48,9 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
};
} // End llvm namespace


@ -664,8 +664,42 @@ public:
return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
}
template<int MemSize> bool isSImm7Scaled() const {
if (!isImm()) return false;
bool isNeonMovImmShiftLSL() const {
if (!isShiftOrExtend())
return false;
if (ShiftExtend.ShiftType != A64SE::LSL)
return false;
// Valid shift amount is 0, 8, 16 and 24.
return ShiftExtend.Amount % 8 == 0 && ShiftExtend.Amount <= 24;
}
bool isNeonMovImmShiftLSLH() const {
if (!isShiftOrExtend())
return false;
if (ShiftExtend.ShiftType != A64SE::LSL)
return false;
// Valid shift amount is 0 and 8.
return ShiftExtend.Amount == 0 || ShiftExtend.Amount == 8;
}
bool isNeonMovImmShiftMSL() const {
if (!isShiftOrExtend())
return false;
if (ShiftExtend.ShiftType != A64SE::MSL)
return false;
// Valid shift amount is 8 and 16.
return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16;
}
template <int MemSize> bool isSImm7Scaled() const {
if (!isImm())
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@ -705,10 +739,27 @@ public:
return isa<MCConstantExpr>(getImm());
}
bool isNeonUImm64Mask() const {
if (!isImm())
return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE)
return false;
uint64_t Value = CE->getValue();
// i64 value with each byte being either 0x00 or 0xff.
for (unsigned i = 0; i < 8; ++i, Value >>= 8)
if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff)
return false;
return true;
}
static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
unsigned ShiftAmount,
bool ImplicitAmount,
SMLoc S, SMLoc E) {
SMLoc S,SMLoc E) {
AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
Op->ImmWithLSL.Val = Val;
Op->ImmWithLSL.ShiftAmount = ShiftAmount;
@ -1026,6 +1077,40 @@ public:
Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
}
// For Vector Immediates shifted imm operands.
void addNeonMovImmShiftLSLOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (ShiftExtend.Amount % 8 != 0 || ShiftExtend.Amount > 24)
llvm_unreachable("Invalid shift amount for vector immediate inst.");
// Encode LSL shift amount 0, 8, 16, 24 as 0, 1, 2, 3.
int64_t Imm = ShiftExtend.Amount / 8;
Inst.addOperand(MCOperand::CreateImm(Imm));
}
void addNeonMovImmShiftLSLHOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (ShiftExtend.Amount != 0 && ShiftExtend.Amount != 8)
llvm_unreachable("Invalid shift amount for vector immediate inst.");
// Encode LSLH shift amount 0, 8 as 0, 1.
int64_t Imm = ShiftExtend.Amount / 8;
Inst.addOperand(MCOperand::CreateImm(Imm));
}
void addNeonMovImmShiftMSLOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (ShiftExtend.Amount != 8 && ShiftExtend.Amount != 16)
llvm_unreachable("Invalid shift amount for vector immediate inst.");
// Encode MSL shift amount 8, 16 as 0, 1.
int64_t Imm = ShiftExtend.Amount / 8 - 1;
Inst.addOperand(MCOperand::CreateImm(Imm));
}
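// For example, a parsed "lsl #16" on a per-word MOVI reaches
// addNeonMovImmShiftLSLOperands with ShiftExtend.Amount == 16 and is added to
// the MCInst as the encoded immediate 2.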
// For the extend in load-store (register offset) instructions.
template<unsigned MemSize>
void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const {
@ -1065,6 +1150,20 @@ public:
Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
}
void addNeonUImm64MaskOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// A bit from each byte in the constant forms the encoded immediate
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
uint64_t Value = CE->getValue();
unsigned Imm = 0;
for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
Imm |= (Value & 1) << i;
}
Inst.addOperand(MCOperand::CreateImm(Imm));
}
};
} // end anonymous namespace.
@ -1660,20 +1759,21 @@ AArch64AsmParser::ParseShiftExtend(
std::string LowerID = IDVal.lower();
A64SE::ShiftExtSpecifiers Spec =
StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
.Case("lsl", A64SE::LSL)
.Case("lsr", A64SE::LSR)
.Case("asr", A64SE::ASR)
.Case("ror", A64SE::ROR)
.Case("uxtb", A64SE::UXTB)
.Case("uxth", A64SE::UXTH)
.Case("uxtw", A64SE::UXTW)
.Case("uxtx", A64SE::UXTX)
.Case("sxtb", A64SE::SXTB)
.Case("sxth", A64SE::SXTH)
.Case("sxtw", A64SE::SXTW)
.Case("sxtx", A64SE::SXTX)
.Default(A64SE::Invalid);
StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
.Case("lsl", A64SE::LSL)
.Case("msl", A64SE::MSL)
.Case("lsr", A64SE::LSR)
.Case("asr", A64SE::ASR)
.Case("ror", A64SE::ROR)
.Case("uxtb", A64SE::UXTB)
.Case("uxth", A64SE::UXTH)
.Case("uxtw", A64SE::UXTW)
.Case("uxtx", A64SE::UXTX)
.Case("sxtb", A64SE::SXTB)
.Case("sxth", A64SE::SXTH)
.Case("sxtw", A64SE::SXTW)
.Case("sxtx", A64SE::SXTX)
.Default(A64SE::Invalid);
if (Spec == A64SE::Invalid)
return MatchOperand_NoMatch;
@ -1683,8 +1783,8 @@ AArch64AsmParser::ParseShiftExtend(
S = Parser.getTok().getLoc();
Parser.Lex();
if (Spec != A64SE::LSL && Spec != A64SE::LSR &&
Spec != A64SE::ASR && Spec != A64SE::ROR) {
if (Spec != A64SE::LSL && Spec != A64SE::LSR && Spec != A64SE::ASR &&
Spec != A64SE::ROR && Spec != A64SE::MSL) {
// The shift amount can be omitted for the extending versions, but not real
// shifts:
// add x0, x0, x0, uxtb
@ -2019,7 +2119,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"expected compatible register or floating-point constant");
case Match_FPZero:
return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
"expected floating-point constant #0.0");
"expected floating-point constant #0.0 or invalid register type");
case Match_Label:
return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
"expected label or encodable integer pc offset");


@ -85,6 +85,9 @@ static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
@ -126,6 +129,10 @@ static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
unsigned ShiftAmount,
uint64_t Address,
const void *Decoder);
template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
static DecodeStatus
DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
uint64_t Address, const void *Decoder);
static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
unsigned ShiftAmount,
@ -336,9 +343,20 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Register));
return MCDisassembler::Success;
}
static DecodeStatus
DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@ -799,4 +817,24 @@ extern "C" void LLVMInitializeAArch64Disassembler() {
createAArch64Disassembler);
}
template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
static DecodeStatus
DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
uint64_t Address, const void *Decoder) {
bool IsLSL = false;
if (Ext == A64SE::LSL)
IsLSL = true;
else if (Ext != A64SE::MSL)
return MCDisassembler::Fail;
// MSL and LSLH accept encoded shift amount 0 or 1.
if ((!IsLSL || (IsLSL && IsHalf)) && ShiftAmount != 0 && ShiftAmount != 1)
return MCDisassembler::Fail;
// LSL accepts encoded shift amount 0, 1, 2 or 3.
if (IsLSL && ShiftAmount > 3)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
return MCDisassembler::Success;
}


@ -406,3 +406,84 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printAnnotation(O, Annot);
}
template <A64SE::ShiftExtSpecifiers Ext, bool isHalf>
void AArch64InstPrinter::printNeonMovImmShiftOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
assert(MO.isImm() &&
"Immediate operand required for Neon vector immediate inst.");
bool IsLSL = false;
if (Ext == A64SE::LSL)
IsLSL = true;
else if (Ext != A64SE::MSL)
llvm_unreachable("Invalid shift specifier in movi instruction");
int64_t Imm = MO.getImm();
// MSL and LSLH accept encoded shift amount 0 or 1.
if ((!IsLSL || (IsLSL && isHalf)) && Imm != 0 && Imm != 1)
llvm_unreachable("Invalid shift amount in movi instruction");
// LSL accepts encoded shift amount 0, 1, 2 or 3.
if (IsLSL && (Imm < 0 || Imm > 3))
llvm_unreachable("Invalid shift amount in movi instruction");
// Print shift amount as multiple of 8 with MSL encoded shift amount
// 0 and 1 printed as 8 and 16.
if (!IsLSL)
Imm++;
Imm *= 8;
// LSL #0 is not printed
if (IsLSL) {
if (Imm == 0)
return;
O << ", lsl";
} else
O << ", msl";
O << " #" << Imm;
}
void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &o) {
o << "#0x0";
}
void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MOUImm = MI->getOperand(OpNum);
assert(MOUImm.isImm() &&
"Immediate operand required for Neon vector immediate inst.");
unsigned Imm = MOUImm.getImm();
O << "#0x";
O.write_hex(Imm);
}
void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
const MCOperand &MOUImm8 = MI->getOperand(OpNum);
assert(MOUImm8.isImm() &&
"Immediate operand required for Neon vector immediate bytemask inst.");
uint32_t UImm8 = MOUImm8.getImm();
uint64_t Mask = 0;
// Replicates 0x00 or 0xff byte in a 64-bit vector
for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
if ((UImm8 >> ByteNum) & 1)
Mask |= (uint64_t)0xff << (8 * ByteNum);
}
O << "#0x";
O.write_hex(Mask);
}
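// Worked example: an encoded UImm8 of 0x55 (bits 0, 2, 4 and 6 set) expands to
// the bytemask 0x00ff00ff00ff00ff printed here; the asm parser's
// addNeonUImm64MaskOperands performs the inverse mapping back to 0x55.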


@ -164,9 +164,14 @@ public:
return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
}
template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
};
}
#endif


@ -40,7 +40,7 @@ MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
StringRef CPU,
StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitAArch64MCSubtargetInfo(X, TT, CPU, "");
InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
return X;
}


@ -1105,3 +1105,69 @@ bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
return isMOVNImm(RegWidth, Value, UImm16, Shift);
}
// decodeNeonModShiftImm - Decode a Neon OpCmode value into the shift amount
// and the shift type (shift zeros or ones in) and return whether the OpCmode
// value implies a shift operation.
bool A64Imms::decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
unsigned &ShiftOnesIn) {
ShiftImm = 0;
ShiftOnesIn = false;
bool HasShift = true;
if (OpCmode == 0xe) {
// movi byte
HasShift = false;
} else if (OpCmode == 0x1e) {
// movi 64-bit bytemask
HasShift = false;
} else if ((OpCmode & 0xc) == 0x8) {
// shift zeros, per halfword
ShiftImm = ((OpCmode & 0x2) >> 1);
} else if ((OpCmode & 0x8) == 0) {
// shift zeros, per word
ShiftImm = ((OpCmode & 0x6) >> 1);
} else if ((OpCmode & 0xe) == 0xc) {
// shift ones, per word
ShiftOnesIn = true;
ShiftImm = (OpCmode & 0x1);
} else {
// per byte, per bytemask
llvm_unreachable("Unsupported Neon modified immediate");
}
return HasShift;
}
// decodeNeonModImm - Decode a NEON modified immediate and OpCmode values
// into the element value and the element size in bits.
uint64_t A64Imms::decodeNeonModImm(unsigned Val, unsigned OpCmode,
unsigned &EltBits) {
uint64_t DecodedVal = Val;
EltBits = 0;
if (OpCmode == 0xe) {
// movi byte
EltBits = 8;
} else if (OpCmode == 0x1e) {
// movi 64-bit bytemask
DecodedVal = 0;
for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
if ((Val >> ByteNum) & 1)
DecodedVal |= (uint64_t)0xff << (8 * ByteNum);
}
EltBits = 64;
} else if ((OpCmode & 0xc) == 0x8) {
// shift zeros, per halfword
EltBits = 16;
} else if ((OpCmode & 0x8) == 0) {
// shift zeros, per word
EltBits = 32;
} else if ((OpCmode & 0xe) == 0xc) {
// shift ones, per word
EltBits = 32;
} else {
llvm_unreachable("Unsupported Neon modified immediate");
}
return DecodedVal;
}
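
A short usage sketch of these two helpers (the include path is an assumption;
the shift arithmetic follows the instruction printer, where an encoded LSL
index is a multiple-of-8 bit shift and MSL counts from 8):

    // Sketch only: assumes the A64Imms declarations from Utils/AArch64BaseInfo.h.
    #include "Utils/AArch64BaseInfo.h"
    #include <cstdint>

    static uint64_t decodeMoviElement(unsigned Val, unsigned OpCmode,
                                      unsigned &EltBits) {
      uint64_t Elt = llvm::A64Imms::decodeNeonModImm(Val, OpCmode, EltBits);
      unsigned ShiftImm = 0, ShiftOnesIn = 0;
      if (llvm::A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn)) {
        unsigned Amount = ShiftOnesIn ? 8 * (ShiftImm + 1) : 8 * ShiftImm;
        Elt <<= Amount;
        if (ShiftOnesIn)                  // MSL shifts ones in from the right.
          Elt |= (1ULL << Amount) - 1;
      }
      // e.g. Val = 0xab, OpCmode = 0x2 -> EltBits = 32, element 0x0000ab00.
      return Elt;
    }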


@ -289,6 +289,7 @@ namespace A64SE {
enum ShiftExtSpecifiers {
Invalid = -1,
LSL,
MSL,
LSR,
ASR,
ROR,
@ -1068,7 +1069,10 @@ namespace A64Imms {
// MOVN but *not* with a MOVZ (because that would take priority).
bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
}
uint64_t decodeNeonModImm(unsigned Val, unsigned OpCmode, unsigned &EltBits);
bool decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
unsigned &ShiftOnesIn);
}
} // end namespace llvm;


@ -0,0 +1,21 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s
; The DAG combiner decided to use a vector load/store for this struct copy
; previously. This probably shouldn't happen without NEON, but the most
; important thing is that it compiles.
define void @store_combine() nounwind {
%src = alloca { double, double }, align 8
%dst = alloca { double, double }, align 8
%src.realp = getelementptr inbounds { double, double }* %src, i32 0, i32 0
%src.real = load double* %src.realp
%src.imagp = getelementptr inbounds { double, double }* %src, i32 0, i32 1
%src.imag = load double* %src.imagp
%dst.realp = getelementptr inbounds { double, double }* %dst, i32 0, i32 0
%dst.imagp = getelementptr inbounds { double, double }* %dst, i32 0, i32 1
store double %src.real, double* %dst.realp
store double %src.imag, double* %dst.imagp
ret void
}


@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
define i64 @test_inline_constraint_r(i64 %base, i32 %offset) {
; CHECK-LABEL: test_inline_constraint_r:
@ -44,6 +44,26 @@ define i32 @test_inline_constraint_Q(i32 *%ptr) {
@dump = global fp128 zeroinitializer
define void @test_inline_constraint_w(<8 x i8> %vec64, <4 x float> %vec128, half %hlf, float %flt, double %dbl, fp128 %quad) {
; CHECK: test_inline_constraint_w:
call <8 x i8> asm sideeffect "add $0.8b, $1.8b, $1.8b", "=w,w"(<8 x i8> %vec64)
call <8 x i8> asm sideeffect "fadd $0.4s, $1.4s, $1.4s", "=w,w"(<4 x float> %vec128)
; CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; Arguably semantically dodgy to output "vN", but it's what GCC does
; so purely for compatibility we want vector registers to be output.
call float asm sideeffect "fcvt ${0:s}, ${1:h}", "=w,w"(half undef)
call float asm sideeffect "fadd $0.2s, $0.2s, $0.2s", "=w,w"(float %flt)
call double asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(double %dbl)
call fp128 asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(fp128 %quad)
; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
ret void
}
define void @test_inline_constraint_I() {
; CHECK-LABEL: test_inline_constraint_I:
call void asm sideeffect "add x0, x0, $0", "I"(i32 0)


@ -0,0 +1,226 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uabd_v8i8:
%abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uabd v0.8b, v0.8b, v1.8b
ret <8 x i8> %abd
}
define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uaba_v8i8:
%abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
%aba = add <8 x i8> %lhs, %abd
; CHECK: uaba v0.8b, v0.8b, v1.8b
ret <8 x i8> %aba
}
define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_sabd_v8i8:
%abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sabd v0.8b, v0.8b, v1.8b
ret <8 x i8> %abd
}
define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_saba_v8i8:
%abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
%aba = add <8 x i8> %lhs, %abd
; CHECK: saba v0.8b, v0.8b, v1.8b
ret <8 x i8> %aba
}
declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uabd_v16i8:
%abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uabd v0.16b, v0.16b, v1.16b
ret <16 x i8> %abd
}
define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uaba_v16i8:
%abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
%aba = add <16 x i8> %lhs, %abd
; CHECK: uaba v0.16b, v0.16b, v1.16b
ret <16 x i8> %aba
}
define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_sabd_v16i8:
%abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sabd v0.16b, v0.16b, v1.16b
ret <16 x i8> %abd
}
define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_saba_v16i8:
%abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
%aba = add <16 x i8> %lhs, %abd
; CHECK: saba v0.16b, v0.16b, v1.16b
ret <16 x i8> %aba
}
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uabd_v4i16:
%abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uabd v0.4h, v0.4h, v1.4h
ret <4 x i16> %abd
}
define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uaba_v4i16:
%abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
%aba = add <4 x i16> %lhs, %abd
; CHECK: uaba v0.4h, v0.4h, v1.4h
ret <4 x i16> %aba
}
define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sabd_v4i16:
%abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sabd v0.4h, v0.4h, v1.4h
ret <4 x i16> %abd
}
define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_saba_v4i16:
%abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
%aba = add <4 x i16> %lhs, %abd
; CHECK: saba v0.4h, v0.4h, v1.4h
ret <4 x i16> %aba
}
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uabd_v8i16:
%abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uabd v0.8h, v0.8h, v1.8h
ret <8 x i16> %abd
}
define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uaba_v8i16:
%abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
%aba = add <8 x i16> %lhs, %abd
; CHECK: uaba v0.8h, v0.8h, v1.8h
ret <8 x i16> %aba
}
define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sabd_v8i16:
%abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sabd v0.8h, v0.8h, v1.8h
ret <8 x i16> %abd
}
define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_saba_v8i16:
%abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
%aba = add <8 x i16> %lhs, %abd
; CHECK: saba v0.8h, v0.8h, v1.8h
ret <8 x i16> %aba
}
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uabd_v2i32:
%abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uabd v0.2s, v0.2s, v1.2s
ret <2 x i32> %abd
}
define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uaba_v2i32:
%abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
%aba = add <2 x i32> %lhs, %abd
; CHECK: uaba v0.2s, v0.2s, v1.2s
ret <2 x i32> %aba
}
define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sabd_v2i32:
%abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sabd v0.2s, v0.2s, v1.2s
ret <2 x i32> %abd
}
define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_saba_v2i32:
%abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
%aba = add <2 x i32> %lhs, %abd
; CHECK: saba v0.2s, v0.2s, v1.2s
ret <2 x i32> %aba
}
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uabd_v4i32:
%abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uabd v0.4s, v0.4s, v1.4s
ret <4 x i32> %abd
}
define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uaba_v4i32:
%abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
%aba = add <4 x i32> %lhs, %abd
; CHECK: uaba v0.4s, v0.4s, v1.4s
ret <4 x i32> %aba
}
define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sabd_v4i32:
%abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sabd v0.4s, v0.4s, v1.4s
ret <4 x i32> %abd
}
define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_saba_v4i32:
%abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
%aba = add <4 x i32> %lhs, %abd
; CHECK: saba v0.4s, v0.4s, v1.4s
ret <4 x i32> %aba
}
declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>)
define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fabd_v2f32:
%abd = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fabd v0.2s, v0.2s, v1.2s
ret <2 x float> %abd
}
declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>)
define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fabd_v4f32:
%abd = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fabd v0.4s, v0.4s, v1.4s
ret <4 x float> %abd
}
declare <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double>, <2 x double>)
define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fabd_v2f64:
%abd = call <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fabd v0.2d, v0.2d, v1.2d
ret <2 x double> %abd
}


@ -0,0 +1,92 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 is possible, but would be odd.
; CHECK: test_addp_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: addp v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_addp_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: addp v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_addp_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: addp v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_addp_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: addp v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_addp_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: addp v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_addp_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: addp v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_addp_v2i64:
%val = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: addp v0.2d, v0.2d, v1.2d
ret <2 x i64> %val
}
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_faddp_v2f32:
%val = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: faddp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_faddp_v4f32:
%val = call <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: faddp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_faddp_v2f64:
%val = call <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: faddp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}


@ -0,0 +1,132 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
;CHECK: add {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp3 = add <8 x i8> %A, %B;
ret <8 x i8> %tmp3
}
define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
;CHECK: add {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp3 = add <16 x i8> %A, %B;
ret <16 x i8> %tmp3
}
define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
;CHECK: add {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
%tmp3 = add <4 x i16> %A, %B;
ret <4 x i16> %tmp3
}
define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
;CHECK: add {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
%tmp3 = add <8 x i16> %A, %B;
ret <8 x i16> %tmp3
}
define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
;CHECK: add {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%tmp3 = add <2 x i32> %A, %B;
ret <2 x i32> %tmp3
}
define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
;CHECK: add {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%tmp3 = add <4 x i32> %A, %B;
ret <4 x i32> %tmp3
}
define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
;CHECK: add {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%tmp3 = add <2 x i64> %A, %B;
ret <2 x i64> %tmp3
}
define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
;CHECK: fadd {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%tmp3 = fadd <2 x float> %A, %B;
ret <2 x float> %tmp3
}
define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
;CHECK: fadd {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%tmp3 = fadd <4 x float> %A, %B;
ret <4 x float> %tmp3
}
define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
;CHECK: add {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%tmp3 = fadd <2 x double> %A, %B;
ret <2 x double> %tmp3
}
define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
;CHECK: sub {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp3 = sub <8 x i8> %A, %B;
ret <8 x i8> %tmp3
}
define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
;CHECK: sub {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp3 = sub <16 x i8> %A, %B;
ret <16 x i8> %tmp3
}
define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
;CHECK: sub {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
%tmp3 = sub <4 x i16> %A, %B;
ret <4 x i16> %tmp3
}
define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
;CHECK: sub {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
%tmp3 = sub <8 x i16> %A, %B;
ret <8 x i16> %tmp3
}
define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
;CHECK: sub {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%tmp3 = sub <2 x i32> %A, %B;
ret <2 x i32> %tmp3
}
define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) {
;CHECK: sub {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%tmp3 = sub <4 x i32> %A, %B;
ret <4 x i32> %tmp3
}
define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
;CHECK: sub {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%tmp3 = sub <2 x i64> %A, %B;
ret <2 x i64> %tmp3
}
define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
;CHECK: fsub {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%tmp3 = fsub <2 x float> %A, %B;
ret <2 x float> %tmp3
}
define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
;CHECK: fsub {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%tmp3 = fsub <4 x float> %A, %B;
ret <4 x float> %tmp3
}
define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
;CHECK: fsub {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%tmp3 = fsub <2 x double> %A, %B;
ret <2 x double> %tmp3
}
define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
%tmp3 = add <1 x i64> %A, %B;
ret <1 x i64> %tmp3
}
define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
%tmp3 = sub <1 x i64> %A, %B;
ret <1 x i64> %tmp3
}

View File

@ -0,0 +1,574 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
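; Bitcasts between vector types of the same total width are pure register
; reinterpretations, so every function below is expected to lower to nothing
; more than a return.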
; From <8 x i8>
define <1 x i64> @test_v8i8_to_v1i64(<8 x i8> %in) nounwind {
; CHECK: test_v8i8_to_v1i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i8> %in to <1 x i64>
ret <1 x i64> %val
}
define <2 x i32> @test_v8i8_to_v2i32(<8 x i8> %in) nounwind {
; CHECK: test_v8i8_to_v2i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i8> %in to <2 x i32>
ret <2 x i32> %val
}
define <2 x float> @test_v8i8_to_v2f32(<8 x i8> %in) nounwind {
; CHECK: test_v8i8_to_v2f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i8> %in to <2 x float>
ret <2 x float> %val
}
define <4 x i16> @test_v8i8_to_v4i16(<8 x i8> %in) nounwind{
; CHECK: test_v8i8_to_v4i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i8> %in to <4 x i16>
ret <4 x i16> %val
}
define <8 x i8> @test_v8i8_to_v8i8(<8 x i8> %in) nounwind{
; CHECK: test_v8i8_to_v8i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i8> %in to <8 x i8>
ret <8 x i8> %val
}
; From <4 x i16>
define <1 x i64> @test_v4i16_to_v1i64(<4 x i16> %in) nounwind {
; CHECK: test_v4i16_to_v1i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i16> %in to <1 x i64>
ret <1 x i64> %val
}
define <2 x i32> @test_v4i16_to_v2i32(<4 x i16> %in) nounwind {
; CHECK: test_v4i16_to_v2i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i16> %in to <2 x i32>
ret <2 x i32> %val
}
define <2 x float> @test_v4i16_to_v2f32(<4 x i16> %in) nounwind {
; CHECK: test_v4i16_to_v2f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i16> %in to <2 x float>
ret <2 x float> %val
}
define <4 x i16> @test_v4i16_to_v4i16(<4 x i16> %in) nounwind{
; CHECK: test_v4i16_to_v4i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i16> %in to <4 x i16>
ret <4 x i16> %val
}
define <8 x i8> @test_v4i16_to_v8i8(<4 x i16> %in) nounwind{
; CHECK: test_v4i16_to_v8i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i16> %in to <8 x i8>
ret <8 x i8> %val
}
; From <2 x i32>
define <1 x i64> @test_v2i32_to_v1i64(<2 x i32> %in) nounwind {
; CHECK: test_v2i32_to_v1i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i32> %in to <1 x i64>
ret <1 x i64> %val
}
define <2 x i32> @test_v2i32_to_v2i32(<2 x i32> %in) nounwind {
; CHECK: test_v2i32_to_v2i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i32> %in to <2 x i32>
ret <2 x i32> %val
}
define <2 x float> @test_v2i32_to_v2f32(<2 x i32> %in) nounwind {
; CHECK: test_v2i32_to_v2f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i32> %in to <2 x float>
ret <2 x float> %val
}
define <4 x i16> @test_v2i32_to_v4i16(<2 x i32> %in) nounwind{
; CHECK: test_v2i32_to_v4i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i32> %in to <4 x i16>
ret <4 x i16> %val
}
define <8 x i8> @test_v2i32_to_v8i8(<2 x i32> %in) nounwind{
; CHECK: test_v2i32_to_v8i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i32> %in to <8 x i8>
ret <8 x i8> %val
}
; From <2 x float>
define <1 x i64> @test_v2f32_to_v1i64(<2 x float> %in) nounwind {
; CHECK: test_v2f32_to_v1i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x float> %in to <1 x i64>
ret <1 x i64> %val
}
define <2 x i32> @test_v2f32_to_v2i32(<2 x float> %in) nounwind {
; CHECK: test_v2f32_to_v2i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x float> %in to <2 x i32>
ret <2 x i32> %val
}
define <2 x float> @test_v2f32_to_v2f32(<2 x float> %in) nounwind{
; CHECK: test_v2f32_to_v2f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x float> %in to <2 x float>
ret <2 x float> %val
}
define <4 x i16> @test_v2f32_to_v4i16(<2 x float> %in) nounwind{
; CHECK: test_v2f32_to_v4i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x float> %in to <4 x i16>
ret <4 x i16> %val
}
define <8 x i8> @test_v2f32_to_v8i8(<2 x float> %in) nounwind{
; CHECK: test_v2f32_to_v8i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x float> %in to <8 x i8>
ret <8 x i8> %val
}
; From <1 x i64>
define <1 x i64> @test_v1i64_to_v1i64(<1 x i64> %in) nounwind {
; CHECK: test_v1i64_to_v1i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <1 x i64> %in to <1 x i64>
ret <1 x i64> %val
}
define <2 x i32> @test_v1i64_to_v2i32(<1 x i64> %in) nounwind {
; CHECK: test_v1i64_to_v2i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <1 x i64> %in to <2 x i32>
ret <2 x i32> %val
}
define <2 x float> @test_v1i64_to_v2f32(<1 x i64> %in) nounwind{
; CHECK: test_v1i64_to_v2f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <1 x i64> %in to <2 x float>
ret <2 x float> %val
}
define <4 x i16> @test_v1i64_to_v4i16(<1 x i64> %in) nounwind{
; CHECK: test_v1i64_to_v4i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <1 x i64> %in to <4 x i16>
ret <4 x i16> %val
}
define <8 x i8> @test_v1i64_to_v8i8(<1 x i64> %in) nounwind{
; CHECK: test_v1i64_to_v8i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <1 x i64> %in to <8 x i8>
ret <8 x i8> %val
}
; From <16 x i8>
define <2 x double> @test_v16i8_to_v2f64(<16 x i8> %in) nounwind {
; CHECK: test_v16i8_to_v2f64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <16 x i8> %in to <2 x double>
ret <2 x double> %val
}
define <2 x i64> @test_v16i8_to_v2i64(<16 x i8> %in) nounwind {
; CHECK: test_v16i8_to_v2i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <16 x i8> %in to <2 x i64>
ret <2 x i64> %val
}
define <4 x i32> @test_v16i8_to_v4i32(<16 x i8> %in) nounwind {
; CHECK: test_v16i8_to_v4i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <16 x i8> %in to <4 x i32>
ret <4 x i32> %val
}
define <4 x float> @test_v16i8_to_v4f32(<16 x i8> %in) nounwind {
; CHECK: test_v16i8_to_v4f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <16 x i8> %in to <4 x float>
ret <4 x float> %val
}
define <8 x i16> @test_v16i8_to_v8i16(<16 x i8> %in) nounwind{
; CHECK: test_v16i8_to_v8i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <16 x i8> %in to <8 x i16>
ret <8 x i16> %val
}
define <16 x i8> @test_v16i8_to_v16i8(<16 x i8> %in) nounwind{
; CHECK: test_v16i8_to_v16i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <16 x i8> %in to <16 x i8>
ret <16 x i8> %val
}
; From <8 x i16>
define <2 x double> @test_v8i16_to_v2f64(<8 x i16> %in) nounwind {
; CHECK: test_v8i16_to_v2f64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i16> %in to <2 x double>
ret <2 x double> %val
}
define <2 x i64> @test_v8i16_to_v2i64(<8 x i16> %in) nounwind {
; CHECK: test_v8i16_to_v2i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i16> %in to <2 x i64>
ret <2 x i64> %val
}
define <4 x i32> @test_v8i16_to_v4i32(<8 x i16> %in) nounwind {
; CHECK: test_v8i16_to_v4i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i16> %in to <4 x i32>
ret <4 x i32> %val
}
define <4 x float> @test_v8i16_to_v4f32(<8 x i16> %in) nounwind {
; CHECK: test_v8i16_to_v4f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i16> %in to <4 x float>
ret <4 x float> %val
}
define <8 x i16> @test_v8i16_to_v8i16(<8 x i16> %in) nounwind{
; CHECK: test_v8i16_to_v8i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i16> %in to <8 x i16>
ret <8 x i16> %val
}
define <16 x i8> @test_v8i16_to_v16i8(<8 x i16> %in) nounwind{
; CHECK: test_v8i16_to_v16i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <8 x i16> %in to <16 x i8>
ret <16 x i8> %val
}
; From <4 x i32>
define <2 x double> @test_v4i32_to_v2f64(<4 x i32> %in) nounwind {
; CHECK: test_v4i32_to_v2f64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i32> %in to <2 x double>
ret <2 x double> %val
}
define <2 x i64> @test_v4i32_to_v2i64(<4 x i32> %in) nounwind {
; CHECK: test_v4i32_to_v2i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i32> %in to <2 x i64>
ret <2 x i64> %val
}
define <4 x i32> @test_v4i32_to_v4i32(<4 x i32> %in) nounwind {
; CHECK: test_v4i32_to_v4i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i32> %in to <4 x i32>
ret <4 x i32> %val
}
define <4 x float> @test_v4i32_to_v4f32(<4 x i32> %in) nounwind {
; CHECK: test_v4i32_to_v4f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i32> %in to <4 x float>
ret <4 x float> %val
}
define <8 x i16> @test_v4i32_to_v8i16(<4 x i32> %in) nounwind{
; CHECK: test_v4i32_to_v8i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i32> %in to <8 x i16>
ret <8 x i16> %val
}
define <16 x i8> @test_v4i32_to_v16i8(<4 x i32> %in) nounwind{
; CHECK: test_v4i32_to_v16i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x i32> %in to <16 x i8>
ret <16 x i8> %val
}
; From <4 x float>
define <2 x double> @test_v4f32_to_v2f64(<4 x float> %in) nounwind {
; CHECK: test_v4f32_to_v2f64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x float> %in to <2 x double>
ret <2 x double> %val
}
define <2 x i64> @test_v4f32_to_v2i64(<4 x float> %in) nounwind {
; CHECK: test_v4f32_to_v2i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x float> %in to <2 x i64>
ret <2 x i64> %val
}
define <4 x i32> @test_v4f32_to_v4i32(<4 x float> %in) nounwind {
; CHECK: test_v4f32_to_v4i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x float> %in to <4 x i32>
ret <4 x i32> %val
}
define <4 x float> @test_v4f32_to_v4f32(<4 x float> %in) nounwind{
; CHECK: test_v4f32_to_v4f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x float> %in to <4 x float>
ret <4 x float> %val
}
define <8 x i16> @test_v4f32_to_v8i16(<4 x float> %in) nounwind{
; CHECK: test_v4f32_to_v8i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x float> %in to <8 x i16>
ret <8 x i16> %val
}
define <16 x i8> @test_v4f32_to_v16i8(<4 x float> %in) nounwind{
; CHECK: test_v4f32_to_v16i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <4 x float> %in to <16 x i8>
ret <16 x i8> %val
}
; From <2 x i64>
define <2 x double> @test_v2i64_to_v2f64(<2 x i64> %in) nounwind {
; CHECK: test_v2i64_to_v2f64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i64> %in to <2 x double>
ret <2 x double> %val
}
define <2 x i64> @test_v2i64_to_v2i64(<2 x i64> %in) nounwind {
; CHECK: test_v2i64_to_v2i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i64> %in to <2 x i64>
ret <2 x i64> %val
}
define <4 x i32> @test_v2i64_to_v4i32(<2 x i64> %in) nounwind {
; CHECK: test_v2i64_to_v4i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i64> %in to <4 x i32>
ret <4 x i32> %val
}
define <4 x float> @test_v2i64_to_v4f32(<2 x i64> %in) nounwind{
; CHECK: test_v2i64_to_v4f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i64> %in to <4 x float>
ret <4 x float> %val
}
define <8 x i16> @test_v2i64_to_v8i16(<2 x i64> %in) nounwind{
; CHECK: test_v2i64_to_v8i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i64> %in to <8 x i16>
ret <8 x i16> %val
}
define <16 x i8> @test_v2i64_to_v16i8(<2 x i64> %in) nounwind{
; CHECK: test_v2i64_to_v16i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x i64> %in to <16 x i8>
ret <16 x i8> %val
}
; From <2 x double>
define <2 x double> @test_v2f64_to_v2f64(<2 x double> %in) nounwind {
; CHECK: test_v2f64_to_v2f64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x double> %in to <2 x double>
ret <2 x double> %val
}
define <2 x i64> @test_v2f64_to_v2i64(<2 x double> %in) nounwind {
; CHECK: test_v2f64_to_v2i64:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x double> %in to <2 x i64>
ret <2 x i64> %val
}
define <4 x i32> @test_v2f64_to_v4i32(<2 x double> %in) nounwind {
; CHECK: test_v2f64_to_v4i32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x double> %in to <4 x i32>
ret <4 x i32> %val
}
define <4 x float> @test_v2f64_to_v4f32(<2 x double> %in) nounwind{
; CHECK: test_v2f64_to_v4f32:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x double> %in to <4 x float>
ret <4 x float> %val
}
define <8 x i16> @test_v2f64_to_v8i16(<2 x double> %in) nounwind{
; CHECK: test_v2f64_to_v8i16:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x double> %in to <8 x i16>
ret <8 x i16> %val
}
define <16 x i8> @test_v2f64_to_v16i8(<2 x double> %in) nounwind{
; CHECK: test_v2f64_to_v16i8:
; CHECK-NEXT: // BB#0:
; CHECK-NEXT: ret
%val = bitcast <2 x double> %in to <16 x i8>
ret <16 x i8> %val
}

View File

@ -0,0 +1,594 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) {
;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = and <8 x i8> %a, %b;
ret <8 x i8> %tmp1
}
define <16 x i8> @and16xi8(<16 x i8> %a, <16 x i8> %b) {
;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = and <16 x i8> %a, %b;
ret <16 x i8> %tmp1
}
define <8 x i8> @orr8xi8(<8 x i8> %a, <8 x i8> %b) {
;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = or <8 x i8> %a, %b;
ret <8 x i8> %tmp1
}
define <16 x i8> @orr16xi8(<16 x i8> %a, <16 x i8> %b) {
;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = or <16 x i8> %a, %b;
ret <16 x i8> %tmp1
}
define <8 x i8> @xor8xi8(<8 x i8> %a, <8 x i8> %b) {
;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <8 x i8> %a, %b;
ret <8 x i8> %tmp1
}
define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) {
;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <16 x i8> %a, %b;
ret <16 x i8> %tmp1
}
define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
%tmp3 = or <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
%tmp3 = or <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
define <8 x i8> @orn8xi8(<8 x i8> %a, <8 x i8> %b) {
;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = or <8 x i8> %a, %tmp1
ret <8 x i8> %tmp2
}
define <16 x i8> @orn16xi8(<16 x i8> %a, <16 x i8> %b) {
;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = or <16 x i8> %a, %tmp1
ret <16 x i8> %tmp2
}
define <8 x i8> @bic8xi8(<8 x i8> %a, <8 x i8> %b) {
;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = and <8 x i8> %a, %tmp1
ret <8 x i8> %tmp2
}
define <16 x i8> @bic16xi8(<16 x i8> %a, <16 x i8> %b) {
;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = and <16 x i8> %a, %tmp1
ret <16 x i8> %tmp2
}
define <2 x i32> @orrimm2s_lsl0(<2 x i32> %a) {
;CHECK: orr {{v[0-31]+}}.2s, #0xff
%tmp1 = or <2 x i32> %a, < i32 255, i32 255>
ret <2 x i32> %tmp1
}
define <2 x i32> @orrimm2s_lsl8(<2 x i32> %a) {
;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #8
%tmp1 = or <2 x i32> %a, < i32 65280, i32 65280>
ret <2 x i32> %tmp1
}
define <2 x i32> @orrimm2s_lsl16(<2 x i32> %a) {
;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #16
%tmp1 = or <2 x i32> %a, < i32 16711680, i32 16711680>
ret <2 x i32> %tmp1
}
define <2 x i32> @orrimm2s_lsl24(<2 x i32> %a) {
;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #24
%tmp1 = or <2 x i32> %a, < i32 4278190080, i32 4278190080>
ret <2 x i32> %tmp1
}
define <4 x i32> @orrimm4s_lsl0(<4 x i32> %a) {
;CHECK: orr {{v[0-31]+}}.4s, #0xff
%tmp1 = or <4 x i32> %a, < i32 255, i32 255, i32 255, i32 255>
ret <4 x i32> %tmp1
}
define <4 x i32> @orrimm4s_lsl8(<4 x i32> %a) {
;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #8
%tmp1 = or <4 x i32> %a, < i32 65280, i32 65280, i32 65280, i32 65280>
ret <4 x i32> %tmp1
}
define <4 x i32> @orrimm4s_lsl16(<4 x i32> %a) {
;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #16
%tmp1 = or <4 x i32> %a, < i32 16711680, i32 16711680, i32 16711680, i32 16711680>
ret <4 x i32> %tmp1
}
define <4 x i32> @orrimm4s_lsl24(<4 x i32> %a) {
;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #24
%tmp1 = or <4 x i32> %a, < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080>
ret <4 x i32> %tmp1
}
define <4 x i16> @orrimm4h_lsl0(<4 x i16> %a) {
;CHECK: orr {{v[0-31]+}}.4h, #0xff
%tmp1 = or <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255 >
ret <4 x i16> %tmp1
}
define <4 x i16> @orrimm4h_lsl8(<4 x i16> %a) {
;CHECK: orr {{v[0-31]+}}.4h, #0xff, lsl #8
%tmp1 = or <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 >
ret <4 x i16> %tmp1
}
define <8 x i16> @orrimm8h_lsl0(<8 x i16> %a) {
;CHECK: orr {{v[0-31]+}}.8h, #0xff
%tmp1 = or <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
ret <8 x i16> %tmp1
}
define <8 x i16> @orrimm8h_lsl8(<8 x i16> %a) {
;CHECK: orr {{v[0-31]+}}.8h, #0xff, lsl #8
%tmp1 = or <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
ret <8 x i16> %tmp1
}
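; The AND masks in the bic-immediate tests below are the complement of the
; shifted BIC immediate; for example bic #0x10, lsl #8 clears 0x1000, so the
; corresponding 32-bit mask is 0xffffefff (4294963199).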
define <2 x i32> @bicimm2s_lsl0(<2 x i32> %a) {
;CHECK: bic {{v[0-31]+}}.2s, #0x10
%tmp1 = and <2 x i32> %a, < i32 4294967279, i32 4294967279 >
ret <2 x i32> %tmp1
}
define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) {
;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #8
%tmp1 = and <2 x i32> %a, < i32 4294963199, i32 4294963199 >
ret <2 x i32> %tmp1
}
define <2 x i32> @bicimm2s_lsl16(<2 x i32> %a) {
;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #16
%tmp1 = and <2 x i32> %a, < i32 4293918719, i32 4293918719 >
ret <2 x i32> %tmp1
}
define <2 x i32> @bicimm2s_lsl24(<2 x i32> %a) {
;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #24
%tmp1 = and <2 x i32> %a, < i32 4026531839, i32 4026531839>
ret <2 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl0(<4 x i32> %a) {
;CHECK: bic {{v[0-31]+}}.4s, #0x10
%tmp1 = and <4 x i32> %a, < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 >
ret <4 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) {
;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #8
%tmp1 = and <4 x i32> %a, < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 >
ret <4 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) {
;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #16
%tmp1 = and <4 x i32> %a, < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 >
ret <4 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl24(<4 x i32> %a) {
;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #24
%tmp1 = and <4 x i32> %a, < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839>
ret <4 x i32> %tmp1
}
define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) {
;CHECK: bic {{v[0-31]+}}.4h, #0x10
%tmp1 = and <4 x i16> %a, < i16 65519, i16 65519, i16 65519, i16 65519 >
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl0_b(<4 x i16> %a) {
;CHECK: bic {{v[0-31]+}}.4h, #0x0
%tmp1 = and <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 >
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) {
;CHECK: bic {{v[0-31]+}}.4h, #0x10, lsl #8
%tmp1 = and <4 x i16> %a, < i16 61439, i16 61439, i16 61439, i16 61439>
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) {
;CHECK: bic {{v[0-31]+}}.4h, #0x0, lsl #8
%tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255>
ret <4 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) {
;CHECK: bic {{v[0-31]+}}.8h, #0x10
%tmp1 = and <8 x i16> %a, < i16 65519, i16 65519, i16 65519, i16 65519,
i16 65519, i16 65519, i16 65519, i16 65519 >
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) {
;CHECK: bic {{v[0-31]+}}.8h, #0x0
%tmp1 = and <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) {
;CHECK: bic {{v[0-31]+}}.8h, #0x10, lsl #8
%tmp1 = and <8 x i16> %a, < i16 61439, i16 61439, i16 61439, i16 61439,
i16 61439, i16 61439, i16 61439, i16 61439>
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl8_b(<8 x i16> %a) {
;CHECK: bic {{v[0-31]+}}.8h, #0x0, lsl #8
%tmp1 = and <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
ret <8 x i16> %tmp1
}
define <2 x i32> @and2xi32(<2 x i32> %a, <2 x i32> %b) {
;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = and <2 x i32> %a, %b;
ret <2 x i32> %tmp1
}
define <4 x i16> @and4xi16(<4 x i16> %a, <4 x i16> %b) {
;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = and <4 x i16> %a, %b;
ret <4 x i16> %tmp1
}
define <1 x i64> @and1xi64(<1 x i64> %a, <1 x i64> %b) {
;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = and <1 x i64> %a, %b;
ret <1 x i64> %tmp1
}
define <4 x i32> @and4xi32(<4 x i32> %a, <4 x i32> %b) {
;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = and <4 x i32> %a, %b;
ret <4 x i32> %tmp1
}
define <8 x i16> @and8xi16(<8 x i16> %a, <8 x i16> %b) {
;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = and <8 x i16> %a, %b;
ret <8 x i16> %tmp1
}
define <2 x i64> @and2xi64(<2 x i64> %a, <2 x i64> %b) {
;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = and <2 x i64> %a, %b;
ret <2 x i64> %tmp1
}
define <2 x i32> @orr2xi32(<2 x i32> %a, <2 x i32> %b) {
;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = or <2 x i32> %a, %b;
ret <2 x i32> %tmp1
}
define <4 x i16> @orr4xi16(<4 x i16> %a, <4 x i16> %b) {
;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = or <4 x i16> %a, %b;
ret <4 x i16> %tmp1
}
define <1 x i64> @orr1xi64(<1 x i64> %a, <1 x i64> %b) {
;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = or <1 x i64> %a, %b;
ret <1 x i64> %tmp1
}
define <4 x i32> @orr4xi32(<4 x i32> %a, <4 x i32> %b) {
;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = or <4 x i32> %a, %b;
ret <4 x i32> %tmp1
}
define <8 x i16> @orr8xi16(<8 x i16> %a, <8 x i16> %b) {
;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = or <8 x i16> %a, %b;
ret <8 x i16> %tmp1
}
define <2 x i64> @orr2xi64(<2 x i64> %a, <2 x i64> %b) {
;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = or <2 x i64> %a, %b;
ret <2 x i64> %tmp1
}
define <2 x i32> @eor2xi32(<2 x i32> %a, <2 x i32> %b) {
;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <2 x i32> %a, %b;
ret <2 x i32> %tmp1
}
define <4 x i16> @eor4xi16(<4 x i16> %a, <4 x i16> %b) {
;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <4 x i16> %a, %b;
ret <4 x i16> %tmp1
}
define <1 x i64> @eor1xi64(<1 x i64> %a, <1 x i64> %b) {
;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <1 x i64> %a, %b;
ret <1 x i64> %tmp1
}
define <4 x i32> @eor4xi32(<4 x i32> %a, <4 x i32> %b) {
;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <4 x i32> %a, %b;
ret <4 x i32> %tmp1
}
define <8 x i16> @eor8xi16(<8 x i16> %a, <8 x i16> %b) {
;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <8 x i16> %a, %b;
ret <8 x i16> %tmp1
}
define <2 x i64> @eor2xi64(<2 x i64> %a, <2 x i64> %b) {
;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <2 x i64> %a, %b;
ret <2 x i64> %tmp1
}
define <2 x i32> @bic2xi32(<2 x i32> %a, <2 x i32> %b) {
;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 >
%tmp2 = and <2 x i32> %a, %tmp1
ret <2 x i32> %tmp2
}
define <4 x i16> @bic4xi16(<4 x i16> %a, <4 x i16> %b) {
;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = and <4 x i16> %a, %tmp1
ret <4 x i16> %tmp2
}
define <1 x i64> @bic1xi64(<1 x i64> %a, <1 x i64> %b) {
;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <1 x i64> %b, < i64 -1>
%tmp2 = and <1 x i64> %a, %tmp1
ret <1 x i64> %tmp2
}
define <4 x i32> @bic4xi32(<4 x i32> %a, <4 x i32> %b) {
;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1>
%tmp2 = and <4 x i32> %a, %tmp1
ret <4 x i32> %tmp2
}
define <8 x i16> @bic8xi16(<8 x i16> %a, <8 x i16> %b) {
;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = and <8 x i16> %a, %tmp1
ret <8 x i16> %tmp2
}
define <2 x i64> @bic2xi64(<2 x i64> %a, <2 x i64> %b) {
;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1>
%tmp2 = and <2 x i64> %a, %tmp1
ret <2 x i64> %tmp2
}
define <2 x i32> @orn2xi32(<2 x i32> %a, <2 x i32> %b) {
;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 >
%tmp2 = or <2 x i32> %a, %tmp1
ret <2 x i32> %tmp2
}
define <4 x i16> @orn4xi16(<4 x i16> %a, <4 x i16> %b) {
;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = or <4 x i16> %a, %tmp1
ret <4 x i16> %tmp2
}
define <1 x i64> @orn1xi64(<1 x i64> %a, <1 x i64> %b) {
;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = xor <1 x i64> %b, < i64 -1>
%tmp2 = or <1 x i64> %a, %tmp1
ret <1 x i64> %tmp2
}
define <4 x i32> @orn4xi32(<4 x i32> %a, <4 x i32> %b) {
;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1>
%tmp2 = or <4 x i32> %a, %tmp1
ret <4 x i32> %tmp2
}
define <8 x i16> @orn8xi16(<8 x i16> %a, <8 x i16> %b) {
;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = or <8 x i16> %a, %tmp1
ret <8 x i16> %tmp2
}
define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) {
;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1>
%tmp2 = or <2 x i64> %a, %tmp1
ret <2 x i64> %tmp2
}
define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = and <2 x i32> %a, < i32 -1, i32 -1 >
%tmp2 = and <2 x i32> %b, < i32 0, i32 0 >
%tmp3 = or <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = and <4 x i16> %a, < i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = and <4 x i16> %b, < i16 0, i16 0, i16 0, i16 0 >
%tmp3 = or <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%tmp1 = and <1 x i64> %a, < i64 -1 >
%tmp2 = and <1 x i64> %b, < i64 0 >
%tmp3 = or <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = and <4 x i32> %a, < i32 -1, i32 -1, i32 -1, i32 -1 >
%tmp2 = and <4 x i32> %b, < i32 0, i32 0, i32 0, i32 0 >
%tmp3 = or <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0 >
%tmp3 = or <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%tmp1 = and <2 x i64> %a, < i64 -1, i64 -1 >
%tmp2 = and <2 x i64> %b, < i64 0, i64 0 >
%tmp3 = or <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
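; The variable-operand tests below use no constant masks; the generic select
; pattern (v1 & v2) | (~v1 & v3) is what should be recognised as bsl.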
define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%1 = and <8 x i8> %v1, %v2
%2 = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%3 = and <8 x i8> %2, %v3
%4 = or <8 x i8> %1, %3
ret <8 x i8> %4
}
define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%1 = and <4 x i16> %v1, %v2
%2 = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
%3 = and <4 x i16> %2, %v3
%4 = or <4 x i16> %1, %3
ret <4 x i16> %4
}
define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%1 = and <2 x i32> %v1, %v2
%2 = xor <2 x i32> %v1, <i32 -1, i32 -1>
%3 = and <2 x i32> %2, %v3
%4 = or <2 x i32> %1, %3
ret <2 x i32> %4
}
define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
%1 = and <1 x i64> %v1, %v2
%2 = xor <1 x i64> %v1, <i64 -1>
%3 = and <1 x i64> %2, %v3
%4 = or <1 x i64> %1, %3
ret <1 x i64> %4
}
define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%1 = and <16 x i8> %v1, %v2
%2 = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%3 = and <16 x i8> %2, %v3
%4 = or <16 x i8> %1, %3
ret <16 x i8> %4
}
define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%1 = and <8 x i16> %v1, %v2
%2 = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%3 = and <8 x i16> %2, %v3
%4 = or <8 x i16> %1, %3
ret <8 x i16> %4
}
define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%1 = and <4 x i32> %v1, %v2
%2 = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
%3 = and <4 x i32> %2, %v3
%4 = or <4 x i32> %1, %3
ret <4 x i32> %4
}
define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
%1 = and <2 x i64> %v1, %v2
%2 = xor <2 x i64> %v1, <i64 -1, i64 -1>
%3 = and <2 x i64> %2, %v3
%4 = or <2 x i64> %1, %3
ret <2 x i64> %4
}
define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) {
;CHECK: orr {{v[0-31]+}}.4h, #0xff
%val = or <8 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
ret <8 x i8> %val
}
define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) {
;CHECK: orr {{v[0-31]+}}.4h, #0xff, lsl #8
%val = or <8 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <8 x i8> %val
}
define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) {
;CHECK: orr {{v[0-31]+}}.8h, #0xff
%val = or <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
ret <16 x i8> %val
}
define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) {
;CHECK: orr {{v[0-31]+}}.8h, #0xff, lsl #8
%val = or <16 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <16 x i8> %val
}

File diff suppressed because it is too large

View File

@ -0,0 +1,56 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
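; facge/facgt perform floating-point absolute compares (|lhs| >= |rhs| and
; |lhs| > |rhs|), produced here from the vacge/vacgt intrinsics.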
declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>)
declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>)
declare <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double>, <2 x double>)
define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facge_from_intr_v2i32:
%val = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %A, <2 x float> %B)
; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
ret <2 x i32> %val
}
define <4 x i32> @facge_from_intr_v4i32(<4 x float> %A, <4 x float> %B) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facge_from_intr_v4i32:
%val = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %A, <4 x float> %B)
; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
ret <4 x i32> %val
}
define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facge_from_intr_v2i64:
%val = call <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double> %A, <2 x double> %B)
; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
ret <2 x i64> %val
}
declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>)
declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>)
declare <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double>, <2 x double>)
define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facgt_from_intr_v2i32:
%val = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %A, <2 x float> %B)
; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
ret <2 x i32> %val
}
define <4 x i32> @facgt_from_intr_v4i32(<4 x float> %A, <4 x float> %B) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facgt_from_intr_v4i32:
%val = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %A, <4 x float> %B)
; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
ret <4 x i32> %val
}
define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
; Using registers other than v0, v1 and v2 is possible, but would be odd.
; CHECK: facgt_from_intr_v2i64:
%val = call <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double> %A, <2 x double> %B)
; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
ret <2 x i64> %val
}

View File

@ -0,0 +1,112 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
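; With -fp-contract=fast, a separate fmul feeding an fadd/fsub may be
; contracted, so even the unfused IR sequences below should select fmla/fmls.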
define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%tmp1 = fmul <2 x float> %A, %B;
%tmp2 = fadd <2 x float> %C, %tmp1;
ret <2 x float> %tmp2
}
define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%tmp1 = fmul <4 x float> %A, %B;
%tmp2 = fadd <4 x float> %C, %tmp1;
ret <4 x float> %tmp2
}
define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%tmp1 = fmul <2 x double> %A, %B;
%tmp2 = fadd <2 x double> %C, %tmp1;
ret <2 x double> %tmp2
}
define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK: fmls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%tmp1 = fmul <2 x float> %A, %B;
%tmp2 = fsub <2 x float> %C, %tmp1;
ret <2 x float> %tmp2
}
define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK: fmls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%tmp1 = fmul <4 x float> %A, %B;
%tmp2 = fsub <4 x float> %C, %tmp1;
ret <4 x float> %tmp2
}
define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK: fmls {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%tmp1 = fmul <2 x double> %A, %B;
%tmp2 = fsub <2 x double> %C, %tmp1;
ret <2 x double> %tmp2
}
; Another set of tests for when the intrinsic is used.
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
ret <2 x float> %val
}
define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
ret <4 x float> %val
}
define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
ret <2 x double> %val
}
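; fmls is expected to be formed when the first multiplicand of the fused
; multiply-add is negated, i.e. fma(-A, B, C).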
define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK: fmls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%negA = fsub <2 x float> <float -0.0, float -0.0>, %A
%val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C)
ret <2 x float> %val
}
define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK: fmls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%negA = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A
%val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C)
ret <4 x float> %val
}
define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK: fmls {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%negA = fsub <2 x double> <double -0.0, double -0.0>, %A
%val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C)
ret <2 x double> %val
}
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
%val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
ret <2 x float> %val
}
define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
%val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
ret <4 x float> %val
}
define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
%val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
ret <2 x double> %val
}

View File

@ -0,0 +1,54 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; Set of tests for when the intrinsic is used.
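; frsqrts and frecps compute one Newton-Raphson refinement step for the
; reciprocal square-root and reciprocal estimates respectively.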
declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double>, <2 x double>)
define <2 x float> @frsqrts_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: frsqrts v0.2s, v0.2s, v1.2s
%val = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs)
ret <2 x float> %val
}
define <4 x float> @frsqrts_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: frsqrts v0.4s, v0.4s, v1.4s
%val = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs)
ret <4 x float> %val
}
define <2 x double> @frsqrts_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: frsqrts v0.2d, v0.2d, v1.2d
%val = call <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs)
ret <2 x double> %val
}
declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double>, <2 x double>)
define <2 x float> @frecps_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: frecps v0.2s, v0.2s, v1.2s
%val = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %lhs, <2 x float> %rhs)
ret <2 x float> %val
}
define <4 x float> @frecps_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: frecps v0.4s, v0.4s, v1.4s
%val = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %lhs, <4 x float> %rhs)
ret <4 x float> %val
}
define <2 x double> @frecps_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: frecps v0.2d, v0.2d, v1.2d
%val = call <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double> %lhs, <2 x double> %rhs)
ret <2 x double> %val
}

View File

@ -0,0 +1,207 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
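; Halving add/sub: each lane computes (a + b) >> 1 or (a - b) >> 1 in the
; signed (shadd/shsub) or unsigned (uhadd/uhsub) sense, without intermediate
; overflow.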
declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_uhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uhadd_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uhadd v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_shadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_shadd_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: shadd v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_uhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uhadd_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uhadd v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_shadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_shadd_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: shadd v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_uhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uhadd_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uhadd v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_shadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_shadd_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: shadd v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_uhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uhadd_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uhadd v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_shadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_shadd_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: shadd v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_uhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uhadd_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uhadd v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_shadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_shadd_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: shadd v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_uhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uhadd_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uhadd v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_shadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_shadd_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: shadd v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_uhsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uhsub_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uhsub v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_shsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_shsub_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: shsub v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_uhsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uhsub_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uhsub v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_shsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_shsub_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: shsub v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_uhsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uhsub_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uhsub v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_shsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_shsub_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: shsub v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_uhsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uhsub_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uhsub v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_shsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_shsub_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: shsub v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_uhsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uhsub_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uhsub v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_shsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_shsub_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: shsub v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_uhsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uhsub_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uhsub v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_shsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_shsub_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: shsub v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}

View File

@ -0,0 +1,310 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
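; Pairwise max/min reduce adjacent element pairs taken from the concatenation
; of the two source vectors, so the result has as many lanes as each input.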
declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: test_smaxp_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smaxp v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_umaxp_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umaxp v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smaxp_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smaxp v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umaxp_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umaxp v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smaxp_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smaxp v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umaxp_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umaxp v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smaxp_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smaxp v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umaxp_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umaxp v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smaxp_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smaxp v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umaxp_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umaxp v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smaxp_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smaxp v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umaxp_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umaxp v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: test_sminp_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sminp v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uminp_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uminp v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_sminp_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sminp v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uminp_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uminp v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sminp_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sminp v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uminp_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uminp v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sminp_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sminp v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uminp_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uminp v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sminp_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sminp v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uminp_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uminp v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sminp_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sminp v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uminp_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uminp v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmaxp_v2f32:
%val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmaxp_v4f32:
%val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmaxp_v2f64:
%val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fminp_v2f32:
%val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fminp_v4f32:
%val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fminp_v2f64:
%val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>)
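; FMAXNMP/FMINNMP are the pairwise maxNum/minNum forms: a quiet NaN in one operand loses to a number in the other.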
define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmaxnmp_v2f32:
%val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmaxnmp_v4f32:
%val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmaxnmp_v2f64:
%val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fminnmp_v2f32:
%val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminnmp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fminnmp_v4f32:
%val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminnmp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fminnmp_v2f64:
%val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminnmp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}

View File

@ -0,0 +1,310 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: test_smax_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smax v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_umax_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umax v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smax_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smax v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umax_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umax v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smax_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smax v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umax_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umax v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smax_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smax v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umax_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umax v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smax_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smax v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umax_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umax v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smax_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smax v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umax_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umax v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: test_smin_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smin v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_umin_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umin v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smin_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smin v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umin_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umin v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smin_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smin v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umin_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umin v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smin_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smin v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umin_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umin v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smin_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smin v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umin_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umin v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smin_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smin v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umin_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umin v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmax_v2f32:
%val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmax v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmax_v4f32:
%val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmax v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmax_v2f64:
%val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmax v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmin_v2f32:
%val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmin v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmin_v4f32:
%val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmin v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmin_v2f64:
%val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmin v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)
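; FMAXNM/FMINNM implement IEEE 754-2008 maxNum/minNum: when exactly one operand is a quiet NaN, the numeric operand is returned.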
define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmaxnm_v2f32:
%val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmaxnm_v4f32:
%val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmaxnm_v2f64:
%val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fminnm_v2f32:
%val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminnm v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fminnm_v4f32:
%val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminnm v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fminnm_v2f64:
%val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminnm v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}

View File

@ -0,0 +1,88 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
;CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = mul <8 x i8> %A, %B;
%tmp2 = add <8 x i8> %C, %tmp1;
ret <8 x i8> %tmp2
}
define <16 x i8> @mla16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
;CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = mul <16 x i8> %A, %B;
%tmp2 = add <16 x i8> %C, %tmp1;
ret <16 x i8> %tmp2
}
define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
;CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp1 = mul <4 x i16> %A, %B;
%tmp2 = add <4 x i16> %C, %tmp1;
ret <4 x i16> %tmp2
}
define <8 x i16> @mla8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
;CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp1 = mul <8 x i16> %A, %B;
%tmp2 = add <8 x i16> %C, %tmp1;
ret <8 x i16> %tmp2
}
define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
;CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp1 = mul <2 x i32> %A, %B;
%tmp2 = add <2 x i32> %C, %tmp1;
ret <2 x i32> %tmp2
}
define <4 x i32> @mla4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
;CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp1 = mul <4 x i32> %A, %B;
%tmp2 = add <4 x i32> %C, %tmp1;
ret <4 x i32> %tmp2
}
define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
;CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = mul <8 x i8> %A, %B;
%tmp2 = sub <8 x i8> %C, %tmp1;
ret <8 x i8> %tmp2
}
define <16 x i8> @mls16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
;CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = mul <16 x i8> %A, %B;
%tmp2 = sub <16 x i8> %C, %tmp1;
ret <16 x i8> %tmp2
}
define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
;CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp1 = mul <4 x i16> %A, %B;
%tmp2 = sub <4 x i16> %C, %tmp1;
ret <4 x i16> %tmp2
}
define <8 x i16> @mls8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
;CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp1 = mul <8 x i16> %A, %B;
%tmp2 = sub <8 x i16> %C, %tmp1;
ret <8 x i16> %tmp2
}
define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
;CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp1 = mul <2 x i32> %A, %B;
%tmp2 = sub <2 x i32> %C, %tmp1;
ret <2 x i32> %tmp2
}
define <4 x i32> @mls4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
;CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp1 = mul <4 x i32> %A, %B;
%tmp2 = sub <4 x i32> %C, %tmp1;
ret <4 x i32> %tmp2
}

View File

@ -0,0 +1,205 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @movi8b() {
;CHECK: movi {{v[0-9]+}}.8b, #0x8
ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define <16 x i8> @movi16b() {
;CHECK: movi {{v[0-9]+}}.16b, #0x8
ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define <2 x i32> @movi2s_lsl0() {
;CHECK: movi {{v[0-9]+}}.2s, #0xff
ret <2 x i32> < i32 255, i32 255 >
}
define <2 x i32> @movi2s_lsl8() {
;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #8
ret <2 x i32> < i32 65280, i32 65280 >
}
define <2 x i32> @movi2s_lsl16() {
;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #16
ret <2 x i32> < i32 16711680, i32 16711680 >
}
define <2 x i32> @movi2s_lsl24() {
;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #24
ret <2 x i32> < i32 4278190080, i32 4278190080 >
}
define <4 x i32> @movi4s_lsl0() {
;CHECK: movi {{v[0-9]+}}.4s, #0xff
ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 >
}
define <4 x i32> @movi4s_lsl8() {
;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #8
ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 >
}
define <4 x i32> @movi4s_lsl16() {
;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #16
ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 >
}
define <4 x i32> @movi4s_lsl24() {
;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #24
ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 >
}
define <4 x i16> @movi4h_lsl0() {
;CHECK: movi {{v[0-9]+}}.4h, #0xff
ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 >
}
define <4 x i16> @movi4h_lsl8() {
;CHECK: movi {{v[0-9]+}}.4h, #0xff, lsl #8
ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 >
}
define <8 x i16> @movi8h_lsl0() {
;CHECK: movi {{v[0-9]+}}.8h, #0xff
ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
}
define <8 x i16> @movi8h_lsl8() {
;CHECK: movi {{v[0-9]+}}.8h, #0xff, lsl #8
ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
}
define <2 x i32> @mvni2s_lsl0() {
;CHECK: mvni {{v[0-9]+}}.2s, #0x10
ret <2 x i32> < i32 4294967279, i32 4294967279 >
}
define <2 x i32> @mvni2s_lsl8() {
;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #8
ret <2 x i32> < i32 4294963199, i32 4294963199 >
}
define <2 x i32> @mvni2s_lsl16() {
;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #16
ret <2 x i32> < i32 4293918719, i32 4293918719 >
}
define <2 x i32> @mvni2s_lsl24() {
;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #24
ret <2 x i32> < i32 4026531839, i32 4026531839 >
}
define <4 x i32> @mvni4s_lsl0() {
;CHECK: mvni {{v[0-9]+}}.4s, #0x10
ret <4 x i32> < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 >
}
define <4 x i32> @mvni4s_lsl8() {
;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #8
ret <4 x i32> < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 >
}
define <4 x i32> @mvni4s_lsl16() {
;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #16
ret <4 x i32> < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 >
}
define <4 x i32> @mvni4s_lsl24() {
;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #24
ret <4 x i32> < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839 >
}
define <4 x i16> @mvni4h_lsl0() {
;CHECK: mvni {{v[0-9]+}}.4h, #0x10
ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
}
define <4 x i16> @mvni4h_lsl8() {
;CHECK: mvni {{v[0-9]+}}.4h, #0x10, lsl #8
ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
}
define <8 x i16> @mvni8h_lsl0() {
;CHECK: mvni {{v[0-9]+}}.8h, #0x10
ret <8 x i16> < i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519 >
}
define <8 x i16> @mvni8h_lsl8() {
;CHECK: mvni {{v[0-9]+}}.8h, #0x10, lsl #8
ret <8 x i16> < i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439 >
}
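; The msl ("modified shift left") forms shift ones into the vacated low bits, so #0xff, msl #8 yields 0x0000ffff per lane; mvni then inverts that pattern.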
define <2 x i32> @movi2s_msl8() {
;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #8
ret <2 x i32> < i32 65535, i32 65535 >
}
define <2 x i32> @movi2s_msl16() {
;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #16
ret <2 x i32> < i32 16777215, i32 16777215 >
}
define <4 x i32> @movi4s_msl8() {
;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #8
ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 >
}
define <4 x i32> @movi4s_msl16() {
;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #16
ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 >
}
define <2 x i32> @mvni2s_msl8() {
;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #8
ret <2 x i32> < i32 4294962944, i32 4294962944 >
}
define <2 x i32> @mvni2s_msl16() {
;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #16
ret <2 x i32> < i32 4293853184, i32 4293853184 >
}
define <4 x i32> @mvni4s_msl8() {
;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #8
ret <4 x i32> < i32 4294962944, i32 4294962944, i32 4294962944, i32 4294962944 >
}
define <4 x i32> @mvni4s_msl16() {
;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #16
ret <4 x i32> < i32 4293853184, i32 4293853184, i32 4293853184, i32 4293853184 >
}
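; The 64-bit MOVI immediate replicates each of its eight control bits into a full byte, giving masks such as 0xff0000ff0000ffff.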
define <2 x i64> @movi2d() {
;CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
define <1 x i64> @movid() {
;CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff
ret <1 x i64> < i64 18374687574888349695 >
}
define <2 x float> @fmov2s() {
;CHECK: fmov {{v[0-9]+}}.2s, #-12.00000000
ret <2 x float> < float -1.2e1, float -1.2e1>
}
define <4 x float> @fmov4s() {
;CHECK: fmov {{v[0-9]+}}.4s, #-12.00000000
ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1>
}
define <2 x double> @fmov2d() {
;CHECK: fmov {{v[0-9]+}}.2d, #-12.00000000
ret <2 x double> < double -1.2e1, double -1.2e1>
}

View File

@ -0,0 +1,181 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = mul <8 x i8> %A, %B;
ret <8 x i8> %tmp3
}
define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
;CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = mul <16 x i8> %A, %B;
ret <16 x i8> %tmp3
}
define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = mul <4 x i16> %A, %B;
ret <4 x i16> %tmp3
}
define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
;CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = mul <8 x i16> %A, %B;
ret <8 x i16> %tmp3
}
define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = mul <2 x i32> %A, %B;
ret <2 x i32> %tmp3
}
define <4 x i32> @mul4xi32(<4 x i32> %A, <4 x i32> %B) {
;CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = mul <4 x i32> %A, %B;
ret <4 x i32> %tmp3
}
define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = fmul <2 x float> %A, %B;
ret <2 x float> %tmp3
}
define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
;CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = fmul <4 x float> %A, %B;
ret <4 x float> %tmp3
}
define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
;CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = fmul <2 x double> %A, %B;
ret <2 x double> %tmp3
}
define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
;CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = fdiv <2 x float> %A, %B;
ret <2 x float> %tmp3
}
define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
;CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = fdiv <4 x float> %A, %B;
ret <4 x float> %tmp3
}
define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
;CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = fdiv <2 x double> %A, %B;
ret <2 x double> %tmp3
}
declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>)
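; PMUL is a polynomial (carry-less) multiply over GF(2), operating on each byte lane.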
define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: poly_mulv8i8:
%prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: pmul v0.8b, v0.8b, v1.8b
ret <8 x i8> %prod
}
define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: poly_mulv16i8:
%prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: pmul v0.16b, v0.16b, v1.16b
ret <16 x i8> %prod
}
declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
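; SQDMULH returns the high half of the doubled product 2*a*b, saturating when both inputs are the minimum negative value.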
define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sqdmulh_v4i16:
%prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
ret <4 x i16> %prod
}
define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sqdmulh_v8i16:
%prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
ret <8 x i16> %prod
}
define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sqdmulh_v2i32:
%prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
ret <2 x i32> %prod
}
define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sqdmulh_v4i32:
%prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
ret <4 x i32> %prod
}
declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
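; SQRDMULH does the same but adds a rounding bias before taking the high half.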
define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sqrdmulh_v4i16:
%prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
ret <4 x i16> %prod
}
define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sqrdmulh_v8i16:
%prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
ret <8 x i16> %prod
}
define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sqrdmulh_v2i32:
%prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
ret <2 x i32> %prod
}
define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sqrdmulh_v4i32:
%prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
ret <4 x i32> %prod
}
declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
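; FMULX behaves like FMUL except that (+/-0) * (+/-Inf) returns +/-2.0 instead of a NaN.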
define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: fmulx_v2f32:
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: fmulx v0.2s, v0.2s, v1.2s
%val = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
ret <2 x float> %val
}
define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: fmulx_v4f32:
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: fmulx v0.4s, v0.4s, v1.4s
%val = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
ret <4 x float> %val
}
define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: fmulx_v2f64:
; Using registers other than v0 and v1 is possible, but would be odd.
; CHECK: fmulx v0.2d, v0.2d, v1.2d
%val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
ret <2 x double> %val
}

View File

@ -0,0 +1,105 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>)
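; [US]RHADD computes (a + b + 1) >> 1 in wider precision, so the carry is not lost.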
define <8 x i8> @test_urhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_urhadd_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: urhadd v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_srhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_srhadd_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: srhadd v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_urhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_urhadd_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: urhadd v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_srhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_srhadd_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: srhadd v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_urhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_urhadd_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: urhadd v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_srhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_srhadd_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: srhadd v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_urhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_urhadd_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: urhadd v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_srhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_srhadd_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: srhadd v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_urhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_urhadd_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: urhadd v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_srhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_srhadd_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: srhadd v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_urhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_urhadd_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: urhadd v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_srhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_srhadd_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: srhadd v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}

View File

@ -0,0 +1,138 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>)
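; [US]RSHL shifts each lane by a signed per-lane amount; negative amounts give a rounding right shift.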
define <8 x i8> @test_urshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_urshl_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: urshl v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_srshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_srshl_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: srshl v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_urshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_urshl_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: urshl v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_srshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_srshl_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: srshl v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_urshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_urshl_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: urshl v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_srshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_srshl_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: srshl v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_urshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_urshl_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: urshl v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_srshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_srshl_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: srshl v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_urshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_urshl_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: urshl v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_srshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_srshl_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: srshl v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_urshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_urshl_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: urshl v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_srshl_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: srshl v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_urshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: urshl d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_srshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: srshl d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_urshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_urshl_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: urshl v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
define <2 x i64> @test_srshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_srshl_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: srshl v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}

View File

@ -0,0 +1,274 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>)
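; [US]QADD and [US]QSUB saturate to the range of the element type instead of wrapping.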
define <8 x i8> @test_uqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uqadd_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uqadd v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_sqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_sqadd_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sqadd v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_uqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uqadd_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uqadd v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_sqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_sqadd_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sqadd v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_uqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uqadd_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uqadd v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_sqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sqadd_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sqadd v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_uqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uqadd_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uqadd v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_sqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sqadd_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sqadd v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_uqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uqadd_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uqadd v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_sqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sqadd_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sqadd v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_uqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uqadd_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uqadd v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sqadd_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sqadd v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqadd_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqadd d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqadd_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqadd d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_uqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_uqadd_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: uqadd v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
define <2 x i64> @test_sqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_sqadd_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: sqadd v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_uqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uqsub_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uqsub v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_sqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_sqsub_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sqsub v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_uqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uqsub_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uqsub v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_sqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_sqsub_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sqsub v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_uqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uqsub_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uqsub v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_sqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sqsub_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sqsub v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_uqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uqsub_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uqsub v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_sqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sqsub_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sqsub v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_uqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uqsub_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uqsub v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_sqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sqsub_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sqsub v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_uqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uqsub_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uqsub v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_sqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sqsub_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sqsub v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_uqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_uqsub_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: uqsub v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_sqsub_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: sqsub v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqsub_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqsub d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqsub_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqsub d0, d0, d1
ret <1 x i64> %tmp1
}

View File

@ -0,0 +1,138 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>)
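; [US]QRSHL is a rounding shift by a signed per-lane amount that saturates when a left shift overflows.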
define <8 x i8> @test_uqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uqrshl_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uqrshl v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_sqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_sqrshl_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sqrshl v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_uqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uqrshl_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uqrshl v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_sqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_sqrshl_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sqrshl v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_uqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uqrshl_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uqrshl v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_sqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sqrshl_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sqrshl v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_uqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uqrshl_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uqrshl v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_sqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sqrshl_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sqrshl v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_uqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uqrshl_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uqrshl v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_sqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sqrshl_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sqrshl v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_uqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uqrshl_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uqrshl v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sqrshl_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sqrshl v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqrshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqrshl d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqrshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqrshl d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_uqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_uqrshl_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: uqrshl v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
define <2 x i64> @test_sqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_sqrshl_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: sqrshl v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}

View File

@ -0,0 +1,138 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
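; [US]QSHL shifts by a signed per-lane amount and saturates left shifts that would overflow.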
define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uqshl_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uqshl v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_sqshl_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sqshl v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_uqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uqshl_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uqshl v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_sqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_sqshl_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sqshl v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_uqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uqshl_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uqshl v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_sqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sqshl_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sqshl v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_uqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uqshl_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uqshl v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_sqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sqshl_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sqshl v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_uqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uqshl_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uqshl v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_sqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sqshl_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sqshl v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_uqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uqshl_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uqshl v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sqshl_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sqshl v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqshl d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqshl d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_uqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_uqshl_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: uqshl v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
define <2 x i64> @test_sqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_sqshl_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: sqshl v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}

View File

@ -0,0 +1,140 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
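; Non-saturating shifts: the llvm.arm.neon.vshifts/vshiftu intrinsics should lower to
; SSHL/USHL, again using the scalar d-register form for the <1 x i64> cases.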
declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_ushl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_ushl_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: ushl v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
define <8 x i8> @test_sshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_sshl_v8i8:
%tmp1 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sshl v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_ushl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_ushl_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: ushl v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
define <16 x i8> @test_sshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_sshl_v16i8:
%tmp1 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sshl v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_ushl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_ushl_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: ushl v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
define <4 x i16> @test_sshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sshl_v4i16:
%tmp1 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sshl v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_ushl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_ushl_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: ushl v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
define <8 x i16> @test_sshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sshl_v8i16:
%tmp1 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sshl v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_ushl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_ushl_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: ushl v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
define <2 x i32> @test_sshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sshl_v2i32:
%tmp1 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sshl v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_ushl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_ushl_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: ushl v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sshl_v4i32:
%tmp1 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sshl v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_ushl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: ushl d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sshl d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_ushl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_ushl_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: ushl v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: test_sshl_v2i64:
%tmp1 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
; CHECK: sshl v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}

View File

@ -1,4 +1,4 @@
// RUN: not llvm-mc -triple=aarch64 < %s 2> %t
// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
//------------------------------------------------------------------------------
@ -2892,13 +2892,13 @@
movi wzr, #0x44444444
movi w3, #0xffff
movi x9, #0x0000ffff00000000
// CHECK-ERROR: error: invalid instruction
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: movi wzr, #0x44444444
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR: error: invalid instruction
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: movi w3, #0xffff
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR: error: invalid instruction
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: movi x9, #0x0000ffff00000000
// CHECK-ERROR-NEXT: ^
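// (The expected diagnostic changes, likely because MOVI now exists as a vector mnemonic:
// a GPR destination is reported as an invalid operand rather than an unknown instruction.)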

View File

@ -1,4 +1,4 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding < %s | FileCheck %s
// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
.globl _func
// Check that the assembler can handle the documented syntax from the ARM ARM.

View File

@ -0,0 +1,78 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Absolute Difference and Accumulate (Signed, Unsigned)
//----------------------------------------------------------------------
uaba v0.8b, v1.8b, v2.8b
uaba v0.16b, v1.16b, v2.16b
uaba v0.4h, v1.4h, v2.4h
uaba v0.8h, v1.8h, v2.8h
uaba v0.2s, v1.2s, v2.2s
uaba v0.4s, v1.4s, v2.4s
// CHECK: uaba v0.8b, v1.8b, v2.8b // encoding: [0x20,0x7c,0x22,0x2e]
// CHECK: uaba v0.16b, v1.16b, v2.16b // encoding: [0x20,0x7c,0x22,0x6e]
// CHECK: uaba v0.4h, v1.4h, v2.4h // encoding: [0x20,0x7c,0x62,0x2e]
// CHECK: uaba v0.8h, v1.8h, v2.8h // encoding: [0x20,0x7c,0x62,0x6e]
// CHECK: uaba v0.2s, v1.2s, v2.2s // encoding: [0x20,0x7c,0xa2,0x2e]
// CHECK: uaba v0.4s, v1.4s, v2.4s // encoding: [0x20,0x7c,0xa2,0x6e]
saba v0.8b, v1.8b, v2.8b
saba v0.16b, v1.16b, v2.16b
saba v0.4h, v1.4h, v2.4h
saba v0.8h, v1.8h, v2.8h
saba v0.2s, v1.2s, v2.2s
saba v0.4s, v1.4s, v2.4s
// CHECK: saba v0.8b, v1.8b, v2.8b // encoding: [0x20,0x7c,0x22,0x0e]
// CHECK: saba v0.16b, v1.16b, v2.16b // encoding: [0x20,0x7c,0x22,0x4e]
// CHECK: saba v0.4h, v1.4h, v2.4h // encoding: [0x20,0x7c,0x62,0x0e]
// CHECK: saba v0.8h, v1.8h, v2.8h // encoding: [0x20,0x7c,0x62,0x4e]
// CHECK: saba v0.2s, v1.2s, v2.2s // encoding: [0x20,0x7c,0xa2,0x0e]
// CHECK: saba v0.4s, v1.4s, v2.4s // encoding: [0x20,0x7c,0xa2,0x4e]
//----------------------------------------------------------------------
// Vector Absolute Difference (Signed, Unsigned)
//----------------------------------------------------------------------
uabd v0.8b, v1.8b, v2.8b
uabd v0.16b, v1.16b, v2.16b
uabd v0.4h, v1.4h, v2.4h
uabd v0.8h, v1.8h, v2.8h
uabd v0.2s, v1.2s, v2.2s
uabd v0.4s, v1.4s, v2.4s
// CHECK: uabd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x74,0x22,0x2e]
// CHECK: uabd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x74,0x22,0x6e]
// CHECK: uabd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x74,0x62,0x2e]
// CHECK: uabd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x74,0x62,0x6e]
// CHECK: uabd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x74,0xa2,0x2e]
// CHECK: uabd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x74,0xa2,0x6e]
sabd v0.8b, v1.8b, v2.8b
sabd v0.16b, v1.16b, v2.16b
sabd v0.4h, v1.4h, v2.4h
sabd v0.8h, v1.8h, v2.8h
sabd v0.2s, v1.2s, v2.2s
sabd v0.4s, v1.4s, v2.4s
// CHECK: sabd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x74,0x22,0x0e]
// CHECK: sabd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x74,0x22,0x4e]
// CHECK: sabd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x74,0x62,0x0e]
// CHECK: sabd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x74,0x62,0x4e]
// CHECK: sabd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x74,0xa2,0x0e]
// CHECK: sabd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x74,0xa2,0x4e]
//----------------------------------------------------------------------
// Vector Absolute Difference (Floating Point)
//----------------------------------------------------------------------
fabd v0.2s, v1.2s, v2.2s
fabd v31.4s, v15.4s, v16.4s
fabd v7.2d, v8.2d, v25.2d
// CHECK: fabd v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0xa2,0x2e]
// CHECK: fabd v31.4s, v15.4s, v16.4s // encoding: [0xff,0xd5,0xb0,0x6e]
// CHECK: fabd v7.2d, v8.2d, v25.2d // encoding: [0x07,0xd5,0xf9,0x6e]

View File

@ -0,0 +1,35 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Add Pairwise (Integer)
//------------------------------------------------------------------------------
addp v0.8b, v1.8b, v2.8b
addp v0.16b, v1.16b, v2.16b
addp v0.4h, v1.4h, v2.4h
addp v0.8h, v1.8h, v2.8h
addp v0.2s, v1.2s, v2.2s
addp v0.4s, v1.4s, v2.4s
addp v0.2d, v1.2d, v2.2d
// CHECK: addp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xbc,0x22,0x0e]
// CHECK: addp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xbc,0x22,0x4e]
// CHECK: addp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xbc,0x62,0x0e]
// CHECK: addp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xbc,0x62,0x4e]
// CHECK: addp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xbc,0xa2,0x0e]
// CHECK: addp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xbc,0xa2,0x4e]
// CHECK: addp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xbc,0xe2,0x4e]
//------------------------------------------------------------------------------
// Vector Add Pairwise (Floating Point)
//------------------------------------------------------------------------------
faddp v0.2s, v1.2s, v2.2s
faddp v0.4s, v1.4s, v2.4s
faddp v0.2d, v1.2d, v2.2d
// CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e]
// CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e]
// CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e]

View File

@ -0,0 +1,82 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Integer Add
//------------------------------------------------------------------------------
add v0.8b, v1.8b, v2.8b
add v0.16b, v1.16b, v2.16b
add v0.4h, v1.4h, v2.4h
add v0.8h, v1.8h, v2.8h
add v0.2s, v1.2s, v2.2s
add v0.4s, v1.4s, v2.4s
add v0.2d, v1.2d, v2.2d
// CHECK: add v0.8b, v1.8b, v2.8b // encoding: [0x20,0x84,0x22,0x0e]
// CHECK: add v0.16b, v1.16b, v2.16b // encoding: [0x20,0x84,0x22,0x4e]
// CHECK: add v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x62,0x0e]
// CHECK: add v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x62,0x4e]
// CHECK: add v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0xa2,0x0e]
// CHECK: add v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0xa2,0x4e]
// CHECK: add v0.2d, v1.2d, v2.2d // encoding: [0x20,0x84,0xe2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Sub
//------------------------------------------------------------------------------
sub v0.8b, v1.8b, v2.8b
sub v0.16b, v1.16b, v2.16b
sub v0.4h, v1.4h, v2.4h
sub v0.8h, v1.8h, v2.8h
sub v0.2s, v1.2s, v2.2s
sub v0.4s, v1.4s, v2.4s
sub v0.2d, v1.2d, v2.2d
// CHECK: sub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x84,0x22,0x2e]
// CHECK: sub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x84,0x22,0x6e]
// CHECK: sub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x62,0x2e]
// CHECK: sub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x62,0x6e]
// CHECK: sub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0xa2,0x2e]
// CHECK: sub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0xa2,0x6e]
// CHECK: sub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x84,0xe2,0x6e]
//------------------------------------------------------------------------------
// Vector Floating-Point Add
//------------------------------------------------------------------------------
fadd v0.2s, v1.2s, v2.2s
fadd v0.4s, v1.4s, v2.4s
fadd v0.2d, v1.2d, v2.2d
// CHECK: fadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x0e]
// CHECK: fadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x4e]
// CHECK: fadd v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x4e]
//------------------------------------------------------------------------------
// Vector Floating-Point Sub
//------------------------------------------------------------------------------
fsub v0.2s, v1.2s, v2.2s
fsub v0.4s, v1.4s, v2.4s
fsub v0.2d, v1.2d, v2.2d
// CHECK: fsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0xa2,0x0e]
// CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e]
// CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e]
//------------------------------------------------------------------------------
// Scalar Integer Add
//------------------------------------------------------------------------------
add d31, d0, d16
// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Sub
//------------------------------------------------------------------------------
sub d1, d7, d8
// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e]

View File

@ -0,0 +1,60 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector And
//------------------------------------------------------------------------------
and v0.8b, v1.8b, v2.8b
and v0.16b, v1.16b, v2.16b
// CHECK: and v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0x22,0x0e]
// CHECK: and v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0x22,0x4e]
//------------------------------------------------------------------------------
// Vector Orr
//------------------------------------------------------------------------------
orr v0.8b, v1.8b, v2.8b
orr v0.16b, v1.16b, v2.16b
// CHECK: orr v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0xa2,0x0e]
// CHECK: orr v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0xa2,0x4e]
//------------------------------------------------------------------------------
// Vector Eor
//------------------------------------------------------------------------------
eor v0.8b, v1.8b, v2.8b
eor v0.16b, v1.16b, v2.16b
// CHECK: eor v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0x22,0x2e]
// CHECK: eor v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0x22,0x6e]
//----------------------------------------------------------------------
// Vector Bitwise Insert (BIT/BIF), Bitwise Select (BSL), OR-NOT (ORN) and Bit Clear (BIC)
//----------------------------------------------------------------------
bit v0.8b, v1.8b, v2.8b
bit v0.16b, v1.16b, v2.16b
bif v0.8b, v1.8b, v2.8b
bif v0.16b, v1.16b, v2.16b
bsl v0.8b, v1.8b, v2.8b
bsl v0.16b, v1.16b, v2.16b
orn v0.8b, v1.8b, v2.8b
orn v0.16b, v1.16b, v2.16b
bic v0.8b, v1.8b, v2.8b
bic v0.16b, v1.16b, v2.16b
// CHECK: bit v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0xa2,0x2e]
// CHECK: bit v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0xa2,0x6e]
// CHECK: bif v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0xe2,0x2e]
// CHECK: bif v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0xe2,0x6e]
// CHECK: bsl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0x62,0x2e]
// CHECK: bsl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0x62,0x6e]
// CHECK: orn v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0xe2,0x0e]
// CHECK: orn v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0xe2,0x4e]
// CHECK: bic v0.8b, v1.8b, v2.8b // encoding: [0x20,0x1c,0x62,0x0e]
// CHECK: bic v0.16b, v1.16b, v2.16b // encoding: [0x20,0x1c,0x62,0x4e]

View File

@ -0,0 +1,405 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Compare Mask Equal (Integer)
//----------------------------------------------------------------------
cmeq v0.8b, v15.8b, v17.8b
cmeq v1.16b, v31.16b, v8.16b
cmeq v15.4h, v16.4h, v17.4h
cmeq v5.8h, v6.8h, v7.8h
cmeq v29.2s, v27.2s, v28.2s
cmeq v9.4s, v7.4s, v8.4s
cmeq v3.2d, v31.2d, v21.2d
// CHECK: cmeq v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x8d,0x31,0x2e]
// CHECK: cmeq v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x8f,0x28,0x6e]
// CHECK: cmeq v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x8e,0x71,0x2e]
// CHECK: cmeq v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x8c,0x67,0x6e]
// CHECK: cmeq v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x8f,0xbc,0x2e]
// CHECK: cmeq v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x8c,0xa8,0x6e]
// CHECK: cmeq v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x8f,0xf5,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Higher or Same (Unsigned Integer)
// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is an alias for CMHS with operands reversed.
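// For every reversed-operand alias in this file the assembler prints the canonical
// mnemonic, so the CHECK lines repeat the canonical form with the operands swapped back.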
//----------------------------------------------------------------------
cmhs v0.8b, v15.8b, v17.8b
cmhs v1.16b, v31.16b, v8.16b
cmhs v15.4h, v16.4h, v17.4h
cmhs v5.8h, v6.8h, v7.8h
cmhs v29.2s, v27.2s, v28.2s
cmhs v9.4s, v7.4s, v8.4s
cmhs v3.2d, v31.2d, v21.2d
cmls v0.8b, v17.8b, v15.8b
cmls v1.16b, v8.16b, v31.16b
cmls v15.4h, v17.4h, v16.4h
cmls v5.8h, v7.8h, v6.8h
cmls v29.2s, v28.2s, v27.2s
cmls v9.4s, v8.4s, v7.4s
cmls v3.2d, v21.2d, v31.2d
// CHECK: cmhs v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x3d,0x31,0x2e]
// CHECK: cmhs v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x6e]
// CHECK: cmhs v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x3e,0x71,0x2e]
// CHECK: cmhs v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x3c,0x67,0x6e]
// CHECK: cmhs v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x3f,0xbc,0x2e]
// CHECK: cmhs v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x3c,0xa8,0x6e]
// CHECK: cmhs v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x3f,0xf5,0x6e]
// CHECK: cmhs v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x3d,0x31,0x2e]
// CHECK: cmhs v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x6e]
// CHECK: cmhs v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x3e,0x71,0x2e]
// CHECK: cmhs v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x3c,0x67,0x6e]
// CHECK: cmhs v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x3f,0xbc,0x2e]
// CHECK: cmhs v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x3c,0xa8,0x6e]
// CHECK: cmhs v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x3f,0xf5,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than or Equal (Integer)
// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is an alias for CMGE with operands reversed.
//----------------------------------------------------------------------
cmge v0.8b, v15.8b, v17.8b
cmge v1.16b, v31.16b, v8.16b
cmge v15.4h, v16.4h, v17.4h
cmge v5.8h, v6.8h, v7.8h
cmge v29.2s, v27.2s, v28.2s
cmge v9.4s, v7.4s, v8.4s
cmge v3.2d, v31.2d, v21.2d
cmle v0.8b, v17.8b, v15.8b
cmle v1.16b, v8.16b, v31.16b
cmle v15.4h, v17.4h, v16.4h
cmle v5.8h, v7.8h, v6.8h
cmle v29.2s, v28.2s, v27.2s
cmle v9.4s, v8.4s, v7.4s
cmle v3.2d, v21.2d, v31.2d
// CHECK: cmge v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x3d,0x31,0x0e]
// CHECK: cmge v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x4e]
// CHECK: cmge v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x3e,0x71,0x0e]
// CHECK: cmge v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x3c,0x67,0x4e]
// CHECK: cmge v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x3f,0xbc,0x0e]
// CHECK: cmge v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x3c,0xa8,0x4e]
// CHECK: cmge v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x3f,0xf5,0x4e]
// CHECK: cmge v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x3d,0x31,0x0e]
// CHECK: cmge v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x4e]
// CHECK: cmge v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x3e,0x71,0x0e]
// CHECK: cmge v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x3c,0x67,0x4e]
// CHECK: cmge v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x3f,0xbc,0x0e]
// CHECK: cmge v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x3c,0xa8,0x4e]
// CHECK: cmge v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x3f,0xf5,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Higher (Unsigned Integer)
// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is an alias for CMHI with operands reversed.
//----------------------------------------------------------------------
cmhi v0.8b, v15.8b, v17.8b
cmhi v1.16b, v31.16b, v8.16b
cmhi v15.4h, v16.4h, v17.4h
cmhi v5.8h, v6.8h, v7.8h
cmhi v29.2s, v27.2s, v28.2s
cmhi v9.4s, v7.4s, v8.4s
cmhi v3.2d, v31.2d, v21.2d
cmlo v0.8b, v17.8b, v15.8b
cmlo v1.16b, v8.16b, v31.16b
cmlo v15.4h, v17.4h, v16.4h
cmlo v5.8h, v7.8h, v6.8h
cmlo v29.2s, v28.2s, v27.2s
cmlo v9.4s, v8.4s, v7.4s
cmlo v3.2d, v21.2d, v31.2d
// CHECK: cmhi v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x35,0x31,0x2e]
// CHECK: cmhi v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x37,0x28,0x6e]
// CHECK: cmhi v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x36,0x71,0x2e]
// CHECK: cmhi v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x34,0x67,0x6e]
// CHECK: cmhi v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x37,0xbc,0x2e]
// CHECK: cmhi v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x34,0xa8,0x6e]
// CHECK: cmhi v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x37,0xf5,0x6e]
// CHECK: cmhi v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x35,0x31,0x2e]
// CHECK: cmhi v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x37,0x28,0x6e]
// CHECK: cmhi v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x36,0x71,0x2e]
// CHECK: cmhi v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x34,0x67,0x6e]
// CHECK: cmhi v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x37,0xbc,0x2e]
// CHECK: cmhi v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x34,0xa8,0x6e]
// CHECK: cmhi v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x37,0xf5,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than (Integer)
// Vector Compare Mask Less Than (Integer)
// CMLT is an alias for CMGT with operands reversed.
//----------------------------------------------------------------------
cmgt v0.8b, v15.8b, v17.8b
cmgt v1.16b, v31.16b, v8.16b
cmgt v15.4h, v16.4h, v17.4h
cmgt v5.8h, v6.8h, v7.8h
cmgt v29.2s, v27.2s, v28.2s
cmgt v9.4s, v7.4s, v8.4s
cmgt v3.2d, v31.2d, v21.2d
cmlt v0.8b, v17.8b, v15.8b
cmlt v1.16b, v8.16b, v31.16b
cmlt v15.4h, v17.4h, v16.4h
cmlt v5.8h, v7.8h, v6.8h
cmlt v29.2s, v28.2s, v27.2s
cmlt v9.4s, v8.4s, v7.4s
cmlt v3.2d, v21.2d, v31.2d
// CHECK: cmgt v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x35,0x31,0x0e]
// CHECK: cmgt v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x37,0x28,0x4e]
// CHECK: cmgt v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x36,0x71,0x0e]
// CHECK: cmgt v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x34,0x67,0x4e]
// CHECK: cmgt v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x37,0xbc,0x0e]
// CHECK: cmgt v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x34,0xa8,0x4e]
// CHECK: cmgt v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x37,0xf5,0x4e]
// CHECK: cmgt v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x35,0x31,0x0e]
// CHECK: cmgt v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x37,0x28,0x4e]
// CHECK: cmgt v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x36,0x71,0x0e]
// CHECK: cmgt v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x34,0x67,0x4e]
// CHECK: cmgt v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x37,0xbc,0x0e]
// CHECK: cmgt v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x34,0xa8,0x4e]
// CHECK: cmgt v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x37,0xf5,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Bitwise Test (Integer)
//----------------------------------------------------------------------
cmtst v0.8b, v15.8b, v17.8b
cmtst v1.16b, v31.16b, v8.16b
cmtst v15.4h, v16.4h, v17.4h
cmtst v5.8h, v6.8h, v7.8h
cmtst v29.2s, v27.2s, v28.2s
cmtst v9.4s, v7.4s, v8.4s
cmtst v3.2d, v31.2d, v21.2d
// CHECK: cmtst v0.8b, v15.8b, v17.8b // encoding: [0xe0,0x8d,0x31,0x0e]
// CHECK: cmtst v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x8f,0x28,0x4e]
// CHECK: cmtst v15.4h, v16.4h, v17.4h // encoding: [0x0f,0x8e,0x71,0x0e]
// CHECK: cmtst v5.8h, v6.8h, v7.8h // encoding: [0xc5,0x8c,0x67,0x4e]
// CHECK: cmtst v29.2s, v27.2s, v28.2s // encoding: [0x7d,0x8f,0xbc,0x0e]
// CHECK: cmtst v9.4s, v7.4s, v8.4s // encoding: [0xe9,0x8c,0xa8,0x4e]
// CHECK: cmtst v3.2d, v31.2d, v21.2d // encoding: [0xe3,0x8f,0xf5,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Equal (Floating Point)
//----------------------------------------------------------------------
fcmeq v0.2s, v31.2s, v16.2s
fcmeq v4.4s, v7.4s, v15.4s
fcmeq v29.2d, v2.2d, v5.2d
// CHECK: fcmeq v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0x30,0x0e]
// CHECK: fcmeq v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0x2f,0x4e]
// CHECK: fcmeq v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0x65,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than Or Equal (Floating Point)
// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is an alias for FCMGE with operands reversed.
//----------------------------------------------------------------------
fcmge v31.4s, v29.4s, v28.4s
fcmge v3.2s, v8.2s, v12.2s
fcmge v17.2d, v15.2d, v13.2d
fcmle v31.4s, v28.4s, v29.4s
fcmle v3.2s, v12.2s, v8.2s
fcmle v17.2d, v13.2d, v15.2d
// CHECK: fcmge v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xe7,0x3c,0x6e]
// CHECK: fcmge v3.2s, v8.2s, v12.2s // encoding: [0x03,0xe5,0x2c,0x2e]
// CHECK: fcmge v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xe5,0x6d,0x6e]
// CHECK: fcmge v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xe7,0x3c,0x6e]
// CHECK: fcmge v3.2s, v8.2s, v12.2s // encoding: [0x03,0xe5,0x2c,0x2e]
// CHECK: fcmge v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xe5,0x6d,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than (Floating Point)
// Vector Compare Mask Less Than (Floating Point)
// FCMLT is an alias for FCMGT with operands reversed.
//----------------------------------------------------------------------
fcmgt v0.2s, v31.2s, v16.2s
fcmgt v4.4s, v7.4s, v15.4s
fcmgt v29.2d, v2.2d, v5.2d
fcmlt v0.2s, v16.2s, v31.2s
fcmlt v4.4s, v15.4s, v7.4s
fcmlt v29.2d, v5.2d, v2.2d
// CHECK: fcmgt v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0xb0,0x2e]
// CHECK: fcmgt v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0xaf,0x6e]
// CHECK: fcmgt v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0xe5,0x6e]
// CHECK: fcmgt v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0xb0,0x2e]
// CHECK: fcmgt v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0xaf,0x6e]
// CHECK: fcmgt v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0xe5,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Equal to Zero (Integer)
//----------------------------------------------------------------------
cmeq v0.8b, v15.8b, #0
cmeq v1.16b, v31.16b, #0
cmeq v15.4h, v16.4h, #0
cmeq v5.8h, v6.8h, #0
cmeq v29.2s, v27.2s, #0
cmeq v9.4s, v7.4s, #0
cmeq v3.2d, v31.2d, #0
// CHECK: cmeq v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x99,0x20,0x0e]
// CHECK: cmeq v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x9b,0x20,0x4e]
// CHECK: cmeq v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x9a,0x60,0x0e]
// CHECK: cmeq v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x98,0x60,0x4e]
// CHECK: cmeq v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x9b,0xa0,0x0e]
// CHECK: cmeq v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x98,0xa0,0x4e]
// CHECK: cmeq v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x9b,0xe0,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
//----------------------------------------------------------------------
cmge v0.8b, v15.8b, #0
cmge v1.16b, v31.16b, #0
cmge v15.4h, v16.4h, #0
cmge v5.8h, v6.8h, #0
cmge v29.2s, v27.2s, #0
cmge v17.4s, v20.4s, #0
cmge v3.2d, v31.2d, #0
// CHECK: cmge v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x89,0x20,0x2e]
// CHECK: cmge v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x8b,0x20,0x6e]
// CHECK: cmge v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x8a,0x60,0x2e]
// CHECK: cmge v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x88,0x60,0x6e]
// CHECK: cmge v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x8b,0xa0,0x2e]
// CHECK: cmge v17.4s, v20.4s, #0x0 // encoding: [0x91,0x8a,0xa0,0x6e]
// CHECK: cmge v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x8b,0xe0,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than Zero (Signed Integer)
//----------------------------------------------------------------------
cmgt v0.8b, v15.8b, #0
cmgt v1.16b, v31.16b, #0
cmgt v15.4h, v16.4h, #0
cmgt v5.8h, v6.8h, #0
cmgt v29.2s, v27.2s, #0
cmgt v9.4s, v7.4s, #0
cmgt v3.2d, v31.2d, #0
// CHECK: cmgt v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x89,0x20,0x0e]
// CHECK: cmgt v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x8b,0x20,0x4e]
// CHECK: cmgt v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x8a,0x60,0x0e]
// CHECK: cmgt v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x88,0x60,0x4e]
// CHECK: cmgt v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x8b,0xa0,0x0e]
// CHECK: cmgt v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x88,0xa0,0x4e]
// CHECK: cmgt v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x8b,0xe0,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
//----------------------------------------------------------------------
cmle v0.8b, v15.8b, #0
cmle v1.16b, v31.16b, #0
cmle v15.4h, v16.4h, #0
cmle v5.8h, v6.8h, #0
cmle v29.2s, v27.2s, #0
cmle v9.4s, v7.4s, #0
cmle v3.2d, v31.2d, #0
// CHECK: cmle v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x99,0x20,0x2e]
// CHECK: cmle v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x9b,0x20,0x6e]
// CHECK: cmle v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x9a,0x60,0x2e]
// CHECK: cmle v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x98,0x60,0x6e]
// CHECK: cmle v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x9b,0xa0,0x2e]
// CHECK: cmle v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x98,0xa0,0x6e]
// CHECK: cmle v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x9b,0xe0,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Less Than Zero (Signed Integer)
//----------------------------------------------------------------------
cmlt v0.8b, v15.8b, #0
cmlt v1.16b, v31.16b, #0
cmlt v15.4h, v16.4h, #0
cmlt v5.8h, v6.8h, #0
cmlt v29.2s, v27.2s, #0
cmlt v9.4s, v7.4s, #0
cmlt v3.2d, v31.2d, #0
// CHECK: cmlt v0.8b, v15.8b, #0x0 // encoding: [0xe0,0xa9,0x20,0x0e]
// CHECK: cmlt v1.16b, v31.16b, #0x0 // encoding: [0xe1,0xab,0x20,0x4e]
// CHECK: cmlt v15.4h, v16.4h, #0x0 // encoding: [0x0f,0xaa,0x60,0x0e]
// CHECK: cmlt v5.8h, v6.8h, #0x0 // encoding: [0xc5,0xa8,0x60,0x4e]
// CHECK: cmlt v29.2s, v27.2s, #0x0 // encoding: [0x7d,0xab,0xa0,0x0e]
// CHECK: cmlt v9.4s, v7.4s, #0x0 // encoding: [0xe9,0xa8,0xa0,0x4e]
// CHECK: cmlt v3.2d, v31.2d, #0x0 // encoding: [0xe3,0xab,0xe0,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Equal to Zero (Floating Point)
//----------------------------------------------------------------------
fcmeq v0.2s, v31.2s, #0.0
fcmeq v4.4s, v7.4s, #0.0
fcmeq v29.2d, v2.2d, #0.0
// CHECK: fcmeq v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xdb,0xa0,0x0e]
// CHECK: fcmeq v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xd8,0xa0,0x4e]
// CHECK: fcmeq v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xd8,0xe0,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
//----------------------------------------------------------------------
fcmge v31.4s, v29.4s, #0.0
fcmge v3.2s, v8.2s, #0.0
fcmge v17.2d, v15.2d, #0.0
// CHECK: fcmge v31.4s, v29.4s, #0.0 // encoding: [0xbf,0xcb,0xa0,0x6e]
// CHECK: fcmge v3.2s, v8.2s, #0.0 // encoding: [0x03,0xc9,0xa0,0x2e]
// CHECK: fcmge v17.2d, v15.2d, #0.0 // encoding: [0xf1,0xc9,0xe0,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than Zero (Floating Point)
//----------------------------------------------------------------------
fcmgt v0.2s, v31.2s, #0.0
fcmgt v4.4s, v7.4s, #0.0
fcmgt v29.2d, v2.2d, #0.0
// CHECK: fcmgt v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xcb,0xa0,0x0e]
// CHECK: fcmgt v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xc8,0xa0,0x4e]
// CHECK: fcmgt v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xc8,0xe0,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
//----------------------------------------------------------------------
fcmle v1.4s, v8.4s, #0.0
fcmle v3.2s, v20.2s, #0.0
fcmle v7.2d, v13.2d, #0.0
// CHECK: fcmle v1.4s, v8.4s, #0.0 // encoding: [0x01,0xd9,0xa0,0x6e]
// CHECK: fcmle v3.2s, v20.2s, #0.0 // encoding: [0x83,0xda,0xa0,0x2e]
// CHECK: fcmle v7.2d, v13.2d, #0.0 // encoding: [0xa7,0xd9,0xe0,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Less Than Zero (Floating Point)
//----------------------------------------------------------------------
fcmlt v16.2s, v2.2s, #0.0
fcmlt v15.4s, v4.4s, #0.0
fcmlt v5.2d, v29.2d, #0.0
// CHECK: fcmlt v16.2s, v2.2s, #0.0 // encoding: [0x50,0xe8,0xa0,0x0e]
// CHECK: fcmlt v15.4s, v4.4s, #0.0 // encoding: [0x8f,0xe8,0xa0,0x4e]
// CHECK: fcmlt v5.2d, v29.2d, #0.0 // encoding: [0xa5,0xeb,0xe0,0x4e]

File diff suppressed because it is too large

View File

@ -0,0 +1,41 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
// FACLE is an alias for FACGE with operands reversed.
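// The assembler prints the canonical FACGE/FACGT forms, so the CHECK lines for the
// FACLE/FACLT inputs repeat them with the operands swapped back.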
//----------------------------------------------------------------------
facge v0.2s, v31.2s, v16.2s
facge v4.4s, v7.4s, v15.4s
facge v29.2d, v2.2d, v5.2d
facle v0.2s, v16.2s, v31.2s
facle v4.4s, v15.4s, v7.4s
facle v29.2d, v5.2d, v2.2d
// CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
// CHECK: facge v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xec,0x2f,0x6e]
// CHECK: facge v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xec,0x65,0x6e]
// CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
// CHECK: facge v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xec,0x2f,0x6e]
// CHECK: facge v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xec,0x65,0x6e]
//----------------------------------------------------------------------
// Vector Absolute Compare Mask Less Than (Floating Point)
// FACLT is an alias for FACGT with operands reversed.
//----------------------------------------------------------------------
facgt v31.4s, v29.4s, v28.4s
facgt v3.2s, v8.2s, v12.2s
facgt v17.2d, v15.2d, v13.2d
faclt v31.4s, v28.4s, v29.4s
faclt v3.2s, v12.2s, v8.2s
faclt v17.2d, v13.2d, v15.2d
// CHECK: facgt v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xef,0xbc,0x6e]
// CHECK: facgt v3.2s, v8.2s, v12.2s // encoding: [0x03,0xed,0xac,0x2e]
// CHECK: facgt v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xed,0xed,0x6e]
// CHECK: facgt v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xef,0xbc,0x6e]
// CHECK: facgt v3.2s, v8.2s, v12.2s // encoding: [0x03,0xed,0xac,0x2e]
// CHECK: facgt v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xed,0xed,0x6e]

View File

@ -0,0 +1,27 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Reciprocal Square Root Step (Floating Point)
//----------------------------------------------------------------------
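// FRSQRTS computes (3.0 - a*b) / 2.0, one step of the Newton-Raphson iteration for 1/sqrt(x).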
frsqrts v0.2s, v31.2s, v16.2s
frsqrts v4.4s, v7.4s, v15.4s
frsqrts v29.2d, v2.2d, v5.2d
// CHECK: frsqrts v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xff,0xb0,0x0e]
// CHECK: frsqrts v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xfc,0xaf,0x4e]
// CHECK: frsqrts v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xfc,0xe5,0x4e]
//----------------------------------------------------------------------
// Vector Reciprocal Step (Floating Point)
//----------------------------------------------------------------------
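// FRECPS computes 2.0 - a*b, one step of the Newton-Raphson iteration for 1/x.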
frecps v31.4s, v29.4s, v28.4s
frecps v3.2s, v8.2s, v12.2s
frecps v17.2d, v15.2d, v13.2d
// CHECK: frecps v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xff,0x3c,0x4e]
// CHECK: frecps v3.2s, v8.2s, v12.2s // encoding: [0x03,0xfd,0x2c,0x0e]
// CHECK: frecps v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xfd,0x6d,0x4e]

View File

@ -0,0 +1,74 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Integer Halving Add (Signed)
//------------------------------------------------------------------------------
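// Halving add/sub return (a + b) >> 1 or (a - b) >> 1, computed without intermediate overflow.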
shadd v0.8b, v1.8b, v2.8b
shadd v0.16b, v1.16b, v2.16b
shadd v0.4h, v1.4h, v2.4h
shadd v0.8h, v1.8h, v2.8h
shadd v0.2s, v1.2s, v2.2s
shadd v0.4s, v1.4s, v2.4s
// CHECK: shadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x04,0x22,0x0e]
// CHECK: shadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x04,0x22,0x4e]
// CHECK: shadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x04,0x62,0x0e]
// CHECK: shadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x04,0x62,0x4e]
// CHECK: shadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x04,0xa2,0x0e]
// CHECK: shadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x04,0xa2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Halving Add (Unsigned)
//------------------------------------------------------------------------------
uhadd v0.8b, v1.8b, v2.8b
uhadd v0.16b, v1.16b, v2.16b
uhadd v0.4h, v1.4h, v2.4h
uhadd v0.8h, v1.8h, v2.8h
uhadd v0.2s, v1.2s, v2.2s
uhadd v0.4s, v1.4s, v2.4s
// CHECK: uhadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x04,0x22,0x2e]
// CHECK: uhadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x04,0x22,0x6e]
// CHECK: uhadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x04,0x62,0x2e]
// CHECK: uhadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x04,0x62,0x6e]
// CHECK: uhadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x04,0xa2,0x2e]
// CHECK: uhadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x04,0xa2,0x6e]
//------------------------------------------------------------------------------
// Vector Integer Halving Sub (Signed)
//------------------------------------------------------------------------------
shsub v0.8b, v1.8b, v2.8b
shsub v0.16b, v1.16b, v2.16b
shsub v0.4h, v1.4h, v2.4h
shsub v0.8h, v1.8h, v2.8h
shsub v0.2s, v1.2s, v2.2s
shsub v0.4s, v1.4s, v2.4s
// CHECK: shsub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x24,0x22,0x0e]
// CHECK: shsub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x24,0x22,0x4e]
// CHECK: shsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x24,0x62,0x0e]
// CHECK: shsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x24,0x62,0x4e]
// CHECK: shsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x24,0xa2,0x0e]
// CHECK: shsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x24,0xa2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Halving Sub (Unsigned)
//------------------------------------------------------------------------------
uhsub v0.8b, v1.8b, v2.8b
uhsub v0.16b, v1.16b, v2.16b
uhsub v0.4h, v1.4h, v2.4h
uhsub v0.8h, v1.8h, v2.8h
uhsub v0.2s, v1.2s, v2.2s
uhsub v0.4s, v1.4s, v2.4s
// CHECK: uhsub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x24,0x22,0x2e]
// CHECK: uhsub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x24,0x22,0x6e]
// CHECK: uhsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x24,0x62,0x2e]
// CHECK: uhsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x24,0x62,0x6e]
// CHECK: uhsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x24,0xa2,0x2e]
// CHECK: uhsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x24,0xa2,0x6e]

View File

@ -0,0 +1,110 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Maximum Pairwise (Signed and Unsigned Integer)
//----------------------------------------------------------------------
smaxp v0.8b, v1.8b, v2.8b
smaxp v0.16b, v1.16b, v2.16b
smaxp v0.4h, v1.4h, v2.4h
smaxp v0.8h, v1.8h, v2.8h
smaxp v0.2s, v1.2s, v2.2s
smaxp v0.4s, v1.4s, v2.4s
// CHECK: smaxp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xa4,0x22,0x0e]
// CHECK: smaxp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xa4,0x22,0x4e]
// CHECK: smaxp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xa4,0x62,0x0e]
// CHECK: smaxp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xa4,0x62,0x4e]
// CHECK: smaxp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xa4,0xa2,0x0e]
// CHECK: smaxp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xa4,0xa2,0x4e]
umaxp v0.8b, v1.8b, v2.8b
umaxp v0.16b, v1.16b, v2.16b
umaxp v0.4h, v1.4h, v2.4h
umaxp v0.8h, v1.8h, v2.8h
umaxp v0.2s, v1.2s, v2.2s
umaxp v0.4s, v1.4s, v2.4s
// CHECK: umaxp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xa4,0x22,0x2e]
// CHECK: umaxp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xa4,0x22,0x6e]
// CHECK: umaxp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xa4,0x62,0x2e]
// CHECK: umaxp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xa4,0x62,0x6e]
// CHECK: umaxp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xa4,0xa2,0x2e]
// CHECK: umaxp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xa4,0xa2,0x6e]
//----------------------------------------------------------------------
// Vector Minimum Pairwise (Signed and Unsigned Integer)
//----------------------------------------------------------------------
sminp v0.8b, v1.8b, v2.8b
sminp v0.16b, v1.16b, v2.16b
sminp v0.4h, v1.4h, v2.4h
sminp v0.8h, v1.8h, v2.8h
sminp v0.2s, v1.2s, v2.2s
sminp v0.4s, v1.4s, v2.4s
// CHECK: sminp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xac,0x22,0x0e]
// CHECK: sminp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xac,0x22,0x4e]
// CHECK: sminp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xac,0x62,0x0e]
// CHECK: sminp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xac,0x62,0x4e]
// CHECK: sminp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xac,0xa2,0x0e]
// CHECK: sminp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xac,0xa2,0x4e]
uminp v0.8b, v1.8b, v2.8b
uminp v0.16b, v1.16b, v2.16b
uminp v0.4h, v1.4h, v2.4h
uminp v0.8h, v1.8h, v2.8h
uminp v0.2s, v1.2s, v2.2s
uminp v0.4s, v1.4s, v2.4s
// CHECK: uminp v0.8b, v1.8b, v2.8b // encoding: [0x20,0xac,0x22,0x2e]
// CHECK: uminp v0.16b, v1.16b, v2.16b // encoding: [0x20,0xac,0x22,0x6e]
// CHECK: uminp v0.4h, v1.4h, v2.4h // encoding: [0x20,0xac,0x62,0x2e]
// CHECK: uminp v0.8h, v1.8h, v2.8h // encoding: [0x20,0xac,0x62,0x6e]
// CHECK: uminp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xac,0xa2,0x2e]
// CHECK: uminp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xac,0xa2,0x6e]
//----------------------------------------------------------------------
// Vector Maximum Pairwise (Floating Point)
//----------------------------------------------------------------------
fmaxp v0.2s, v1.2s, v2.2s
fmaxp v31.4s, v15.4s, v16.4s
fmaxp v7.2d, v8.2d, v25.2d
// CHECK: fmaxp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xf4,0x22,0x2e]
// CHECK: fmaxp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x6e]
// CHECK: fmaxp v7.2d, v8.2d, v25.2d // encoding: [0x07,0xf5,0x79,0x6e]
//----------------------------------------------------------------------
// Vector Minimum Pairwise (Floating Point)
//----------------------------------------------------------------------
fminp v10.2s, v15.2s, v22.2s
fminp v3.4s, v5.4s, v6.4s
fminp v17.2d, v13.2d, v2.2d
// CHECK: fminp v10.2s, v15.2s, v22.2s // encoding: [0xea,0xf5,0xb6,0x2e]
// CHECK: fminp v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xf4,0xa6,0x6e]
// CHECK: fminp v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xf5,0xe2,0x6e]
//----------------------------------------------------------------------
// Vector maxNum Pairwise (Floating Point)
//----------------------------------------------------------------------
fmaxnmp v0.2s, v1.2s, v2.2s
fmaxnmp v31.4s, v15.4s, v16.4s
fmaxnmp v7.2d, v8.2d, v25.2d
// CHECK: fmaxnmp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xc4,0x22,0x2e]
// CHECK: fmaxnmp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x6e]
// CHECK: fmaxnmp v7.2d, v8.2d, v25.2d // encoding: [0x07,0xc5,0x79,0x6e]
//----------------------------------------------------------------------
// Vector minNum Pairwise (Floating Point)
//----------------------------------------------------------------------
fminnmp v10.2s, v15.2s, v22.2s
fminnmp v3.4s, v5.4s, v6.4s
fminnmp v17.2d, v13.2d, v2.2d
// CHECK: fminnmp v10.2s, v15.2s, v22.2s // encoding: [0xea,0xc5,0xb6,0x2e]
// CHECK: fminnmp v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xc4,0xa6,0x6e]
// CHECK: fminnmp v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xc5,0xe2,0x6e]

View File

@ -0,0 +1,110 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Maximum (Signed and Unsigned Integer)
//----------------------------------------------------------------------
smax v0.8b, v1.8b, v2.8b
smax v0.16b, v1.16b, v2.16b
smax v0.4h, v1.4h, v2.4h
smax v0.8h, v1.8h, v2.8h
smax v0.2s, v1.2s, v2.2s
smax v0.4s, v1.4s, v2.4s
// CHECK: smax v0.8b, v1.8b, v2.8b // encoding: [0x20,0x64,0x22,0x0e]
// CHECK: smax v0.16b, v1.16b, v2.16b // encoding: [0x20,0x64,0x22,0x4e]
// CHECK: smax v0.4h, v1.4h, v2.4h // encoding: [0x20,0x64,0x62,0x0e]
// CHECK: smax v0.8h, v1.8h, v2.8h // encoding: [0x20,0x64,0x62,0x4e]
// CHECK: smax v0.2s, v1.2s, v2.2s // encoding: [0x20,0x64,0xa2,0x0e]
// CHECK: smax v0.4s, v1.4s, v2.4s // encoding: [0x20,0x64,0xa2,0x4e]
umax v0.8b, v1.8b, v2.8b
umax v0.16b, v1.16b, v2.16b
umax v0.4h, v1.4h, v2.4h
umax v0.8h, v1.8h, v2.8h
umax v0.2s, v1.2s, v2.2s
umax v0.4s, v1.4s, v2.4s
// CHECK: umax v0.8b, v1.8b, v2.8b // encoding: [0x20,0x64,0x22,0x2e]
// CHECK: umax v0.16b, v1.16b, v2.16b // encoding: [0x20,0x64,0x22,0x6e]
// CHECK: umax v0.4h, v1.4h, v2.4h // encoding: [0x20,0x64,0x62,0x2e]
// CHECK: umax v0.8h, v1.8h, v2.8h // encoding: [0x20,0x64,0x62,0x6e]
// CHECK: umax v0.2s, v1.2s, v2.2s // encoding: [0x20,0x64,0xa2,0x2e]
// CHECK: umax v0.4s, v1.4s, v2.4s // encoding: [0x20,0x64,0xa2,0x6e]
//----------------------------------------------------------------------
// Vector Minimum (Signed and Unsigned Integer)
//----------------------------------------------------------------------
smin v0.8b, v1.8b, v2.8b
smin v0.16b, v1.16b, v2.16b
smin v0.4h, v1.4h, v2.4h
smin v0.8h, v1.8h, v2.8h
smin v0.2s, v1.2s, v2.2s
smin v0.4s, v1.4s, v2.4s
// CHECK: smin v0.8b, v1.8b, v2.8b // encoding: [0x20,0x6c,0x22,0x0e]
// CHECK: smin v0.16b, v1.16b, v2.16b // encoding: [0x20,0x6c,0x22,0x4e]
// CHECK: smin v0.4h, v1.4h, v2.4h // encoding: [0x20,0x6c,0x62,0x0e]
// CHECK: smin v0.8h, v1.8h, v2.8h // encoding: [0x20,0x6c,0x62,0x4e]
// CHECK: smin v0.2s, v1.2s, v2.2s // encoding: [0x20,0x6c,0xa2,0x0e]
// CHECK: smin v0.4s, v1.4s, v2.4s // encoding: [0x20,0x6c,0xa2,0x4e]
umin v0.8b, v1.8b, v2.8b
umin v0.16b, v1.16b, v2.16b
umin v0.4h, v1.4h, v2.4h
umin v0.8h, v1.8h, v2.8h
umin v0.2s, v1.2s, v2.2s
umin v0.4s, v1.4s, v2.4s
// CHECK: umin v0.8b, v1.8b, v2.8b // encoding: [0x20,0x6c,0x22,0x2e]
// CHECK: umin v0.16b, v1.16b, v2.16b // encoding: [0x20,0x6c,0x22,0x6e]
// CHECK: umin v0.4h, v1.4h, v2.4h // encoding: [0x20,0x6c,0x62,0x2e]
// CHECK: umin v0.8h, v1.8h, v2.8h // encoding: [0x20,0x6c,0x62,0x6e]
// CHECK: umin v0.2s, v1.2s, v2.2s // encoding: [0x20,0x6c,0xa2,0x2e]
// CHECK: umin v0.4s, v1.4s, v2.4s // encoding: [0x20,0x6c,0xa2,0x6e]
//----------------------------------------------------------------------
// Vector Maximum (Floating Point)
//----------------------------------------------------------------------
fmax v0.2s, v1.2s, v2.2s
fmax v31.4s, v15.4s, v16.4s
fmax v7.2d, v8.2d, v25.2d
// CHECK: fmax v0.2s, v1.2s, v2.2s // encoding: [0x20,0xf4,0x22,0x0e]
// CHECK: fmax v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x4e]
// CHECK: fmax v7.2d, v8.2d, v25.2d // encoding: [0x07,0xf5,0x79,0x4e]
//----------------------------------------------------------------------
// Vector Minimum (Floating Point)
//----------------------------------------------------------------------
fmin v10.2s, v15.2s, v22.2s
fmin v3.4s, v5.4s, v6.4s
fmin v17.2d, v13.2d, v2.2d
// CHECK: fmin v10.2s, v15.2s, v22.2s // encoding: [0xea,0xf5,0xb6,0x0e]
// CHECK: fmin v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xf4,0xa6,0x4e]
// CHECK: fmin v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xf5,0xe2,0x4e]
//----------------------------------------------------------------------
// Vector maxNum (Floating Point)
//----------------------------------------------------------------------
fmaxnm v0.2s, v1.2s, v2.2s
fmaxnm v31.4s, v15.4s, v16.4s
fmaxnm v7.2d, v8.2d, v25.2d
// CHECK: fmaxnm v0.2s, v1.2s, v2.2s // encoding: [0x20,0xc4,0x22,0x0e]
// CHECK: fmaxnm v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x4e]
// CHECK: fmaxnm v7.2d, v8.2d, v25.2d // encoding: [0x07,0xc5,0x79,0x4e]
//----------------------------------------------------------------------
// Vector minNum (Floating Point)
//----------------------------------------------------------------------
fminnm v10.2s, v15.2s, v22.2s
fminnm v3.4s, v5.4s, v6.4s
fminnm v17.2d, v13.2d, v2.2d
// CHECK: fminnm v10.2s, v15.2s, v22.2s // encoding: [0xea,0xc5,0xb6,0x0e]
// CHECK: fminnm v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xc4,0xa6,0x4e]
// CHECK: fminnm v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xc5,0xe2,0x4e]

View File

@ -0,0 +1,61 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Integer Multiply-accumulate
//----------------------------------------------------------------------
mla v0.8b, v1.8b, v2.8b
mla v0.16b, v1.16b, v2.16b
mla v0.4h, v1.4h, v2.4h
mla v0.8h, v1.8h, v2.8h
mla v0.2s, v1.2s, v2.2s
mla v0.4s, v1.4s, v2.4s
// CHECK: mla v0.8b, v1.8b, v2.8b // encoding: [0x20,0x94,0x22,0x0e]
// CHECK: mla v0.16b, v1.16b, v2.16b // encoding: [0x20,0x94,0x22,0x4e]
// CHECK: mla v0.4h, v1.4h, v2.4h // encoding: [0x20,0x94,0x62,0x0e]
// CHECK: mla v0.8h, v1.8h, v2.8h // encoding: [0x20,0x94,0x62,0x4e]
// CHECK: mla v0.2s, v1.2s, v2.2s // encoding: [0x20,0x94,0xa2,0x0e]
// CHECK: mla v0.4s, v1.4s, v2.4s // encoding: [0x20,0x94,0xa2,0x4e]
//----------------------------------------------------------------------
// Vector Integer Multiply-subtract
//----------------------------------------------------------------------
mls v0.8b, v1.8b, v2.8b
mls v0.16b, v1.16b, v2.16b
mls v0.4h, v1.4h, v2.4h
mls v0.8h, v1.8h, v2.8h
mls v0.2s, v1.2s, v2.2s
mls v0.4s, v1.4s, v2.4s
// CHECK: mls v0.8b, v1.8b, v2.8b // encoding: [0x20,0x94,0x22,0x2e]
// CHECK: mls v0.16b, v1.16b, v2.16b // encoding: [0x20,0x94,0x22,0x6e]
// CHECK: mls v0.4h, v1.4h, v2.4h // encoding: [0x20,0x94,0x62,0x2e]
// CHECK: mls v0.8h, v1.8h, v2.8h // encoding: [0x20,0x94,0x62,0x6e]
// CHECK: mls v0.2s, v1.2s, v2.2s // encoding: [0x20,0x94,0xa2,0x2e]
// CHECK: mls v0.4s, v1.4s, v2.4s // encoding: [0x20,0x94,0xa2,0x6e]
//----------------------------------------------------------------------
// Vector Floating-Point Multiply-accumulate
//----------------------------------------------------------------------
fmla v0.2s, v1.2s, v2.2s
fmla v0.4s, v1.4s, v2.4s
fmla v0.2d, v1.2d, v2.2d
// CHECK: fmla v0.2s, v1.2s, v2.2s // encoding: [0x20,0xcc,0x22,0x0e]
// CHECK: fmla v0.4s, v1.4s, v2.4s // encoding: [0x20,0xcc,0x22,0x4e]
// CHECK: fmla v0.2d, v1.2d, v2.2d // encoding: [0x20,0xcc,0x62,0x4e]
//----------------------------------------------------------------------
// Vector Floating-Point Multiply-subtract
//----------------------------------------------------------------------
fmls v0.2s, v1.2s, v2.2s
fmls v0.4s, v1.4s, v2.4s
fmls v0.2d, v1.2d, v2.2d
// CHECK: fmls v0.2s, v1.2s, v2.2s // encoding: [0x20,0xcc,0xa2,0x0e]
// CHECK: fmls v0.4s, v1.4s, v2.4s // encoding: [0x20,0xcc,0xa2,0x4e]
// CHECK: fmls v0.2d, v1.2d, v2.2d // encoding: [0x20,0xcc,0xe2,0x4e]

test/MC/AArch64/neon-mov.s

@ -0,0 +1,207 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Move Immediate Shifted
//----------------------------------------------------------------------
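// Semantics note: the operand is an 8-bit immediate, optionally shifted left by
// 8, 16 or 24 bits for .s lanes (8 bits only for .h lanes).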
movi v0.2s, #1
movi v1.2s, #0
movi v15.2s, #1, lsl #8
movi v16.2s, #1, lsl #16
movi v31.2s, #1, lsl #24
movi v0.4s, #1
movi v0.4s, #1, lsl #8
movi v0.4s, #1, lsl #16
movi v0.4s, #1, lsl #24
movi v0.4h, #1
movi v0.4h, #1, lsl #8
movi v0.8h, #1
movi v0.8h, #1, lsl #8
// CHECK: movi v0.2s, #0x1 // encoding: [0x20,0x04,0x00,0x0f]
// CHECK: movi v1.2s, #0x0 // encoding: [0x01,0x04,0x00,0x0f]
// CHECK: movi v15.2s, #0x1, lsl #8 // encoding: [0x2f,0x24,0x00,0x0f]
// CHECK: movi v16.2s, #0x1, lsl #16 // encoding: [0x30,0x44,0x00,0x0f]
// CHECK: movi v31.2s, #0x1, lsl #24 // encoding: [0x3f,0x64,0x00,0x0f]
// CHECK: movi v0.4s, #0x1 // encoding: [0x20,0x04,0x00,0x4f]
// CHECK: movi v0.4s, #0x1, lsl #8 // encoding: [0x20,0x24,0x00,0x4f]
// CHECK: movi v0.4s, #0x1, lsl #16 // encoding: [0x20,0x44,0x00,0x4f]
// CHECK: movi v0.4s, #0x1, lsl #24 // encoding: [0x20,0x64,0x00,0x4f]
// CHECK: movi v0.4h, #0x1 // encoding: [0x20,0x84,0x00,0x0f]
// CHECK: movi v0.4h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x0f]
// CHECK: movi v0.8h, #0x1 // encoding: [0x20,0x84,0x00,0x4f]
// CHECK: movi v0.8h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x4f]
//----------------------------------------------------------------------
// Vector Move Inverted Immediate Shifted
//----------------------------------------------------------------------
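// Semantics note: mvni writes the bitwise inverse of the (optionally shifted)
// 8-bit immediate to every lane.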
mvni v0.2s, #1
mvni v1.2s, #0
mvni v0.2s, #1, lsl #8
mvni v0.2s, #1, lsl #16
mvni v0.2s, #1, lsl #24
mvni v0.4s, #1
mvni v15.4s, #1, lsl #8
mvni v16.4s, #1, lsl #16
mvni v31.4s, #1, lsl #24
mvni v0.4h, #1
mvni v0.4h, #1, lsl #8
mvni v0.8h, #1
mvni v0.8h, #1, lsl #8
// CHECK: mvni v0.2s, #0x1 // encoding: [0x20,0x04,0x00,0x2f]
// CHECK: mvni v1.2s, #0x0 // encoding: [0x01,0x04,0x00,0x2f]
// CHECK: mvni v0.2s, #0x1, lsl #8 // encoding: [0x20,0x24,0x00,0x2f]
// CHECK: mvni v0.2s, #0x1, lsl #16 // encoding: [0x20,0x44,0x00,0x2f]
// CHECK: mvni v0.2s, #0x1, lsl #24 // encoding: [0x20,0x64,0x00,0x2f]
// CHECK: mvni v0.4s, #0x1 // encoding: [0x20,0x04,0x00,0x6f]
// CHECK: mvni v15.4s, #0x1, lsl #8 // encoding: [0x2f,0x24,0x00,0x6f]
// CHECK: mvni v16.4s, #0x1, lsl #16 // encoding: [0x30,0x44,0x00,0x6f]
// CHECK: mvni v31.4s, #0x1, lsl #24 // encoding: [0x3f,0x64,0x00,0x6f]
// CHECK: mvni v0.4h, #0x1 // encoding: [0x20,0x84,0x00,0x2f]
// CHECK: mvni v0.4h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x2f]
// CHECK: mvni v0.8h, #0x1 // encoding: [0x20,0x84,0x00,0x6f]
// CHECK: mvni v0.8h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x6f]
//----------------------------------------------------------------------
// Vector Bitwise Bit Clear (AND NOT) - immediate
//----------------------------------------------------------------------
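// Semantics note: immediate bic clears the immediate's bits in each lane of the
// destination (Vd[i] &= ~imm), so the destination is also a source.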
bic v0.2s, #1
bic v1.2s, #0
bic v0.2s, #1, lsl #8
bic v0.2s, #1, lsl #16
bic v0.2s, #1, lsl #24
bic v0.4s, #1
bic v0.4s, #1, lsl #8
bic v0.4s, #1, lsl #16
bic v0.4s, #1, lsl #24
bic v15.4h, #1
bic v16.4h, #1, lsl #8
bic v0.8h, #1
bic v31.8h, #1, lsl #8
// CHECK: bic v0.2s, #0x1 // encoding: [0x20,0x14,0x00,0x2f]
// CHECK: bic v1.2s, #0x0 // encoding: [0x01,0x14,0x00,0x2f]
// CHECK: bic v0.2s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x2f]
// CHECK: bic v0.2s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x2f]
// CHECK: bic v0.2s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x2f]
// CHECK: bic v0.4s, #0x1 // encoding: [0x20,0x14,0x00,0x6f]
// CHECK: bic v0.4s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x6f]
// CHECK: bic v0.4s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x6f]
// CHECK: bic v0.4s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x6f]
// CHECK: bic v15.4h, #0x1 // encoding: [0x2f,0x94,0x00,0x2f]
// CHECK: bic v16.4h, #0x1, lsl #8 // encoding: [0x30,0xb4,0x00,0x2f]
// CHECK: bic v0.8h, #0x1 // encoding: [0x20,0x94,0x00,0x6f]
// CHECK: bic v31.8h, #0x1, lsl #8 // encoding: [0x3f,0xb4,0x00,0x6f]
//----------------------------------------------------------------------
// Vector Bitwise OR - immediate
//----------------------------------------------------------------------
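// Semantics note: immediate orr sets the immediate's bits in each lane
// (Vd[i] |= imm); like bic above, the destination is read-modify-write.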
orr v0.2s, #1
orr v1.2s, #0
orr v0.2s, #1, lsl #8
orr v0.2s, #1, lsl #16
orr v0.2s, #1, lsl #24
orr v0.4s, #1
orr v0.4s, #1, lsl #8
orr v0.4s, #1, lsl #16
orr v0.4s, #1, lsl #24
orr v31.4h, #1
orr v15.4h, #1, lsl #8
orr v0.8h, #1
orr v16.8h, #1, lsl #8
// CHECK: orr v0.2s, #0x1 // encoding: [0x20,0x14,0x00,0x0f]
// CHECK: orr v1.2s, #0x0 // encoding: [0x01,0x14,0x00,0x0f]
// CHECK: orr v0.2s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x0f]
// CHECK: orr v0.2s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x0f]
// CHECK: orr v0.2s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x0f]
// CHECK: orr v0.4s, #0x1 // encoding: [0x20,0x14,0x00,0x4f]
// CHECK: orr v0.4s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x4f]
// CHECK: orr v0.4s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x4f]
// CHECK: orr v0.4s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x4f]
// CHECK: orr v31.4h, #0x1 // encoding: [0x3f,0x94,0x00,0x0f]
// CHECK: orr v15.4h, #0x1, lsl #8 // encoding: [0x2f,0xb4,0x00,0x0f]
// CHECK: orr v0.8h, #0x1 // encoding: [0x20,0x94,0x00,0x4f]
// CHECK: orr v16.8h, #0x1, lsl #8 // encoding: [0x30,0xb4,0x00,0x4f]
//----------------------------------------------------------------------
// Vector Move Immediate Masked
//----------------------------------------------------------------------
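// Semantics note: the msl ("masked shift") modifier shifts the immediate left
// and fills the vacated low bits with ones, e.g. #0x1, msl #8 yields 0x1ff.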
movi v0.2s, #1, msl #8
movi v1.2s, #1, msl #16
movi v0.4s, #1, msl #8
movi v31.4s, #1, msl #16
// CHECK: movi v0.2s, #0x1, msl #8 // encoding: [0x20,0xc4,0x00,0x0f]
// CHECK: movi v1.2s, #0x1, msl #16 // encoding: [0x21,0xd4,0x00,0x0f]
// CHECK: movi v0.4s, #0x1, msl #8 // encoding: [0x20,0xc4,0x00,0x4f]
// CHECK: movi v31.4s, #0x1, msl #16 // encoding: [0x3f,0xd4,0x00,0x4f]
//----------------------------------------------------------------------
// Vector Move Inverted Immediate Masked
//----------------------------------------------------------------------
mvni v1.2s, #0x1, msl #8
mvni v0.2s, #0x1, msl #16
mvni v31.4s, #0x1, msl #8
mvni v0.4s, #0x1, msl #16
// CHECK: mvni v1.2s, #0x1, msl #8 // encoding: [0x21,0xc4,0x00,0x2f]
// CHECK: mvni v0.2s, #0x1, msl #16 // encoding: [0x20,0xd4,0x00,0x2f]
// CHECK: mvni v31.4s, #0x1, msl #8 // encoding: [0x3f,0xc4,0x00,0x6f]
// CHECK: mvni v0.4s, #0x1, msl #16 // encoding: [0x20,0xd4,0x00,0x6f]
//----------------------------------------------------------------------
// Vector Immediate - per byte
//----------------------------------------------------------------------
movi v0.8b, #0
movi v31.8b, #0xff
movi v15.16b, #0xf
movi v31.16b, #0x1f
// CHECK: movi v0.8b, #0x0 // encoding: [0x00,0xe4,0x00,0x0f]
// CHECK: movi v31.8b, #0xff // encoding: [0xff,0xe7,0x07,0x0f]
// CHECK: movi v15.16b, #0xf // encoding: [0xef,0xe5,0x00,0x4f]
// CHECK: movi v31.16b, #0x1f // encoding: [0xff,0xe7,0x00,0x4f]
//----------------------------------------------------------------------
// Vector Move Immediate - bytemask, per doubleword
//---------------------------------------------------------------------
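// Semantics note: in the bytemask forms (here and the scalar form below) each
// bit of the 8-bit immediate expands to one byte (0x00 or 0xff) of the 64-bit
// pattern; 0xff00ff00ff00ff00 corresponds to imm8 = 0b10101010.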
movi v0.2d, #0xff00ff00ff00ff00
// CHECK: movi v0.2d, #0xff00ff00ff00ff00 // encoding: [0x40,0xe5,0x05,0x6f]
//----------------------------------------------------------------------
// Vector Move Immediate - bytemask, one doubleword
//----------------------------------------------------------------------
movi d0, #0xff00ff00ff00ff00
// CHECK: movi d0, #0xff00ff00ff00ff00 // encoding: [0x40,0xe5,0x05,0x2f]
//----------------------------------------------------------------------
// Vector Floating Point Move Immediate
//----------------------------------------------------------------------
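// Semantics note: the constant must be representable as an 8-bit floating-point
// immediate (sign, 3-bit exponent, 4-bit fraction), which restricts the set of
// encodable values.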
fmov v1.2s, #1.0
fmov v15.4s, #1.0
fmov v31.2d, #1.0
// CHECK: fmov v1.2s, #1.00000000 // encoding: [0x01,0xf6,0x03,0x0f]
// CHECK: fmov v15.4s, #1.00000000 // encoding: [0x0f,0xf6,0x03,0x4f]
// CHECK: fmov v31.2d, #1.00000000 // encoding: [0x1f,0xf6,0x03,0x6f]
//----------------------------------------------------------------------
// Vector Move - register
//----------------------------------------------------------------------
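// Note: mov Vd.<T>, Vn.<T> is an alias of orr Vd, Vn, Vn, which is why the two
// pairs below produce identical encodings and both print as mov.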
mov v0.8b, v31.8b
mov v15.16b, v16.16b
orr v0.8b, v31.8b, v31.8b
orr v15.16b, v16.16b, v16.16b
// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]


@ -0,0 +1,86 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Integer Mul
//----------------------------------------------------------------------
mul v0.8b, v1.8b, v2.8b
mul v0.16b, v1.16b, v2.16b
mul v0.4h, v1.4h, v2.4h
mul v0.8h, v1.8h, v2.8h
mul v0.2s, v1.2s, v2.2s
mul v0.4s, v1.4s, v2.4s
// CHECK: mul v0.8b, v1.8b, v2.8b // encoding: [0x20,0x9c,0x22,0x0e]
// CHECK: mul v0.16b, v1.16b, v2.16b // encoding: [0x20,0x9c,0x22,0x4e]
// CHECK: mul v0.4h, v1.4h, v2.4h // encoding: [0x20,0x9c,0x62,0x0e]
// CHECK: mul v0.8h, v1.8h, v2.8h // encoding: [0x20,0x9c,0x62,0x4e]
// CHECK: mul v0.2s, v1.2s, v2.2s // encoding: [0x20,0x9c,0xa2,0x0e]
// CHECK: mul v0.4s, v1.4s, v2.4s // encoding: [0x20,0x9c,0xa2,0x4e]
//----------------------------------------------------------------------
// Vector Floating-Point Mul
//----------------------------------------------------------------------
fmul v0.2s, v1.2s, v2.2s
fmul v0.4s, v1.4s, v2.4s
fmul v0.2d, v1.2d, v2.2d
// CHECK: fmul v0.2s, v1.2s, v2.2s // encoding: [0x20,0xdc,0x22,0x2e]
// CHECK: fmul v0.4s, v1.4s, v2.4s // encoding: [0x20,0xdc,0x22,0x6e]
// CHECK: fmul v0.2d, v1.2d, v2.2d // encoding: [0x20,0xdc,0x62,0x6e]
//----------------------------------------------------------------------
// Vector Floating-Point Div
//----------------------------------------------------------------------
fdiv v0.2s, v1.2s, v2.2s
fdiv v0.4s, v1.4s, v2.4s
fdiv v0.2d, v1.2d, v2.2d
// CHECK: fdiv v0.2s, v1.2s, v2.2s // encoding: [0x20,0xfc,0x22,0x2e]
// CHECK: fdiv v0.4s, v1.4s, v2.4s // encoding: [0x20,0xfc,0x22,0x6e]
// CHECK: fdiv v0.2d, v1.2d, v2.2d // encoding: [0x20,0xfc,0x62,0x6e]
//----------------------------------------------------------------------
// Vector Multiply (Polynomial)
//----------------------------------------------------------------------
pmul v17.8b, v31.8b, v16.8b
pmul v0.16b, v1.16b, v2.16b
// CHECK: pmul v17.8b, v31.8b, v16.8b // encoding: [0xf1,0x9f,0x30,0x2e]
// CHECK: pmul v0.16b, v1.16b, v2.16b // encoding: [0x20,0x9c,0x22,0x6e]
//----------------------------------------------------------------------
// Vector Saturating Doubling Multiply High
//----------------------------------------------------------------------
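// Semantics note: sqdmulh returns the saturated high half of the doubled
// product, roughly sat((2 * Vn[i] * Vm[i]) >> lane_bits); sqrdmulh (below)
// adds a rounding constant before the shift.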
sqdmulh v2.4h, v25.4h, v3.4h
sqdmulh v12.8h, v5.8h, v13.8h
sqdmulh v3.2s, v1.2s, v30.2s
// CHECK: sqdmulh v2.4h, v25.4h, v3.4h // encoding: [0x22,0xb7,0x63,0x0e]
// CHECK: sqdmulh v12.8h, v5.8h, v13.8h // encoding: [0xac,0xb4,0x6d,0x4e]
// CHECK: sqdmulh v3.2s, v1.2s, v30.2s // encoding: [0x23,0xb4,0xbe,0x0e]
//----------------------------------------------------------------------
// Vector Saturating Rounding Doubling Multiply High
//----------------------------------------------------------------------
sqrdmulh v2.4h, v25.4h, v3.4h
sqrdmulh v12.8h, v5.8h, v13.8h
sqrdmulh v3.2s, v1.2s, v30.2s
// CHECK: sqrdmulh v2.4h, v25.4h, v3.4h // encoding: [0x22,0xb7,0x63,0x2e]
// CHECK: sqrdmulh v12.8h, v5.8h, v13.8h // encoding: [0xac,0xb4,0x6d,0x6e]
// CHECK: sqrdmulh v3.2s, v1.2s, v30.2s // encoding: [0x23,0xb4,0xbe,0x2e]
//----------------------------------------------------------------------
// Vector Multiply Extended
//----------------------------------------------------------------------
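// Semantics note: fmulx behaves like fmul except that (+/-)0.0 * (+/-)infinity
// returns (+/-)2.0 rather than the default NaN.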
fmulx v21.2s, v5.2s, v13.2s
fmulx v1.4s, v25.4s, v3.4s
fmulx v31.2d, v22.2d, v2.2d
// CHECK: fmulx v21.2s, v5.2s, v13.2s // encoding: [0xb5,0xdc,0x2d,0x0e]
// CHECK: fmulx v1.4s, v25.4s, v3.4s // encoding: [0x21,0xdf,0x23,0x4e]
// CHECK: fmulx v31.2d, v22.2d, v2.2d // encoding: [0xdf,0xde,0x62,0x4e]


@ -0,0 +1,39 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Integer Rounding Halving Add (Signed)
//------------------------------------------------------------------------------
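// Semantics note: the rounding halving add computes (Vn[i] + Vm[i] + 1) >> 1
// per lane, using an intermediate wide enough not to overflow.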
srhadd v0.8b, v1.8b, v2.8b
srhadd v0.16b, v1.16b, v2.16b
srhadd v0.4h, v1.4h, v2.4h
srhadd v0.8h, v1.8h, v2.8h
srhadd v0.2s, v1.2s, v2.2s
srhadd v0.4s, v1.4s, v2.4s
// CHECK: srhadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x14,0x22,0x0e]
// CHECK: srhadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x14,0x22,0x4e]
// CHECK: srhadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0x62,0x0e]
// CHECK: srhadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0x62,0x4e]
// CHECK: srhadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x14,0xa2,0x0e]
// CHECK: srhadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x14,0xa2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Rounding Halving Add (Unsigned)
//------------------------------------------------------------------------------
urhadd v0.8b, v1.8b, v2.8b
urhadd v0.16b, v1.16b, v2.16b
urhadd v0.4h, v1.4h, v2.4h
urhadd v0.8h, v1.8h, v2.8h
urhadd v0.2s, v1.2s, v2.2s
urhadd v0.4s, v1.4s, v2.4s
// CHECK: urhadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x14,0x22,0x2e]
// CHECK: urhadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x14,0x22,0x6e]
// CHECK: urhadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0x62,0x2e]
// CHECK: urhadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0x62,0x6e]
// CHECK: urhadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x14,0xa2,0x2e]
// CHECK: urhadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x14,0xa2,0x6e]


@ -0,0 +1,57 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Integer Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
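// Semantics note: the shift amount is the signed low byte of each Vm lane;
// positive values shift left, negative values shift right with rounding.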
srshl v0.8b, v1.8b, v2.8b
srshl v0.16b, v1.16b, v2.16b
srshl v0.4h, v1.4h, v2.4h
srshl v0.8h, v1.8h, v2.8h
srshl v0.2s, v1.2s, v2.2s
srshl v0.4s, v1.4s, v2.4s
srshl v0.2d, v1.2d, v2.2d
// CHECK: srshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x54,0x22,0x0e]
// CHECK: srshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x54,0x22,0x4e]
// CHECK: srshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x54,0x62,0x0e]
// CHECK: srshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x54,0x62,0x4e]
// CHECK: srshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x54,0xa2,0x0e]
// CHECK: srshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x4e]
// CHECK: srshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
urshl v0.8b, v1.8b, v2.8b
urshl v0.16b, v1.16b, v2.16b
urshl v0.4h, v1.4h, v2.4h
urshl v0.8h, v1.8h, v2.8h
urshl v0.2s, v1.2s, v2.2s
urshl v0.4s, v1.4s, v2.4s
urshl v0.2d, v1.2d, v2.2d
// CHECK: urshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x54,0x22,0x2e]
// CHECK: urshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x54,0x22,0x6e]
// CHECK: urshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x54,0x62,0x2e]
// CHECK: urshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x54,0x62,0x6e]
// CHECK: urshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x54,0xa2,0x2e]
// CHECK: urshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x6e]
// CHECK: urshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
srshl d17, d31, d8
// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
urshl d17, d31, d8
// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e]


@ -0,0 +1,133 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Integer Saturating Add (Signed)
//------------------------------------------------------------------------------
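// Semantics note: the saturating add/sub forms clamp the result to the lane's
// representable range instead of wrapping on overflow.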
sqadd v0.8b, v1.8b, v2.8b
sqadd v0.16b, v1.16b, v2.16b
sqadd v0.4h, v1.4h, v2.4h
sqadd v0.8h, v1.8h, v2.8h
sqadd v0.2s, v1.2s, v2.2s
sqadd v0.4s, v1.4s, v2.4s
sqadd v0.2d, v1.2d, v2.2d
// CHECK: sqadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x0c,0x22,0x0e]
// CHECK: sqadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x0c,0x22,0x4e]
// CHECK: sqadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x0c,0x62,0x0e]
// CHECK: sqadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x0c,0x62,0x4e]
// CHECK: sqadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x0c,0xa2,0x0e]
// CHECK: sqadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x0c,0xa2,0x4e]
// CHECK: sqadd v0.2d, v1.2d, v2.2d // encoding: [0x20,0x0c,0xe2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Saturating Add (Unsigned)
//------------------------------------------------------------------------------
uqadd v0.8b, v1.8b, v2.8b
uqadd v0.16b, v1.16b, v2.16b
uqadd v0.4h, v1.4h, v2.4h
uqadd v0.8h, v1.8h, v2.8h
uqadd v0.2s, v1.2s, v2.2s
uqadd v0.4s, v1.4s, v2.4s
uqadd v0.2d, v1.2d, v2.2d
// CHECK: uqadd v0.8b, v1.8b, v2.8b // encoding: [0x20,0x0c,0x22,0x2e]
// CHECK: uqadd v0.16b, v1.16b, v2.16b // encoding: [0x20,0x0c,0x22,0x6e]
// CHECK: uqadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x0c,0x62,0x2e]
// CHECK: uqadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x0c,0x62,0x6e]
// CHECK: uqadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0x0c,0xa2,0x2e]
// CHECK: uqadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0x0c,0xa2,0x6e]
// CHECK: uqadd v0.2d, v1.2d, v2.2d // encoding: [0x20,0x0c,0xe2,0x6e]
//------------------------------------------------------------------------------
// Vector Integer Saturating Sub (Signed)
//------------------------------------------------------------------------------
sqsub v0.8b, v1.8b, v2.8b
sqsub v0.16b, v1.16b, v2.16b
sqsub v0.4h, v1.4h, v2.4h
sqsub v0.8h, v1.8h, v2.8h
sqsub v0.2s, v1.2s, v2.2s
sqsub v0.4s, v1.4s, v2.4s
sqsub v0.2d, v1.2d, v2.2d
// CHECK: sqsub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x2c,0x22,0x0e]
// CHECK: sqsub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x2c,0x22,0x4e]
// CHECK: sqsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x2c,0x62,0x0e]
// CHECK: sqsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x2c,0x62,0x4e]
// CHECK: sqsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x2c,0xa2,0x0e]
// CHECK: sqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x4e]
// CHECK: sqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Saturating Sub (Unsigned)
//------------------------------------------------------------------------------
uqsub v0.8b, v1.8b, v2.8b
uqsub v0.16b, v1.16b, v2.16b
uqsub v0.4h, v1.4h, v2.4h
uqsub v0.8h, v1.8h, v2.8h
uqsub v0.2s, v1.2s, v2.2s
uqsub v0.4s, v1.4s, v2.4s
uqsub v0.2d, v1.2d, v2.2d
// CHECK: uqsub v0.8b, v1.8b, v2.8b // encoding: [0x20,0x2c,0x22,0x2e]
// CHECK: uqsub v0.16b, v1.16b, v2.16b // encoding: [0x20,0x2c,0x22,0x6e]
// CHECK: uqsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x2c,0x62,0x2e]
// CHECK: uqsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x2c,0x62,0x6e]
// CHECK: uqsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0x2c,0xa2,0x2e]
// CHECK: uqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x6e]
// CHECK: uqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Signed)
//------------------------------------------------------------------------------
sqadd b0, b1, b2
sqadd h10, h11, h12
sqadd s20, s21, s2
sqadd d17, d31, d8
// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e]
// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e]
// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e]
// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Unsigned)
//------------------------------------------------------------------------------
uqadd b0, b1, b2
uqadd h10, h11, h12
uqadd s20, s21, s2
uqadd d17, d31, d8
// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e]
// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e]
// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e]
// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Signed)
//------------------------------------------------------------------------------
sqsub b0, b1, b2
sqsub h10, h11, h12
sqsub s20, s21, s2
sqsub d17, d31, d8
// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e]
// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e]
// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e]
// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Unsigned)
//------------------------------------------------------------------------------
uqsub b0, b1, b2
uqsub h10, h11, h12
uqsub s20, s21, s2
uqsub d17, d31, d8
// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e]
// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e]
// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]


@ -0,0 +1,70 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Integer Saturating Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
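// Semantics note: these combine the register-controlled shift of sshl/ushl with
// rounding on right shifts and saturation of the left-shift result.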
sqrshl v0.8b, v1.8b, v2.8b
sqrshl v0.16b, v1.16b, v2.16b
sqrshl v0.4h, v1.4h, v2.4h
sqrshl v0.8h, v1.8h, v2.8h
sqrshl v0.2s, v1.2s, v2.2s
sqrshl v0.4s, v1.4s, v2.4s
sqrshl v0.2d, v1.2d, v2.2d
// CHECK: sqrshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x5c,0x22,0x0e]
// CHECK: sqrshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x5c,0x22,0x4e]
// CHECK: sqrshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x5c,0x62,0x0e]
// CHECK: sqrshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x5c,0x62,0x4e]
// CHECK: sqrshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x5c,0xa2,0x0e]
// CHECK: sqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x4e]
// CHECK: sqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Saturating Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqrshl v0.8b, v1.8b, v2.8b
uqrshl v0.16b, v1.16b, v2.16b
uqrshl v0.4h, v1.4h, v2.4h
uqrshl v0.8h, v1.8h, v2.8h
uqrshl v0.2s, v1.2s, v2.2s
uqrshl v0.4s, v1.4s, v2.4s
uqrshl v0.2d, v1.2d, v2.2d
// CHECK: uqrshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x5c,0x22,0x2e]
// CHECK: uqrshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x5c,0x22,0x6e]
// CHECK: uqrshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x5c,0x62,0x2e]
// CHECK: uqrshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x5c,0x62,0x6e]
// CHECK: uqrshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x5c,0xa2,0x2e]
// CHECK: uqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x6e]
// CHECK: uqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
sqrshl b0, b1, b2
sqrshl h10, h11, h12
sqrshl s20, s21, s2
sqrshl d17, d31, d8
// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e]
// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e]
// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e]
// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqrshl b0, b1, b2
uqrshl h10, h11, h12
uqrshl s20, s21, s2
uqrshl d17, d31, d8
// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e]
// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e]
// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e]
// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e]


@ -0,0 +1,69 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Integer Saturating Shift Left (Signed)
//------------------------------------------------------------------------------
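// Semantics note: as for sshl/ushl the per-lane shift amount is signed, but a
// left shift that overflows saturates to the lane's limit.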
sqshl v0.8b, v1.8b, v2.8b
sqshl v0.16b, v1.16b, v2.16b
sqshl v0.4h, v1.4h, v2.4h
sqshl v0.8h, v1.8h, v2.8h
sqshl v0.2s, v1.2s, v2.2s
sqshl v0.4s, v1.4s, v2.4s
sqshl v0.2d, v1.2d, v2.2d
// CHECK: sqshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x4c,0x22,0x0e]
// CHECK: sqshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x4c,0x22,0x4e]
// CHECK: sqshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x4c,0x62,0x0e]
// CHECK: sqshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x4c,0x62,0x4e]
// CHECK: sqshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x4c,0xa2,0x0e]
// CHECK: sqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x4e]
// CHECK: sqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Saturating Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqshl v0.8b, v1.8b, v2.8b
uqshl v0.16b, v1.16b, v2.16b
uqshl v0.4h, v1.4h, v2.4h
uqshl v0.8h, v1.8h, v2.8h
uqshl v0.2s, v1.2s, v2.2s
uqshl v0.4s, v1.4s, v2.4s
uqshl v0.2d, v1.2d, v2.2d
// CHECK: uqshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x4c,0x22,0x2e]
// CHECK: uqshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x4c,0x22,0x6e]
// CHECK: uqshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x4c,0x62,0x2e]
// CHECK: uqshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x4c,0x62,0x6e]
// CHECK: uqshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x4c,0xa2,0x2e]
// CHECK: uqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x6e]
// CHECK: uqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Left (Signed)
//------------------------------------------------------------------------------
sqshl b0, b1, b2
sqshl h10, h11, h12
sqshl s20, s21, s2
sqshl d17, d31, d8
// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e]
// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e]
// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e]
// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqshl b0, b1, b2
uqshl h10, h11, h12
uqshl s20, s21, s2
uqshl d17, d31, d8
// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e]
// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e]
// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e]
// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e]


@ -0,0 +1,57 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Vector Integer Shift Left (Signed)
//------------------------------------------------------------------------------
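// Semantics note: each lane of Vn is shifted by the signed low byte of the
// corresponding Vm lane; negative amounts shift right.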
sshl v0.8b, v1.8b, v2.8b
sshl v0.16b, v1.16b, v2.16b
sshl v0.4h, v1.4h, v2.4h
sshl v0.8h, v1.8h, v2.8h
sshl v0.2s, v1.2s, v2.2s
sshl v0.4s, v1.4s, v2.4s
sshl v0.2d, v1.2d, v2.2d
// CHECK: sshl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x44,0x22,0x0e]
// CHECK: sshl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x44,0x22,0x4e]
// CHECK: sshl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x44,0x62,0x0e]
// CHECK: sshl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x44,0x62,0x4e]
// CHECK: sshl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x44,0xa2,0x0e]
// CHECK: sshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x44,0xa2,0x4e]
// CHECK: sshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x4e]
//------------------------------------------------------------------------------
// Vector Integer Shift Left (Unsigned)
//------------------------------------------------------------------------------
ushl v0.8b, v1.8b, v2.8b
ushl v0.16b, v1.16b, v2.16b
ushl v0.4h, v1.4h, v2.4h
ushl v0.8h, v1.8h, v2.8h
ushl v0.2s, v1.2s, v2.2s
ushl v0.4s, v1.4s, v2.4s
ushl v0.2d, v1.2d, v2.2d
// CHECK: ushl v0.8b, v1.8b, v2.8b // encoding: [0x20,0x44,0x22,0x2e]
// CHECK: ushl v0.16b, v1.16b, v2.16b // encoding: [0x20,0x44,0x22,0x6e]
// CHECK: ushl v0.4h, v1.4h, v2.4h // encoding: [0x20,0x44,0x62,0x2e]
// CHECK: ushl v0.8h, v1.8h, v2.8h // encoding: [0x20,0x44,0x62,0x6e]
// CHECK: ushl v0.2s, v1.2s, v2.2s // encoding: [0x20,0x44,0xa2,0x2e]
// CHECK: ushl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x44,0xa2,0x6e]
// CHECK: ushl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Shift Left (Signed)
//------------------------------------------------------------------------------
sshl d17, d31, d8
// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Shift Left (Unsigned)
//------------------------------------------------------------------------------
ushl d17, d31, d8
// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e]


@ -0,0 +1,28 @@
// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-neon < %s 2> %t
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
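// With NEON explicitly disabled (-mattr=-neon), every AdvSIMD instruction below
// should be rejected with a "requires a CPU feature" diagnostic.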
fmla v3.4s, v12.4s, v17.4s
fmla v1.2d, v30.2d, v20.2d
fmla v9.2s, v9.2s, v0.2s
// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled
// CHECK-ERROR-NEXT: fmla v3.4s, v12.4s, v17.4s
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
// CHECK-ERROR-NEXT: fmla v1.2d, v30.2d, v20.2d
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
// CHECK-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s
// CHECK-ERROR-NEXT: ^
fmls v3.4s, v12.4s, v17.4s
fmls v1.2d, v30.2d, v20.2d
fmls v9.2s, v9.2s, v0.2s
// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled
// CHECK-ERROR-NEXT: fmls v3.4s, v12.4s, v17.4s
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
// CHECK-ERROR-NEXT: fmls v1.2d, v30.2d, v20.2d
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
// CHECK-ERROR-NEXT: fmls v9.2s, v9.2s, v0.2s
// CHECK-ERROR-NEXT: ^


@ -0,0 +1,673 @@
# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
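# Each input line below is one 32-bit instruction word given byte-by-byte in
# memory (little-endian) order, matching the encoding lists in the assembly tests.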
#------------------------------------------------------------------------------
# Vector Integer Add/Sub
#------------------------------------------------------------------------------
# CHECK: add v31.8b, v31.8b, v31.8b
# CHECK: sub v0.2d, v0.2d, v0.2d
0xff 0x87 0x3f 0x0e
0x00 0x84 0xe0 0x6e
#------------------------------------------------------------------------------
# Vector Floating-Point Add/Sub
#------------------------------------------------------------------------------
# CHECK: fadd v0.4s, v0.4s, v0.4s
# CHECK: fsub v31.2s, v31.2s, v31.2s
0x00 0xd4 0x20 0x4e
0xff 0xd7 0xbf 0x0e
#------------------------------------------------------------------------------
# Vector Integer Mul
#------------------------------------------------------------------------------
# CHECK: mul v0.8b, v1.8b, v2.8b
0x20 0x9c 0x22 0x0e
#------------------------------------------------------------------------------
# Vector Floating-Point Mul/Div
#------------------------------------------------------------------------------
# CHECK: fmul v0.2s, v1.2s, v2.2s
# CHECK: fdiv v31.2s, v31.2s, v31.2s
0x20 0xdc 0x22 0x2e
0xff 0xff 0x3f 0x2e
#----------------------------------------------------------------------
# Vector Polynomial Multiply
#----------------------------------------------------------------------
# CHECK: pmul v0.8b, v15.8b, v16.8b
# CHECK: pmul v31.16b, v7.16b, v8.16b
0xe0 0x9d 0x30 0x2e
0xff 0x9c 0x28 0x6e
#------------------------------------------------------------------------------
# Vector And, Orr, Eor, Orn, Bic
#------------------------------------------------------------------------------
# CHECK: and v2.8b, v2.8b, v2.8b
# CHECK: orr v31.16b, v31.16b, v30.16b
# CHECK: eor v0.16b, v1.16b, v2.16b
# CHECK: orn v9.16b, v10.16b, v11.16b
# CHECK: bic v31.8b, v30.8b, v29.8b
0x42 0x1c 0x22 0x0e
0xff 0x1f 0xbe 0x4e
0x20 0x1c 0x22 0x6e
0x49 0x1d 0xeb 0x4e
0xdf 0x1f 0x7d 0x0e
#------------------------------------------------------------------------------
# Vector Bsl, Bit, Bif
#------------------------------------------------------------------------------
# CHECK: bsl v0.8b, v1.8b, v2.8b
# CHECK: bit v31.16b, v31.16b, v31.16b
# CHECK: bif v0.16b, v1.16b, v2.16b
0x20 0x1c 0x62 0x2e
0xff 0x1f 0xbf 0x6e
0x20 0x1c 0xe2 0x6e
#------------------------------------------------------------------------------
# Vector Integer Multiply-accumulate and Multiply-subtract
#------------------------------------------------------------------------------
# CHECK: mla v0.8b, v1.8b, v2.8b
# CHECK: mls v31.4h, v31.4h, v31.4h
0x20 0x94 0x22 0x0e
0xff 0x97 0x7f 0x2e
#------------------------------------------------------------------------------
# Vector Floating-Point Multiply-accumulate and Multiply-subtract
#------------------------------------------------------------------------------
# CHECK: fmla v0.2s, v1.2s, v2.2s
# CHECK: fmls v31.2s, v31.2s, v31.2s
0x20 0xcc 0x22 0x0e
0xff 0xcf 0xbf 0x0e
#------------------------------------------------------------------------------
# Vector Move Immediate Shifted
# Vector Move Inverted Immediate Shifted
# Vector Bitwise Bit Clear (AND NOT) - immediate
# Vector Bitwise OR - immediate
#------------------------------------------------------------------------------
# CHECK: movi v31.4s, #0xff, lsl #24
# CHECK: mvni v0.2s, #0x0
# CHECK: bic v15.4h, #0xf, lsl #8
# CHECK: orr v16.8h, #0x1f
0xff 0x67 0x07 0x4f
0x00 0x04 0x00 0x2f
0xef 0xb5 0x00 0x2f
0xf0 0x97 0x00 0x4f
#------------------------------------------------------------------------------
# Vector Move Immediate Masked
# Vector Move Inverted Immediate Masked
#------------------------------------------------------------------------------
# CHECK: movi v8.2s, #0x8, msl #8
# CHECK: mvni v16.4s, #0x10, msl #16
0x08 0xc5 0x00 0x0f
0x10 0xd6 0x00 0x6f
#------------------------------------------------------------------------------
# Vector Immediate - per byte
# Vector Move Immediate - bytemask, per doubleword
# Vector Move Immediate - bytemask, one doubleword
#------------------------------------------------------------------------------
# CHECK: movi v16.8b, #0xff
# CHECK: movi v31.16b, #0x1f
# CHECK: movi d15, #0xff00ff00ff00ff
# CHECK: movi v31.2d, #0xff0000ff0000ffff
0xf0 0xe7 0x07 0x0f
0xff 0xe7 0x00 0x4f
0xaf 0xe6 0x02 0x2f
0x7f 0xe6 0x04 0x6f
#------------------------------------------------------------------------------
# Vector Floating Point Move Immediate
#------------------------------------------------------------------------------
# CHECK: fmov v0.2s, #13.0
# CHECK: fmov v15.4s, #1.0
# CHECK: fmov v31.2d, #-1.25
0x40 0xf5 0x01 0x0f
0x0f 0xf6 0x03 0x4f
0x9f 0xf6 0x07 0x6f
#------------------------------------------------------------------------------
# Vector Move - register
#------------------------------------------------------------------------------
# CHECK: mov v1.16b, v15.16b
# CHECK: mov v25.8b, v4.8b
0xe1 0x1d 0xaf 0x4e
0x99 0x1c 0xa4 0x0e
#----------------------------------------------------------------------
# Vector Absolute Difference and Accumulate (Signed, Unsigned)
# Vector Absolute Difference (Signed, Unsigned)
# Vector Absolute Difference (Floating Point)
#----------------------------------------------------------------------
# CHECK: uaba v0.8b, v1.8b, v2.8b
# CHECK: saba v31.16b, v30.16b, v29.16b
# CHECK: uabd v15.4h, v16.4h, v17.4h
# CHECK: sabd v5.4h, v4.4h, v6.4h
# CHECK: fabd v1.4s, v31.4s, v16.4s
0x20 0x7c 0x22 0x2e
0xdf 0x7f 0x3d 0x4e
0x0f 0x76 0x71 0x2e
0x85 0x74 0x66 0x0e
0xe1 0xd7 0xb0 0x6e
#----------------------------------------------------------------------
# Scalar Integer Add
# Scalar Integer Sub
#----------------------------------------------------------------------
# CHECK: add d17, d31, d29
# CHECK: sub d15, d5, d16
0xf1 0x87 0xfd 0x5e
0xaf 0x84 0xf0 0x7e
#----------------------------------------------------------------------
# Vector Reciprocal Square Root Step (Floating Point)
#----------------------------------------------------------------------
# CHECK: frsqrts v31.2d, v15.2d, v8.2d
0xff 0xfd 0xe8 0x4e
#----------------------------------------------------------------------
# Vector Reciprocal Step (Floating Point)
#----------------------------------------------------------------------
# CHECK: frecps v5.4s, v7.4s, v16.4s
0xe5 0xfc 0x30 0x4e
#----------------------------------------------------------------------
# Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
#----------------------------------------------------------------------
# CHECK: facge v0.4s, v31.4s, v16.4s
0xe0 0xef 0x30 0x6e
#----------------------------------------------------------------------
# Vector Absolute Compare Mask Less Than (Floating Point)
#----------------------------------------------------------------------
# CHECK: facgt v31.2d, v29.2d, v28.2d
0xbf 0xef 0xfc 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Equal (Integer)
#----------------------------------------------------------------------
# CHECK: cmeq v5.16b, v15.16b, v31.16b
0xe5 0x8d 0x3f 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Higher or Same (Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: cmhs v1.8b, v16.8b, v30.8b
0x01 0x3e 0x3e 0x2e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than or Equal (Integer)
#----------------------------------------------------------------------
# CHECK: cmge v20.4h, v11.4h, v23.4h
0x74 0x3d 0x77 0x0e
#----------------------------------------------------------------------
# Vector Compare Mask Higher (Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: cmhi v13.8h, v3.8h, v27.8h
0x6d 0x34 0x7b 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than (Integer)
#----------------------------------------------------------------------
# CHECK: cmgt v9.4s, v4.4s, v28.4s
0x89 0x34 0xbc 0x4e
#----------------------------------------------------------------------
# Vector Compare Mask Bitwise Test (Integer)
#----------------------------------------------------------------------
# CHECK: cmtst v21.2s, v19.2s, v18.2s
0x75 0x8e 0xb2 0x0e
#----------------------------------------------------------------------
# Vector Compare Mask Equal (Floating Point)
#----------------------------------------------------------------------
# CHECK: fcmeq v0.2s, v15.2s, v16.2s
0xe0 0xe5 0x30 0x0e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than Or Equal (Floating Point)
#----------------------------------------------------------------------
# CHECK: fcmge v31.4s, v7.4s, v29.4s
0xff 0xe4 0x3d 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than (Floating Point)
#----------------------------------------------------------------------
# CHECK: fcmgt v17.4s, v8.4s, v25.4s
0x11 0xe5 0xb9 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Equal to Zero (Integer)
#----------------------------------------------------------------------
# CHECK: cmeq v31.16b, v15.16b, #0x0
0xff 0x99 0x20 0x4e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
#----------------------------------------------------------------------
# CHECK: cmge v3.8b, v15.8b, #0x0
0xe3 0x89 0x20 0x2e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than Zero (Signed Integer)
#----------------------------------------------------------------------
# CHECK: cmgt v22.2s, v9.2s, #0x0
0x36 0x89 0xa0 0x0e
#----------------------------------------------------------------------
# Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
#----------------------------------------------------------------------
# CHECK: cmle v5.2d, v14.2d, #0x0
0xc5 0x99 0xe0 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Less Than Zero (Signed Integer)
#----------------------------------------------------------------------
# CHECK: cmlt v13.8h, v11.8h, #0x0
0x6d 0xa9 0x60 0x4e
#----------------------------------------------------------------------
# Vector Compare Mask Equal to Zero (Floating Point)
#----------------------------------------------------------------------
# CHECK: fcmeq v15.2s, v21.2s, #0.0
0xaf 0xda 0xa0 0x0e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
#----------------------------------------------------------------------
# CHECK: fcmge v14.2d, v13.2d, #0.0
0xae 0xc9 0xe0 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than Zero (Floating Point)
#----------------------------------------------------------------------
# CHECK: fcmgt v9.4s, v23.4s, #0.0
0xe9 0xca 0xa0 0x4e
#----------------------------------------------------------------------
# Vector Compare Mask Less Than or Equal To Zero (Floating Point)
#----------------------------------------------------------------------
# CHECK: fcmle v11.2d, v6.2d, #0.0
0xcb 0xd8 0xe0 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Less Than Zero (Floating Point)
#----------------------------------------------------------------------
# CHECK: fcmlt v12.4s, v25.4s, #0.0
0x2c 0xeb 0xa0 0x4e
#------------------------------------------------------------------------------
# Vector Integer Halving Add (Signed)
# Vector Integer Halving Add (Unsigned)
# Vector Integer Halving Sub (Signed)
# Vector Integer Halving Sub (Unsigned)
#------------------------------------------------------------------------------
# CHECK: shadd v0.8b, v31.8b, v29.8b
# CHECK: uhadd v15.16b, v16.16b, v17.16b
# CHECK: shsub v0.4h, v1.4h, v2.4h
# CHECK: uhadd v5.8h, v7.8h, v8.8h
# CHECK: shsub v9.2s, v11.2s, v21.2s
# CHECK: uhsub v22.4s, v30.4s, v19.4s
0xe0 0x07 0x3d 0x0e
0x0f 0x06 0x31 0x6e
0x20 0x24 0x62 0x0e
0xe5 0x04 0x68 0x6e
0x69 0x25 0xb5 0x0e
0xd6 0x27 0xb3 0x6e
#------------------------------------------------------------------------------
# Vector Integer Rounding Halving Add (Signed)
# Vector Integer Rounding Halving Add (Unsigned)
#------------------------------------------------------------------------------
# CHECK: srhadd v3.8b, v5.8b, v7.8b
# CHECK: urhadd v7.16b, v17.16b, v27.16b
# CHECK: srhadd v10.4h, v11.4h, v13.4h
# CHECK: urhadd v1.8h, v2.8h, v3.8h
# CHECK: srhadd v4.2s, v5.2s, v6.2s
# CHECK: urhadd v7.4s, v7.4s, v7.4s
0xa3 0x14 0x27 0x0e
0x27 0x16 0x3b 0x6e
0x6a 0x15 0x6d 0x0e
0x41 0x14 0x63 0x6e
0xa4 0x14 0xa6 0x0e
0xe7 0x14 0xa7 0x6e
#------------------------------------------------------------------------------
# Vector Integer Saturating Add (Signed)
# Vector Integer Saturating Add (Unsigned)
# Vector Integer Saturating Sub (Signed)
# Vector Integer Saturating Sub (Unsigned)
#------------------------------------------------------------------------------
# CHECK: sqsub v0.8b, v1.8b, v2.8b
# CHECK: sqadd v0.16b, v1.16b, v2.16b
# CHECK: uqsub v0.4h, v1.4h, v2.4h
# CHECK: uqadd v0.8h, v1.8h, v2.8h
# CHECK: sqadd v0.2s, v1.2s, v2.2s
# CHECK: sqsub v0.4s, v1.4s, v2.4s
# CHECK: sqsub v0.2d, v1.2d, v2.2d
0x20 0x2c 0x22 0x0e
0x20 0x0c 0x22 0x4e
0x20 0x2c 0x62 0x2e
0x20 0x0c 0x62 0x6e
0x20 0x0c 0xa2 0x0e
0x20 0x2c 0xa2 0x4e
0x20 0x2c 0xe2 0x4e
#------------------------------------------------------------------------------
# Scalar Integer Saturating Add (Signed)
# Scalar Integer Saturating Add (Unsigned)
# Scalar Integer Saturating Sub (Signed)
# Scalar Integer Saturating Sub (Unsigned)
#------------------------------------------------------------------------------
# CHECK: sqadd b20, b11, b15
# CHECK: uqadd h0, h1, h5
# CHECK: sqsub s20, s10, s7
# CHECK: uqsub d16, d16, d16
0x74 0x0d 0x2f 0x5e
0x20 0x0c 0x65 0x7e
0x54 0x2d 0xa7 0x5e
0x10 0x2e 0xf0 0x7e
#----------------------------------------------------------------------
# Vector Shift Left (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: sshl v10.8b, v15.8b, v22.8b
# CHECK: ushl v10.16b, v5.16b, v2.16b
# CHECK: sshl v10.4h, v15.4h, v22.4h
# CHECK: ushl v10.8h, v5.8h, v2.8h
# CHECK: sshl v10.2s, v15.2s, v22.2s
# CHECK: ushl v10.4s, v5.4s, v2.4s
# CHECK: sshl v0.2d, v1.2d, v2.2d
0xea 0x45 0x36 0x0e
0xaa 0x44 0x22 0x6e
0xea 0x45 0x76 0x0e
0xaa 0x44 0x62 0x6e
0xea 0x45 0xb6 0x0e
0xaa 0x44 0xa2 0x6e
0x20 0x44 0xe2 0x4e
#----------------------------------------------------------------------
# Vector Saturating Shift Left (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: sqshl v1.8b, v15.8b, v22.8b
# CHECK: uqshl v2.16b, v14.16b, v23.16b
# CHECK: sqshl v3.4h, v13.4h, v24.4h
# CHECK: uqshl v4.8h, v12.8h, v25.8h
# CHECK: sqshl v5.2s, v11.2s, v26.2s
# CHECK: uqshl v6.4s, v10.4s, v27.4s
# CHECK: uqshl v0.2d, v1.2d, v2.2d
0xe1 0x4d 0x36 0x0e
0xc2 0x4d 0x37 0x6e
0xa3 0x4d 0x78 0x0e
0x84 0x4d 0x79 0x6e
0x65 0x4d 0xba 0x0e
0x46 0x4d 0xbb 0x6e
0x20 0x4c 0xe2 0x6e
#----------------------------------------------------------------------
# Vector Rounding Shift Left (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: srshl v10.8b, v5.8b, v22.8b
# CHECK: urshl v10.16b, v5.16b, v2.16b
# CHECK: srshl v1.4h, v5.4h, v31.4h
# CHECK: urshl v1.8h, v5.8h, v2.8h
# CHECK: srshl v10.2s, v15.2s, v2.2s
# CHECK: urshl v1.4s, v5.4s, v2.4s
# CHECK: urshl v0.2d, v1.2d, v2.2d
0xaa 0x54 0x36 0x0e
0xaa 0x54 0x22 0x6e
0xa1 0x54 0x7f 0x0e
0xa1 0x54 0x62 0x6e
0xea 0x55 0xa2 0x0e
0xa1 0x54 0xa2 0x6e
0x20 0x54 0xe2 0x6e
#----------------------------------------------------------------------
# Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: sqrshl v1.8b, v15.8b, v22.8b
# CHECK: uqrshl v2.16b, v14.16b, v23.16b
# CHECK: sqrshl v3.4h, v13.4h, v24.4h
# CHECK: uqrshl v4.8h, v12.8h, v25.8h
# CHECK: sqrshl v5.2s, v11.2s, v26.2s
# CHECK: uqrshl v6.4s, v10.4s, v27.4s
# CHECK: uqrshl v6.4s, v10.4s, v27.4s
0xe1 0x5d 0x36 0x0e
0xc2 0x5d 0x37 0x6e
0xa3 0x5d 0x78 0x0e
0x84 0x5d 0x79 0x6e
0x65 0x5d 0xba 0x0e
0x46 0x5d 0xbb 0x6e
0x46 0x5d 0xbb 0x6e
#----------------------------------------------------------------------
# Scalar Integer Shift Left (Signed, Unsigned)
#----------------------------------------------------------------------
# CHECK: sshl d31, d31, d31
# CHECK: ushl d0, d0, d0
0xff 0x47 0xff 0x5e
0x00 0x44 0xe0 0x7e
#----------------------------------------------------------------------
# Scalar Integer Saturating Shift Left (Signed, Unsigned)
#----------------------------------------------------------------------
# CHECK: sqshl d31, d31, d31
# CHECK: uqshl s23, s20, s16
# CHECK: sqshl h3, h4, h15
# CHECK: uqshl b11, b20, b30
0xff 0x4f 0xff 0x5e
0x97 0x4e 0xb0 0x7e
0x83 0x4c 0x6f 0x5e
0x8b 0x4e 0x3e 0x7e
#----------------------------------------------------------------------
# Scalar Integer Rounding Shift Left (Signed, Unsigned)
#----------------------------------------------------------------------
# CHECK: srshl d16, d16, d16
# CHECK: urshl d8, d7, d4
0x10 0x56 0xf0 0x5e
0xe8 0x54 0xe4 0x7e
#----------------------------------------------------------------------
# Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
#----------------------------------------------------------------------
# CHECK: sqrshl d31, d31, d31
# CHECK: uqrshl s23, s20, s16
# CHECK: sqrshl h3, h4, h15
# CHECK: uqrshl b11, b20, b30
0xff 0x5f 0xff 0x5e
0x97 0x5e 0xb0 0x7e
0x83 0x5c 0x6f 0x5e
0x8b 0x5e 0x3e 0x7e
#----------------------------------------------------------------------
# Vector Maximum (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: smax v1.8b, v15.8b, v22.8b
# CHECK: umax v2.16b, v14.16b, v23.16b
# CHECK: smax v3.4h, v13.4h, v24.4h
# CHECK: umax v4.8h, v12.8h, v25.8h
# CHECK: smax v5.2s, v11.2s, v26.2s
# CHECK: umax v6.4s, v10.4s, v27.4s
0xe1 0x65 0x36 0x0e
0xc2 0x65 0x37 0x6e
0xa3 0x65 0x78 0x0e
0x84 0x65 0x79 0x6e
0x65 0x65 0xba 0x0e
0x46 0x65 0xbb 0x6e
#----------------------------------------------------------------------
# Vector Minimum (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: umin v1.8b, v15.8b, v22.8b
# CHECK: smin v2.16b, v14.16b, v23.16b
# CHECK: umin v3.4h, v13.4h, v24.4h
# CHECK: smin v4.8h, v12.8h, v25.8h
# CHECK: umin v5.2s, v11.2s, v26.2s
# CHECK: smin v6.4s, v10.4s, v27.4s
0xe1 0x6d 0x36 0x2e
0xc2 0x6d 0x37 0x4e
0xa3 0x6d 0x78 0x2e
0x84 0x6d 0x79 0x4e
0x65 0x6d 0xba 0x2e
0x46 0x6d 0xbb 0x4e
#----------------------------------------------------------------------
# Vector Maximum (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmax v29.2s, v28.2s, v25.2s
# CHECK: fmax v9.4s, v8.4s, v5.4s
# CHECK: fmax v11.2d, v10.2d, v7.2d
0x9d 0xf7 0x39 0x0e
0x09 0xf5 0x25 0x4e
0x4b 0xf5 0x67 0x4e
#----------------------------------------------------------------------
# Vector Minimum (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmin v29.2s, v28.2s, v25.2s
# CHECK: fmin v9.4s, v8.4s, v5.4s
# CHECK: fmin v11.2d, v10.2d, v7.2d
0x9d 0xf7 0xb9 0x0e
0x09 0xf5 0xa5 0x4e
0x4b 0xf5 0xe7 0x4e
#----------------------------------------------------------------------
# Vector maxNum (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmaxnm v9.2s, v8.2s, v5.2s
# CHECK: fmaxnm v9.4s, v8.4s, v5.4s
# CHECK: fmaxnm v11.2d, v10.2d, v7.2d
0x09 0xc5 0x25 0x0e
0x09 0xc5 0x25 0x4e
0x4b 0xc5 0x67 0x4e
#----------------------------------------------------------------------
# Vector minNum (Floating Point)
#----------------------------------------------------------------------
# CHECK: fminnm v2.2s, v8.2s, v25.2s
# CHECK: fminnm v9.4s, v8.4s, v5.4s
# CHECK: fminnm v11.2d, v10.2d, v7.2d
0x02 0xc5 0xb9 0x0e
0x09 0xc5 0xa5 0x4e
0x4b 0xc5 0xe7 0x4e
#----------------------------------------------------------------------
# Vector Maximum Pairwise (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: smaxp v1.8b, v15.8b, v22.8b
# CHECK: umaxp v2.16b, v14.16b, v23.16b
# CHECK: smaxp v3.4h, v13.4h, v24.4h
# CHECK: umaxp v4.8h, v12.8h, v25.8h
# CHECK: smaxp v5.2s, v11.2s, v26.2s
# CHECK: umaxp v6.4s, v10.4s, v27.4s
0xe1 0xa5 0x36 0x0e
0xc2 0xa5 0x37 0x6e
0xa3 0xa5 0x78 0x0e
0x84 0xa5 0x79 0x6e
0x65 0xa5 0xba 0x0e
0x46 0xa5 0xbb 0x6e
#----------------------------------------------------------------------
# Vector Minimum Pairwise (Signed and Unsigned Integer)
#----------------------------------------------------------------------
# CHECK: uminp v1.8b, v15.8b, v22.8b
# CHECK: sminp v2.16b, v14.16b, v23.16b
# CHECK: uminp v3.4h, v13.4h, v24.4h
# CHECK: sminp v4.8h, v12.8h, v25.8h
# CHECK: uminp v5.2s, v11.2s, v26.2s
# CHECK: sminp v6.4s, v10.4s, v27.4s
0xe1 0xad 0x36 0x2e
0xc2 0xad 0x37 0x4e
0xa3 0xad 0x78 0x2e
0x84 0xad 0x79 0x4e
0x65 0xad 0xba 0x2e
0x46 0xad 0xbb 0x4e
#----------------------------------------------------------------------
# Vector Maximum Pairwise (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmaxp v29.2s, v28.2s, v25.2s
# CHECK: fmaxp v9.4s, v8.4s, v5.4s
# CHECK: fmaxp v11.2d, v10.2d, v7.2d
0x9d 0xf7 0x39 0x2e
0x09 0xf5 0x25 0x6e
0x4b 0xf5 0x67 0x6e
#----------------------------------------------------------------------
# Vector Minimum Pairwise (Floating Point)
#----------------------------------------------------------------------
# CHECK: fminp v29.2s, v28.2s, v25.2s
# CHECK: fminp v9.4s, v8.4s, v5.4s
# CHECK: fminp v11.2d, v10.2d, v7.2d
0x9d 0xf7 0xb9 0x2e
0x09 0xf5 0xa5 0x6e
0x4b 0xf5 0xe7 0x6e
#----------------------------------------------------------------------
# Vector maxNum Pairwise (Floating Point)
#----------------------------------------------------------------------
# CHECK: fmaxnmp v9.2s, v8.2s, v5.2s
# CHECK: fmaxnmp v9.4s, v8.4s, v5.4s
# CHECK: fmaxnmp v11.2d, v10.2d, v7.2d
0x09 0xc5 0x25 0x2e
0x09 0xc5 0x25 0x6e
0x4b 0xc5 0x67 0x6e
#----------------------------------------------------------------------
# Vector minNum Pairwise (Floating Point)
#----------------------------------------------------------------------
# CHECK: fminnmp v2.2s, v8.2s, v25.2s
# CHECK: fminnmp v9.4s, v8.4s, v5.4s
# CHECK: fminnmp v11.2d, v10.2d, v7.2d
0x02 0xc5 0xb9 0x2e
0x09 0xc5 0xa5 0x6e
0x4b 0xc5 0xe7 0x6e
#------------------------------------------------------------------------------
# Vector Add Pairwise (Integer)
#------------------------------------------------------------------------------
# CHECK: addp v31.8b, v31.8b, v31.8b
# CHECK: addp v0.2d, v0.2d, v0.2d
0xff 0xbf 0x3f 0x0e
0x00 0xbc 0xe0 0x4e
#------------------------------------------------------------------------------
# Vector Add Pairwise (Floating Point)
#------------------------------------------------------------------------------
# CHECK: faddp v0.4s, v0.4s, v0.4s
# CHECK: faddp v31.2s, v31.2s, v31.2s
0x00 0xd4 0x20 0x6e
0xff 0xd7 0x3f 0x2e
#------------------------------------------------------------------------------
# Vector Saturating Doubling Multiply High
# Vector Saturating Rounding Doubling Multiply High
#------------------------------------------------------------------------------
# CHECK: sqdmulh v31.2s, v31.2s, v31.2s
# CHECK: sqdmulh v5.4s, v7.4s, v9.4s
# CHECK: sqrdmulh v31.4h, v3.4h, v13.4h
# CHECK: sqrdmulh v0.8h, v10.8h, v20.8h
0xff 0xb7 0xbf 0x0e
0xe5 0xb4 0xa9 0x4e
0x7f 0xb4 0x6d 0x2e
0x40 0xb5 0x74 0x6e
#------------------------------------------------------------------------------
# Vector Multiply Extended
#------------------------------------------------------------------------------
# CHECK: fmulx v1.2s, v22.2s, v2.2s
# CHECK: fmulx v21.4s, v15.4s, v3.4s
# CHECK: fmulx v11.2d, v5.2d, v23.2d
0xc1 0xde 0x22 0x0e
0xf5 0xdd 0x23 0x4e
0xab 0xdc 0x77 0x4e