From 81ea345894636edc6dc016b6b93ecb7d259b4ae6 Mon Sep 17 00:00:00 2001
From: Kevin Qin
Date: Wed, 23 Apr 2014 06:22:48 +0000
Subject: [PATCH] [ARM64] Enable feature predicates for NEON / FP / CRYPTO.

AArch64 has feature predicates for NEON, FP and CRYPTO instructions.
This allows the compiler to generate code without using FP, NEON or
CRYPTO instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206949 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM64/ARM64CallingConvention.td    |   8 +-
 lib/Target/ARM64/ARM64ISelLowering.cpp        | 262 ++++++++++--------
 lib/Target/ARM64/ARM64InstrFormats.td         |  45 +--
 lib/Target/ARM64/ARM64InstrInfo.cpp           | 151 +++++++---
 lib/Target/ARM64/ARM64InstrInfo.h             |   2 +
 lib/Target/ARM64/ARM64InstrInfo.td            |  10 +
 lib/Target/ARM64/ARM64TargetTransformInfo.cpp |  16 +-
 lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp |  22 +-
 test/CodeGen/ARM64/complex-copy-noneon.ll     |  21 ++
 test/CodeGen/ARM64/crypto.ll                  |   2 +-
 test/CodeGen/ARM64/reg-copy-noneon.ll         |  20 ++
 test/MC/ARM64/advsimd.s                       |   2 +-
 test/MC/ARM64/aliases.s                       |   2 +-
 test/MC/ARM64/arithmetic-encoding.s           |   2 +-
 test/MC/ARM64/crypto.s                        |   2 +-
 test/MC/ARM64/diagno-predicate.s              |  19 ++
 test/MC/ARM64/fp-encoding.s                   |   2 +-
 test/MC/ARM64/nv-cond.s                       |   2 +-
 test/MC/ARM64/simd-ldst.s                     |   2 +-
 test/MC/ARM64/vector-lists.s                  |   2 +-
 test/MC/ARM64/verbose-vector-case.s           |   2 +-
 test/MC/Disassembler/ARM64/advsimd.txt        |   2 +-
 test/MC/Disassembler/ARM64/canonical-form.txt |   2 +-
 test/MC/Disassembler/ARM64/crypto.txt         |   4 +-
 test/MC/Disassembler/ARM64/non-apple-fmov.txt |   2 +-
 test/MC/Disassembler/ARM64/scalar-fp.txt      |   2 +-
 26 files changed, 405 insertions(+), 203 deletions(-)
 create mode 100644 test/CodeGen/ARM64/complex-copy-noneon.ll
 create mode 100644 test/CodeGen/ARM64/reg-copy-noneon.ll
 create mode 100644 test/MC/ARM64/diagno-predicate.s

diff --git a/lib/Target/ARM64/ARM64CallingConvention.td b/lib/Target/ARM64/ARM64CallingConvention.td
index 0ba309a91b4..1a2463062ef 100644
--- a/lib/Target/ARM64/ARM64CallingConvention.td
+++ b/lib/Target/ARM64/ARM64CallingConvention.td
@@ -21,7 +21,7 @@ class CCIfAlign :
 
 def CC_ARM64_AAPCS : CallingConv<[
   CCIfType<[v2f32], CCBitConvertToType>,
-  CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>,
+  CCIfType<[v2f64, v4f32], CCBitConvertToType>,
 
   // An SRet is passed in X8, not X0 like a normal pointer parameter.
   CCIfSRet>>,
@@ -51,7 +51,7 @@ def CC_ARM64_AAPCS : CallingConv<[
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
            CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                    [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
            CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
 
   // If more than will fit in registers, pass them on the stack instead.
@@ -64,7 +64,7 @@ def CC_ARM64_AAPCS : CallingConv<[
 
 def RetCC_ARM64_AAPCS : CallingConv<[
   CCIfType<[v2f32], CCBitConvertToType>,
-  CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>,
+  CCIfType<[v2f64, v4f32], CCBitConvertToType>,
 
   CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
                                           [X0, X1, X2, X3, X4, X5, X6, X7]>>,
@@ -77,7 +77,7 @@ def RetCC_ARM64_AAPCS : CallingConv<[
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
            CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                    [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
            CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
 ]>;
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp
index 9ff9567ac28..154306c02be 100644
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -84,27 +84,32 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   // Set up the register classes.
   addRegisterClass(MVT::i32, &ARM64::GPR32allRegClass);
   addRegisterClass(MVT::i64, &ARM64::GPR64allRegClass);
-  addRegisterClass(MVT::f16, &ARM64::FPR16RegClass);
-  addRegisterClass(MVT::f32, &ARM64::FPR32RegClass);
-  addRegisterClass(MVT::f64, &ARM64::FPR64RegClass);
-  addRegisterClass(MVT::f128, &ARM64::FPR128RegClass);
-  addRegisterClass(MVT::v16i8, &ARM64::FPR8RegClass);
-  addRegisterClass(MVT::v8i16, &ARM64::FPR16RegClass);
 
-  // Someone set us up the NEON.
-  addDRTypeForNEON(MVT::v2f32);
-  addDRTypeForNEON(MVT::v8i8);
-  addDRTypeForNEON(MVT::v4i16);
-  addDRTypeForNEON(MVT::v2i32);
-  addDRTypeForNEON(MVT::v1i64);
-  addDRTypeForNEON(MVT::v1f64);
+  if (Subtarget->hasFPARMv8()) {
+    addRegisterClass(MVT::f16, &ARM64::FPR16RegClass);
+    addRegisterClass(MVT::f32, &ARM64::FPR32RegClass);
+    addRegisterClass(MVT::f64, &ARM64::FPR64RegClass);
+    addRegisterClass(MVT::f128, &ARM64::FPR128RegClass);
+  }
 
-  addQRTypeForNEON(MVT::v4f32);
-  addQRTypeForNEON(MVT::v2f64);
-  addQRTypeForNEON(MVT::v16i8);
-  addQRTypeForNEON(MVT::v8i16);
-  addQRTypeForNEON(MVT::v4i32);
-  addQRTypeForNEON(MVT::v2i64);
+  if (Subtarget->hasNEON()) {
+    addRegisterClass(MVT::v16i8, &ARM64::FPR8RegClass);
+    addRegisterClass(MVT::v8i16, &ARM64::FPR16RegClass);
+    // Someone set us up the NEON.
+    addDRTypeForNEON(MVT::v2f32);
+    addDRTypeForNEON(MVT::v8i8);
+    addDRTypeForNEON(MVT::v4i16);
+    addDRTypeForNEON(MVT::v2i32);
+    addDRTypeForNEON(MVT::v1i64);
+    addDRTypeForNEON(MVT::v1f64);
+
+    addQRTypeForNEON(MVT::v4f32);
+    addQRTypeForNEON(MVT::v2f64);
+    addQRTypeForNEON(MVT::v16i8);
+    addQRTypeForNEON(MVT::v8i16);
+    addQRTypeForNEON(MVT::v4i32);
+    addQRTypeForNEON(MVT::v2i64);
+  }
 
   // Compute derived properties from the register classes
   computeRegisterProperties();
@@ -140,42 +145,6 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   setOperationAction(ISD::FREM, MVT::f64, Expand);
   setOperationAction(ISD::FREM, MVT::f80, Expand);
 
-  // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
-  // silliness like this:
-  setOperationAction(ISD::FABS, MVT::v1f64, Expand);
-  setOperationAction(ISD::FADD, MVT::v1f64, Expand);
-  setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
-  setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
-  setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
-  setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
-  setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
-  setOperationAction(ISD::FMA, MVT::v1f64, Expand);
-  setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
-  setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
-  setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
-  setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
-  setOperationAction(ISD::FREM, MVT::v1f64, Expand);
-  setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
-  setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
-  setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
-  setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
-  setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
-  setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
-  setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
-  setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
-  setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
-  setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
-  setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
-  setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
-
-  setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
-  setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
-  setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
-  setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
-  setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
-
-  setOperationAction(ISD::MUL, MVT::v1i64, Expand);
-
   // Custom lowering hooks are needed for XOR
   // to fold it into CSINC/CSINV.
   setOperationAction(ISD::XOR, MVT::i32, Custom);
@@ -258,24 +227,10 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   setOperationAction(ISD::ROTL, MVT::i32, Expand);
   setOperationAction(ISD::ROTL, MVT::i64, Expand);
 
-  // ARM64 doesn't have a direct vector ->f32 conversion instructions for
-  // elements smaller than i32, so promote the input to i32 first.
-  setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
-  setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
-  setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
-  setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
-  // Similarly, there is no direct i32 -> f64 vector conversion instruction.
-  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
-  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
-  setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
-  setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
-
   // ARM64 doesn't have {U|S}MUL_LOHI.
   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 
-  // ARM64 doesn't have MUL.2d:
-  setOperationAction(ISD::MUL, MVT::v2i64, Expand);
 
   // Expand the undefined-at-zero variants to cttz/ctlz to their defined-at-zero
   // counterparts, which ARM64 supports directly.
@@ -320,8 +275,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
 
   // ARM64 has implementations of a lot of rounding-like FP operations.
-  static MVT RoundingTypes[] = { MVT::f32, MVT::f64, MVT::v2f32,
-                                 MVT::v4f32, MVT::v2f64 };
+  static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
   for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
     MVT Ty = RoundingTypes[I];
     setOperationAction(ISD::FFLOOR, Ty, Legal);
@@ -358,7 +312,6 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   setTruncStoreAction(MVT::f128, MVT::f64, Expand);
   setTruncStoreAction(MVT::f128, MVT::f32, Expand);
   setTruncStoreAction(MVT::f128, MVT::f16, Expand);
-  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
 
   // Indexed loads and stores are supported.
   for (unsigned im = (unsigned)ISD::PRE_INC;
        im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -376,26 +329,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
     setIndexedStoreAction(im, MVT::f32, Legal);
   }
 
-  // Likewise, narrowing and extending vector loads/stores aren't handled
-  // directly.
-  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
-
-    setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
-                       Expand);
-
-    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
-      setTruncStoreAction((MVT::SimpleValueType)VT,
-                          (MVT::SimpleValueType)InnerVT, Expand);
-    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
-    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
-    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
-  }
-
   // Trap.
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
-  setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
 
   // We combine OR nodes for bitfield operations.
   setTargetDAGCombine(ISD::OR);
@@ -440,6 +375,89 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   RequireStrictAlign = StrictAlign;
 
   setHasExtractBitsInsn(true);
+
+  if (Subtarget->hasNEON()) {
+    // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
+    // silliness like this:
+    setOperationAction(ISD::FABS, MVT::v1f64, Expand);
+    setOperationAction(ISD::FADD, MVT::v1f64, Expand);
+    setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
+    setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
+    setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
+    setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
+    setOperationAction(ISD::FMA, MVT::v1f64, Expand);
+    setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
+    setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
+    setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
+    setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
+    setOperationAction(ISD::FREM, MVT::v1f64, Expand);
+    setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
+    setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
+    setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
+    setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
+    setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
+    setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
+    setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
+    setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
+    setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
+    setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
+    setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
+    setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
+
+    setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
+    setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
+
+    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+
+    // ARM64 doesn't have a direct vector ->f32 conversion instructions for
+    // elements smaller than i32, so promote the input to i32 first.
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
+    // Similarly, there is no direct i32 -> f64 vector conversion instruction.
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
+
+    // ARM64 doesn't have MUL.2d:
+    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
+    setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
+    // Likewise, narrowing and extending vector loads/stores aren't handled
+    // directly.
+    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+
+      setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
+                         Expand);
+
+      for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+           InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+        setTruncStoreAction((MVT::SimpleValueType)VT,
+                            (MVT::SimpleValueType)InnerVT, Expand);
+      setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+      setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+      setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    }
+
+    // ARM64 has implementations of a lot of rounding-like FP operations.
+    static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
+    for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
+      MVT Ty = RoundingVecTypes[I];
+      setOperationAction(ISD::FFLOOR, Ty, Legal);
+      setOperationAction(ISD::FNEARBYINT, Ty, Legal);
+      setOperationAction(ISD::FCEIL, Ty, Legal);
+      setOperationAction(ISD::FRINT, Ty, Legal);
+      setOperationAction(ISD::FTRUNC, Ty, Legal);
+      setOperationAction(ISD::FROUND, Ty, Legal);
+    }
+  }
 }
 
 void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
@@ -1662,8 +1680,9 @@ SDValue ARM64TargetLowering::LowerFormalArguments(
                RegVT == MVT::v1f64 || RegVT == MVT::v2i32 ||
                RegVT == MVT::v4i16 || RegVT == MVT::v8i8)
         RC = &ARM64::FPR64RegClass;
-      else if (RegVT == MVT::v2i64 || RegVT == MVT::v4i32 ||
-               RegVT == MVT::v8i16 || RegVT == MVT::v16i8)
+      else if (RegVT == MVT::f128 ||RegVT == MVT::v2i64 ||
+               RegVT == MVT::v4i32||RegVT == MVT::v8i16 ||
+               RegVT == MVT::v16i8)
         RC = &ARM64::FPR128RegClass;
       else
         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
@@ -1747,13 +1766,6 @@ void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
   unsigned FirstVariadicGPR =
       CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
 
-  static const MCPhysReg FPRArgRegs[] = { ARM64::Q0, ARM64::Q1, ARM64::Q2,
-                                          ARM64::Q3, ARM64::Q4, ARM64::Q5,
-                                          ARM64::Q6, ARM64::Q7 };
-  static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
-  unsigned FirstVariadicFPR =
-      CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
-
   unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
   int GPRIdx = 0;
   if (GPRSaveSize != 0) {
@@ -1772,30 +1784,38 @@ void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
                         DAG.getConstant(8, getPointerTy()));
     }
   }
-
-  unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
-  int FPRIdx = 0;
-  if (FPRSaveSize != 0) {
-    FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
-
-    SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
-
-    for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
-      unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &ARM64::FPR128RegClass);
-      SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::v2i64);
-      SDValue Store =
-          DAG.getStore(Val.getValue(1), DL, Val, FIN,
-                       MachinePointerInfo::getStack(i * 16), false, false, 0);
-      MemOps.push_back(Store);
-      FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
-                        DAG.getConstant(16, getPointerTy()));
-    }
-  }
-
   FuncInfo->setVarArgsGPRIndex(GPRIdx);
   FuncInfo->setVarArgsGPRSize(GPRSaveSize);
-  FuncInfo->setVarArgsFPRIndex(FPRIdx);
-  FuncInfo->setVarArgsFPRSize(FPRSaveSize);
+
+  if (Subtarget->hasFPARMv8()) {
+    static const MCPhysReg FPRArgRegs[] = { ARM64::Q0, ARM64::Q1, ARM64::Q2,
+                                            ARM64::Q3, ARM64::Q4, ARM64::Q5,
+                                            ARM64::Q6, ARM64::Q7 };
+    static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
+    unsigned FirstVariadicFPR =
+        CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
+
+    unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
+    int FPRIdx = 0;
+    if (FPRSaveSize != 0) {
+      FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+
+      SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+
+      for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+        unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &ARM64::FPR128RegClass);
+        SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::v2i64);
+        SDValue Store =
+            DAG.getStore(Val.getValue(1), DL, Val, FIN,
+                         MachinePointerInfo::getStack(i * 16), false, false, 0);
+        MemOps.push_back(Store);
+        FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+                          DAG.getConstant(16, getPointerTy()));
+      }
+    }
+    FuncInfo->setVarArgsFPRIndex(FPRIdx);
+    FuncInfo->setVarArgsFPRSize(FPRSaveSize);
+  }
 
   if (!MemOps.empty()) {
     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td
index e6239fcbc5c..64113d89625 100644
--- a/lib/Target/ARM64/ARM64InstrFormats.td
+++ b/lib/Target/ARM64/ARM64InstrFormats.td
@@ -3077,6 +3077,8 @@ class ExceptionGeneration op1, bits<2> ll, string asm>
   let Inst{1-0} = ll;
 }
 
+let Predicates = [HasFPARMv8] in {
+
 //---
 // Floating point to integer conversion
 //---
@@ -3708,6 +3710,7 @@ multiclass FPMoveImmediate {
     let Inst{22} = 1;
   }
 }
+} // end of 'let Predicates = [HasFPARMv8]'
 
 //----------------------------------------------------------------------------
 // AdvSIMD
@@ -3746,6 +3749,20 @@ def VectorIndexD : Operand, ImmLeaf,
 
+def MemorySIMDNoIndexOperand : AsmOperandClass {
+  let Name = "MemorySIMDNoIndex";
+  let ParserMethod = "tryParseNoIndexMemory";
+}
+def am_simdnoindex : Operand,
+                     ComplexPattern {
+  let PrintMethod = "printAMNoIndex";
+  let ParserMatchClass = MemorySIMDNoIndexOperand;
+  let MIOperandInfo = (ops GPR64sp:$base);
+  let DecoderMethod = "DecodeGPR64spRegisterClass";
+}
+
+let Predicates = [HasNEON] in {
+
 //----------------------------------------------------------------------------
 // AdvSIMD three register vector instructions
 //----------------------------------------------------------------------------
@@ -4634,12 +4651,14 @@ multiclass SIMDDifferentThreeVectorBD opc, string asm,
   def v16i8 : BaseSIMDDifferentThreeVector;
-  def v1i64 : BaseSIMDDifferentThreeVector;
-  def v2i64 : BaseSIMDDifferentThreeVector;
+  let Predicates = [HasCrypto] in {
+  def v1i64 : BaseSIMDDifferentThreeVector;
+  def v2i64 : BaseSIMDDifferentThreeVector;
+  }
 
   def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)),
                           (v8i8 (extract_high_v16i8 V128:$Rm)))),
@@ -7366,17 +7385,6 @@ multiclass SIMDVectorLShiftLongBHSD opc, string asm,
 // ", #0" constant and handle post-indexing explicitly, so we use
 // a more specialized parse method for them. Otherwise, it's the same as
 // the general am_noindex handling.
-def MemorySIMDNoIndexOperand : AsmOperandClass {
-  let Name = "MemorySIMDNoIndex";
-  let ParserMethod = "tryParseNoIndexMemory";
-}
-def am_simdnoindex : Operand,
-                     ComplexPattern {
-  let PrintMethod = "printAMNoIndex";
-  let ParserMatchClass = MemorySIMDNoIndexOperand;
-  let MIOperandInfo = (ops GPR64sp:$base);
-  let DecoderMethod = "DecodeGPR64spRegisterClass";
-}
 
 class BaseSIMDLdSt opcode, bits<2> size, string asm,
                    dag oops, dag iops, list pattern>
@@ -8191,11 +8199,13 @@ multiclass SIMDLdSt4SingleAliases {
   defm : SIMDLdStSingleAliases;
   defm : SIMDLdStSingleAliases;
 }
+} // end of 'let Predicates = [HasNEON]'
 
 //----------------------------------------------------------------------------
 // Crypto extensions
 //----------------------------------------------------------------------------
 
+let Predicates = [HasCrypto] in {
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class AESBase opc, string asm, dag outs, dag ins, string cstr,
               list pat>
@@ -8285,6 +8295,7 @@ class SHATiedInstVV opc, string asm, Intrinsic OpNode>
 class SHAInstSS opc, string asm, Intrinsic OpNode>
     : SHA2OpInst;
+} // end of 'let Predicates = [HasCrypto]'
 
 // Allow the size specifier tokens to be upper case, not just lower.
 def : TokenAlias<".8B", ".8b">;
diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/ARM64/ARM64InstrInfo.cpp
index 4a164b15e2e..2c2b3ec19c6 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.cpp
+++ b/lib/Target/ARM64/ARM64InstrInfo.cpp
@@ -1102,6 +1102,8 @@ void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
                                       unsigned SrcReg, bool KillSrc,
                                       unsigned Opcode,
                                       llvm::ArrayRef Indices) const {
+  assert(getSubTarget().hasNEON() &&
+         "Unexpected register copy without NEON");
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
@@ -1261,52 +1263,91 @@ void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   if (ARM64::FPR128RegClass.contains(DestReg) &&
       ARM64::FPR128RegClass.contains(SrcReg)) {
-    BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
-        SrcReg, getKillRegState(KillSrc));
+    if(getSubTarget().hasNEON()) {
+      BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
+          SrcReg, getKillRegState(KillSrc));
+    } else {
+      BuildMI(MBB, I, DL, get(ARM64::STRQpre))
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .addReg(ARM64::SP)
+          .addImm(-16);
+      BuildMI(MBB, I, DL, get(ARM64::LDRQpre))
+          .addReg(DestReg, RegState::Define)
+          .addReg(ARM64::SP)
+          .addImm(16);
+    }
     return;
   }
 
   if (ARM64::FPR64RegClass.contains(DestReg) &&
       ARM64::FPR64RegClass.contains(SrcReg)) {
-    DestReg =
-        RI.getMatchingSuperReg(DestReg, ARM64::dsub, &ARM64::FPR128RegClass);
-    SrcReg =
-        RI.getMatchingSuperReg(SrcReg, ARM64::dsub, &ARM64::FPR128RegClass);
-    BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
-        SrcReg, getKillRegState(KillSrc));
+    if(getSubTarget().hasNEON()) {
+      DestReg =
+          RI.getMatchingSuperReg(DestReg, ARM64::dsub, &ARM64::FPR128RegClass);
+      SrcReg =
+          RI.getMatchingSuperReg(SrcReg, ARM64::dsub, &ARM64::FPR128RegClass);
+      BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
+          SrcReg, getKillRegState(KillSrc));
+    } else {
+      BuildMI(MBB, I, DL, get(ARM64::FMOVDr), DestReg)
+          .addReg(SrcReg, getKillRegState(KillSrc));
+    }
     return;
   }
 
   if (ARM64::FPR32RegClass.contains(DestReg) &&
       ARM64::FPR32RegClass.contains(SrcReg)) {
-    DestReg =
-        RI.getMatchingSuperReg(DestReg, ARM64::ssub, &ARM64::FPR128RegClass);
-    SrcReg =
-        RI.getMatchingSuperReg(SrcReg, ARM64::ssub, &ARM64::FPR128RegClass);
-    BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
-        SrcReg, getKillRegState(KillSrc));
+    if(getSubTarget().hasNEON()) {
+      DestReg =
+          RI.getMatchingSuperReg(DestReg, ARM64::ssub, &ARM64::FPR128RegClass);
+      SrcReg =
+          RI.getMatchingSuperReg(SrcReg, ARM64::ssub, &ARM64::FPR128RegClass);
+      BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
+          SrcReg, getKillRegState(KillSrc));
+    } else {
+      BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg)
+          .addReg(SrcReg, getKillRegState(KillSrc));
+    }
     return;
   }
 
   if (ARM64::FPR16RegClass.contains(DestReg) &&
       ARM64::FPR16RegClass.contains(SrcReg)) {
-    DestReg =
-        RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR128RegClass);
-    SrcReg =
-        RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR128RegClass);
-    BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
-        SrcReg, getKillRegState(KillSrc));
+    if(getSubTarget().hasNEON()) {
+      DestReg =
+          RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR128RegClass);
+      SrcReg =
+          RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR128RegClass);
+      BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
+          SrcReg, getKillRegState(KillSrc));
+    } else {
+      DestReg =
+          RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR32RegClass);
+      SrcReg =
+          RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR32RegClass);
+      BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg)
+          .addReg(SrcReg, getKillRegState(KillSrc));
+    }
     return;
   }
 
   if (ARM64::FPR8RegClass.contains(DestReg) &&
      ARM64::FPR8RegClass.contains(SrcReg)) {
-    DestReg =
-        RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR128RegClass);
-    SrcReg =
-        RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR128RegClass);
-    BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
-        SrcReg, getKillRegState(KillSrc));
+    if(getSubTarget().hasNEON()) {
+      DestReg =
+          RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR128RegClass);
+      SrcReg =
+          RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR128RegClass);
+      BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
+          SrcReg, getKillRegState(KillSrc));
+    } else {
+      DestReg =
+          RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR32RegClass);
+      SrcReg =
+          RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR32RegClass);
+      BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg)
+          .addReg(SrcReg, getKillRegState(KillSrc));
+    }
     return;
   }
@@ -1389,26 +1430,43 @@ void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   case 16:
     if (ARM64::FPR128RegClass.hasSubClassEq(RC))
       Opc = ARM64::STRQui;
-    else if (ARM64::DDRegClass.hasSubClassEq(RC))
+    else if (ARM64::DDRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register store without NEON");
       Opc = ARM64::ST1Twov1d, Offset = false;
+    }
     break;
   case 24:
-    if (ARM64::DDDRegClass.hasSubClassEq(RC))
+    if (ARM64::DDDRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register store without NEON");
       Opc = ARM64::ST1Threev1d, Offset = false;
+    }
    break;
   case 32:
-    if (ARM64::DDDDRegClass.hasSubClassEq(RC))
+    if (ARM64::DDDDRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register store without NEON");
       Opc = ARM64::ST1Fourv1d, Offset = false;
-    else if (ARM64::QQRegClass.hasSubClassEq(RC))
+    } else if (ARM64::QQRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register store without NEON");
       Opc = ARM64::ST1Twov2d, Offset = false;
+    }
     break;
   case 48:
-    if (ARM64::QQQRegClass.hasSubClassEq(RC))
+    if (ARM64::QQQRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register store without NEON");
       Opc = ARM64::ST1Threev2d, Offset = false;
+    }
     break;
   case 64:
-    if (ARM64::QQQQRegClass.hasSubClassEq(RC))
+    if (ARM64::QQQQRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register store without NEON");
       Opc = ARM64::ST1Fourv2d, Offset = false;
+    }
     break;
   }
   assert(Opc && "Unknown register class");
@@ -1471,26 +1529,43 @@ void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   case 16:
     if (ARM64::FPR128RegClass.hasSubClassEq(RC))
       Opc = ARM64::LDRQui;
-    else if (ARM64::DDRegClass.hasSubClassEq(RC))
+    else if (ARM64::DDRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register load without NEON");
       Opc = ARM64::LD1Twov1d, Offset = false;
+    }
     break;
   case 24:
-    if (ARM64::DDDRegClass.hasSubClassEq(RC))
+    if (ARM64::DDDRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register load without NEON");
       Opc = ARM64::LD1Threev1d, Offset = false;
+    }
     break;
   case 32:
-    if (ARM64::DDDDRegClass.hasSubClassEq(RC))
+    if (ARM64::DDDDRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register load without NEON");
       Opc = ARM64::LD1Fourv1d, Offset = false;
-    else if (ARM64::QQRegClass.hasSubClassEq(RC))
+    } else if (ARM64::QQRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register load without NEON");
       Opc = ARM64::LD1Twov2d, Offset = false;
+    }
     break;
   case 48:
-    if (ARM64::QQQRegClass.hasSubClassEq(RC))
+    if (ARM64::QQQRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register load without NEON");
       Opc = ARM64::LD1Threev2d, Offset = false;
+    }
     break;
   case 64:
-    if (ARM64::QQQQRegClass.hasSubClassEq(RC))
+    if (ARM64::QQQQRegClass.hasSubClassEq(RC)) {
+      assert(getSubTarget().hasNEON() &&
+             "Unexpected register load without NEON");
       Opc = ARM64::LD1Fourv2d, Offset = false;
+    }
     break;
   }
   assert(Opc && "Unknown register class");
diff --git a/lib/Target/ARM64/ARM64InstrInfo.h b/lib/Target/ARM64/ARM64InstrInfo.h
index 2591ca072d0..7f02e94cb28 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.h
+++ b/lib/Target/ARM64/ARM64InstrInfo.h
@@ -44,6 +44,8 @@ public:
   /// always be able to get register info as well (through this method).
   const ARM64RegisterInfo &getRegisterInfo() const { return RI; }
 
+  const ARM64Subtarget &getSubTarget() const { return Subtarget; }
+
   unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
 
   bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td
index 9cfb38f48f0..ab129cb46a5 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -11,6 +11,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasFPARMv8  : Predicate<"Subtarget->hasFPARMv8()">,
+                  AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
+def HasNEON     : Predicate<"Subtarget->hasNEON()">,
+                  AssemblerPredicate<"FeatureNEON", "neon">;
+def HasCrypto   : Predicate<"Subtarget->hasCrypto()">,
+                  AssemblerPredicate<"FeatureCrypto", "crypto">;
+
 //===----------------------------------------------------------------------===//
 // ARM64-specific DAG Nodes.
 //
diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
index f4116891809..40228021e42 100644
--- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
+++ b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
@@ -87,16 +87,20 @@ public:
   /// @{
 
   unsigned getNumberOfRegisters(bool Vector) const override {
-    if (Vector)
-      return 32;
-
+    if (Vector) {
+      if (ST->hasNEON())
+        return 32;
+      return 0;
+    }
     return 31;
   }
 
   unsigned getRegisterBitWidth(bool Vector) const override {
-    if (Vector)
-      return 128;
-
+    if (Vector) {
+      if (ST->hasNEON())
+        return 128;
+      return 0;
+    }
     return 64;
   }
diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
index 3c5f1d8c66c..d1fee2e7c6a 100644
--- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
+++ b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
@@ -107,6 +107,9 @@ public:
                  const MCInstrInfo &MII)
       : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
     MCAsmParserExtension::Initialize(_Parser);
+
+    // Initialize the set of available features.
+    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
   }
 
   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -3815,6 +3818,8 @@ bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
   }
 }
 
+static const char *getSubtargetFeatureName(unsigned Val);
+
 bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
@@ -4247,7 +4252,21 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     Out.EmitInstruction(Inst, STI);
     return false;
   }
-  case Match_MissingFeature:
+  case Match_MissingFeature: {
+    assert(ErrorInfo && "Unknown missing feature!");
+    // Special case the error message for the very common case where only
+    // a single subtarget feature is missing (neon, e.g.).
+    std::string Msg = "instruction requires:";
+    unsigned Mask = 1;
+    for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+      if (ErrorInfo & Mask) {
+        Msg += " ";
+        Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+      }
+      Mask <<= 1;
+    }
+    return Error(IDLoc, Msg);
+  }
   case Match_MnemonicFail:
     return showMatchError(IDLoc, MatchResult);
   case Match_InvalidOperand: {
@@ -4494,6 +4513,7 @@ extern "C" void LLVMInitializeARM64AsmParser() {
 }
 
 #define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
 #define GET_MATCHER_IMPLEMENTATION
 #include "ARM64GenAsmMatcher.inc"
diff --git a/test/CodeGen/ARM64/complex-copy-noneon.ll b/test/CodeGen/ARM64/complex-copy-noneon.ll
new file mode 100644
index 00000000000..f65b1161282
--- /dev/null
+++ b/test/CodeGen/ARM64/complex-copy-noneon.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s
+
+; The DAG combiner decided to use a vector load/store for this struct copy
+; previously. This probably shouldn't happen without NEON, but the most
+; important thing is that it compiles.
+
+define void @store_combine() nounwind {
+  %src = alloca { double, double }, align 8
+  %dst = alloca { double, double }, align 8
+
+  %src.realp = getelementptr inbounds { double, double }* %src, i32 0, i32 0
+  %src.real = load double* %src.realp
+  %src.imagp = getelementptr inbounds { double, double }* %src, i32 0, i32 1
+  %src.imag = load double* %src.imagp
+
+  %dst.realp = getelementptr inbounds { double, double }* %dst, i32 0, i32 0
+  %dst.imagp = getelementptr inbounds { double, double }* %dst, i32 0, i32 1
+  store double %src.real, double* %dst.realp
+  store double %src.imag, double* %dst.imagp
+  ret void
+}
diff --git a/test/CodeGen/ARM64/crypto.ll b/test/CodeGen/ARM64/crypto.ll
index 3804310287e..0020865bcd5 100644
--- a/test/CodeGen/ARM64/crypto.ll
+++ b/test/CodeGen/ARM64/crypto.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s | FileCheck %s
+; RUN: llc -march=arm64 -mattr=crypto -arm64-neon-syntax=apple -o - %s | FileCheck %s
 
 declare <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
 declare <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
diff --git a/test/CodeGen/ARM64/reg-copy-noneon.ll b/test/CodeGen/ARM64/reg-copy-noneon.ll
new file mode 100644
index 00000000000..29255ef187c
--- /dev/null
+++ b/test/CodeGen/ARM64/reg-copy-noneon.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s | FileCheck %s
+
+define float @copy_FPR32(float %a, float %b) {
+;CHECK-LABEL: copy_FPR32:
+;CHECK: fmov s0, s1
+  ret float %b;
+}
+
+define double @copy_FPR64(double %a, double %b) {
+;CHECK-LABEL: copy_FPR64:
+;CHECK: fmov d0, d1
+  ret double %b;
+}
+
+define fp128 @copy_FPR128(fp128 %a, fp128 %b) {
+;CHECK-LABEL: copy_FPR128:
+;CHECK: str q1, [sp, #-16]!
+;CHECK-NEXT: ldr q0, [sp, #16]!
+  ret fp128 %b;
+}
diff --git a/test/MC/ARM64/advsimd.s b/test/MC/ARM64/advsimd.s
index fce0832f12d..bd1924e4cd3 100644
--- a/test/MC/ARM64/advsimd.s
+++ b/test/MC/ARM64/advsimd.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 -show-encoding < %s | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 -show-encoding < %s | FileCheck %s
 
 foo:
 
diff --git a/test/MC/ARM64/aliases.s b/test/MC/ARM64/aliases.s
index 4fe4b93d4fc..262149de3e6 100644
--- a/test/MC/ARM64/aliases.s
+++ b/test/MC/ARM64/aliases.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 -show-encoding < %s | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -output-asm-variant=1 -show-encoding < %s | FileCheck %s
 
 foo:
 ;-----------------------------------------------------------------------------
diff --git a/test/MC/ARM64/arithmetic-encoding.s b/test/MC/ARM64/arithmetic-encoding.s
index 6d28bce5a83..2193feb3f6a 100644
--- a/test/MC/ARM64/arithmetic-encoding.s
+++ b/test/MC/ARM64/arithmetic-encoding.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -show-encoding < %s | FileCheck %s
 
 foo:
 ;==---------------------------------------------------------------------------==
diff --git a/test/MC/ARM64/crypto.s b/test/MC/ARM64/crypto.s
index d7c4ec3df49..51efd2132a7 100644
--- a/test/MC/ARM64/crypto.s
+++ b/test/MC/ARM64/crypto.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding -output-asm-variant=1 < %s | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -show-encoding -output-asm-variant=1 < %s | FileCheck %s
 
 foo:
   aese.16b v0, v1
diff --git a/test/MC/ARM64/diagno-predicate.s b/test/MC/ARM64/diagno-predicate.s
new file mode 100644
index 00000000000..399a85c631c
--- /dev/null
+++ b/test/MC/ARM64/diagno-predicate.s
@@ -0,0 +1,19 @@
+// RUN: not llvm-mc -triple arm64-linux-gnu -mattr=-fp-armv8 < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+
+  fcvt d0, s0
+// CHECK-ERROR: error: instruction requires: fp-armv8
+// CHECK-ERROR-NEXT: fcvt d0, s0
+// CHECK-ERROR-NEXT: ^
+
+  fmla v9.2s, v9.2s, v0.2s
+// CHECK-ERROR: error: instruction requires: neon
+// CHECK-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s
+// CHECK-ERROR-NEXT: ^
+
+  pmull v0.1q, v1.1d, v2.1d
+// CHECK-ERROR: error: instruction requires: crypto
+// CHECK-ERROR-NEXT: pmull v0.1q, v1.1d, v2.1d
+// CHECK-ERROR-NEXT: ^
+
diff --git a/test/MC/ARM64/fp-encoding.s b/test/MC/ARM64/fp-encoding.s
index 7c7208f770d..08a7b6f0278 100644
--- a/test/MC/ARM64/fp-encoding.s
+++ b/test/MC/ARM64/fp-encoding.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding -output-asm-variant=1 < %s | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -show-encoding -output-asm-variant=1 < %s | FileCheck %s
 
 foo:
 ;-----------------------------------------------------------------------------
diff --git a/test/MC/ARM64/nv-cond.s b/test/MC/ARM64/nv-cond.s
index ded5ec6ad98..1b4d054d248 100644
--- a/test/MC/ARM64/nv-cond.s
+++ b/test/MC/ARM64/nv-cond.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc < %s -triple arm64 -show-encoding | FileCheck %s
+// RUN: llvm-mc < %s -triple arm64 -mattr=neon -show-encoding | FileCheck %s
 
 fcsel d28,d31,d31,nv
 csel x0,x0,x0,nv
diff --git a/test/MC/ARM64/simd-ldst.s b/test/MC/ARM64/simd-ldst.s
index 75d038307e6..30854852c28 100644
--- a/test/MC/ARM64/simd-ldst.s
+++ b/test/MC/ARM64/simd-ldst.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 -show-encoding < %s | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -output-asm-variant=1 -show-encoding < %s | FileCheck %s
 
 _ld1st1_multiple:
   ld1.8b {v0}, [x1]
diff --git a/test/MC/ARM64/vector-lists.s b/test/MC/ARM64/vector-lists.s
index e4cef610d74..0d026022375 100644
--- a/test/MC/ARM64/vector-lists.s
+++ b/test/MC/ARM64/vector-lists.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple arm64 -show-encoding < %s 2>%t | FileCheck %s
+// RUN: not llvm-mc -triple arm64 -mattr=neon -show-encoding < %s 2>%t | FileCheck %s
 // RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 
 ST4 {v0.8B-v3.8B}, [x0]
diff --git a/test/MC/ARM64/verbose-vector-case.s b/test/MC/ARM64/verbose-vector-case.s
index bd363805bbc..6f0a3812dd7 100644
--- a/test/MC/ARM64/verbose-vector-case.s
+++ b/test/MC/ARM64/verbose-vector-case.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple arm64 -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple arm64 -mattr=crypto -show-encoding < %s | FileCheck %s
 
 pmull v8.8h, v8.8b, v8.8b
 pmull2 v8.8h, v8.16b, v8.16b
diff --git a/test/MC/Disassembler/ARM64/advsimd.txt b/test/MC/Disassembler/ARM64/advsimd.txt
index a943aecdc16..f9bfc8859c4 100644
--- a/test/MC/Disassembler/ARM64/advsimd.txt
+++ b/test/MC/Disassembler/ARM64/advsimd.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 --disassemble < %s | FileCheck %s
 
 0x00 0xb8 0x20 0x0e
 0x00 0xb8 0x20 0x4e
diff --git a/test/MC/Disassembler/ARM64/canonical-form.txt b/test/MC/Disassembler/ARM64/canonical-form.txt
index 09467a31849..1c94b13b4ac 100644
--- a/test/MC/Disassembler/ARM64/canonical-form.txt
+++ b/test/MC/Disassembler/ARM64/canonical-form.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon --disassemble < %s | FileCheck %s
 
 0x00 0x08 0x00 0xc8
 
diff --git a/test/MC/Disassembler/ARM64/crypto.txt b/test/MC/Disassembler/ARM64/crypto.txt
index e163b2cd590..b905b92c636 100644
--- a/test/MC/Disassembler/ARM64/crypto.txt
+++ b/test/MC/Disassembler/ARM64/crypto.txt
@@ -1,5 +1,5 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
-# RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 --disassemble < %s | FileCheck %s --check-prefix=CHECK-APPLE
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 --disassemble < %s | FileCheck %s --check-prefix=CHECK-APPLE
 
 0x20 0x48 0x28 0x4e
 0x20 0x58 0x28 0x4e
diff --git a/test/MC/Disassembler/ARM64/non-apple-fmov.txt b/test/MC/Disassembler/ARM64/non-apple-fmov.txt
index e3c3a996c46..75cb95ce186 100644
--- a/test/MC/Disassembler/ARM64/non-apple-fmov.txt
+++ b/test/MC/Disassembler/ARM64/non-apple-fmov.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple arm64 -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64 -mattr=neon -disassemble < %s | FileCheck %s
 
 0x00 0x00 0xae 0x9e
 0x00 0x00 0xaf 0x9e
diff --git a/test/MC/Disassembler/ARM64/scalar-fp.txt b/test/MC/Disassembler/ARM64/scalar-fp.txt
index 732e1c12d2e..1f76dee64d9 100644
--- a/test/MC/Disassembler/ARM64/scalar-fp.txt
+++ b/test/MC/Disassembler/ARM64/scalar-fp.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble -output-asm-variant=1 < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon --disassemble -output-asm-variant=1 < %s | FileCheck %s
 
 #-----------------------------------------------------------------------------
 # Floating-point arithmetic
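
A quick way to exercise the new predicates from the command line (an
illustrative sketch, not part of the patch: the commands mirror the RUN lines
of the tests above, and the input file name is a placeholder):

  # Codegen without NEON: FPR128 copies now go through the stack with
  # STRQpre/LDRQpre instead of ORRv16i8 (see reg-copy-noneon.ll).
  llc -mtriple=arm64-none-linux-gnu -mattr=-neon reg-copy-noneon.ll -o -

  # The assembler names the missing feature instead of failing with a
  # generic match error (diagnostic text as in diagno-predicate.s):
  echo "fmla v9.2s, v9.2s, v0.2s" | llvm-mc -triple arm64-linux-gnu -mattr=-neon
  # error: instruction requires: neon

  # Crypto instructions stay disabled unless requested, as in crypto.s:
  echo "aese v0.16b, v1.16b" | llvm-mc -triple arm64-linux-gnu -mattr=crypto -show-encoding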