diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index dd35367d1c5..735670bf0ae 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1089,5 +1089,45 @@ class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
   // Inherit Rd in 4-0
 }
 
+// Format AdvSIMD duplicate and insert
+class NeonI_copy<bit q, bit op, bits<4> imm4,
+                 dag outs, dag ins, string asmstr,
+                 list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Imm5;
+  let Inst{31} = 0b0;
+  let Inst{30} = q;
+  let Inst{29} = op;
+  let Inst{28-21} = 0b01110000;
+  let Inst{20-16} = Imm5;
+  let Inst{15} = 0b0;
+  let Inst{14-11} = imm4;
+  let Inst{10} = 0b1;
+
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD insert from element to vector
+class NeonI_insert<bit q, bit op,
+                   dag outs, dag ins, string asmstr,
+                   list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Imm5;
+  bits<4> Imm4;
+  let Inst{31} = 0b0;
+  let Inst{30} = q;
+  let Inst{29} = op;
+  let Inst{28-21} = 0b01110000;
+  let Inst{20-16} = Imm5;
+  let Inst{15} = 0b0;
+  let Inst{14-11} = Imm4;
+  let Inst{10} = 0b1;
+
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
 }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index d8f45eb5a61..14daab33ecf 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -68,23 +68,49 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
       .addImm(A64SysReg::NZCV);
   } else if (AArch64::GPR64RegClass.contains(DestReg)) {
-    assert(AArch64::GPR64RegClass.contains(SrcReg));
-    Opc = AArch64::ORRxxx_lsl;
-    ZeroReg = AArch64::XZR;
+    if(AArch64::GPR64RegClass.contains(SrcReg)){
+      Opc = AArch64::ORRxxx_lsl;
+      ZeroReg = AArch64::XZR;
+    } else{
+      assert(AArch64::FPR64RegClass.contains(SrcReg));
+      BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
   } else if (AArch64::GPR32RegClass.contains(DestReg)) {
-    assert(AArch64::GPR32RegClass.contains(SrcReg));
-    Opc = AArch64::ORRwww_lsl;
-    ZeroReg = AArch64::WZR;
+    if(AArch64::GPR32RegClass.contains(SrcReg)){
+      Opc = AArch64::ORRwww_lsl;
+      ZeroReg = AArch64::WZR;
+    } else{
+      assert(AArch64::FPR32RegClass.contains(SrcReg));
+      BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
   } else if (AArch64::FPR32RegClass.contains(DestReg)) {
-    assert(AArch64::FPR32RegClass.contains(SrcReg));
-    BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
-      .addReg(SrcReg);
-    return;
+    if(AArch64::FPR32RegClass.contains(SrcReg)){
+      BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
+    else {
+      assert(AArch64::GPR32RegClass.contains(SrcReg));
+      BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
   } else if (AArch64::FPR64RegClass.contains(DestReg)) {
-    assert(AArch64::FPR64RegClass.contains(SrcReg));
-    BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
-      .addReg(SrcReg);
-    return;
+    if(AArch64::FPR64RegClass.contains(SrcReg)){
+      BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
+    else {
+      assert(AArch64::GPR64RegClass.contains(SrcReg));
+      BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
   } else if (AArch64::FPR128RegClass.contains(DestReg)) {
     assert(AArch64::FPR128RegClass.contains(SrcReg));
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index f600d248b68..5506affc079 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -1034,6 +1034,20 @@ defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
   return (HasShift && !ShiftOnesIn);
 }]>;
 
+def neon_uimm1_asmoperand : AsmOperandClass
+{
+  let Name = "UImm1";
+  let PredicateMethod = "isUImm<1>";
+  let RenderMethod = "addImmOperands";
+}
+
+def neon_uimm2_asmoperand : AsmOperandClass
+{
+  let Name = "UImm2";
+  let PredicateMethod = "isUImm<2>";
+  let RenderMethod = "addImmOperands";
+}
+
 def neon_uimm8_asmoperand : AsmOperandClass
 {
   let Name = "UImm8";
@@ -3015,3 +3029,363 @@ def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def neon_uimm0_bare : Operand<i64>,
+                        ImmLeaf<i64, [{return Imm == 0;}]> {
+  let ParserMatchClass = neon_uimm0_asmoperand;
+  let PrintMethod = "printNeonUImm8OperandBare";
+}
+
+def neon_uimm1_bare : Operand<i64>,
+                        ImmLeaf<i64, [{return Imm < 2;}]> {
+  let ParserMatchClass = neon_uimm1_asmoperand;
+  let PrintMethod = "printNeonUImm8OperandBare";
+}
+
+def neon_uimm2_bare : Operand<i64>,
+                        ImmLeaf<i64, [{return Imm < 4;}]> {
+  let ParserMatchClass = neon_uimm2_asmoperand;
+  let PrintMethod = "printNeonUImm8OperandBare";
+}
+
+def neon_uimm3_bare : Operand<i64>,
+                        ImmLeaf<i64, [{return Imm < 8;}]> {
+  let ParserMatchClass = uimm3_asmoperand;
+  let PrintMethod = "printNeonUImm8OperandBare";
+}
+
+def neon_uimm4_bare : Operand<i64>,
+                        ImmLeaf<i64, [{return Imm < 16;}]> {
+  let ParserMatchClass = uimm4_asmoperand;
+  let PrintMethod = "printNeonUImm8OperandBare";
+}
+
+class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
+                     RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
+  : NeonI_copy<0b1, 0b0, 0b0011,
+               (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
+               asmop # "\t$Rd." # Res # "[$Imm], $Rn",
+               [(set (ResTy VPR128:$Rd),
+                 (ResTy (vector_insert
+                   (ResTy VPR128:$src),
+                   (OpTy OpGPR:$Rn),
+                   (OpImm:$Imm))))],
+               NoItinerary> {
+  bits<4> Imm;
+  let Constraints = "$src = $Rd";
+}
+
+
+//Insert element (vector, from main)
+def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
+                           neon_uimm4_bare> {
+  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
+                           neon_uimm3_bare> {
+  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
+                           neon_uimm2_bare> {
+  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
+                           neon_uimm1_bare> {
+  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+class Neon_INS_main_pattern<ValueType ResTy, ValueType ExtResTy,
+                            RegisterClass OpGPR, ValueType OpTy,
+                            Operand OpImm, Instruction INS>
+  : Pat<(ResTy (vector_insert
+          (ResTy VPR64:$src),
+          (OpTy OpGPR:$Rn),
+          (OpImm:$Imm))),
+        (ResTy (EXTRACT_SUBREG
+          (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+            OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
+
+def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
+                                          neon_uimm3_bare, INSbw>;
+def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
+                                          neon_uimm2_bare, INShw>;
+def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
+                                          neon_uimm1_bare, INSsw>;
+def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
+                                          neon_uimm0_bare, INSdx>;
+
+class NeonI_INS_element<string asmop, string Res, ValueType ResTy,
+                        Operand ResImm, ValueType MidTy>
+  : NeonI_insert<0b1, 0b1,
+                 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
+                 ResImm:$Immd, ResImm:$Immn),
+                 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
+                 [(set (ResTy VPR128:$Rd),
+                   (ResTy (vector_insert
+                     (ResTy VPR128:$src),
+                     (MidTy (vector_extract
+                       (ResTy VPR128:$Rn),
+                       (ResImm:$Immn))),
+                     (ResImm:$Immd))))],
+                 NoItinerary> {
+  let Constraints = "$src = $Rd";
+  bits<4> Immd;
+  bits<4> Immn;
+}
+
+//Insert element (vector, from element)
+def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> {
+  let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
+  let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
+}
+def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> {
+  let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
+  let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
+  // bit 11 is unspecified.
+}
+def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> {
+  let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
+  let Inst{14-13} = {Immn{1}, Immn{0}};
+  // bits 11-12 are unspecified.
+}
+def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
+  let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
+  let Inst{14} = Immn{0};
+  // bits 11-13 are unspecified.
+}
+
+multiclass Neon_INS_elt_pattern<ValueType NaTy, Operand NaImm,
+                                ValueType StTy, Operand StImm,
+                                ValueType MidTy, Instruction INS> {
+def : Pat<(NaTy (vector_insert
+            (NaTy VPR64:$src),
+            (MidTy (vector_extract
+              (StTy VPR128:$Rn),
+              (StImm:$Immn))),
+            (NaImm:$Immd))),
+          (NaTy (EXTRACT_SUBREG
+            (StTy (INS
+              (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+              (StTy VPR128:$Rn),
+              NaImm:$Immd,
+              StImm:$Immn)),
+            sub_64))>;
+
+def : Pat<(StTy (vector_insert
+            (StTy VPR128:$src),
+            (MidTy (vector_extract
+              (NaTy VPR64:$Rn),
+              (NaImm:$Immn))),
+            (StImm:$Immd))),
+          (StTy (INS
+            (StTy VPR128:$src),
+            (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+            StImm:$Immd,
+            NaImm:$Immn))>;
+
+def : Pat<(NaTy (vector_insert
+            (NaTy VPR64:$src),
+            (MidTy (vector_extract
+              (NaTy VPR64:$Rn),
+              (NaImm:$Immn))),
+            (NaImm:$Immd))),
+          (NaTy (EXTRACT_SUBREG
+            (StTy (INS
+              (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+              (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+              NaImm:$Immd,
+              NaImm:$Immn)),
+            sub_64))>;
+}
+
+defm INSb_pattern : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare,
+                                         v16i8, neon_uimm4_bare, i32, INSELb>;
+defm INSh_pattern : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare,
+                                         v8i16, neon_uimm3_bare, i32, INSELh>;
+defm INSs_pattern : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare,
+                                         v4i32, neon_uimm2_bare, i32, INSELs>;
+defm INSd_pattern : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare,
+                                         v2i64, neon_uimm1_bare, i64, INSELd>;
+
+class NeonI_SMOV<string asmop, string Res, bit Q,
+                 ValueType OpTy, ValueType eleTy, Operand OpImm,
+                 RegisterClass ResGPR, ValueType ResTy>
+  : NeonI_copy<Q, 0b0, 0b0101,
+               (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
+               asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
+               [(set (ResTy ResGPR:$Rd),
+                 (ResTy (sext_inreg
+                   (ResTy (vector_extract
+                     (OpTy VPR128:$Rn), (OpImm:$Imm))),
+                   eleTy)))],
+               NoItinerary> {
+  bits<4> Imm;
+}
+
+//Signed integer move (main, from element)
+def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
+                        GPR32, i32> {
+  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
+                        GPR32, i32> {
+  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
+                        GPR64, i64> {
+  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
+                        GPR64, i64> {
+  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
+                        GPR64, i64> {
+  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+
+multiclass Neon_SMOVx_pattern<ValueType StTy, ValueType NaTy,
+                              ValueType eleTy, Operand StImm,
+                              Operand NaImm, Instruction SMOVI> {
+  def : Pat<(i64 (sext_inreg
+              (i64 (anyext
+                (i32 (vector_extract
+                  (StTy VPR128:$Rn), (StImm:$Imm))))),
+              eleTy)),
+            (SMOVI VPR128:$Rn, StImm:$Imm)>;
+
+  def : Pat<(i64 (sext
+              (i32 (vector_extract
+                (StTy VPR128:$Rn), (StImm:$Imm))))),
+            (SMOVI VPR128:$Rn, StImm:$Imm)>;
+
+  def : Pat<(i64 (sext_inreg
+              (i64 (vector_extract
+                (NaTy VPR64:$Rn), (NaImm:$Imm))),
+              eleTy)),
+            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+              NaImm:$Imm)>;
+
+  def : Pat<(i64 (sext_inreg
+              (i64 (anyext
+                (i32 (vector_extract
+                  (NaTy VPR64:$Rn), (NaImm:$Imm))))),
+              eleTy)),
+            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+              NaImm:$Imm)>;
+
+  def : Pat<(i64 (sext
+              (i32 (vector_extract
+                (NaTy VPR64:$Rn), (NaImm:$Imm))))),
+            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+              NaImm:$Imm)>;
+}
+
+defm SMOVxb_pattern : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
+                                         neon_uimm3_bare, SMOVxb>;
+defm SMOVxh_pattern : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
+                                         neon_uimm2_bare, SMOVxh>;
+defm SMOVxs_pattern : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+                                         neon_uimm1_bare, SMOVxs>;
+
+class Neon_SMOVw_pattern<ValueType StTy, ValueType NaTy,
+                         ValueType eleTy, Operand StImm,
+                         Operand NaImm, Instruction SMOVI>
+  : Pat<(i32 (sext_inreg
+          (i32 (vector_extract
+            (NaTy VPR64:$Rn), (NaImm:$Imm))),
+          eleTy)),
+        (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+          NaImm:$Imm)>;
+
+def SMOVwb_pattern : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
+                                        neon_uimm3_bare, SMOVwb>;
+def SMOVwh_pattern : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
+                                        neon_uimm2_bare, SMOVwh>;
+
+
+class NeonI_UMOV<string asmop, string Res, bit Q,
+                 ValueType OpTy, Operand OpImm,
+                 RegisterClass ResGPR, ValueType ResTy>
+  : NeonI_copy<Q, 0b0, 0b0111,
+               (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
+               asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
+               [(set (ResTy ResGPR:$Rd),
+                  (ResTy (vector_extract
+                    (OpTy VPR128:$Rn), (OpImm:$Imm))))],
+               NoItinerary> {
+  bits<4> Imm;
+}
+
+//Unsigned integer move (main, from element)
+def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
+                        GPR32, i32> {
+  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
+                        GPR32, i32> {
+  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
+                        GPR32, i32> {
+  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
+                        GPR64, i64> {
+  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+class Neon_UMOV_pattern<ValueType StTy, ValueType NaTy, ValueType ResTy,
+                        Operand StImm, Operand NaImm,
+                        Instruction SMOVI>
+  : Pat<(ResTy (vector_extract
+          (NaTy VPR64:$Rn), NaImm:$Imm)),
+        (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+          NaImm:$Imm)>;
+
+def UMOVwb_pattern : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
+                                       neon_uimm3_bare, UMOVwb>;
+def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
+                                       neon_uimm2_bare, UMOVwh>;
+def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+                                       neon_uimm1_bare, UMOVws>;
+def UMOVxd_pattern : Neon_UMOV_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
+                                       neon_uimm0_bare, UMOVxd>;
+
+def : Pat<(i32 (and
+            (i32 (vector_extract
+              (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
+            255)),
+          (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
+
+def : Pat<(i32 (and
+            (i32 (vector_extract
+              (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
+            65535)),
+          (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
+
+def : Pat<(i64 (zext
+            (i32 (vector_extract
+              (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
+          (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
+
+def : Pat<(i32 (and
+            (i32 (vector_extract
+              (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
+            255)),
+          (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+            neon_uimm3_bare:$Imm)>;
+
+def : Pat<(i32 (and
+            (i32 (vector_extract
+              (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
+            65535)),
+          (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+            neon_uimm2_bare:$Imm)>;
+
+def : Pat<(i64 (zext
+            (i32 (vector_extract
+              (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
+          (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+            neon_uimm0_bare:$Imm)>;
+
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index d2b6f6d0127..26bd797e3b3 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -475,6 +475,18 @@ void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
   O.write_hex(Imm);
 }
 
+void AArch64InstPrinter::printNeonUImm8OperandBare(const MCInst *MI,
+                                                   unsigned OpNum,
+                                                   raw_ostream &O) {
+  const MCOperand &MOUImm = MI->getOperand(OpNum);
+
+  assert(MOUImm.isImm()
+         && "Immediate operand required for Neon vector immediate inst.");
+
+  unsigned Imm = MOUImm.getImm();
+  O << Imm;
+}
+
 void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
                                                     unsigned OpNum,
                                                     raw_ostream &O) {
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index b608908ca5b..71c9f4a0291 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -170,6 +170,8 @@ public:
                           raw_ostream &O);
   void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printNeonUImm8OperandBare(const MCInst *MI, unsigned OpNum,
+                                 raw_ostream &O);
   void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
                                   raw_ostream &O);
 };
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
new file mode 100644
index 00000000000..c2854ed6471
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -0,0 +1,232 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+
+define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
+;CHECK: ins {{v[0-31]+}}.b[15], {{w[0-31]+}}
+  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
+;CHECK: ins {{v[0-31]+}}.h[6], {{w[0-31]+}}
+  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
+  ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
+;CHECK: ins {{v[0-31]+}}.s[2], {{w[0-31]+}}
+  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
+  ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
+;CHECK: ins {{v[0-31]+}}.d[1], {{x[0-31]+}}
+  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
+  ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
+;CHECK: ins {{v[0-31]+}}.b[5], {{w[0-31]+}}
+  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
+  ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
+;CHECK: ins {{v[0-31]+}}.h[3], {{w[0-31]+}}
+  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
+  ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
+;CHECK: ins {{v[0-31]+}}.s[1], {{w[0-31]+}}
+  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+  ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
+;CHECK: ins {{v[0-31]+}}.b[15], {{v[0-31]+}}.b[2]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
+  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
+  ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
+;CHECK: ins {{v[0-31]+}}.h[7], {{v[0-31]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
+  ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
+;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
+;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0]
+  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
+  ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
+;CHECK: ins {{v[0-31]+}}.b[4], {{v[0-31]+}}.b[2]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
+  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
+  ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
+;CHECK: ins {{v[0-31]+}}.h[3], {{v[0-31]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
+  ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
+;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[0]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
+  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
+  ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
+;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0]
+  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
+  ret <1 x i64> %tmp4
+}
+
+define i32 @umovw16b(<16 x i8> %tmp1) {
+;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = zext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw8h(<8 x i16> %tmp1) {
+;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = zext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw4s(<4 x i32> %tmp1) {
+;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  ret i32 %tmp3
+}
+
+define i64 @umovx2d(<2 x i64> %tmp1) {
+;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0]
+  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+  ret i64 %tmp3
+}
+
+define i32 @umovw8b(<8 x i8> %tmp1) {
+;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.b[7]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
+  %tmp4 = zext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw4h(<4 x i16> %tmp1) {
+;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = zext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw2s(<2 x i32> %tmp1) {
+;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.s[1]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+  ret i32 %tmp3
+}
+
+define i64 @umovx1d(<1 x i64> %tmp1) {
+;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0]
+  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+  ret i64 %tmp3
+}
+
+define i32 @smovw16b(<16 x i8> %tmp1) {
+;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = sext i8 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovw8h(<8 x i16> %tmp1) {
+;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovx16b(<16 x i8> %tmp1) {
+;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = sext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @smovx8h(<8 x i16> %tmp1) {
+;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i64 @smovx4s(<4 x i32> %tmp1) {
+;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  %tmp4 = sext i32 %tmp3 to i64
+  ret i64 %tmp4
+}
+
+define i32 @smovw8b(<8 x i8> %tmp1) {
+;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.b[4]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
+  %tmp4 = sext i8 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovw4h(<4 x i16> %tmp1) {
+;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovx8b(<8 x i8> %tmp1) {
+;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.b[6]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
+  %tmp4 = sext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @smovx4h(<4 x i16> %tmp1) {
+;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i64 @smovx2s(<2 x i32> %tmp1) {
+;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.s[1]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+  %tmp4 = sext i32 %tmp3 to i64
+  ret i64 %tmp4
+}
+
+
+
+
+
+
diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s
new file mode 100644
index 00000000000..44b502769c5
--- /dev/null
+++ b/test/MC/AArch64/neon-simd-copy.s
@@ -0,0 +1,71 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Insert element (vector, from main)
+//------------------------------------------------------------------------------
+         ins v2.b[2], w1
+         ins v7.h[7], w14
+         ins v20.s[0], w30
+         ins v1.d[1], x7
+
+// CHECK: ins v2.b[2], w1 // encoding: [0x22,0x1c,0x05,0x4e]
+// CHECK: ins v7.h[7], w14 // encoding: [0xc7,0x1d,0x1e,0x4e]
+// CHECK: ins v20.s[0], w30 // encoding: [0xd4,0x1f,0x04,0x4e]
+// CHECK: ins v1.d[1], x7 // encoding: [0xe1,0x1c,0x18,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Signed integer move (main, from element)
+//------------------------------------------------------------------------------
+         smov w1, v0.b[15]
+         smov w14, v6.h[4]
+         smov x1, v0.b[15]
+         smov x14, v6.h[4]
+         smov x20, v9.s[2]
+
+// CHECK: smov w1, v0.b[15] // encoding: [0x01,0x2c,0x1f,0x0e]
+// CHECK: smov w14, v6.h[4] // encoding: [0xce,0x2c,0x12,0x0e]
+// CHECK: smov x1, v0.b[15] // encoding: [0x01,0x2c,0x1f,0x4e]
+// CHECK: smov x14, v6.h[4] // encoding: [0xce,0x2c,0x12,0x4e]
+// CHECK: smov x20, v9.s[2] // encoding: [0x34,0x2d,0x14,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Unsigned integer move (main, from element)
+//------------------------------------------------------------------------------
+         umov w1, v0.b[15]
+         umov w14, v6.h[4]
+         umov w20, v9.s[2]
+         umov x7, v18.d[1]
+
+// CHECK: umov w1, v0.b[15] // encoding: [0x01,0x3c,0x1f,0x0e]
+// CHECK: umov w14, v6.h[4] // encoding: [0xce,0x3c,0x12,0x0e]
+// CHECK: umov w20, v9.s[2] // encoding: [0x34,0x3d,0x14,0x0e]
+// CHECK: umov x7, v18.d[1] // encoding: [0x47,0x3e,0x18,0x4e]
+
+//------------------------------------------------------------------------------
+// Insert element (vector, from element)
+//------------------------------------------------------------------------------
+
+         Ins v1.b[14], v3.b[6]
+         Ins v6.h[7], v7.h[5]
+         Ins v15.s[3], v22.s[2]
+         Ins v0.d[0], v4.d[1]
+
+// CHECK: ins v1.b[14], v3.b[6] // encoding: [0x61,0x34,0x1d,0x6e]
+// CHECK: ins v6.h[7], v7.h[5] // encoding: [0xe6,0x54,0x1e,0x6e]
+// CHECK: ins v15.s[3], v22.s[2] // encoding: [0xcf,0x5e,0x1c,0x6e]
+// CHECK: ins v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e]
+
+
+
+
+
+
+
+
+