From 507df402b09be986b40251fb83cb0c8a2e586b68 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Wed, 21 Oct 2009 02:15:46 +0000 Subject: [PATCH] Leave some NEON instruction encoding bits unspecified instead of setting a default value of zero. This is important for decoding the instructions. Patch by Johnny Chen, with some changes from me, too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84730 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 17 +- lib/Target/ARM/ARMInstrNEON.td | 428 ++++++++++++++++-------------- 2 files changed, 247 insertions(+), 198 deletions(-) diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 15f8dff8fbb..8225fd741bb 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1262,15 +1262,26 @@ class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{4} = op4; } +// NEON Vector Duplicate (scalar). +// Inst{19-16} is specified by subclasses. +class N2VDup op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list pattern> + : NDataI { + let Inst{24-23} = op24_23; + let Inst{21-20} = op21_20; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; +} + // NEON 2 vector register with immediate. -class N2VImm op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, +class N2VImm op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list pattern> : NDataI { let Inst{24} = op24; let Inst{23} = op23; - let Inst{21-16} = op21_16; let Inst{11-8} = op11_8; let Inst{7} = op7; let Inst{6} = op6; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 461028fe2cd..dd415dbceef 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -623,12 +623,12 @@ class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>; -// Long 2-register intrinsics. (This is currently only used for VMOVL and is -// derived from N2VImm instead of N2V because of the way the size is encoded.) -class N2VLInt op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, InstrItinClass itin, string OpcodeStr, +// Long 2-register intrinsics (currently only used for VMOVL). +class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp> - : N2VImm; @@ -1016,36 +1016,33 @@ class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. -class N2VDSh op21_16, bits<4> op11_8, bit op7, - bit op4, InstrItinClass itin, string OpcodeStr, - ValueType Ty, SDNode OpNode> - : N2VImm op11_8, bit op7, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + : N2VImm; -class N2VQSh op21_16, bits<4> op11_8, bit op7, - bit op4, InstrItinClass itin, string OpcodeStr, - ValueType Ty, SDNode OpNode> - : N2VImm op11_8, bit op7, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + : N2VImm; // Long shift by immediate. -class N2VLSh op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, ValueType ResTy, - ValueType OpTy, SDNode OpNode> - : N2VImm op11_8, bit op7, bit op6, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2VImm; // Narrow shift by immediate. -class N2VNSh op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, InstrItinClass itin, string OpcodeStr, +class N2VNSh op11_8, bit op7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2VImm op21_16, bits<4> op11_8, bit op7, // Shift right by immediate and accumulate, // both double- and quad-register. -class N2VDShAdd op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm; -class N2VQShAdd op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm; // Shift by immediate and insert, // both double- and quad-register. -class N2VDShIns op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm; -class N2VQShIns op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm; // Convert, with fractional bits immediate, // both double- and quad-register. -class N2VCvtD op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, +class N2VCvtD op11_8, bit op7, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2VImm; -class N2VCvtQ op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, +class N2VCvtQ op11_8, bit op7, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2VImm; @@ -1175,14 +1168,14 @@ multiclass N2VNInt_HSD op24_23, bits<2> op21_20, bits<2> op17_16, // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). // source operand element sizes of 16, 32 and 64 bits: -multiclass N2VLInt_QHS op11_8, bit op7, bit op6, - bit op4, string OpcodeStr, Intrinsic IntOp> { - def v8i16 : N2VLInt; - def v4i32 : N2VLInt; - def v2i64 : N2VLInt; +multiclass N2VLInt_QHS op24_23, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, Intrinsic IntOp> { + def v8i16 : N2VLInt; + def v4i32 : N2VLInt; + def v2i64 : N2VLInt; } @@ -1461,24 +1454,38 @@ multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VSh_QHSD op11_8, bit op4, InstrItinClass itin, string OpcodeStr, SDNode OpNode> { // 64-bit vector types. - def v8i8 : N2VDSh; - def v4i16 : N2VDSh; - def v2i32 : N2VDSh; - def v1i64 : N2VDSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQSh; - def v8i16 : N2VQSh; - def v4i32 : N2VQSh; - def v2i64 : N2VQSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh; + // imm6 = xxxxxx } @@ -1487,24 +1494,38 @@ multiclass N2VSh_QHSD op11_8, bit op4, multiclass N2VShAdd_QHSD op11_8, bit op4, string OpcodeStr, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShAdd; - def v4i16 : N2VDShAdd; - def v2i32 : N2VDShAdd; - def v1i64 : N2VDShAdd { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShAdd { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShAdd { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShAdd; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQShAdd; - def v8i16 : N2VQShAdd; - def v4i32 : N2VQShAdd; - def v2i64 : N2VQShAdd { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShAdd { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShAdd { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShAdd; + // imm6 = xxxxxx } @@ -1513,24 +1534,75 @@ multiclass N2VShAdd_QHSD op11_8, bit op4, multiclass N2VShIns_QHSD op11_8, bit op4, string OpcodeStr, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShIns; - def v4i16 : N2VDShIns; - def v2i32 : N2VDShIns; - def v1i64 : N2VDShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQShIns; - def v8i16 : N2VQShIns; - def v4i32 : N2VQShIns; - def v2i64 : N2VQShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns; + // imm6 = xxxxxx +} + +// Neon Shift Long operations, +// element sizes of 8, 16, 32 bits: +multiclass N2VLSh_QHS op11_8, bit op7, bit op6, + bit op4, string OpcodeStr, SDNode OpNode> { + def v8i16 : N2VLSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i32 : N2VLSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i64 : N2VLSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } +} + +// Neon Shift Narrow operations, +// element sizes of 16, 32, 64 bits: +multiclass N2VNSh_HSD op11_8, bit op7, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + SDNode OpNode> { + def v8i8 : N2VNSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VNSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VNSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } } //===----------------------------------------------------------------------===// @@ -2044,34 +2116,25 @@ defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; // VSHLL : Vector Shift Left Long -def VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8", - v8i16, v8i8, NEONvshlls>; -def VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16", - v4i32, v4i16, NEONvshlls>; -def VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32", - v2i64, v2i32, NEONvshlls>; -def VSHLLu8 : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8", - v8i16, v8i8, NEONvshllu>; -def VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16", - v4i32, v4i16, NEONvshllu>; -def VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32", - v2i64, v2i32, NEONvshllu>; +defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll.s", NEONvshlls>; +defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll.u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) -def VSHLLi8 : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", - v8i16, v8i8, NEONvshlli>; -def VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", - v4i32, v4i16, NEONvshlli>; -def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", - v2i64, v2i32, NEONvshlli>; +class N2VLShMax op21_16, bits<4> op11_8, bit op7, + bit op6, bit op4, string OpcodeStr, ValueType ResTy, + ValueType OpTy, SDNode OpNode> + : N2VLSh { + let Inst{21-16} = op21_16; +} +def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", + v8i16, v8i8, NEONvshlli>; +def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", + v4i32, v4i16, NEONvshlli>; +def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", + v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i16", v8i8, v8i16, NEONvshrn>; -def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i32", v4i16, v4i32, NEONvshrn>; -def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i64", v2i32, v2i64, NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn.i", NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2083,12 +2146,8 @@ defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i16", v8i8, v8i16, NEONvrshrn>; -def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i32", v4i16, v4i32, NEONvrshrn>; -def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i64", v2i32, v2i64, NEONvrshrn>; +defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn.i", + NEONvrshrn>; // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2102,26 +2161,14 @@ defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s16", v8i8, v8i16, NEONvqshrns>; -def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s32", v4i16, v4i32, NEONvqshrns>; -def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s64", v2i32, v2i64, NEONvqshrns>; -def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>; -def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>; -def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>; +defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.s", + NEONvqshrns>; +defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.u", + NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>; -def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>; -def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>; +defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun.s", + NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2130,26 +2177,14 @@ defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>; -def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>; -def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>; -def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u16", v8i8, v8i16, NEONvqrshrnu>; -def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>; -def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>; +defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.s", + NEONvqrshrns>; +defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.u", + NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>; -def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>; -def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>; +defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun.s", + NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; @@ -2491,27 +2526,28 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), // VDUP : Vector Duplicate Lane (from scalar to all elements) -class VDUPLND op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, +class VDUPLND + : N2VDup<0b11, 0b11, 0b11000, 0, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; -class VDUPLNQ op19_18, bits<2> op17_16, string OpcodeStr, - ValueType ResTy, ValueType OpTy> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, +class VDUPLNQ + : N2VDup<0b11, 0b11, 0b11000, 1, 0, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; -def VDUPLN8d : VDUPLND<0b00, 0b01, "vdup.8", v8i8>; -def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>; -def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>; -def VDUPLNfd : VDUPLND<0b01, 0b00, "vdup.32", v2f32>; -def VDUPLN8q : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>; -def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>; -def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>; -def VDUPLNfq : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>; +// Inst{19-16} is partially specified depending on the element size. + +def VDUPLN8d : VDUPLND<"vdup.8", v8i8> { let Inst{16} = 1; } +def VDUPLN16d : VDUPLND<"vdup.16", v4i16> { let Inst{17-16} = 0b10; } +def VDUPLN32d : VDUPLND<"vdup.32", v2i32> { let Inst{18-16} = 0b100; } +def VDUPLNfd : VDUPLND<"vdup.32", v2f32> { let Inst{18-16} = 0b100; } +def VDUPLN8q : VDUPLNQ<"vdup.8", v16i8, v8i8> { let Inst{16} = 1; } +def VDUPLN16q : VDUPLNQ<"vdup.16", v8i16, v4i16> { let Inst{17-16} = 0b10; } +def VDUPLN32q : VDUPLNQ<"vdup.32", v4i32, v2i32> { let Inst{18-16} = 0b100; } +def VDUPLNfq : VDUPLNQ<"vdup.32", v4f32, v2f32> { let Inst{18-16} = 0b100; } def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -2530,15 +2566,19 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0, - (outs DPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; +def VDUPfdf : N2VDup<0b11, 0b11, 0b11000, 0, 0, + (outs DPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", + [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]> { + let Inst{18-16} = 0b100; +} -def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0, - (outs QPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; +def VDUPfqf : N2VDup<0b11, 0b11, 0b11000, 1, 0, + (outs QPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", + [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]> { + let Inst{18-16} = 0b100; +} def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, @@ -2560,8 +2600,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u", defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s", int_arm_neon_vqmovnsu>; // VMOVL : Vector Lengthening Move -defm VMOVLs : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>; -defm VMOVLu : N2VLInt_QHS<1,1,0b1010,0,0,1, "vmovl.u", int_arm_neon_vmovlu>; +defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl.s", int_arm_neon_vmovls>; +defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl.u", int_arm_neon_vmovlu>; // Vector Conversions. @@ -2585,24 +2625,22 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", v4f32, v4i32, uint_to_fp>; // VCVT : Vector Convert Between Floating-Point and Fixed-Point. -// Note: Some of the opcode bits in the following VCVT instructions need to -// be encoded based on the immed values. -def VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; -def VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; // Vector Reverse.