diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 3dc66a1f238..7d7a641a2e3 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -463,7 +463,7 @@ defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD, (outs GPR32:$Rd)>; -let Rd = 0b11111, isCompare = 1 in { +let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in { defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV, (outs), extends_to_i64>, addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV, (outs)>; @@ -689,7 +689,7 @@ multiclass addsubimm_varieties shift, [(set NZCV, (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteCMP, ReadCMP]> { let Rd = 0b11111; let Defs = [NZCV]; let isCompare = 1; @@ -1086,7 +1086,7 @@ def BFMwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { let DecoderMethod = "DecodeBitfieldInstruction"; let Constraints = "$src = $Rd"; } @@ -1095,7 +1095,7 @@ def BFMxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { let DecoderMethod = "DecodeBitfieldInstruction"; let Constraints = "$src = $Rd"; } @@ -1295,7 +1295,7 @@ defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { // As above, no disassembler allowed. 
let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1304,7 +1304,7 @@ def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1407,7 +1407,7 @@ defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1416,7 +1416,7 @@ def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { // As above, no disassembler allowed. 
let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1560,7 +1560,8 @@ class A64I_condcmpregImpl (outs), (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteCMP, ReadCMP, ReadCMP]> { let Defs = [NZCV]; } @@ -1608,7 +1609,7 @@ multiclass A64I_condselSizes op2, string asmop, !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), [(set i32:$Rd, (select i32:$Rn, i32:$Rm))], NoItinerary>, - Sched<[WriteCMP, ReadCMP]>; + Sched<[WriteCMP, ReadCMP, ReadCMP]>; def xxxc : A64I_condsel<0b1, op, 0b0, op2, @@ -1617,7 +1618,7 @@ multiclass A64I_condselSizes op2, string asmop, !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), [(set i64:$Rd, (select i64:$Rn, i64:$Rm))], NoItinerary>, - Sched<[WriteCMP, ReadCMP]>; + Sched<[WriteCMP, ReadCMP, ReadCMP]>; } } @@ -1797,7 +1798,8 @@ multiclass dp_2src_crc { def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0, !strconcat(asmop, "x\t$Rd, $Rn, $Rm"), (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } multiclass dp_2src_zext opcode, string asmop, SDPatternOperator op> { @@ -2630,7 +2632,7 @@ let mayLoad = 1 in { (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19), "prfm\t$Rt, $Imm19", [], NoItinerary>, - Sched<[WriteLd]>; + Sched<[WriteLd, ReadLd]>; } //===----------------------------------------------------------------------===// @@ -2685,19 +2687,23 @@ class A64I_SRexs_impl size, bits<3> opcode, string asm, dag outs, multiclass A64I_SRex opcode, string prefix> { def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [],NoItinerary>; + [],NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _word: 
A64I_SRexs_impl<0b10, opcode, asmstr, (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _dword: A64I_SRexs_impl<0b11, opcode, asmstr, (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; } defm STXR : A64I_SRex<"stxr", 0b000, "STXR">; @@ -2792,22 +2798,26 @@ multiclass A64I_SLex opcode, string prefix> { def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"), (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), [(atomic_store_release_8 i64:$Rn, i32:$Rt)], - NoItinerary>; + NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"), (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), [(atomic_store_release_16 i64:$Rn, i32:$Rt)], - NoItinerary>; + NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _word: A64I_SLexs_impl<0b10, opcode, asmstr, (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), [(atomic_store_release_32 i64:$Rn, i32:$Rt)], - NoItinerary>; + NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _dword: A64I_SLexs_impl<0b11, opcode, asmstr, (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn), [(atomic_store_release_64 i64:$Rn, i64:$Rt)], - NoItinerary>; + NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; } defm STLR : A64I_SLex<"stlr", 0b101, "STLR">; @@ -2832,12 +2842,14 @@ multiclass A64I_SPex opcode> { def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs), (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs), (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; } defm STXP : A64I_SPex<"stxp", 0b010>; @@ -2865,13 +2877,13 @@ multiclass A64I_LPex opcode> { (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64xsp0:$Rn), [], NoItinerary>, - Sched<[WriteLd]>; + 
Sched<[WriteLd, WriteLd, ReadLd]>; def _dword: A64I_LPexs_impl<0b11, opcode, asmstr, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64xsp0:$Rn), [], NoItinerary>, - Sched<[WriteLd]>; + Sched<[WriteLd, WriteLd, ReadLd]>; } defm LDXP : A64I_LPex<"ldxp", 0b010>; @@ -3085,7 +3097,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, def _STR : A64I_LSunsigimm { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]> { let mayStore = 1; } def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", @@ -3126,13 +3139,15 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; def _Xm_RegOffset_STR : A64I_LSregoff; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; } def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]", (!cast(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn, @@ -3142,7 +3157,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, def _STUR : A64I_LSunalimm { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]> { let mayStore = 1; } def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", @@ -3163,7 +3179,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR64xsp:$Rn_wb), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]> { let Constraints = "$Rn = $Rn_wb"; let mayStore = 1; @@ -3176,7 +3193,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3187,7 +3204,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR64xsp:$Rn_wb), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), "str" # asmsuffix # "\t$Rt, 
[$Rn, $SImm9]!", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]> { let Constraints = "$Rn = $Rn_wb"; let mayStore = 1; @@ -3200,7 +3218,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3340,7 +3358,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3350,7 +3368,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3361,7 +3379,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3371,7 +3389,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3431,7 +3449,7 @@ def LDRSWx_PostInd (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrsw\t$Rt, [$Rn], $SImm9", [], NoItinerary>, - 
Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3442,7 +3460,7 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrsw\t$Rt, [$Rn, $SImm9]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3652,7 +3670,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (outs SomeReg:$Rt, SomeReg:$Rt2), (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3666,7 +3684,8 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, GPR64xsp:$Rn, simm7:$SImm7), "stp\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { let mayStore = 1; let Constraints = "$Rn = $Rn_wb"; @@ -3679,16 +3698,17 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn], $SImm7", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; } def _PreInd_STR : A64I_LSPpreind { + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn, $SImm7]!", + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { let mayStore = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3699,15 +3719,16 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, 
WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; } def _NonTemp_STR : A64I_LSPnontemp { + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { let mayStore = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3719,7 +3740,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (outs SomeReg:$Rt, SomeReg:$Rt2), (ins GPR64xsp:$Rn, simm7:$SImm7), "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3745,7 +3766,7 @@ def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3756,7 +3777,8 @@ def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3767,7 +3789,7 @@ def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -4150,7 +4172,8 @@ let isMoveImm = 1, isReMaterializable = 1, (ins movz64_imm:$FullImm)>; } -let Constraints = "$src = $Rd" in +let Constraints = "$src = $Rd", + SchedRW = [WriteALU, ReadALU] in defm MOVK : 
A64I_movwSizes<0b11, "movk", (ins GPR32:$src, movk32_imm:$FullImm), (ins GPR64:$src, movk64_imm:$FullImm)>; diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 3b919b388b2..0b97e3bdf5a 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -122,14 +122,16 @@ multiclass NeonI_3VSame_B_sizes size, bits<5> opcode, asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", [(set (v8i8 VPR64:$Rd), (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _16B : NeonI_3VSame<0b1, u, size, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", [(set (v16i8 VPR128:$Rd), (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -143,28 +145,32 @@ multiclass NeonI_3VSame_HS_sizes opcode, asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h", [(set (v4i16 VPR64:$Rd), (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _8H : NeonI_3VSame<0b1, u, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h", [(set (v8i16 VPR128:$Rd), (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _2S : NeonI_3VSame<0b0, u, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", [(set (v2i32 VPR64:$Rd), (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _4S : NeonI_3VSame<0b1, u, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", [(set (v4i32 VPR128:$Rd), (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + 
Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } multiclass NeonI_3VSame_BHS_sizes opcode, @@ -177,14 +183,16 @@ multiclass NeonI_3VSame_BHS_sizes opcode, asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", [(set (v8i8 VPR64:$Rd), (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _16B : NeonI_3VSame<0b1, u, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", [(set (v16i8 VPR128:$Rd), (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -198,7 +206,8 @@ multiclass NeonI_3VSame_BHSD_sizes opcode, asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", [(set (v2i64 VPR128:$Rd), (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -214,21 +223,24 @@ multiclass NeonI_3VSame_SD_sizes opcode, asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", [(set (ResTy2S VPR64:$Rd), (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", [(set (ResTy4S VPR128:$Rd), (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", [(set (ResTy2D VPR128:$Rd), (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -286,9 +298,11 @@ def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)), // Vector Multiply (Integer and Floating-Point) +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 
0b10011, "mul", mul, 1>; defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, v2f32, v4f32, v2f64, 1>; +} // Patterns to match mul of v1i8/v1i16/v1i32 types def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)), @@ -309,8 +323,10 @@ def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)), // Vector Multiply (Polynomial) +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", int_arm_neon_vmulp, int_arm_neon_vmulp, 1>; +} // Vector Multiply-accumulate and Multiply-subtract (Integer) @@ -324,7 +340,8 @@ class NeonI_3VSame_Constraint_impl { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -335,6 +352,7 @@ def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), (sub node:$Ra, (mul node:$Rn, node:$Rm))>; +let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8, 0b0, 0b0, 0b00, 0b10010, Neon_mla>; def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8, @@ -360,6 +378,7 @@ def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32, 0b0, 0b1, 0b10, 0b10010, Neon_mls>; def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32, 0b1, 0b1, 0b10, 0b10010, Neon_mls>; +} // Vector Multiply-accumulate and Multiply-subtract (Floating Point) @@ -369,7 +388,8 @@ def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>; -let Predicates = [HasNEON, UseFusedMAC] in { +let Predicates = [HasNEON, UseFusedMAC], + SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32, 0b0, 0b0, 0b00, 0b11001, Neon_fmla>; def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32, @@ -403,8 +423,10 @@ def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), // Vector Divide 
(Floating-Point) +let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, v2f32, v4f32, v2f64, 0>; +} // Vector Bitwise Operations @@ -770,49 +792,56 @@ multiclass NeonI_cmpz_sizes opcode, string asmop, CondCode CC> asmop # "\t$Rd.8b, $Rn.8b, $Imm", [(set (v8i8 VPR64:$Rd), (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.16b, $Rn.16b, $Imm", [(set (v16i8 VPR128:$Rd), (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.4h, $Rn.4h, $Imm", [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.8h, $Rn.8h, $Imm", [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.2s, $Rn.2s, $Imm", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.4s, $Rn.4s, $Imm", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), 
asmop # "\t$Rd.2d, $Rn.2d, $Imm", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } // Vector Compare Mask Equal to Zero (Integer) @@ -879,21 +908,24 @@ multiclass NeonI_fpcmpz_sizes opcode, asmop # "\t$Rd.2s, $Rn.2s, $FPImm", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), asmop # "\t$Rd.4s, $Rn.4s, $FPImm", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), asmop # "\t$Rd.2d, $Rn.2d, $FPImm", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } // Vector Compare Mask Equal to Zero (Floating Point) @@ -1051,6 +1083,7 @@ defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp", int_arm_neon_vpadd, v2f32, v4f32, v2f64, 1>; +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { // Vector Saturating Doubling Multiply High defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh", int_arm_neon_vqdmulh, 1>; @@ -1063,6 +1096,7 @@ defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh", defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx", int_aarch64_neon_vmulx, v2f32, v4f32, v2f64, 1>; +} // Patterns to match llvm.aarch64.* intrinsic for // ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output @@ -1202,7 +1236,8 @@ multiclass NeonI_mov_imm_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bits<2> Simm; let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; } @@ -1215,7 +1250,8 @@ multiclass NeonI_mov_imm_lsl_sizes { + 
NoItinerary>, + Sched<[WriteFPALU]> { bits<2> Simm; let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; } @@ -1229,7 +1265,8 @@ multiclass NeonI_mov_imm_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bit Simm; let cmode = {0b1, 0b0, Simm, 0b0}; } @@ -1242,7 +1279,8 @@ multiclass NeonI_mov_imm_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bit Simm; let cmode = {0b1, 0b0, Simm, 0b0}; } @@ -1263,7 +1301,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<2> Simm; let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; } @@ -1277,7 +1316,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<2> Simm; let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; } @@ -1292,7 +1332,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bit Simm; let cmode = {0b1, 0b0, Simm, 0b1}; } @@ -1306,7 +1347,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bit Simm; let cmode = {0b1, 0b0, Simm, 0b1}; } @@ -1325,7 +1367,8 @@ multiclass NeonI_mov_imm_msl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bit Simm; let cmode = {0b1, 0b1, 0b0, Simm}; } @@ -1338,7 +1381,8 @@ multiclass NeonI_mov_imm_msl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bit Simm; let cmode = {0b1, 0b1, 0b0, Simm}; } @@ -1565,7 +1609,8 @@ def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0, "movi\t$Rd.8b, $Imm", [(set (v8i8 VPR64:$Rd), (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1110; } @@ -1574,7 +1619,8 @@ def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0, "movi\t$Rd.16b, $Imm", [(set (v16i8 VPR128:$Rd), (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1110; } } @@ -1586,7 +1632,8 @@ def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1, "movi\t $Rd.2d, $Imm", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_movi 
(timm:$Imm), (i32 imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1110; } } @@ -1599,7 +1646,8 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1, "movi\t $Rd, $Imm", [(set (v1i64 FPR64:$Rd), (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1110; } } @@ -1613,7 +1661,8 @@ class NeonI_FMOV_impl { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1111; } @@ -1692,7 +1741,8 @@ class N2VShift opcode, string asmop, string T, [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn), (Ty (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_N2VShL opcode, string asmop> { // 64-bit vector types. @@ -1873,7 +1923,8 @@ class N2VShiftLong opcode, string asmop, string DestT, (DestTy (shl (DestTy (ExtOp (SrcTy VPR64:$Rn))), (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; class N2VShiftLongHigh opcode, string asmop, string DestT, string SrcT, ValueType DestTy, ValueType SrcTy, @@ -1887,7 +1938,8 @@ class N2VShiftLongHigh opcode, string asmop, string DestT, (DestTy (ExtOp (SrcTy (getTop VPR128:$Rn)))), (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_N2VShLL opcode, string asmop, SDNode ExtOp> { @@ -1988,7 +2040,8 @@ class N2VShift_RQ opcode, string asmop, string T, asmop # "\t$Rd." # T # ", $Rn." 
# T # ", $Imm", [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; // shift right (vector by immediate) multiclass NeonI_N2VShR_RQ opcode, string asmop, @@ -2091,7 +2144,8 @@ class N2VShiftAdd opcode, string asmop, string T, [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src), (Ty (OpNode (Ty VPRC:$Rn), (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -2146,7 +2200,8 @@ class N2VShiftAdd_R opcode, string asmop, string T, asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src), (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -2201,7 +2256,8 @@ class N2VShiftIns opcode, string asmop, string T, asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -2295,14 +2351,16 @@ class N2VShR_Narrow opcode, string asmop, string DestT, : NeonI_2VShiftImm; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; class N2VShR_Narrow_Hi opcode, string asmop, string DestT, string SrcT, Operand ImmTy> : NeonI_2VShiftImm { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -2461,7 +2519,8 @@ class N2VCvt_Fx opcode, string asmop, string T, asmop # "\t$Rd." # T # ", $Rn." 
# T # ", $Imm", [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn), (i32 ImmTy:$Imm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_N2VCvt_Fx2fp opcode, string asmop, SDPatternOperator IntOp> { @@ -2539,28 +2598,32 @@ multiclass NeonI_2VAcross_1 opcode, asmop # "\t$Rd, $Rn.8b", [(set (v1i16 FPR16:$Rd), (v1i16 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode, (outs FPR16:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.16b", [(set (v1i16 FPR16:$Rd), (v1i16 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode, (outs FPR32:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd, $Rn.4h", [(set (v1i32 FPR32:$Rd), (v1i32 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode, (outs FPR32:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.8h", [(set (v1i32 FPR32:$Rd), (v1i32 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; // _1d2s doesn't exist! 
@@ -2569,7 +2632,8 @@ multiclass NeonI_2VAcross_1 opcode, asmop # "\t$Rd, $Rn.4s", [(set (v1i64 FPR64:$Rd), (v1i64 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>; @@ -2585,28 +2649,32 @@ multiclass NeonI_2VAcross_2 opcode, asmop # "\t$Rd, $Rn.8b", [(set (v1i8 FPR8:$Rd), (v1i8 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode, (outs FPR8:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.16b", [(set (v1i8 FPR8:$Rd), (v1i8 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode, (outs FPR16:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd, $Rn.4h", [(set (v1i16 FPR16:$Rd), (v1i16 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode, (outs FPR16:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.8h", [(set (v1i16 FPR16:$Rd), (v1i16 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; // _1s2s doesn't exist! @@ -2615,7 +2683,8 @@ multiclass NeonI_2VAcross_2 opcode, asmop # "\t$Rd, $Rn.4s", [(set (v1i32 FPR32:$Rd), (v1i32 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>; @@ -2635,7 +2704,8 @@ multiclass NeonI_2VAcross_3 opcode, bits<2> size, asmop # "\t$Rd, $Rn.4s", [(set (f32 FPR32:$Rd), (f32 (opnode (v4f32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv", @@ -2658,7 +2728,8 @@ class NeonI_Permute size, bits<3> opcode, asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." 
# OpS, [(set (Ty OpVPR:$Rd), (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_Perm_pat opcode, string asmop, SDPatternOperator opnode> { @@ -2717,7 +2788,8 @@ class NeonI_3VDL size, bits<4> opcode, [(set (ResTy VPR128:$Rd), (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))), (ResTy (ext (OpTy OpVPR:$Rm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_3VDL_s opcode, string asmop, SDPatternOperator opnode, @@ -2792,7 +2864,8 @@ class NeonI_3VDW size, bits<4> opcode, [(set (ResTy VPR128:$Rd), (ResTy (opnode (ResTy VPR128:$Rn), (ResTy (ext (OpTy OpVPR:$Rm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_3VDW_s opcode, string asmop, SDPatternOperator opnode> { @@ -2873,7 +2946,8 @@ class NeonI_3VDN_addhn_2Op size, bits<4> opcode, (ResTy (get_hi (OpTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_3VDN_addhn_2Op opcode, string asmop, SDPatternOperator opnode, bit Commutable = 0> { @@ -2901,7 +2975,8 @@ class NeonI_3VD_2Op size, bits<4> opcode, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, [(set (ResTy ResVPR:$Rd), (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; // normal narrow pattern multiclass NeonI_3VDN_2Op opcode, string asmop, @@ -2925,7 +3000,8 @@ class NeonI_3VDN_3Op size, bits<4> opcode, : NeonI_3VDiff { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let neverHasSideEffects = 1; } @@ -2990,7 +3066,8 @@ class NeonI_3VDL_Ext size, bits<4> opcode, [(set (ResTy VPR128:$Rd), (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_3VDL_zext opcode, string asmop, SDPatternOperator opnode, bit Commutable = 0> { @@ -3058,7 +3135,8 @@ class NeonI_3VDL_Aba size, bits<4> opcode, (ResTy VPR128:$src), (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -3098,7 +3176,8 @@ defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add, // Long pattern with 2 operands multiclass NeonI_3VDL_2Op opcode, string asmop, SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { + let isCommutable = Commutable, + SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", opnode, VPR128, VPR64, v8i16, v8i8>; def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", @@ -3120,7 +3199,8 @@ class NeonI_3VDL2_2Op_mull size, bits<4> opcode, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, [(set (ResTy VPR128:$Rd), (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; multiclass NeonI_3VDL2_2Op_mull_v1 opcode, string asmop, string opnode, bit Commutable = 0> { @@ -3154,7 +3234,8 @@ class NeonI_3VDL_3Op size, bits<4> opcode, (ResTy (opnode (ResTy VPR128:$src), (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { let Constraints = "$src = $Rd"; } @@ -3202,7 +3283,8 @@ class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, (ResTy (subop (ResTy VPR128:$src), (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { let Constraints = "$src = $Rd"; } @@ -3254,8 +3336,10 @@ multiclass NeonI_3VDL_v2 opcode, string asmop, } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", int_arm_neon_vqdmull, 1>; +} multiclass NeonI_3VDL2_2Op_mull_v2 opcode, string asmop, string opnode, bit Commutable = 0> { @@ -3299,6 +3383,7 @@ multiclass NeonI_3VDL_v3 opcode, string asmop, } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, int_aarch64_neon_vmull_p64, 1>; @@ -3319,7 +3404,8 @@ multiclass NeonI_3VDL2_2Op_mull_v3 opcode, string asmop, (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))), (v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; } def : Pat<(v16i8 (int_aarch64_neon_vmull_p64 @@ -3355,7 +3441,8 @@ class NeonI_LDVList opcode, bits<2> size, (outs VecList:$Rt), (ins GPR64xsp:$Rn), asmop # "\t$Rt, [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, ReadVecLd]> { let mayLoad = 1; let neverHasSideEffects = 1; } @@ -3409,7 +3496,8 @@ class NeonI_STVList opcode, bits<2> size, (outs), 
(ins GPR64xsp:$Rn, VecList:$Rt), asmop # "\t$Rt, [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { let mayStore = 1; let neverHasSideEffects = 1; } @@ -3642,7 +3730,8 @@ multiclass NeonI_LDWB_VList opcode, bits<2> size, (ins GPR64xsp:$Rn, ImmTy:$amt), asmop # "\t$Rt, [$Rn], $amt", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { let Rm = 0b11111; } @@ -3651,7 +3740,8 @@ multiclass NeonI_LDWB_VList opcode, bits<2> size, (ins GPR64xsp:$Rn, GPR64noxzr:$Rm), asmop # "\t$Rt, [$Rn], $Rm", [], - NoItinerary>; + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; } } @@ -3725,7 +3815,8 @@ multiclass NeonI_STWB_VList opcode, bits<2> size, (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt), asmop # "\t$Rt, [$Rn], $amt", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { let Rm = 0b11111; } @@ -3734,7 +3825,8 @@ multiclass NeonI_STWB_VList opcode, bits<2> size, (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt), asmop # "\t$Rt, [$Rn], $Rm", [], - NoItinerary>; + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; } } @@ -3838,7 +3930,8 @@ class NeonI_LDN_Dup opcode, bits<2> size, (outs VecList:$Rt), (ins GPR64xsp:$Rn), asmop # "\t$Rt, [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, ReadVecLd]> { let mayLoad = 1; let neverHasSideEffects = 1; } @@ -3932,7 +4025,8 @@ class NeonI_LDN_Lane op2_1, bit op0, RegisterOperand VList, (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, ReadVecLd, ReadVecLd]> { let mayLoad = 1; let neverHasSideEffects = 1; let hasExtraDefRegAllocReq = 1; @@ -4017,7 +4111,8 @@ class NeonI_STN_Lane op2_1, bit op0, RegisterOperand VList, (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { let mayStore = 1; let 
neverHasSideEffects = 1; let hasExtraDefRegAllocReq = 1; @@ -4109,16 +4204,18 @@ multiclass NeonI_LDN_WB_Dup opcode, bits<2> size, (ins GPR64xsp:$Rn, ImmTy:$amt), asmop # "\t$Rt, [$Rn], $amt", [], - NoItinerary> { - let Rm = 0b11111; - } + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { + let Rm = 0b11111; + } def _register : NeonI_LdOne_Dup_Post; + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; } } @@ -4182,7 +4279,8 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, VList:$src, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn], $amt", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]> { let Rm = 0b11111; } @@ -4194,7 +4292,8 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, VList:$src, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn], $Rm", [], - NoItinerary>; + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd, ReadVecLd]>; } multiclass LD_Lane_WB_BHSD { + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { let Rm = 0b11111; } @@ -4294,7 +4394,8 @@ let mayStore = 1, neverHasSideEffects = 1, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn], $Rm", [], - NoItinerary>; + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; } multiclass ST_Lane_WB_BHSD size, bits<5> opcode, string asmop, (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; class NeonI_Scalar3Same_D_size opcode, string asmop> : NeonI_Scalar3Same_size; @@ -4465,7 +4567,8 @@ class NeonI_Scalar3Diff_size size, bits<4> opcode, string asmop, (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_Scalar3Diff_HS_size opcode, string asmop> { def shh : NeonI_Scalar3Diff_size; @@ -4478,12 +4581,14 @@ multiclass 
NeonI_Scalar3Diff_ml_HS_size opcode, string asmop> { (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>; def dss : NeonI_Scalar3Diff; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>; } } @@ -4513,7 +4618,8 @@ class NeonI_Scalar2SameMisc_size size, bits<5> opcode, string asm (outs FPRCD:$Rd), (ins FPRCS:$Rn), !strconcat(asmop, "\t$Rd, $Rn"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_Scalar2SameMisc_SD_size opcode, string asmop> { @@ -4550,7 +4656,8 @@ class NeonI_Scalar2SameMisc_accum_size size, bits<5> opcode, (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn), !strconcat(asmop, "\t$Rd, $Rn"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_Scalar2SameMisc_accum_BHSD_size opcode, string asmop> { @@ -4610,7 +4717,8 @@ class NeonI_Scalar2SameMisc_cmpz_D_size opcode, string asmop> (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm), !strconcat(asmop, "\t$Rd, $Rn, $Imm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_Scalar2SameMisc_cmpz_SD_size opcode, string asmop> { @@ -4618,12 +4726,14 @@ multiclass NeonI_Scalar2SameMisc_cmpz_SD_size opcode, (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm), !strconcat(asmop, "\t$Rd, $Rn, $FPImm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def ddi : NeonI_Scalar2SameMisc; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } class Neon_Scalar2SameMisc_cmpz_D_size_patterns opcode, string asmop, : NeonI_ScalarShiftImm; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_ScalarShiftRightImm_D_size opcode, string asmop> { @@ -4772,7 +4883,8 @@ class NeonI_ScalarShiftRightImm_accum_D_size opcode, string asmop (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm), !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - 
[], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { bits<6> Imm; let Inst{22} = 0b1; // immh:immb = 1xxxxxx let Inst{21-16} = Imm; @@ -4784,7 +4896,8 @@ class NeonI_ScalarShiftLeftImm_accum_D_size opcode, string asmop> (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm), !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { bits<6> Imm; let Inst{22} = 0b1; // immh:immb = 1xxxxxx let Inst{21-16} = Imm; @@ -4797,7 +4910,8 @@ class NeonI_ScalarShiftImm_narrow_size opcode, string asmop, : NeonI_ScalarShiftImm; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_ScalarShiftImm_narrow_HSD_size opcode, string asmop> { @@ -5111,10 +5225,13 @@ defm : Neon_Scalar3Same_BHSD_size_patterns; // Scalar Integer Saturating Doubling Multiply Half High +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>; // Scalar Integer Saturating Rounding Doubling Multiply Half High +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>; +} // Patterns to match llvm.arm.* intrinsic for // Scalar Integer Saturating Doubling Multiply Half High and @@ -5124,8 +5241,10 @@ defm : Neon_Scalar3Same_HS_size_patterns; +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in { // Scalar Floating-point Multiply Extended defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>; +} // Scalar Floating-point Reciprocal Step defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>; @@ -5218,18 +5337,24 @@ defm : Neon_Scalar3Same_BHSD_size_patterns; defm : Neon_Scalar3Same_D_size_patterns; +let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { // Signed Saturating Doubling Multiply-Add Long defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">; +} defm : 
Neon_Scalar3Diff_ml_HS_size_patterns; // Signed Saturating Doubling Multiply-Subtract Long +let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">; +} defm : Neon_Scalar3Diff_ml_HS_size_patterns; // Signed Saturating Doubling Multiply Long +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in { defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">; +} defm : Neon_Scalar3Diff_HS_size_patterns; @@ -5557,7 +5682,8 @@ multiclass NeonI_ScalarPair_D_sizes opcode, (outs FPR64:$Rd), (ins VPR128:$Rn), !strconcat(asmop, "\t$Rd, $Rn.2d"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } } @@ -5569,7 +5695,8 @@ multiclass NeonI_ScalarPair_SD_sizes opcode, (outs FPR32:$Rd), (ins VPR64:$Rn), !strconcat(asmop, "\t$Rd, $Rn.2s"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } } @@ -5642,7 +5769,8 @@ class NeonI_ScalarXIndexedElemArith opcode, (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMul, ReadFPMul, ReadFPMul]> { bits<3> Imm; bits<5> MRm; } @@ -5659,7 +5787,8 @@ class NeonI_ScalarXIndexedElemArith_Constraint_Impl opcode (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { let Constraints = "$src = $Rd"; bits<3> Imm; bits<5> MRm; @@ -6170,7 +6299,8 @@ class NeonI_Scalar_DUP { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<4> Imm; } @@ -6581,7 +6711,8 @@ class NeonI_Extract op2, string asmop, asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS # ", $Index", [], - NoItinerary>{ + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>{ bits<4> Index; } @@ -6622,7 +6753,8 @@ class NI_TBL op2, bits<2> len, bit op, (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm), asmop # "\t$Rd." 
# OpS # ", $Rn, $Rm." # OpS, [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; // The vectors in look up table are always 16b multiclass NI_TBL_pat len, bit op, string asmop, string List> { @@ -6646,7 +6778,8 @@ class NI_TBX op2, bits<2> len, bit op, (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm), asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS, [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -6674,7 +6807,8 @@ class NeonI_INS_main { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { bits<4> Imm; let Constraints = "$src = $Rd"; } @@ -6732,7 +6866,8 @@ class NeonI_INS_element ResImm:$Immd, ResImm:$Immn), asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; bits<4> Immd; bits<4> Immn; @@ -6876,7 +7011,8 @@ class NeonI_SMOV { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<4> Imm; } @@ -6970,7 +7106,8 @@ class NeonI_UMOV { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<4> Imm; } @@ -7128,7 +7265,8 @@ class NeonI_DUP_Elt { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<4> Imm; } @@ -7234,7 +7372,8 @@ class NeonI_DUP; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> { let Inst{20-16} = 0b00001; @@ -7335,7 +7474,8 @@ class NI_2VE size, bits<4> opcode, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Re." # EleOpS # "[$Index]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { bits<3> Index; bits<5> Re; @@ -7434,7 +7574,8 @@ class NI_2VE_2op size, bits<4> opcode, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Re." 
# EleOpS # "[$Index]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { bits<3> Index; bits<5> Re; } @@ -7473,9 +7614,11 @@ multiclass NI_2VE_v1_2op opcode, string asmop> { } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; +} // Pattern for lane in 128-bit vector class NI_2VE_mul_laneq opcode, string asmop> { } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">; defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">; +} class NI_2VE_mul_lane_2d opcode, string asmop> { } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">; defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">; defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">; +} def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), (FMOVdd $src)>; @@ -8074,7 +8221,8 @@ class NeonI_REV size, bit Q, bit U, asmop # "\t$Rd." # Res # ", $Rn." 
# Res, [(set (ResTy ResVPR:$Rd), (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))], - NoItinerary> ; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128, v16i8, Neon_rev64>; @@ -8113,42 +8261,48 @@ multiclass NeonI_PairwiseAdd opcode, asmop # "\t$Rd.8h, $Rn.16b", [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.8b", [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.8h", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.4h", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.4s", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.1d, $Rn.2s", [(set (v1i64 VPR64:$Rd), (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010, @@ -8170,7 +8324,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_Padd (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 8b4h : NeonI_2VMisc<0b0, U, 0b00, 
opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), @@ -8178,7 +8333,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_Padd (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), @@ -8186,7 +8342,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_Padd (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), @@ -8194,7 +8351,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Padd (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), @@ -8202,7 +8360,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_Padd (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), @@ -8210,7 +8369,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v1i64 VPR64:$Rd), (v1i64 (Neon_Padd (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -8223,37 +8383,44 @@ multiclass NeonI_2VMisc_BHSDsize_1Arg opcode> { def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, 
ReadFPALU]>; def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>; @@ -8323,37 +8490,44 @@ multiclass NeonI_2VMisc_BHSDsize_2Args opcode> { def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # 
"\t$Rd.8b, $Rn.8b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -8401,42 +8575,48 @@ multiclass NeonI_2VMisc_BHSsizes; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", [(set (v8i8 VPR64:$Rd), (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>; @@ -8447,12 +8627,14 @@ multiclass NeonI_2VMisc_Bsize size, def 16b : NeonI_2VMisc<0b1, U, size, Opcode, (outs 
VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8b : NeonI_2VMisc<0b0, U, size, Opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>; @@ -8510,21 +8692,24 @@ multiclass NeonI_2VMisc_SDsizes opcode, asmop # "\t$Rd.4s, $Rn.4s", [(set (v4f32 VPR128:$Rd), (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", [(set (v2f64 VPR128:$Rd), (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", [(set (v2f32 VPR64:$Rd), (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>; @@ -8534,33 +8719,39 @@ multiclass NeonI_2VMisc_HSD_Narrow opcode> { def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.8b, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; let Constraints = "$Rd = $src" in { def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.16b, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, 
ReadFPALU]>; def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -8613,37 +8804,43 @@ multiclass NeonI_2VMisc_SHIFT opcode> { (outs VPR128:$Rd), (ins VPR64:$Rn, uimm_exact8:$Imm), asmop # "\t$Rd.8h, $Rn.8b, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn, uimm_exact16:$Imm), asmop # "\t$Rd.4s, $Rn.4h, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn, uimm_exact32:$Imm), asmop # "\t$Rd.2d, $Rn.2s, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact8:$Imm), asmop # "2\t$Rd.8h, $Rn.16b, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact16:$Imm), asmop # "2\t$Rd.4s, $Rn.8h, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact32:$Imm), asmop # "2\t$Rd.2d, $Rn.4s, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } } @@ -8691,23 +8888,27 @@ multiclass NeonI_2VMisc_SD_Narrow opcode> { def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d2s : NeonI_2VMisc<0b0, 
U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; let Constraints = "$src = $Rd" in { def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -8745,12 +8946,14 @@ multiclass NeonI_2VMisc_D_Narrow; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -8774,22 +8977,26 @@ multiclass NeonI_2VMisc_HS_Extend opcode> { def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4s, $Rn.4h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2d, $Rn.2s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "2\t$Rd.2d, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>; @@ -8825,21 +9032,24 @@ multiclass NeonI_2VMisc_SD_Conv opcode, asmop # "\t$Rd.4s, $Rn.4s", [(set (ResTy4s VPR128:$Rd), (ResTy4s (Neon_Op (OpTy4s 
VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", [(set (ResTy2d VPR128:$Rd), (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", [(set (ResTy2s VPR64:$Rd), (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } multiclass NeonI_2VMisc_fp_to_int; defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101, int_arm_neon_vrsqrte>; +let SchedRW = [WriteFPSqrt, ReadFPSqrt] in { defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>; +} multiclass NeonI_2VMisc_S_Conv opcode, SDPatternOperator Neon_Op> { @@ -8903,14 +9115,16 @@ multiclass NeonI_2VMisc_S_Conv; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100, @@ -8927,7 +9141,8 @@ class NeonI_Cryptoaes_2v size, bits<5> opcode, [(set (v16i8 VPR128:$Rd), (v16i8 (opnode (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))))], - NoItinerary>{ + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let Predicates = [HasNEON, HasCrypto]; } @@ -8942,7 +9157,8 @@ class NeonI_Cryptoaes size, bits<5> opcode, asmop # "\t$Rd.16b, $Rn.16b", [(set (v16i8 VPR128:$Rd), (v16i8 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>; def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>; @@ 
-8955,7 +9171,8 @@ class NeonI_Cryptosha_vv size, bits<5> opcode, [(set (v4i32 VPR128:$Rd), (v4i32 (opnode (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let Predicates = [HasNEON, HasCrypto]; } @@ -8970,7 +9187,8 @@ class NeonI_Cryptosha_ss size, bits<5> opcode, : NeonI_Crypto_SHA { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { let Predicates = [HasNEON, HasCrypto]; let hasSideEffects = 0; } @@ -8990,7 +9208,8 @@ class NeonI_Cryptosha3_vvv size, bits<3> opcode, string asmop, (v4i32 (opnode (v4i32 VPR128:$src), (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let Predicates = [HasNEON, HasCrypto]; } @@ -9010,7 +9229,8 @@ class NeonI_Cryptosha3_qqv size, bits<3> opcode, string asmop, (v4i32 (opnode (v4i32 FPR128:$src), (v4i32 FPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let Predicates = [HasNEON, HasCrypto]; } @@ -9025,7 +9245,8 @@ class NeonI_Cryptosha3_qsv size, bits<3> opcode, string asmop> (outs FPR128:$Rd), (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm), asmop # "\t$Rd, $Rn, $Rm.4s", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let hasSideEffects = 0; let Predicates = [HasNEON, HasCrypto]; diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td index 6fcb1116b6c..ec8450b9c1f 100644 --- a/lib/Target/AArch64/AArch64Schedule.td +++ b/lib/Target/AArch64/AArch64Schedule.td @@ -37,8 +37,16 @@ def ReadDiv : SchedRead; // Loads def WriteLd : SchedWrite; def WritePreLd : SchedWrite; +def WriteVecLd : SchedWrite; def ReadLd : SchedRead; def ReadPreLd : SchedRead; +def ReadVecLd : SchedRead; + +// Stores +def WriteSt 
: SchedWrite; +def WriteVecSt : SchedWrite; +def ReadSt : SchedRead; +def ReadVecSt : SchedRead; // Branches def WriteBr : SchedWrite; diff --git a/lib/Target/AArch64/AArch64ScheduleA53.td b/lib/Target/AArch64/AArch64ScheduleA53.td index e288a24eb2c..20a14e79228 100644 --- a/lib/Target/AArch64/AArch64ScheduleA53.td +++ b/lib/Target/AArch64/AArch64ScheduleA53.td @@ -71,9 +71,18 @@ def : WriteRes { let Latency = 4; } // Div def : WriteRes { let Latency = 4; } -// Load +// Load - Note: Vector loads take 1-5 cycles to issue. For the WriteVecLd below, +// choosing the median of 3 which makes the latency 6. May model this more +// carefully in the future. def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 6; } + +// Store - Note: Vector stores take 1-3 cycles to issue. For the WriteVecSt below, +// choosing the median of 2 which makes the latency 5. May model this more +// carefully in the future. +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 5; } // Branch def : WriteRes; @@ -114,9 +123,14 @@ def : ReadAdvance; // No forwarding defined for ReadDiv yet. def : ReadAdvance; -// No forwarding defined for ReadLd, ReadPreLd yet. +// No forwarding defined for ReadLd, ReadPreLd, ReadVecLd yet. def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; + +// No forwarding defined for ReadSt and ReadVecSt yet. +def : ReadAdvance; +def : ReadAdvance; // No forwarding defined for ReadFPALU yet. def : ReadAdvance; diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll index 0d5534eca54..1555c4868e1 100644 --- a/test/CodeGen/AArch64/misched-basic-A53.ll +++ b/test/CodeGen/AArch64/misched-basic-A53.ll @@ -4,13 +4,15 @@ ; The Cortex-A53 machine model will cause the MADD instruction to be scheduled ; much higher than the ADD instructions in order to hide latency. When not ; specifying a subtarget, the MADD will remain near the end of the block. 
+; +; CHECK: ********** MI Scheduling ********** ; CHECK: main ; CHECK: *** Final schedule for BB#2 *** ; CHECK: SU(13) ; CHECK: MADDwwww ; CHECK: SU(4) ; CHECK: ADDwwi_lsl0_s -; CHECK: ********** MI Scheduling ********** +; CHECK: ********** INTERVALS ********** @main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 @main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 @@ -76,6 +78,33 @@ for.end: ; preds = %for.cond ret i32 %add6 } + +; The Cortex-A53 machine model will cause the FDIVvvv_4S to be raised to +; hide latency. Whereas normally there would only be a single FADDvvv_4S +; after it, this test checks to make sure there is more than one. +; +; CHECK: ********** MI Scheduling ********** +; CHECK: neon4xfloat:BB#0 +; CHECK: *** Final schedule for BB#0 *** +; CHECK: FDIVvvv_4S +; CHECK: FADDvvv_4S +; CHECK: FADDvvv_4S +; CHECK: ********** INTERVALS ********** +define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) { + %tmp1 = fadd <4 x float> %A, %B; + %tmp2 = fadd <4 x float> %A, %tmp1; + %tmp3 = fadd <4 x float> %A, %tmp2; + %tmp4 = fadd <4 x float> %A, %tmp3; + %tmp5 = fadd <4 x float> %A, %tmp4; + %tmp6 = fadd <4 x float> %A, %tmp5; + %tmp7 = fadd <4 x float> %A, %tmp6; + %tmp8 = fadd <4 x float> %A, %tmp7; + %tmp9 = fdiv <4 x float> %A, %B; + %tmp10 = fadd <4 x float> %tmp8, %tmp9; + + ret <4 x float> %tmp10 +} + ; Function Attrs: nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1