From 514d703ff6d7714b30b6c702aeb8c6d7a9967d75 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 6 Mar 2014 16:04:00 +0000 Subject: [PATCH] [AArch64] This is a work in progress to provide a machine description for the Cortex-A53 subtarget in the AArch64 backend. This patch lays the ground work to annotate each AArch64 instruction (no NEON yet) with a list of SchedReadWrite types. The patch also provides the Cortex-A53 processor resources, maps those to the default SchedReadWrites, and provides basic latency. NEON support will be added in a subsequent patch with proper forwarding logic. Verification was done by setting the pre-RA scheduler to linearize to better gauge the effect of the MIScheduler. Even without modeling the forwarding logic, the results show a modest improvement for Cortex-A53. Reviewers: apazos, mcrosier, atrick Patch by Dave Estes ! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203125 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64.td | 19 +- lib/Target/AArch64/AArch64InstrInfo.td | 513 ++++++++++++++-------- lib/Target/AArch64/AArch64Schedule.td | 62 +++ lib/Target/AArch64/AArch64ScheduleA53.td | 130 ++++++ lib/Target/AArch64/AArch64Subtarget.h | 5 + test/CodeGen/AArch64/misched-basic-A53.ll | 83 ++++ 6 files changed, 628 insertions(+), 184 deletions(-) create mode 100644 lib/Target/AArch64/AArch64ScheduleA53.td create mode 100644 test/CodeGen/AArch64/misched-basic-A53.ll diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index af0136df9f0..d0f9bd46002 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -41,13 +41,20 @@ class ProcNoItin Features> def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>; -def : ProcNoItin<"cortex-a53", [FeatureFPARMv8, - FeatureNEON, - FeatureCrypto]>; +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", + [FeatureFPARMv8, + FeatureNEON, + FeatureCrypto]>; -def : 
ProcNoItin<"cortex-a57", [FeatureFPARMv8, - FeatureNEON, - FeatureCrypto]>; +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", + [FeatureFPARMv8, + FeatureNEON, + FeatureCrypto]>; + +def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; +def : Processor<"cortex-a57", NoItineraries, [ProcA57]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 16ab6b04bf0..4588eb73c5d 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -350,33 +350,39 @@ multiclass addsub_exts; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def w_uxth : A64I_addsubext; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def w_uxtw : A64I_addsubext; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def w_sxtb : A64I_addsubext; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def w_sxth : A64I_addsubext; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def w_sxtw : A64I_addsubext; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } // These two could be merge in with the above, but their patterns aren't really @@ -388,30 +394,32 @@ multiclass addsub_xxtx; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111, outs, (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3), !strconcat(asmop, "$Rn, $Rm, $Imm3"), [/* No Pattern: same as uxtx */], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } multiclass addsub_wxtx { def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, - outs, - (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No pattern: probably same as uxtw */], - NoItinerary>; + outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No pattern: 
probably same as uxtw */], + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111, - outs, - (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No Pattern: probably same as uxtw */], - NoItinerary>; + outs, (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: probably same as uxtw */], + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } class SetRD @@ -657,7 +665,8 @@ multiclass addsubimm_varieties shift, (ins GPRsp:$Rn, imm_operand:$Imm12), !strconcat(asmop, "\t$Rd, $Rn, $Imm12"), [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU]>; // S variants can read SP but would write to ZR @@ -666,7 +675,8 @@ multiclass addsubimm_varieties shift, (ins GPRsp:$Rn, imm_operand:$Imm12), !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"), [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))], - NoItinerary> { + NoItinerary>, + Sched<[WriteALU, ReadALU]> { let Defs = [NZCV]; } @@ -678,7 +688,8 @@ multiclass addsubimm_varieties shift, !strconcat(cmpasmop, " $Rn, $Imm12"), [(set NZCV, (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))], - NoItinerary> { + NoItinerary>, + Sched<[WriteALU, ReadALU]> { let Rd = 0b11111; let Defs = [NZCV]; let isCompare = 1; @@ -740,7 +751,7 @@ defm MOVww : MOVsp; // Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS //===------------------------------- -// 1. The "shifed register" operands. Shared with logical insts. +// 1. The "shifted register" operands. Shared with logical insts. 
//===------------------------------- multiclass shift_operands { @@ -800,7 +811,8 @@ multiclass addsub_shifts("lsl_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU]>; def _lsr : A64I_addsubshift("lsr_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU]>; def _asr : A64I_addsubshift("asr_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU]>; } def _noshift @@ -906,7 +920,8 @@ multiclass cmp_shifts("lsl_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteCMP, ReadCMP, ReadCMP]>; def _lsr : A64I_addsubshift("lsr_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteCMP, ReadCMP, ReadCMP]>; def _asr : A64I_addsubshift("asr_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteCMP, ReadCMP, ReadCMP]>; } def _noshift @@ -953,12 +970,14 @@ multiclass A64I_addsubcarrySizes { def www : A64I_addsubcarry<0b0, op, s, 0b000000, (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def xxx : A64I_addsubcarry<0b1, op, s, 0b000000, (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } } @@ -1044,14 +1063,16 @@ multiclass A64I_bitfieldSizes opc, string asmop> { def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { let DecoderMethod = "DecodeBitfieldInstruction"; } def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { let DecoderMethod = 
"DecodeBitfieldInstruction"; } } @@ -1064,7 +1085,8 @@ defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">; def BFMwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { let DecoderMethod = "DecodeBitfieldInstruction"; let Constraints = "$src = $Rd"; } @@ -1072,7 +1094,8 @@ def BFMwwii : def BFMxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { let DecoderMethod = "DecodeBitfieldInstruction"; let Constraints = "$src = $Rd"; } @@ -1094,7 +1117,8 @@ class A64I_bf_ext opc, RegisterClass GPRDest, ValueType dty, : A64I_bitfield { + [(set dty:$Rd, pattern)], NoItinerary>, + Sched<[WriteALU, ReadALU]> { let ImmR = 0b000000; let ImmS = imms; } @@ -1148,7 +1172,8 @@ multiclass A64I_shift opc, string asmop, SDNode opnode> { (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR), !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))], - NoItinerary> { + NoItinerary>, + Sched<[WriteALU, ReadALU]> { let ImmS = 31; } @@ -1156,7 +1181,8 @@ multiclass A64I_shift opc, string asmop, SDNode opnode> { (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR), !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))], - NoItinerary> { + NoItinerary>, + Sched<[WriteALU, ReadALU]> { let ImmS = 63; } @@ -1197,7 +1223,8 @@ class A64I_bitfield_lsl { + NoItinerary>, + Sched<[WriteALU, ReadALU]> { bits<12> FullImm; let ImmR = FullImm{5-0}; let ImmS = FullImm{11-6}; @@ -1244,7 +1271,8 @@ multiclass A64I_bitfield_extract opc, string asmop, SDNode op> { (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), 
!strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))], - NoItinerary> { + NoItinerary>, + Sched<[WriteALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; } @@ -1253,7 +1281,8 @@ multiclass A64I_bitfield_extract opc, string asmop, SDNode op> { (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))], - NoItinerary> { + NoItinerary>, + Sched<[WriteALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; } @@ -1264,16 +1293,18 @@ defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; // Again, variants based on BFM modify Rd so need it as an input too. def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; } def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1353,7 +1384,8 @@ multiclass A64I_bitfield_insert opc, string asmop> { def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { // As above, no disassembler allowed. 
let isAsmParserOnly = 1; } @@ -1361,7 +1393,8 @@ multiclass A64I_bitfield_insert opc, string asmop> { def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; } @@ -1373,7 +1406,8 @@ defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1381,7 +1415,8 @@ def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, + Sched<[WriteALU, ReadALU]> { // As above, no disassembler allowed. 
let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1418,14 +1453,16 @@ multiclass cmpbr_sizes { (ins GPR64:$Rt, bcc_target:$Label), !strconcat(asmop,"\t$Rt, $Label"), [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)], - NoItinerary>; + NoItinerary>, + Sched<[WriteBr, ReadBr]>; def w : A64I_cmpbr<0b0, op, (outs), (ins GPR32:$Rt, bcc_target:$Label), !strconcat(asmop,"\t$Rt, $Label"), [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)], - NoItinerary>; + NoItinerary>, + Sched<[WriteBr, ReadBr]>; } } @@ -1456,7 +1493,8 @@ def cond_code : Operand, ImmLeaf { + NoItinerary>, + Sched<[WriteBr]> { let Uses = [NZCV]; let isBranch = 1; let isTerminator = 1; @@ -1502,7 +1540,8 @@ class A64I_condcmpimmImpl : A64I_condcmpimm { + [], NoItinerary>, + Sched<[WriteCMP, ReadCMP]> { let Defs = [NZCV]; } @@ -1568,7 +1607,8 @@ multiclass A64I_condselSizes op2, string asmop, (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond), !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), [(set i32:$Rd, (select i32:$Rn, i32:$Rm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteCMP, ReadCMP]>; def xxxc : A64I_condsel<0b1, op, 0b0, op2, @@ -1576,7 +1616,8 @@ multiclass A64I_condselSizes op2, string asmop, (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond), !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), [(set i64:$Rd, (select i64:$Rn, i64:$Rm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteCMP, ReadCMP]>; } } @@ -1686,7 +1727,8 @@ class A64I_dp_1src_impl opcode, string asmop, (outs GPRrc:$Rd), (ins GPRrc:$Rn), patterns, - itin>; + itin>, + Sched<[WriteALU, ReadALU]>; multiclass A64I_dp_1src opcode, string asmop> { let hasSideEffects = 0 in { @@ -1742,7 +1784,8 @@ class dp_2src_impl opcode, string asmop, list patterns, (outs GPRsp:$Rd), (ins GPRsp:$Rn, GPRsp:$Rm), patterns, - itin>; + itin>, + Sched<[WriteALU, ReadALU, ReadALU]>; multiclass dp_2src_crc { def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0}, @@ -1793,13 +1836,17 @@ multiclass dp_2src opcode, string asmop, SDPatternOperator op> { defm CRC32 : 
dp_2src_crc<0b0, "crc32">; defm CRC32C : dp_2src_crc<0b1, "crc32c">; -defm UDIV : dp_2src<0b000010, "udiv", udiv>; -defm SDIV : dp_2src<0b000011, "sdiv", sdiv>; +let SchedRW = [WriteDiv, ReadDiv, ReadDiv] in { + defm UDIV : dp_2src<0b000010, "udiv", udiv>; + defm SDIV : dp_2src<0b000011, "sdiv", sdiv>; +} -defm LSLV : dp_2src_zext<0b001000, "lsl", shl>; -defm LSRV : dp_2src_zext<0b001001, "lsr", srl>; -defm ASRV : dp_2src_zext<0b001010, "asr", sra>; -defm RORV : dp_2src_zext<0b001011, "ror", rotr>; +let SchedRW = [WriteALUs, ReadALU, ReadALU] in { + defm LSLV : dp_2src_zext<0b001000, "lsl", shl>; + defm LSRV : dp_2src_zext<0b001001, "lsr", srl>; + defm ASRV : dp_2src_zext<0b001010, "asr", sra>; + defm RORV : dp_2src_zext<0b001011, "ror", rotr>; +} // Extra patterns for an incoming 64-bit value for a 32-bit // operation. Since the LLVM operations are undefined (as in C) if the @@ -1832,7 +1879,8 @@ class A64I_dp3_4operand opcode, RegisterClass AccReg, : A64I_dp3 { + [(set AccTy:$Rd, pattern)], NoItinerary>, + Sched<[WriteMAC, ReadMAC, ReadMAC, ReadMAC]> { RegisterClass AccGPR = AccReg; RegisterClass SrcGPR = SrcReg; } @@ -1862,13 +1910,15 @@ let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in { (ins GPR64:$Rn, GPR64:$Rm), "umulh\t$Rd, $Rn, $Rm", [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteMAC, ReadMAC, ReadMAC]>; def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), "smulh\t$Rd, $Rn, $Rm", [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteMAC, ReadMAC, ReadMAC]>; } multiclass A64I_dp3_3operand { class A64I_exceptImpl opc, bits<2> ll, string asmop> : A64I_exception { + !strconcat(asmop, "\t$UImm16"), [], NoItinerary>, + Sched<[WriteBr]> { let isBranch = 1; let isTerminator = 1; } @@ -1947,14 +1998,16 @@ def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0, "extr\t$Rd, $Rn, $Rm, $LSB", [(set i32:$Rd, (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))], - NoItinerary>; + 
NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1, (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB), "extr\t$Rd, $Rn, $Rm, $LSB", [(set i64:$Rd, (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def : InstAlias<"ror $Rd, $Rs, $LSB", (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>; @@ -2001,12 +2054,14 @@ def fpz64movi : Operand, multiclass A64I_fpcmpSignal type, bit imm, dag ins, dag pattern> { def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, (outs), ins, "fcmp\t$Rn, $Rm", [pattern], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Defs = [NZCV]; } def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0}, - (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> { + (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Defs = [NZCV]; } } @@ -2035,7 +2090,8 @@ class A64I_fpccmpImpl type, bit op, RegisterClass FPR, string asmop> (outs), (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Defs = [NZCV]; } @@ -2053,9 +2109,10 @@ let Uses = [NZCV] in { def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond), "fcsel\t$Rd, $Rn, $Rm, $Cond", - [(set f32:$Rd, + [(set f32:$Rd, (simple_select f32:$Rn, f32:$Rm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd), @@ -2063,7 +2120,8 @@ let Uses = [NZCV] in { "fcsel\t$Rd, $Rn, $Rm, $Cond", [(set f64:$Rd, (simple_select f64:$Rn, f64:$Rm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } 
//===----------------------------------------------------------------------===// @@ -2081,18 +2139,22 @@ multiclass A64I_fpdp1sizes opcode, string asmstr, def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn), !strconcat(asmstr, "\t$Rd, $Rn"), [(set f32:$Rd, (opnode f32:$Rn))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn), !strconcat(asmstr, "\t$Rd, $Rn"), [(set f64:$Rd, (opnode f64:$Rn))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">; defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>; defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>; -defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>; +let SchedRW = [WriteFPSqrt, ReadFPSqrt] in { + defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>; +} defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">; defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>; @@ -2121,7 +2183,8 @@ class A64I_fpdp1_fcvt {0,0,0,1, DestReg.t1, DestReg.t0}, (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn), "fcvt\t$Rd, $Rn", - [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>; + [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def FCVTds : A64I_fpdp1_fcvt; def FCVThs : A64I_fpdp1_fcvt; @@ -2146,18 +2209,22 @@ multiclass A64I_fpdp2sizes opcode, string asmstr, (ins FPR32:$Rn, FPR32:$Rm), !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm), !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } let isCommutable = 1 in { - defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>; + let SchedRW = 
[WriteFPMul, ReadFPMul, ReadFPMul] in { + defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>; + } defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>; // No patterns for these. @@ -2166,12 +2233,16 @@ let isCommutable = 1 in { defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>; defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>; - defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul", - PatFrag<(ops node:$lhs, node:$rhs), - (fneg (fmul node:$lhs, node:$rhs))> >; + let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { + defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul", + PatFrag<(ops node:$lhs, node:$rhs), + (fneg (fmul node:$lhs, node:$rhs))> >; + } } -defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>; +let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { + defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>; +} defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>; //===----------------------------------------------------------------------===// @@ -2192,7 +2263,8 @@ class A64I_fpdp3Impl; + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]>; def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>; def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>; @@ -2271,14 +2343,15 @@ class cvtfix_i64_op // worth going for a multiclass here. Oh well. 
class A64I_fptofix type, bits<3> opcode, - RegisterClass GPR, RegisterClass FPR, - ValueType DstTy, ValueType SrcTy, + RegisterClass GPR, RegisterClass FPR, + ValueType DstTy, ValueType SrcTy, Operand scale_op, string asmop, SDNode cvtop> : A64I_fpfixed; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32, cvtfix_i32_op, "fcvtzs", fp_to_sint>; @@ -2307,7 +2380,8 @@ class A64I_fixtofp type, bits<3> opcode, (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale), !strconcat(asmop, "\t$Rd, $Rn, $Scale"), [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32, cvtfix_i32_op, "scvtf", sint_to_fp>; @@ -2334,7 +2408,8 @@ def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64, class A64I_fpintI type, bits<2> rmode, bits<3> opcode, RegisterClass DestPR, RegisterClass SrcPR, string asmop> : A64I_fpint; + !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass A64I_fptointRM rmode, bit o2, string asmop> { def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, @@ -2420,11 +2495,13 @@ def lane1 : Operand { let DecoderMethod = "DecodeFMOVLaneInstruction" in { def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110, (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane), - "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>; + "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111, (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane), - "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>; + "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } let Predicates = [HasFPARMv8] in { @@ -2471,7 +2548,8 @@ class A64I_fpimm_impl type, RegisterClass Reg, ValueType VT, (ins fmov_operand:$Imm8), "fmov\t$Rd, $Imm8", [(set VT:$Rd, fmov_operand:$Imm8)], - NoItinerary>; + 
NoItinerary>, + Sched<[WriteFPALU]>; def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>; def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>; @@ -2520,7 +2598,8 @@ defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">; class A64I_LDRlitSimple opc, bit v, RegisterClass OutReg, list patterns = []> : A64I_LDRlit; + "ldr\t$Rt, $Imm19", patterns, NoItinerary>, + Sched<[WriteLd]>; let mayLoad = 1 in { def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>; @@ -2541,12 +2620,14 @@ let mayLoad = 1 in { (outs GPR64:$Rt), (ins ldrlit_label:$Imm19), "ldrsw\t$Rt, $Imm19", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd]>; def PRFM_lit : A64I_LDRlit<0b11, 0b0, (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19), "prfm\t$Rt, $Imm19", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd]>; } //===----------------------------------------------------------------------===// @@ -2638,19 +2719,23 @@ class A64I_LRexs_impl size, bits<3> opcode, string asm, dag outs, multiclass A64I_LRex opcode> { def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd]>; def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd]>; def _word: A64I_LRexs_impl<0b10, opcode, asmstr, (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd]>; def _dword: A64I_LRexs_impl<0b11, opcode, asmstr, (outs GPR64:$Rt), (ins GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd]>; } defm LDXR : A64I_LRex<"ldxr", 0b000>; @@ -2776,12 +2861,14 @@ multiclass A64I_LPex opcode> { def _word: A64I_LPexs_impl<0b10, opcode, asmstr, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd]>; def _dword: A64I_LPexs_impl<0b11, opcode, asmstr, (outs GPR64:$Rt, GPR64:$Rt2), (ins 
GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd]>; } defm LDXP : A64I_LPex<"ldxp", 0b010>; @@ -3004,7 +3091,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, def _LDR : A64I_LSunsigimm { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; } def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]", @@ -3016,13 +3104,15 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR:$Rt), (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd, ReadLd, ReadLd]>; def _Xm_RegOffset_LDR : A64I_LSregoff; + [], NoItinerary>, + Sched<[WriteLd, ReadLd, ReadLd]>; } def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]", (!cast(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn, @@ -3058,7 +3148,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, def _LDUR : A64I_LSunalimm { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; } def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]", @@ -3081,7 +3172,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3104,7 +3196,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3164,7 +3257,8 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR32:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12), "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { 
let mayLoad = 1; } def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", @@ -3174,7 +3268,8 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR64:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12), "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; } def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", @@ -3186,25 +3281,29 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR32:$Rt), (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd, ReadLd, ReadLd]>; def w_Xm_RegOffset : A64I_LSregoff; + [], NoItinerary>, + Sched<[WriteLd, ReadLd, ReadLd]>; def x_Wm_RegOffset : A64I_LSregoff; + [], NoItinerary>, + Sched<[WriteLd, ReadLd, ReadLd]>; def x_Xm_RegOffset : A64I_LSregoff; + [], NoItinerary>, + Sched<[WriteLd, ReadLd, ReadLd]>; } def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", (!cast(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn, @@ -3221,13 +3320,15 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR32:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd, ReadLd]>; def x_U : A64I_LSunalimm; + [], NoItinerary>, + Sched<[WriteLd, ReadLd]>; // Post-indexed @@ -3235,7 +3336,8 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR32:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3244,7 +3346,8 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR64:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, 
[$Rn], $SImm9", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3254,7 +3357,8 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR32:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3263,7 +3367,8 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (outs GPR64:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3281,7 +3386,8 @@ def LDRSWx (outs GPR64:$Rt), (ins GPR64xsp:$Rn, word_uimm12:$UImm12), "ldrsw\t$Rt, [$Rn, $UImm12]", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; } def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; @@ -3291,13 +3397,15 @@ let mayLoad = 1 in { (outs GPR64:$Rt), (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext), "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd, ReadLd, ReadLd]>; def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1, (outs GPR64:$Rt), (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext), "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd, ReadLd, ReadLd]>; } def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]", (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>; @@ -3308,7 +3416,8 @@ def LDURSWx (outs GPR64:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldursw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; } def : InstAlias<"ldursw $Rt, 
[$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; @@ -3318,7 +3427,8 @@ def LDRSWx_PostInd (outs GPR64:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrsw\t$Rt, [$Rn], $SImm9", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3328,7 +3438,8 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, (outs GPR64:$Rt, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrsw\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3341,7 +3452,8 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs), (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12), "prfm\t$Rt, [$Rn, $UImm12]", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WritePreLd, ReadPreLd]> { let mayLoad = 1; } def : InstAlias<"prfm $Rt, [$Rn]", @@ -3352,12 +3464,14 @@ let mayLoad = 1 in { (ins prefetch_op:$Rt, GPR64xsp:$Rn, GPR32:$Rm, dword_Wm_regext:$Ext), "prfm\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WritePreLd, ReadPreLd]>; def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs), (ins prefetch_op:$Rt, GPR64xsp:$Rn, GPR64:$Rm, dword_Xm_regext:$Ext), "prfm\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WritePreLd, ReadPreLd]>; } def : InstAlias<"prfm $Rt, [$Rn, $Rm]", @@ -3368,7 +3482,8 @@ def : InstAlias<"prfm $Rt, [$Rn, $Rm]", def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs), (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9), "prfum\t$Rt, [$Rn, $SImm9]", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WritePreLd, ReadPreLd]> { let mayLoad = 1; } def : InstAlias<"prfum $Rt, [$Rn]", @@ -3388,7 +3503,8 @@ multiclass A64I_LDTRSTTR size, string asmsuffix, RegisterClass GPR, def _UnPriv_STR : 
A64I_LSunpriv { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayStore = 1; } @@ -3398,7 +3514,8 @@ multiclass A64I_LDTRSTTR size, string asmsuffix, RegisterClass GPR, def _UnPriv_LDR : A64I_LSunpriv { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; } @@ -3427,13 +3544,15 @@ multiclass A64I_LDTR_signed size, string asmopcode, string prefix> { (outs GPR32:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteLd, ReadLd]>; def x : A64I_LSunpriv; + [], NoItinerary>, + Sched<[WriteLd, ReadLd]>; } def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", @@ -3454,7 +3573,8 @@ def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10, (outs GPR64:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), "ldtrsw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; } def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; @@ -3516,7 +3636,8 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, Operand simm7, string prefix> { def _STR : A64I_LSPoffset { + "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayStore = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3527,7 +3648,8 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, def _LDR : A64I_LSPoffset { + "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3553,7 +3675,8 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3572,7 +3695,8 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (outs 
SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3591,7 +3715,8 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, def _NonTemp_LDR : A64I_LSPnontemp { + "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3616,7 +3741,8 @@ defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { + "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3637,7 +3763,8 @@ def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3682,14 +3809,16 @@ multiclass A64I_logimmSizes opc, string asmop, SDNode opnode> { !strconcat(asmop, "\t$Rd, $Rn, $Imm"), [(set i32:$Rd, (opnode i32:$Rn, logical_imm32_operand:$Imm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU]>; def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd), (ins GPR64:$Rn, logical_imm64_operand:$Imm), !strconcat(asmop, "\t$Rd, $Rn, $Imm"), [(set i64:$Rd, (opnode i64:$Rn, logical_imm64_operand:$Imm))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU]>; } defm AND : A64I_logimmSizes<0b00, "and", and>; @@ -3700,12 +3829,14 @@ let Defs = [NZCV] in { def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), 
(ins GPR32:$Rn, logical_imm32_operand:$Imm), "ands\t$Rd, $Rn, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteALU, ReadALU]>; def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd), (ins GPR64:$Rn, logical_imm64_operand:$Imm), "ands\t$Rd, $Rn, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteALU, ReadALU]>; } @@ -3750,7 +3881,8 @@ multiclass logical_shifts opc, [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, !cast("lsl_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _lsr : A64I_logicalshift opc, [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, !cast("lsr_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _asr : A64I_logicalshift opc, [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, !cast("asr_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _ror : A64I_logicalshift opc, [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm, !cast("ror_operand_" # ty):$Imm6)) )], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } def _noshift @@ -3835,7 +3970,8 @@ multiclass tst_shifts { [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm, !cast("lsl_operand_" # ty):$Imm6)), 0, signed_cond))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _lsr : A64I_logicalshift { [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm, !cast("lsr_operand_" # ty):$Imm6)), 0, signed_cond))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _asr : A64I_logicalshift { [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm, !cast("asr_operand_" # ty):$Imm6)), 0, signed_cond))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _ror : A64I_logicalshift { [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm, !cast("ror_operand_" # ty):$Imm6)), 0, signed_cond))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } def _noshift : InstAlias<"tst $Rn, 
$Rm", @@ -3889,7 +4028,8 @@ multiclass mvn_shifts { "mvn\t$Rd, $Rm, $Imm6", [(set ty:$Rd, (not (shl ty:$Rm, !cast("lsl_operand_" # ty):$Imm6)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _lsr : A64I_logicalshift { "mvn\t$Rd, $Rm, $Imm6", [(set ty:$Rd, (not (srl ty:$Rm, !cast("lsr_operand_" # ty):$Imm6)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _asr : A64I_logicalshift { "mvn\t$Rd, $Rm, $Imm6", [(set ty:$Rd, (not (sra ty:$Rm, !cast("asr_operand_" # ty):$Imm6)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; def _ror : A64I_logicalshift { "mvn\t$Rd, $Rm, $Imm6", [(set ty:$Rd, (not (rotr ty:$Rm, !cast("lsl_operand_" # ty):$Imm6)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } def _noshift : InstAlias<"mvn $Rn, $Rm", @@ -3972,7 +4115,8 @@ multiclass A64I_movwSizes opc, string asmop, dag ins32bit, def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit, !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteALU]> { bits<18> FullImm; let UImm16 = FullImm{15-0}; let Shift = FullImm{17-16}; @@ -3980,7 +4124,8 @@ multiclass A64I_movwSizes opc, string asmop, dag ins32bit, def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit, !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteALU]> { bits<18> FullImm; let UImm16 = FullImm{15-0}; let Shift = FullImm{17-16}; @@ -4088,10 +4233,12 @@ def adrp_label : Operand { let hasSideEffects = 0 in { def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), - "adr\t$Rd, $Label", [], NoItinerary>; + "adr\t$Rd, $Label", [], NoItinerary>, + Sched<[WriteALUs]>; def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label), - "adrp\t$Rd, $Label", [], NoItinerary>; + "adrp\t$Rd, $Label", [], NoItinerary>, + Sched<[WriteALUs]>; } //===----------------------------------------------------------------------===// @@ -4377,14 
+4524,16 @@ let isBranch = 1, isTerminator = 1 in { "tbz\t$Rt, $Imm, $Label", [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), A64eq, bb:$Label)], - NoItinerary>; + NoItinerary>, + Sched<[WriteBr]>; def TBNZxii : A64I_TBimm<0b1, (outs), (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), "tbnz\t$Rt, $Imm, $Label", [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), A64ne, bb:$Label)], - NoItinerary>; + NoItinerary>, + Sched<[WriteBr]>; // Note, these instructions overlap with the above 64-bit patterns. This is @@ -4396,7 +4545,8 @@ let isBranch = 1, isTerminator = 1 in { "tbz\t$Rt, $Imm, $Label", [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), A64eq, bb:$Label)], - NoItinerary> { + NoItinerary>, + Sched<[WriteBr]> { let Imm{5} = 0b0; } @@ -4405,7 +4555,8 @@ let isBranch = 1, isTerminator = 1 in { "tbnz\t$Rt, $Imm, $Label", [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), A64ne, bb:$Label)], - NoItinerary> { + NoItinerary>, + Sched<[WriteBr]> { let Imm{5} = 0b0; } } @@ -4440,7 +4591,8 @@ def blimm_target : Operand { class A64I_BimmImpl patterns, Operand lbl_type> : A64I_Bimm; + NoItinerary>, + Sched<[WriteBr]>; let isBranch = 1 in { def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> { @@ -4448,10 +4600,12 @@ let isBranch = 1 in { let isBarrier = 1; } - def BLimm : A64I_BimmImpl<0b1, "bl", - [(AArch64Call tglobaladdr:$Label)], blimm_target> { - let isCall = 1; - let Defs = [X30]; + let SchedRW = [WriteBrL] in { + def BLimm : A64I_BimmImpl<0b1, "bl", + [(AArch64Call tglobaladdr:$Label)], blimm_target> { + let isCall = 1; + let Defs = [X30]; + } } } @@ -4468,7 +4622,8 @@ class A64I_BregImpl opc, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin = NoItinerary> : A64I_Breg { + outs, ins, asmstr, patterns, itin>, + Sched<[WriteBr]> { let isBranch = 1; let isIndirectBranch = 1; } @@ -4484,11 +4639,13 @@ let isBranch = 1 in { let isTerminator = 1; } - def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), - 
"blr\t$Rn", [(AArch64Call i64:$Rn)]> { - let isBarrier = 0; - let isCall = 1; - let Defs = [X30]; + let SchedRW = [WriteBrL] in { + def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), + "blr\t$Rn", [(AArch64Call i64:$Rn)]> { + let isBarrier = 0; + let isCall = 1; + let Defs = [X30]; + } } def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn), diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td index e17cdaa1f6d..6fcb1116b6c 100644 --- a/lib/Target/AArch64/AArch64Schedule.td +++ b/lib/Target/AArch64/AArch64Schedule.td @@ -7,4 +7,66 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Generic processor itineraries for legacy compatibility. + def GenericItineraries : ProcessorItineraries<[], [], []>; + + +//===----------------------------------------------------------------------===// +// Base SchedReadWrite types + +// Basic ALU +def WriteALU : SchedWrite; // Generic: may contain shift and/or ALU operation +def WriteALUs : SchedWrite; // Shift only with no ALU operation +def ReadALU : SchedRead; // Operand not needed for shifting +def ReadALUs : SchedRead; // Operand needed for shifting + +// Multiply with optional accumulate +def WriteMAC : SchedWrite; +def ReadMAC : SchedRead; + +// Compares +def WriteCMP : SchedWrite; +def ReadCMP : SchedRead; + +// Division +def WriteDiv : SchedWrite; +def ReadDiv : SchedRead; + +// Loads +def WriteLd : SchedWrite; +def WritePreLd : SchedWrite; +def ReadLd : SchedRead; +def ReadPreLd : SchedRead; + +// Branches +def WriteBr : SchedWrite; +def WriteBrL : SchedWrite; +def ReadBr : SchedRead; + +// Floating Point ALU +def WriteFPALU : SchedWrite; +def ReadFPALU : SchedRead; + +// Floating Point MAC, Mul, Div, Sqrt +// Most processors will simply send all of these down a dedicated pipe, but +// they're explicitly seperated here for flexibility of modeling later. 
May +// consider consolidating them into a single WriteFPXXXX type in the future. +def WriteFPMAC : SchedWrite; +def WriteFPMul : SchedWrite; +def WriteFPDiv : SchedWrite; +def WriteFPSqrt : SchedWrite; +def ReadFPMAC : SchedRead; +def ReadFPMul : SchedRead; +def ReadFPDiv : SchedRead; +def ReadFPSqrt : SchedRead; + +// Noop +def WriteNoop : SchedWrite; + + +//===----------------------------------------------------------------------===// +// Subtarget specific Machine Models. + +include "AArch64ScheduleA53.td" diff --git a/lib/Target/AArch64/AArch64ScheduleA53.td b/lib/Target/AArch64/AArch64ScheduleA53.td new file mode 100644 index 00000000000..e288a24eb2c --- /dev/null +++ b/lib/Target/AArch64/AArch64ScheduleA53.td @@ -0,0 +1,130 @@ +//=- AArch64ScheduleA53.td - ARM Cortex-A53 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A53 processors. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedModel.h for details. + +// Cortex-A53 machine model for scheduling and other instruction cost heuristics. +def CortexA53Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 2; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. 
+ let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation + // Specification - Instruction Timings" + // v 1.0 Spreadsheet +} + + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using the default BufferSize = -1. +// Cortex-A53 is in-order and therefore should be using BufferSize = 0. The +// current configuration performs better with the basic latencies provided so +// far. Will revisit BufferSize once the latency information is more accurate. + +let SchedModel = CortexA53Model in { + +def A53UnitALU : ProcResource<2>; // Int ALU +def A53UnitMAC : ProcResource<1>; // Int MAC +def A53UnitDiv : ProcResource<1>; // Int Division +def A53UnitLdSt : ProcResource<1>; // Load/Store +def A53UnitB : ProcResource<1>; // Branch +def A53UnitFPALU : ProcResource<1>; // FP ALU +def A53UnitFPMDS : ProcResource<1>; // FP Mult/Div/Sqrt + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency. + +// Issue - Every instruction must consume an A53WriteIssue. Optionally, +// instructions that cannot be dual-issued will also include the +// A53WriteIssue2nd in their SchedRW list. That second WriteRes will +// ensure that a second issue slot is consumed. +def A53WriteIssue : SchedWriteRes<[]>; +def A53WriteIssue2nd : SchedWriteRes<[]> { let Latency = 0; } + +// ALU - These are reduced to 1 despite a true latency of 4 in order to easily +// model forwarding logic. Once forwarding is properly modelled, then +// they'll be corrected. 
+def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +// MAC +def : WriteRes { let Latency = 4; } + +// Div +def : WriteRes { let Latency = 4; } + +// Load +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Branch +def : WriteRes; +def : WriteRes; + +// FP ALU +def : WriteRes {let Latency = 6; } + +// FP MAC, Mul, Div, Sqrt +// Using Double Precision numbers for now as a worst case. Additionally, not +// modeling the exact hazard but instead treating the whole pipe as a hazard. +// As an example VMUL, VMLA, and others are actually pipelined. VDIV and VSQRT +// have a total latency of 33 and 32 respectively but only a hazard of 29 and +// 28 (double-prescion example). +def : WriteRes { let Latency = 10; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 33; + let ResourceCycles = [29]; } +def : WriteRes { let Latency = 32; + let ResourceCycles = [28]; } + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types. + +// No forwarding defined for ReadALU yet. +def : ReadAdvance; + +// No forwarding defined for ReadCMP yet. +def : ReadAdvance; + +// No forwarding defined for ReadBr yet. +def : ReadAdvance; + +// No forwarding defined for ReadMAC yet. +def : ReadAdvance; + +// No forwarding defined for ReadDiv yet. +def : ReadAdvance; + +// No forwarding defined for ReadLd, ReadPreLd yet. +def : ReadAdvance; +def : ReadAdvance; + +// No forwarding defined for ReadFPALU yet. +def : ReadAdvance; + +// No forwarding defined for ReadFPMAC/Mul/Div/Sqrt yet. 
+def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index adceba7842d..68c6c4b63cc 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -29,6 +29,11 @@ class GlobalValue; class AArch64Subtarget : public AArch64GenSubtargetInfo { virtual void anchor(); protected: + enum ARMProcFamilyEnum {Others, CortexA53, CortexA57}; + + /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. + ARMProcFamilyEnum ARMProcFamily; + bool HasFPARMv8; bool HasNEON; bool HasCrypto; diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll new file mode 100644 index 00000000000..0d5534eca54 --- /dev/null +++ b/test/CodeGen/AArch64/misched-basic-A53.ll @@ -0,0 +1,83 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; +; The Cortex-A53 machine model will cause the MADD instruction to be scheduled +; much higher than the ADD instructions in order to hide latency. When not +; specifying a subtarget, the MADD will remain near the end of the block. 
+; CHECK: main +; CHECK: *** Final schedule for BB#2 *** +; CHECK: SU(13) +; CHECK: MADDwwww +; CHECK: SU(4) +; CHECK: ADDwwi_lsl0_s +; CHECK: ********** MI Scheduling ********** +@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 +@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %x = alloca [8 x i32], align 4 + %y = alloca [8 x i32], align 4 + %i = alloca i32, align 4 + %xx = alloca i32, align 4 + %yy = alloca i32, align 4 + store i32 0, i32* %retval + %0 = bitcast [8 x i32]* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + %1 = bitcast [8 x i32]* %y to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + store i32 0, i32* %xx, align 4 + store i32 0, i32* %yy, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %2 = load i32* %i, align 4 + %cmp = icmp slt i32 %2, 8 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %3 = load i32* %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom + %4 = load i32* %arrayidx, align 4 + %add = add nsw i32 %4, 1 + store i32 %add, i32* %xx, align 4 + %5 = load i32* %xx, align 4 + %add1 = add nsw i32 %5, 12 + store i32 %add1, i32* %xx, align 4 + %6 = load i32* %xx, align 4 + %add2 = add nsw i32 %6, 23 + store i32 %add2, i32* %xx, align 4 + %7 = load i32* %xx, align 4 + %add3 = add nsw i32 %7, 34 + store i32 %add3, i32* %xx, align 4 + %8 = load i32* %i, align 4 + %idxprom4 = sext i32 %8 to i64 + %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4 + %9 = load i32* %arrayidx5, align 4 + %10 = load i32* %yy, align 
4 + %mul = mul nsw i32 %10, %9 + store i32 %mul, i32* %yy, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %11 = load i32* %i, align 4 + %inc = add nsw i32 %11, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %12 = load i32* %xx, align 4 + %13 = load i32* %yy, align 4 + %add6 = add nsw i32 %12, %13 + ret i32 %add6 +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind }