diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 3dc66a1f238..7d7a641a2e3 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -463,7 +463,7 @@ defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
                          (outs GPR32:$Rd)>;
 
 
-let Rd = 0b11111, isCompare = 1 in {
+let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in {
 defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
                         (outs), extends_to_i64>,
             addsub_xxtx<     0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
@@ -689,7 +689,7 @@ multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
                             [(set NZCV,
                                   (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
                             NoItinerary>,
-           Sched<[WriteALU, ReadALU]> {
+           Sched<[WriteCMP, ReadCMP]> {
     let Rd = 0b11111;
     let Defs = [NZCV];
     let isCompare = 1;
@@ -1086,7 +1086,7 @@ def BFMwwii :
   A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
         (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
         "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-  Sched<[WriteALU, ReadALU]> {
+  Sched<[WriteALU, ReadALU, ReadALU]> {
   let DecoderMethod = "DecodeBitfieldInstruction";
   let Constraints = "$src = $Rd";
 }
@@ -1095,7 +1095,7 @@ def BFMxxii :
   A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
         (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
         "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-  Sched<[WriteALU, ReadALU]> {
+  Sched<[WriteALU, ReadALU, ReadALU]> {
   let DecoderMethod = "DecodeBitfieldInstruction";
   let Constraints = "$src = $Rd";
 }
@@ -1295,7 +1295,7 @@ defm UBFX :  A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
 def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
                           (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
                           "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-                Sched<[WriteALU, ReadALU]> {
+                Sched<[WriteALU, ReadALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1304,7 +1304,7 @@ def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
 def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
                           (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
                           "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-                Sched<[WriteALU, ReadALU]> {
+                Sched<[WriteALU, ReadALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1407,7 +1407,7 @@ defm UBFIZ :  A64I_bitfield_insert<0b10, "ubfiz">;
 def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
                 (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
                 "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-              Sched<[WriteALU, ReadALU]> {
+              Sched<[WriteALU, ReadALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1416,7 +1416,7 @@ def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
 def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
                 (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
                 "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-              Sched<[WriteALU, ReadALU]> {
+              Sched<[WriteALU, ReadALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1560,7 +1560,8 @@ class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
                     (outs),
                     (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
                     !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
-                    [], NoItinerary> {
+                    [], NoItinerary>,
+    Sched<[WriteCMP, ReadCMP, ReadCMP]> {
   let Defs = [NZCV];
 }
 
@@ -1608,7 +1609,7 @@ multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
                             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
                             [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
                             NoItinerary>,
-               Sched<[WriteCMP, ReadCMP]>;
+               Sched<[WriteCMP, ReadCMP, ReadCMP]>;
 
 
     def xxxc : A64I_condsel<0b1, op, 0b0, op2,
@@ -1617,7 +1618,7 @@ multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
                             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
                             [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
                             NoItinerary>,
-               Sched<[WriteCMP, ReadCMP]>;
+               Sched<[WriteCMP, ReadCMP, ReadCMP]>;
   }
 }
 
@@ -1797,7 +1798,8 @@ multiclass dp_2src_crc<bit c, string asmop> {
   def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
                            !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
                            (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
-                           NoItinerary>;
+                           NoItinerary>,
+              Sched<[WriteALU, ReadALU, ReadALU]>;
 }
 
 multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
@@ -2630,7 +2632,7 @@ let mayLoad = 1 in {
                              (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
                              "prfm\t$Rt, $Imm19",
                              [], NoItinerary>,
-                 Sched<[WriteLd]>;
+                 Sched<[WriteLd, ReadLd]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2685,19 +2687,23 @@ class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
 multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
   def _byte:  A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
                               (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                              [], NoItinerary>;
+                              [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>; // def Rs; uses Rt, Rn
 
   def _hword:  A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
                                (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                               [],NoItinerary>;
+                               [],NoItinerary>,
+               Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _word:  A64I_SRexs_impl<0b10, opcode, asmstr,
                               (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                              [], NoItinerary>;
+                              [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
                               (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
-                              [], NoItinerary>;
+                              [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 }
 
 defm STXR  : A64I_SRex<"stxr",  0b000, "STXR">;
@@ -2792,22 +2798,26 @@ multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
   def _byte:  A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
                             (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
                             [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
-                            NoItinerary>;
+                            NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _hword:  A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
                            (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
                            [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
-                           NoItinerary>;
+                           NoItinerary>,
+               Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _word:  A64I_SLexs_impl<0b10, opcode, asmstr,
                            (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
                            [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
-                           NoItinerary>;
+                           NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
                            (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
                            [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
-                           NoItinerary>;
+                           NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 }
 
 defm STLR  : A64I_SLex<"stlr", 0b101, "STLR">;
@@ -2832,12 +2842,14 @@ multiclass A64I_SPex<string asmstr, bits<3> opcode> {
   def _word:  A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
                             (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
                                  GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
 
   def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
                             (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
                                             GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
 }
 
 defm STXP  : A64I_SPex<"stxp", 0b010>;
@@ -2865,13 +2877,13 @@ multiclass A64I_LPex<string asmstr, bits<3> opcode> {
                             (outs GPR32:$Rt, GPR32:$Rt2),
                             (ins GPR64xsp0:$Rn),
                             [], NoItinerary>,
-              Sched<[WriteLd]>;
+              Sched<[WriteLd, WriteLd, ReadLd]>;
 
   def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
                             (outs GPR64:$Rt, GPR64:$Rt2),
                             (ins GPR64xsp0:$Rn),
                             [], NoItinerary>,
-              Sched<[WriteLd]>;
+              Sched<[WriteLd, WriteLd, ReadLd]>;
 }
 
 defm LDXP  : A64I_LPex<"ldxp", 0b010>;
@@ -3085,7 +3097,8 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
   def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
                      (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
                      "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
-                     [], NoItinerary> {
+                     [], NoItinerary>,
+             Sched<[WriteSt, ReadSt, ReadSt]> {
     let mayStore = 1;
   }
   def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
@@ -3126,13 +3139,15 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                   (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
                                                params.regextWm:$Ext),
                                   "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
-                                  [], NoItinerary>;
+                                  [], NoItinerary>,
+                            Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
 
     def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
                                   (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
                                                params.regextXm:$Ext),
                                   "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
-                                  [], NoItinerary>;
+                                  [], NoItinerary>,
+                            Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
   }
   def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
       (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
@@ -3142,7 +3157,8 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
   def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
                              (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
                              "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
-                             [], NoItinerary> {
+                             [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]> {
     let mayStore = 1;
   }
   def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
@@ -3163,7 +3179,8 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                (outs GPR64xsp:$Rn_wb),
                                (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
                                "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
-                               [], NoItinerary> {
+                               [], NoItinerary>,
+                     Sched<[WriteSt, ReadSt, ReadSt]> {
     let Constraints = "$Rn = $Rn_wb";
     let mayStore = 1;
 
@@ -3176,7 +3193,7 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                                     "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
                                     [], NoItinerary>,
-                     Sched<[WriteLd, ReadLd]> {
+                     Sched<[WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3187,7 +3204,8 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                (outs GPR64xsp:$Rn_wb),
                                (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
                                "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
-                               [], NoItinerary> {
+                               [], NoItinerary>,
+                    Sched<[WriteSt, ReadSt, ReadSt]> {
     let Constraints = "$Rn = $Rn_wb";
     let mayStore = 1;
 
@@ -3200,7 +3218,7 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                                     "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
                                     [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3340,7 +3358,7 @@ multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
                                  [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3350,7 +3368,7 @@ multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
                                    (ins GPR64xsp:$Rn, simm9:$SImm9),
                                    "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
                                    [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3361,7 +3379,7 @@ multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
                                  [], NoItinerary>,
-                   Sched<[WriteLd, ReadLd]> {
+                   Sched<[WriteLd, WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3371,7 +3389,7 @@ multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
                                  [], NoItinerary>,
-                   Sched<[WriteLd, ReadLd]> {
+                   Sched<[WriteLd, WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3431,7 +3449,7 @@ def LDRSWx_PostInd
                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                     "ldrsw\t$Rt, [$Rn], $SImm9",
                     [], NoItinerary>,
-      Sched<[WriteLd, ReadLd]> {
+      Sched<[WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3442,7 +3460,7 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrsw\t$Rt, [$Rn, $SImm9]!",
                                  [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3652,7 +3670,7 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                             (outs SomeReg:$Rt, SomeReg:$Rt2),
                             (ins GPR64xsp:$Rn, simm7:$SImm7),
                             "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
-             Sched<[WriteLd, ReadLd]> {
+             Sched<[WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3666,7 +3684,8 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                                     GPR64xsp:$Rn,
                                     simm7:$SImm7),
                                "stp\t$Rt, $Rt2, [$Rn], $SImm7",
-                               [], NoItinerary> {
+                               [], NoItinerary>,
+                     Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
     let mayStore = 1;
     let Constraints = "$Rn = $Rn_wb";
 
@@ -3679,16 +3698,17 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                         (ins GPR64xsp:$Rn, simm7:$SImm7),
                         "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
                         [], NoItinerary>,
-                     Sched<[WriteLd, ReadLd]> {
+                     Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
 
   def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
-                    (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
-                    "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
-                    [], NoItinerary> {
+                       (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+                       "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+                       [], NoItinerary>,
+                    Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
     let mayStore = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -3699,15 +3719,16 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                               (ins GPR64xsp:$Rn, simm7:$SImm7),
                               "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
                               [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
 
   def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
-                    (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
-                    "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+                       (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+                       "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
+                     Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
     let mayStore = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3719,7 +3740,7 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                             (outs SomeReg:$Rt, SomeReg:$Rt2),
                             (ins GPR64xsp:$Rn, simm7:$SImm7),
                             "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
-                     Sched<[WriteLd, ReadLd]> {
+                     Sched<[WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3745,7 +3766,7 @@ def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
                            (outs GPR64:$Rt, GPR64:$Rt2),
                            (ins GPR64xsp:$Rn, word_simm7:$SImm7),
                            "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
-             Sched<[WriteLd, ReadLd]> {
+             Sched<[WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let DecoderMethod = "DecodeLDSTPairInstruction";
 }
@@ -3756,7 +3777,8 @@ def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
                                   (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
                                   (ins GPR64xsp:$Rn, word_simm7:$SImm7),
                                   "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
-                                  [], NoItinerary> {
+                                  [], NoItinerary>,
+                     Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -3767,7 +3789,7 @@ def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
                                    (ins GPR64xsp:$Rn, word_simm7:$SImm7),
                                    "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
                                    [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -4150,7 +4172,8 @@ let isMoveImm = 1, isReMaterializable = 1,
                              (ins movz64_imm:$FullImm)>;
 }
 
-let Constraints = "$src = $Rd" in
+let Constraints = "$src = $Rd",
+    SchedRW = [WriteALU, ReadALU] in // one read: the tied $src operand
 defm MOVK : A64I_movwSizes<0b11, "movk",
                            (ins GPR32:$src, movk32_imm:$FullImm),
                            (ins GPR64:$src, movk64_imm:$FullImm)>;
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 3b919b388b2..0b97e3bdf5a 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -122,14 +122,16 @@ multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
                [(set (v8i8 VPR64:$Rd),
                   (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _16B : NeonI_3VSame<0b1, u, size, opcode,
                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
                [(set (v16i8 VPR128:$Rd),
                   (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 
 }
@@ -143,28 +145,32 @@ multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
               asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
               [(set (v4i16 VPR64:$Rd),
                  (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
               [(set (v8i16 VPR128:$Rd),
                  (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
               [(set (v2i32 VPR64:$Rd),
                  (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
               [(set (v4i32 VPR128:$Rd),
                  (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
@@ -177,14 +183,16 @@ multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
                [(set (v8i8 VPR64:$Rd),
                   (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
                [(set (v16i8 VPR128:$Rd),
                   (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -198,7 +206,8 @@ multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
               [(set (v2i64 VPR128:$Rd),
                  (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -214,21 +223,24 @@ multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
               [(set (ResTy2S VPR64:$Rd),
                  (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
               [(set (ResTy4S VPR128:$Rd),
                  (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
               [(set (ResTy2D VPR128:$Rd),
                  (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
-               NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -286,9 +298,11 @@ def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)),
 
 // Vector Multiply (Integer and Floating-Point)
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { // def Rd; uses Rn, Rm
 defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul,
                                      v2f32, v4f32, v2f64, 1>;
+}
 
 // Patterns to match mul of v1i8/v1i16/v1i32 types
 def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)),
@@ -309,8 +323,10 @@ def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)),
 
 // Vector Multiply (Polynomial)
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { // def Rd; uses Rn, Rm
 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                     int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
+}
 
 // Vector Multiply-accumulate and Multiply-subtract (Integer)
 
@@ -324,7 +340,8 @@ class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
     asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
     [(set (OpTy VPRC:$Rd),
        (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
-    NoItinerary> {
+    NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -335,6 +352,7 @@ def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
 
 
+let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
 def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                              0b0, 0b0, 0b00, 0b10010, Neon_mla>;
 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
@@ -360,6 +378,7 @@ def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                              0b0, 0b1, 0b10, 0b10010, Neon_mls>;
 def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                              0b1, 0b1, 0b10, 0b10010, Neon_mls>;
+}
 
 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
 
@@ -369,7 +388,8 @@ def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                         (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
 
-let Predicates = [HasNEON, UseFusedMAC] in {
+let Predicates = [HasNEON, UseFusedMAC],
+    SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
                                              0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
@@ -403,8 +423,10 @@ def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
 
 // Vector Divide (Floating-Point)
 
+let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { // def Rd; uses Rn, Rm
 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv,
                                      v2f32, v4f32, v2f64, 0>;
+}
 
 // Vector Bitwise Operations
 
@@ -770,49 +792,56 @@ multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
              asmop # "\t$Rd.8b, $Rn.8b, $Imm",
              [(set (v8i8 VPR64:$Rd),
                 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
-             NoItinerary>;
+             NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
              asmop # "\t$Rd.16b, $Rn.16b, $Imm",
              [(set (v16i8 VPR128:$Rd),
                 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
-             NoItinerary>;
+             NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.4h, $Rn.4h, $Imm",
             [(set (v4i16 VPR64:$Rd),
                (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.8h, $Rn.8h, $Imm",
             [(set (v8i16 VPR128:$Rd),
                (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.2s, $Rn.2s, $Imm",
             [(set (v2i32 VPR64:$Rd),
                (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.4s, $Rn.4s, $Imm",
             [(set (v4i32 VPR128:$Rd),
                (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.2d, $Rn.2d, $Imm",
             [(set (v2i64 VPR128:$Rd),
                (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 // Vector Compare Mask Equal to Zero (Integer)
@@ -879,21 +908,24 @@ multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
             asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
             [(set (v2i32 VPR64:$Rd),
                (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
             asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
             [(set (v4i32 VPR128:$Rd),
                (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
             asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
             [(set (v2i64 VPR128:$Rd),
                (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 // Vector Compare Mask Equal to Zero (Floating Point)
@@ -1051,6 +1083,7 @@ defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
                                        int_arm_neon_vpadd,
                                        v2f32, v4f32, v2f64, 1>;
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 // Vector Saturating Doubling Multiply High
 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
                     int_arm_neon_vqdmulh, 1>;
@@ -1063,6 +1096,7 @@ defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
                                       int_aarch64_neon_vmulx,
                                       v2f32, v4f32, v2f64, 1>;
+}
 
 // Patterns to match llvm.aarch64.* intrinsic for 
 // ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
@@ -1202,7 +1236,8 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                               [(set (v2i32 VPR64:$Rd),
                                  (v2i32 (opnode (timm:$Imm),
                                    (neon_mov_imm_LSL_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+               Sched<[WriteFPALU]> {
        bits<2> Simm;
        let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
      }
@@ -1215,7 +1250,8 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                               [(set (v4i32 VPR128:$Rd),
                                  (v4i32 (opnode (timm:$Imm),
                                    (neon_mov_imm_LSL_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+               Sched<[WriteFPALU]> {
       bits<2> Simm;
       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
     }
@@ -1229,7 +1265,8 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                               [(set (v4i16 VPR64:$Rd),
                                  (v4i16 (opnode (timm:$Imm),
                                    (neon_mov_imm_LSLH_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+               Sched<[WriteFPALU]> {
       bit  Simm;
       let cmode = {0b1, 0b0, Simm, 0b0};
     }
@@ -1242,7 +1279,8 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                               [(set (v8i16 VPR128:$Rd),
                                  (v8i16 (opnode (timm:$Imm),
                                    (neon_mov_imm_LSLH_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+               Sched<[WriteFPALU]> {
       bit Simm;
       let cmode = {0b1, 0b0, Simm, 0b0};
      }
@@ -1263,7 +1301,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                     (v2i32 (opnode (v2i32 VPR64:$src),
                       (v2i32 (neonopnode timm:$Imm,
                         neon_mov_imm_LSL_operand:$Simm)))))],
-                 NoItinerary> {
+                 NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]> {
       bits<2> Simm;
       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
     }
@@ -1277,7 +1316,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                     (v4i32 (opnode (v4i32 VPR128:$src),
                       (v4i32 (neonopnode timm:$Imm,
                         neon_mov_imm_LSL_operand:$Simm)))))],
-                 NoItinerary> {
+                 NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]> {
       bits<2> Simm;
       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
     }
@@ -1292,7 +1332,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                     (v4i16 (opnode (v4i16 VPR64:$src),
                        (v4i16 (neonopnode timm:$Imm,
                           neon_mov_imm_LSL_operand:$Simm)))))],
-                 NoItinerary> {
+                 NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]> {
       bit  Simm;
       let cmode = {0b1, 0b0, Simm, 0b1};
     }
@@ -1306,7 +1347,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                     (v8i16 (opnode (v8i16 VPR128:$src),
                       (v8i16 (neonopnode timm:$Imm,
                         neon_mov_imm_LSL_operand:$Simm)))))],
-                 NoItinerary> {
+                 NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]> {
       bit Simm;
       let cmode = {0b1, 0b0, Simm, 0b1};
     }
@@ -1325,7 +1367,8 @@ multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
                               [(set (v2i32 VPR64:$Rd),
                                  (v2i32 (opnode (timm:$Imm),
                                    (neon_mov_imm_MSL_operand:$Simm))))],
-                             NoItinerary> {
+                             NoItinerary>,
+               Sched<[WriteFPALU]> {
        bit Simm;
        let cmode = {0b1, 0b1, 0b0, Simm};
      }
@@ -1338,7 +1381,8 @@ multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
                               [(set (v4i32 VPR128:$Rd),
                                  (v4i32 (opnode (timm:$Imm),
                                    (neon_mov_imm_MSL_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+              Sched<[WriteFPALU]> {
      bit Simm;
      let cmode = {0b1, 0b1, 0b0, Simm};
    }
@@ -1565,7 +1609,8 @@ def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
                                "movi\t$Rd.8b, $Imm",
                                [(set (v8i8 VPR64:$Rd),
                                   (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
-                                NoItinerary> {
+                                NoItinerary>,
+                Sched<[WriteFPALU]> {
   let cmode = 0b1110;
 }
 
@@ -1574,7 +1619,8 @@ def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
                                 "movi\t$Rd.16b, $Imm",
                                 [(set (v16i8 VPR128:$Rd),
                                    (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
-                                 NoItinerary> {
+                                 NoItinerary>,
+                Sched<[WriteFPALU]> {
   let cmode = 0b1110;
 }
 }
@@ -1586,7 +1632,8 @@ def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
                                "movi\t $Rd.2d, $Imm",
                                [(set (v2i64 VPR128:$Rd),
                                   (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
-                               NoItinerary> {
+                               NoItinerary>,
+                Sched<[WriteFPALU]> {
   let cmode = 0b1110;
 }
 }
@@ -1599,7 +1646,8 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1,
                            "movi\t $Rd, $Imm",
                            [(set (v1i64 FPR64:$Rd),
                              (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
-                           NoItinerary> {
+                           NoItinerary>,
+             Sched<[WriteFPALU]> {
   let cmode = 0b1110;
 }
 }
@@ -1613,7 +1661,8 @@ class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
                    "fmov\t$Rd" # asmlane # ", $Imm",
                    [(set (OpTy VPRC:$Rd),
                       (OpTy (Neon_fmovi (timm:$Imm))))],
-                   NoItinerary> {
+                   NoItinerary>,
+    Sched<[WriteFPALU]> {
      let cmode = 0b1111;
    }
 
@@ -1692,7 +1741,8 @@ class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
                      [(set (Ty VPRC:$Rd),
                         (Ty (OpNode (Ty VPRC:$Rn),
                           (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
   // 64-bit vector types.
@@ -1873,7 +1923,8 @@ class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                         (DestTy (shl
                           (DestTy (ExtOp (SrcTy VPR64:$Rn))),
                             (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                        string SrcT, ValueType DestTy, ValueType SrcTy,
@@ -1887,7 +1938,8 @@ class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                           (DestTy (ExtOp
                             (SrcTy (getTop VPR128:$Rn)))),
                               (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
                          SDNode ExtOp> {
@@ -1988,7 +2040,8 @@ class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                      [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
                         (i32 ImmTy:$Imm))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 // shift right (vector by immediate)
 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
@@ -2091,7 +2144,8 @@ class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
            [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
               (Ty (OpNode (Ty VPRC:$Rn),
                 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
-           NoItinerary> {
+           NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -2146,7 +2200,8 @@ class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                      [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
                         (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
-                     NoItinerary> {
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -2201,7 +2256,8 @@ class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
            [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
              (i32 ImmTy:$Imm))))],
-           NoItinerary> {
+           NoItinerary>,
+      Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -2295,14 +2351,16 @@ class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
   : NeonI_2VShiftImm<q, u, opcode,
                      (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
-                     [], NoItinerary>;
+                     [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                        string SrcT, Operand ImmTy>
   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                      (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
-                     [], NoItinerary> {
+                     [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -2461,7 +2519,8 @@ class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                      [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
                        (i32 ImmTy:$Imm))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
                               SDPatternOperator IntOp> {
@@ -2539,28 +2598,32 @@ multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
                 asmop # "\t$Rd, $Rn.8b",
                 [(set (v1i16 FPR16:$Rd),
                     (v1i16 (opnode (v8i8 VPR64:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                 (outs FPR16:$Rd), (ins VPR128:$Rn),
                 asmop # "\t$Rd, $Rn.16b",
                 [(set (v1i16 FPR16:$Rd),
                     (v1i16 (opnode (v16i8 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1s4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                 (outs FPR32:$Rd), (ins VPR64:$Rn),
                 asmop # "\t$Rd, $Rn.4h",
                 [(set (v1i32 FPR32:$Rd),
                     (v1i32 (opnode (v4i16 VPR64:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1s8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                 (outs FPR32:$Rd), (ins VPR128:$Rn),
                 asmop # "\t$Rd, $Rn.8h",
                 [(set (v1i32 FPR32:$Rd),
                     (v1i32 (opnode (v8i16 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     // _1d2s doesn't exist!
 
@@ -2569,7 +2632,8 @@ multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
                 asmop # "\t$Rd, $Rn.4s",
                 [(set (v1i64 FPR64:$Rd),
                     (v1i64 (opnode (v4i32 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
@@ -2585,28 +2649,32 @@ multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
                 asmop # "\t$Rd, $Rn.8b",
                 [(set (v1i8 FPR8:$Rd),
                     (v1i8 (opnode (v8i8 VPR64:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                 (outs FPR8:$Rd), (ins VPR128:$Rn),
                 asmop # "\t$Rd, $Rn.16b",
                 [(set (v1i8 FPR8:$Rd),
                     (v1i8 (opnode (v16i8 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1h4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                 (outs FPR16:$Rd), (ins VPR64:$Rn),
                 asmop # "\t$Rd, $Rn.4h",
                 [(set (v1i16 FPR16:$Rd),
                     (v1i16 (opnode (v4i16 VPR64:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1h8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                 (outs FPR16:$Rd), (ins VPR128:$Rn),
                 asmop # "\t$Rd, $Rn.8h",
                 [(set (v1i16 FPR16:$Rd),
                     (v1i16 (opnode (v8i16 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     // _1s2s doesn't exist!
 
@@ -2615,7 +2683,8 @@ multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
                 asmop # "\t$Rd, $Rn.4s",
                 [(set (v1i32 FPR32:$Rd),
                     (v1i32 (opnode (v4i32 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
@@ -2635,7 +2704,8 @@ multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
                 asmop # "\t$Rd, $Rn.4s",
                 [(set (f32 FPR32:$Rd),
                     (f32 (opnode (v4f32 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
@@ -2658,7 +2728,8 @@ class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
                asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
                [(set (Ty OpVPR:$Rd),
                   (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
                           SDPatternOperator opnode> {
@@ -2717,7 +2788,8 @@ class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
                  [(set (ResTy VPR128:$Rd),
                     (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
                         string asmop, SDPatternOperator opnode,
@@ -2792,7 +2864,8 @@ class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
                  [(set (ResTy VPR128:$Rd),
                     (ResTy (opnode (ResTy VPR128:$Rn),
                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode> {
@@ -2873,7 +2946,8 @@ class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     (ResTy (get_hi
                       (OpTy (opnode (OpTy VPR128:$Rn),
                                     (OpTy VPR128:$Rm))))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
                                 SDPatternOperator opnode, bit Commutable = 0> {
@@ -2901,7 +2975,8 @@ class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                  [(set (ResTy ResVPR:$Rd),
                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 // normal narrow pattern
 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
@@ -2925,7 +3000,8 @@ class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
   : NeonI_3VDiff<q, u, size, opcode,
                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
-                 [], NoItinerary> {
+                 [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let neverHasSideEffects = 1;
 }
@@ -2990,7 +3066,8 @@ class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
                  [(set (ResTy VPR128:$Rd),
                     (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
                                                 (OpTy OpVPR:$Rm))))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
                            SDPatternOperator opnode, bit Commutable = 0> {
@@ -3058,7 +3135,8 @@ class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
                       (ResTy VPR128:$src),
                       (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
                                                  (OpTy OpVPR:$Rm))))))))],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -3098,7 +3176,8 @@ defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
 // Long pattern with 2 operands
 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
                           SDPatternOperator opnode, bit Commutable = 0> {
-  let isCommutable = Commutable in {
+  let isCommutable = Commutable,
+      SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                               opnode, VPR128, VPR64, v8i16, v8i8>;
     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
@@ -3120,7 +3199,8 @@ class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                  [(set (ResTy VPR128:$Rd),
                     (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>;
 
 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
                                    string opnode, bit Commutable = 0> {
@@ -3154,7 +3234,8 @@ class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     (ResTy (opnode
                       (ResTy VPR128:$src),
                       (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -3202,7 +3283,8 @@ class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
                   (ResTy (subop
                     (ResTy VPR128:$src),
                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -3254,8 +3336,10 @@ multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
                                 int_arm_neon_vqdmull, 1>;
+}
 
 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
                                    string opnode, bit Commutable = 0> {
@@ -3299,6 +3383,7 @@ multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in
 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
                               int_aarch64_neon_vmull_p64, 1>;
 
@@ -3319,7 +3404,8 @@ multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
                           (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
                         (v1i64 (scalar_to_vector
                           (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
-                   NoItinerary>;
+                   NoItinerary>,
+      Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>;
   }
 
   def : Pat<(v16i8 (int_aarch64_neon_vmull_p64
@@ -3355,7 +3441,8 @@ class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
                  (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                  asmop # "\t$Rt, [$Rn]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteVecLd, ReadVecLd]> {
   let mayLoad = 1;
   let neverHasSideEffects = 1;
 }
@@ -3409,7 +3496,8 @@ class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
                  (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
                  asmop # "\t$Rt, [$Rn]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
   let mayStore = 1;
   let neverHasSideEffects = 1;
 }
@@ -3642,7 +3730,8 @@ multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
                      (ins GPR64xsp:$Rn, ImmTy:$amt),
                      asmop # "\t$Rt, [$Rn], $amt",
                      [],
-                     NoItinerary> {
+                     NoItinerary>,
+                 Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> {
       let Rm = 0b11111;
     }
 
@@ -3651,7 +3740,8 @@ multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
                         (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                         asmop # "\t$Rt, [$Rn], $Rm",
                         [],
-                        NoItinerary>;
+                        NoItinerary>,
+                    Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>;
   }
 }
 
@@ -3725,7 +3815,8 @@ multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
                      (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
                      asmop # "\t$Rt, [$Rn], $amt",
                      [],
-                     NoItinerary> {
+                     NoItinerary>,
+                 Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
       let Rm = 0b11111;
     }
 
@@ -3734,7 +3825,8 @@ multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
                       asmop # "\t$Rt, [$Rn], $Rm",
                       [],
-                      NoItinerary>;
+                      NoItinerary>,
+                    Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>;
   }
 }
 
@@ -3838,7 +3930,8 @@ class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                       (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                       asmop # "\t$Rt, [$Rn]",
                       [],
-                      NoItinerary> {
+                      NoItinerary>,
+      Sched<[WriteVecLd, ReadVecLd]> {
   let mayLoad = 1;
   let neverHasSideEffects = 1;
 }
@@ -3932,7 +4025,8 @@ class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                          (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
                          asmop # "\t$Rt[$lane], [$Rn]",
                          [],
-                         NoItinerary> {
+                         NoItinerary>,
+      Sched<[WriteVecLd, ReadVecLd, ReadVecLd]> {
   let mayLoad = 1;
   let neverHasSideEffects = 1;
   let hasExtraDefRegAllocReq = 1;
@@ -4017,7 +4111,8 @@ class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                          (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
                          asmop # "\t$Rt[$lane], [$Rn]",
                          [],
-                         NoItinerary> {
+                         NoItinerary>,
+      Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
   let mayStore = 1;
   let neverHasSideEffects = 1;
   let hasExtraDefRegAllocReq = 1;
@@ -4109,16 +4204,18 @@ multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                       (ins GPR64xsp:$Rn, ImmTy:$amt),
                       asmop # "\t$Rt, [$Rn], $amt",
                       [],
-                      NoItinerary> {
-                        let Rm = 0b11111;
-                      }
+                      NoItinerary>,
+                 Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> {
+      let Rm = 0b11111;
+    }
 
     def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
                       (outs VecList:$Rt, GPR64xsp:$wb),
                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                       asmop # "\t$Rt, [$Rn], $Rm",
                       [],
-                      NoItinerary>;
+                      NoItinerary>,
+                    Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>;
   }
 }
 
@@ -4182,7 +4279,8 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
                                     VList:$src, ImmOp:$lane),
                                 asmop # "\t$Rt[$lane], [$Rn], $amt",
                                 [],
-                                NoItinerary> {
+                                NoItinerary>,
+        Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]> {
     let Rm = 0b11111;
   }
 
@@ -4194,7 +4292,8 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
                                     VList:$src, ImmOp:$lane),
                                 asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                 [],
-                                NoItinerary>;
+                                NoItinerary>,
+        Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd, ReadVecLd]>;
 }
 
 multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
@@ -4282,7 +4381,8 @@ let mayStore = 1, neverHasSideEffects = 1,
                                     VList:$Rt, ImmOp:$lane),
                                 asmop # "\t$Rt[$lane], [$Rn], $amt",
                                 [],
-                                NoItinerary> {
+                                NoItinerary>,
+        Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
     let Rm = 0b11111;
   }
 
@@ -4294,7 +4394,8 @@ let mayStore = 1, neverHasSideEffects = 1,
                                     ImmOp:$lane),
                                 asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                 [],
-                                NoItinerary>;
+                                NoItinerary>,
+        Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>;
 }
 
 multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
@@ -4382,7 +4483,8 @@ class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                       (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
-                      NoItinerary>;
+                      NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
   : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
@@ -4465,7 +4567,8 @@ class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
                       (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
-                      NoItinerary>;
+                      NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
   def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
@@ -4478,12 +4581,14 @@ multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
                        (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                        [],
-                       NoItinerary>;
+                       NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>;
     def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
                        (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                        [],
-                       NoItinerary>;
+                       NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -4513,7 +4618,8 @@ class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asm
                           (outs FPRCD:$Rd), (ins FPRCS:$Rn),
                           !strconcat(asmop, "\t$Rd, $Rn"),
                           [],
-                          NoItinerary>;
+                          NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
                                          string asmop> {
@@ -4550,7 +4656,8 @@ class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
                           (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
                           !strconcat(asmop, "\t$Rd, $Rn"),
                           [],
-                          NoItinerary>;
+                          NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
                                                  string asmop> {
@@ -4610,7 +4717,8 @@ class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
                           (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
                           !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                           [],
-                          NoItinerary>;
+                          NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
                                               string asmop> {
@@ -4618,12 +4726,14 @@ multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
                            (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm),
                            !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                            [],
-                           NoItinerary>;
+                           NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
   def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                            (outs FPR64:$Rd), (ins FPR64:$Rn, fpzz32:$FPImm),
                            !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                            [],
-                           NoItinerary>;
+                           NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
@@ -4707,7 +4817,8 @@ class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
   : NeonI_ScalarShiftImm<u, opcode,
                          (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
                                             string asmop> {
@@ -4772,7 +4883,8 @@ class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop
                          (outs FPR64:$Rd),
                          (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [], NoItinerary> {
+                         [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
     bits<6> Imm;
     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
     let Inst{21-16} = Imm;
@@ -4784,7 +4896,8 @@ class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
                          (outs FPR64:$Rd),
                          (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [], NoItinerary> {
+                         [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
     bits<6> Imm;
     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
     let Inst{21-16} = Imm;
@@ -4797,7 +4910,8 @@ class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
   : NeonI_ScalarShiftImm<u, opcode,
                          (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
                                                 string asmop> {
@@ -5111,10 +5225,13 @@ defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
                                            UQSUBhhh, UQSUBsss, UQSUBddd>;
 
 // Scalar Integer Saturating Doubling Multiply Half High
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in // as for SQRDMULH
 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
 
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
+}
 
 // Patterns to match llvm.arm.* intrinsic for
 // Scalar Integer Saturating Doubling Multiply Half High and
@@ -5124,8 +5241,10 @@ defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
                                                                 SQRDMULHsss>;
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { // two sources
 // Scalar Floating-point Multiply Extended
 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
+}
 
 // Scalar Floating-point Reciprocal Step
 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
@@ -5218,18 +5337,24 @@ defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
 
+let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
 // Signed Saturating Doubling Multiply-Add Long
 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
+}
 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
                                             SQDMLALshh, SQDMLALdss>;
 
 // Signed Saturating Doubling Multiply-Subtract Long
+let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
+}
 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
                                             SQDMLSLshh, SQDMLSLdss>;
 
 // Signed Saturating Doubling Multiply Long
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { // reads: $Rn, $Rm
 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
+}
 defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
                                          SQDMULLshh, SQDMULLdss>;
 
@@ -5557,7 +5682,8 @@ multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
                                 (outs FPR64:$Rd), (ins VPR128:$Rn),
                                 !strconcat(asmop, "\t$Rd, $Rn.2d"),
                                 [],
-                                NoItinerary>;
+                                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
   }
 }
 
@@ -5569,7 +5695,8 @@ multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
                                 (outs FPR32:$Rd), (ins VPR64:$Rn),
                                 !strconcat(asmop, "\t$Rd, $Rn.2s"),
                                 [],
-                                NoItinerary>;
+                                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
   }
 }
 
@@ -5642,7 +5769,8 @@ class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
                              (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                              asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                              [],
-                             NoItinerary> {
+                             NoItinerary>,
+    Sched<[WriteFPMul, ReadFPMul, ReadFPMul]> {
   bits<3> Imm;
   bits<5> MRm;
 }
@@ -5659,7 +5787,8 @@ class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode
                              (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                              asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                              [],
-                             NoItinerary> {
+                             NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
   let Constraints = "$src = $Rd";
   bits<3> Imm;
   bits<5> MRm;
@@ -6170,7 +6299,8 @@ class NeonI_Scalar_DUP<string asmop, string asmlane,
   : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
                      asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
                      [],
-                     NoItinerary> {
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   bits<4> Imm;
 }
 
@@ -6581,7 +6711,8 @@ class NeonI_Extract<bit q, bits<2> op2, string asmop,
                      asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
                      ", $Rm." # OpS # ", $Index",
                      [],
-                     NoItinerary>{
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>{
   bits<4> Index;
 }
 
@@ -6622,7 +6753,8 @@ class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
               (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
               [],
-              NoItinerary>;
+              NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 // The vectors in look up table are always 16b
 multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
@@ -6646,7 +6778,8 @@ class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
               (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
               [],
-              NoItinerary> {
+              NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -6674,7 +6807,8 @@ class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
                    (ResTy VPR128:$src),
                    (OpTy OpGPR:$Rn),
                    (OpImm:$Imm))))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   bits<4> Imm;
   let Constraints = "$src = $Rd";
 }
@@ -6732,7 +6866,8 @@ class NeonI_INS_element<string asmop, string Res, Operand ResImm>
                  ResImm:$Immd, ResImm:$Immn),
                  asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   bits<4> Immd;
   bits<4> Immn;
@@ -6876,7 +7011,8 @@ class NeonI_SMOV<string asmop, string Res, bit Q,
                    (ResTy (vector_extract
                      (OpTy VPR128:$Rn), (OpImm:$Imm))),
                    eleTy)))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   bits<4> Imm;
 }
 
@@ -6970,7 +7106,8 @@ class NeonI_UMOV<string asmop, string Res, bit Q,
                [(set (ResTy ResGPR:$Rd),
                   (ResTy (vector_extract
                     (OpTy VPR128:$Rn), (OpImm:$Imm))))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   bits<4> Imm;
 }
 
@@ -7128,7 +7265,8 @@ class NeonI_DUP_Elt<bit Q, string asmop, string rdlane,  string rnlane,
                (ins VPR128:$Rn, OpImm:$Imm),
                asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
                [],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   bits<4> Imm;
 }
 
@@ -7234,7 +7372,8 @@ class NeonI_DUP<bit Q, string asmop, string rdlane,
                asmop # "\t$Rd" # rdlane # ", $Rn",
                [(set (ResTy ResVPR:$Rd),
                  (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
-               NoItinerary>;
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
   let Inst{20-16} = 0b00001;
@@ -7335,7 +7474,8 @@ class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                  ", $Re." # EleOpS # "[$Index]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
   bits<3> Index;
   bits<5> Re;
 
@@ -7434,7 +7574,8 @@ class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                  ", $Re." # EleOpS # "[$Index]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   bits<3> Index;
   bits<5> Re;
 }
@@ -7473,9 +7614,11 @@ multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
+}
 
 // Pattern for lane in 128-bit vector
 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
@@ -7548,8 +7691,10 @@ multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
+}
 
 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
                          RegisterOperand OpVPR, RegisterOperand EleOpVPR,
@@ -7857,9 +8002,11 @@ multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
+}
 
 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
           (FMOVdd $src)>;
@@ -8074,7 +8221,8 @@ class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
                asmop # "\t$Rd." # Res # ", $Rn." # Res,
                [(set (ResTy ResVPR:$Rd),
                   (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
-               NoItinerary> ;
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
                           v16i8, Neon_rev64>;
@@ -8113,42 +8261,48 @@ multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
                            asmop # "\t$Rd.8h, $Rn.16b",
                            [(set (v8i16 VPR128:$Rd),
                               (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
-                           NoItinerary>;
+                           NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.4h, $Rn.8b",
                           [(set (v4i16 VPR64:$Rd),
                              (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
-                          NoItinerary>;
+                          NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd.4s, $Rn.8h",
                            [(set (v4i32 VPR128:$Rd),
                               (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
-                           NoItinerary>;
+                           NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.2s, $Rn.4h",
                           [(set (v2i32 VPR64:$Rd),
                              (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
-                          NoItinerary>;
+                          NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd.2d, $Rn.4s",
                            [(set (v2i64 VPR128:$Rd),
                               (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
-                           NoItinerary>;
+                           NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.1d, $Rn.2s",
                           [(set (v1i64 VPR64:$Rd),
                              (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
-                          NoItinerary>;
+                          NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
@@ -8170,7 +8324,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                              [(set (v8i16 VPR128:$Rd),
                                 (v8i16 (Neon_Padd
                                   (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
-                             NoItinerary>;
+                             NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
@@ -8178,7 +8333,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v4i16 VPR64:$Rd),
                                (v4i16 (Neon_Padd
                                  (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
@@ -8186,7 +8342,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v4i32 VPR128:$Rd),
                                (v4i32 (Neon_Padd
                                  (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
@@ -8194,7 +8351,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v2i32 VPR64:$Rd),
                                (v2i32 (Neon_Padd
                                  (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
@@ -8202,7 +8360,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v2i64 VPR128:$Rd),
                                (v2i64 (Neon_Padd
                                  (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
@@ -8210,7 +8369,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v1i64 VPR64:$Rd),
                                (v1i64 (Neon_Padd
                                  (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -8223,37 +8383,44 @@ multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
   def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.16b, $Rn.16b",
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.8h, $Rn.8h",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.4s, $Rn.4s",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.2d, $Rn.2d",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8b",
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.4h, $Rn.4h",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
@@ -8323,37 +8490,44 @@ multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
     def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "\t$Rd.16b, $Rn.16b",
-                           [], NoItinerary>;
+                           [], NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.8h, $Rn.8h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.4s, $Rn.4s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.2d, $Rn.2d",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                           asmop # "\t$Rd.8b, $Rn.8b",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                           asmop # "\t$Rd.4h, $Rn.4h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                           asmop # "\t$Rd.2s, $Rn.2s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -8401,42 +8575,48 @@ multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
                          asmop # "\t$Rd.16b, $Rn.16b",
                          [(set (v16i8 VPR128:$Rd),
                             (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
-                         NoItinerary>;
+                         NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.8h, $Rn.8h",
                         [(set (v8i16 VPR128:$Rd),
                            (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.4s, $Rn.4s",
                         [(set (v4i32 VPR128:$Rd),
                            (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.8b, $Rn.8b",
                         [(set (v8i8 VPR64:$Rd),
                            (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.4h, $Rn.4h",
                         [(set (v4i16 VPR64:$Rd),
                            (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
                         [(set (v2i32 VPR64:$Rd),
                            (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
@@ -8447,12 +8627,14 @@ multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
   def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.16b, $Rn.16b",
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.8b, $Rn.8b",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
@@ -8510,21 +8692,24 @@ multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
                         asmop # "\t$Rd.4s, $Rn.4s",
                         [(set (v4f32 VPR128:$Rd),
                            (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.2d, $Rn.2d",
                         [(set (v2f64 VPR128:$Rd),
                            (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
                         [(set (v2f32 VPR64:$Rd),
                            (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
@@ -8534,33 +8719,39 @@ multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
   def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.8b, $Rn.8h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.4h, $Rn.4s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.2s, $Rn.2d",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   let Constraints = "$Rd = $src" in {
     def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                              (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                              asmop # "2\t$Rd.16b, $Rn.8h",
-                             [], NoItinerary>;
+                             [], NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.8h, $Rn.4s",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.4s, $Rn.2d",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -8613,37 +8804,43 @@ multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
                             (outs VPR128:$Rd),
                             (ins VPR64:$Rn, uimm_exact8:$Imm),
                             asmop # "\t$Rd.8h, $Rn.8b, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
     def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR64:$Rn, uimm_exact16:$Imm),
                             asmop # "\t$Rd.4s, $Rn.4h, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
     def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR64:$Rn, uimm_exact32:$Imm),
                             asmop # "\t$Rd.2d, $Rn.2s, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
     def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR128:$Rn, uimm_exact8:$Imm),
                             asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR128:$Rn, uimm_exact16:$Imm),
                             asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
     def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR128:$Rn, uimm_exact32:$Imm),
                             asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
   }
 }
 
@@ -8691,23 +8888,27 @@ multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
   def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.4h, $Rn.4s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.2s, $Rn.2d",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   let Constraints = "$src = $Rd" in {
     def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.8h, $Rn.4s",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.4s, $Rn.2d",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -8745,12 +8946,14 @@ multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
   def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.2s, $Rn.2d",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "2\t$Rd.4s, $Rn.2d",
-                          [], NoItinerary> {
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
     let Constraints = "$src = $Rd";
   }
 
@@ -8774,22 +8977,26 @@ multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
   def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.4s, $Rn.4h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.2d, $Rn.2s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "2\t$Rd.4s, $Rn.8h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "2\t$Rd.2d, $Rn.4s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
@@ -8825,21 +9032,24 @@ multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                         asmop # "\t$Rd.4s, $Rn.4s",
                         [(set (ResTy4s VPR128:$Rd),
                            (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.2d, $Rn.2d",
                         [(set (ResTy2d VPR128:$Rd),
                            (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
                         [(set (ResTy2s VPR64:$Rd),
                            (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
@@ -8894,7 +9104,9 @@ defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                     int_arm_neon_vrecpe>;
 defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                      int_arm_neon_vrsqrte>;
+let SchedRW = [WriteFPSqrt, ReadFPSqrt] in {
 defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
+}
 
 multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                                bits<5> opcode, SDPatternOperator Neon_Op> {
@@ -8903,14 +9115,16 @@ multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                         asmop # "\t$Rd.4s, $Rn.4s",
                         [(set (v4i32 VPR128:$Rd),
                            (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
                         [(set (v2i32 VPR64:$Rd),
                            (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
@@ -8927,7 +9141,8 @@ class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                      [(set (v16i8 VPR128:$Rd),
                         (v16i8 (opnode (v16i8 VPR128:$src),
                                        (v16i8 VPR128:$Rn))))],
-                     NoItinerary>{
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let Predicates = [HasNEON, HasCrypto];
 }
@@ -8942,7 +9157,8 @@ class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
                      asmop # "\t$Rd.16b, $Rn.16b",
                      [(set (v16i8 VPR128:$Rd),
                         (v16i8 (opnode (v16i8 VPR128:$Rn))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
 def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
@@ -8955,7 +9171,8 @@ class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
                      [(set (v4i32 VPR128:$Rd),
                         (v4i32 (opnode (v4i32 VPR128:$src),
                                        (v4i32 VPR128:$Rn))))],
-                     NoItinerary> {
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let Predicates = [HasNEON, HasCrypto];
 }
@@ -8970,7 +9187,8 @@ class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
   : NeonI_Crypto_SHA<size, opcode,
                      (outs FPR32:$Rd), (ins FPR32:$Rn),
                      asmop # "\t$Rd, $Rn",
-                     [], NoItinerary> {
+                     [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   let Predicates = [HasNEON, HasCrypto];
   let hasSideEffects = 0;
 }
@@ -8990,7 +9208,8 @@ class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
                           (v4i32 (opnode (v4i32 VPR128:$src),
                                          (v4i32 VPR128:$Rn),
                                          (v4i32 VPR128:$Rm))))],
-                       NoItinerary> {
+                       NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let Predicates = [HasNEON, HasCrypto];
 }
@@ -9010,7 +9229,8 @@ class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
                           (v4i32 (opnode (v4i32 FPR128:$src),
                                          (v4i32 FPR128:$Rn),
                                          (v4i32 VPR128:$Rm))))],
-                       NoItinerary> {
+                       NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let Predicates = [HasNEON, HasCrypto];
 }
@@ -9025,7 +9245,8 @@ class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop>
                        (outs FPR128:$Rd),
                        (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
                        asmop # "\t$Rd, $Rn, $Rm.4s",
-                       [], NoItinerary> {
+                       [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let hasSideEffects = 0;
   let Predicates = [HasNEON, HasCrypto];
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
index 6fcb1116b6c..ec8450b9c1f 100644
--- a/lib/Target/AArch64/AArch64Schedule.td
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -37,8 +37,16 @@ def ReadDiv : SchedRead;
 // Loads
 def WriteLd : SchedWrite;
 def WritePreLd : SchedWrite;
+def WriteVecLd : SchedWrite;
 def ReadLd : SchedRead;
 def ReadPreLd : SchedRead;
+def ReadVecLd : SchedRead;
+
+// Stores
+def WriteSt : SchedWrite;
+def WriteVecSt : SchedWrite;
+def ReadSt : SchedRead;
+def ReadVecSt : SchedRead;
 
 // Branches
 def WriteBr : SchedWrite;
diff --git a/lib/Target/AArch64/AArch64ScheduleA53.td b/lib/Target/AArch64/AArch64ScheduleA53.td
index e288a24eb2c..20a14e79228 100644
--- a/lib/Target/AArch64/AArch64ScheduleA53.td
+++ b/lib/Target/AArch64/AArch64ScheduleA53.td
@@ -71,9 +71,18 @@ def : WriteRes<WriteMAC, [A53UnitMAC]> { let Latency = 4; }
 // Div
 def : WriteRes<WriteDiv, [A53UnitDiv]> { let Latency = 4; }
 
-// Load
+// Load - Note: Vector loads take 1-5 cycles to issue. For the WriteVecLd below,
+//        choosing the median of 3 which makes the latency 6. May model this more
+//        carefully in the future.
 def : WriteRes<WriteLd, [A53UnitLdSt]> { let Latency = 4; }
 def : WriteRes<WritePreLd, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteVecLd, [A53UnitLdSt]> { let Latency = 6; }
+
+// Store - Note: Vector stores take 1-3 cycles to issue. For the WriteVecSt
+//         below, choosing the median of 2 which makes the latency 5. May model
+//         this more carefully in the future.
+def : WriteRes<WriteSt, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteVecSt, [A53UnitLdSt]> { let Latency = 5; }
 
 // Branch
 def : WriteRes<WriteBr, [A53UnitB]>;
@@ -114,9 +123,14 @@ def : ReadAdvance<ReadMAC, 0>;
 // No forwarding defined for ReadDiv yet.
 def : ReadAdvance<ReadDiv, 0>;
 
-// No forwarding defined for ReadLd, ReadPreLd yet.
+// No forwarding defined for ReadLd, ReadPreLd, ReadVecLd yet.
 def : ReadAdvance<ReadLd, 0>;
 def : ReadAdvance<ReadPreLd, 0>;
+def : ReadAdvance<ReadVecLd, 0>;
+
+// No forwarding defined for ReadSt and ReadVecSt yet.
+def : ReadAdvance<ReadSt, 0>;
+def : ReadAdvance<ReadVecSt, 0>;
 
 // No forwarding defined for ReadFPALU yet.
 def : ReadAdvance<ReadFPALU, 0>;
diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll
index 0d5534eca54..1555c4868e1 100644
--- a/test/CodeGen/AArch64/misched-basic-A53.ll
+++ b/test/CodeGen/AArch64/misched-basic-A53.ll
@@ -4,13 +4,15 @@
 ; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
 ; much higher than the ADD instructions in order to hide latency. When not
 ; specifying a subtarget, the MADD will remain near the end of the block.
+;
+; CHECK: ********** MI Scheduling **********
 ; CHECK: main
 ; CHECK: *** Final schedule for BB#2 ***
 ; CHECK: SU(13)
 ; CHECK: MADDwwww
 ; CHECK: SU(4)
 ; CHECK: ADDwwi_lsl0_s
-; CHECK: ********** MI Scheduling **********
+; CHECK: ********** INTERVALS **********
 @main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
 @main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
 
@@ -76,6 +78,33 @@ for.end:                                          ; preds = %for.cond
   ret i32 %add6
 }
 
+
+; The Cortex-A53 machine model will cause the FDIVvvv_4S to be raised to
+; hide latency. Whereas normally there would only be a single FADDvvv_4S
+; after it, this test checks to make sure there is more than one.
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: neon4xfloat:BB#0
+; CHECK: *** Final schedule for BB#0 ***
+; CHECK: FDIVvvv_4S
+; CHECK: FADDvvv_4S
+; CHECK: FADDvvv_4S
+; CHECK: ********** INTERVALS **********
+define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) {
+        %tmp1 = fadd <4 x float> %A, %B;
+        %tmp2 = fadd <4 x float> %A, %tmp1;
+        %tmp3 = fadd <4 x float> %A, %tmp2;
+        %tmp4 = fadd <4 x float> %A, %tmp3;
+        %tmp5 = fadd <4 x float> %A, %tmp4;
+        %tmp6 = fadd <4 x float> %A, %tmp5;
+        %tmp7 = fadd <4 x float> %A, %tmp6;
+        %tmp8 = fadd <4 x float> %A, %tmp7;
+        %tmp9 = fdiv <4 x float> %A, %B;
+        %tmp10 = fadd <4 x float> %tmp8, %tmp9;
+
+        ret <4 x float> %tmp10
+}
+
 ; Function Attrs: nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1