[FastISel][AArch64] Fix address simplification.
When a shift with extension or an add with shift and extension cannot be folded into the memory operation, the address calculation has to be materialized separately. While doing so, the code forgot to consider a possible sign/zero extension. This fix also folds the sign/zero extension into the add or shift instruction that is used to materialize the address.

This fixes rdar://problem/18141718.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216511 91177308-0d34-0410-b5e6-96231b3b80d8
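To illustrate the failure mode (reconstructed from the commit message and the new tests at the bottom of this diff, so treat the concrete register numbers as illustrative): for IR such as

  %1 = sext i32 %a to i64
  %2 = shl i64 %1, 3
  %3 = add i64 %b, %2

the address %b + (sext(%a) << 3) must, when it cannot be folded into the load itself, be materialized as an add that carries the extension:

  add x8, x1, w0, sxtw #3    ; shift and sign-extension folded into the add

rather than a plain `add x8, x1, x0, lsl #3`, which would silently drop the sign-extension of the 32-bit offset register.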
This commit is contained in:
Parent: 836f4bd090
Commit: fc03e72b4f
lib/Target/AArch64/AArch64FastISel.cpp

@@ -156,10 +156,19 @@ private:
   unsigned emitAddsSubs_ri(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm,
                            bool WantResult = true);
+  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                         AArch64_AM::ShiftExtendType ShiftType,
+                         uint64_t ShiftImm, bool WantResult = true);
   unsigned emitAddsSubs_rs(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            AArch64_AM::ShiftExtendType ShiftType,
                            uint64_t ShiftImm, bool WantResult = true);
+  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                         AArch64_AM::ShiftExtendType ExtType,
+                         uint64_t ShiftImm, bool WantResult = true);
+
   unsigned emitAddsSubs_rx(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            AArch64_AM::ShiftExtendType ExtType,
@@ -715,20 +724,38 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
 
   if (RegisterOffsetNeedsLowering) {
     unsigned ResultReg = 0;
-    if (Addr.getReg())
-      ResultReg = FastEmitInst_rri(AArch64::ADDXrs, &AArch64::GPR64RegClass,
-                                   Addr.getReg(), /*TODO:IsKill=*/false,
-                                   Addr.getOffsetReg(), /*TODO:IsKill=*/false,
-                                   Addr.getShift());
-    else
-      ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
-                             /*Op0IsKill=*/false, Addr.getShift());
+    if (Addr.getReg()) {
+      if (Addr.getExtendType() == AArch64_AM::SXTW ||
+          Addr.getExtendType() == AArch64_AM::UXTW )
+        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
+                                  Addr.getShift());
+      else
+        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
+                                  Addr.getShift());
+    } else {
+      if (Addr.getExtendType() == AArch64_AM::UXTW)
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift(),
+                               /*IsZExt=*/true);
+      else if (Addr.getExtendType() == AArch64_AM::SXTW)
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift(),
+                               /*IsZExt=*/false);
+      else
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift());
+    }
     if (!ResultReg)
       return false;
 
     Addr.setReg(ResultReg);
     Addr.setOffsetReg(0);
     Addr.setShift(0);
+    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
   }
 
   // Since the offset is too large for the load/store instruction get the
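A note on the rewritten block above. When a base register is present, the extension (if any) is now folded into a single ADD (extended register) via the new emitAddSub_rx helper; otherwise the fall-back remains ADD (shifted register). When only an offset register exists, the extend and shift collapse into one sign/zero-extending shift: emitLSL_ri with a 32-bit source type and IsZExt=false emits a sign-extending shift, which the new test below checks as

  sbfiz x8, x0, #3, #32    ; sign-extend w0, then shift left by 3

(the concrete register x8 is illustrative; the test matches it with a regex).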
@@ -978,6 +1005,40 @@ unsigned AArch64FastISel::emitAddsSubs_ri(bool UseAdds, MVT RetVT,
   return ResultReg;
 }
 
+unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT,
+                                        unsigned LHSReg, bool LHSIsKill,
+                                        unsigned RHSReg, bool RHSIsKill,
+                                        AArch64_AM::ShiftExtendType ShiftType,
+                                        uint64_t ShiftImm, bool WantResult) {
+  assert(LHSReg && RHSReg && "Invalid register number.");
+
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  static const unsigned OpcTable[2][2] = {
+    { AArch64::ADDWrs, AArch64::ADDXrs },
+    { AArch64::SUBWrs, AArch64::SUBXrs }
+  };
+  unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+  unsigned ResultReg;
+  if (WantResult) {
+    const TargetRegisterClass *RC =
+        (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    ResultReg = createResultReg(RC);
+  } else
+    ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+  const MCInstrDesc &II = TII.get(Opc);
+  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+      .addReg(LHSReg, getKillRegState(LHSIsKill))
+      .addReg(RHSReg, getKillRegState(RHSIsKill))
+      .addImm(getShifterImm(ShiftType, ShiftImm));
+
+  return ResultReg;
+}
+
 unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
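The OpcTable[!UseAdd][(RetVT == MVT::i64)] lookup above packs two independent choices, add versus sub and 32-bit versus 64-bit, into a 2x2 table instead of nested conditionals. A minimal standalone sketch of the idiom (the enum values are illustrative stand-ins, not LLVM's real opcode numbers):

  #include <cstdio>

  // Illustrative stand-ins for the AArch64 opcode enumerators used above.
  enum Opcode { ADDWrs, ADDXrs, SUBWrs, SUBXrs };

  // Row 0 holds the add opcodes, row 1 the sub opcodes; column 0 is the
  // 32-bit ("W") form and column 1 the 64-bit ("X") form.
  static const Opcode OpcTable[2][2] = {
    { ADDWrs, ADDXrs },
    { SUBWrs, SUBXrs }
  };

  Opcode selectOpcode(bool UseAdd, bool Is64Bit) {
    // !UseAdd maps add -> row 0 and sub -> row 1, mirroring the patch's
    // OpcTable[!UseAdd][(RetVT == MVT::i64)] lookup.
    return OpcTable[!UseAdd][Is64Bit];
  }

  int main() {
    // A 64-bit add selects ADDXrs (row 0, column 1).
    std::printf("%d\n", selectOpcode(/*UseAdd=*/true, /*Is64Bit=*/true));
    return 0;
  }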
@@ -1012,6 +1073,40 @@ unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
   return ResultReg;
 }
 
+unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT,
+                                        unsigned LHSReg, bool LHSIsKill,
+                                        unsigned RHSReg, bool RHSIsKill,
+                                        AArch64_AM::ShiftExtendType ExtType,
+                                        uint64_t ShiftImm, bool WantResult) {
+  assert(LHSReg && RHSReg && "Invalid register number.");
+
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  static const unsigned OpcTable[2][2] = {
+    { AArch64::ADDWrx, AArch64::ADDXrx },
+    { AArch64::SUBWrx, AArch64::SUBXrx }
+  };
+  unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+  unsigned ResultReg;
+  if (WantResult) {
+    const TargetRegisterClass *RC =
+        (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    ResultReg = createResultReg(RC);
+  } else
+    ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+  const MCInstrDesc &II = TII.get(Opc);
+  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+      .addReg(LHSReg, getKillRegState(LHSIsKill))
+      .addReg(RHSReg, getKillRegState(RHSIsKill))
+      .addImm(getArithExtendImm(ExtType, ShiftImm));
+
+  return ResultReg;
+}
+
 unsigned AArch64FastISel::emitAddsSubs_rx(bool UseAdds, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
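The only substantive difference between the _rs and _rx emitters is the final operand: getShifterImm(ShiftType, ShiftImm) for the shifted-register forms versus getArithExtendImm(ExtType, ShiftImm) for the extended-register forms. A hedged sketch of the extended-register immediate, modeled on LLVM's AArch64AddressingModes.h (the encoding values are stated from memory and are an assumption, not part of this patch):

  #include <cassert>

  // Assumed extend-kind encodings (UXTB=0 .. UXTX=3, SXTB=4 .. SXTX=7),
  // mirroring what AArch64AddressingModes.h uses for ADD/SUB (extended reg).
  enum ShiftExtendType { UXTB = 0, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX };

  unsigned getArithExtendImm(ShiftExtendType ET, unsigned Imm) {
    assert(Imm <= 4 && "arithmetic extend allows a left shift of at most 4");
    // Bits [5:3] select the extend kind, bits [2:0] hold the shift amount;
    // e.g. SXTW with shift 3 -> (6 << 3) | 3 = 51, i.e. the "sxtw #3" operand.
    return (static_cast<unsigned>(ET) << 3) | Imm;
  }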
test/CodeGen/AArch64/fast-isel-addressing-modes.ll

@@ -423,3 +423,30 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
   ret i32 %5
 }
 
+; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_sext_shift_offreg_imm1(i32 %a) {
+; CHECK-LABEL: load_sext_shift_offreg_imm1
+; CHECK:       sbfiz [[REG:x[0-9]+]], x0, #3, #32
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %2, 8
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+; Load Base Register + Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_breg_sext_shift_offreg_imm1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_imm1
+; CHECK:       add [[REG:x[0-9]+]], x1, w0, sxtw #3
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = add i64 %3, 8
+  %5 = inttoptr i64 %4 to i64*
+  %6 = load i64* %5
+  ret i64 %6
+}
+
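To reproduce the checked output, an llc invocation along these lines should work; the test file's actual RUN line is not part of this hunk, so the triple and flags below are an assumption for LLVM of this vintage:

  llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort -o - fast-isel-addressing-modes.ll | FileCheck fast-isel-addressing-modes.ll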