Revert "[FastISel][AArch64] Don't fold instructions too aggressively into the memory operation."

Quentin pointed out that this is not the correct approach and there is a better and easier solution.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216632 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-04 02:24:29 +00:00 · 2014-08-27 23:09:40 +00:00
parent 34ea0a1de3
commit a26b1bdcc8
2 changed files with 16 additions and 222 deletions
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -134,11 +134,7 @@ private:
  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
-  bool isLegalToFoldAddress(const Value *Obj);
-  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
-  bool computeAddressRecursively(const Value *Obj, Address &Addr, Type *Ty);
-  bool computeAddressBase(const Value *Obj, Address &Addr);
-
+  bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool ComputeCallAddress(const Value *V, Address &Addr);
  bool SimplifyAddress(Address &Addr, MVT VT);
  void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
@@ -420,68 +416,9 @@ unsigned AArch64FastISel::TargetMaterializeFloatZero(const ConstantFP* CFP) {
  return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 }

-bool AArch64FastISel::isLegalToFoldAddress(const Value *Obj) {
-  // Look through BitCast, IntToPtr, and PtrToInt.
-  const User *U = nullptr;
-  unsigned Opcode = Instruction::UserOp1;
-  if (const auto *I = dyn_cast<Instruction>(Obj)) {
-    // Bail out if the result is used in a different basic block.
-    if (FuncInfo.isExportedInst(I))
-      return false;
-
-    Opcode = I->getOpcode();
-    U = I;
-  } else if (const auto *CE = dyn_cast<ConstantExpr>(Obj)) {
-    Opcode = CE->getOpcode();
-    U = CE;
-  }
-
-  switch (Opcode) {
-  default:
-    break;
-  case Instruction::BitCast:
-    return isLegalToFoldAddress(U->getOperand(0));
-  case Instruction::IntToPtr:
-    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
-      return isLegalToFoldAddress(U->getOperand(0));
-    break;
-  case Instruction::PtrToInt:
-    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
-      return isLegalToFoldAddress(U->getOperand(0));
-    break;
-  }
-
-  // Allocas never kill their operands, so it is safe to fold it.
-  if (isa<AllocaInst>(Obj) || !isa<Instruction>(Obj))
-    return true;
-
-  const auto *I = cast<Instruction>(Obj);
-  // Trivial case - the memory instruction is the only user.
-  if (I->hasOneUse())
-    return true;
-
-  // Check all users - if all of them are memory instructions that FastISel
-  // can handle, then it is safe to fold the instruction.
-  for (auto *U : I->users())
-    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
-      return false;
-
-  return true;
-}
-
 // Computes the address to get to an object.
-bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr,
-                                     Type *Ty) {
-  // Don't fold instructions into the memory operation if their result is
-  // exported to another basic block or has more than one use - except if all
-  // uses are memory operations.
-  if (isLegalToFoldAddress(Obj))
-    return computeAddressRecursively(Obj, Addr, Ty);
-  return computeAddressBase(Obj, Addr);
-}
-
-bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
-                                                Type *Ty) {
+bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
+{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -508,18 +445,18 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
    break;
  case Instruction::BitCast: {
    // Look through bitcasts.
-    return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+    return ComputeAddress(U->getOperand(0), Addr, Ty);
  }
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
-      return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+      return ComputeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::PtrToInt: {
-    // Look past no-op ptrtoints. Don't increment recursion level.
+    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
-      return computeAddressRecursively(U->getOperand(0), Addr, Ty);
+      return ComputeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::GetElementPtr: {
@@ -561,7 +498,7 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
-    if (computeAddressRecursively(U->getOperand(0), Addr, Ty))
+    if (ComputeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
@@ -582,9 +519,6 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
    break;
  }
  case Instruction::Add: {
-    if (!U->hasOneUse())
-      break;
-
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);
@@ -594,21 +528,17 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
-      return computeAddressRecursively(LHS, Addr, Ty);
+      return ComputeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
-    if (computeAddressRecursively(LHS, Addr, Ty) &&
-        computeAddressRecursively(RHS, Addr, Ty))
+    if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Shl:
-    if (!U->hasOneUse())
-      break;
-
    if (Addr.getOffsetReg())
      break;

@@ -631,10 +561,8 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
      Addr.setShift(Val);
      Addr.setExtendType(AArch64_AM::LSL);

-      // Only try to fold the operand if it has one use.
      if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
-        if (I->hasOneUse() &&
-            (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB))
+        if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
          U = I;

      if (const auto *ZE = dyn_cast<ZExtInst>(U))
@@ -654,10 +582,6 @@ bool AArch64FastISel::computeAddressRecursively(const Value *Obj, Address &Addr,
    break;
  }

-  return computeAddressBase(Obj, Addr);
-}
-
-bool AArch64FastISel::computeAddressBase(const Value *Obj, Address &Addr) {
  if (Addr.getReg()) {
    if (!Addr.getOffsetReg()) {
      unsigned Reg = getRegForValue(Obj);
@@ -1428,7 +1352,7 @@ bool AArch64FastISel::SelectLoad(const Instruction *I) {

  // See if we can handle this address.
  Address Addr;
-  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
+  if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
    return false;

  unsigned ResultReg;
@@ -1545,7 +1469,7 @@ bool AArch64FastISel::SelectStore(const Instruction *I) {

  // See if we can handle this address.
  Address Addr;
-  if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
+  if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
    return false;

  if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
@@ -2453,7 +2377,7 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
    if (MTI->isVolatile())
      return false;

-    // Disable inlining for memmove before calls to computeAddress.  Otherwise,
+    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
@@ -2463,8 +2387,8 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
      unsigned Alignment = MTI->getAlignment();
      if (IsMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
-        if (!computeAddress(MTI->getRawDest(), Dest) ||
-            !computeAddress(MTI->getRawSource(), Src))
+        if (!ComputeAddress(MTI->getRawDest(), Dest) ||
+            !ComputeAddress(MTI->getRawSource(), Src))
          return false;
        if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
--- a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -281,50 +281,6 @@ define i64 @load_breg_immoff_8(i64 %a) {
  ret i64 %3
 }

-; Allow folding of the address if it is used by memory instructions only.
-define void @load_breg_immoff_9(i64 %a) {
-; FAST-LABEL: load_breg_immoff_9
-; FAST:       ldr {{x[0-9]+}}, [x0, #96]
-; FAST:       str {{x[0-9]+}}, [x0, #96]
-  %1 = add i64 %a, 96
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  %4 = add i64 %3, 1
-  store i64 %4, i64* %2
-  ret void
-}
-
-; Don't fold if the add result leaves the basic block - even if the user is a
-; memory operation.
-define i64 @load_breg_immoff_10(i64 %a, i1 %c) {
-; FAST-LABEL: load_breg_immoff_10
-; FAST:       add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
-; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
-  %1 = add i64 %a, 96
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  %4 = shl i64 %1, 3
-  %5 = inttoptr i64 %4 to i64*
-  %res = load i64* %5
-  ret i64 %res
-bb2:
-  ret i64 %3
-}
-
-; Don't allow folding of the address if it is used by non-memory instructions.
-define i64 @load_breg_immoff_11(i64 %a) {
-; FAST-LABEL: load_breg_immoff_11
-; FAST:       add [[REG:x[0-9]+]], {{x[0-9]+}}, {{x[0-9]+}}
-; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
-  %1 = add i64 %a, 96
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  %4 = add i64 %1, %3
-  ret i64 %4
-}
-
 ; Load Base Register + Register Offset
 define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_offreg_1
@@ -345,33 +301,6 @@ define i64 @load_breg_offreg_2(i64 %a, i64 %b) {
  ret i64 %3
 }

-; Don't fold if the add result leaves the basic block.
-define i64 @load_breg_offreg_3(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_offreg_3
-; FAST:       add [[REG:x[0-9]+]], x0, x1
-; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
-  %1 = add i64 %a, %b
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  %res = load i64* %2
-  ret i64 %res
-bb2:
-  ret i64 %3
-}
-
-define i64 @load_breg_offreg_4(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_offreg_4
-; FAST:       add [[REG:x[0-9]+]], x0, x1
-; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]]{{\]}}
-  %1 = add i64 %a, %b
-  %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
-  %4 = add i64 %1, %3
-  ret i64 %4
-}
-
 ; Load Base Register + Register Offset + Immediate Offset
 define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_offreg_immoff_1
@@ -476,35 +405,6 @@ define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
  ret i32 %5
 }

-; Don't fold if the shift result leaves the basic block.
-define i64 @load_breg_shift_offreg_6(i64 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_shift_offreg_6
-; FAST:       lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
-; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
-  %1 = shl i64 %a, 3
-  %2 = add i64 %b, %1
-  %3 = inttoptr i64 %2 to i64*
-  %4 = load i64* %3
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  %5 = inttoptr i64 %1 to i64*
-  %res = load i64* %5
-  ret i64 %res
-bb2:
-  ret i64 %4
-}
-
-define i64 @load_breg_shift_offreg_7(i64 %a, i64 %b) {
-; FAST-LABEL: load_breg_shift_offreg_7
-; FAST:       lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
-; FAST-NEXT:  ldr {{x[0-9]+}}, {{\[}}x1, [[REG]]{{\]}}
-  %1 = shl i64 %a, 3
-  %2 = add i64 %b, %1
-  %3 = inttoptr i64 %2 to i64*
-  %4 = load i64* %3
-  %5 = add i64 %1, %4
-  ret i64 %5
-}

 ; Load Base Register + Scaled Register Offset + Sign/Zero extension
 define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
@@ -529,36 +429,6 @@ define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
  ret i32 %5
 }

-; Don't fold if the zext result leaves the basic block.
-define i64 @load_breg_zext_shift_offreg_3(i32 %a, i64 %b, i1 %c) {
-; FAST-LABEL: load_breg_zext_shift_offreg_3
-; FAST:       ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
-  %1 = zext i32 %a to i64
-  %2 = shl i64 %1, 3
-  %3 = add i64 %b, %2
-  %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  %6 = inttoptr i64 %1 to i64*
-  %res = load i64* %6
-  ret i64 %res
-bb2:
-  ret i64 %5
-}
-
-define i64 @load_breg_zext_shift_offreg_4(i32 %a, i64 %b) {
-; FAST-LABEL: load_breg_zext_shift_offreg_4
-; FAST:       ldr {{x[0-9]+}}, {{\[}}x1, {{x[0-9]+}}, lsl #3{{\]}}
-  %1 = zext i32 %a to i64
-  %2 = shl i64 %1, 3
-  %3 = add i64 %b, %2
-  %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
-  %6 = add i64 %1, %5
-  ret i64 %6
-}
-
 define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_sext_shift_offreg_1
 ; CHECK:       ldr {{w[0-9]+}}, [x1, w0, sxtw #2]