Change allowsUnalignedMemoryAccesses to take type argument since some targets

support unaligned mem access only for certain types. (Should it be size instead?) ARM v7 supports unaligned access for i16 and i32, some v6 variants support it as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79127 91177308-0d34-0410-b5e6-96231b3b80d8
2026-04-20 16:17:38 +00:00 · 2009-08-15 19:23:44 +00:00
parent e93f37350d
commit 088880cb19
7 changed files with 76 additions and 33 deletions
@@ -637,13 +637,13 @@ public:
  unsigned getMaxStoresPerMemmove() const { return maxStoresPerMemmove; }

  /// This function returns true if the target allows unaligned memory accesses.
-  /// This is used, for example, in situations where an array copy/move/set is 
-  /// converted to a sequence of store operations. It's use helps to ensure that
-  /// such replacements don't generate code that causes an alignment error 
-  /// (trap) on the target machine. 
+  /// of the specified type. This is used, for example, in situations where an
+  /// array copy/move/set is  converted to a sequence of store operations. It's
+  /// use helps to ensure that such replacements don't generate code that causes
+  /// an alignment error  (trap) on the target machine. 
  /// @brief Determine if the target supports unaligned memory accesses.
-  bool allowsUnalignedMemoryAccesses() const {
-    return allowUnalignedMemoryAccesses;
+  virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+    return false;
  }

  /// This function returns true if the target would benefit from code placement
@@ -1757,12 +1757,6 @@ protected:
  /// @brief Specify maximum bytes of store instructions per memmove call.
  unsigned maxStoresPerMemmove;

-  /// This field specifies whether the target machine permits unaligned memory
-  /// accesses.  This is used, for example, to determine the size of store 
-  /// operations when copying small arrays and other similar tasks.
-  /// @brief Indicate whether the target permits unaligned memory accesses.
-  bool allowUnalignedMemoryAccesses;
-
  /// This field specifies whether the target can benefit from code placement
  /// optimization.
  bool benefitFromCodePlacementOpt;
@@ -3121,6 +3121,27 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
  return SDValue();
 }

+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+  if (!Subtarget->hasV6Ops())
+    // Pre-v6 does not support unaligned mem access.
+    return false;
+  else if (!Subtarget->hasV6Ops()) {
+    // v6 may or may not support unaligned mem access.
+    if (!Subtarget->isTargetDarwin())
+      return false;
+  }
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+    return true;
+  // FIXME: VLD1 etc with standard alignment is legal.
+  }
+}
+
 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
  if (V < 0)
    return false;
@@ -166,6 +166,11 @@ namespace llvm {
    virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *MBB) const;

+    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// unaligned memory accesses. of the specified type.
+    /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
@@ -193,6 +198,8 @@ namespace llvm {
                                                APInt &KnownOne,
                                                const SelectionDAG &DAG,
                                                unsigned Depth) const;
+
+
    ConstraintType getConstraintType(const std::string &Constraint) const;
    std::pair<unsigned, const TargetRegisterClass*>
      getRegForInlineAsmConstraint(const std::string &Constraint,
@@ -957,7 +957,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
-  allowUnalignedMemoryAccesses = true; // x86 supports it!
  setPrefLoopAlignment(16);
  benefitFromCodePlacementOpt = true;
 }
@@ -389,10 +389,15 @@ namespace llvm {
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for
    /// determining it.
-    virtual
-    EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                            bool isSrcConst, bool isSrcStr,
-                            SelectionDAG &DAG) const;
+    virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+                                    bool isSrcConst, bool isSrcStr,
+                                    SelectionDAG &DAG) const;
+
+    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// unaligned memory accesses. of the specified type.
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+      return true;
+    }

    /// LowerOperation - Provide custom lowering hooks for some operations.
    ///
@@ -367,9 +367,10 @@ SDValue XCoreTargetLowering::
 LowerLOAD(SDValue Op, SelectionDAG &DAG)
 {
  LoadSDNode *LD = cast<LoadSDNode>(Op);
-  assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension type");
+  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+         "Unexpected extension type");
  assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
-  if (allowsUnalignedMemoryAccesses()) {
+  if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
    return SDValue();
  }
  unsigned ABIAlignment = getTargetData()->
@@ -465,7 +466,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
  StoreSDNode *ST = cast<StoreSDNode>(Op);
  assert(!ST->isTruncatingStore() && "Unexpected store type");
  assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
-  if (allowsUnalignedMemoryAccesses()) {
+  if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
    return SDValue();
  }
  unsigned ABIAlignment = getTargetData()->
@@ -1048,7 +1049,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
  case ISD::STORE: {
    // Replace unaligned store of unaligned load with memmove.
    StoreSDNode *ST  = cast<StoreSDNode>(N);
-    if (!DCI.isBeforeLegalize() || allowsUnalignedMemoryAccesses() ||
+    if (!DCI.isBeforeLegalize() ||
+        allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
        ST->isVolatile() || ST->isIndexed()) {
      break;
    }
@@ -1,16 +1,31 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -o %t -f
-; RUN: grep ldrb %t | count 4
-; RUN: grep strb %t | count 4
+; RUN: llvm-as < %s | llc -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llvm-as < %s | llc -march=arm -mattr=+v7a | FileCheck %s -check-prefix=V7

+; rdar://7113725

-	%struct.p = type <{ i8, i32 }>
-@t = global %struct.p <{ i8 1, i32 10 }>		; <%struct.p*> [#uses=1]
-@u = weak global %struct.p zeroinitializer		; <%struct.p*> [#uses=1]
-
-define i32 @main() {
+define arm_apcscc void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 entry:
-	%tmp3 = load i32* getelementptr (%struct.p* @t, i32 0, i32 1), align 1		; <i32> [#uses=2]
-	store i32 %tmp3, i32* getelementptr (%struct.p* @u, i32 0, i32 1), align 1
-	ret i32 %tmp3
+; GENERIC: t:
+; GENERIC: ldrb r2
+; GENERIC: ldrb r3
+; GENERIC: ldrb r12
+; GENERIC: ldrb r1
+; GENERIC: strb r1
+; GENERIC: strb r12
+; GENERIC: strb r3
+; GENERIC: strb r2
+
+; DARWIN_V6: t:
+; DARWIN_V6: ldr r1
+; DARWIN_V6: str r1
+
+; V7: t:
+; V7: ldr r1
+; V7: str r1
+  %__src1.i = bitcast i8* %b to i32*              ; <i32*> [#uses=1]
+  %__dest2.i = bitcast i8* %a to i32*             ; <i32*> [#uses=1]
+  %tmp.i = load i32* %__src1.i, align 1           ; <i32> [#uses=1]
+  store i32 %tmp.i, i32* %__dest2.i, align 1
+  ret void
 }