From 088880cb192fb6dd5b1bf85af62023c5ca3da38f Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Sat, 15 Aug 2009 19:23:44 +0000
Subject: [PATCH] Change allowsUnalignedMemoryAccesses to take type argument
 since some targets support unaligned mem access only for certain types.
 (Should it be size instead?)

ARM v7 supports unaligned access for i16 and i32; some v6 variants support it
as well.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79127 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetLowering.h     | 18 ++++-------
 lib/Target/ARM/ARMISelLowering.cpp       | 21 +++++++++++++
 lib/Target/ARM/ARMISelLowering.h         |  7 +++++
 lib/Target/X86/X86ISelLowering.cpp       |  1 -
 lib/Target/X86/X86ISelLowering.h         | 13 +++++---
 lib/Target/XCore/XCoreISelLowering.cpp   | 10 +++---
 test/CodeGen/ARM/unaligned_load_store.ll | 39 ++++++++++++++++--------
 7 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index fda362d8ceb..00a455c3609 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -637,13 +637,13 @@ public:
   unsigned getMaxStoresPerMemmove() const { return maxStoresPerMemmove; }
 
-  /// This function returns true if the target allows unaligned memory accesses.
-  /// This is used, for example, in situations where an array copy/move/set is
-  /// converted to a sequence of store operations. It's use helps to ensure that
-  /// such replacements don't generate code that causes an alignment error
-  /// (trap) on the target machine.
+  /// This function returns true if the target allows unaligned memory accesses
+  /// of the specified type. This is used, for example, in situations where an
+  /// array copy/move/set is converted to a sequence of store operations. Its
+  /// use helps to ensure that such replacements don't generate code that
+  /// causes an alignment error (trap) on the target machine.
   /// @brief Determine if the target supports unaligned memory accesses.
-  bool allowsUnalignedMemoryAccesses() const {
-    return allowUnalignedMemoryAccesses;
+  virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+    return false;
   }
 
   /// This function returns true if the target would benefit from code placement
@@ -1757,12 +1757,6 @@ protected:
   /// @brief Specify maximum bytes of store instructions per memmove call.
   unsigned maxStoresPerMemmove;
 
-  /// This field specifies whether the target machine permits unaligned memory
-  /// accesses. This is used, for example, to determine the size of store
-  /// operations when copying small arrays and other similar tasks.
-  /// @brief Indicate whether the target permits unaligned memory accesses.
-  bool allowUnalignedMemoryAccesses;
-
   /// This field specifies whether the target can benefit from code placement
   /// optimization.
   bool benefitFromCodePlacementOpt;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9de737be99a..99135b23115 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3121,6 +3121,27 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   return SDValue();
 }
 
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+  if (!Subtarget->hasV6Ops())
+    // Pre-v6 does not support unaligned mem access.
+    return false;
+  else if (!Subtarget->hasV7Ops()) {
+    // v6 may or may not support unaligned mem access.
+    if (!Subtarget->isTargetDarwin())
+      return false;
+  }
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+    return true;
+  // FIXME: VLD1 etc with standard alignment is legal.
+  }
+}
+
 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
   if (V < 0)
     return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 3a90ca3e62f..db6d8baaad1 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -166,6 +166,11 @@ namespace llvm {
     virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
                                                            MachineBasicBlock *MBB) const;
 
+    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// unaligned memory accesses of the specified type.
+    /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
@@ -193,6 +198,8 @@ namespace llvm {
                                                 APInt &KnownOne,
                                                 const SelectionDAG &DAG,
                                                 unsigned Depth) const;
+
+
     ConstraintType getConstraintType(const std::string &Constraint) const;
     std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 15436310207..ff9f2bf4430 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -957,7 +957,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
   maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
   maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
-  allowUnalignedMemoryAccesses = true; // x86 supports it!
   setPrefLoopAlignment(16);
   benefitFromCodePlacementOpt = true;
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 3ac6e51bbb0..f3f09f5ac93 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -389,10 +389,15 @@ namespace llvm {
     /// and store operations as a result of memset, memcpy, and memmove
     /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for
     /// determining it.
-    virtual
-    EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                            bool isSrcConst, bool isSrcStr,
-                            SelectionDAG &DAG) const;
+    virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+                                    bool isSrcConst, bool isSrcStr,
+                                    SelectionDAG &DAG) const;
+
+    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// unaligned memory accesses of the specified type.
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+      return true;
+    }
 
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 0174778a1d4..605ed83eed2 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -367,9 +367,10 @@ SDValue XCoreTargetLowering::
 LowerLOAD(SDValue Op, SelectionDAG &DAG)
 {
   LoadSDNode *LD = cast<LoadSDNode>(Op);
-  assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension type");
+  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+         "Unexpected extension type");
   assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
-  if (allowsUnalignedMemoryAccesses()) {
+  if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
     return SDValue();
   }
   unsigned ABIAlignment = getTargetData()->
@@ -465,7 +466,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
   StoreSDNode *ST = cast<StoreSDNode>(Op);
   assert(!ST->isTruncatingStore() && "Unexpected store type");
   assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
-  if (allowsUnalignedMemoryAccesses()) {
+  if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
     return SDValue();
   }
   unsigned ABIAlignment = getTargetData()->
@@ -1048,7 +1049,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::STORE: {
     // Replace unaligned store of unaligned load with memmove.
     StoreSDNode *ST = cast<StoreSDNode>(N);
-    if (!DCI.isBeforeLegalize() || allowsUnalignedMemoryAccesses() ||
+    if (!DCI.isBeforeLegalize() ||
+        allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
         ST->isVolatile() || ST->isIndexed()) {
       break;
     }
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index dad1897463a..6fd9c2ab736 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,16 +1,31 @@
-; RUN: llvm-as < %s | \
-; RUN:   llc -march=arm -o %t -f
-; RUN: grep ldrb %t | count 4
-; RUN: grep strb %t | count 4
+; RUN: llvm-as < %s | llc -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llvm-as < %s | llc -march=arm -mattr=+v7a | FileCheck %s -check-prefix=V7
+; rdar://7113725
 
-%struct.p = type <{ i8, i32 }>
-@t = global %struct.p <{ i8 1, i32 10 }>        ; <%struct.p*> [#uses=1]
-@u = weak global %struct.p zeroinitializer      ; <%struct.p*> [#uses=1]
-
-define i32 @main() {
+define arm_apcscc void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 entry:
-  %tmp3 = load i32* getelementptr (%struct.p* @t, i32 0, i32 1), align 1      ; <i32> [#uses=2]
-  store i32 %tmp3, i32* getelementptr (%struct.p* @u, i32 0, i32 1), align 1
-  ret i32 %tmp3
+; GENERIC: t:
+; GENERIC: ldrb r2
+; GENERIC: ldrb r3
+; GENERIC: ldrb r12
+; GENERIC: ldrb r1
+; GENERIC: strb r1
+; GENERIC: strb r12
+; GENERIC: strb r3
+; GENERIC: strb r2
+
+; DARWIN_V6: t:
+; DARWIN_V6: ldr r1
+; DARWIN_V6: str r1
+
+; V7: t:
+; V7: ldr r1
+; V7: str r1
+  %__src1.i = bitcast i8* %b to i32*            ; <i32*> [#uses=1]
+  %__dest2.i = bitcast i8* %a to i32*            ; <i32*> [#uses=1]
+  %tmp.i = load i32* %__src1.i, align 1          ; <i32> [#uses=1]
+  store i32 %tmp.i, i32* %__dest2.i, align 1
+  ret void
 }
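
Note for other backends (not part of the patch itself): a target that
previously set allowUnalignedMemoryAccesses in its TargetLowering
constructor now overrides the virtual hook and decides per type. A minimal
sketch, assuming a hypothetical backend Foo (the class name is invented for
illustration; the declaration in FooISelLowering.h would mirror the ARM one
added above):

  // Hypothetical FooISelLowering.cpp: permit unaligned accesses for
  // 32-bit scalars only; every other type keeps the conservative default
  // and is expanded into aligned operations by SelectionDAG.
  bool FooTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
    switch (VT.getSimpleVT().SimpleTy) {
    default:
      return false;
    case MVT::i32:
      return true;
    }
  }

Call sites pass the type actually being accessed, as in the XCore changes
above, e.g. allowsUnalignedMemoryAccesses(LD->getMemoryVT()), so a target
can accept unaligned i32 loads and stores while still forcing byte-wise
expansion for wider or vector types.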