From 68138dc9a89cda52bdc7d27bbebec194809e93cf Mon Sep 17 00:00:00 2001
From: Daniel Sanders <daniel.sanders@imgtec.com>
Date: Mon, 9 Dec 2013 12:47:12 +0000
Subject: [PATCH] [mips][msa] Fix invalid generated code when lowering
 FrameIndex involving unaligned offsets.

Summary:
The MSA ld.[bhwd] and st.[bhwd] instructions scale the immediate by the
element size before use as an offset. The offset must therefore be a
multiple of the element size to be valid in these instructions. However,
an unaligned base address is valid in MSA.

When the calculated offset is not a multiple of the element size, the
compiler now emits valid code by adding the offset to the base address
with addiu and using a zero offset in the load/store.

Depends on D2338

Reviewers: matheusalmeida

Reviewed By: matheusalmeida

Differential Revision: http://llvm-reviews.chandlerc.com/D2339

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196777 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsSERegisterInfo.cpp | 23 +++++++++-
 test/CodeGen/Mips/msa/frameindex.ll    | 60 ++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index cf408a1c098..fcf6d0b06c7 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -84,6 +84,23 @@ static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode) {
   }
 }
 
+/// Get the scale factor (in bytes) applied to Opcode's immediate; 1 if none.
+static inline unsigned getLoadStoreOffsetAlign(const unsigned Opcode) {
+  switch (Opcode) {
+  case Mips::LD_H:
+  case Mips::ST_H:
+    return 2; // 16-bit elements (ld.h/st.h)
+  case Mips::LD_W:
+  case Mips::ST_W:
+    return 4; // 32-bit elements (ld.w/st.w)
+  case Mips::LD_D:
+  case Mips::ST_D:
+    return 8; // 64-bit elements (ld.d/st.d)
+  default:
+    return 1; // Everything else: a factor of 1 constrains no offset.
+  }
+}
+
 void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
                                      unsigned OpNo, int FrameIndex,
                                      uint64_t StackSize,
@@ -138,9 +155,11 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
     // For MSA instructions, this is a 10-bit signed immediate (scaled by
     // element size), otherwise it is a 16-bit signed immediate.
     unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits(MI.getOpcode());
+    unsigned OffsetAlign = getLoadStoreOffsetAlign(MI.getOpcode());
 
-    if (OffsetBitSize < 16 && !isIntN(OffsetBitSize, Offset) &&
-        isInt<16>(Offset)) {
+    if (OffsetBitSize < 16 && isInt<16>(Offset) &&
+        (!isIntN(OffsetBitSize, Offset) ||
+         OffsetToAlignment(Offset, OffsetAlign) != 0)) {
       // If we have an offset that needs to fit into a signed n-bit immediate
       // (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu
       MachineBasicBlock &MBB = *MI.getParent();
diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll
index f8fa6863016..07e67bf0428 100644
--- a/test/CodeGen/Mips/msa/frameindex.ll
+++ b/test/CodeGen/Mips/msa/frameindex.ll
@@ -97,6 +97,26 @@ define void @loadstore_v8i16_near() nounwind {
   ; MIPS32-AE: .size loadstore_v8i16_near
 }
 
+define void @loadstore_v8i16_unaligned() nounwind {
+  ; MIPS32-AE: loadstore_v8i16_unaligned:
+
+  %1 = alloca [2 x <8 x i16>]
+  %2 = bitcast [2 x <8 x i16>]* %1 to i8*
+  %3 = getelementptr i8* %2, i32 1 ; step 1 byte: not a multiple of 2
+  %4 = bitcast i8* %3 to [2 x <8 x i16>]*
+  %5 = getelementptr [2 x <8 x i16>]* %4, i32 0, i32 0
+
+  %6 = load volatile <8 x i16>* %5 ; expect addiu for the offset, ld.h at 0
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <8 x i16> %6, <8 x i16>* %5 ; same pattern for st.h
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v8i16_unaligned
+}
+
 define void @loadstore_v8i16_just_under_simm10() nounwind {
   ; MIPS32-AE: loadstore_v8i16_just_under_simm10:
 
@@ -180,6 +200,26 @@ define void @loadstore_v4i32_near() nounwind {
   ; MIPS32-AE: .size loadstore_v4i32_near
 }
 
+define void @loadstore_v4i32_unaligned() nounwind {
+  ; MIPS32-AE: loadstore_v4i32_unaligned:
+
+  %1 = alloca [2 x <4 x i32>]
+  %2 = bitcast [2 x <4 x i32>]* %1 to i8*
+  %3 = getelementptr i8* %2, i32 1 ; step 1 byte: not a multiple of 4
+  %4 = bitcast i8* %3 to [2 x <4 x i32>]*
+  %5 = getelementptr [2 x <4 x i32>]* %4, i32 0, i32 0
+
+  %6 = load volatile <4 x i32>* %5 ; expect addiu for the offset, ld.w at 0
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <4 x i32> %6, <4 x i32>* %5 ; same pattern for st.w
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v4i32_unaligned
+}
+
 define void @loadstore_v4i32_just_under_simm10() nounwind {
   ; MIPS32-AE: loadstore_v4i32_just_under_simm10:
 
@@ -263,6 +303,26 @@ define void @loadstore_v2i64_near() nounwind {
   ; MIPS32-AE: .size loadstore_v2i64_near
 }
 
+define void @loadstore_v2i64_unaligned() nounwind {
+  ; MIPS32-AE: loadstore_v2i64_unaligned:
+
+  %1 = alloca [2 x <2 x i64>]
+  %2 = bitcast [2 x <2 x i64>]* %1 to i8*
+  %3 = getelementptr i8* %2, i32 1 ; step 1 byte: not a multiple of 8
+  %4 = bitcast i8* %3 to [2 x <2 x i64>]*
+  %5 = getelementptr [2 x <2 x i64>]* %4, i32 0, i32 0
+
+  %6 = load volatile <2 x i64>* %5 ; expect addiu for the offset, ld.d at 0
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <2 x i64> %6, <2 x i64>* %5 ; same pattern for st.d
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v2i64_unaligned
+}
+
 define void @loadstore_v2i64_just_under_simm10() nounwind {
   ; MIPS32-AE: loadstore_v2i64_just_under_simm10: