mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-11 00:39:36 +00:00
[mips][msa] Fix suboptimal FrameIndex lowering for ld.[hwd] and st.[hwd]
Summary:
The immediate in these instructions is scaled before use as an offset. They therefore have a wider reach than ld.b/st.b.

Reviewers: matheusalmeida

Reviewed By: matheusalmeida

Differential Revision: http://llvm-reviews.chandlerc.com/D2338

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196775 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
84744f6916
commit
897268d931
@ -62,21 +62,25 @@ MipsSERegisterInfo::intRegClass(unsigned Size) const {
|
||||
return &Mips::GPR64RegClass;
|
||||
}
|
||||
|
||||
/// Determine whether a given opcode is an MSA load/store (supporting 10-bit
|
||||
/// offsets) or a non-MSA load/store (supporting 16-bit offsets).
|
||||
static inline bool isMSALoadOrStore(const unsigned Opcode) {
|
||||
/// Get the size of the offset supported by the given load/store.
|
||||
/// The result includes the effects of any scale factors applied to the
|
||||
/// instruction immediate.
|
||||
static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode) {
|
||||
switch (Opcode) {
|
||||
case Mips::LD_B:
|
||||
case Mips::LD_H:
|
||||
case Mips::LD_W:
|
||||
case Mips::LD_D:
|
||||
case Mips::ST_B:
|
||||
return 10;
|
||||
case Mips::LD_H:
|
||||
case Mips::ST_H:
|
||||
return 10 + 1 /* scale factor */;
|
||||
case Mips::LD_W:
|
||||
case Mips::ST_W:
|
||||
return 10 + 2 /* scale factor */;
|
||||
case Mips::LD_D:
|
||||
case Mips::ST_D:
|
||||
return true;
|
||||
return 10 + 3 /* scale factor */;
|
||||
default:
|
||||
return false;
|
||||
return 16;
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,13 +135,14 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
|
||||
|
||||
if (!MI.isDebugValue()) {
|
||||
// Make sure Offset fits within the field available.
|
||||
// For MSA instructions, this is a 10-bit signed immediate, otherwise it is
|
||||
// a 16-bit signed immediate.
|
||||
unsigned OffsetBitSize = isMSALoadOrStore(MI.getOpcode()) ? 10 : 16;
|
||||
// For MSA instructions, this is a 10-bit signed immediate (scaled by
|
||||
// element size), otherwise it is a 16-bit signed immediate.
|
||||
unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits(MI.getOpcode());
|
||||
|
||||
if (OffsetBitSize == 10 && !isInt<10>(Offset) && isInt<16>(Offset)) {
|
||||
// If we have an offset that needs to fit into a signed 10-bit immediate
|
||||
// and doesn't, but does fit into 16-bits then use an ADDiu
|
||||
if (OffsetBitSize < 16 && !isIntN(OffsetBitSize, Offset) &&
|
||||
isInt<16>(Offset)) {
|
||||
// If we have an offset that needs to fit into a signed n-bit immediate
|
||||
// (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = II->getDebugLoc();
|
||||
unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
|
||||
|
@ -83,3 +83,252 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v16i8_just_over_simm16
|
||||
}
|
||||
|
||||
define void @loadstore_v8i16_near() nounwind {
|
||||
; MIPS32-AE: loadstore_v8i16_near:
|
||||
|
||||
%1 = alloca <8 x i16>
|
||||
%2 = load volatile <8 x i16>* %1
|
||||
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0($sp)
|
||||
store volatile <8 x i16> %2, <8 x i16>* %1
|
||||
; MIPS32-AE: st.h [[R1]], 0($sp)
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v8i16_near
|
||||
}
|
||||
|
||||
define void @loadstore_v8i16_just_under_simm10() nounwind {
|
||||
; MIPS32-AE: loadstore_v8i16_just_under_simm10:
|
||||
|
||||
%1 = alloca <8 x i16>
|
||||
%2 = alloca [1008 x i8] ; Push the frame right up to 1024 bytes
|
||||
|
||||
%3 = load volatile <8 x i16>* %1
|
||||
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp)
|
||||
store volatile <8 x i16> %3, <8 x i16>* %1
|
||||
; MIPS32-AE: st.h [[R1]], 1008($sp)
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v8i16_just_under_simm10
|
||||
}
|
||||
|
||||
define void @loadstore_v8i16_just_over_simm10() nounwind {
|
||||
; MIPS32-AE: loadstore_v8i16_just_over_simm10:
|
||||
|
||||
%1 = alloca <8 x i16>
|
||||
%2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes
|
||||
|
||||
%3 = load volatile <8 x i16>* %1
|
||||
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
|
||||
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <8 x i16> %3, <8 x i16>* %1
|
||||
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
|
||||
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v8i16_just_over_simm10
|
||||
}
|
||||
|
||||
define void @loadstore_v8i16_just_under_simm16() nounwind {
|
||||
; MIPS32-AE: loadstore_v8i16_just_under_simm16:
|
||||
|
||||
%1 = alloca <8 x i16>
|
||||
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
|
||||
|
||||
%3 = load volatile <8 x i16>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <8 x i16> %3, <8 x i16>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v8i16_just_under_simm16
|
||||
}
|
||||
|
||||
define void @loadstore_v8i16_just_over_simm16() nounwind {
|
||||
; MIPS32-AE: loadstore_v8i16_just_over_simm16:
|
||||
|
||||
%1 = alloca <8 x i16>
|
||||
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
|
||||
|
||||
%3 = load volatile <8 x i16>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <8 x i16> %3, <8 x i16>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v8i16_just_over_simm16
|
||||
}
|
||||
|
||||
define void @loadstore_v4i32_near() nounwind {
|
||||
; MIPS32-AE: loadstore_v4i32_near:
|
||||
|
||||
%1 = alloca <4 x i32>
|
||||
%2 = load volatile <4 x i32>* %1
|
||||
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0($sp)
|
||||
store volatile <4 x i32> %2, <4 x i32>* %1
|
||||
; MIPS32-AE: st.w [[R1]], 0($sp)
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v4i32_near
|
||||
}
|
||||
|
||||
define void @loadstore_v4i32_just_under_simm10() nounwind {
|
||||
; MIPS32-AE: loadstore_v4i32_just_under_simm10:
|
||||
|
||||
%1 = alloca <4 x i32>
|
||||
%2 = alloca [2032 x i8] ; Push the frame right up to 2048 bytes
|
||||
|
||||
%3 = load volatile <4 x i32>* %1
|
||||
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp)
|
||||
store volatile <4 x i32> %3, <4 x i32>* %1
|
||||
; MIPS32-AE: st.w [[R1]], 2032($sp)
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v4i32_just_under_simm10
|
||||
}
|
||||
|
||||
define void @loadstore_v4i32_just_over_simm10() nounwind {
|
||||
; MIPS32-AE: loadstore_v4i32_just_over_simm10:
|
||||
|
||||
%1 = alloca <4 x i32>
|
||||
%2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes
|
||||
|
||||
%3 = load volatile <4 x i32>* %1
|
||||
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
|
||||
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <4 x i32> %3, <4 x i32>* %1
|
||||
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
|
||||
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v4i32_just_over_simm10
|
||||
}
|
||||
|
||||
define void @loadstore_v4i32_just_under_simm16() nounwind {
|
||||
; MIPS32-AE: loadstore_v4i32_just_under_simm16:
|
||||
|
||||
%1 = alloca <4 x i32>
|
||||
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
|
||||
|
||||
%3 = load volatile <4 x i32>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <4 x i32> %3, <4 x i32>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v4i32_just_under_simm16
|
||||
}
|
||||
|
||||
define void @loadstore_v4i32_just_over_simm16() nounwind {
|
||||
; MIPS32-AE: loadstore_v4i32_just_over_simm16:
|
||||
|
||||
%1 = alloca <4 x i32>
|
||||
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
|
||||
|
||||
%3 = load volatile <4 x i32>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <4 x i32> %3, <4 x i32>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v4i32_just_over_simm16
|
||||
}
|
||||
|
||||
define void @loadstore_v2i64_near() nounwind {
|
||||
; MIPS32-AE: loadstore_v2i64_near:
|
||||
|
||||
%1 = alloca <2 x i64>
|
||||
%2 = load volatile <2 x i64>* %1
|
||||
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0($sp)
|
||||
store volatile <2 x i64> %2, <2 x i64>* %1
|
||||
; MIPS32-AE: st.d [[R1]], 0($sp)
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v2i64_near
|
||||
}
|
||||
|
||||
define void @loadstore_v2i64_just_under_simm10() nounwind {
|
||||
; MIPS32-AE: loadstore_v2i64_just_under_simm10:
|
||||
|
||||
%1 = alloca <2 x i64>
|
||||
%2 = alloca [4080 x i8] ; Push the frame right up to 4096 bytes
|
||||
|
||||
%3 = load volatile <2 x i64>* %1
|
||||
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp)
|
||||
store volatile <2 x i64> %3, <2 x i64>* %1
|
||||
; MIPS32-AE: st.d [[R1]], 4080($sp)
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v2i64_just_under_simm10
|
||||
}
|
||||
|
||||
define void @loadstore_v2i64_just_over_simm10() nounwind {
|
||||
; MIPS32-AE: loadstore_v2i64_just_over_simm10:
|
||||
|
||||
%1 = alloca <2 x i64>
|
||||
%2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes
|
||||
|
||||
%3 = load volatile <2 x i64>* %1
|
||||
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
|
||||
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <2 x i64> %3, <2 x i64>* %1
|
||||
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
|
||||
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v2i64_just_over_simm10
|
||||
}
|
||||
|
||||
define void @loadstore_v2i64_just_under_simm16() nounwind {
|
||||
; MIPS32-AE: loadstore_v2i64_just_under_simm16:
|
||||
|
||||
%1 = alloca <2 x i64>
|
||||
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
|
||||
|
||||
%3 = load volatile <2 x i64>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <2 x i64> %3, <2 x i64>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v2i64_just_under_simm16
|
||||
}
|
||||
|
||||
define void @loadstore_v2i64_just_over_simm16() nounwind {
|
||||
; MIPS32-AE: loadstore_v2i64_just_over_simm16:
|
||||
|
||||
%1 = alloca <2 x i64>
|
||||
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
|
||||
|
||||
%3 = load volatile <2 x i64>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
|
||||
store volatile <2 x i64> %3, <2 x i64>* %1
|
||||
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
|
||||
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
|
||||
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
|
||||
|
||||
ret void
|
||||
; MIPS32-AE: .size loadstore_v2i64_just_over_simm16
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user