Use subword loads instead of a 4-byte load when the size of a structure (or a piece of it) that is being passed by value is smaller than a word.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138007 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Akira Hatanaka 2011-08-18 23:39:37 +00:00
parent 28bea08e53
commit 5ac8547a41
2 changed files with 101 additions and 29 deletions

View File

@ -1805,43 +1805,90 @@ WriteByValArg(SDValue& Chain, DebugLoc dl,
SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
MVT PtrType) {
unsigned FirstWord = VA.getLocMemOffset() / 4;
unsigned NumWords = (Flags.getByValSize() + 3) / 4;
unsigned LastWord = FirstWord + NumWords;
unsigned CurWord;
MVT PtrType, bool isLittle) {
unsigned LocMemOffset = VA.getLocMemOffset();
unsigned Offset = 0;
uint32_t RemainingSize = Flags.getByValSize();
unsigned ByValAlign = Flags.getByValAlign();
// copy the first 4 words of byval arg to registers A0 - A3
for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize);
++CurWord) {
// Copy the first 4 words of byval arg to registers A0 - A3.
// FIXME: Use a stricter alignment if it enables better optimization in passes
// run later.
for (; RemainingSize >= 4 && LocMemOffset < 4 * 4;
Offset += 4, RemainingSize -= 4, LocMemOffset += 4) {
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant((CurWord - FirstWord) * 4,
MVT::i32));
DAG.getConstant(Offset, MVT::i32));
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
MachinePointerInfo(),
false, false, std::min(ByValAlign,
(unsigned )4));
MemOpChains.push_back(LoadVal.getValue(1));
unsigned DstReg = O32IntRegs[CurWord];
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
}
// copy remaining part of byval arg to stack.
if (CurWord < LastWord) {
unsigned SizeInBytes = (LastWord - CurWord) * 4;
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant((CurWord - FirstWord) * 4,
MVT::i32));
LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true);
SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
DAG.getConstant(SizeInBytes, MVT::i32),
/*Align*/ByValAlign,
/*isVolatile=*/false, /*AlwaysInline=*/false,
MachinePointerInfo(0), MachinePointerInfo(0));
MemOpChains.push_back(Chain);
if (RemainingSize == 0)
return;
// If there still is a register available for argument passing, write the
// remaining part of the structure to it using subword loads and shifts.
if (LocMemOffset < 4 * 4) {
assert(RemainingSize <= 3 && RemainingSize >= 1 &&
"There must be one to three bytes remaining.");
unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize);
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
unsigned Alignment = std::min(ByValAlign, (unsigned )4);
SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
LoadPtr, MachinePointerInfo(),
MVT::getIntegerVT(LoadSize * 8), false,
false, Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
// If target is big endian, shift it to the most significant half-word or
// byte.
if (!isLittle)
LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal,
DAG.getConstant(32 - LoadSize * 8, MVT::i32));
Offset += LoadSize;
RemainingSize -= LoadSize;
// Read second subword if necessary.
if (RemainingSize != 0) {
assert(RemainingSize == 1 && "There must be one byte remaining.");
LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
unsigned Alignment = std::min(ByValAlign, (unsigned )2);
SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
LoadPtr, MachinePointerInfo(),
MVT::i8, false, false, Alignment);
MemOpChains.push_back(Subword.getValue(1));
// Insert the loaded byte to LoadVal.
// FIXME: Use INS if supported by target.
unsigned ShiftAmt = isLittle ? 16 : 8;
SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword,
DAG.getConstant(ShiftAmt, MVT::i32));
LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift);
}
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
return;
}
// Create a fixed object on stack at offset LocMemOffset and copy
// remaining part of byval arg to it using memcpy.
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
DAG.getConstant(RemainingSize, MVT::i32),
std::min(ByValAlign, (unsigned)4),
/*isVolatile=*/false, /*AlwaysInline=*/false,
MachinePointerInfo(0), MachinePointerInfo(0));
MemOpChains.push_back(Chain);
}
/// LowerCall - functions arguments are copied from virtual regs to
@ -1974,7 +2021,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
VA, Flags, getPointerTy());
VA, Flags, getPointerTy(), Subtarget->isLittle());
continue;
}

View File

@ -1,16 +1,41 @@
; RUN: llc -march=mips < %s | FileCheck %s
; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
%struct.S2 = type { %struct.S1, %struct.S1 }
%struct.S1 = type { i8, i8 }
%struct.S4 = type { [7 x i8] }
@s2 = common global %struct.S2 zeroinitializer, align 1
@s4 = common global %struct.S4 zeroinitializer, align 1
define void @foo1() nounwind {
entry:
; CHECK: ulw ${{[0-9]+}}, 2
; CHECK-EL: lw $25, %call16(foo2)
; CHECK-EL: ulhu $4, 2
; CHECK-EL: lw $[[R0:[0-9]+]], %got(s4)
; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]])
; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
; CHECK-EL: ulw $4, 0($[[R0]])
; CHECK-EL: lw $25, %call16(foo4)
; CHECK-EL: or $5, $[[R2]], $[[R3]]
; CHECK-EB: ulhu $[[R0:[0-9]+]], 2
; CHECK-EB: lw $25, %call16(foo2)
; CHECK-EB: sll $4, $[[R0]], 16
; CHECK-EB: lw $[[R1:[0-9]+]], %got(s4)
; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]])
; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
; CHECK-EB: ulw $4, 0($[[R1]])
; CHECK-EB: lw $25, %call16(foo4)
; CHECK-EB: or $5, $[[R4]], $[[R5]]
tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
tail call void @foo4(%struct.S4* byval @s4) nounwind
ret void
}
declare void @foo2(%struct.S1* byval)
declare void @foo4(%struct.S4* byval)