Use subword loads instead of a 4-byte load when the size of a structure (or a piece of it) that is being passed by value is smaller than a word.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138007 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Akira Hatanaka
2011-08-18 23:39:37 +00:00
parent 28bea08e53
commit 5ac8547a41
2 changed files with 101 additions and 29 deletions

View File

@ -1805,43 +1805,90 @@ WriteByValArg(SDValue& Chain, DebugLoc dl,
SmallVector<SDValue, 8>& MemOpChains, int& LastFI, SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
MVT PtrType) { MVT PtrType, bool isLittle) {
unsigned FirstWord = VA.getLocMemOffset() / 4; unsigned LocMemOffset = VA.getLocMemOffset();
unsigned NumWords = (Flags.getByValSize() + 3) / 4; unsigned Offset = 0;
unsigned LastWord = FirstWord + NumWords; uint32_t RemainingSize = Flags.getByValSize();
unsigned CurWord;
unsigned ByValAlign = Flags.getByValAlign(); unsigned ByValAlign = Flags.getByValAlign();
// copy the first 4 words of byval arg to registers A0 - A3 // Copy the first 4 words of byval arg to registers A0 - A3.
for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize); // FIXME: Use a stricter alignment if it enables better optimization in passes
++CurWord) { // run later.
for (; RemainingSize >= 4 && LocMemOffset < 4 * 4;
Offset += 4, RemainingSize -= 4, LocMemOffset += 4) {
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant((CurWord - FirstWord) * 4, DAG.getConstant(Offset, MVT::i32));
MVT::i32));
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
MachinePointerInfo(), MachinePointerInfo(),
false, false, std::min(ByValAlign, false, false, std::min(ByValAlign,
(unsigned )4)); (unsigned )4));
MemOpChains.push_back(LoadVal.getValue(1)); MemOpChains.push_back(LoadVal.getValue(1));
unsigned DstReg = O32IntRegs[CurWord]; unsigned DstReg = O32IntRegs[LocMemOffset / 4];
RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
} }
// copy remaining part of byval arg to stack. if (RemainingSize == 0)
if (CurWord < LastWord) { return;
unsigned SizeInBytes = (LastWord - CurWord) * 4;
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, // If there still is a register available for argument passing, write the
DAG.getConstant((CurWord - FirstWord) * 4, // remaining part of the structure to it using subword loads and shifts.
MVT::i32)); if (LocMemOffset < 4 * 4) {
LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true); assert(RemainingSize <= 3 && RemainingSize >= 1 &&
SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); "There must be one to three bytes remaining.");
Chain = DAG.getMemcpy(Chain, dl, Dst, Src, unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize);
DAG.getConstant(SizeInBytes, MVT::i32), SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
/*Align*/ByValAlign, DAG.getConstant(Offset, MVT::i32));
/*isVolatile=*/false, /*AlwaysInline=*/false, unsigned Alignment = std::min(ByValAlign, (unsigned )4);
MachinePointerInfo(0), MachinePointerInfo(0)); SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
MemOpChains.push_back(Chain); LoadPtr, MachinePointerInfo(),
MVT::getIntegerVT(LoadSize * 8), false,
false, Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
// If target is big endian, shift it to the most significant half-word or
// byte.
if (!isLittle)
LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal,
DAG.getConstant(32 - LoadSize * 8, MVT::i32));
Offset += LoadSize;
RemainingSize -= LoadSize;
// Read second subword if necessary.
if (RemainingSize != 0) {
assert(RemainingSize == 1 && "There must be one byte remaining.");
LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
unsigned Alignment = std::min(ByValAlign, (unsigned )2);
SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
LoadPtr, MachinePointerInfo(),
MVT::i8, false, false, Alignment);
MemOpChains.push_back(Subword.getValue(1));
// Insert the loaded byte to LoadVal.
// FIXME: Use INS if supported by target.
unsigned ShiftAmt = isLittle ? 16 : 8;
SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword,
DAG.getConstant(ShiftAmt, MVT::i32));
LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift);
}
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
return;
} }
// Create a fixed object on stack at offset LocMemOffset and copy
// remaining part of byval arg to it using memcpy.
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
DAG.getConstant(RemainingSize, MVT::i32),
std::min(ByValAlign, (unsigned)4),
/*isVolatile=*/false, /*AlwaysInline=*/false,
MachinePointerInfo(0), MachinePointerInfo(0));
MemOpChains.push_back(Chain);
} }
/// LowerCall - functions arguments are copied from virtual regs to /// LowerCall - functions arguments are copied from virtual regs to
@ -1974,7 +2021,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
assert(Flags.getByValSize() && assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end."); "ByVal args of size 0 should have been ignored by front-end.");
WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
VA, Flags, getPointerTy()); VA, Flags, getPointerTy(), Subtarget->isLittle());
continue; continue;
} }

View File

@ -1,16 +1,41 @@
; RUN: llc -march=mips < %s | FileCheck %s ; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
%struct.S2 = type { %struct.S1, %struct.S1 } %struct.S2 = type { %struct.S1, %struct.S1 }
%struct.S1 = type { i8, i8 } %struct.S1 = type { i8, i8 }
%struct.S4 = type { [7 x i8] }
@s2 = common global %struct.S2 zeroinitializer, align 1 @s2 = common global %struct.S2 zeroinitializer, align 1
@s4 = common global %struct.S4 zeroinitializer, align 1
define void @foo1() nounwind { define void @foo1() nounwind {
entry: entry:
; CHECK: ulw ${{[0-9]+}}, 2 ; CHECK-EL: lw $25, %call16(foo2)
; CHECK-EL: ulhu $4, 2
; CHECK-EL: lw $[[R0:[0-9]+]], %got(s4)
; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]])
; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
; CHECK-EL: ulw $4, 0($[[R0]])
; CHECK-EL: lw $25, %call16(foo4)
; CHECK-EL: or $5, $[[R2]], $[[R3]]
; CHECK-EB: ulhu $[[R0:[0-9]+]], 2
; CHECK-EB: lw $25, %call16(foo2)
; CHECK-EB: sll $4, $[[R0]], 16
; CHECK-EB: lw $[[R1:[0-9]+]], %got(s4)
; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]])
; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
; CHECK-EB: ulw $4, 0($[[R1]])
; CHECK-EB: lw $25, %call16(foo4)
; CHECK-EB: or $5, $[[R4]], $[[R5]]
tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
tail call void @foo4(%struct.S4* byval @s4) nounwind
ret void ret void
} }
declare void @foo2(%struct.S1* byval) declare void @foo2(%struct.S1* byval)
declare void @foo4(%struct.S4* byval)