mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-28 06:24:57 +00:00
Use subword loads instead of a 4-byte load when the size of a structure (or a
piece of it) that is being passed by value is smaller than a word.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138007 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -1805,43 +1805,90 @@ WriteByValArg(SDValue& Chain, DebugLoc dl,
|
|||||||
SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
|
SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
|
||||||
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
|
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
|
||||||
const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
|
const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
|
||||||
MVT PtrType) {
|
MVT PtrType, bool isLittle) {
|
||||||
unsigned FirstWord = VA.getLocMemOffset() / 4;
|
unsigned LocMemOffset = VA.getLocMemOffset();
|
||||||
unsigned NumWords = (Flags.getByValSize() + 3) / 4;
|
unsigned Offset = 0;
|
||||||
unsigned LastWord = FirstWord + NumWords;
|
uint32_t RemainingSize = Flags.getByValSize();
|
||||||
unsigned CurWord;
|
|
||||||
unsigned ByValAlign = Flags.getByValAlign();
|
unsigned ByValAlign = Flags.getByValAlign();
|
||||||
|
|
||||||
// copy the first 4 words of byval arg to registers A0 - A3
|
// Copy the first 4 words of byval arg to registers A0 - A3.
|
||||||
for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize);
|
// FIXME: Use a stricter alignment if it enables better optimization in passes
|
||||||
++CurWord) {
|
// run later.
|
||||||
|
for (; RemainingSize >= 4 && LocMemOffset < 4 * 4;
|
||||||
|
Offset += 4, RemainingSize -= 4, LocMemOffset += 4) {
|
||||||
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||||
DAG.getConstant((CurWord - FirstWord) * 4,
|
DAG.getConstant(Offset, MVT::i32));
|
||||||
MVT::i32));
|
|
||||||
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
|
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
|
||||||
MachinePointerInfo(),
|
MachinePointerInfo(),
|
||||||
false, false, std::min(ByValAlign,
|
false, false, std::min(ByValAlign,
|
||||||
(unsigned )4));
|
(unsigned )4));
|
||||||
MemOpChains.push_back(LoadVal.getValue(1));
|
MemOpChains.push_back(LoadVal.getValue(1));
|
||||||
unsigned DstReg = O32IntRegs[CurWord];
|
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
|
||||||
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
|
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy remaining part of byval arg to stack.
|
if (RemainingSize == 0)
|
||||||
if (CurWord < LastWord) {
|
return;
|
||||||
unsigned SizeInBytes = (LastWord - CurWord) * 4;
|
|
||||||
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
// If there still is a register available for argument passing, write the
|
||||||
DAG.getConstant((CurWord - FirstWord) * 4,
|
// remaining part of the structure to it using subword loads and shifts.
|
||||||
MVT::i32));
|
if (LocMemOffset < 4 * 4) {
|
||||||
LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true);
|
assert(RemainingSize <= 3 && RemainingSize >= 1 &&
|
||||||
SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
|
"There must be one to three bytes remaining.");
|
||||||
Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
|
unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize);
|
||||||
DAG.getConstant(SizeInBytes, MVT::i32),
|
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||||
/*Align*/ByValAlign,
|
DAG.getConstant(Offset, MVT::i32));
|
||||||
/*isVolatile=*/false, /*AlwaysInline=*/false,
|
unsigned Alignment = std::min(ByValAlign, (unsigned )4);
|
||||||
MachinePointerInfo(0), MachinePointerInfo(0));
|
SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
|
||||||
MemOpChains.push_back(Chain);
|
LoadPtr, MachinePointerInfo(),
|
||||||
|
MVT::getIntegerVT(LoadSize * 8), false,
|
||||||
|
false, Alignment);
|
||||||
|
MemOpChains.push_back(LoadVal.getValue(1));
|
||||||
|
|
||||||
|
// If target is big endian, shift it to the most significant half-word or
|
||||||
|
// byte.
|
||||||
|
if (!isLittle)
|
||||||
|
LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal,
|
||||||
|
DAG.getConstant(32 - LoadSize * 8, MVT::i32));
|
||||||
|
|
||||||
|
Offset += LoadSize;
|
||||||
|
RemainingSize -= LoadSize;
|
||||||
|
|
||||||
|
// Read second subword if necessary.
|
||||||
|
if (RemainingSize != 0) {
|
||||||
|
assert(RemainingSize == 1 && "There must be one byte remaining.");
|
||||||
|
LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||||
|
DAG.getConstant(Offset, MVT::i32));
|
||||||
|
unsigned Alignment = std::min(ByValAlign, (unsigned )2);
|
||||||
|
SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
|
||||||
|
LoadPtr, MachinePointerInfo(),
|
||||||
|
MVT::i8, false, false, Alignment);
|
||||||
|
MemOpChains.push_back(Subword.getValue(1));
|
||||||
|
// Insert the loaded byte to LoadVal.
|
||||||
|
// FIXME: Use INS if supported by target.
|
||||||
|
unsigned ShiftAmt = isLittle ? 16 : 8;
|
||||||
|
SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword,
|
||||||
|
DAG.getConstant(ShiftAmt, MVT::i32));
|
||||||
|
LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
|
||||||
|
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create a fixed object on stack at offset LocMemOffset and copy
|
||||||
|
// remaining part of byval arg to it using memcpy.
|
||||||
|
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
|
||||||
|
DAG.getConstant(Offset, MVT::i32));
|
||||||
|
LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
|
||||||
|
SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
|
||||||
|
Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
|
||||||
|
DAG.getConstant(RemainingSize, MVT::i32),
|
||||||
|
std::min(ByValAlign, (unsigned)4),
|
||||||
|
/*isVolatile=*/false, /*AlwaysInline=*/false,
|
||||||
|
MachinePointerInfo(0), MachinePointerInfo(0));
|
||||||
|
MemOpChains.push_back(Chain);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// LowerCall - functions arguments are copied from virtual regs to
|
/// LowerCall - functions arguments are copied from virtual regs to
|
||||||
@ -1974,7 +2021,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
|||||||
assert(Flags.getByValSize() &&
|
assert(Flags.getByValSize() &&
|
||||||
"ByVal args of size 0 should have been ignored by front-end.");
|
"ByVal args of size 0 should have been ignored by front-end.");
|
||||||
WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
|
WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg,
|
||||||
VA, Flags, getPointerTy());
|
VA, Flags, getPointerTy(), Subtarget->isLittle());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,16 +1,41 @@
|
|||||||
; RUN: llc -march=mips < %s | FileCheck %s
|
; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
|
||||||
|
; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
|
||||||
%struct.S2 = type { %struct.S1, %struct.S1 }
|
%struct.S2 = type { %struct.S1, %struct.S1 }
|
||||||
%struct.S1 = type { i8, i8 }
|
%struct.S1 = type { i8, i8 }
|
||||||
|
%struct.S4 = type { [7 x i8] }
|
||||||
|
|
||||||
@s2 = common global %struct.S2 zeroinitializer, align 1
|
@s2 = common global %struct.S2 zeroinitializer, align 1
|
||||||
|
@s4 = common global %struct.S4 zeroinitializer, align 1
|
||||||
|
|
||||||
define void @foo1() nounwind {
|
define void @foo1() nounwind {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: ulw ${{[0-9]+}}, 2
|
; CHECK-EL: lw $25, %call16(foo2)
|
||||||
|
; CHECK-EL: ulhu $4, 2
|
||||||
|
; CHECK-EL: lw $[[R0:[0-9]+]], %got(s4)
|
||||||
|
; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
|
||||||
|
; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]])
|
||||||
|
; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
|
||||||
|
; CHECK-EL: ulw $4, 0($[[R0]])
|
||||||
|
; CHECK-EL: lw $25, %call16(foo4)
|
||||||
|
; CHECK-EL: or $5, $[[R2]], $[[R3]]
|
||||||
|
|
||||||
|
; CHECK-EB: ulhu $[[R0:[0-9]+]], 2
|
||||||
|
; CHECK-EB: lw $25, %call16(foo2)
|
||||||
|
; CHECK-EB: sll $4, $[[R0]], 16
|
||||||
|
; CHECK-EB: lw $[[R1:[0-9]+]], %got(s4)
|
||||||
|
; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]])
|
||||||
|
; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
|
||||||
|
; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
|
||||||
|
; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
|
||||||
|
; CHECK-EB: ulw $4, 0($[[R1]])
|
||||||
|
; CHECK-EB: lw $25, %call16(foo4)
|
||||||
|
; CHECK-EB: or $5, $[[R4]], $[[R5]]
|
||||||
|
|
||||||
tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
|
tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
|
||||||
|
tail call void @foo4(%struct.S4* byval @s4) nounwind
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
declare void @foo2(%struct.S1* byval)
|
declare void @foo2(%struct.S1* byval)
|
||||||
|
|
||||||
|
declare void @foo4(%struct.S4* byval)
|
||||||
|
Reference in New Issue
Block a user