mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-10-26 18:20:39 +00:00
Eliminate redundant CR moves on PPC32.
The 32-bit SVR4 ABI requires CR bit 6 to be set if a vararg call has fp arguments and unset if it doesn't. The solution up to now was to insert a MachineNode to set/unset the CR bit, which produces a CR vreg. This vreg was then copied into CR bit 6. When the register allocator saw a bunch of these in the same function, it allocated the set/unset CR bit in some random CR register (1 extra instruction) and then emitted CR moves before every vararg function call, rather than just setting and unsetting CR bit 6 directly before every vararg function call. This patch instead inserts a PPCcr6set/PPCcr6unset node, which is then matched by a dedicated instruction pattern. Patch by Tobias von Koch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162725 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -517,6 +517,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
|
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
|
||||||
case PPCISD::MTFSF: return "PPCISD::MTFSF";
|
case PPCISD::MTFSF: return "PPCISD::MTFSF";
|
||||||
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
|
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
|
||||||
|
case PPCISD::CR6SET: return "PPCISD::CR6SET";
|
||||||
|
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2834,6 +2836,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
|
|||||||
isTailCall, RegsToPass, Ops, NodeTys,
|
isTailCall, RegsToPass, Ops, NodeTys,
|
||||||
PPCSubTarget);
|
PPCSubTarget);
|
||||||
|
|
||||||
|
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
|
||||||
|
if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
|
||||||
|
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
|
||||||
|
|
||||||
// When performing tail call optimization the callee pops its arguments off
|
// When performing tail call optimization the callee pops its arguments off
|
||||||
// the stack. Account for this here so these bytes can be pushed back on in
|
// the stack. Account for this here so these bytes can be pushed back on in
|
||||||
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
|
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
|
||||||
@@ -3131,14 +3137,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
|
|||||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||||
&MemOpChains[0], MemOpChains.size());
|
&MemOpChains[0], MemOpChains.size());
|
||||||
|
|
||||||
// Set CR6 to true if this is a vararg call with floating args passed in
|
|
||||||
// registers.
|
|
||||||
if (isVarArg) {
|
|
||||||
SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
|
|
||||||
dl, MVT::i32), 0);
|
|
||||||
RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build a sequence of copy-to-reg nodes chained together with token chain
|
// Build a sequence of copy-to-reg nodes chained together with token chain
|
||||||
// and flag operands which copy the outgoing args into the appropriate regs.
|
// and flag operands which copy the outgoing args into the appropriate regs.
|
||||||
SDValue InFlag;
|
SDValue InFlag;
|
||||||
@@ -3148,6 +3146,14 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
|
|||||||
InFlag = Chain.getValue(1);
|
InFlag = Chain.getValue(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set CR bit 6 to true if this is a vararg call with floating args passed in
|
||||||
|
// registers.
|
||||||
|
if (isVarArg) {
|
||||||
|
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
|
||||||
|
dl, DAG.getVTList(MVT::Other, MVT::Glue), Chain);
|
||||||
|
InFlag = Chain.getValue(1);
|
||||||
|
}
|
||||||
|
|
||||||
if (isTailCall)
|
if (isTailCall)
|
||||||
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
|
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
|
||||||
false, TailCallArguments);
|
false, TailCallArguments);
|
||||||
|
|||||||
@@ -174,6 +174,10 @@ namespace llvm {
|
|||||||
/// operand #3 optional in flag
|
/// operand #3 optional in flag
|
||||||
TC_RETURN,
|
TC_RETURN,
|
||||||
|
|
||||||
|
/// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
|
||||||
|
CR6SET,
|
||||||
|
CR6UNSET,
|
||||||
|
|
||||||
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
|
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
|
||||||
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||||
|
|
||||||
|
|||||||
@@ -155,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
|
|||||||
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
|
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
|
||||||
[SDNPHasChain, SDNPMayStore]>;
|
[SDNPHasChain, SDNPMayStore]>;
|
||||||
|
|
||||||
|
// Instructions to set/unset CR bit 6 for SVR4 vararg calls
|
||||||
|
def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
|
||||||
|
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||||
|
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
|
||||||
|
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||||
|
|
||||||
// Instructions to support atomic operations
|
// Instructions to support atomic operations
|
||||||
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
|
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
|
||||||
[SDNPHasChain, SDNPMayLoad]>;
|
[SDNPHasChain, SDNPMayLoad]>;
|
||||||
@@ -1145,6 +1151,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
|
|||||||
"crxor $dst, $dst, $dst", BrCR,
|
"crxor $dst, $dst, $dst", BrCR,
|
||||||
[]>;
|
[]>;
|
||||||
|
|
||||||
|
let Defs = [CR1EQ], CRD = 6 in {
|
||||||
|
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
|
||||||
|
"creqv 6, 6, 6", BrCR,
|
||||||
|
[(PPCcr6set)]>;
|
||||||
|
|
||||||
|
def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
|
||||||
|
"crxor 6, 6, 6", BrCR,
|
||||||
|
[(PPCcr6unset)]>;
|
||||||
|
}
|
||||||
|
|
||||||
// XFX-Form instructions. Instructions that deal with SPRs.
|
// XFX-Form instructions. Instructions that deal with SPRs.
|
||||||
//
|
//
|
||||||
let Uses = [CTR] in {
|
let Uses = [CTR] in {
|
||||||
|
|||||||
26
test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
Normal file
26
test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
; RUN: llc < %s | FileCheck %s
|
||||||
|
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
|
||||||
|
target triple = "powerpc-unknown-linux"
|
||||||
|
|
||||||
|
@.str = private unnamed_addr constant [3 x i8] c"%i\00", align 1
|
||||||
|
|
||||||
|
define void @test(i32 %count) nounwind {
|
||||||
|
entry:
|
||||||
|
; CHECK: crxor 6, 6, 6
|
||||||
|
%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
|
||||||
|
%cmp2 = icmp sgt i32 %count, 0
|
||||||
|
br i1 %cmp2, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.body: ; preds = %entry, %for.body
|
||||||
|
%i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
||||||
|
; CHECK: crxor 6, 6, 6
|
||||||
|
%call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
|
||||||
|
%inc = add nsw i32 %i.03, 1
|
||||||
|
%exitcond = icmp eq i32 %inc, %count
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body, %entry
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @printf(i8* nocapture, ...) nounwind
|
||||||
Reference in New Issue
Block a user