mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-30 16:17:05 +00:00 
			
		
		
		
	Eliminate redundant CR moves on PPC32.
The 32-bit SVR4 ABI requires CR bit 6 to be set before a vararg call that passes floating-point arguments in registers, and unset before a vararg call that does not. The solution up to now was to insert a MachineNode (CRSET/CRUNSET) to set/unset a CR bit, which produces a CR vreg. This vreg was then copied into CR bit 6. When the register allocator saw several of these in the same function, it allocated the set/unset CR bit in some arbitrary CR register (1 extra instruction) and then emitted CR moves before every vararg function call, rather than just setting or unsetting CR bit 6 directly before every vararg function call. This patch instead inserts a PPCcr6set/PPCcr6unset node, which is then matched by a dedicated instruction pattern (CR6SET/CR6UNSET) that defines CR bit 6 directly. Patch by Tobias von Koch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162725 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -517,6 +517,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { | |||||||
|   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ"; |   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ"; | ||||||
|   case PPCISD::MTFSF:           return "PPCISD::MTFSF"; |   case PPCISD::MTFSF:           return "PPCISD::MTFSF"; | ||||||
|   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN"; |   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN"; | ||||||
|  |   case PPCISD::CR6SET:          return "PPCISD::CR6SET"; | ||||||
|  |   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET"; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -2834,6 +2836,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, | |||||||
|                                  isTailCall, RegsToPass, Ops, NodeTys, |                                  isTailCall, RegsToPass, Ops, NodeTys, | ||||||
|                                  PPCSubTarget); |                                  PPCSubTarget); | ||||||
|  |  | ||||||
|  |   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls | ||||||
|  |   if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) | ||||||
|  |     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); | ||||||
|  |  | ||||||
|   // When performing tail call optimization the callee pops its arguments off |   // When performing tail call optimization the callee pops its arguments off | ||||||
|   // the stack. Account for this here so these bytes can be pushed back on in |   // the stack. Account for this here so these bytes can be pushed back on in | ||||||
|   // PPCRegisterInfo::eliminateCallFramePseudoInstr. |   // PPCRegisterInfo::eliminateCallFramePseudoInstr. | ||||||
| @@ -3131,14 +3137,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, | |||||||
|     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, | ||||||
|                         &MemOpChains[0], MemOpChains.size()); |                         &MemOpChains[0], MemOpChains.size()); | ||||||
|  |  | ||||||
|   // Set CR6 to true if this is a vararg call with floating args passed in |  | ||||||
|   // registers. |  | ||||||
|   if (isVarArg) { |  | ||||||
|     SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET, |  | ||||||
|                                      dl, MVT::i32), 0); |  | ||||||
|     RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Build a sequence of copy-to-reg nodes chained together with token chain |   // Build a sequence of copy-to-reg nodes chained together with token chain | ||||||
|   // and flag operands which copy the outgoing args into the appropriate regs. |   // and flag operands which copy the outgoing args into the appropriate regs. | ||||||
|   SDValue InFlag; |   SDValue InFlag; | ||||||
| @@ -3148,6 +3146,14 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, | |||||||
|     InFlag = Chain.getValue(1); |     InFlag = Chain.getValue(1); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   // Set CR bit 6 to true if this is a vararg call with floating args passed in | ||||||
|  |   // registers. | ||||||
|  |   if (isVarArg) { | ||||||
|  |     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, | ||||||
|  |                         dl, DAG.getVTList(MVT::Other, MVT::Glue), Chain); | ||||||
|  |     InFlag = Chain.getValue(1); | ||||||
|  |   } | ||||||
|  |  | ||||||
|   if (isTailCall) |   if (isTailCall) | ||||||
|     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, |     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, | ||||||
|                     false, TailCallArguments); |                     false, TailCallArguments); | ||||||
|   | |||||||
| @@ -174,6 +174,10 @@ namespace llvm { | |||||||
|       ///   operand #3 optional in flag |       ///   operand #3 optional in flag | ||||||
|       TC_RETURN, |       TC_RETURN, | ||||||
|  |  | ||||||
|  |       /// ch, gl = CR6[UN]SET ch, inglue - Set (CR6SET) or clear (CR6UNSET) CR bit 6 for SVR4 vararg calls | ||||||
|  |       CR6SET, | ||||||
|  |       CR6UNSET, | ||||||
|  |  | ||||||
|       /// STD_32 - This is the STD instruction for use with "32-bit" registers. |       /// STD_32 - This is the STD instruction for use with "32-bit" registers. | ||||||
|       STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, |       STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, | ||||||
|  |  | ||||||
|   | |||||||
| @@ -155,6 +155,12 @@ def PPClbrx       : SDNode<"PPCISD::LBRX", SDT_PPClbrx, | |||||||
| def PPCstbrx      : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, | def PPCstbrx      : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, | ||||||
|                            [SDNPHasChain, SDNPMayStore]>; |                            [SDNPHasChain, SDNPMayStore]>; | ||||||
|  |  | ||||||
|  | // Instructions to set/unset CR bit 6 for SVR4 vararg calls | ||||||
|  | def PPCcr6set   : SDNode<"PPCISD::CR6SET", SDTNone, | ||||||
|  |                          [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; | ||||||
|  | def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, | ||||||
|  |                          [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; | ||||||
|  |  | ||||||
| // Instructions to support atomic operations | // Instructions to support atomic operations | ||||||
| def PPClarx      : SDNode<"PPCISD::LARX", SDT_PPClarx, | def PPClarx      : SDNode<"PPCISD::LARX", SDT_PPClarx, | ||||||
|                           [SDNPHasChain, SDNPMayLoad]>; |                           [SDNPHasChain, SDNPMayLoad]>; | ||||||
| @@ -1145,6 +1151,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), | |||||||
|               "crxor $dst, $dst, $dst", BrCR, |               "crxor $dst, $dst, $dst", BrCR, | ||||||
|               []>; |               []>; | ||||||
|  |  | ||||||
|  | let Defs = [CR1EQ], CRD = 6 in { | ||||||
|  | def CR6SET  : XLForm_1_ext<19, 289, (outs), (ins), | ||||||
|  |               "creqv 6, 6, 6", BrCR, | ||||||
|  |               [(PPCcr6set)]>; | ||||||
|  |  | ||||||
|  | def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), | ||||||
|  |               "crxor 6, 6, 6", BrCR, | ||||||
|  |               [(PPCcr6unset)]>; | ||||||
|  | } | ||||||
|  |  | ||||||
| // XFX-Form instructions.  Instructions that deal with SPRs. | // XFX-Form instructions.  Instructions that deal with SPRs. | ||||||
| // | // | ||||||
| let Uses = [CTR] in { | let Uses = [CTR] in { | ||||||
|   | |||||||
							
								
								
									
										26
									
								
								test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | |||||||
|  | ; RUN: llc < %s | FileCheck %s | ||||||
|  | target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" | ||||||
|  | target triple = "powerpc-unknown-linux" | ||||||
|  |  | ||||||
|  | @.str = private unnamed_addr constant [3 x i8] c"%i\00", align 1 | ||||||
|  |  | ||||||
|  | define void @test(i32 %count) nounwind { | ||||||
|  | entry: | ||||||
|  | ; CHECK: crxor 6, 6, 6 | ||||||
|  |   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind | ||||||
|  |   %cmp2 = icmp sgt i32 %count, 0 | ||||||
|  |   br i1 %cmp2, label %for.body, label %for.end | ||||||
|  |  | ||||||
|  | for.body:                                         ; preds = %entry, %for.body | ||||||
|  |   %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] | ||||||
|  | ; CHECK: crxor 6, 6, 6 | ||||||
|  |   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind | ||||||
|  |   %inc = add nsw i32 %i.03, 1 | ||||||
|  |   %exitcond = icmp eq i32 %inc, %count | ||||||
|  |   br i1 %exitcond, label %for.end, label %for.body | ||||||
|  |  | ||||||
|  | for.end:                                          ; preds = %for.body, %entry | ||||||
|  |   ret void | ||||||
|  | } | ||||||
|  |  | ||||||
|  | declare i32 @printf(i8* nocapture, ...) nounwind | ||||||
		Reference in New Issue
	
	Block a user