From 2d496897934adfc0e3e1563dd64d9a7122971e22 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Tue, 15 Nov 2011 21:57:53 +0000 Subject: [PATCH] Added custom lowering for load->dec->store sequence in x86 when the EFLAGS register is used by later instructions. Only done for DEC64m right now. Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144705 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/ScheduleDAGRRList.cpp | 5 ++ lib/Target/X86/X86ISelDAGToDAG.cpp | 57 +++++++++++++++++++ lib/Target/X86/X86ISelLowering.cpp | 6 +- test/CodeGen/X86/dec-eflags-lower.ll | 29 ++++++++++ 4 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/dec-eflags-lower.ll diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 7938a375050..5adbc0d6aa7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -948,6 +948,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return NULL; + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3d75de06ec9..867c93091ee 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2216,6 +2216,63 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } break; } + case ISD::STORE: { + StoreSDNode *StoreNode = cast<StoreSDNode>(Node); + SDValue Chain = StoreNode->getOperand(0); + SDValue StoredVal = StoreNode->getOperand(1); + SDValue Address = StoreNode->getOperand(2); + SDValue Undef = StoreNode->getOperand(3); + + if (StoreNode->getMemOperand()->getSize() != 8 || + Undef->getOpcode() != 
ISD::UNDEF || + Chain->getOpcode() != ISD::LOAD || + StoredVal->getOpcode() != X86ISD::DEC || + StoredVal.getResNo() != 0 || + StoredVal->getOperand(0).getNode() != Chain.getNode()) + break; + + //OPC_CheckPredicate, 1, // Predicate_nontemporalstore + if (StoreNode->isNonTemporal()) + break; + + LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode()); + if (LoadNode->getOperand(1) != Address || + LoadNode->getOperand(2) != Undef) + break; + + if (!ISD::isNormalLoad(LoadNode)) + break; + + if (!ISD::isNormalStore(StoreNode)) + break; + + // check load chain has only one use (from the store) + if (!Chain.hasOneUse()) + break; + + // Merge the input chains if they are not intra-pattern references. + SDValue InputChain = LoadNode->getOperand(0); + + SDValue Base, Scale, Index, Disp, Segment; + if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), + Base, Scale, Index, Disp, Segment)) + break; + + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2); + MemOp[0] = StoreNode->getMemOperand(); + MemOp[1] = LoadNode->getMemOperand(); + const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; + MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m, + Node->getDebugLoc(), + MVT::i32, MVT::Other, Ops, + array_lengthof(Ops)); + Result->setMemRefs(MemOp, MemOp + 2); + + ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); + ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); + + return Result; + } } SDNode *ResNode = SelectCode(Node); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ead6326a91c..4b564b2c111 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8263,8 +8263,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, // climbing the DAG back to the root, and it doesn't seem to be worth the // effort. 
for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && + UI->getOpcode() != ISD::SETCC && + UI->getOpcode() != ISD::STORE) goto default_case; if (ConstantSDNode *C = diff --git a/test/CodeGen/X86/dec-eflags-lower.ll b/test/CodeGen/X86/dec-eflags-lower.ll new file mode 100644 index 00000000000..0e031e269d0 --- /dev/null +++ b/test/CodeGen/X86/dec-eflags-lower.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +%struct.obj = type { i64 } + +define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp { +entry: +; CHECK: decq (%rdi) +; CHECK-NEXT: je + %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0 + %0 = load i64* %refcnt, align 8, !tbaa !0 + %dec = add i64 %0, -1 + store i64 %dec, i64* %refcnt, align 8, !tbaa !0 + %tobool = icmp eq i64 %dec, 0 + br i1 %tobool, label %if.end, label %return + +if.end: ; preds = %entry + %1 = bitcast %struct.obj* %o to i8* + tail call void @free(i8* %1) + br label %return + +return: ; preds = %entry, %if.end + ret void +} + +declare void @free(i8* nocapture) nounwind + +!0 = metadata !{metadata !"long", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null}