diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 4d6e8423e6f..6c756495db5 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -239,6 +239,12 @@ public: const unsigned char *MatcherTable, unsigned TableSize); + /// \brief Return true if complex patterns for this target can mutate the + /// DAG. + virtual bool ComplexPatternFuncMutatesDAG() const { + return false; + } + private: // Calls to these functions are generated by tblgen. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index df2c6b8de0b..31f4b068b90 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2432,6 +2432,42 @@ struct MatchScope { bool HasChainNodesMatched, HasGlueResultNodesMatched; }; +/// \brief A DAG update listener to keep the matching state +/// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to +/// change the DAG while matching. The X86 addressing mode matcher is an example +/// of this. +class MatchStateUpdater : public SelectionDAG::DAGUpdateListener +{ + SmallVectorImpl > &RecordedNodes; + SmallVectorImpl &MatchScopes; +public: + MatchStateUpdater(SelectionDAG &DAG, + SmallVectorImpl > &RN, + SmallVectorImpl &MS) : + SelectionDAG::DAGUpdateListener(DAG), + RecordedNodes(RN), MatchScopes(MS) { } + + void NodeDeleted(SDNode *N, SDNode *E) { + // Some early-returns here to avoid the search if we deleted the node or + // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we + // do, so it's unnecessary to update matching state at that point). + // Neither of these can occur currently because we only install this + // update listener while matching a complex pattern. + if (!E || E->isMachineOpcode()) + return; + // Performing linear search here does not matter because we almost never + // run this code.
You'd have to have a CSE during complex pattern + // matching. + for (auto &I : RecordedNodes) + if (I.first.getNode() == N) + I.first.setNode(E); + + for (auto &I : MatchScopes) + for (auto &J : I.NodeStack) + if (J.getNode() == N) + J.setNode(E); + } +}; } SDNode *SelectionDAGISel:: @@ -2686,6 +2722,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned CPNum = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + + // If the target can modify the DAG during matching, keep the matching state + // consistent. + std::unique_ptr MSU; + if (ComplexPatternFuncMutatesDAG()) + MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes, + MatchScopes)); + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, RecordedNodes[RecNo].first, CPNum, RecordedNodes)) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index a8587cd9a2a..429c8bdbdf0 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -299,6 +299,13 @@ namespace { const X86InstrInfo *getInstrInfo() const { return getTargetMachine().getSubtargetImpl()->getInstrInfo(); } + + /// \brief Address-mode matching performs and-of-shift to shift-of-and + /// reassociation in order to expose more scaled addressing + /// opportunities. + bool ComplexPatternFuncMutatesDAG() const override { + return true; + } }; } diff --git a/test/CodeGen/X86/addr-mode-matcher.ll b/test/CodeGen/X86/addr-mode-matcher.ll new file mode 100644 index 00000000000..d5920910f28 --- /dev/null +++ b/test/CodeGen/X86/addr-mode-matcher.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s | FileCheck %s + +; This testcase used to hit an assert during ISel. For details, see the big +; comment inside the function. + +; CHECK-LABEL: foo: +; The AND should be turned into a subreg access.
+; CHECK-NOT: and +; The shift (leal) should be folded into the scale of the address in the load. +; CHECK-NOT: leal +; CHECK: movl {{.*}},4), + +target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" +target triple = "i386-apple-macosx10.6.0" + +define void @foo(i32 %a) { +bb: + br label %bb1692 + +bb1692: + %tmp1694 = phi i32 [ 0, %bb ], [ %tmp1745, %bb1692 ] + %xor = xor i32 0, %tmp1694 + +; %load1 = (load (and (shl %xor, 2), 1020)) + %tmp1701 = shl i32 %xor, 2 + %tmp1702 = and i32 %tmp1701, 1020 + %tmp1703 = getelementptr inbounds [1028 x i8]* null, i32 0, i32 %tmp1702 + %tmp1704 = bitcast i8* %tmp1703 to i32* + %load1 = load i32* %tmp1704, align 4 + +; %load2 = (load (shl (and %xor, 255), 2)) + %tmp1698 = and i32 %xor, 255 + %tmp1706 = shl i32 %tmp1698, 2 + %tmp1707 = getelementptr inbounds [1028 x i8]* null, i32 0, i32 %tmp1706 + %tmp1708 = bitcast i8* %tmp1707 to i32* + %load2 = load i32* %tmp1708, align 4 + + %tmp1710 = or i32 %load2, %a + +; While matching xor we address-match %load1. The and-of-shift reassociation +; in address matching transforms this into a shift-of-and and the resulting +; node becomes identical to %load2. CSE replaces %load1 which leaves its +; references in MatchScope and RecordedNodes stale. + %tmp1711 = xor i32 %load1, %tmp1710 + + %tmp1744 = getelementptr inbounds [256 x i32]* null, i32 0, i32 %tmp1711 + store i32 0, i32* %tmp1744, align 4 + %tmp1745 = add i32 %tmp1694, 1 + indirectbr i8* undef, [label %bb1756, label %bb1692] + +bb1756: + br label %bb2705 + +bb2705: + indirectbr i8* undef, [label %bb5721, label %bb5736] + +bb5721: + br label %bb2705 + +bb5736: + ret void +}