From 88d211f82304e53694ece666d4a2507b170e4582 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 12 Mar 2006 09:13:49 +0000 Subject: [PATCH] Several big changes: 1. Use flags on the instructions in the .td file to indicate the PPC970 unit type instead of a table in the .cpp file. Much cleaner. 2. Change the hazard recognizer to build d-groups according to the actual algorithm used, not my flawed understanding of it. 3. Model "must be in the first slot" and "must be the only instr in a group" accurately. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26719 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPC.td | 21 +- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 282 ++++++++++---------- lib/Target/PowerPC/PPCHazardRecognizers.h | 32 +-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4 +- lib/Target/PowerPC/PPCInstrFormats.td | 20 ++ lib/Target/PowerPC/PPCInstrInfo.h | 37 +++ lib/Target/PowerPC/PPCInstrInfo.td | 132 +++++---- 7 files changed, 316 insertions(+), 212 deletions(-) diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 272f6d6a16a..d4d4995d6a7 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -67,17 +67,32 @@ def : Processor<"g5", G5Itineraries, Feature64Bit /*, Feature64BitRegs */]>; +def PPCInstrInfo : InstrInfo { + // Define how we want to layout our TargetSpecific information field... This + // should be kept up-to-date with the fields in the PPCInstrInfo.h file. + let TSFlagsFields = ["PPC970_First", + "PPC970_Single", + "PPC970_Unit"]; + let TSFlagsShifts = [0, + 1, + 2]; + + let isLittleEndianEncoding = 1; +} + + def PPC : Target { // Pointers on PPC are 32-bits in size. let PointerType = i32; + // Information about the instructions. 
+ let InstructionSet = PPCInstrInfo; + + // According to the Mach-O Runtime ABI, these regs are nonvolatile across // calls let CalleeSavedRegisters = [R1, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31, CR2, CR3, CR4, LR]; - - // Pull in Instruction Info: - let InstructionSet = PowerPCInstrInfo; } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index a88cb123a57..b52e9939235 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "sched" #include "PPCHazardRecognizers.h" #include "PPC.h" +#include "PPCInstrInfo.h" #include "llvm/Support/Debug.h" #include using namespace llvm; @@ -23,15 +24,14 @@ using namespace llvm; // PowerPC 970 Hazard Recognizer // // This models the dispatch group formation of the PPC970 processor. Dispatch -// groups are bundles of up to five instructions that can contain up to two ALU -// (aka FXU) ops, two FPU ops, two Load/Store ops, one CR op, one VALU op, one -// VPERM op, and one BRANCH op. If the code contains more instructions in a -// sequence than the dispatch group can contain (e.g. three loads in a row) the -// processor terminates the dispatch group early, wasting execution resources. +// groups are bundles of up to five instructions that can contain various mixes +// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one +// branch instruction per-cycle. // -// In addition to these restrictions, there are a number of other restrictions: -// some instructions, e.g. branches, are required to be the last instruction in -// a group. Additionally, only branches can issue in the 5th (last) slot. 
+// There are a number of restrictions to dispatch group formation: some +// instructions can only be issued in the first slot of a dispatch group, & some +// instructions fill an entire dispatch group. Additionally, only branches can +// issue in the 5th (last) slot. // // Finally, there are a number of "structural" hazards on the PPC970. These // conditions cause large performance penalties due to misprediction, recovery, @@ -41,8 +41,6 @@ using namespace llvm; // conditions, we insert no-op instructions when appropriate. // // FIXME: This is missing some significant cases: -// -1. Handle all of the instruction types in GetInstrType. -// 0. Handling of instructions that must be the first/last in a group. // 1. Modeling of microcoded instructions. // 2. Handling of cracked instructions. // 3. Handling of serialized operations. @@ -50,103 +48,70 @@ using namespace llvm; // e.g. integer divides that only execute in the second slot. // -PPCHazardRecognizer970::PPCHazardRecognizer970() { +PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) + : TII(tii) { EndDispatchGroup(); } void PPCHazardRecognizer970::EndDispatchGroup() { DEBUG(std::cerr << "=== Start of dispatch group\n"); - // Pipeline units. - NumFXU = NumLSU = NumFPU = 0; - HasCR = HasSPR = HasVALU = HasVPERM = false; NumIssued = 0; // Structural hazard info. HasCTRSet = false; - StorePtr1 = StorePtr2 = SDOperand(); - StoreSize = 0; + NumStores = 0; } -PPCHazardRecognizer970::PPC970InstrType -PPCHazardRecognizer970::GetInstrType(unsigned Opcode) { - if (Opcode < ISD::BUILTIN_OP_END) - return PseudoInst; +PPCII::PPC970_Unit +PPCHazardRecognizer970::GetInstrType(unsigned Opcode, + bool &isFirst, bool &isSingle, + bool &isLoad, bool &isStore){ + if (Opcode < ISD::BUILTIN_OP_END) { + isFirst = isSingle = isLoad = isStore = false; + return PPCII::PPC970_Pseudo; + } Opcode -= ISD::BUILTIN_OP_END; - switch (Opcode) { - case PPC::FMRSD: return PseudoInst; // Usually coallesced away. 
- case PPC::BCTRL: - case PPC::BL: - case PPC::BLA: - case PPC::BLR: - return BR; - case PPC::MCRF: - case PPC::MFCR: - case PPC::MFOCRF: - return CR; - case PPC::MFLR: - case PPC::MFCTR: - case PPC::MTLR: - case PPC::MTCTR: - return SPR; - case PPC::LFS: - case PPC::LFD: - case PPC::LWZ: - case PPC::LFSX: - case PPC::LWZX: - case PPC::LBZ: - case PPC::LHA: - case PPC::LHZ: - case PPC::LWZU: - return LSU_LD; - case PPC::STFS: - case PPC::STFD: - case PPC::STW: - case PPC::STB: - case PPC::STH: - case PPC::STWU: - return LSU_ST; - case PPC::DIVW: - case PPC::DIVWU: - case PPC::DIVD: - case PPC::DIVDU: - return FXU_FIRST; - case PPC::FADDS: - case PPC::FCTIWZ: - case PPC::FRSP: - case PPC::FSUB: - return FPU; - } + const TargetInstrDescriptor &TID = TII.get(Opcode); - return FXU; + isLoad = TID.Flags & M_LOAD_FLAG; + isStore = TID.Flags & M_STORE_FLAG; + + unsigned TSFlags = TID.TSFlags; + + isFirst = TSFlags & PPCII::PPC970_First; + isSingle = TSFlags & PPCII::PPC970_Single; + return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); } /// isLoadOfStoredAddress - If we have a load from the previously stored pointer /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. bool PPCHazardRecognizer970:: isLoadOfStoredAddress(unsigned LoadSize, SDOperand Ptr1, SDOperand Ptr2) const { - // Handle exact and commuted addresses. - if (Ptr1 == StorePtr1 && Ptr2 == StorePtr2) - return true; - if (Ptr2 == StorePtr1 && Ptr1 == StorePtr2) - return true; - - // Okay, we don't have an exact match, if this is an indexed offset, see if we - // have overlap (which happens during fp->int conversion for example). - if (StorePtr2 == Ptr2) { - if (ConstantSDNode *StoreOffset = dyn_cast(StorePtr1)) - if (ConstantSDNode *LoadOffset = dyn_cast(Ptr1)) { - // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check to - // see if the load and store actually overlap. 
- int StoreOffs = StoreOffset->getValue(); - int LoadOffs = LoadOffset->getValue(); - if (StoreOffs < LoadOffs) { - if (int(StoreOffs+StoreSize) > LoadOffs) return true; - } else { - if (int(LoadOffs+LoadSize) > StoreOffs) return true; + for (unsigned i = 0, e = NumStores; i != e; ++i) { + // Handle exact and commuted addresses. + if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i]) + return true; + if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) + return true; + + // Okay, we don't have an exact match, if this is an indexed offset, see if + // we have overlap (which happens during fp->int conversion for example). + if (StorePtr2[i] == Ptr2) { + if (ConstantSDNode *StoreOffset = dyn_cast(StorePtr1[i])) + if (ConstantSDNode *LoadOffset = dyn_cast(Ptr1)) { + // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check + // to see if the load and store actually overlap. + int StoreOffs = StoreOffset->getValue(); + int LoadOffs = LoadOffset->getValue(); + if (StoreOffs < LoadOffs) { + if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true; + } else { + if (int(LoadOffs+LoadSize) > StoreOffs) return true; + } } - } + } } return false; } @@ -157,53 +122,76 @@ isLoadOfStoredAddress(unsigned LoadSize, SDOperand Ptr1, SDOperand Ptr2) const { /// pipeline flush. HazardRecognizer::HazardType PPCHazardRecognizer970:: getHazardType(SDNode *Node) { - PPC970InstrType InstrType = GetInstrType(Node->getOpcode()); - if (InstrType == PseudoInst) return NoHazard; + bool isFirst, isSingle, isLoad, isStore; + PPCII::PPC970_Unit InstrType = + GetInstrType(Node->getOpcode(), isFirst, isSingle, isLoad, isStore); + if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; unsigned Opcode = Node->getOpcode()-ISD::BUILTIN_OP_END; + // We can only issue a PPC970_First/PPC970_Single instruction (such as + // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle) ) + return Hazard; + switch (InstrType) { default: assert(0 && "Unknown instruction type!"); - case FXU: - case FXU_FIRST: if (NumFXU == 2) return Hazard; - case LSU_ST: - case LSU_LD: if (NumLSU == 2) return Hazard; - case FPU: if (NumFPU == 2) return Hazard; - case CR: if (HasCR) return Hazard; - case SPR: if (HasSPR) return Hazard; - case VALU: if (HasVALU) return Hazard; - case VPERM: if (HasVPERM) return Hazard; - case BR: break; + case PPCII::PPC970_FXU: + case PPCII::PPC970_LSU: + case PPCII::PPC970_FPU: + case PPCII::PPC970_VALU: + case PPCII::PPC970_VPERM: + // We can only issue a branch as the last instruction in a group. + if (NumIssued == 4) return Hazard; + break; + case PPCII::PPC970_CRU: + // We can only issue a CR instruction in the first two slots. + if (NumIssued >= 2) return Hazard; + break; + case PPCII::PPC970_BRU: + break; } - - // We can only issue a CR or SPR instruction, or an FXU instruction that needs - // to lead a dispatch group as the first instruction in the group. - if (NumIssued != 0 && - (InstrType == CR || InstrType == SPR || InstrType == FXU_FIRST)) - return Hazard; - - // We can only issue a branch as the last instruction in a group. - if (NumIssued == 4 && InstrType != BR) - return Hazard; - + // Do not allow MTCTR and BCTRL to be in the same dispatch group. if (HasCTRSet && Opcode == PPC::BCTRL) return NoopHazard; // If this is a load following a store, make sure it's not to the same or // overlapping address. 
- if (InstrType == LSU_LD && StoreSize) { + if (isLoad && NumStores) { unsigned LoadSize; switch (Opcode) { default: assert(0 && "Unknown load!"); - case PPC::LBZ: LoadSize = 1; break; + case PPC::LBZ: + case PPC::LBZX: + case PPC::LVEBX: + LoadSize = 1; + break; case PPC::LHA: - case PPC::LHZ: LoadSize = 2; break; - case PPC::LWZU: - case PPC::LFSX: + case PPC::LHAX: + case PPC::LHZ: + case PPC::LHZX: + case PPC::LVEHX: + LoadSize = 2; + break; case PPC::LFS: + case PPC::LFSX: + case PPC::LWZ: case PPC::LWZX: - case PPC::LWZ: LoadSize = 4; break; - case PPC::LFD: LoadSize = 8; break; + case PPC::LWZU: + case PPC::LWA: + case PPC::LWAX: + case PPC::LVEWX: + LoadSize = 4; + break; + case PPC::LFD: + case PPC::LFDX: + case PPC::LD: + case PPC::LDX: + LoadSize = 8; + break; + case PPC::LVX: + LoadSize = 16; + break; } if (isLoadOfStoredAddress(LoadSize, @@ -215,41 +203,61 @@ getHazardType(SDNode *Node) { } void PPCHazardRecognizer970::EmitInstruction(SDNode *Node) { - PPC970InstrType InstrType = GetInstrType(Node->getOpcode()); - if (InstrType == PseudoInst) return; + bool isFirst, isSingle, isLoad, isStore; + PPCII::PPC970_Unit InstrType = + GetInstrType(Node->getOpcode(), isFirst, isSingle, isLoad, isStore); + if (InstrType == PPCII::PPC970_Pseudo) return; unsigned Opcode = Node->getOpcode()-ISD::BUILTIN_OP_END; // Update structural hazard information. if (Opcode == PPC::MTCTR) HasCTRSet = true; // Track the address stored to.
- if (InstrType == LSU_ST) { - StorePtr1 = Node->getOperand(1); - StorePtr2 = Node->getOperand(2); + if (isStore) { + unsigned ThisStoreSize; switch (Opcode) { default: assert(0 && "Unknown store instruction!"); - case PPC::STB: StoreSize = 1; break; - case PPC::STH: StoreSize = 2; break; + case PPC::STBX: + case PPC::STB: + case PPC::STVEBX: + ThisStoreSize = 1; + break; + case PPC::STHX: + case PPC::STH: + case PPC::STVEHX: + ThisStoreSize = 2; + break; case PPC::STFS: + case PPC::STFSX: case PPC::STWU: - case PPC::STW: StoreSize = 4; break; - case PPC::STFD: StoreSize = 8; break; + case PPC::STWX: + case PPC::STWUX: + case PPC::STW: + case PPC::STVEWX: + case PPC::STFIWX: + ThisStoreSize = 4; + break; + case PPC::STD: + case PPC::STDU: + case PPC::STFD: + case PPC::STFDX: + case PPC::STDX: + case PPC::STDUX: + ThisStoreSize = 8; + break; + case PPC::STVX: + ThisStoreSize = 16; + break; } + + StoreSize[NumStores] = ThisStoreSize; + StorePtr1[NumStores] = Node->getOperand(1); + StorePtr2[NumStores] = Node->getOperand(2); + ++NumStores; } - switch (InstrType) { - default: assert(0 && "Unknown instruction type!"); - case FXU: - case FXU_FIRST: ++NumFXU; break; - case LSU_LD: - case LSU_ST: ++NumLSU; break; - case FPU: ++NumFPU; break; - case CR: HasCR = true; break; - case SPR: HasSPR = true; break; - case VALU: HasVALU = true; break; - case VPERM: HasVPERM = true; break; - case BR: NumIssued = 4; return; // ends a d-group. - } + if (InstrType == PPCII::PPC970_BRU || isSingle) + NumIssued = 4; // Terminate a d-group. 
++NumIssued; if (NumIssued == 5) diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index fb1f88e0449..a924c3ea146 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -15,6 +15,7 @@ #define PPCHAZRECS_H #include "llvm/CodeGen/ScheduleDAG.h" +#include "PPCInstrInfo.h" namespace llvm { @@ -25,16 +26,9 @@ namespace llvm { /// setting the CTR register then branching through it within a dispatch group), /// or storing then loading from the same address within a dispatch group. class PPCHazardRecognizer970 : public HazardRecognizer { - unsigned NumIssued; // Number of insts issued, including advanced cycles. + const TargetInstrInfo &TII; - // Number of various types of instructions in the current dispatch group. - unsigned NumFXU; // Number of Fixed Point (integer) instructions - unsigned NumLSU; // Number of Load/Store instructions - unsigned NumFPU; // Number of Floating Point instructions - bool HasCR; // True if Condition Register instruction issued - bool HasSPR; // True if Special-Purpose Register instruction used - bool HasVALU; // True if Vector Arithmetic instruction issued - bool HasVPERM; // True if Vector Permute instruction issued + unsigned NumIssued; // Number of insts issued, including advanced cycles. // Various things that can cause a structural hazard. @@ -42,17 +36,17 @@ class PPCHazardRecognizer970 : public HazardRecognizer { bool HasCTRSet; // StoredPtr - Keep track of the address of any store. If we see a load from - // the same address (or one that aliases it), disallow the store. We only - // need one pointer here, because there can only be two LSU operations and we - // only get an LSU reject if the first is a store and the second is a load. + // the same address (or one that aliases it), disallow the store. We can have + // up to four stores in one dispatch group, hence we track up to 4. // // This is null if we haven't seen a store yet. 
We keep track of both // operands of the store here, since we support [r+r] and [r+i] addressing. - SDOperand StorePtr1, StorePtr2; - unsigned StoreSize; + SDOperand StorePtr1[4], StorePtr2[4]; + unsigned StoreSize[4]; + unsigned NumStores; public: - PPCHazardRecognizer970(); + PPCHazardRecognizer970(const TargetInstrInfo &TII); virtual HazardType getHazardType(SDNode *Node); virtual void EmitInstruction(SDNode *Node); virtual void AdvanceCycle(); @@ -63,13 +57,11 @@ private: /// void EndDispatchGroup(); - enum PPC970InstrType { - FXU, FXU_FIRST, LSU_LD, LSU_ST, FPU, CR, SPR, VALU, VPERM, BR, PseudoInst - }; - /// GetInstrType - Classify the specified powerpc opcode according to its /// pipeline. - PPC970InstrType GetInstrType(unsigned Opcode); + PPCII::PPC970_Unit GetInstrType(unsigned Opcode, + bool &isFirst, bool &isSingle, + bool &isLoad, bool &isStore); bool isLoadOfStoredAddress(unsigned LoadSize, SDOperand Ptr1, SDOperand Ptr2) const; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4c2d4af6f92..09bbfc6a45a 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -129,7 +129,9 @@ namespace { virtual HazardRecognizer *CreateTargetHazardRecognizer() { // Should use subtarget info to pick the right hazard recognizer. For // now, always return a PPC970 recognizer. - return new PPCHazardRecognizer970(); + const TargetInstrInfo *II = PPCLowering.getTargetMachine().getInstrInfo(); + assert(II && "No InstrInfo?"); + return new PPCHazardRecognizer970(*II); } // Include the pieces autogenerated from the target description. 
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 0c7dca0d7b1..6901543d052 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -27,8 +27,28 @@ class I opcode, dag OL, string asmstr, InstrItinClass itin> let OperandList = OL; let AsmString = asmstr; let Itinerary = itin; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<3> PPC970_Unit = 0; } +class PPC970_DGroup_First { bits<1> PPC970_First = 1; } +class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; } +class PPC970_MicroCode; + +class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; } +class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; } +class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; } +class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; } +class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; } +class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } +class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } +class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } + + // 1.7.1 I-Form class IForm opcode, bit aa, bit lk, dag OL, string asmstr, InstrItinClass itin, list pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index b10de48bbd9..ff9fbbcc237 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -19,6 +19,43 @@ #include "PPCRegisterInfo.h" namespace llvm { + +/// PPCII - This namespace holds all of the PowerPC target-specific +/// per-instruction flags. These must match the corresponding definitions in +/// PPC.td and PPCInstrFormats.td. +namespace PPCII { +enum { + // PPC970 Instruction Flags. These flags describe the characteristics of the + // PowerPC 970 (aka G5) dispatch groups and how they are formed out of + // raw machine instructions. 
+ + /// PPC970_First - This instruction starts a new dispatch group, so it will + /// always be the first one in the group. + PPC970_First = 0x1, + + /// PPC970_Single - This instruction starts a new dispatch group and + /// terminates it, so it will be the sole instruction in the group. + PPC970_Single = 0x2, + + /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that + /// an instruction is issued to. + PPC970_Shift = 2, + PPC970_Mask = 0x07 << PPC970_Shift, +}; +enum PPC970_Unit { + /// These are the various PPC970 execution unit pipelines. Each instruction + /// is one of these. + PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction + PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit + PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit + PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit + PPC970_CRU = 4 << PPC970_Shift, // Condition Register Unit + PPC970_VALU = 5 << PPC970_Shift, // Vector ALU + PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit + PPC970_BRU = 7 << PPC970_Shift, // Branch Unit +}; +} + class PPCInstrInfo : public TargetInstrInfo { const PPCRegisterInfo RI; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index dba64c1aad2..a1521ab67d9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -203,7 +203,7 @@ def FPContractions : Predicate<"!NoExcessFPPrecision">; // Pseudo-instructions: -let isLoad = 1, hasCtrlDep = 1 in { +let hasCtrlDep = 1 in { def ADJCALLSTACKDOWN : Pseudo<(ops u16imm:$amt), "; ADJCALLSTACKDOWN", [(callseq_start imm:$amt)]>; @@ -220,7 +220,8 @@ def IMPLICIT_DEF_F4 : Pseudo<(ops F4RC:$rD), "; %rD = IMPLICIT_DEF_F4", // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the // scheduler into a branch sequence. -let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ PPC970_Single = 1 in { def SELECT_CC_Int : Pseudo<(ops GPRC:$dst, CRRC:$cond, GPRC:$T, GPRC:$F, i32imm:$BROPC), "; SELECT_CC PSEUDO!", []>; def SELECT_CC_F4 : Pseudo<(ops F4RC:$dst, CRRC:$cond, F4RC:$T, F4RC:$F, @@ -229,16 +230,18 @@ let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. i32imm:$BROPC), "; SELECT_CC PSEUDO!", []>; } -let isTerminator = 1, noResults = 1 in { +let isTerminator = 1, noResults = 1, PPC970_Unit = 7 in { let isReturn = 1 in def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (ops), "blr", BrB, [(retflag)]>; def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (ops), "bctr", BrB, []>; } let Defs = [LR] in - def MovePCtoLR : Pseudo<(ops piclabel:$label), "bl $label", []>; + def MovePCtoLR : Pseudo<(ops piclabel:$label), "bl $label", []>, + PPC970_Unit_BRU; -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, noResults = 1 in { +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, + noResults = 1, PPC970_Unit = 7 in { def COND_BRANCH : Pseudo<(ops CRRC:$crS, u16imm:$opc, target:$true, target:$false), "; COND_BRANCH", []>; @@ -266,7 +269,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, noResults = 1 in { "bnu $crS, $block", BrB>; } -let isCall = 1, noResults = 1, +let isCall = 1, noResults = 1, PPC970_Unit = 7, // All calls clobber the non-callee saved registers... Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, @@ -284,7 +287,7 @@ let isCall = 1, noResults = 1, // D-Form instructions. Most instructions that perform an operation on a // register and an immediate are of this type. // -let isLoad = 1 in { +let isLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (ops GPRC:$rD, memri:$src), "lbz $rD, $src", LdStGeneral, [(set GPRC:$rD, (zextload iaddr:$src, i8))]>; @@ -304,6 +307,7 @@ def LWZU : DForm_1<35, (ops GPRC:$rD, s16imm:$disp, GPRC:$rA), "lwzu $rD, $disp($rA)", LdStGeneral, []>; } +let PPC970_Unit = 1 in { // FXU Operations. 
def ADDI : DForm_2<14, (ops GPRC:$rD, GPRC:$rA, s16imm:$imm), "addi $rD, $rA, $imm", IntGeneral, [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; @@ -332,7 +336,8 @@ def LI : DForm_2_r0<14, (ops GPRC:$rD, symbolLo:$imm), def LIS : DForm_2_r0<15, (ops GPRC:$rD, symbolHi:$imm), "lis $rD, $imm", IntGeneral, [(set GPRC:$rD, imm16Shifted:$imm)]>; -let isStore = 1, noResults = 1 in { +} +let isStore = 1, noResults = 1, PPC970_Unit = 2 in { def STMW : DForm_3<47, (ops GPRC:$rS, s16imm:$disp, GPRC:$rA), "stmw $rS, $disp($rA)", LdStLMW, []>; @@ -349,6 +354,7 @@ def STWU : DForm_3<37, (ops GPRC:$rS, s16imm:$disp, GPRC:$rA), "stwu $rS, $disp($rA)", LdStGeneral, []>; } +let PPC970_Unit = 1 in { // FXU Operations. def ANDIo : DForm_4<28, (ops GPRC:$dst, GPRC:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>, @@ -383,7 +389,8 @@ def CMPLWI : DForm_6_ext<10, (ops CRRC:$dst, GPRC:$src1, u16imm:$src2), "cmplwi $dst, $src1, $src2", IntCompare>; def CMPLDI : DForm_6_ext<10, (ops CRRC:$dst, GPRC:$src1, u16imm:$src2), "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; -let isLoad = 1 in { +} +let isLoad = 1, PPC970_Unit = 2 in { def LFS : DForm_8<48, (ops F4RC:$rD, memri:$src), "lfs $rD, $src", LdStLFDU, [(set F4RC:$rD, (load iaddr:$src))]>; @@ -391,7 +398,7 @@ def LFD : DForm_8<50, (ops F8RC:$rD, memri:$src), "lfd $rD, $src", LdStLFD, [(set F8RC:$rD, (load iaddr:$src))]>; } -let isStore = 1, noResults = 1 in { +let isStore = 1, noResults = 1, PPC970_Unit = 2 in { def STFS : DForm_9<52, (ops F4RC:$rS, memri:$dst), "stfs $rS, $dst", LdStUX, [(store F4RC:$rS, iaddr:$dst)]>; @@ -402,7 +409,7 @@ def STFD : DForm_9<54, (ops F8RC:$rS, memri:$dst), // DS-Form instructions. 
Load/Store instructions available in PPC-64 // -let isLoad = 1 in { +let isLoad = 1, PPC970_Unit = 2 in { def LWA : DSForm_1<58, 2, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA), "lwa $rT, $DS($rA)", LdStLWA, []>, isPPC64; @@ -410,7 +417,7 @@ def LD : DSForm_2<58, 0, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA), "ld $rT, $DS($rA)", LdStLD, []>, isPPC64; } -let isStore = 1, noResults = 1 in { +let isStore = 1, noResults = 1, PPC970_Unit = 2 in { def STD : DSForm_2<62, 0, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA), "std $rT, $DS($rA)", LdStSTD, []>, isPPC64; @@ -422,7 +429,7 @@ def STDU : DSForm_2<62, 1, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA), // X-Form instructions. Most instructions that perform an operation on a // register and another register are of this type. // -let isLoad = 1 in { +let isLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (ops GPRC:$rD, memrr:$src), "lbzx $rD, $src", LdStGeneral, [(set GPRC:$rD, (zextload xaddr:$src, i8))]>; @@ -456,10 +463,11 @@ def LVX : XForm_1<31, 103, (ops VRRC:$vD, memrr:$src), } def LVSL : XForm_1<31, 6, (ops VRRC:$vD, GPRC:$base, GPRC:$rA), "lvsl $vD, $base, $rA", LdStGeneral, - []>; + []>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (ops VRRC:$vD, GPRC:$base, GPRC:$rA), "lvsl $vD, $base, $rA", LdStGeneral, - []>; + []>, PPC970_Unit_LSU; +let PPC970_Unit = 1 in { // FXU Operations. 
def NAND : XForm_6<31, 476, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB), "nand $rA, $rS, $rB", IntGeneral, [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>; @@ -517,7 +525,8 @@ def SRAD : XForm_6<31, 794, (ops G8RC:$rA, G8RC:$rS, G8RC:$rB), def SRAW : XForm_6<31, 792, (ops GPRC:$rA, GPRC:$rS, GPRC:$rB), "sraw $rA, $rS, $rB", IntShift, [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>; -let isStore = 1, noResults = 1 in { +} +let isStore = 1, noResults = 1, PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (ops GPRC:$rS, memrr:$dst), "stbx $rS, $dst", LdStGeneral, [(truncstore GPRC:$rS, xaddr:$dst, i8)]>; @@ -549,6 +558,7 @@ def STVX : XForm_8<31, 231, (ops VRRC:$rS, memrr:$dst), "stvx $rS, $dst", LdStGeneral, [(store (v4f32 VRRC:$rS), xoaddr:$dst)]>; } +let PPC970_Unit = 1 in { // FXU Operations. def SRAWI : XForm_10<31, 824, (ops GPRC:$rA, GPRC:$rS, u5imm:$SH), "srawi $rA, $rS, $SH", IntShift, [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>; @@ -576,14 +586,16 @@ def CMPLW : XForm_16_ext<31, 32, (ops CRRC:$crD, GPRC:$rA, GPRC:$rB), "cmplw $crD, $rA, $rB", IntCompare>; def CMPLD : XForm_16_ext<31, 32, (ops CRRC:$crD, GPRC:$rA, GPRC:$rB), "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; +} +let PPC970_Unit = 3 in { // FPU Operations. //def FCMPO : XForm_17<63, 32, (ops CRRC:$crD, FPRC:$fA, FPRC:$fB), // "fcmpo $crD, $fA, $fB", FPCompare>; def FCMPUS : XForm_17<63, 0, (ops CRRC:$crD, F4RC:$fA, F4RC:$fB), "fcmpu $crD, $fA, $fB", FPCompare>; def FCMPUD : XForm_17<63, 0, (ops CRRC:$crD, F8RC:$fA, F8RC:$fB), "fcmpu $crD, $fA, $fB", FPCompare>; - -let isLoad = 1 in { +} +let isLoad = 1, PPC970_Unit = 2 in { def LFSX : XForm_25<31, 535, (ops F4RC:$frD, memrr:$src), "lfsx $frD, $src", LdStLFDU, [(set F4RC:$frD, (load xaddr:$src))]>; @@ -591,6 +603,7 @@ def LFDX : XForm_25<31, 599, (ops F8RC:$frD, memrr:$src), "lfdx $frD, $src", LdStLFDU, [(set F8RC:$frD, (load xaddr:$src))]>; } +let PPC970_Unit = 3 in { // FPU Operations. 
def FCFID : XForm_26<63, 846, (ops F8RC:$frD, F8RC:$frB), "fcfid $frD, $frB", FPGeneral, [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64; @@ -609,18 +622,28 @@ def FSQRT : XForm_26<63, 22, (ops F8RC:$frD, F8RC:$frB), def FSQRTS : XForm_26<59, 22, (ops F4RC:$frD, F4RC:$frB), "fsqrts $frD, $frB", FPSqrt, [(set F4RC:$frD, (fsqrt F4RC:$frB))]>; +} /// FMR is split into 3 versions, one for 4/8 byte FP, and one for extending. +/// +/// Note that these are defined as pseudo-ops on the PPC970 because they are +/// often coalesced away and we don't want the dispatch group builder to think +/// that they will fill slots (which could cause the load of an LSU reject to +/// sneak into a d-group with a store). def FMRS : XForm_26<63, 72, (ops F4RC:$frD, F4RC:$frB), "fmr $frD, $frB", FPGeneral, - []>; // (set F4RC:$frD, F4RC:$frB) + []>, // (set F4RC:$frD, F4RC:$frB) + PPC970_Unit_Pseudo; def FMRD : XForm_26<63, 72, (ops F8RC:$frD, F8RC:$frB), "fmr $frD, $frB", FPGeneral, - []>; // (set F8RC:$frD, F8RC:$frB) + []>, // (set F8RC:$frD, F8RC:$frB) + PPC970_Unit_Pseudo; def FMRSD : XForm_26<63, 72, (ops F8RC:$frD, F4RC:$frB), "fmr $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fextend F4RC:$frB))]>; + [(set F8RC:$frD, (fextend F4RC:$frB))]>, + PPC970_Unit_Pseudo; +let PPC970_Unit = 3 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP.
def FABSS : XForm_26<63, 264, (ops F4RC:$frD, F4RC:$frB), "fabs $frD, $frB", FPGeneral, @@ -640,9 +663,9 @@ def FNEGS : XForm_26<63, 40, (ops F4RC:$frD, F4RC:$frB), def FNEGD : XForm_26<63, 40, (ops F8RC:$frD, F8RC:$frB), "fneg $frD, $frB", FPGeneral, [(set F8RC:$frD, (fneg F8RC:$frB))]>; +} - -let isStore = 1, noResults = 1 in { +let isStore = 1, noResults = 1, PPC970_Unit = 2 in { def STFIWX: XForm_28<31, 983, (ops F8RC:$frS, memrr:$dst), "stfiwx $frS, $dst", LdStUX, [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; @@ -657,27 +680,34 @@ def STFDX : XForm_28<31, 727, (ops F8RC:$frS, memrr:$dst), // XL-Form instructions. condition register logical ops. // def MCRF : XLForm_3<19, 0, (ops CRRC:$BF, CRRC:$BFA), - "mcrf $BF, $BFA", BrMCR>; + "mcrf $BF, $BFA", BrMCR>, + PPC970_DGroup_First, PPC970_Unit_CRU; -// XFX-Form instructions. Instructions that deal with SPRs +// XFX-Form instructions. Instructions that deal with SPRs. // -// Note that although LR should be listed as `8' and CTR as `9' in the SPR -// field, the manual lists the groups of bits as [5-9] = 0, [0-4] = 8 or 9 -// which means the SPR value needs to be multiplied by a factor of 32. 
-def MFCTR : XFXForm_1_ext<31, 339, 9, (ops GPRC:$rT), "mfctr $rT", SprMFSPR>; -def MFLR : XFXForm_1_ext<31, 339, 8, (ops GPRC:$rT), "mflr $rT", SprMFSPR>; -def MFCR : XFXForm_3<31, 19, (ops GPRC:$rT), "mfcr $rT", SprMFCR>; +def MFCTR : XFXForm_1_ext<31, 339, 9, (ops GPRC:$rT), "mfctr $rT", SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +def MFLR : XFXForm_1_ext<31, 339, 8, (ops GPRC:$rT), "mflr $rT", SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +def MFCR : XFXForm_3<31, 19, (ops GPRC:$rT), "mfcr $rT", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; def MTCRF : XFXForm_5<31, 144, (ops crbitm:$FXM, GPRC:$rS), - "mtcrf $FXM, $rS", BrMCRX>; + "mtcrf $FXM, $rS", BrMCRX>, + PPC970_MicroCode, PPC970_Unit_CRU; def MFOCRF: XFXForm_5a<31, 19, (ops GPRC:$rT, crbitm:$FXM), - "mfcr $rT, $FXM", SprMFCR>; -def MTCTR : XFXForm_7_ext<31, 467, 9, (ops GPRC:$rS), "mtctr $rS", SprMTSPR>; -def MTLR : XFXForm_7_ext<31, 467, 8, (ops GPRC:$rS), "mtlr $rS", SprMTSPR>; + "mfcr $rT, $FXM", SprMFCR>, + PPC970_DGroup_First, PPC970_Unit_CRU; +def MTCTR : XFXForm_7_ext<31, 467, 9, (ops GPRC:$rS), "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +def MTLR : XFXForm_7_ext<31, 467, 8, (ops GPRC:$rS), "mtlr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; def MTSPR : XFXForm_7<31, 467, (ops GPRC:$rS, u16imm:$UIMM), "mtspr $UIMM, $rS", - SprMTSPR>; + SprMTSPR>, + PPC970_DGroup_Single, PPC970_Unit_FXU; // XS-Form instructions. Just 'sradi' // +let PPC970_Unit = 1 in { // FXU Operations. 
def SRADI : XSForm_1<31, 413, (ops GPRC:$rA, GPRC:$rS, u6imm:$SH), "sradi $rA, $rS, $SH", IntRotateD>, isPPC64; @@ -697,16 +727,20 @@ def ADDE : XOForm_1<31, 138, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB), [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; def DIVD : XOForm_1<31, 489, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB), "divd $rT, $rA, $rB", IntDivD, - [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64; + [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + PPC970_DGroup_First; def DIVDU : XOForm_1<31, 457, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB), "divdu $rT, $rA, $rB", IntDivD, - [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64; + [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + PPC970_DGroup_First; def DIVW : XOForm_1<31, 491, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB), "divw $rT, $rA, $rB", IntDivW, - [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>; + [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>, + PPC970_DGroup_First; def DIVWU : XOForm_1<31, 459, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB), "divwu $rT, $rA, $rB", IntDivW, - [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>; + [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>, + PPC970_DGroup_First; def MULHD : XOForm_1<31, 73, 0, (ops G8RC:$rT, G8RC:$rA, G8RC:$rB), "mulhd $rT, $rA, $rB", IntMulHW, [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>; @@ -749,10 +783,12 @@ def SUBFME : XOForm_3<31, 232, 0, (ops GPRC:$rT, GPRC:$rA), def SUBFZE : XOForm_3<31, 200, 0, (ops GPRC:$rT, GPRC:$rA), "subfze $rT, $rA", IntGeneral, [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; +} // A-Form instructions. Most of the instructions executed in the FPU are of // this type. // +let PPC970_Unit = 3 in { // FPU Operations. 
def FMADD : AForm_1<63, 29, (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, @@ -845,7 +881,9 @@ def FSUBS : AForm_2<59, 20, (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB), "fsubs $FRT, $FRA, $FRB", FPGeneral, [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>; +} +let PPC970_Unit = 1 in { // FXU Operations. // M-Form instructions. rotate and mask instructions. // let isTwoAddress = 1, isCommutable = 1 in { @@ -882,7 +920,9 @@ def RLDICR : MDForm_1<30, 1, (ops G8RC:$rA, G8RC:$rS, u6imm:$SH, u6imm:$ME), "rldicr $rA, $rS, $SH, $ME", IntRotateD, []>, isPPC64; +} +let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. def VMADDFP : VAForm_1<46, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC), "vmaddfp $vD, $vA, $vC, $vB", VecFP, @@ -957,7 +997,7 @@ def VXOR : VXForm_1<1220, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB), def V_SET0 : VXForm_setzero<1220, (ops VRRC:$vD), "vxor $vD, $vD, $vD", VecFP, []>; - +} //===----------------------------------------------------------------------===// // DWARF Pseudo Instructions @@ -1097,13 +1137,3 @@ def : Pattern<(xor GPRC:$in, imm:$imm), (XORIS GPRC:$tmp, (HI16 imm:$imm))]>; */ -//===----------------------------------------------------------------------===// -// PowerPCInstrInfo Definition -// -def PowerPCInstrInfo : InstrInfo { - let TSFlagsFields = [ "VMX", "PPC64" ]; - let TSFlagsShifts = [ 0, 1 ]; - - let isLittleEndianEncoding = 1; -} -