From 9b9d45f60e9304b2b08a0efac5041e92e9d05df1 Mon Sep 17 00:00:00 2001 From: Duraid Madina Date: Thu, 17 Mar 2005 18:17:03 +0000 Subject: [PATCH] and so it begins... PHASE 1: write instruction selector PHASE 2: ??? PHASE 3: profit! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@20652 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/IA64/IA64.h | 50 + lib/Target/IA64/IA64.td | 93 ++ lib/Target/IA64/IA64AsmPrinter.cpp | 399 +++++ lib/Target/IA64/IA64ISelPattern.cpp | 1640 +++++++++++++++++++++ lib/Target/IA64/IA64InstrBuilder.h | 52 + lib/Target/IA64/IA64InstrFormats.td | 67 + lib/Target/IA64/IA64InstrInfo.cpp | 47 + lib/Target/IA64/IA64InstrInfo.h | 50 + lib/Target/IA64/IA64InstrInfo.td | 319 ++++ lib/Target/IA64/IA64MachineFunctionInfo.h | 34 + lib/Target/IA64/IA64RegisterInfo.cpp | 362 +++++ lib/Target/IA64/IA64RegisterInfo.h | 55 + lib/Target/IA64/IA64RegisterInfo.td | 291 ++++ lib/Target/IA64/IA64TargetMachine.cpp | 134 ++ lib/Target/IA64/IA64TargetMachine.h | 48 + lib/Target/IA64/Makefile | 17 + lib/Target/IA64/README | 98 ++ 17 files changed, 3756 insertions(+) create mode 100644 lib/Target/IA64/IA64.h create mode 100644 lib/Target/IA64/IA64.td create mode 100644 lib/Target/IA64/IA64AsmPrinter.cpp create mode 100644 lib/Target/IA64/IA64ISelPattern.cpp create mode 100644 lib/Target/IA64/IA64InstrBuilder.h create mode 100644 lib/Target/IA64/IA64InstrFormats.td create mode 100644 lib/Target/IA64/IA64InstrInfo.cpp create mode 100644 lib/Target/IA64/IA64InstrInfo.h create mode 100644 lib/Target/IA64/IA64InstrInfo.td create mode 100644 lib/Target/IA64/IA64MachineFunctionInfo.h create mode 100644 lib/Target/IA64/IA64RegisterInfo.cpp create mode 100644 lib/Target/IA64/IA64RegisterInfo.h create mode 100644 lib/Target/IA64/IA64RegisterInfo.td create mode 100644 lib/Target/IA64/IA64TargetMachine.cpp create mode 100644 lib/Target/IA64/IA64TargetMachine.h create mode 100644 lib/Target/IA64/Makefile create mode 100644 lib/Target/IA64/README diff --git 
a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h new file mode 100644 index 00000000000..204080155df --- /dev/null +++ b/lib/Target/IA64/IA64.h @@ -0,0 +1,50 @@ +//===-- IA64.h - Top-level interface for IA64 representation ------*- C++ -*-===// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the IA64 +// target library, as used by the LLVM JIT. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_IA64_H +#define TARGET_IA64_H + +#include + +namespace llvm { + +class TargetMachine; +class FunctionPass; +class IntrinsicLowering; + +/// createIA64PatternInstructionSelector - This pass converts an LLVM function +/// into a machine code representation in a more aggressive way. +/// +FunctionPass *createIA64PatternInstructionSelector(TargetMachine &TM); + +/// createIA64CodePrinterPass - Returns a pass that prints the IA64 +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. +/// +FunctionPass *createIA64CodePrinterPass(std::ostream &o,TargetMachine &tm); + +} // End llvm namespace + +// Defines symbolic names for IA64 registers. This defines a mapping from +// register name to register number. +// +#include "IA64GenRegisterNames.inc" + +// Defines symbolic names for the IA64 instructions. 
+// +#include "IA64GenInstrNames.inc" + +#endif + + diff --git a/lib/Target/IA64/IA64.td b/lib/Target/IA64/IA64.td new file mode 100644 index 00000000000..36ed340aa73 --- /dev/null +++ b/lib/Target/IA64/IA64.td @@ -0,0 +1,93 @@ +//===-- IA64.td - Target definition file for Intel IA64 -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a target description file for the Intel IA64 architecture, +// also known variously as ia64, IA-64, IPF, "the Itanium architecture" etc. +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing... +// +include "../Target.td" + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "IA64RegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "IA64InstrInfo.td" + +def IA64InstrInfo : InstrInfo { + let PHIInst = PHI; +} + +def IA64 : Target { + // The following registers are always saved across calls: + let CalleeSavedRegisters = + + //'preserved' GRs: + + [r4, r5, r6, r7, + + //'special' GRs: + + r1, // global data pointer (GP) + r12, // memory stack pointer (SP) + // **** r13 (thread pointer) we do not touch, ever. it's not here. 
****// + //r15, // our frame pointer (FP) + + //'stacked' GRs the RSE takes care of, we don't worry about +/* We don't want PEI::calculateCallerSavedRegisters to worry about them, + since the RSE takes care of them (and we determine the appropriate + 'alloc' instructions and save/restore ar.pfs ourselves, in instruction + selection) + +************************************************************************** +* r32, r33, r34, r35, +* r36, r37, r38, r39, r40, r41, r42, r43, r44, r45, r46, r47, +* r48, r49, r50, r51, r52, r53, r54, r55, r56, r57, r58, r59, +* r60, r61, r62, r63, r64, r65, r66, r67, r68, r69, r70, r71, +* r72, r73, r74, r75, r76, r77, r78, r79, r80, r81, r82, r83, +* r84, r85, r86, r87, r88, r89, r90, r91, r92, r93, r94, r95, +* r96, r97, r98, r99, r100, r101, r102, r103, r104, r105, r106, r107, +* r108, r109, r110, r111, r112, r113, r114, r115, r116, r117, r118, r119, +* r120, r121, r122, r123, r124, r125, r126, r127, +************************************************************************** +*/ + //'preserved' FP regs: + + F2,F3,F4,F5, + F16,F17,F18,F19,F20,F21,F22,F23, + F24,F25,F26,F27,F28,F29,F30,F31, + + //'preserved' predicate regs: + + p1, p2, p3, p4, p5, + p16, p17, p18, p19, p20, p21, p22, p23, + p24, p25, p26, p27, p28, p29, p30, p31, + p32, p33, p34, p35, p36, p37, p38, p39, + p40, p41, p42, p43, p44, p45, p46, p47, + p48, p49, p50, p51, p52, p53, p54, p55, + p56, p57, p58, p59, p60, p61, p62, p63]; + + // We don't go anywhere near the LP32 variant of IA64 as + // sometimes seen in (for example) HP-UX + let PointerType = i64; + + // Our instruction set + let InstructionSet = IA64InstrInfo; + +} + + diff --git a/lib/Target/IA64/IA64AsmPrinter.cpp b/lib/Target/IA64/IA64AsmPrinter.cpp new file mode 100644 index 00000000000..fff828f5fbf --- /dev/null +++ b/lib/Target/IA64/IA64AsmPrinter.cpp @@ -0,0 +1,399 @@ +//===-- IA64AsmPrinter.cpp - Print out IA64 LLVM as assembly --------------===// +// +// The LLVM Compiler Infrastructure +// +// This 
file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to assembly accepted by the GNU binutils 'gas' +// assembler. The Intel 'ias' and HP-UX 'as' assemblers *may* choke on this +// output, but if so that's a bug I'd like to hear about: please file a bug +// report in bugzilla. FYI, the excellent 'ias' assembler is bundled with +// the Intel C/C++ compiler for Itanium Linux. +// +//===----------------------------------------------------------------------===// + +#include "IA64.h" +#include "IA64TargetMachine.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +namespace { + Statistic<> EmittedInsts("asm-printer", "Number of machine instrs printed"); + + struct IA64SharedAsmPrinter : public AsmPrinter { + + std::set ExternalFunctionNames; + + IA64SharedAsmPrinter(std::ostream &O, TargetMachine &TM) + : AsmPrinter(O, TM) { } + + void printConstantPool(MachineConstantPool *MCP); + bool doFinalization(Module &M); + }; +} + +static bool isScale(const MachineOperand &MO) { + return MO.isImmediate() && + (MO.getImmedValue() == 1 || MO.getImmedValue() == 2 || + MO.getImmedValue() == 4 || MO.getImmedValue() == 8); +} + +static bool isMem(const MachineInstr *MI, unsigned Op) { + if (MI->getOperand(Op).isFrameIndex()) return true; + if (MI->getOperand(Op).isConstantPoolIndex()) return true; + return Op+4 <= MI->getNumOperands() && + 
MI->getOperand(Op ).isRegister() && isScale(MI->getOperand(Op+1)) && + MI->getOperand(Op+2).isRegister() && (MI->getOperand(Op+3).isImmediate() || + MI->getOperand(Op+3).isGlobalAddress()); +} + +// SwitchSection - Switch to the specified section of the executable if we are +// not already in it! +// +static void SwitchSection(std::ostream &OS, std::string &CurSection, + const char *NewSection) { + if (CurSection != NewSection) { + CurSection = NewSection; + if (!CurSection.empty()) + OS << "\t" << NewSection << "\n"; + } +} + +/// printConstantPool - Print to the current output stream assembly +/// representations of the constants in the constant pool MCP. This is +/// used to print out constants which have been "spilled to memory" by +/// the code generator. +/// +void IA64SharedAsmPrinter::printConstantPool(MachineConstantPool *MCP) { + const std::vector &CP = MCP->getConstants(); + const TargetData &TD = TM.getTargetData(); + + if (CP.empty()) return; + + O << "\n\t.section .data\n"; // would be nice to have this rodata? hmmm + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + emitAlignment(TD.getTypeAlignmentShift(CP[i]->getType())); + O << ".CPI" << CurrentFnName << "_" << i << ":\t\t\t\t\t" << CommentString + << *CP[i] << "\n"; + emitGlobalConstant(CP[i]); + } +} + +bool IA64SharedAsmPrinter::doFinalization(Module &M) { + const TargetData &TD = TM.getTargetData(); + std::string CurSection; + + // Print out module-level global variables here. 
+ for (Module::const_giterator I = M.gbegin(), E = M.gend(); I != E; ++I) + if (I->hasInitializer()) { // External global require no code + O << "\n\n"; + std::string name = Mang->getValueName(I); + Constant *C = I->getInitializer(); + unsigned Size = TD.getTypeSize(C->getType()); + unsigned Align = TD.getTypeAlignmentShift(C->getType()); + + if (C->isNullValue() && + (I->hasLinkOnceLinkage() || I->hasInternalLinkage() || + I->hasWeakLinkage() /* FIXME: Verify correct */)) { + SwitchSection(O, CurSection, ".data"); + if (I->hasInternalLinkage()) + O << "\t.local " << name << "\n"; + + O << "\t.common " << name << "," << TD.getTypeSize(C->getType()) + << "," << (1 << Align); + O << "\t\t// "; + WriteAsOperand(O, I, true, true, &M); + O << "\n"; + } else { + switch (I->getLinkage()) { + case GlobalValue::LinkOnceLinkage: + case GlobalValue::WeakLinkage: // FIXME: Verify correct for weak. + // Nonnull linkonce -> weak + O << "\t.weak " << name << "\n"; + SwitchSection(O, CurSection, ""); + O << "\t.section\t.llvm.linkonce.d." << name + << ", \"aw\", \"progbits\"\n"; + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.global " << name << "\n"; + // FALL THROUGH + case GlobalValue::InternalLinkage: + if (C->isNullValue()) + SwitchSection(O, CurSection, ".data"); // FIXME: this was + // '.bss', but in ia64-land .bss means "nobits" (i.e. uninitialized) + // hmm. 
+ else + SwitchSection(O, CurSection, ".data"); + break; + case GlobalValue::GhostLinkage: + std::cerr << "GhostLinkage cannot appear in IA64AsmPrinter!\n"; + abort(); + } + + emitAlignment(Align); + O << "\t.type " << name << ",@object\n"; + O << "\t.size " << name << "," << Size << "\n"; + O << name << ":\t\t\t\t// "; + WriteAsOperand(O, I, true, true, &M); + O << " = "; + WriteAsOperand(O, C, false, false, &M); + O << "\n"; + emitGlobalConstant(C); + } + } + + // we print out ".global X \n .type X, @function" for each external function + O << "\n\n// br.call targets referenced (and not defined) above: \n"; + for (std::set::iterator i = ExternalFunctionNames.begin(), + e = ExternalFunctionNames.end(); i!=e; ++i) { + O << "\t.global " << *i << "\n\t.type " << *i << ", @function\n"; + } + O << "\n\n"; + + AsmPrinter::doFinalization(M); + return false; // success +} + +namespace { + struct IA64AsmPrinter : public IA64SharedAsmPrinter { + IA64AsmPrinter(std::ostream &O, TargetMachine &TM) + : IA64SharedAsmPrinter(O, TM) { + + CommentString = "//"; + Data8bitsDirective = "\tdata1\t"; + Data16bitsDirective = "\tdata2\t"; + Data32bitsDirective = "\tdata4\t"; + Data64bitsDirective = "\tdata8\t"; + ZeroDirective = "\t.skip\t"; + AsciiDirective = "\tstring\t"; + + } + + virtual const char *getPassName() const { + return "IA64 Assembly Printer"; + } + + /// printInstruction - This method is automatically generated by tablegen + /// from the instruction set description. This method returns true if the + /// machine instruction was sufficiently described to print it, otherwise it + /// returns false. + bool printInstruction(const MachineInstr *MI); + + // This method is used by the tablegen'erated instruction printer. 
+ void printOperand(const MachineInstr *MI, unsigned OpNo, MVT::ValueType VT){ + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.getType() == MachineOperand::MO_MachineRegister) { + assert(MRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physref??"); + //XXX Bug Workaround: See note in Printer::doInitialization about %. + O << TM.getRegisterInfo()->get(MO.getReg()).Name; + } else { + printOp(MO); + } + } + + void printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, + MVT::ValueType VT) { + O << (short)MI->getOperand(OpNo).getImmedValue(); + } + void printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, + MVT::ValueType VT) { + O << (unsigned short)MI->getOperand(OpNo).getImmedValue(); + } + void printS21ImmOperand(const MachineInstr *MI, unsigned OpNo, + MVT::ValueType VT) { + O << (int)MI->getOperand(OpNo).getImmedValue(); // FIXME (21, not 32!) + } + void printS32ImmOperand(const MachineInstr *MI, unsigned OpNo, + MVT::ValueType VT) { + O << (int)MI->getOperand(OpNo).getImmedValue(); + } + void printU32ImmOperand(const MachineInstr *MI, unsigned OpNo, + MVT::ValueType VT) { + O << (unsigned int)MI->getOperand(OpNo).getImmedValue(); + } + void printU64ImmOperand(const MachineInstr *MI, unsigned OpNo, + MVT::ValueType VT) { + O << (uint64_t)MI->getOperand(OpNo).getImmedValue(); + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo, + MVT::ValueType VT) { + printOp(MI->getOperand(OpNo), true); // this is a br.call instruction + } + + void printMachineInstruction(const MachineInstr *MI); + void printOp(const MachineOperand &MO, bool isBRCALLinsn= false); + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + }; +} // end of anonymous namespace + + +// Include the auto-generated portion of the assembly writer. +#include "IA64GenAsmWriter.inc" + + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. 
+/// +bool IA64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + setupMachineFunction(MF); + O << "\n\n"; + + // Print out constants referenced by the function + printConstantPool(MF.getConstantPool()); + + // Print out labels for the function. + O << "\n\t.section .text, \"ax\", \"progbits\"\n"; + // ^^ means "Allocated instruXions in mem, initialized" + emitAlignment(4); + O << "\t.global\t" << CurrentFnName << "\n"; + O << "\t.type\t" << CurrentFnName << ", @function\n"; + O << CurrentFnName << ":\n"; + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block if there are any predecessors. + if (I->pred_begin() != I->pred_end()) + O << ".LBB" << CurrentFnName << "_" << I->getNumber() << ":\t" + << CommentString << " " << I->getBasicBlock()->getName() << "\n"; + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + O << "\t"; + printMachineInstruction(II); + } + } + + // We didn't modify anything. 
+ return false; +} + +void IA64AsmPrinter::printOp(const MachineOperand &MO, + bool isBRCALLinsn /* = false */) { + const MRegisterInfo &RI = *TM.getRegisterInfo(); + switch (MO.getType()) { + case MachineOperand::MO_VirtualRegister: + if (Value *V = MO.getVRegValueOrNull()) { + O << "<" << V->getName() << ">"; + return; + } + // FALLTHROUGH + case MachineOperand::MO_MachineRegister: + case MachineOperand::MO_CCRegister: { + O << RI.get(MO.getReg()).Name; + return; + } + + case MachineOperand::MO_SignExtendedImmed: + case MachineOperand::MO_UnextendedImmed: + O << /*(unsigned int)*/MO.getImmedValue(); + return; + case MachineOperand::MO_MachineBasicBlock: { + MachineBasicBlock *MBBOp = MO.getMachineBasicBlock(); + O << ".LBB" << Mang->getValueName(MBBOp->getParent()->getFunction()) + << "_" << MBBOp->getNumber () << "\t// " + << MBBOp->getBasicBlock ()->getName (); + return; + } + case MachineOperand::MO_PCRelativeDisp: + std::cerr << "Shouldn't use addPCDisp() when building IA64 MachineInstrs"; + abort (); + return; + + case MachineOperand::MO_ConstantPoolIndex: { + O << "@gprel(.CPI" << CurrentFnName << "_" + << MO.getConstantPoolIndex() << ")"; + return; + } + + case MachineOperand::MO_GlobalAddress: { + + // functions need @ltoff(@fptr(fn_name)) form + GlobalValue *GV = MO.getGlobal(); + Function *F = dyn_cast(GV); + + bool Needfptr=false; // if we're computing an address @ltoff(X), do + // we need to decorate it so it becomes + // @ltoff(@fptr(X)) ? 
+ if(F && !isBRCALLinsn && F->isExternal()) + Needfptr=true; + + // if this is the target of a call instruction, we should define + // the function somewhere (GNU gas has no problem without this, but + // Intel ias rightly complains of an 'undefined symbol') + + if(F && isBRCALLinsn && F->isExternal()) + ExternalFunctionNames.insert(Mang->getValueName(MO.getGlobal())); + + if (!isBRCALLinsn) + O << "@ltoff("; + if (Needfptr) + O << "@fptr("; + O << Mang->getValueName(MO.getGlobal()); + if (Needfptr) + O << ")"; // close fptr( + if (!isBRCALLinsn) + O << ")"; // close ltoff( + int Offset = MO.getOffset(); + if (Offset > 0) + O << " + " << Offset; + else if (Offset < 0) + O << " - " << -Offset; + return; + } + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + return; + default: + O << ""; return; + } +} + +/// printMachineInstruction -- Print out a single IA64 LLVM instruction +/// MI to the current output stream. +/// +void IA64AsmPrinter::printMachineInstruction(const MachineInstr *MI) { + + ++EmittedInsts; + + // Call the autogenerated instruction printer routines. + printInstruction(MI); +} + +bool IA64AsmPrinter::doInitialization(Module &M) { + AsmPrinter::doInitialization(M); + + O << "\t.psr lsb\n" // should be "msb" on HP-UX, for starters + << "\t.radix C\n" + << "\t.psr abi64\n"; // we only support 64 bits for now + return false; +} + +/// createIA64CodePrinterPass - Returns a pass that prints the IA64 +/// assembly code for a MachineFunction to the given output stream, using +/// the given target machine description. 
+/// +FunctionPass *llvm::createIA64CodePrinterPass(std::ostream &o,TargetMachine &tm){ + return new IA64AsmPrinter(o, tm); +} + + diff --git a/lib/Target/IA64/IA64ISelPattern.cpp b/lib/Target/IA64/IA64ISelPattern.cpp new file mode 100644 index 00000000000..09cb1ce3807 --- /dev/null +++ b/lib/Target/IA64/IA64ISelPattern.cpp @@ -0,0 +1,1640 @@ +//===-- IA64ISelPattern.cpp - A pattern matching inst selector for IA64 ---===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pattern matching instruction selector for IA64. +// +//===----------------------------------------------------------------------===// + +#include "IA64.h" +#include "IA64InstrBuilder.h" +#include "IA64RegisterInfo.h" +#include "IA64MachineFunctionInfo.h" +#include "llvm/Constants.h" // FIXME: REMOVE +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineConstantPool.h" // FIXME: REMOVE +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/Statistic.h" +#include +#include +using namespace llvm; + +//===----------------------------------------------------------------------===// +// IA64TargetLowering - IA64 Implementation of the TargetLowering interface +namespace { + class IA64TargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. + + //int ReturnAddrIndex; // FrameIndex for return slot. 
+ unsigned GP, SP, RP; // FIXME - clean this mess up + public: + + unsigned VirtGPR; // this is public so it can be accessed in the selector + // for ISD::RET down below. add an accessor instead? FIXME + + IA64TargetLowering(TargetMachine &TM) : TargetLowering(TM) { + + // register class for general registers + addRegisterClass(MVT::i64, IA64::GRRegisterClass); + + // register class for FP registers + addRegisterClass(MVT::f64, IA64::FPRegisterClass); + + // register class for predicate registers + addRegisterClass(MVT::i1, IA64::PRRegisterClass); + + setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); + + setSetCCResultType(MVT::i1); + setShiftAmountType(MVT::i64); + + setOperationAction(ISD::EXTLOAD , MVT::i1 , Promote); + setOperationAction(ISD::EXTLOAD , MVT::f32 , Promote); + + setOperationAction(ISD::ZEXTLOAD , MVT::i1 , Expand); + setOperationAction(ISD::ZEXTLOAD , MVT::i32 , Expand); + + setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); + setOperationAction(ISD::SEXTLOAD , MVT::i8 , Expand); + setOperationAction(ISD::SEXTLOAD , MVT::i16 , Expand); + + setOperationAction(ISD::SREM , MVT::f32 , Expand); + setOperationAction(ISD::SREM , MVT::f64 , Expand); + + setOperationAction(ISD::UREM , MVT::f32 , Expand); + setOperationAction(ISD::UREM , MVT::f64 , Expand); + + setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); + setOperationAction(ISD::MEMSET , MVT::Other, Expand); + setOperationAction(ISD::MEMCPY , MVT::Other, Expand); + + + computeRegisterProperties(); + + addLegalFPImmediate(+0.0); + addLegalFPImmediate(+1.0); + addLegalFPImmediate(-0.0); + addLegalFPImmediate(-1.0); + } + + /// LowerArguments - This hook must be implemented to indicate how we should + /// lower the arguments for the specified function, into the specified DAG. + virtual std::vector + LowerArguments(Function &F, SelectionDAG &DAG); + + /// LowerCallTo - This hook lowers an abstract call to a function into an + /// actual call. 
+ virtual std::pair + LowerCallTo(SDOperand Chain, const Type *RetTy, SDOperand Callee, + ArgListTy &Args, SelectionDAG &DAG); + + virtual std::pair + LowerVAStart(SDOperand Chain, SelectionDAG &DAG); + + virtual std::pair + LowerVAArgNext(bool isVANext, SDOperand Chain, SDOperand VAList, + const Type *ArgTy, SelectionDAG &DAG); + + virtual std::pair + LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth, + SelectionDAG &DAG); + + void restoreGP_SP_RP(MachineBasicBlock* BB) + { + BuildMI(BB, IA64::MOV, 1, IA64::r1).addReg(GP); + BuildMI(BB, IA64::MOV, 1, IA64::r12).addReg(SP); + BuildMI(BB, IA64::MOV, 1, IA64::rp).addReg(RP); + } + + void restoreRP(MachineBasicBlock* BB) + { + BuildMI(BB, IA64::MOV, 1, IA64::rp).addReg(RP); + } + + void restoreGP(MachineBasicBlock* BB) + { + BuildMI(BB, IA64::MOV, 1, IA64::r1).addReg(GP); + } + + }; +} + + +std::vector +IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { + std::vector ArgValues; + + // + // add beautiful description of IA64 stack frame format + // here (from intel 24535803.pdf most likely) + // + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + GP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + SP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + RP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + + MachineBasicBlock& BB = MF.front(); + + unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35, + IA64::r36, IA64::r37, IA64::r38, IA64::r39}; + + unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11, + IA64::F12,IA64::F13,IA64::F14, IA64::F15}; + + unsigned argVreg[8]; + unsigned argPreg[8]; + unsigned argOpc[8]; + + unsigned used_FPArgs=0; // how many FP args have been used so far? + + int count = 0; + for (Function::aiterator I = F.abegin(), E = F.aend(); I != E; ++I) + { + SDOperand newroot, argt; + if(count < 8) { // need to fix this logic? 
maybe. + + switch (getValueType(I->getType())) { + default: + std::cerr << "ERROR in LowerArgs: unknown type " + << getValueType(I->getType()) << "\n"; + abort(); + case MVT::f32: + // fixme? (well, will need to for weird FP structy stuff, + // see intel ABI docs) + case MVT::f64: + BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]); + // floating point args go into f8..f15 as-needed, the increment + argVreg[count] = // is below..: + MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::f64)); + // FP args go into f8..f15 as needed: (hence the ++) + argPreg[count] = args_FP[used_FPArgs++]; + argOpc[count] = IA64::FMOV; + argt = newroot = DAG.getCopyFromReg(argVreg[count], + getValueType(I->getType()), DAG.getRoot()); + break; + case MVT::i1: // NOTE: as far as C abi stuff goes, + // bools are just boring old ints + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + BuildMI(&BB, IA64::IDEF, 0, args_int[count]); + argVreg[count] = + MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + argPreg[count] = args_int[count]; + argOpc[count] = IA64::MOV; + argt = newroot = + DAG.getCopyFromReg(argVreg[count], MVT::i64, DAG.getRoot()); + if ( getValueType(I->getType()) != MVT::i64) + argt = DAG.getNode(ISD::TRUNCATE, getValueType(I->getType()), + newroot); + break; + } + } else { // more than 8 args go into the frame + // Create the frame index object for this incoming parameter... 
+ int FI = MFI->CreateFixedObject(8, 16 + 8 * (count - 8)); + + // Create the SelectionDAG nodes corresponding to a load + //from this parameter + SDOperand FIN = DAG.getFrameIndex(FI, MVT::i64); + argt = newroot = DAG.getLoad(getValueType(I->getType()), + DAG.getEntryNode(), FIN); + } + ++count; + DAG.setRoot(newroot.getValue(1)); + ArgValues.push_back(argt); + } + +// Create a vreg to hold the output of (what will become) +// the "alloc" instruction + VirtGPR = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + BuildMI(&BB, IA64::PSEUDO_ALLOC, 0, VirtGPR); + // we create a PSEUDO_ALLOC (pseudo)instruction for now + + BuildMI(&BB, IA64::IDEF, 0, IA64::r1); + + // hmm: + BuildMI(&BB, IA64::IDEF, 0, IA64::r12); + BuildMI(&BB, IA64::IDEF, 0, IA64::rp); + // ..hmm. + + BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1); + + // hmm: + BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12); + BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp); + // ..hmm. + + for (int i = 0; i < count && i < 8; ++i) { + BuildMI(&BB, argOpc[i], 1, argVreg[i]).addReg(argPreg[i]); + } + + return ArgValues; +} + +std::pair +IA64TargetLowering::LowerCallTo(SDOperand Chain, + const Type *RetTy, SDOperand Callee, + ArgListTy &Args, SelectionDAG &DAG) { + + MachineFunction &MF = DAG.getMachineFunction(); + +// for now, we are overly-conservative and pretend that all 8 +// outgoing registers (out0-out7) are always used. 
FIXME + +// update comment line 137 of MachineFunction.h + MF.getInfo()->outRegsUsed=8; + + unsigned NumBytes = 16; + if (Args.size() > 8) + NumBytes += (Args.size() - 8) * 8; + + Chain = DAG.getNode(ISD::ADJCALLSTACKDOWN, MVT::Other, Chain, + DAG.getConstant(NumBytes, getPointerTy())); + + std::vector args_to_use; + for (unsigned i = 0, e = Args.size(); i != e; ++i) + { + switch (getValueType(Args[i].second)) { + default: assert(0 && "unexpected argument type!"); + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + //promote to 64-bits, sign/zero extending based on type + //of the argument + if(Args[i].second->isSigned()) + Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i64, + Args[i].first); + else + Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i64, + Args[i].first); + break; + case MVT::f32: + //promote to 64-bits + Args[i].first = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Args[i].first); + case MVT::f64: + case MVT::i64: + break; + } + args_to_use.push_back(Args[i].first); + } + + std::vector RetVals; + MVT::ValueType RetTyVT = getValueType(RetTy); + if (RetTyVT != MVT::isVoid) + RetVals.push_back(RetTyVT); + RetVals.push_back(MVT::Other); + + SDOperand TheCall = SDOperand(DAG.getCall(RetVals, Chain, + Callee, args_to_use), 0); + Chain = TheCall.getValue(RetTyVT != MVT::isVoid); + Chain = DAG.getNode(ISD::ADJCALLSTACKUP, MVT::Other, Chain, + DAG.getConstant(NumBytes, getPointerTy())); + return std::make_pair(TheCall, Chain); +} + +std::pair +IA64TargetLowering::LowerVAStart(SDOperand Chain, SelectionDAG &DAG) { + // vastart just returns the address of the VarArgsFrameIndex slot. 
+ return std::make_pair(DAG.getFrameIndex(VarArgsFrameIndex, MVT::i64), Chain); +} + +std::pair IA64TargetLowering:: +LowerVAArgNext(bool isVANext, SDOperand Chain, SDOperand VAList, + const Type *ArgTy, SelectionDAG &DAG) { + + assert(0 && "LowerVAArgNext not done yet!\n"); +} + + +std::pair IA64TargetLowering:: +LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, + SelectionDAG &DAG) { + + assert(0 && "LowerFrameReturnAddress not done yet\n"); +} + + +namespace { + + //===--------------------------------------------------------------------===// + /// ISel - IA64 specific code to select IA64 machine instructions for + /// SelectionDAG operations. + /// + class ISel : public SelectionDAGISel { + /// IA64Lowering - This object fully describes how to lower LLVM code to an + /// IA64-specific SelectionDAG. + IA64TargetLowering IA64Lowering; + + /// ExprMap - As shared expressions are codegen'd, we keep track of which + /// vreg the value is produced in, so we only emit one copy of each compiled + /// tree. + std::map ExprMap; + std::set LoweredTokens; + + public: + ISel(TargetMachine &TM) : SelectionDAGISel(IA64Lowering), IA64Lowering(TM) { + } + + /// InstructionSelectBasicBlock - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelectBasicBlock(SelectionDAG &DAG); + +// bool isFoldableLoad(SDOperand Op); +// void EmitFoldedLoad(SDOperand Op, IA64AddressMode &AM); + + unsigned SelectExpr(SDOperand N); + void Select(SDOperand N); + }; +} + +/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel +/// when it has created a SelectionDAG for us to codegen. +void ISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { + + // Codegen the basic block. + Select(DAG.getRoot()); + + // Clear state used for selection. 
+ ExprMap.clear(); + LoweredTokens.clear(); +} + +unsigned ISel::SelectExpr(SDOperand N) { + unsigned Result; + unsigned Tmp1, Tmp2, Tmp3; + unsigned Opc = 0; + MVT::ValueType DestType = N.getValueType(); + + unsigned opcode = N.getOpcode(); + + SDNode *Node = N.Val; + SDOperand Op0, Op1; + + if (Node->getOpcode() == ISD::CopyFromReg) + // Just use the specified register as our input. + return dyn_cast(Node)->getReg(); + + unsigned &Reg = ExprMap[N]; + if (Reg) return Reg; + + if (N.getOpcode() != ISD::CALL) + Reg = Result = (N.getValueType() != MVT::Other) ? + MakeReg(N.getValueType()) : 1; + else { + // If this is a call instruction, make sure to prepare ALL of the result + // values as well as the chain. + if (Node->getNumValues() == 1) + Reg = Result = 1; // Void call, just a chain. + else { + Result = MakeReg(Node->getValueType(0)); + ExprMap[N.getValue(0)] = Result; + for (unsigned i = 1, e = N.Val->getNumValues()-1; i != e; ++i) + ExprMap[N.getValue(i)] = MakeReg(Node->getValueType(i)); + ExprMap[SDOperand(Node, Node->getNumValues()-1)] = 1; + } + } + + switch (N.getOpcode()) { + default: + Node->dump(); + assert(0 && "Node not handled!\n"); + + case ISD::FrameIndex: { + Tmp1 = cast(N)->getIndex(); + BuildMI(BB, IA64::MOV, 1, Result).addFrameIndex(Tmp1); + return Result; + } + + case ISD::ConstantPool: { + Tmp1 = cast(N)->getIndex(); + IA64Lowering.restoreGP(BB); // FIXME: do i really need this? 
+ BuildMI(BB, IA64::ADD, 2, Result).addConstantPoolIndex(Tmp1) + .addReg(IA64::r1); + return Result; + } + + case ISD::ConstantFP: { + Tmp1 = Result; // Intermediate Register + if (cast(N)->getValue() < 0.0 || + cast(N)->isExactlyValue(-0.0)) + Tmp1 = MakeReg(MVT::f64); + + if (cast(N)->isExactlyValue(+0.0) || + cast(N)->isExactlyValue(-0.0)) + BuildMI(BB, IA64::FMOV, 1, Tmp1).addReg(IA64::F0); // load 0.0 + else if (cast(N)->isExactlyValue(+1.0) || + cast(N)->isExactlyValue(-1.0)) + BuildMI(BB, IA64::FMOV, 1, Tmp1).addReg(IA64::F1); // load 1.0 + else + assert(0 && "Unexpected FP constant!"); + if (Tmp1 != Result) + // we multiply by +1.0, negate (this is FNMA), and then add 0.0 + BuildMI(BB, IA64::FNMA, 3, Result).addReg(Tmp1).addReg(IA64::F1) + .addReg(IA64::F0); + return Result; + } + + case ISD::DYNAMIC_STACKALLOC: { + // Generate both result values. + if (Result != 1) + ExprMap[N.getValue(1)] = 1; // Generate the token + else + Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType()); + + // FIXME: We are currently ignoring the requested alignment for handling + // greater than the stack alignment. This will need to be revisited at some + // point. Align = N.getOperand(2); + + if (!isa(N.getOperand(2)) || + cast(N.getOperand(2))->getValue() != 0) { + std::cerr << "Cannot allocate stack object with greater alignment than" + << " the stack alignment yet!"; + abort(); + } + + Select(N.getOperand(0)); + if (ConstantSDNode* CN = dyn_cast(N.getOperand(1))) + { + if (CN->getValue() < 32000) + { + BuildMI(BB, IA64::ADDIMM22, 2, IA64::r12).addReg(IA64::r12) + .addImm(-CN->getValue()); + } else { + Tmp1 = SelectExpr(N.getOperand(1)); + // Subtract size from stack pointer, thereby allocating some space. + BuildMI(BB, IA64::SUB, 2, IA64::r12).addReg(IA64::r12).addReg(Tmp1); + } + } else { + Tmp1 = SelectExpr(N.getOperand(1)); + // Subtract size from stack pointer, thereby allocating some space. 
+ BuildMI(BB, IA64::SUB, 2, IA64::r12).addReg(IA64::r12).addReg(Tmp1); + } + + // Put a pointer to the space into the result register, by copying the + // stack pointer. + BuildMI(BB, IA64::MOV, 1, Result).addReg(IA64::r12); + return Result; + } + + case ISD::SELECT: { + Tmp1 = SelectExpr(N.getOperand(0)); //Cond + Tmp2 = SelectExpr(N.getOperand(1)); //Use if TRUE + Tmp3 = SelectExpr(N.getOperand(2)); //Use if FALSE + + // a temporary predicate register to hold the complement of the + // condition: + unsigned CondComplement=MakeReg(MVT::i1); + unsigned bogusTemp=MakeReg(MVT::i1); + + unsigned bogoResult; + + switch (N.getOperand(1).getValueType()) { + default: assert(0 && + "ISD::SELECT: 'select'ing something other than i64 or f64!\n"); + case MVT::i64: + bogoResult=MakeReg(MVT::i64); + break; + case MVT::f64: + bogoResult=MakeReg(MVT::f64); + break; + } + // set up the complement predicate reg (CondComplement = NOT Tmp1) + BuildMI(BB, IA64::CMPEQ, 2, bogusTemp).addReg(IA64::r0).addReg(IA64::r0); + BuildMI(BB, IA64::TPCMPNE, 3, CondComplement).addReg(bogusTemp) + .addReg(IA64::r0).addReg(IA64::r0).addReg(Tmp1); + + // and do a 'conditional move' + BuildMI(BB, IA64::PMOV, 2, bogoResult).addReg(Tmp2).addReg(Tmp1); + BuildMI(BB, IA64::CMOV, 2, Result).addReg(bogoResult).addReg(Tmp3) + .addReg(CondComplement); + + return Result; + } + + case ISD::Constant: { + unsigned depositPos=0; + unsigned depositLen=0; + switch (N.getValueType()) { + default: assert(0 && "Cannot use constants of this type!"); + case MVT::i1: { // if a bool, we don't 'load' so much as generate + // the constant: + if(cast(N)->getValue()) // true: + BuildMI(BB, IA64::CMPEQ, 2, Result) + .addReg(IA64::r0).addReg(IA64::r0); + else // false: + BuildMI(BB, IA64::CMPNE, 2, Result) + .addReg(IA64::r0).addReg(IA64::r0); + return Result; + } + case MVT::i64: Opc = IA64::MOVLI32; break; + } + + int64_t immediate = cast(N)->getValue(); + if(immediate>>32) { // if our immediate really is big: + int highPart = 
immediate>>32; + int lowPart = immediate&0xFFFFFFFF; + unsigned dummy = MakeReg(MVT::i64); + unsigned dummy2 = MakeReg(MVT::i64); + unsigned dummy3 = MakeReg(MVT::i64); + + BuildMI(BB, IA64::MOVLI32, 1, dummy).addImm(highPart); + BuildMI(BB, IA64::SHLI, 2, dummy2).addReg(dummy).addImm(32); + BuildMI(BB, IA64::MOVLI32, 1, dummy3).addImm(lowPart); + BuildMI(BB, IA64::ADD, 2, Result).addReg(dummy2).addReg(dummy3); + } else { + BuildMI(BB, IA64::MOVLI32, 1, Result).addImm(immediate); + } + + return Result; + } + + case ISD::GlobalAddress: { + GlobalValue *GV = cast(N)->getGlobal(); + unsigned Tmp1 = MakeReg(MVT::i64); + BuildMI(BB, IA64::ADD, 2, Tmp1).addGlobalAddress(GV).addReg(IA64::r1); + //r1==GP + BuildMI(BB, IA64::LD8, 1, Result).addReg(Tmp1); + return Result; + } + + case ISD::ExternalSymbol: { + const char *Sym = cast(N)->getSymbol(); + assert(0 && "ISD::ExternalSymbol not done yet\n"); + //XXX BuildMI(BB, IA64::MOV, 1, Result).addExternalSymbol(Sym); + return Result; + } + + case ISD::FP_EXTEND: { + Tmp1 = SelectExpr(N.getOperand(0)); + BuildMI(BB, IA64::FMOV, 1, Result).addReg(Tmp1); + return Result; + } + + case ISD::ZERO_EXTEND: { + Tmp1 = SelectExpr(N.getOperand(0)); // value + + switch (N.getOperand(0).getValueType()) { + default: assert(0 && "Cannot zero-extend this type!"); + case MVT::i8: Opc = IA64::ZXT1; break; + case MVT::i16: Opc = IA64::ZXT2; break; + case MVT::i32: Opc = IA64::ZXT4; break; + + // we handle bools differently! : + case MVT::i1: { // if the predicate reg has 1, we want a '1' in our GR. + unsigned dummy = MakeReg(MVT::i64); + // first load zero: + BuildMI(BB, IA64::MOV, 1, dummy).addReg(IA64::r0); + // ...then conditionally (PR:Tmp1) add 1: + BuildMI(BB, IA64::CADDIMM22, 3, Result).addReg(dummy) + .addImm(1).addReg(Tmp1); + return Result; // XXX early exit! + } + } + + BuildMI(BB, Opc, 1, Result).addReg(Tmp1); + return Result; + } + + case ISD::SIGN_EXTEND: { // we should only have to handle i1 -> i64 here!!! 
+ +assert(0 && "hmm, ISD::SIGN_EXTEND: shouldn't ever be reached. bad luck!\n"); + + Tmp1 = SelectExpr(N.getOperand(0)); // value + + switch (N.getOperand(0).getValueType()) { + default: assert(0 && "Cannot sign-extend this type!"); + case MVT::i1: assert(0 && "trying to sign extend a bool? ow.\n"); + Opc = IA64::SXT1; break; + // FIXME: for now, we treat bools the same as i8s + case MVT::i8: Opc = IA64::SXT1; break; + case MVT::i16: Opc = IA64::SXT2; break; + case MVT::i32: Opc = IA64::SXT4; break; + } + + BuildMI(BB, Opc, 1, Result).addReg(Tmp1); + return Result; + } + + case ISD::TRUNCATE: { + // we use the funky dep.z (deposit (zero)) instruction to deposit bits + // of R0 appropriately. + switch (N.getOperand(0).getValueType()) { + default: assert(0 && "Unknown truncate!"); + case MVT::i64: break; + } + Tmp1 = SelectExpr(N.getOperand(0)); + unsigned depositPos, depositLen; + + switch (N.getValueType()) { + default: assert(0 && "Unknown truncate!"); + case MVT::i1: { + // if input (normal reg) is 0, 0!=0 -> false (0), if 1, 1!=0 ->true (1): + BuildMI(BB, IA64::CMPNE, 2, Result).addReg(Tmp1) + .addReg(IA64::r0); + return Result; // XXX early exit! 
+ } + case MVT::i8: depositPos=0; depositLen=8; break; + case MVT::i16: depositPos=0; depositLen=16; break; + case MVT::i32: depositPos=0; depositLen=32; break; + } + BuildMI(BB, IA64::DEPZ, 1, Result).addReg(Tmp1) + .addImm(depositPos).addImm(depositLen); + return Result; + } + +/* + case ISD::FP_ROUND: { + assert (DestType == MVT::f32 && N.getOperand(0).getValueType() == MVT::f64 && + "error: trying to FP_ROUND something other than f64 -> f32!\n"); + Tmp1 = SelectExpr(N.getOperand(0)); + BuildMI(BB, IA64::FADDS, 2, Result).addReg(Tmp1).addReg(IA64::F0); + // we add 0.0 using a single precision add to do rounding + return Result; + } +*/ + +// FIXME: the following 4 cases need cleaning + case ISD::SINT_TO_FP: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = MakeReg(MVT::f64); + unsigned dummy = MakeReg(MVT::f64); + BuildMI(BB, IA64::SETFSIG, 1, Tmp2).addReg(Tmp1); + BuildMI(BB, IA64::FCVTXF, 1, dummy).addReg(Tmp2); + BuildMI(BB, IA64::FNORMD, 1, Result).addReg(dummy); + return Result; + } + + case ISD::UINT_TO_FP: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = MakeReg(MVT::f64); + unsigned dummy = MakeReg(MVT::f64); + BuildMI(BB, IA64::SETFSIG, 1, Tmp2).addReg(Tmp1); + BuildMI(BB, IA64::FCVTXUF, 1, dummy).addReg(Tmp2); + BuildMI(BB, IA64::FNORMD, 1, Result).addReg(dummy); + return Result; + } + + case ISD::FP_TO_SINT: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = MakeReg(MVT::f64); + BuildMI(BB, IA64::FCVTFXTRUNC, 1, Tmp2).addReg(Tmp1); + BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(Tmp2); + return Result; + } + + case ISD::FP_TO_UINT: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = MakeReg(MVT::f64); + BuildMI(BB, IA64::FCVTFXUTRUNC, 1, Tmp2).addReg(Tmp1); + BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(Tmp2); + return Result; + } + + case ISD::ADD: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + if(DestType != MVT::f64) + BuildMI(BB, IA64::ADD, 2, Result).addReg(Tmp1).addReg(Tmp2); // int + else + BuildMI(BB, IA64::FADD, 2, 
Result).addReg(Tmp1).addReg(Tmp2); // FP + return Result; + } + + case ISD::MUL: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + if(DestType != MVT::f64) { // integer multiply, emit some code (FIXME) + unsigned TempFR1=MakeReg(MVT::f64); + unsigned TempFR2=MakeReg(MVT::f64); + unsigned TempFR3=MakeReg(MVT::f64); + BuildMI(BB, IA64::SETFSIG, 1, TempFR1).addReg(Tmp1); + BuildMI(BB, IA64::SETFSIG, 1, TempFR2).addReg(Tmp2); + BuildMI(BB, IA64::XMAL, 1, TempFR3).addReg(TempFR1).addReg(TempFR2) + .addReg(IA64::F0); + BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(TempFR3); + } + else // floating point multiply + BuildMI(BB, IA64::FMPY, 2, Result).addReg(Tmp1).addReg(Tmp2); + return Result; + } + + case ISD::SUB: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + if(DestType != MVT::f64) + BuildMI(BB, IA64::SUB, 2, Result).addReg(Tmp1).addReg(Tmp2); + else + BuildMI(BB, IA64::FSUB, 2, Result).addReg(Tmp1).addReg(Tmp2); + return Result; + } + + case ISD::AND: { + switch (N.getValueType()) { + default: assert(0 && "Cannot AND this type!"); + case MVT::i1: { // if a bool, we emit a pseudocode AND + unsigned pA = SelectExpr(N.getOperand(0)); + unsigned pB = SelectExpr(N.getOperand(1)); + +/* our pseudocode for AND is: + * +(pA) cmp.eq.unc pC,p0 = r0,r0 // pC = pA + cmp.eq pTemp,p0 = r0,r0 // pTemp = NOT pB + ;; +(pB) cmp.ne pTemp,p0 = r0,r0 + ;; +(pTemp)cmp.ne pC,p0 = r0,r0 // if (NOT pB) pC = 0 + +*/ + unsigned pTemp = MakeReg(MVT::i1); + + unsigned bogusTemp1 = MakeReg(MVT::i1); + unsigned bogusTemp2 = MakeReg(MVT::i1); + unsigned bogusTemp3 = MakeReg(MVT::i1); + unsigned bogusTemp4 = MakeReg(MVT::i1); + + BuildMI(BB, IA64::PCMPEQUNC, 3, bogusTemp1) + .addReg(IA64::r0).addReg(IA64::r0).addReg(pA); + BuildMI(BB, IA64::CMPEQ, 2, bogusTemp2) + .addReg(IA64::r0).addReg(IA64::r0); + BuildMI(BB, IA64::TPCMPNE, 3, pTemp) + .addReg(bogusTemp2).addReg(IA64::r0).addReg(IA64::r0).addReg(pB); + BuildMI(BB, IA64::TPCMPNE, 3, 
Result) + .addReg(bogusTemp1).addReg(IA64::r0).addReg(IA64::r0).addReg(pTemp); + break; + } + // if not a bool, we just AND away: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + BuildMI(BB, IA64::AND, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + } + } + return Result; + } + + case ISD::OR: { + switch (N.getValueType()) { + default: assert(0 && "Cannot OR this type!"); + case MVT::i1: { // if a bool, we emit a pseudocode OR + unsigned pA = SelectExpr(N.getOperand(0)); + unsigned pB = SelectExpr(N.getOperand(1)); + + unsigned pTemp1 = MakeReg(MVT::i1); + +/* our pseudocode for OR is: + * + +pC = pA OR pB +------------- + +(pA) cmp.eq.unc pC,p0 = r0,r0 // pC = pA + ;; +(pB) cmp.eq pC,p0 = r0,r0 // if (pB) pC = 1 + +*/ + BuildMI(BB, IA64::PCMPEQUNC, 3, pTemp1) + .addReg(IA64::r0).addReg(IA64::r0).addReg(pA); + BuildMI(BB, IA64::TPCMPEQ, 3, Result) + .addReg(pTemp1).addReg(IA64::r0).addReg(IA64::r0).addReg(pB); + break; + } + // if not a bool, we just OR away: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + BuildMI(BB, IA64::OR, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + } + } + return Result; + } + + case ISD::XOR: { + switch (N.getValueType()) { + default: assert(0 && "Cannot XOR this type!"); + case MVT::i1: { // if a bool, we emit a pseudocode XOR + unsigned pY = SelectExpr(N.getOperand(0)); + unsigned pZ = SelectExpr(N.getOperand(1)); + +/* one possible routine for XOR is: + + // Compute px = py ^ pz + // using sum of products: px = (py & !pz) | (pz & !py) + // Uses 5 instructions in 3 cycles. 
+ // cycle 1 +(pz) cmp.eq.unc px = r0, r0 // px = pz +(py) cmp.eq.unc pt = r0, r0 // pt = py + ;; + // cycle 2 +(pt) cmp.ne.and px = r0, r0 // px = px & !pt (px = pz & !pt) +(pz) cmp.ne.and pt = r0, r0 // pt = pt & !pz + ;; + } { .mmi + // cycle 3 +(pt) cmp.eq.or px = r0, r0 // px = px | pt + +*** Another, which we use here, requires one scratch GR. it is: + + mov rt = 0 // initialize rt off critical path + ;; + + // cycle 1 +(pz) cmp.eq.unc px = r0, r0 // px = pz +(pz) mov rt = 1 // rt = pz + ;; + // cycle 2 +(py) cmp.ne px = 1, rt // if (py) px = !pz + +.. these routines kindly provided by Jim Hull +*/ + unsigned rt = MakeReg(MVT::i64); + + // these two temporaries will never actually appear, + // due to the two-address form of some of the instructions below + unsigned bogoPR = MakeReg(MVT::i1); // becomes Result + unsigned bogoGR = MakeReg(MVT::i64); // becomes rt + + BuildMI(BB, IA64::MOV, 1, bogoGR).addReg(IA64::r0); + BuildMI(BB, IA64::PCMPEQUNC, 3, bogoPR) + .addReg(IA64::r0).addReg(IA64::r0).addReg(pZ); + BuildMI(BB, IA64::TPCADDIMM22, 2, rt) + .addReg(bogoGR).addImm(1).addReg(pZ); + BuildMI(BB, IA64::TPCMPIMM8NE, 3, Result) + .addReg(bogoPR).addImm(1).addReg(rt).addReg(pY); + break; + } + // if not a bool, we just XOR away: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + BuildMI(BB, IA64::XOR, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + } + } + return Result; + } + + case ISD::SHL: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + BuildMI(BB, IA64::SHL, 2, Result).addReg(Tmp1).addReg(Tmp2); + return Result; + } + case ISD::SRL: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + BuildMI(BB, IA64::SHRU, 2, Result).addReg(Tmp1).addReg(Tmp2); + return Result; + } + case ISD::SRA: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + BuildMI(BB, IA64::SHRS, 2, 
Result).addReg(Tmp1).addReg(Tmp2); + return Result; + } + + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + + bool isFP=false; + + if(DestType == MVT::f64) // XXX: we're not gonna be fed MVT::f32, are we? + isFP=true; + + bool isModulus=false; // is it a division or a modulus? + bool isSigned=false; + + switch(N.getOpcode()) { + case ISD::SDIV: isModulus=false; isSigned=true; break; + case ISD::UDIV: isModulus=false; isSigned=false; break; + case ISD::SREM: isModulus=true; isSigned=true; break; + case ISD::UREM: isModulus=true; isSigned=false; break; + } + + unsigned TmpPR=MakeReg(MVT::i1); // we need a scratch predicate register, + unsigned TmpF1=MakeReg(MVT::f64); // and one metric truckload of FP regs. + unsigned TmpF2=MakeReg(MVT::f64); // lucky we have IA64? + unsigned TmpF3=MakeReg(MVT::f64); // well, the real FIXME is to have + unsigned TmpF4=MakeReg(MVT::f64); // isTwoAddress forms of these + unsigned TmpF5=MakeReg(MVT::f64); // FP instructions so we can end up with + unsigned TmpF6=MakeReg(MVT::f64); // stuff like setf.sig f10=f10 etc. + unsigned TmpF7=MakeReg(MVT::f64); + unsigned TmpF8=MakeReg(MVT::f64); + unsigned TmpF9=MakeReg(MVT::f64); + unsigned TmpF10=MakeReg(MVT::f64); + unsigned TmpF11=MakeReg(MVT::f64); + unsigned TmpF12=MakeReg(MVT::f64); + unsigned TmpF13=MakeReg(MVT::f64); + unsigned TmpF14=MakeReg(MVT::f64); + unsigned TmpF15=MakeReg(MVT::f64); + + // OK, emit some code: + + if(!isFP) { + // first, load the inputs into FP regs. 
+ BuildMI(BB, IA64::SETFSIG, 1, TmpF1).addReg(Tmp1); + BuildMI(BB, IA64::SETFSIG, 1, TmpF2).addReg(Tmp2); + + // next, convert the inputs to FP + if(isSigned) { + BuildMI(BB, IA64::FCVTXF, 1, TmpF3).addReg(TmpF1); + BuildMI(BB, IA64::FCVTXF, 1, TmpF4).addReg(TmpF2); + } else { + BuildMI(BB, IA64::FCVTXUFS1, 1, TmpF3).addReg(TmpF1); + BuildMI(BB, IA64::FCVTXUFS1, 1, TmpF4).addReg(TmpF2); + } + + } else { // this is an FP divide/remainder, so we 'leak' some temp + // regs and assign TmpF3=Tmp1, TmpF4=Tmp2 + TmpF3=Tmp1; + TmpF4=Tmp2; + } + + // we start by computing an approximate reciprocal (good to 9 bits?) + // note, this instruction writes _both_ TmpF5 (answer) and tmpPR (predicate) + // FIXME: or at least, it should!! + BuildMI(BB, IA64::FRCPAS1FLOAT, 2, TmpF5).addReg(TmpF3).addReg(TmpF4); + BuildMI(BB, IA64::FRCPAS1PREDICATE, 2, TmpPR).addReg(TmpF3).addReg(TmpF4); + + // now we apply newton's method, thrice! (FIXME: this is ~72 bits of + // precision, don't need this much for f32/i32) + BuildMI(BB, IA64::CFNMAS1, 4, TmpF6) + .addReg(TmpF4).addReg(TmpF5).addReg(IA64::F1).addReg(TmpPR); + BuildMI(BB, IA64::CFMAS1, 4, TmpF7) + .addReg(TmpF3).addReg(TmpF5).addReg(IA64::F0).addReg(TmpPR); + BuildMI(BB, IA64::CFMAS1, 4, TmpF8) + .addReg(TmpF6).addReg(TmpF6).addReg(IA64::F0).addReg(TmpPR); + BuildMI(BB, IA64::CFMAS1, 4, TmpF9) + .addReg(TmpF6).addReg(TmpF7).addReg(TmpF7).addReg(TmpPR); + BuildMI(BB, IA64::CFMAS1, 4,TmpF10) + .addReg(TmpF6).addReg(TmpF5).addReg(TmpF5).addReg(TmpPR); + BuildMI(BB, IA64::CFMAS1, 4,TmpF11) + .addReg(TmpF8).addReg(TmpF9).addReg(TmpF9).addReg(TmpPR); + BuildMI(BB, IA64::CFMAS1, 4,TmpF12) + .addReg(TmpF8).addReg(TmpF10).addReg(TmpF10).addReg(TmpPR); + BuildMI(BB, IA64::CFNMAS1, 4,TmpF13) + .addReg(TmpF4).addReg(TmpF11).addReg(TmpF3).addReg(TmpPR); + BuildMI(BB, IA64::CFMAS1, 4,TmpF14) + .addReg(TmpF13).addReg(TmpF12).addReg(TmpF11).addReg(TmpPR); + + if(!isFP) { + // round to an integer + if(isSigned) + BuildMI(BB, IA64::FCVTFXTRUNCS1, 1, 
TmpF15).addReg(TmpF14); + else + BuildMI(BB, IA64::FCVTFXUTRUNCS1, 1, TmpF15).addReg(TmpF14); + } else { + BuildMI(BB, IA64::FMOV, 1, TmpF15).addReg(TmpF14); + // EXERCISE: can you see why TmpF15=TmpF14 does not work here, and + // we really do need the above FMOV? ;) + } + + if(!isModulus) { + if(isFP) + BuildMI(BB, IA64::FMOV, 1, Result).addReg(TmpF15); + else + BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(TmpF15); + } else { // this is a modulus + if(!isFP) { + // answer = q * (-b) + a + unsigned ModulusResult = MakeReg(MVT::f64); + unsigned TmpF = MakeReg(MVT::f64); + unsigned TmpI = MakeReg(MVT::i64); + BuildMI(BB, IA64::SUB, 2, TmpI).addReg(IA64::r0).addReg(Tmp2); + BuildMI(BB, IA64::SETFSIG, 1, TmpF).addReg(TmpI); + BuildMI(BB, IA64::XMAL, 3, ModulusResult) + .addReg(TmpF15).addReg(TmpF).addReg(TmpF1); + BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(ModulusResult); + } else { // FP modulus! The horror... the horror.... + assert(0 && "sorry, no FP modulus just yet!\n!\n"); + } + } + + return Result; + } + + case ISD::ZERO_EXTEND_INREG: { + Tmp1 = SelectExpr(N.getOperand(0)); + MVTSDNode* MVN = dyn_cast(Node); + switch(MVN->getExtraValueType()) + { + default: + Node->dump(); + assert(0 && "don't know how to zero extend this type"); + break; + case MVT::i8: Opc = IA64::ZXT1; break; + case MVT::i16: Opc = IA64::ZXT2; break; + case MVT::i32: Opc = IA64::ZXT4; break; + } + BuildMI(BB, Opc, 1, Result).addReg(Tmp1); + return Result; + } + + case ISD::SIGN_EXTEND_INREG: { + Tmp1 = SelectExpr(N.getOperand(0)); + MVTSDNode* MVN = dyn_cast(Node); + switch(MVN->getExtraValueType()) + { + default: + Node->dump(); + assert(0 && "don't know how to sign extend this type"); + break; + case MVT::i8: Opc = IA64::SXT1; break; + case MVT::i16: Opc = IA64::SXT2; break; + case MVT::i32: Opc = IA64::SXT4; break; + } + BuildMI(BB, Opc, 1, Result).addReg(Tmp1); + return Result; + } + + case ISD::SETCC: { + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + if 
(SetCCSDNode *SetCC = dyn_cast(Node)) { + if (MVT::isInteger(SetCC->getOperand(0).getValueType())) { + switch (SetCC->getCondition()) { + default: assert(0 && "Unknown integer comparison!"); + case ISD::SETEQ: + BuildMI(BB, IA64::CMPEQ, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETGT: + BuildMI(BB, IA64::CMPGT, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETGE: + BuildMI(BB, IA64::CMPGE, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETLT: + BuildMI(BB, IA64::CMPLT, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETLE: + BuildMI(BB, IA64::CMPLE, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETNE: + BuildMI(BB, IA64::CMPNE, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETULT: + BuildMI(BB, IA64::CMPLTU, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETUGT: + BuildMI(BB, IA64::CMPGTU, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETULE: + BuildMI(BB, IA64::CMPLEU, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETUGE: + BuildMI(BB, IA64::CMPGEU, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + } + } + else { // if not integer, should be FP. FIXME: what about bools? 
;) + assert(SetCC->getOperand(0).getValueType() != MVT::f32 && + "error: SETCC should have had incoming f32 promoted to f64!\n"); + switch (SetCC->getCondition()) { + default: assert(0 && "Unknown FP comparison!"); + case ISD::SETEQ: + BuildMI(BB, IA64::FCMPEQ, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETGT: + BuildMI(BB, IA64::FCMPGT, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETGE: + BuildMI(BB, IA64::FCMPGE, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETLT: + BuildMI(BB, IA64::FCMPLT, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETLE: + BuildMI(BB, IA64::FCMPLE, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETNE: + BuildMI(BB, IA64::FCMPNE, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETULT: + BuildMI(BB, IA64::FCMPLTU, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETUGT: + BuildMI(BB, IA64::FCMPGTU, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETULE: + BuildMI(BB, IA64::FCMPLEU, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + case ISD::SETUGE: + BuildMI(BB, IA64::FCMPGEU, 2, Result).addReg(Tmp1).addReg(Tmp2); + break; + } + } + } + else + assert(0 && "this setcc not implemented yet"); + + return Result; + } + + case ISD::EXTLOAD: + case ISD::ZEXTLOAD: + case ISD::LOAD: { + // Make sure we generate both values. 
+ if (Result != 1) + ExprMap[N.getValue(1)] = 1; // Generate the token + else + Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType()); + + bool isBool=false; + + if(opcode == ISD::LOAD) { // this is a LOAD + switch (Node->getValueType(0)) { + default: assert(0 && "Cannot load this type!"); + case MVT::i1: Opc = IA64::LD1; isBool=true; break; + // FIXME: for now, we treat bool loads the same as i8 loads */ + case MVT::i8: Opc = IA64::LD1; break; + case MVT::i16: Opc = IA64::LD2; break; + case MVT::i32: Opc = IA64::LD4; break; + case MVT::i64: Opc = IA64::LD8; break; + + case MVT::f32: Opc = IA64::LDF4; break; + case MVT::f64: Opc = IA64::LDF8; break; + } + } else { // this is an EXTLOAD or ZEXTLOAD + MVT::ValueType TypeBeingLoaded = cast(Node)->getExtraValueType(); + switch (TypeBeingLoaded) { + default: assert(0 && "Cannot extload/zextload this type!"); + // FIXME: bools? + case MVT::i8: Opc = IA64::LD1; break; + case MVT::i16: Opc = IA64::LD2; break; + case MVT::i32: Opc = IA64::LD4; break; + case MVT::f32: Opc = IA64::LDF4; break; + } + } + + SDOperand Chain = N.getOperand(0); + SDOperand Address = N.getOperand(1); + + if(Address.getOpcode() == ISD::GlobalAddress) { + Select(Chain); + unsigned dummy = MakeReg(MVT::i64); + unsigned dummy2 = MakeReg(MVT::i64); + BuildMI(BB, IA64::ADD, 2, dummy) + .addGlobalAddress(cast(Address)->getGlobal()) + .addReg(IA64::r1); + BuildMI(BB, IA64::LD8, 1, dummy2).addReg(dummy); + if(!isBool) + BuildMI(BB, Opc, 1, Result).addReg(dummy2); + else { // emit a little pseudocode to load a bool (stored in one byte) + // into a predicate register + assert(Opc==IA64::LD1 && "problem loading a bool"); + unsigned dummy3 = MakeReg(MVT::i64); + BuildMI(BB, Opc, 1, dummy3).addReg(dummy2); + // we compare to 0. true? 0. false? 1. 
+ BuildMI(BB, IA64::CMPNE, 2, Result).addReg(dummy3).addReg(IA64::r0); + } + } else if(ConstantPoolSDNode *CP = dyn_cast(Address)) { + Select(Chain); + IA64Lowering.restoreGP(BB); + unsigned dummy = MakeReg(MVT::i64); + BuildMI(BB, IA64::ADD, 2, dummy).addConstantPoolIndex(CP->getIndex()) + .addReg(IA64::r1); // CPI+GP + if(!isBool) + BuildMI(BB, Opc, 1, Result).addReg(dummy); + else { // emit a little pseudocode to load a bool (stored in one byte) + // into a predicate register + assert(Opc==IA64::LD1 && "problem loading a bool"); + unsigned dummy3 = MakeReg(MVT::i64); + BuildMI(BB, Opc, 1, dummy3).addReg(dummy); + // we compare to 0. true? 0. false? 1. + BuildMI(BB, IA64::CMPNE, 2, Result).addReg(dummy3).addReg(IA64::r0); + } + } else if(Address.getOpcode() == ISD::FrameIndex) { + Select(Chain); // FIXME ? what about bools? + unsigned dummy = MakeReg(MVT::i64); + BuildMI(BB, IA64::MOV, 1, dummy) + .addFrameIndex(cast(Address)->getIndex()); + if(!isBool) + BuildMI(BB, Opc, 1, Result).addReg(dummy); + else { // emit a little pseudocode to load a bool (stored in one byte) + // into a predicate register + assert(Opc==IA64::LD1 && "problem loading a bool"); + unsigned dummy3 = MakeReg(MVT::i64); + BuildMI(BB, Opc, 1, dummy3).addReg(dummy); + // we compare to 0. true? 0. false? 1. + BuildMI(BB, IA64::CMPNE, 2, Result).addReg(dummy3).addReg(IA64::r0); + } + } else { // none of the above... + Select(Chain); + Tmp2 = SelectExpr(Address); + if(!isBool) + BuildMI(BB, Opc, 1, Result).addReg(Tmp2); + else { // emit a little pseudocode to load a bool (stored in one byte) + // into a predicate register + assert(Opc==IA64::LD1 && "problem loading a bool"); + unsigned dummy = MakeReg(MVT::i64); + BuildMI(BB, Opc, 1, dummy).addReg(Tmp2); + // we compare to 0. true? 0. false? 1. 
+ BuildMI(BB, IA64::CMPNE, 2, Result).addReg(dummy).addReg(IA64::r0); + } + } + + return Result; + } + + case ISD::CopyFromReg: { + if (Result == 1) + Result = ExprMap[N.getValue(0)] = + MakeReg(N.getValue(0).getValueType()); + + SDOperand Chain = N.getOperand(0); + + Select(Chain); + unsigned r = dyn_cast(Node)->getReg(); + + if(N.getValueType() == MVT::i1) // if a bool, we use pseudocode + BuildMI(BB, IA64::PCMPEQUNC, 3, Result) + .addReg(IA64::r0).addReg(IA64::r0).addReg(r); + // (r) Result =cmp.eq.unc(r0,r0) + else + BuildMI(BB, IA64::MOV, 1, Result).addReg(r); // otherwise MOV + return Result; + } + + case ISD::CALL: { + Select(N.getOperand(0)); + + // The chain for this call is now lowered. + ExprMap.insert(std::make_pair(N.getValue(Node->getNumValues()-1), 1)); + + //grab the arguments + std::vector argvregs; + + for(int i = 2, e = Node->getNumOperands(); i < e; ++i) + argvregs.push_back(SelectExpr(N.getOperand(i))); + + // see section 8.5.8 of "Itanium Software Conventions and + // Runtime Architecture Guide to see some examples of what's going + // on here. (in short: int args get mapped 1:1 'slot-wise' to out0->out7, + // while FP args get mapped to F8->F15 as needed) + + unsigned used_FPArgs=0; // how many FP Args have been used so far? + + // in reg args + for(int i = 0, e = std::min(8, (int)argvregs.size()); i < e; ++i) + { + unsigned intArgs[] = {IA64::out0, IA64::out1, IA64::out2, IA64::out3, + IA64::out4, IA64::out5, IA64::out6, IA64::out7 }; + unsigned FPArgs[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11, + IA64::F12, IA64::F13, IA64::F14, IA64::F15 }; + + switch(N.getOperand(i+2).getValueType()) + { + default: // XXX do we need to support MVT::i1 here? 
+ Node->dump(); + N.getOperand(i).Val->dump(); + std::cerr << "Type for " << i << " is: " << + N.getOperand(i+2).getValueType() << std::endl; + assert(0 && "Unknown value type for call"); + case MVT::i64: + BuildMI(BB, IA64::MOV, 1, intArgs[i]).addReg(argvregs[i]); + break; + case MVT::f64: + BuildMI(BB, IA64::FMOV, 1, FPArgs[used_FPArgs++]) + .addReg(argvregs[i]); + BuildMI(BB, IA64::GETFD, 1, intArgs[i]).addReg(argvregs[i]); + break; + } + } + + //in mem args + for (int i = 8, e = argvregs.size(); i < e; ++i) + { + unsigned tempAddr = MakeReg(MVT::i64); + + switch(N.getOperand(i+2).getValueType()) { + default: + Node->dump(); + N.getOperand(i).Val->dump(); + std::cerr << "Type for " << i << " is: " << + N.getOperand(i+2).getValueType() << "\n"; + assert(0 && "Unknown value type for call"); + case MVT::i1: // FIXME? + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + BuildMI(BB, IA64::ADDIMM22, 2, tempAddr) + .addReg(IA64::r12).addImm(16 + (i - 8) * 8); // r12 is SP + BuildMI(BB, IA64::ST8, 2).addReg(tempAddr).addReg(argvregs[i]); + break; + case MVT::f32: + case MVT::f64: + BuildMI(BB, IA64::ADDIMM22, 2, tempAddr) + .addReg(IA64::r12).addImm(16 + (i - 8) * 8); // r12 is SP + BuildMI(BB, IA64::STF8, 2).addReg(tempAddr).addReg(argvregs[i]); + break; + } + } + //build the right kind of call + if (GlobalAddressSDNode *GASD = + dyn_cast(N.getOperand(1))) + { + BuildMI(BB, IA64::BRCALL, 1).addGlobalAddress(GASD->getGlobal(),true); + IA64Lowering.restoreGP_SP_RP(BB); + } + + else if (ExternalSymbolSDNode *ESSDN = + dyn_cast(N.getOperand(1))) + { + BuildMI(BB, IA64::BRCALL, 0) + .addExternalSymbol(ESSDN->getSymbol(), true); + IA64Lowering.restoreGP_SP_RP(BB); + } + else { + // no need to restore GP as we are doing an indirect call + Tmp1 = SelectExpr(N.getOperand(1)); + // b6 is a scratch branch register, we load the target: + BuildMI(BB, IA64::MOV, 1, IA64::B6).addReg(Tmp1); + // and then jump: (well, call) + BuildMI(BB, IA64::BRCALL, 
1).addReg(IA64::B6);
+        IA64Lowering.restoreGP_SP_RP(BB);
+      }
+
+      switch (Node->getValueType(0)) {
+      default: assert(0 && "Unknown value type for call result!");
+      case MVT::Other: return 1;
+      case MVT::i1:
+        BuildMI(BB, IA64::CMPNE, 2, Result)
+          .addReg(IA64::r8).addReg(IA64::r0);
+        break;
+      case MVT::i8:
+      case MVT::i16:
+      case MVT::i32:
+      case MVT::i64:
+        BuildMI(BB, IA64::MOV, 1, Result).addReg(IA64::r8);
+        break;
+      case MVT::f64:
+        BuildMI(BB, IA64::FMOV, 1, Result).addReg(IA64::F8);
+        break;
+      }
+      return Result+N.ResNo;
+    }
+
+  } // <- uhhh XXX
+  return 0;
+}
+/// Select - Emit code for side-effecting nodes; loads/calls go to SelectExpr.
+void ISel::Select(SDOperand N) {
+  unsigned Tmp1, Tmp2, Opc;
+  unsigned opcode = N.getOpcode();
+
+  // FIXME: Disable for our current expansion model!
+  if (/*!N->hasOneUse() &&*/ !LoweredTokens.insert(N).second)
+    return;  // Already selected.
+
+  SDNode *Node = N.Val;
+
+  switch (Node->getOpcode()) {
+  default:
+    Node->dump(); std::cerr << "\n";
+    assert(0 && "Node not handled yet!");
+
+  case ISD::EntryToken: return;  // Noop
+
+  case ISD::TokenFactor: {
+    for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+      Select(Node->getOperand(i));
+    return;
+  }
+
+  case ISD::CopyToReg: {
+    Select(N.getOperand(0));
+    Tmp1 = SelectExpr(N.getOperand(1));
+    Tmp2 = cast<RegSDNode>(N)->getReg();
+    // The node itself only yields a chain, so test the copied value's type.
+    if (Tmp1 != Tmp2) {
+      if(N.getOperand(1).getValueType() == MVT::i1) // bool: use pseudocode
+        BuildMI(BB, IA64::PCMPEQUNC, 3, Tmp2)
+          .addReg(IA64::r0).addReg(IA64::r0).addReg(Tmp1);
+                                   // (Tmp1) Tmp2 = cmp.eq.unc(r0,r0)
+      else
+        BuildMI(BB, IA64::MOV, 1, Tmp2).addReg(Tmp1);
+                      // XXX is this the right way 'round? ;)
+    }
+    return;
+  }
+
+  case ISD::RET: {
+
+  /* what the heck is going on here:
+
+<_sabre_> ret with two operands is obvious: chain and value
+ yep
+<_sabre_> ret with 3 values happens when 'expansion' occurs
+<_sabre_> e.g. i64 gets split into 2x i32
+ oh right
+<_sabre_> you don't have this case on ia64
+ yep
+<_sabre_> so the two returned values go into EAX/EDX on ia32
+ ahhh *memories*
+<_sabre_> :)
+ ok, thanks :)
+<_sabre_> so yeah, everything that has a side effect takes a 'token chain'
+<_sabre_> this is the first operand always
+<_sabre_> these operand often define chains, they are the last operand
+<_sabre_> they are printed as 'ch' if you do DAG.dump()
+  */
+
+    switch (N.getNumOperands()) {
+    default:
+      assert(0 && "Unknown return instruction!");
+    case 2:
+      Select(N.getOperand(0));
+      Tmp1 = SelectExpr(N.getOperand(1));
+      switch (N.getOperand(1).getValueType()) {
+      default: assert(0 && "All other types should have been promoted!!");
+               // FIXME: do I need to add support for bools here?
+               // (return '0' or '1' r8, basically...)
+      case MVT::i64:
+        BuildMI(BB, IA64::MOV, 1, IA64::r8).addReg(Tmp1);
+        break;
+      case MVT::f64:
+        BuildMI(BB, IA64::FMOV, 1, IA64::F8).addReg(Tmp1);
+      }
+      break;
+    case 1:
+      Select(N.getOperand(0));
+      break;
+    }
+    // before returning, restore the ar.pfs register (set by the 'alloc' up top)
+    BuildMI(BB, IA64::MOV, 1).addReg(IA64::AR_PFS).addReg(IA64Lowering.VirtGPR);
+    BuildMI(BB, IA64::RET, 0); // and then just emit a 'ret' instruction
+    return;
+  }
+
+  case ISD::BR: {
+    Select(N.getOperand(0));
+    MachineBasicBlock *Dest =
+      cast<BasicBlockSDNode>(N.getOperand(1))->getBasicBlock();
+    BuildMI(BB, IA64::BRLCOND_NOTCALL, 1).addReg(IA64::p0).addMBB(Dest);
+    // XXX HACK! we do _not_ need long branches all the time
+    return;
+  }
+
+  case ISD::ImplicitDef: {
+    Select(N.getOperand(0));
+    BuildMI(BB, IA64::IDEF, 0, cast<RegSDNode>(N)->getReg());
+    return;
+  }
+
+  case ISD::BRCOND: {
+    MachineBasicBlock *Dest =
+      cast<BasicBlockSDNode>(N.getOperand(2))->getBasicBlock();
+
+    Select(N.getOperand(0));
+    Tmp1 = SelectExpr(N.getOperand(1));
+    BuildMI(BB, IA64::BRLCOND_NOTCALL, 1).addReg(Tmp1).addMBB(Dest);
+    // XXX HACK! we do _not_ need long branches all the time
+    return;
+  }
+
+  case ISD::EXTLOAD:
+  case ISD::ZEXTLOAD:
+  case ISD::SEXTLOAD:
+  case ISD::LOAD:
+  case ISD::CALL:
+  case ISD::CopyFromReg:
+  case ISD::DYNAMIC_STACKALLOC:
+    SelectExpr(N);
+    return;
+
+  case ISD::TRUNCSTORE:
+  case ISD::STORE: {
+    Select(N.getOperand(0));
+    Tmp1 = SelectExpr(N.getOperand(1)); // value
+
+    bool isBool=false;
+
+    if(opcode == ISD::STORE) {
+      switch (N.getOperand(1).getValueType()) {
+      default: assert(0 && "Cannot store this type!");
+      case MVT::i1:  Opc = IA64::ST1; isBool=true; break;
+      // FIXME?: for now, we treat bool stores the same as i8 stores
+      case MVT::i8:  Opc = IA64::ST1; break;
+      case MVT::i16: Opc = IA64::ST2; break;
+      case MVT::i32: Opc = IA64::ST4; break;
+      case MVT::i64: Opc = IA64::ST8; break;
+
+      case MVT::f32: Opc = IA64::STF4; break;
+      case MVT::f64: Opc = IA64::STF8; break;
+      }
+    } else { // truncstore
+      switch(cast<MVTSDNode>(Node)->getExtraValueType()) {
+      default: assert(0 && "unknown type in truncstore");
+      case MVT::i1: Opc = IA64::ST1; isBool=true; break;
+                    //FIXME: DAG does not promote this store?
+      case MVT::i8: Opc = IA64::ST1; break;
+      case MVT::i16: Opc = IA64::ST2; break;
+      case MVT::i32: Opc = IA64::ST4; break;
+      case MVT::f32: Opc = IA64::STF4; break;
+      }
+    }
+
+    if(N.getOperand(2).getOpcode() == ISD::GlobalAddress) {
+      unsigned dummy = MakeReg(MVT::i64);
+      unsigned dummy2 = MakeReg(MVT::i64);
+      BuildMI(BB, IA64::ADD, 2, dummy)
+        .addGlobalAddress(cast<GlobalAddressSDNode>
+            (N.getOperand(2))->getGlobal()).addReg(IA64::r1);
+      BuildMI(BB, IA64::LD8, 1, dummy2).addReg(dummy);
+
+      if(!isBool)
+        BuildMI(BB, Opc, 2).addReg(dummy2).addReg(Tmp1);
+      else { // we are storing a bool, so emit a little pseudocode
+             // to store a predicate register as one byte
+        assert(Opc==IA64::ST1);
+        unsigned dummy3 = MakeReg(MVT::i64);
+        unsigned dummy4 = MakeReg(MVT::i64);
+        BuildMI(BB, IA64::MOV, 1, dummy3).addReg(IA64::r0);
+        BuildMI(BB, IA64::TPCADDIMM22, 3, dummy4) // two-address: dummy4 starts 0
+          .addReg(dummy3).addImm(1).addReg(Tmp1); // if(Tmp1) dummy4=dummy3+1;
+        BuildMI(BB, Opc, 2).addReg(dummy2).addReg(dummy4);
+      }
+    } else if(N.getOperand(2).getOpcode() == ISD::FrameIndex) {
+
+      // FIXME? (what about bools?)
+
+      unsigned dummy = MakeReg(MVT::i64);
+      BuildMI(BB, IA64::MOV, 1, dummy)
+        .addFrameIndex(cast<FrameIndexSDNode>(N.getOperand(2))->getIndex());
+      BuildMI(BB, Opc, 2).addReg(dummy).addReg(Tmp1);
+    } else { // otherwise
+      Tmp2 = SelectExpr(N.getOperand(2)); //address
+      if(!isBool)
+        BuildMI(BB, Opc, 2).addReg(Tmp2).addReg(Tmp1);
+      else { // we are storing a bool, so emit a little pseudocode
+             // to store a predicate register as one byte
+        assert(Opc==IA64::ST1);
+        unsigned dummy3 = MakeReg(MVT::i64);
+        unsigned dummy4 = MakeReg(MVT::i64);
+        BuildMI(BB, IA64::MOV, 1, dummy3).addReg(IA64::r0);
+        BuildMI(BB, IA64::TPCADDIMM22, 3, dummy4) // two-address: dummy4 starts 0
+          .addReg(dummy3).addImm(1).addReg(Tmp1); // if(Tmp1) dummy4=dummy3+1;
+        BuildMI(BB, Opc, 2).addReg(Tmp2).addReg(dummy4);
+      }
+    }
+    return;
+  }
+
+  case ISD::ADJCALLSTACKDOWN:
+  case ISD::ADJCALLSTACKUP: {
+    Select(N.getOperand(0));
+    Tmp1 = cast<ConstantSDNode>(N.getOperand(1))->getValue();
+
+    Opc = N.getOpcode() == ISD::ADJCALLSTACKDOWN ? IA64::ADJUSTCALLSTACKDOWN :
+                                                   IA64::ADJUSTCALLSTACKUP;
+    BuildMI(BB, Opc, 1).addImm(Tmp1);
+    return;
+  }
+
+  return;
+  }
+  assert(0 && "GAME OVER. INSERT COIN?");
+}
+
+
+/// createIA64PatternInstructionSelector - This pass converts an LLVM function
+/// into a machine code representation using pattern matching and a machine
+/// description file.
+///
+FunctionPass *llvm::createIA64PatternInstructionSelector(TargetMachine &TM) {
+  return new ISel(TM);
+}
+
+
diff --git a/lib/Target/IA64/IA64InstrBuilder.h b/lib/Target/IA64/IA64InstrBuilder.h
new file mode 100644
index 00000000000..f61d42e7090
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrBuilder.h
@@ -0,0 +1,52 @@
+//===-- IA64InstrBuilder.h - Aids for building IA64 insts -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64_INSTRBUILDER_H
+#define IA64_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function.  This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+/// When 'mem' is true the offset is added before the frame index, else after.
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+                  bool mem = true) {
+  if (mem)
+    return MIB.addSImm(Offset).addFrameIndex(FI);
+  else
+    return MIB.addFrameIndex(FI).addSImm(Offset);
+}
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool.  The
+/// reference has base register ConstantPoolIndex offset which is retained until
+/// either machine code emission or assembly output.  This allows an optional
+/// offset to be added as well.
+/// The offset operand is added before the constant pool index.
+inline const MachineInstrBuilder&
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+                         int Offset = 0) {
+  return MIB.addSImm(Offset).addConstantPoolIndex(CPI);
+}
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrFormats.td b/lib/Target/IA64/IA64InstrFormats.td
new file mode 100644
index 00000000000..9d07acac724
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrFormats.td
@@ -0,0 +1,67 @@
+//===- IA64InstrFormats.td - IA64 Instruction Formats --*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  - Warning: the stuff in here isn't really being used, so is mostly
+//             junk. It'll get fixed as the JIT gets built.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstIA64<bits<4> op, dag OL, string asmstr> : Instruction {
+  // IA64 instruction baseline
+  field bits<41> Inst;
+  let Namespace = "IA64";
+  let OperandList = OL;
+  let AsmString = asmstr;
+  // the 4-bit major opcode occupies the top of the 41-bit instruction slot
+  let Inst{40-37} = op;
+}
+
+//"Each Itanium instruction is categorized into one of six types."
+//We should have:
+// A, I, M, F, B, L+X
+
+class AForm<bits<4> opcode, bits<6> qpReg, dag OL, string asmstr> :
+  InstIA64<opcode, OL, asmstr> {
+
+  let Inst{5-0} = qpReg;
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BForm<bits<4> opcode, bits<6> x6, bits<3> btype, dag OL, string asmstr> :
+  InstIA64<opcode, OL, asmstr> {
+
+  let Inst{32-27} = x6;
+  let Inst{8-6} = btype;
+}
+
+class MForm<bits<4> opcode, bits<6> x6, dag OL, string asmstr> :
+  InstIA64<opcode, OL, asmstr> {
+  bits<7> Ra;
+  bits<7> Rb;
+  bits<16> disp;
+
+  let Inst{35-30} = x6;
+//  let Inst{20-16} = Rb;
+  let Inst{15-0} = disp;
+}
+
+class RawForm<bits<4> opcode, bits<26> rest, dag OL, string asmstr> :
+  InstIA64<opcode, OL, asmstr> {
+  let Inst{25-0} = rest;
+}
+
+// Pseudo instructions.
+class PseudoInstIA64<dag OL, string nm> : InstIA64<0, OL, nm> {
+}
+
+
diff --git a/lib/Target/IA64/IA64InstrInfo.cpp b/lib/Target/IA64/IA64InstrInfo.cpp
new file mode 100644
index 00000000000..04662baf2ce
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.cpp
@@ -0,0 +1,47 @@
+//===- IA64InstrInfo.cpp - IA64 Instruction Information -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64InstrInfo.h"
+#include "IA64.h"
+#include "IA64InstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "IA64GenInstrInfo.inc"
+using namespace llvm;
+
+IA64InstrInfo::IA64InstrInfo()
+  : TargetInstrInfo(IA64Insts, sizeof(IA64Insts)/sizeof(IA64Insts[0])) {
+}
+/// isMoveInstr - Return true, setting sourceReg/destReg, when MI is a MOV or
+/// FMOV whose two operands are both registers; otherwise leave them untouched.
+bool IA64InstrInfo::isMoveInstr(const MachineInstr& MI,
+                                unsigned& sourceReg,
+                                unsigned& destReg) const {
+  MachineOpCode oc = MI.getOpcode();
+  if (oc == IA64::MOV || oc == IA64::FMOV) {
+    assert(MI.getNumOperands() == 2 &&
+           /* MI.getOperand(0).isRegister() &&
+           MI.getOperand(1).isRegister() && */
+           "invalid register-register move instruction");
+    if( MI.getOperand(0).isRegister() &&
+        MI.getOperand(1).isRegister() ) {
+      // if both operands of the MOV/FMOV are registers, then
+      // yes, this is a move instruction
+      sourceReg = MI.getOperand(1).getReg();
+      destReg = MI.getOperand(0).getReg();
+      return true;
+    }
+  }
+  return false; // we don't consider e.g. %regN = MOV <non-register operand>
+                // a move instruction
+}
+
diff --git a/lib/Target/IA64/IA64InstrInfo.h b/lib/Target/IA64/IA64InstrInfo.h
new file mode 100644
index 00000000000..5a96356873d
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.h
@@ -0,0 +1,50 @@
+//===- IA64InstrInfo.h - IA64 Instruction Information ----------*- C++ -*- ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64INSTRUCTIONINFO_H
+#define IA64INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "IA64RegisterInfo.h"
+
+namespace llvm {
+
+/// IA64InstrInfo - IA64 implementation of TargetInstrInfo.  It owns the IA64
+/// register info and reports which instructions are register-register moves.
+/// (The IA64II target-flag namespace once documented here no longer exists.)
+
+  class IA64InstrInfo : public TargetInstrInfo {
+  const IA64RegisterInfo RI;
+public:
+  IA64InstrInfo();
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+  //
+  // Return true if the instruction is a register to register move and
+  // leave the source and dest operands in the passed parameters.
+  //
+  virtual bool isMoveInstr(const MachineInstr& MI,
+                           unsigned& sourceReg,
+                           unsigned& destReg) const;
+
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td
new file mode 100644
index 00000000000..f3b014d8639
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.td
@@ -0,0 +1,319 @@
+//===- IA64InstrInfo.td - Describe the IA64 Instruction Set -*- tablegen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the IA64 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+// +//===----------------------------------------------------------------------===// + +include "IA64InstrFormats.td" + +def u6imm : Operand; +def s16imm : Operand; +def s21imm : Operand { + let PrintMethod = "printS21ImmOperand"; +} +def u32imm : Operand { + let PrintMethod = "printU32ImmOperand"; +} +def s32imm : Operand { + let PrintMethod = "printS32ImmOperand"; +} +def u64imm : Operand { + let PrintMethod = "printU64ImmOperand"; +} + +// the asmprinter needs to know about calls +let PrintMethod = "printCallOperand" in + def calltarget : Operand; + +def PHI : PseudoInstIA64<(ops), "PHI">; +def IDEF : PseudoInstIA64<(ops), "// IDEF">; +def WTF : PseudoInstIA64<(ops), "que??">; +def ADJUSTCALLSTACKUP : PseudoInstIA64<(ops), "// ADJUSTCALLSTACKUP">; +def ADJUSTCALLSTACKDOWN : PseudoInstIA64<(ops), "// ADJUSTCALLSTACKDOWN">; +def PSEUDO_ALLOC : PseudoInstIA64<(ops), "// PSEUDO_ALLOC">; + +def ALLOC : AForm<0x03, 0x0b, + (ops GR:$dst, i8imm:$inputs, i8imm:$locals, i8imm:$outputs, i8imm:$rotating), + "alloc $dst = ar.pfs,$inputs,$locals,$outputs,$rotating;;">; + +def MOV : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "mov $dst = $src;;">; +def PMOV : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src, PR:$qp), + "($qp) mov $dst = $src;;">; + +def SPILL_ALL_PREDICATES_TO_GR : AForm<0x03, 0x0b, (ops GR:$dst), + "mov $dst = pr;;">; +def FILL_ALL_PREDICATES_FROM_GR : AForm<0x03, 0x0b, (ops GR:$src), + "mov pr = $src;;">; + +let isTwoAddress = 1 in { + def CMOV : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src2, GR:$src, PR:$qp), + "($qp) mov $dst = $src;;">; +} + +let isTwoAddress = 1 in { + def TCMPNE : AForm<0x03, 0x0b, + (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4), + "cmp.ne $dst, p0 = $src3, $src4;;">; + + def TPCMPEQOR : AForm<0x03, 0x0b, + (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4, PR:$qp), + "($qp) cmp.eq.or $dst, p0 = $src3, $src4;;">; + + def TPCMPNE : AForm<0x03, 0x0b, + (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4, PR:$qp), + "($qp) cmp.ne $dst, p0 = $src3, $src4;;">; + + def 
TPCMPEQ : AForm<0x03, 0x0b, + (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4, PR:$qp), + "($qp) cmp.eq $dst, p0 = $src3, $src4;;">; +} + +def MOVI32 : AForm<0x03, 0x0b, (ops GR:$dst, u32imm:$imm), + "mov $dst = $imm;;">; +def MOVLI32 : AForm<0x03, 0x0b, (ops GR:$dst, u32imm:$imm), + "movl $dst = $imm;;">; +def MOVLSI32 : AForm<0x03, 0x0b, (ops GR:$dst, s32imm:$imm), + "movl $dst = $imm;;">; +def MOVLI64 : AForm<0x03, 0x0b, (ops GR:$dst, u64imm:$imm), + "movl $dst = $imm;;">; + +def AND : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), + "and $dst = $src1, $src2;;">; +def OR : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), + "or $dst = $src1, $src2;;">; +def XOR : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), + "xor $dst = $src1, $src2;;">; +def SHL : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), + "shl $dst = $src1, $src2;;">; +def SHLI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s21imm:$imm), + "shl $dst = $src1, $imm;;">; // FIXME: 6 immediate bits, not 21 +def SHRU : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), + "shr.u $dst = $src1, $src2;;">; +def SHRS : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), + "shr $dst = $src1, $src2;;">; + +def DEPZ : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2), "dep.z $dst = $src1, $imm1, $imm2;;">; + +def SXT1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt1 $dst = $src;;">; +def ZXT1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt1 $dst = $src;;">; +def SXT2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt2 $dst = $src;;">; +def ZXT2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt2 $dst = $src;;">; +def SXT4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt4 $dst = $src;;">; +def ZXT4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt4 $dst = $src;;">; + +// the following are all a bit unfortunate: we throw away the complement +// of the compare! 
+def CMPEQ : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.eq $dst, p0 = $src1, $src2;;">; +def CMPGT : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.gt $dst, p0 = $src1, $src2;;">; +def CMPGE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.ge $dst, p0 = $src1, $src2;;">; +def CMPLT : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.lt $dst, p0 = $src1, $src2;;">; +def CMPLE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.le $dst, p0 = $src1, $src2;;">; +def CMPNE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.ne $dst, p0 = $src1, $src2;;">; +def CMPLTU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.ltu $dst, p0 = $src1, $src2;;">; +def CMPGTU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.gtu $dst, p0 = $src1, $src2;;">; +def CMPLEU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.leu $dst, p0 = $src1, $src2;;">; +def CMPGEU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), + "cmp.geu $dst, p0 = $src1, $src2;;">; + +// and we do the whole thing again for FP compares! 
+def FCMPEQ : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.eq $dst, p0 = $src1, $src2;;">; +def FCMPGT : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.gt $dst, p0 = $src1, $src2;;">; +def FCMPGE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.ge $dst, p0 = $src1, $src2;;">; +def FCMPLT : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.lt $dst, p0 = $src1, $src2;;">; +def FCMPLE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.le $dst, p0 = $src1, $src2;;">; +def FCMPNE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.neq $dst, p0 = $src1, $src2;;">; +def FCMPLTU : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.ltu $dst, p0 = $src1, $src2;;">; +def FCMPGTU : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.gtu $dst, p0 = $src1, $src2;;">; +def FCMPLEU : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.leu $dst, p0 = $src1, $src2;;">; +def FCMPGEU : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "fcmp.geu $dst, p0 = $src1, $src2;;">; + +def PCMPEQOR : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp), + "($qp) cmp.eq.or $dst, p0 = $src1, $src2;;">; +def PCMPEQUNC : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp), + "($qp) cmp.eq.unc $dst, p0 = $src1, $src2;;">; +def PCMPNE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp), + "($qp) cmp.ne $dst, p0 = $src1, $src2;;">; + +// two destinations! 
+def BCMPEQ : AForm<0x03, 0x0b, (ops PR:$dst1, PR:$dst2, GR:$src1, GR:$src2),
+  "cmp.eq $dst1, $dst2 = $src1, $src2;;">;
+// ^ both predicate targets must be '$'-substituted operands
+def ADD : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+  "add $dst = $src1, $src2;;">;
+
+def ADDIMM22 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s21imm:$imm),
+  "add $dst = $imm, $src1;;">;
+def CADDIMM22 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s21imm:$imm, PR:$qp),
+  "($qp) add $dst = $imm, $src1;;">;
+
+let isTwoAddress = 1 in {
+def TPCADDIMM22 : AForm<0x03, 0x0b,
+  (ops GR:$dst, GR:$src1, s21imm:$imm, PR:$qp),
+  "($qp) add $dst = $imm, $dst;;">;
+def TPCMPIMM8NE : AForm<0x03, 0x0b,
+  (ops PR:$dst, PR:$src1, s21imm:$imm, GR:$src2, PR:$qp),
+  "($qp) cmp.ne $dst , p0 = $imm, $src2;;">;
+}
+
+def SUB : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+  "sub $dst = $src1, $src2;;">;
+
+def ST1 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+  "st1 [$dstPtr] = $value;;">;
+def ST2 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+  "st2 [$dstPtr] = $value;;">;
+def ST4 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+  "st4 [$dstPtr] = $value;;">;
+def ST8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, GR:$value),
+  "st8 [$dstPtr] = $value;;">;
+
+def LD1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+  "ld1 $dst = [$srcPtr];;">;
+def LD2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+  "ld2 $dst = [$srcPtr];;">;
+def LD4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+  "ld4 $dst = [$srcPtr];;">;
+def LD8 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$srcPtr),
+  "ld8 $dst = [$srcPtr];;">;
+
+// some FP stuff:
+def FADD : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+  "fadd $dst = $src1, $src2;;">;
+def FADDS: AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+  "fadd.s $dst = $src1, $src2;;">;
+def FSUB : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+  "fsub $dst = $src1, $src2;;">;
+def FMPY : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2),
+  "fmpy $dst = $src1, $src2;;">;
+def FMOV : AForm<0x03, 0x0b, (ops FP:$dst,
FP:$src), + "mov $dst = $src;;">; // XXX: there _is_ no fmov +def FMA : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3), + "fma $dst = $src1, $src2, $src3;;">; +def FNMA : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3), + "fnma $dst = $src1, $src2, $src3;;">; + +def CFMAS1 : AForm<0x03, 0x0b, + (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp), + "($qp) fma.s1 $dst = $src1, $src2, $src3;;">; +def CFNMAS1 : AForm<0x03, 0x0b, + (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp), + "($qp) fnma.s1 $dst = $src1, $src2, $src3;;">; + +// FIXME: we 'explode' FRCPA (which should write two registers) into two +// operations that write one each. this is a waste, and is also destroying +// f127. not cool. +def FRCPAS1FLOAT : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2), + "frcpa.s1 $dst , p0 = $src1, $src2;;">; +// XXX: this _will_ break things: (f127) +def FRCPAS1PREDICATE : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), + "frcpa.s1 f127 , $dst = $src1, $src2;; // XXX FIXME!!!!">; + +def XMAL : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3), + "xma.l $dst = $src1, $src2, $src3;;">; + +def FCVTXF : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.xf $dst = $src;;">; +def FCVTXUF : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.xuf $dst = $src;;">; +def FCVTXUFS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.xuf.s1 $dst = $src;;">; +def FCVTFX : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.fx $dst = $src;;">; +def FCVTFXU : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.fxu $dst = $src;;">; + +def FCVTFXTRUNC : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.fx.trunc $dst = $src;;">; +def FCVTFXUTRUNC : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.fxu.trunc $dst = $src;;">; + +def FCVTFXTRUNCS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.fx.trunc.s1 $dst = $src;;">; +def FCVTFXUTRUNCS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fcvt.fxu.trunc.s1 $dst = $src;;">; + +def FNORMD : 
AForm<0x03, 0x0b, (ops FP:$dst, FP:$src), + "fnorm.d $dst = $src;;">; + +def GETFD : AForm<0x03, 0x0b, (ops GR:$dst, FP:$src), + "getf.d $dst = $src;;">; +def SETFD : AForm<0x03, 0x0b, (ops FP:$dst, GR:$src), + "setf.d $dst = $src;;">; + +def GETFSIG : AForm<0x03, 0x0b, (ops GR:$dst, FP:$src), + "getf.sig $dst = $src;;">; +def SETFSIG : AForm<0x03, 0x0b, (ops FP:$dst, GR:$src), + "setf.sig $dst = $src;;">; + +def LDF4 : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr), + "ldfs $dst = [$srcPtr];;">; +def LDF8 : AForm<0x03, 0x0b, (ops FP:$dst, GR:$srcPtr), + "ldfd $dst = [$srcPtr];;">; + +def STF4 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value), + "stfs [$dstPtr] = $value;;">; +def STF8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value), + "stfd [$dstPtr] = $value;;">; + +let isTerminator = 1, isBranch = 1 in { + def BRLCOND_NOTCALL : RawForm<0x03, 0xb0, (ops PR:$qp, i64imm:$dst), + "($qp) brl.cond.sptk $dst;;">; + def BRCOND_NOTCALL : RawForm<0x03, 0xb0, (ops PR:$qp, GR:$dst), + "($qp) br.cond.sptk $dst;;">; +} + +let isCall = 1, isTerminator = 1, isBranch = 1, +// all calls clobber non-callee-saved registers, and for now, they are these: + Defs = [r2,r3,r8,r9,r10,r11,r14,r15,r16,r17,r18,r19,r20,r21,r22,r23,r24, + r25,r26,r27,r28,r29,r30,r31, + p6,p7,p8,p9,p10,p11,p12,p13,p14,p15, + F6,F7,F8,F9,F10,F11,F12,F13,F14,F15, + F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49, + F50,F51,F52,F53,F54,F55,F56, + F57,F58,F59,F60,F61,F62,F63,F64,F65,F66,F67,F68,F69,F70,F71,F72,F73,F74, + F75,F76,F77,F78,F79,F80,F81, + F82,F83,F84,F85,F86,F87,F88,F89,F90,F91,F92,F93,F94,F95,F96,F97,F98,F99, + F100,F101,F102,F103,F104,F105, + F106,F107,F108,F109,F110,F111,F112,F113,F114,F115,F116,F117,F118,F119, + F120,F121,F122,F123,F124,F125,F126,F127, + out0,out1,out2,out3,out4,out5,out6,out7] in { + def BRCALL : RawForm<0x03, 0xb0, (ops calltarget:$dst), + "br.call.sptk rp = $dst;;">; // FIXME: teach llvm about branch regs? 
+ def BRLCOND_CALL : RawForm<0x03, 0xb0, (ops PR:$qp, i64imm:$dst), + "($qp) brl.cond.call.sptk $dst;;">; + def BRCOND_CALL : RawForm<0x03, 0xb0, (ops PR:$qp, GR:$dst), + "($qp) br.cond.call.sptk $dst;;">; +} + +let isTerminator = 1, isReturn = 1 in + def RET : RawForm<0x03, 0xb0, (ops), "br.ret.sptk.many rp;;">; // return + + diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h new file mode 100644 index 00000000000..40b0f0da186 --- /dev/null +++ b/lib/Target/IA64/IA64MachineFunctionInfo.h @@ -0,0 +1,34 @@ +//===-- IA64MachineFunctionInfo.h - IA64-specific information ---*- C++ -*-===// +//===-- for MachineFunction ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +//===----------------------------------------------------------------------===// +// +// This file declares IA64-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef IA64MACHINEFUNCTIONINFO_H +#define IA64MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" +//#include "IA64JITInfo.h" + +namespace llvm { + +class IA64FunctionInfo : public MachineFunctionInfo { + +public: + unsigned outRegsUsed; // how many 'out' registers are used + // by this machinefunction? (used to compute the appropriate + // entry in the 'alloc' instruction at the top of the + // machinefunction) + IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; }; + +}; + +} // End llvm namespace + +#endif + diff --git a/lib/Target/IA64/IA64RegisterInfo.cpp b/lib/Target/IA64/IA64RegisterInfo.cpp new file mode 100644 index 00000000000..91fa4563c99 --- /dev/null +++ b/lib/Target/IA64/IA64RegisterInfo.cpp @@ -0,0 +1,362 @@ +//===- IA64RegisterInfo.cpp - IA64 Register Information ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the IA64 implementation of the MRegisterInfo class. This +// file is responsible for the frame pointer elimination optimization on IA64. +// +//===----------------------------------------------------------------------===// + +#include "IA64.h" +#include "IA64RegisterInfo.h" +#include "IA64InstrBuilder.h" +#include "IA64MachineFunctionInfo.h" +#include "llvm/Constants.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/STLExtras.h" +#include + +using namespace llvm; + +namespace { +} + +IA64RegisterInfo::IA64RegisterInfo() + : IA64GenRegisterInfo(IA64::ADJUSTCALLSTACKDOWN, IA64::ADJUSTCALLSTACKUP) {} + +static const TargetRegisterClass *getClass(unsigned SrcReg) { + if (IA64::FPRegisterClass->contains(SrcReg)) + return IA64::FPRegisterClass; + if (IA64::PRRegisterClass->contains(SrcReg)) + return IA64::PRRegisterClass; + + assert(IA64::GRRegisterClass->contains(SrcReg) && + "PROBLEM: Reg is not FP, predicate or GR!"); + return IA64::GRRegisterClass; +} + +void IA64RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, int FrameIdx) const { + + if (getClass(SrcReg) == IA64::FPRegisterClass) { + BuildMI(MBB, MI, IA64::STF8, 2).addFrameIndex(FrameIdx).addReg(SrcReg); + } + else if (getClass(SrcReg) == IA64::GRRegisterClass) { + BuildMI(MBB, MI, IA64::ST8, 2).addFrameIndex(FrameIdx).addReg(SrcReg); + } + else if (getClass(SrcReg) == IA64::PRRegisterClass) { + /* we use IA64::r2 as a temporary register for doing this hackery. 
*/ + // first we load 0: + BuildMI(MBB, MI, IA64::MOV, 1, IA64::r2).addReg(IA64::r0); + // then conditionally add 1: + BuildMI(MBB, MI, IA64::CADDIMM22, 3, IA64::r2).addReg(IA64::r2) + .addImm(1).addReg(SrcReg); + // and then store it to the stack + BuildMI(MBB, MI, IA64::ST8, 2).addFrameIndex(FrameIdx).addReg(IA64::r2); + } else assert(0 && + "sorry, I don't know how to store this sort of reg in the stack\n"); +} + +void IA64RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx)const{ + + if (getClass(DestReg) == IA64::FPRegisterClass) { + BuildMI(MBB, MI, IA64::LDF8, 1, DestReg).addFrameIndex(FrameIdx); + } else if (getClass(DestReg) == IA64::GRRegisterClass) { + BuildMI(MBB, MI, IA64::LD8, 1, DestReg).addFrameIndex(FrameIdx); + } else if (getClass(DestReg) == IA64::PRRegisterClass) { + // first we load a byte from the stack into r2, our 'predicate hackery' + // scratch reg + BuildMI(MBB, MI, IA64::LD8, 1, IA64::r2).addFrameIndex(FrameIdx); + // then we compare it to zero. If it _is_ zero, compare-not-equal to + // r0 gives us 0, which is what we want, so that's nice. + BuildMI(MBB, MI, IA64::CMPNE, 2, DestReg).addReg(IA64::r2).addReg(IA64::r0); + } else assert(0 && + "sorry, I don't know how to load this sort of reg from the stack\n"); +} + +void IA64RegisterInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *RC) const { + + if(RC == IA64::PRRegisterClass ) // if a bool, we use pseudocode + // (SrcReg) DestReg = cmp.eq.unc(r0, r0) + BuildMI(MBB, MI, IA64::PCMPEQUNC, 1, DestReg).addReg(IA64::r0).addReg(IA64::r0).addReg(SrcReg); + else // otherwise, MOV works (for both gen. 
regs and FP regs) + BuildMI(MBB, MI, IA64::MOV, 1, DestReg).addReg(SrcReg); +} + +//===----------------------------------------------------------------------===// +// Stack Frame Processing methods +//===----------------------------------------------------------------------===// + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +// +static bool hasFP(MachineFunction &MF) { + return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects(); +} + +void IA64RegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + + if (hasFP(MF)) { + // If we have a frame pointer, turn the adjcallstackup instruction into a + // 'sub SP, ' and the adjcallstackdown instruction into 'add SP, + // ' + MachineInstr *Old = I; + unsigned Amount = Old->getOperand(0).getImmedValue(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + MachineInstr *New; + if (Old->getOpcode() == IA64::ADJUSTCALLSTACKDOWN) { + New=BuildMI(IA64::ADDIMM22, 2, IA64::r12).addReg(IA64::r12) + .addImm(-Amount); + } else { + assert(Old->getOpcode() == IA64::ADJUSTCALLSTACKUP); + New=BuildMI(IA64::ADDIMM22, 2, IA64::r12).addReg(IA64::r12) + .addImm(Amount); + } + + // Replace the pseudo instruction with a new instruction... 
+ MBB.insert(I, New); + } + } + + MBB.erase(I); +} + +void IA64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{ + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + + bool FP = hasFP(MF); + + while (!MI.getOperand(i).isFrameIndex()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(i).getFrameIndex(); + + // choose a base register: ( hasFP? framepointer (r15) : stack pointer (r12) ) + unsigned BaseRegister = FP ? IA64::r15 : IA64::r12; + // Add the base register (rewritten to the r22 scratch register below) + MI.SetMachineOperandReg(i, BaseRegister); + + // Now fetch the frame object's offset from the frame info. + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + + // If we're not using a Frame Pointer that has been set to the value of the + // SP before having the stack size subtracted from it, then add the stack size + // to Offset to get the correct offset.
+ Offset += MF.getFrameInfo()->getStackSize(); + + // XXX: we use 'r22' as another hack+slash temporary register here :( + if ( Offset <= 8191 && Offset >= -8192) { // smallish offset + //fix up the old: + MI.SetMachineOperandReg(i, IA64::r22); + //insert the new + MachineInstr* nMI=BuildMI(IA64::ADDIMM22, 2, IA64::r22) + .addReg(BaseRegister).addSImm(Offset); + MBB.insert(II, nMI); + } else { // it's big + //fix up the old: + MI.SetMachineOperandReg(i, IA64::r22); + MachineInstr* nMI; + nMI=BuildMI(IA64::MOVLSI32, 1, IA64::r22).addSImm(Offset); + MBB.insert(II, nMI); + nMI=BuildMI(IA64::ADD, 2, IA64::r22).addReg(BaseRegister) + .addReg(IA64::r22); + MBB.insert(II, nMI); + } + +} + +void IA64RegisterInfo::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineInstr *MI; + bool FP = hasFP(MF); + + // first, we handle the 'alloc' instruction, that should be right up the + // top of any function + static const unsigned RegsInOrder[96] = { // there are 96 GPRs the + // RSE worries about + IA64::r32, IA64::r33, IA64::r34, IA64::r35, + IA64::r36, IA64::r37, IA64::r38, IA64::r39, IA64::r40, IA64::r41, + IA64::r42, IA64::r43, IA64::r44, IA64::r45, IA64::r46, IA64::r47, + IA64::r48, IA64::r49, IA64::r50, IA64::r51, IA64::r52, IA64::r53, + IA64::r54, IA64::r55, IA64::r56, IA64::r57, IA64::r58, IA64::r59, + IA64::r60, IA64::r61, IA64::r62, IA64::r63, IA64::r64, IA64::r65, + IA64::r66, IA64::r67, IA64::r68, IA64::r69, IA64::r70, IA64::r71, + IA64::r72, IA64::r73, IA64::r74, IA64::r75, IA64::r76, IA64::r77, + IA64::r78, IA64::r79, IA64::r80, IA64::r81, IA64::r82, IA64::r83, + IA64::r84, IA64::r85, IA64::r86, IA64::r87, IA64::r88, IA64::r89, + IA64::r90, IA64::r91, IA64::r92, IA64::r93, IA64::r94, IA64::r95, + IA64::r96, IA64::r97, IA64::r98, IA64::r99, IA64::r100, IA64::r101, + IA64::r102, IA64::r103, IA64::r104, 
IA64::r105, IA64::r106, IA64::r107, + IA64::r108, IA64::r109, IA64::r110, IA64::r111, IA64::r112, IA64::r113, + IA64::r114, IA64::r115, IA64::r116, IA64::r117, IA64::r118, IA64::r119, + IA64::r120, IA64::r121, IA64::r122, IA64::r123, IA64::r124, IA64::r125, + IA64::r126, IA64::r127 }; + + unsigned numStackedGPRsUsed=0; + for(int i=0; i<96; i++) { + if(MF.isPhysRegUsed(RegsInOrder[i])) + numStackedGPRsUsed=i+1; // (i+1 and not ++ - consider fn(fp, fp, int)) + } + + unsigned numOutRegsUsed=MF.getInfo<IA64FunctionInfo>()->outRegsUsed; + + // XXX FIXME : this code should be a bit more reliable (in case there _isn't_ a pseudo_alloc in the MBB) + unsigned dstRegOfPseudoAlloc = 0; + for(MBBI = MBB.begin(); /*MBBI->getOpcode() != IA64::PSEUDO_ALLOC*/; ++MBBI) { + assert(MBBI != MBB.end()); + if(MBBI->getOpcode() == IA64::PSEUDO_ALLOC) { + dstRegOfPseudoAlloc=MBBI->getOperand(0).getReg(); + break; + } + } + + MI=BuildMI(IA64::ALLOC,5).addReg(dstRegOfPseudoAlloc).addImm(0).\ + addImm(numStackedGPRsUsed).addImm(numOutRegsUsed).addImm(0); + MBB.insert(MBBI, MI); + + // Get the number of bytes to allocate from the FrameInfo + unsigned NumBytes = MFI->getStackSize(); + + if (MFI->hasCalls() && !FP) { + // We reserve argument space for call sites in the function immediately on + // entry to the current function. This eliminates the need for add/sub + // brackets around call sites. + NumBytes += MFI->getMaxCallFrameSize(); + } + + if(FP) + NumBytes += 8; // reserve space for the old FP + + // Do we need to allocate space on the stack? + if (NumBytes == 0) + return; + + // Add 16 bytes at the bottom of the stack (scratch area) + // and round the size to a multiple of the alignment. + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Size = 16 + (FP ? 8 : 0); + NumBytes = (NumBytes+Size+Align-1)/Align*Align; + + // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes); + + // adjust stack pointer: r12 -= numbytes + if (NumBytes <= 8191) { + MI=BuildMI(IA64::ADDIMM22, 2, IA64::r12).addReg(IA64::r12).addImm(-NumBytes); + MBB.insert(MBBI, MI); + } else { // we use r22 as a scratch register here + MI=BuildMI(IA64::MOVLSI32, 1, IA64::r22).addSImm(-NumBytes); + // FIXME: MOVLSI32 expects a _u_32imm + MBB.insert(MBBI, MI); // first load the decrement into r22 + MI=BuildMI(IA64::ADD, 2, IA64::r12).addReg(IA64::r12).addReg(IA64::r22); + MBB.insert(MBBI, MI); // then add (subtract) it to r12 (stack ptr) + } + + // now if we need to, save the old FP and set the new + if (FP) { + MI = BuildMI(IA64::ST8, 2).addReg(IA64::r12).addReg(IA64::r15); + MBB.insert(MBBI, MI); + // this must be the last instr in the prolog ? (XXX: why??) + MI = BuildMI(IA64::MOV, 1, IA64::r15).addReg(IA64::r12); + MBB.insert(MBBI, MI); + } + +} + +void IA64RegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + MachineInstr *MI; + assert(MBBI->getOpcode() == IA64::RET && + "Can only insert epilog into returning blocks"); + + bool FP = hasFP(MF); + + // Get the number of bytes allocated from the FrameInfo... 
+ unsigned NumBytes = MFI->getStackSize(); + + //now if we need to, restore the old FP + if (FP) + { + //copy the FP into the SP (discards allocas) + MI=BuildMI(IA64::MOV, 1, IA64::r12).addReg(IA64::r15); + MBB.insert(MBBI, MI); + //restore the FP + MI=BuildMI(IA64::LD8, 1, IA64::r15).addReg(IA64::r15); + MBB.insert(MBBI, MI); + } + + if (NumBytes != 0) + { + if (NumBytes <= 8191) { + MI=BuildMI(IA64::ADDIMM22, 2, IA64::r12).addReg(IA64::r12).addImm(NumBytes); + MBB.insert(MBBI, MI); + } else { + MI=BuildMI(IA64::MOVLI32, 1, IA64::r22).addImm(NumBytes); + MBB.insert(MBBI, MI); + MI=BuildMI(IA64::ADD, 2, IA64::r12).addReg(IA64::r12).addReg(IA64::r22); + MBB.insert(MBBI, MI); + } + } + +} + +#include "IA64GenRegisterInfo.inc" + +const TargetRegisterClass* +IA64RegisterInfo::getRegClassForType(const Type* Ty) const { + switch (Ty->getTypeID()) { + default: assert(0 && "Invalid type to getClass!"); + case Type::LongTyID: + case Type::ULongTyID: + case Type::BoolTyID: + case Type::SByteTyID: + case Type::UByteTyID: + case Type::ShortTyID: + case Type::UShortTyID: + case Type::IntTyID: + case Type::UIntTyID: + case Type::PointerTyID: return &GRInstance; + + case Type::FloatTyID: + case Type::DoubleTyID: return &FPInstance; + } +} + + diff --git a/lib/Target/IA64/IA64RegisterInfo.h b/lib/Target/IA64/IA64RegisterInfo.h new file mode 100644 index 00000000000..502f3236ac7 --- /dev/null +++ b/lib/Target/IA64/IA64RegisterInfo.h @@ -0,0 +1,55 @@ +//===- IA64RegisterInfo.h - IA64 Register Information Impl ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the IA64 implementation of the MRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef IA64REGISTERINFO_H +#define IA64REGISTERINFO_H + +#include "llvm/Target/MRegisterInfo.h" +#include "IA64GenRegisterInfo.h.inc" + +namespace llvm { class llvm::Type; } + +namespace llvm { + +struct IA64RegisterInfo : public IA64GenRegisterInfo { + IA64RegisterInfo(); + const TargetRegisterClass* getRegClassForType(const Type* Ty) const; + + /// Code Generation virtual methods... + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, int FrameIndex) const; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex) const; + + void copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *RC) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator MI) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; +}; + +} // End llvm namespace + +#endif + diff --git a/lib/Target/IA64/IA64RegisterInfo.td b/lib/Target/IA64/IA64RegisterInfo.td new file mode 100644 index 00000000000..9534d2489bf --- /dev/null +++ b/lib/Target/IA64/IA64RegisterInfo.td @@ -0,0 +1,291 @@ +//===- IA64RegisterInfo.td - Describe the IA64 Register File ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file describes the IA64 register file, defining the registers +// themselves, aliases between the registers, and the register classes built +// out of the registers. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Register definitions... +// + +class IA64Register : Register { + let Namespace = "IA64"; +} + +// GR - One of 128 32-bit general registers +class GR num, string n> : IA64Register { + field bits<7> Num = num; +} + +// FP - One of 128 82-bit floating-point registers +class FP num, string n> : IA64Register { + field bits<7> Num = num; +} + +// PR - One of 64 1-bit predicate registers +class PR num, string n> : IA64Register { + field bits<6> Num = num; +} + +/* general registers */ +def r0 : GR< 0, "r0">; def r1 : GR< 1, "r1">; +def r2 : GR< 2, "r2">; def r3 : GR< 3, "r3">; +def r4 : GR< 4, "r4">; def r5 : GR< 5, "r5">; +def r6 : GR< 6, "r6">; def r7 : GR< 7, "r7">; +def r8 : GR< 8, "r8">; def r9 : GR< 9, "r9">; +def r10 : GR< 10, "r10">; def r11 : GR< 11, "r11">; +def r12 : GR< 12, "r12">; def r13 : GR< 13, "r13">; +def r14 : GR< 14, "r14">; def r15 : GR< 15, "r15">; +def r16 : GR< 16, "r16">; def r17 : GR< 17, "r17">; +def r18 : GR< 18, "r18">; def r19 : GR< 19, "r19">; +def r20 : GR< 20, "r20">; def r21 : GR< 21, "r21">; +def r22 : GR< 22, "r22">; def r23 : GR< 23, "r23">; +def r24 : GR< 24, "r24">; def r25 : GR< 25, "r25">; +def r26 : GR< 26, "r26">; def r27 : GR< 27, "r27">; +def r28 : GR< 28, "r28">; def r29 : GR< 29, "r29">; +def r30 : GR< 30, "r30">; def r31 : GR< 31, "r31">; +def r32 : GR< 32, "r32">; def r33 : GR< 33, "r33">; +def r34 : GR< 34, "r34">; def r35 : GR< 35, "r35">; +def r36 : GR< 36, "r36">; def r37 : GR< 37, "r37">; +def r38 : GR< 38, "r38">; def r39 : GR< 39, "r39">; +def r40 : GR< 40, "r40">; def r41 : GR< 41, 
"r41">; +def r42 : GR< 42, "r42">; def r43 : GR< 43, "r43">; +def r44 : GR< 44, "r44">; def r45 : GR< 45, "r45">; +def r46 : GR< 46, "r46">; def r47 : GR< 47, "r47">; +def r48 : GR< 48, "r48">; def r49 : GR< 49, "r49">; +def r50 : GR< 50, "r50">; def r51 : GR< 51, "r51">; +def r52 : GR< 52, "r52">; def r53 : GR< 53, "r53">; +def r54 : GR< 54, "r54">; def r55 : GR< 55, "r55">; +def r56 : GR< 56, "r56">; def r57 : GR< 57, "r57">; +def r58 : GR< 58, "r58">; def r59 : GR< 59, "r59">; +def r60 : GR< 60, "r60">; def r61 : GR< 61, "r61">; +def r62 : GR< 62, "r62">; def r63 : GR< 63, "r63">; +def r64 : GR< 64, "r64">; def r65 : GR< 65, "r65">; +def r66 : GR< 66, "r66">; def r67 : GR< 67, "r67">; +def r68 : GR< 68, "r68">; def r69 : GR< 69, "r69">; +def r70 : GR< 70, "r70">; def r71 : GR< 71, "r71">; +def r72 : GR< 72, "r72">; def r73 : GR< 73, "r73">; +def r74 : GR< 74, "r74">; def r75 : GR< 75, "r75">; +def r76 : GR< 76, "r76">; def r77 : GR< 77, "r77">; +def r78 : GR< 78, "r78">; def r79 : GR< 79, "r79">; +def r80 : GR< 80, "r80">; def r81 : GR< 81, "r81">; +def r82 : GR< 82, "r82">; def r83 : GR< 83, "r83">; +def r84 : GR< 84, "r84">; def r85 : GR< 85, "r85">; +def r86 : GR< 86, "r86">; def r87 : GR< 87, "r87">; +def r88 : GR< 88, "r88">; def r89 : GR< 89, "r89">; +def r90 : GR< 90, "r90">; def r91 : GR< 91, "r91">; +def r92 : GR< 92, "r92">; def r93 : GR< 93, "r93">; +def r94 : GR< 94, "r94">; def r95 : GR< 95, "r95">; +def r96 : GR< 96, "r96">; def r97 : GR< 97, "r97">; +def r98 : GR< 98, "r98">; def r99 : GR< 99, "r99">; +def r100 : GR< 100, "r100">; def r101 : GR< 101, "r101">; +def r102 : GR< 102, "r102">; def r103 : GR< 103, "r103">; +def r104 : GR< 104, "r104">; def r105 : GR< 105, "r105">; +def r106 : GR< 106, "r106">; def r107 : GR< 107, "r107">; +def r108 : GR< 108, "r108">; def r109 : GR< 109, "r109">; +def r110 : GR< 110, "r110">; def r111 : GR< 111, "r111">; +def r112 : GR< 112, "r112">; def r113 : GR< 113, "r113">; +def r114 : GR< 114, "r114">; def r115 : 
GR< 115, "r115">; +def r116 : GR< 116, "r116">; def r117 : GR< 117, "r117">; +def r118 : GR< 118, "r118">; def r119 : GR< 119, "r119">; +def r120 : GR< 120, "r120">; def r121 : GR< 121, "r121">; +def r122 : GR< 122, "r122">; def r123 : GR< 123, "r123">; +def r124 : GR< 124, "r124">; def r125 : GR< 125, "r125">; +def r126 : GR< 126, "r126">; def r127 : GR< 127, "r127">; + +/* floating-point registers */ +def F0 : FP< 0, "f0">; def F1 : FP< 1, "f1">; +def F2 : FP< 2, "f2">; def F3 : FP< 3, "f3">; +def F4 : FP< 4, "f4">; def F5 : FP< 5, "f5">; +def F6 : FP< 6, "f6">; def F7 : FP< 7, "f7">; +def F8 : FP< 8, "f8">; def F9 : FP< 9, "f9">; +def F10 : FP< 10, "f10">; def F11 : FP< 11, "f11">; +def F12 : FP< 12, "f12">; def F13 : FP< 13, "f13">; +def F14 : FP< 14, "f14">; def F15 : FP< 15, "f15">; +def F16 : FP< 16, "f16">; def F17 : FP< 17, "f17">; +def F18 : FP< 18, "f18">; def F19 : FP< 19, "f19">; +def F20 : FP< 20, "f20">; def F21 : FP< 21, "f21">; +def F22 : FP< 22, "f22">; def F23 : FP< 23, "f23">; +def F24 : FP< 24, "f24">; def F25 : FP< 25, "f25">; +def F26 : FP< 26, "f26">; def F27 : FP< 27, "f27">; +def F28 : FP< 28, "f28">; def F29 : FP< 29, "f29">; +def F30 : FP< 30, "f30">; def F31 : FP< 31, "f31">; +def F32 : FP< 32, "f32">; def F33 : FP< 33, "f33">; +def F34 : FP< 34, "f34">; def F35 : FP< 35, "f35">; +def F36 : FP< 36, "f36">; def F37 : FP< 37, "f37">; +def F38 : FP< 38, "f38">; def F39 : FP< 39, "f39">; +def F40 : FP< 40, "f40">; def F41 : FP< 41, "f41">; +def F42 : FP< 42, "f42">; def F43 : FP< 43, "f43">; +def F44 : FP< 44, "f44">; def F45 : FP< 45, "f45">; +def F46 : FP< 46, "f46">; def F47 : FP< 47, "f47">; +def F48 : FP< 48, "f48">; def F49 : FP< 49, "f49">; +def F50 : FP< 50, "f50">; def F51 : FP< 51, "f51">; +def F52 : FP< 52, "f52">; def F53 : FP< 53, "f53">; +def F54 : FP< 54, "f54">; def F55 : FP< 55, "f55">; +def F56 : FP< 56, "f56">; def F57 : FP< 57, "f57">; +def F58 : FP< 58, "f58">; def F59 : FP< 59, "f59">; +def F60 : FP< 60, "f60">; def 
F61 : FP< 61, "f61">; +def F62 : FP< 62, "f62">; def F63 : FP< 63, "f63">; +def F64 : FP< 64, "f64">; def F65 : FP< 65, "f65">; +def F66 : FP< 66, "f66">; def F67 : FP< 67, "f67">; +def F68 : FP< 68, "f68">; def F69 : FP< 69, "f69">; +def F70 : FP< 70, "f70">; def F71 : FP< 71, "f71">; +def F72 : FP< 72, "f72">; def F73 : FP< 73, "f73">; +def F74 : FP< 74, "f74">; def F75 : FP< 75, "f75">; +def F76 : FP< 76, "f76">; def F77 : FP< 77, "f77">; +def F78 : FP< 78, "f78">; def F79 : FP< 79, "f79">; +def F80 : FP< 80, "f80">; def F81 : FP< 81, "f81">; +def F82 : FP< 82, "f82">; def F83 : FP< 83, "f83">; +def F84 : FP< 84, "f84">; def F85 : FP< 85, "f85">; +def F86 : FP< 86, "f86">; def F87 : FP< 87, "f87">; +def F88 : FP< 88, "f88">; def F89 : FP< 89, "f89">; +def F90 : FP< 90, "f90">; def F91 : FP< 91, "f91">; +def F92 : FP< 92, "f92">; def F93 : FP< 93, "f93">; +def F94 : FP< 94, "f94">; def F95 : FP< 95, "f95">; +def F96 : FP< 96, "f96">; def F97 : FP< 97, "f97">; +def F98 : FP< 98, "f98">; def F99 : FP< 99, "f99">; +def F100 : FP< 100, "f100">; def F101 : FP< 101, "f101">; +def F102 : FP< 102, "f102">; def F103 : FP< 103, "f103">; +def F104 : FP< 104, "f104">; def F105 : FP< 105, "f105">; +def F106 : FP< 106, "f106">; def F107 : FP< 107, "f107">; +def F108 : FP< 108, "f108">; def F109 : FP< 109, "f109">; +def F110 : FP< 110, "f110">; def F111 : FP< 111, "f111">; +def F112 : FP< 112, "f112">; def F113 : FP< 113, "f113">; +def F114 : FP< 114, "f114">; def F115 : FP< 115, "f115">; +def F116 : FP< 116, "f116">; def F117 : FP< 117, "f117">; +def F118 : FP< 118, "f118">; def F119 : FP< 119, "f119">; +def F120 : FP< 120, "f120">; def F121 : FP< 121, "f121">; +def F122 : FP< 122, "f122">; def F123 : FP< 123, "f123">; +def F124 : FP< 124, "f124">; def F125 : FP< 125, "f125">; +def F126 : FP< 126, "f126">; def F127 : FP< 127, "f127">; + +/* predicate registers */ +def p0 : PR< 0, "p0">; def p1 : PR< 1, "p1">; +def p2 : PR< 2, "p2">; def p3 : PR< 3, "p3">; +def p4 : PR< 4, 
"p4">; def p5 : PR< 5, "p5">; +def p6 : PR< 6, "p6">; def p7 : PR< 7, "p7">; +def p8 : PR< 8, "p8">; def p9 : PR< 9, "p9">; +def p10 : PR< 10, "p10">; def p11 : PR< 11, "p11">; +def p12 : PR< 12, "p12">; def p13 : PR< 13, "p13">; +def p14 : PR< 14, "p14">; def p15 : PR< 15, "p15">; +def p16 : PR< 16, "p16">; def p17 : PR< 17, "p17">; +def p18 : PR< 18, "p18">; def p19 : PR< 19, "p19">; +def p20 : PR< 20, "p20">; def p21 : PR< 21, "p21">; +def p22 : PR< 22, "p22">; def p23 : PR< 23, "p23">; +def p24 : PR< 24, "p24">; def p25 : PR< 25, "p25">; +def p26 : PR< 26, "p26">; def p27 : PR< 27, "p27">; +def p28 : PR< 28, "p28">; def p29 : PR< 29, "p29">; +def p30 : PR< 30, "p30">; def p31 : PR< 31, "p31">; +def p32 : PR< 32, "p32">; def p33 : PR< 33, "p33">; +def p34 : PR< 34, "p34">; def p35 : PR< 35, "p35">; +def p36 : PR< 36, "p36">; def p37 : PR< 37, "p37">; +def p38 : PR< 38, "p38">; def p39 : PR< 39, "p39">; +def p40 : PR< 40, "p40">; def p41 : PR< 41, "p41">; +def p42 : PR< 42, "p42">; def p43 : PR< 43, "p43">; +def p44 : PR< 44, "p44">; def p45 : PR< 45, "p45">; +def p46 : PR< 46, "p46">; def p47 : PR< 47, "p47">; +def p48 : PR< 48, "p48">; def p49 : PR< 49, "p49">; +def p50 : PR< 50, "p50">; def p51 : PR< 51, "p51">; +def p52 : PR< 52, "p52">; def p53 : PR< 53, "p53">; +def p54 : PR< 54, "p54">; def p55 : PR< 55, "p55">; +def p56 : PR< 56, "p56">; def p57 : PR< 57, "p57">; +def p58 : PR< 58, "p58">; def p59 : PR< 59, "p59">; +def p60 : PR< 60, "p60">; def p61 : PR< 61, "p61">; +def p62 : PR< 62, "p62">; def p63 : PR< 63, "p63">; + +// XXX : this is temporary, we'll eventually have the output registers +// in the general purpose register class too? 
+def out0 : GR<0, "out0">; def out1 : GR<1, "out1">; +def out2 : GR<2, "out2">; def out3 : GR<3, "out3">; +def out4 : GR<4, "out4">; def out5 : GR<5, "out5">; +def out6 : GR<6, "out6">; def out7 : GR<7, "out7">; + +// application (special) registers: + +// " previous function state" application register +def AR_PFS : GR<0, "ar.pfs">; + +// "return pointer" (this is really branch register b0) +def rp : GR<0, "rp">; +// branch reg 6 +def B6 : GR<0, "b6">; + +//===----------------------------------------------------------------------===// +// Register Class Definitions... now that we have all of the pieces, define the +// top-level register classes. The order specified in the register list is +// implicitly defined to be the register allocation order. +// + +// these are the scratch (+stacked) general registers +// ZERO (r0), GP (r1), SP (r12), ThreadP (r13) are not here... +// FIXME/XXX we also reserve a frame pointer (r15) +// FIXME/XXX we also reserve r2 for spilling/filling predicates +// in IA64RegisterInfo.cpp +// FIXME/XXX we also reserve r22 for calculating addresses +// in IA64RegisterInfo.cpp + +def GR : RegisterClass; + +// these are the scratch (+stacked) FP registers +// ZERO (F0) and ONE (F1) are not here +def FP : RegisterClass; + +// these are the predicate registers, p0 (1/TRUE) is not here +def PR : RegisterClass { + let Size = 64; + } + +/* + [p1, p2, p3, p4, p5, p6, p7, + p8, p9, p10, p11, p12, p13, p14, p15, + p16, p17, p18, p19, p20, p21, p22, p23, + p24, p25, p26, p27, p28, p29, p30, p31, + p32, p33, p34, p35, p36, p37, p38, p39, + p40, p41, p42, p43, p44, p45, p46, p47, + p48, p49, p50, p51, p52, p53, p54, p55, + p56, p57, p58, p59, p60, p61, p62, p63]>; + */ + + diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp new file mode 100644 index 00000000000..0008f1fa5a7 --- /dev/null +++ b/lib/Target/IA64/IA64TargetMachine.cpp @@ -0,0 +1,134 @@ +//===-- IA64TargetMachine.cpp - Define TargetMachine for IA64 
-------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the IA64 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#include "IA64TargetMachine.h" +#include "IA64.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +/// IA64TargetMachineModule - Note that this is used on hosts that cannot link +/// in a library unless there are references into the library. In particular, +/// it seems that it is not possible to get things to work on Win32 without +/// this. Though it is unused, do not remove it. +extern "C" int IA64TargetMachineModule; +int IA64TargetMachineModule = 0; + +namespace { + cl::opt<bool> DisableOutput("disable-ia64-llc-output", cl::Hidden, + cl::desc("Disable the IA64 asm printer, for use " + "when profiling the code generator.")); + + // Register the target.
+ RegisterTarget<IA64TargetMachine> X("ia64", " IA-64 (Itanium)"); +} + +unsigned IA64TargetMachine::compileTimeMatchQuality() { +#if defined(__ia64__) || defined(__IA64__) + return 50; +#else + return 0; +#endif +} + +unsigned IA64TargetMachine::getModuleMatchQuality(const Module &M) { + // we match [iI][aA].*64 ("ia" in either case, then "64" anywhere after it) + bool seenIA64=false; + std::string TT = M.getTargetTriple(); + + if (TT.size() >= 4) { + if( (TT[0]=='i' || TT[0]=='I') && + (TT[1]=='a' || TT[1]=='A') ) { + for(unsigned int i=2; i<(TT.size()-1); i++) + if(TT[i]=='6' && TT[i+1]=='4') + seenIA64=true; + } + + if(seenIA64) + return 50; // strong match + } + + return compileTimeMatchQuality()/2; + +} + +/// IA64TargetMachine ctor - Create an LP64 architecture model +/// +IA64TargetMachine::IA64TargetMachine(const Module &M, IntrinsicLowering *IL) + : TargetMachine("IA64", IL, true), + FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0) { // FIXME? check this stuff +} + +// addPassesToEmitAssembly - We currently use all of the same passes as the JIT +// does to emit statically compiled machine code. +bool IA64TargetMachine::addPassesToEmitAssembly(PassManager &PM, + std::ostream &Out) { + // FIXME: Implement efficient support for garbage collection intrinsics. + PM.add(createLowerGCPass()); + + // FIXME: Implement the invoke/unwind instructions! + PM.add(createLowerInvokePass()); + + // FIXME: Implement the switch instruction in the instruction selector! + PM.add(createLowerSwitchPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + + PM.add(createIA64PatternInstructionSelector(*this)); + +/* XXX not yet. ;) + // Run optional SSA-based machine code optimizations next... + if (!NoSSAPeephole) + PM.add(createIA64SSAPeepholeOptimizerPass()); +*/ + + // Print the instruction selected machine code...
+ if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(&std::cerr)); + + // Perform register allocation to convert to a concrete IA64 representation + PM.add(createRegisterAllocator()); + + // Print the register-allocated machine code. This is done only once: no + // IA64-specific pass runs between register allocation and prolog/epilog + // insertion yet, so a second dump here would be identical. + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(&std::cerr)); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + PM.add(createPrologEpilogCodeInserter()); + +/* XXX no, not just yet */ +// PM.add(createIA64PeepholeOptimizerPass()); + + if (PrintMachineCode) // Print the register-allocated code + PM.add(createIA64CodePrinterPass(std::cerr, *this)); + + if (!DisableOutput) + PM.add(createIA64CodePrinterPass(Out, *this)); + + // Delete machine code for this function + PM.add(createMachineCodeDeleter()); + + return false; // success! +} + diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h new file mode 100644 index 00000000000..639df924650 --- /dev/null +++ b/lib/Target/IA64/IA64TargetMachine.h @@ -0,0 +1,48 @@ +//===-- IA64TargetMachine.h - Define TargetMachine for IA64 ---*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the IA64 specific subclass of TargetMachine.
+// +//===----------------------------------------------------------------------===// + +#ifndef IA64TARGETMACHINE_H +#define IA64TARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/PassManager.h" +#include "IA64InstrInfo.h" + +namespace llvm { +class IntrinsicLowering; + +class IA64TargetMachine : public TargetMachine { + IA64InstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + //IA64JITInfo JITInfo; +public: + IA64TargetMachine(const Module &M, IntrinsicLowering *IL); + + virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const MRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual bool addPassesToEmitAssembly(PassManager &PM, std::ostream &Out); + + static unsigned getModuleMatchQuality(const Module &M); + static unsigned compileTimeMatchQuality(void); + +}; +} // End llvm namespace + +#endif + + diff --git a/lib/Target/IA64/Makefile b/lib/Target/IA64/Makefile new file mode 100644 index 00000000000..8bd2b6a790f --- /dev/null +++ b/lib/Target/IA64/Makefile @@ -0,0 +1,17 @@ +##===- lib/Target/IA64/Makefile -----------------------------*- Makefile -*-===## +# The LLVM Compiler Infrastructure +# +# This file was developed by Duraid Madina and is distributed under the +# University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMIA64 +TARGET = IA64 +# Make sure that tblgen is run, first thing. 
+BUILT_SOURCES = IA64GenRegisterInfo.h.inc IA64GenRegisterNames.inc \ + IA64GenRegisterInfo.inc IA64GenInstrNames.inc \ + IA64GenInstrInfo.inc IA64GenAsmWriter.inc + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/IA64/README b/lib/Target/IA64/README new file mode 100644 index 00000000000..e7af1d8cf07 --- /dev/null +++ b/lib/Target/IA64/README @@ -0,0 +1,98 @@ +*** README for the LLVM IA64 Backend "Version 0.01" - March 18, 2005 +*** Quote for this version: + + "Kaori and Hitomi are naughty!!" + + +Congratulations, you have found: + +**************************************************************** +* @@@ @@@ @@@ @@@ @@@@@@@@@@ * +* @@@ @@@ @@@ @@@ @@@@@@@@@@@ * +* @@! @@! @@! @@@ @@! @@! @@! * +* !@! !@! !@! @!@ !@! !@! !@! * +* @!! @!! @!@ !@! @!! !!@ @!@ * +* !!! !!! !@! !!! !@! ! !@! * +* !!: !!: :!: !!: !!: !!: * +* :!: :!: ::!!:! :!: :!: * +* :: :::: :: :::: :::: ::: :: * +* : :: : : : :: : : : : : * +* * +* * +* @@@@@@ @@@ @@@ @@@ @@@@@@ @@@@@@ @@@ * +* @@@@@@@@ @@@@ @@@ @@@ @@@@@@@@ @@@@@@@ @@@@ * +* @@! @@@ @@!@!@@@ @@! @@! @@@ !@@ @@!@! * +* !@! @!@ !@!!@!@! !@! !@! @!@ !@! !@!!@! * +* @!@ !@! @!@ !!@! !!@ @!@!@!@! !!@@!@! @!! @!! * +* !@! !!! !@! !!! !!! !!!@!!!! @!!@!!!! !!! !@! * +* !!: !!! !!: !!! !!: !!: !!! !:! !:! :!!:!:!!: * +* :!: !:! :!: !:! :!: :!: !:! :!: !:! !:::!!::: * +* ::::: :: :: :: :: :: ::: :::: ::: ::: * +* : : : :: : : : : : :: : : ::: * +* * +**************************************************************** +* Bow down, bow down, before the power of IA64! Or be crushed, * +* be crushed, by its jolly registers of doom!!
* +**************************************************************** + +DEVELOPMENT PLAN: + + _ you are 2005 maybe 2005 2006 2006 and + / here | | | beyond + v v v v | + v +CLEAN UP ADD INSTRUCTION ADD PLAY WITH +INSTRUCTION --> SCHEDULING AND --> JIT --> DYNAMIC --> FUTURE WORK +SELECTION BUNDLING SUPPORT REOPTIMIZATION + +DISCLAIMER AND PROMISE: + +As of the time of this release, you are probably better off using Intel C/C++ +or GCC. The performance of the code emitted right now is, in a word, +terrible. Check back in a few months - the story will be different then, +I guarantee it. + +TODO: + + - clean up and thoroughly test the isel patterns. + - fix up floating point + (nb http://gcc.gnu.org/wiki?pagename=ia64%20floating%20point ) + - bundling! + (we will avoid the mess that is: + http://gcc.gnu.org/ml/gcc/2003-12/msg00832.html ) + - instruction scheduling (yep) + - write truly inspirational documentation + - if-conversion (predicate database/knowledge? etc etc) + - counted loop support + - make integer + FP mul/div more clever (we have fixed pseudocode atm) + - track and use comparison complements + +INFO: + + - we are strictly LP64 here, no support for ILP32 on HP-UX. Linux users + don't need to worry about this. + - i have instruction scheduling/bundling pseudocode, that really works + (has been tested, albeit at the perl-script level). + so, before you go write your own, send me an email! + +KNOWN DEFECTS AT THE CURRENT TIME: + + - no varargs + - alloca doesn't work (indeed, stack frame layout is bogus) + - no support for big-endian environments + - (not really the backend, but...) the CFE has some issues on IA64. + these will probably be fixed soon. + +ACKNOWLEDGEMENTS: + + - Chris Lattner (x100) + - Other LLVM developers ("hey, that looks familiar") + +CONTACT: + + - You can email me at duraid@octopus.com.au. If you find a small bug, + just email me. If you find a big bug, please file a bug report + in bugzilla! 
http://llvm.cs.uiuc.edu is your one stop shop for all + things LLVM. + +