diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
new file mode 100644
index 00000000000..6fdb14cf3b8
--- /dev/null
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -0,0 +1,654 @@
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by a team from the Computer Systems Research
+// Department at The Aerospace Corporation.
+//
+// See README.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Cell SPU assembly language. This printer
+// is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+  STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+  const std::string bss_section(".bss");
+
+  struct VISIBILITY_HIDDEN SPUAsmPrinter : public AsmPrinter {
+    std::set<std::string> FnStubs, GVStubs;
+
+    SPUAsmPrinter(std::ostream &O, TargetMachine &TM, const TargetAsmInfo *T) :
+      AsmPrinter(O, TM, T)
+    {
+    }
+
+    virtual const char *getPassName() const {
+      return "STI CBEA SPU Assembly Printer";
+    }
+
+    SPUTargetMachine &getTM() {
+      return static_cast<SPUTargetMachine&>(TM);
+    }
+
+    /// printInstruction - This method is automatically generated by tablegen
+    /// from the instruction set description.  This method returns true if
+    /// the machine instruction was sufficiently described to print it,
+    /// otherwise it returns false.
+    bool printInstruction(const MachineInstr *MI);
+
+    void printMachineInstruction(const MachineInstr *MI);
+    void printOp(const MachineOperand &MO);
+
+    /// printRegister - Print register according to target requirements.
+    ///
+    void printRegister(const MachineOperand &MO, bool R0AsZero) {
+      unsigned RegNo = MO.getReg();
+      assert(MRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+      O << TM.getRegisterInfo()->get(RegNo).Name;
+    }
+
+    void printOperand(const MachineInstr *MI, unsigned OpNo) {
+      const MachineOperand &MO = MI->getOperand(OpNo);
+      if (MO.isRegister()) {
+        assert(MRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physreg??");
+        O << TM.getRegisterInfo()->get(MO.getReg()).Name;
+      } else if (MO.isImmediate()) {
+        O << MO.getImmedValue();
+      } else {
+        printOp(MO);
+      }
+    }
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode);
+
+
+    void
+    printS7ImmOperand(const MachineInstr *MI, unsigned OpNo)
+    {
+      int value = MI->getOperand(OpNo).getImmedValue();
+      value = (value << (32 - 7)) >> (32 - 7);
+
+      assert((value >= -(1 << 6) && value <= (1 << 6) - 1)
+             && "Invalid s7 argument");
+      O << value;
+    }
+
+    void
+    printU7ImmOperand(const MachineInstr *MI, unsigned OpNo)
+    {
+      unsigned int value = MI->getOperand(OpNo).getImmedValue();
+      assert(value < (1 << 7) && "Invalid u7 argument");
+      O << value;
+    }
+
+    void
+    printMemRegImmS7(const MachineInstr *MI, unsigned OpNo)
+    {
+      char value = MI->getOperand(OpNo).getImmedValue();
+      O << (int) value;
+      O << "(";
+      printOperand(MI, OpNo+1);
+      O << ")";
+    }
+
+    void
+    printS16ImmOperand(const MachineInstr *MI, unsigned OpNo)
+    {
+      O << (short) MI->getOperand(OpNo).getImmedValue();
+    }
+
+    void
+    printU16ImmOperand(const MachineInstr *MI, unsigned OpNo)
+    {
+      O << (unsigned short)MI->getOperand(OpNo).getImmedValue();
+    }
+
+    void
+    printU32ImmOperand(const MachineInstr *MI, unsigned OpNo)
+    {
+      O << (unsigned)MI->getOperand(OpNo).getImmedValue();
+    }
+
+    void
+    printMemRegReg(const MachineInstr *MI, unsigned OpNo) {
+      // [$reg, $reg] (X-form) operand: print the base register, then the
+      // index register.
+      const MachineOperand &MO = MI->getOperand(OpNo);
+      O << TM.getRegisterInfo()->get(MO.getReg()).Name;
+      O << ", ";
+      printOperand(MI, OpNo+1);
+    }
+
+    void
+    printU18ImmOperand(const MachineInstr *MI, unsigned OpNo)
+    {
+      unsigned int value = MI->getOperand(OpNo).getImmedValue();
+      assert(value <= (1 << 18) - 1 && "Invalid u18 argument");
+      O << value;
+    }
+
+    void
+    printS10ImmOperand(const MachineInstr *MI, unsigned OpNo)
+    {
+      short value = (short) (((int) MI->getOperand(OpNo).getImmedValue() << 16)
+                             >> 16);
+      assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
+             && "Invalid s10 argument");
+      O << value;
+    }
+
+    void
+    printU10ImmOperand(const MachineInstr *MI, unsigned OpNo)
+    {
+      short value = (short) (((int) MI->getOperand(OpNo).getImmedValue() << 16)
+                             >> 16);
+      assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
+      O << value;
+    }
+
+    void
+    printMemRegImmS10(const MachineInstr *MI, unsigned OpNo)
+    {
+      const MachineOperand &MO = MI->getOperand(OpNo);
+      assert(MO.isImmediate()
+             && "printMemRegImmS10 first operand is not immediate");
+      printS10ImmOperand(MI, OpNo);
+      O << "(";
+      printOperand(MI, OpNo+1);
+      O << ")";
+    }
+
+    void
+    printAddr256K(const MachineInstr *MI, unsigned OpNo)
+    {
+      /* Note: operand 1 is an offset or symbol name. Operand 2 is
+         ignored.
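+
+         For example, an immediate operand prints as a bare signed 16-bit
+         value, while a symbolic operand (global or constant-pool entry)
+         prints through printOp().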
*/ + if (MI->getOperand(OpNo).isImmediate()) { + printS16ImmOperand(MI, OpNo); + } else { + printOp(MI->getOperand(OpNo)); + } + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo) { + printOp(MI->getOperand(OpNo)); + } + + void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo) { + printOp(MI->getOperand(OpNo)); + O << "-."; + } + + void printSymbolHi(const MachineInstr *MI, unsigned OpNo) { + if (MI->getOperand(OpNo).isImmediate()) { + printS16ImmOperand(MI, OpNo); + } else { + printOp(MI->getOperand(OpNo)); + O << "@h"; + } + } + + void printSymbolLo(const MachineInstr *MI, unsigned OpNo) { + if (MI->getOperand(OpNo).isImmediate()) { + printS16ImmOperand(MI, OpNo); + } else { + printOp(MI->getOperand(OpNo)); + O << "@l"; + } + } + + /// Print local store address + void printSymbolLSA(const MachineInstr *MI, unsigned OpNo) { + printOp(MI->getOperand(OpNo)); + } + + void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo) { + if (MI->getOperand(OpNo).isImmediate()) { + int value = (int) MI->getOperand(OpNo).getImmedValue(); + assert((value >= 0 && value < 16) + && "Invalid negated immediate rotate 7-bit argument"); + O << -value; + } else { + assert(0 && "Invalid/non-immediate rotate amount in printRotateNeg7Imm"); + } + } + + void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo) { + if (MI->getOperand(OpNo).isImmediate()) { + int value = (int) MI->getOperand(OpNo).getImmedValue(); + assert((value >= 0 && value < 32) + && "Invalid negated immediate rotate 7-bit argument"); + O << -value; + } else { + assert(0 && "Invalid/non-immediate rotate amount in printRotateNeg7Imm"); + } + } + + virtual bool runOnMachineFunction(MachineFunction &F) = 0; + virtual bool doFinalization(Module &M) = 0; + }; + + /// LinuxAsmPrinter - SPU assembly printer, customized for Linux + struct VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter { + + DwarfWriter DW; + + LinuxAsmPrinter(std::ostream &O, SPUTargetMachine &TM, + const TargetAsmInfo *T) : + SPUAsmPrinter(O, TM, T), + DW(O, this, T) + { } + + virtual const char *getPassName() const { + return "STI CBEA SPU Assembly Printer"; + } + + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + SPUAsmPrinter::getAnalysisUsage(AU); + } + + /// getSectionForFunction - Return the section that we should emit the + /// specified function body into. + virtual std::string getSectionForFunction(const Function &F) const; + }; +} // end of anonymous namespace + +// Include the auto-generated portion of the assembly writer +#include "SPUGenAsmWriter.inc" + +void SPUAsmPrinter::printOp(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + cerr << "printOp() does not handle immediate values\n"; + abort(); + return; + + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMachineBasicBlock()); + return; + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getJumpTableIndex(); + // FIXME: PIC relocation model + return; + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getConstantPoolIndex(); + return; + case MachineOperand::MO_ExternalSymbol: + // Computing the address of an external symbol, not calling it. 
+ if (TM.getRelocationModel() != Reloc::Static) { + std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); + GVStubs.insert(Name); + O << "L" << Name << "$non_lazy_ptr"; + return; + } + O << TAI->getGlobalPrefix() << MO.getSymbolName(); + return; + case MachineOperand::MO_GlobalAddress: { + // Computing the address of a global symbol, not calling it. + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + + // External or weakly linked global variables need non-lazily-resolved + // stubs + if (TM.getRelocationModel() != Reloc::Static) { + if (((GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()))) { + GVStubs.insert(Name); + O << "L" << Name << "$non_lazy_ptr"; + return; + } + } + O << Name; + + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + return; + } + + default: + O << ""; + return; + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isRegister() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isRegister()) + return true; + ++OpNo; // Return the high-part. + break; + } + } + + printOperand(MI, OpNo); + return false; +} + +bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + printMemRegReg(MI, OpNo); + return false; +} + +/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax +/// to the current output stream. +/// +void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + printInstruction(MI); +} + + + +std::string LinuxAsmPrinter::getSectionForFunction(const Function &F) const { + switch (F.getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::ExternalLinkage: + case Function::InternalLinkage: return TAI->getTextSection(); + case Function::WeakLinkage: + case Function::LinkOnceLinkage: + return ""; // Print nothing for the time being... + } +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool +LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) +{ + DW.SetModuleInfo(&getAnalysis()); + + SetupMachineFunction(MF); + O << "\n\n"; + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print out labels for the function. + const Function *F = MF.getFunction(); + + SwitchToTextSection(getSectionForFunction(*F).c_str(), F); + EmitAlignment(3, F); + + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. 
+ break; + case Function::ExternalLinkage: + O << "\t.global\t" << CurrentFnName << "\n" + << "\t.type\t" << CurrentFnName << ", @function\n"; + break; + case Function::WeakLinkage: + case Function::LinkOnceLinkage: + O << "\t.global\t" << CurrentFnName << "\n"; + O << "\t.weak_definition\t" << CurrentFnName << "\n"; + break; + } + O << CurrentFnName << ":\n"; + + // Emit pre-function debug information. + DW.BeginFunction(&MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + if (I != MF.begin()) { + printBasicBlockLabel(I, true); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + O << "\t"; + printMachineInstruction(II); + } + } + + O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << "\n"; + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + // Emit post-function debug information. + DW.EndFunction(); + + // We didn't modify anything. + return false; +} + + +bool LinuxAsmPrinter::doInitialization(Module &M) { + bool Result = AsmPrinter::doInitialization(M); + SwitchToTextSection(TAI->getTextSection()); + // Emit initial debug information. + DW.BeginModule(&M); + return Result; +} + +bool LinuxAsmPrinter::doFinalization(Module &M) { + const TargetData *TD = TM.getTargetData(); + + // Print out module-level global variables here. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (!I->hasInitializer()) continue; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(I)) + continue; + + std::string name = Mang->getValueName(I); + Constant *C = I->getInitializer(); + unsigned Size = TD->getTypeStoreSize(C->getType()); + unsigned Align = TD->getPreferredAlignmentLog(I); + + if (C->isNullValue() && /* FIXME: Verify correct */ + (I->hasInternalLinkage() || I->hasWeakLinkage() || + I->hasLinkOnceLinkage() || + (I->hasExternalLinkage() && !I->hasSection()))) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + if (I->hasExternalLinkage()) { + // External linkage globals -> .bss section + // FIXME: Want to set the global variable's section so that + // SwitchToDataSection emits the ".section" directive + SwitchToDataSection("\t.section\t.bss", I); + O << "\t.global\t" << name << '\n'; + O << "\t.align\t" << Align << '\n'; + O << "\t.type\t" << name << ", @object\n"; + O << "\t.size\t" << name << ", " << Size << '\n'; + O << name << ":\n"; + O << "\t.zero\t" << Size; + } else if (I->hasInternalLinkage()) { + SwitchToDataSection("\t.data", I); + O << TAI->getLCOMMDirective() << name << "," << Size << "," << Align; + } else { + SwitchToDataSection("\t.data", I); + O << ".comm " << name << "," << Size; + } + O << "\t\t# '" << I->getName() << "'\n"; + } else { + switch (I->getLinkage()) { + case GlobalValue::LinkOnceLinkage: + case GlobalValue::WeakLinkage: + O << "\t.global " << name << '\n' + << "\t.weak_definition " << name << '\n'; + SwitchToDataSection(".section __DATA,__datacoal_nt,coalesced", I); + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. 
+ case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.global " << name << "\n"; + // FALL THROUGH + case GlobalValue::InternalLinkage: + if (I->isConstant()) { + const ConstantArray *CVA = dyn_cast(C); + if (TAI->getCStringSection() && CVA && CVA->isCString()) { + SwitchToDataSection(TAI->getCStringSection(), I); + break; + } + } + + SwitchToDataSection("\t.data", I); + break; + default: + cerr << "Unknown linkage type!"; + abort(); + } + + EmitAlignment(Align, I); + O << name << ":\t\t\t\t# '" << I->getName() << "'\n"; + + // If the initializer is a extern weak symbol, remember to emit the weak + // reference! + if (const GlobalValue *GV = dyn_cast(C)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + EmitGlobalConstant(C); + O << '\n'; + } + } + + // Output stubs for dynamically-linked functions + if (TM.getRelocationModel() == Reloc::PIC_) { + for (std::set::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i) { + SwitchToTextSection(".section __TEXT,__picsymbolstub1,symbol_stubs," + "pure_instructions,32"); + EmitAlignment(4); + O << "L" << *i << "$stub:\n"; + O << "\t.indirect_symbol " << *i << "\n"; + O << "\tmflr r0\n"; + O << "\tbcl 20,31,L0$" << *i << "\n"; + O << "L0$" << *i << ":\n"; + O << "\tmflr r11\n"; + O << "\taddis r11,r11,ha16(L" << *i << "$lazy_ptr-L0$" << *i << ")\n"; + O << "\tmtlr r0\n"; + O << "\tlwzu r12,lo16(L" << *i << "$lazy_ptr-L0$" << *i << ")(r11)\n"; + O << "\tmtctr r12\n"; + O << "\tbctr\n"; + SwitchToDataSection(".lazy_symbol_pointer"); + O << "L" << *i << "$lazy_ptr:\n"; + O << "\t.indirect_symbol " << *i << "\n"; + O << "\t.long dyld_stub_binding_helper\n"; + } + } else { + for (std::set::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i) { + SwitchToTextSection(".section __TEXT,__symbol_stub1,symbol_stubs," + "pure_instructions,16"); + EmitAlignment(4); + O << "L" << *i << "$stub:\n"; + O << "\t.indirect_symbol " << *i << "\n"; + O << "\tlis r11,ha16(L" << *i << "$lazy_ptr)\n"; + O << "\tlwzu r12,lo16(L" << *i << "$lazy_ptr)(r11)\n"; + O << "\tmtctr r12\n"; + O << "\tbctr\n"; + SwitchToDataSection(".lazy_symbol_pointer"); + O << "L" << *i << "$lazy_ptr:\n"; + O << "\t.indirect_symbol " << *i << "\n"; + O << "\t.long dyld_stub_binding_helper\n"; + } + } + + O << "\n"; + + // Output stubs for external and common global variables. + if (GVStubs.begin() != GVStubs.end()) { + SwitchToDataSection(".non_lazy_symbol_pointer"); + for (std::set::iterator I = GVStubs.begin(), + E = GVStubs.end(); I != E; ++I) { + O << "L" << *I << "$non_lazy_ptr:\n"; + O << "\t.indirect_symbol " << *I << "\n"; + O << "\t.long\t0\n"; + } + } + + // Emit initial debug information. + DW.EndModule(); + + // Emit ident information + O << "\t.ident\t\"(llvm 1.9+) STI CBEA Cell SPU backend\"\n"; + + return AsmPrinter::doFinalization(M); +} + + + +/// createSPUCodePrinterPass - Returns a pass that prints the Cell SPU +/// assembly code for a MachineFunction to the given output stream, in a format +/// that the Linux SPU assembler can deal with. 
+///
+FunctionPass *llvm::createSPUAsmPrinterPass(std::ostream &o,
+                                            SPUTargetMachine &tm) {
+  return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo());
+}
+
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
new file mode 100644
index 00000000000..cc1a9d6fd90
--- /dev/null
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -0,0 +1,62 @@
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by a team from the Computer Systems Research
+// Department at The Aerospace Corporation.
+//
+// See README.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the STI Cell SPU architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<SPUSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for Cell SPU: Everything can be passed back via $3:
+def RetCC_SPU : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[R3]>>,
+  CCIfType<[i64], CCAssignToReg<[R3]>>,
+  CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// CellSPU Argument Calling Conventions
+// FIXME
+//===----------------------------------------------------------------------===//
+/*
+def CC_SPU : CallingConv<[
+  // The first 8 integer arguments are passed in integer registers.
+  CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+  CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+
+  // SPU can pass back arguments in all
+  CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()",
+           CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11,
+                          F12, F13]>>>,
+  // Other sub-targets pass FP values in F1-10.
+  CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8, F9,
+                                      F10]>>,
+
+  // The first 12 vector arguments are passed in altivec registers.
+  CCIfType<[v16i8, v8i16, v4i32, v4f32],
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12,
+                          V13]>>,
+
+  // Integer/FP values get stored in stack slots that are 8 bytes in size and
+  // 8-byte aligned if there are no more registers to hold them.
+  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+  // Vectors get 16-byte stack slots that are 16-byte aligned.
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+           CCAssignToStack<16, 16>>
+]>;
+*/
diff --git a/lib/Target/CellSPU/SPUFrameInfo.cpp b/lib/Target/CellSPU/SPUFrameInfo.cpp
new file mode 100644
index 00000000000..c110db9abc9
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameInfo.cpp
@@ -0,0 +1,32 @@
+//===-- SPUFrameInfo.cpp - Frame information for the Cell SPU -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by a team from the Computer Systems Research
+// Department at The Aerospace Corporation.
+//
+// See README.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Frame information (stack layout and spill slots) for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameInfo.h"
+#include "SPURegisterNames.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameInfo:
+//===----------------------------------------------------------------------===//
+
+SPUFrameInfo::SPUFrameInfo(const TargetMachine &tm):
+  TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+  TM(tm)
+{
+  LR[0].first = SPU::R0;
+  LR[0].second = 16;
+}
diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h
new file mode 100644
index 00000000000..2fe7b3542b8
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameInfo.h
@@ -0,0 +1,77 @@
+//===-- SPUFrameInfo.h - Cell SPU frame information -------------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by a team from the Computer Systems Research
+// Department at The Aerospace Corporation.
+//
+// See README.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains CellSPU frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(SPUFRAMEINFO_H)
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SPURegisterInfo.h"
+
+namespace llvm {
+  class SPUFrameInfo: public TargetFrameInfo {
+    const TargetMachine &TM;
+    std::pair<unsigned, int> LR[1];
+
+  public:
+    SPUFrameInfo(const TargetMachine &tm);
+
+    //! Return a function's saved spill slots
+    /*!
+      For CellSPU, the only saved spill slot is the link register.
+     */
+    const std::pair<unsigned, int> *
+    getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+    //! Stack slot size (16 bytes)
+    static int stackSlotSize() {
+      return 16;
+    }
+    //! Maximum frame offset representable by a signed 10-bit integer
+    /*!
+      This is the maximum frame offset that can be expressed as a 10-bit
+      integer, used in D-form addresses.
+     */
+    static int maxFrameOffset() {
+      return ((1 << 9) - 1) * stackSlotSize();
+    }
+    //! Minimum frame offset representable by a signed 10-bit integer
+    static int minFrameOffset() {
+      return -(1 << 9) * stackSlotSize();
+    }
+    //! Minimum frame size (enough to spill LR + SP)
+    static int minStackSize() {
+      return (2 * stackSlotSize());
+    }
+    //! Frame size required to spill all registers plus frame info
+    static int fullSpillSize() {
+      return (SPURegisterInfo::getNumArgRegs() * stackSlotSize());
+    }
+    //! Number of instructions required to overcome hint-for-branch latency
+    /*!
+      HBR (hint-for-branch) instructions can be inserted when, for example,
+      we know from the control flow graph that a given function, such as
+      printf(), is going to be called. HBRs are only inserted if a sufficient
+      number of instructions separates the HBR from its target. An HBR
+      currently needs 6 cycles to take effect, hence the magic number 6.
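+
+      For example, an HBR emitted fewer than branchHintPenalty()
+      instructions ahead of its branch cannot resolve in time and provides
+      no benefit.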
+ */ + static const int branchHintPenalty() { + return 6; + } + }; +} + +#define SPUFRAMEINFO_H 1 +#endif diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp new file mode 100644 index 00000000000..e4787ebfc31 --- /dev/null +++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp @@ -0,0 +1,137 @@ +//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements hazard recognizers for scheduling on Cell SPU +// processors. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sched" + +#include "SPUHazardRecognizers.h" +#include "SPU.h" +#include "SPUInstrInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Cell SPU hazard recognizer +// +// This is the pipeline hazard recognizer for the Cell SPU processor. It does +// very little right now. +//===----------------------------------------------------------------------===// + +SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) : + TII(tii), + EvenOdd(0) +{ +} + +/// Return the pipeline hazard type encountered or generated by this +/// instruction. Currently returns NoHazard. +/// +/// \return NoHazard +HazardRecognizer::HazardType +SPUHazardRecognizer::getHazardType(SDNode *Node) +{ + // Initial thoughts on how to do this, but this code cannot work unless the + // function's prolog and epilog code are also being scheduled so that we can + // accurately determine which pipeline is being scheduled. 
+#if 0 + HazardRecognizer::HazardType retval = NoHazard; + bool mustBeOdd = false; + + switch (Node->getOpcode()) { + case SPU::LQDv16i8: + case SPU::LQDv8i16: + case SPU::LQDv4i32: + case SPU::LQDv4f32: + case SPU::LQDv2f64: + case SPU::LQDr128: + case SPU::LQDr64: + case SPU::LQDr32: + case SPU::LQDr16: + case SPU::LQAv16i8: + case SPU::LQAv8i16: + case SPU::LQAv4i32: + case SPU::LQAv4f32: + case SPU::LQAv2f64: + case SPU::LQAr128: + case SPU::LQAr64: + case SPU::LQAr32: + case SPU::LQXv4i32: + case SPU::LQXr128: + case SPU::LQXr64: + case SPU::LQXr32: + case SPU::LQXr16: + case SPU::STQDv16i8: + case SPU::STQDv8i16: + case SPU::STQDv4i32: + case SPU::STQDv4f32: + case SPU::STQDv2f64: + case SPU::STQDr128: + case SPU::STQDr64: + case SPU::STQDr32: + case SPU::STQDr16: + case SPU::STQDr8: + case SPU::STQAv16i8: + case SPU::STQAv8i16: + case SPU::STQAv4i32: + case SPU::STQAv4f32: + case SPU::STQAv2f64: + case SPU::STQAr128: + case SPU::STQAr64: + case SPU::STQAr32: + case SPU::STQAr16: + case SPU::STQAr8: + case SPU::STQXv16i8: + case SPU::STQXv8i16: + case SPU::STQXv4i32: + case SPU::STQXv4f32: + case SPU::STQXv2f64: + case SPU::STQXr128: + case SPU::STQXr64: + case SPU::STQXr32: + case SPU::STQXr16: + case SPU::STQXr8: + case SPU::RET: + mustBeOdd = true; + break; + default: + // Assume that this instruction can be on the even pipe + break; + } + + if (mustBeOdd && !EvenOdd) + retval = Hazard; + + DOUT << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " << retval << "\n"; + EvenOdd ^= 1; + return retval; +#else + return NoHazard; +#endif +} + +void SPUHazardRecognizer::EmitInstruction(SDNode *Node) +{ +} + +void SPUHazardRecognizer::AdvanceCycle() +{ + DOUT << "SPUHazardRecognizer::AdvanceCycle\n"; +} + +void SPUHazardRecognizer::EmitNoop() +{ + AdvanceCycle(); +} diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h new file mode 100644 index 00000000000..ce602fd72f5 --- /dev/null +++ b/lib/Target/CellSPU/SPUHazardRecognizers.h @@ -0,0 +1,43 @@ +//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling on the Cell SPU +// processor. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUHAZRECS_H
+#define SPUHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "SPUInstrInfo.h"
+
+namespace llvm {
+
+/// SPUHazardRecognizer
+class SPUHazardRecognizer : public HazardRecognizer
+{
+private:
+  const TargetInstrInfo &TII;
+  int EvenOdd;
+
+public:
+  SPUHazardRecognizer(const TargetInstrInfo &TII);
+  virtual HazardType getHazardType(SDNode *Node);
+  virtual void EmitInstruction(SDNode *Node);
+  virtual void AdvanceCycle();
+  virtual void EmitNoop();
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
new file mode 100644
index 00000000000..7d5c8ca8614
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -0,0 +1,615 @@
+//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by a team from the Computer Systems Research
+// Department at The Aerospace Corporation.
+//
+// See README.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for the Cell SPU,
+// converting from a legalized dag to a SPU-target dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "SPUISelLowering.h"
+#include "SPUHazardRecognizers.h"
+#include "SPUFrameInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+#include <iostream>
+#include <queue>
+#include <set>
+
+using namespace llvm;
+
+namespace {
+  //! ConstantSDNode predicate for i64 sign-extended, 10-bit immediates
+  bool
+  isI64IntS10Immediate(ConstantSDNode *CN)
+  {
+    return isS10Constant(CN->getValue());
+  }
+
+  //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
+  bool
+  isI32IntS10Immediate(ConstantSDNode *CN)
+  {
+    return isS10Constant((int) CN->getValue());
+  }
+
+#if 0
+  //! SDNode predicate for sign-extended, 10-bit immediate values
+  bool
+  isI32IntS10Immediate(SDNode *N)
+  {
+    return (N->getOpcode() == ISD::Constant
+            && isI32IntS10Immediate(cast<ConstantSDNode>(N)));
+  }
+#endif
+
+  //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
+  bool
+  isI16IntS10Immediate(ConstantSDNode *CN)
+  {
+    return isS10Constant((short) CN->getValue());
+  }
+
+  //! SDNode predicate for i16 sign-extended, 10-bit immediate values
+  bool
+  isI16IntS10Immediate(SDNode *N)
+  {
+    return (N->getOpcode() == ISD::Constant
+            && isI16IntS10Immediate(cast<ConstantSDNode>(N)));
+  }
+
+  //! ConstantSDNode predicate for signed 16-bit values
+  /*!
+    \arg CN The constant SelectionDAG node holding the value
+    \arg Imm The returned 16-bit value, if returning true
+
+    This predicate tests the value in \a CN to see whether it can be
+    represented as a 16-bit, sign-extended quantity. Returns true if
+    this is the case.
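+
+    For example, the i32 constant 0xFFFF8000 survives the round trip
+    through a short (-32768) and qualifies, while 0x00018000 does not.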
+ */ + bool + isIntS16Immediate(ConstantSDNode *CN, short &Imm) + { + MVT::ValueType vt = CN->getValueType(0); + Imm = (short) CN->getValue(); + if (vt >= MVT::i1 && vt <= MVT::i16) { + return true; + } else if (vt == MVT::i32) { + int32_t i_val = (int32_t) CN->getValue(); + short s_val = (short) i_val; + return i_val == s_val; + } else { + int64_t i_val = (int64_t) CN->getValue(); + short s_val = (short) i_val; + return i_val == s_val; + } + + return false; + } + + //! SDNode predicate for signed 16-bit values. + bool + isIntS16Immediate(SDNode *N, short &Imm) + { + return (N->getOpcode() == ISD::Constant + && isIntS16Immediate(cast(N), Imm)); + } + + //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. + static bool + isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) + { + MVT::ValueType vt = FPN->getValueType(0); + if (vt == MVT::f32) { + const APFloat &apf = FPN->getValueAPF(); + float fval = apf.convertToFloat(); + int val = *((int *) &fval); + int sval = (int) ((val << 16) >> 16); + Imm = (short) val; + return val == sval; + } + + return false; + } + + //===------------------------------------------------------------------===// + //! MVT::ValueType to useful stuff structure: + + struct valtype_map_s { + MVT::ValueType VT; + unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined) + int prefslot_byte; /// Byte offset of the "preferred" slot + unsigned brcc_eq_ins; /// br_cc equal instruction + unsigned brcc_neq_ins; /// br_cc not equal instruction + }; + + const valtype_map_s valtype_map[] = { + { MVT::i1, 0, 3, 0, 0 }, + { MVT::i8, 0, 3, 0, 0 }, + { MVT::i16, SPU::ORHIr16, 2, SPU::BRHZ, SPU::BRHNZ }, + { MVT::i32, SPU::ORIr32, 0, SPU::BRZ, SPU::BRNZ }, + { MVT::i64, SPU::ORIr64, 0, 0, 0 }, + { MVT::f32, SPU::ORIf32, 0, 0, 0 }, + { MVT::f64, SPU::ORIf64, 0, 0, 0 } + }; + + const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); + + const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) + { + const valtype_map_s *retval = 0; + for (size_t i = 0; i < n_valtype_map; ++i) { + if (valtype_map[i].VT == VT) { + retval = valtype_map + i; + break; + } + } + + +#ifndef NDEBUG + if (retval == 0) { + cerr << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for " + << MVT::getValueTypeString(VT) + << "\n"; + abort(); + } +#endif + + return retval; + } +} + +//===--------------------------------------------------------------------===// +/// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine +/// instructions for SelectionDAG operations. +/// +class SPUDAGToDAGISel : + public SelectionDAGISel +{ + SPUTargetMachine &TM; + SPUTargetLowering &SPUtli; + unsigned GlobalBaseReg; + +public: + SPUDAGToDAGISel(SPUTargetMachine &tm) : + SelectionDAGISel(*tm.getTargetLowering()), + TM(tm), + SPUtli(*tm.getTargetLowering()) + {} + + virtual bool runOnFunction(Function &Fn) { + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + SelectionDAGISel::runOnFunction(Fn); + return true; + } + + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDOperand getI32Imm(uint32_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDOperand getI64Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } + + /// getSmallIPtrImm - Return a target constant of pointer type. 
+ inline SDOperand getSmallIPtrImm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); + } + + /// Select - Convert the specified operand from a target-independent to a + /// target-specific node if it hasn't already been changed. + SDNode *Select(SDOperand Op); + + /// Return true if the address N is a RI7 format address [r+imm] + bool SelectDForm2Addr(SDOperand Op, SDOperand N, SDOperand &Disp, + SDOperand &Base); + + //! Returns true if the address N is an A-form (local store) address + bool SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Index); + + //! D-form address predicate + bool SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Index); + + //! Address predicate if N can be expressed as an indexed [r+r] operation. + bool SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Index); + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op, + char ConstraintCode, + std::vector &OutOps, + SelectionDAG &DAG) { + SDOperand Op0, Op1; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + if (!SelectDFormAddr(Op, Op, Op0, Op1) + && !SelectAFormAddr(Op, Op, Op0, Op1)) + SelectXFormAddr(Op, Op, Op0, Op1); + break; + case 'o': // offsetable + if (!SelectDFormAddr(Op, Op, Op0, Op1) + && !SelectAFormAddr(Op, Op, Op0, Op1)) { + Op0 = Op; + AddToISelQueue(Op0); // r+0. + Op1 = getSmallIPtrImm(0); + } + break; + case 'v': // not offsetable +#if 1 + assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled."); +#else + SelectAddrIdxOnly(Op, Op, Op0, Op1); +#endif + break; + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; + } + + /// InstructionSelectBasicBlock - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelectBasicBlock(SelectionDAG &DAG); + + virtual const char *getPassName() const { + return "Cell SPU DAG->DAG Pattern Instruction Selection"; + } + + /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for + /// this target when scheduling the DAG. + virtual HazardRecognizer *CreateTargetHazardRecognizer() { + const TargetInstrInfo *II = SPUtli.getTargetMachine().getInstrInfo(); + assert(II && "No InstrInfo?"); + return new SPUHazardRecognizer(*II); + } + + // Include the pieces autogenerated from the target description. +#include "SPUGenDAGISel.inc" +}; + +/// InstructionSelectBasicBlock - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. +void +SPUDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) +{ + DEBUG(BB->dump()); + + // Select target instructions for the DAG. + DAG.setRoot(SelectRoot(DAG.getRoot())); + DAG.RemoveDeadNodes(); + + // Emit machine code to BB. 
+ ScheduleAndEmitDAG(DAG); +} + +bool +SPUDAGToDAGISel::SelectDForm2Addr(SDOperand Op, SDOperand N, SDOperand &Disp, + SDOperand &Base) { + unsigned Opc = N.getOpcode(); + unsigned VT = N.getValueType(); + MVT::ValueType PtrVT = SPUtli.getPointerTy(); + ConstantSDNode *CN = 0; + int Imm; + + if (Opc == ISD::ADD) { + SDOperand Op0 = N.getOperand(0); + SDOperand Op1 = N.getOperand(1); + if (Op1.getOpcode() == ISD::Constant || + Op1.getOpcode() == ISD::TargetConstant) { + CN = cast(Op1); + Imm = int(CN->getValue()); + if (Imm <= 0xff) { + Disp = CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); + Base = Op0; + return true; + } + } + } else if (Opc == ISD::GlobalAddress + || Opc == ISD::TargetGlobalAddress + || Opc == ISD::Register) { + // Plain old local store address: + Disp = CurDAG->getTargetConstant(0, VT); + Base = N; + return true; + } else if (Opc == SPUISD::DFormAddr) { + // D-Form address: This is pretty straightforward, naturally... + CN = cast(N.getOperand(1)); + assert(CN != 0 && "SelectDFormAddr/SPUISD::DForm2Addr expecting constant"); + Imm = unsigned(CN->getValue()); + if (Imm < 0xff) { + Disp = CurDAG->getTargetConstant(CN->getValue(), PtrVT); + Base = N.getOperand(0); + return true; + } + } + + return false; +} + +/*! + \arg Op The ISD instructio operand + \arg N The address to be tested + \arg Base The base address + \arg Index The base address index + */ +bool +SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Index) { + // These match the addr256k operand type: + MVT::ValueType PtrVT = SPUtli.getPointerTy(); + MVT::ValueType OffsVT = MVT::i16; + + switch (N.getOpcode()) { + case ISD::Constant: + case ISD::TargetConstant: { + // Loading from a constant address. + ConstantSDNode *CN = dyn_cast(N); + int Imm = (int)CN->getValue(); + if (Imm < 0x3ffff && (Imm & 0x3) == 0) { + Base = CurDAG->getTargetConstant(Imm, PtrVT); + // Note that this operand will be ignored by the assembly printer... + Index = CurDAG->getTargetConstant(0, OffsVT); + return true; + } + } + case ISD::ConstantPool: + case ISD::TargetConstantPool: { + // The constant pool address is N. Base is a dummy that will be ignored by + // the assembly printer. + Base = N; + Index = CurDAG->getTargetConstant(0, OffsVT); + return true; + } + + case ISD::GlobalAddress: + case ISD::TargetGlobalAddress: { + // The global address is N. Base is a dummy that is ignored by the + // assembly printer. + Base = N; + Index = CurDAG->getTargetConstant(0, OffsVT); + return true; + } + } + + return false; +} + +/*! + \arg Op The ISD instruction (ignored) + \arg N The address to be tested + \arg Base Base address register/pointer + \arg Index Base address index + + Examine the input address by a base register plus a signed 10-bit + displacement, [r+I10] (D-form address). + + \return true if \a N is a D-form address with \a Base and \a Index set + to non-empty SDOperand instances. 
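+
+  For example, a 16-byte-aligned stack slot such as [$sp + 32] fits the
+  signed 10-bit displacement field and can be selected as a D-form access;
+  an offset outside that range cannot.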
+*/
+bool
+SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
+                                 SDOperand &Index) {
+  unsigned Opc = N.getOpcode();
+  unsigned PtrTy = SPUtli.getPointerTy();
+
+  if (Opc == ISD::Register) {
+    Base = N;
+    Index = CurDAG->getTargetConstant(0, PtrTy);
+    return true;
+  } else if (Opc == ISD::FrameIndex) {
+    // Stack frame index must be less than 512 (divided by 16):
+    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
+    DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
+          << FI->getIndex() << "\n");
+    if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
+      Base = CurDAG->getTargetConstant(0, PtrTy);
+      Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
+      return true;
+    }
+  } else if (Opc == ISD::ADD) {
+    // Generated by getelementptr
+    const SDOperand Op0 = N.getOperand(0); // Frame index/base
+    const SDOperand Op1 = N.getOperand(1); // Offset within base
+    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
+
+    // Not a constant?
+    if (CN == 0)
+      return false;
+
+    int32_t offset = (int32_t) CN->getSignExtended();
+    unsigned Opc0 = Op0.getOpcode();
+
+    if ((offset & 0xf) != 0) {
+      cerr << "SelectDFormAddr: unaligned offset = " << offset << "\n";
+      abort();
+      /*NOTREACHED*/
+    }
+
+    if (Opc0 == ISD::FrameIndex) {
+      FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0);
+      DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+            << " frame index = " << FI->getIndex() << "\n");
+
+      if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
+        Base = CurDAG->getTargetConstant(offset, PtrTy);
+        Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
+        return true;
+      }
+    } else if (offset > SPUFrameInfo::minFrameOffset()
+               && offset < SPUFrameInfo::maxFrameOffset()) {
+      Base = CurDAG->getTargetConstant(offset, PtrTy);
+      if (Opc0 == ISD::GlobalAddress) {
+        // Convert global address to target global address
+        GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0);
+        Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy);
+        return true;
+      } else {
+        // Otherwise, just take operand 0
+        Index = Op0;
+        return true;
+      }
+    }
+  } else if (Opc == SPUISD::DFormAddr) {
+    // D-Form address: This is pretty straightforward, naturally...
+    ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
+    assert(CN != 0 && "SelectDFormAddr/SPUISD::DFormAddr expecting constant");
+    Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy);
+    Index = N.getOperand(0);
+    return true;
+  }
+
+  return false;
+}
+
+/*!
+  \arg Op The ISD instruction operand
+  \arg N The address operand
+  \arg Base The base pointer operand
+  \arg Index The offset/index operand
+
+  If the address \a N can be expressed as an [r + s10imm] address, returns
+  false.  Otherwise, creates two operands, Base and Index, that will become
+  the [r+r] address.
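+
+  For example, a global materialized as (SPUISD::Hi(sym) + SPUISD::Lo(sym))
+  has no single 10-bit displacement, so its two halves become the [r+r]
+  operands of an X-form access.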
+*/ +bool +SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Index) { + if (SelectAFormAddr(Op, N, Base, Index) + || SelectDFormAddr(Op, N, Base, Index)) + return false; + + unsigned Opc = N.getOpcode(); + + if (Opc == ISD::ADD) { + SDOperand N1 = N.getOperand(0); + SDOperand N2 = N.getOperand(1); + unsigned N1Opc = N1.getOpcode(); + unsigned N2Opc = N2.getOpcode(); + + if ((N1Opc == SPUISD::Hi && N2Opc == SPUISD::Lo) + || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)) { + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; + } else { + cerr << "SelectXFormAddr: Unhandled ADD operands:\n"; + N1.Val->dump(); + cerr << "\n"; + N2.Val->dump(); + cerr << "\n"; + abort(); + /*UNREACHED*/ + } + } else if (N.getNumOperands() == 2) { + SDOperand N1 = N.getOperand(0); + SDOperand N2 = N.getOperand(1); + unsigned N1Opc = N1.getOpcode(); + unsigned N2Opc = N2.getOpcode(); + + if ((N1Opc == ISD::CopyToReg || N1Opc == ISD::Register) + && (N2Opc == ISD::CopyToReg || N2Opc == ISD::Register)) { + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; + /*UNREACHED*/ + } else { + cerr << "SelectXFormAddr: 2-operand unhandled operand:\n"; + N.Val->dump(); + cerr << "\n"; + abort(); + /*UNREACHED*/ + } + } else { + cerr << "SelectXFormAddr: Unhandled operand type:\n"; + N.Val->dump(); + cerr << "\n"; + abort(); + /*UNREACHED*/ + } + + return false; +} + +//! Convert the operand from a target-independent to a target-specific node +/*! + */ +SDNode * +SPUDAGToDAGISel::Select(SDOperand Op) { + SDNode *N = Op.Val; + unsigned Opc = N->getOpcode(); + + if (Opc >= ISD::BUILTIN_OP_END && Opc < SPUISD::FIRST_NUMBER) { + return NULL; // Already selected. + } else if (Opc == ISD::FrameIndex) { + // Selects to AIr32 FI, 0 which in turn will become AIr32 SP, imm. + int FI = cast(N)->getIndex(); + SDOperand TFI = CurDAG->getTargetFrameIndex(FI, SPUtli.getPointerTy()); + + DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AI32 TFI, 0\n"); + return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, + CurDAG->getTargetConstant(0, MVT::i32)); + } else if (Opc == SPUISD::LDRESULT) { + // Custom select instructions for LDRESULT + unsigned VT = N->getValueType(0); + SDOperand Arg = N->getOperand(0); + SDOperand Chain = N->getOperand(1); + SDOperand Zero = CurDAG->getTargetConstant(0, VT); + SDNode *Result; + const valtype_map_s *vtm = getValueTypeMapEntry(VT); + + if (vtm->ldresult_ins == 0) { + cerr << "LDRESULT for unsupported type: " + << MVT::getValueTypeString(VT) + << "\n"; + abort(); + } else + Opc = vtm->ldresult_ins; + + AddToISelQueue(Arg); + AddToISelQueue(Zero); + AddToISelQueue(Chain); + Result = CurDAG->SelectNodeTo(N, Opc, VT, MVT::Other, Arg, Zero, Chain); + Chain = SDOperand(Result, 1); + return Result; + } + + return SelectCode(Op); +} + +/// createPPCISelDag - This pass converts a legalized DAG into a +/// SPU-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { + return new SPUDAGToDAGISel(TM); +} diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp new file mode 100644 index 00000000000..91c0024d744 --- /dev/null +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -0,0 +1,2673 @@ +//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. 
+//
+// See README.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPURegisterNames.h"
+#include "SPUISelLowering.h"
+#include "SPUTargetMachine.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include <map>
+
+using namespace llvm;
+
+// Used in getTargetNodeName() below
+namespace {
+  std::map<unsigned, const char *> node_names;
+
+  //! MVT::ValueType mapping to useful data for Cell SPU
+  struct valtype_map_s {
+    const MVT::ValueType valtype;
+    const int prefslot_byte;
+  };
+
+  const valtype_map_s valtype_map[] = {
+    { MVT::i1,   3 },
+    { MVT::i8,   3 },
+    { MVT::i16,  2 },
+    { MVT::i32,  0 },
+    { MVT::f32,  0 },
+    { MVT::i64,  0 },
+    { MVT::f64,  0 },
+    { MVT::i128, 0 }
+  };
+
+  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
+
+  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
+    const valtype_map_s *retval = 0;
+
+    for (size_t i = 0; i < n_valtype_map; ++i) {
+      if (valtype_map[i].valtype == VT) {
+        retval = valtype_map + i;
+        break;
+      }
+    }
+
+#ifndef NDEBUG
+    if (retval == 0) {
+      cerr << "getValueTypeMapEntry returns NULL for "
+           << MVT::getValueTypeString(VT)
+           << "\n";
+      abort();
+    }
+#endif
+
+    return retval;
+  }
+
+  //! Predicate that returns true if operand is a memory target
+  /*!
+    \arg Op Operand to test
+    \return true if the operand is a memory target (i.e., global
+    address, external symbol, constant pool) or an existing D-Form
+    address.
+   */
+  bool isMemoryOperand(const SDOperand &Op)
+  {
+    const unsigned Opc = Op.getOpcode();
+    return (Opc == ISD::GlobalAddress
+            || Opc == ISD::GlobalTLSAddress
+            || Opc == ISD::FrameIndex
+            || Opc == ISD::JumpTable
+            || Opc == ISD::ConstantPool
+            || Opc == ISD::ExternalSymbol
+            || Opc == ISD::TargetGlobalAddress
+            || Opc == ISD::TargetGlobalTLSAddress
+            || Opc == ISD::TargetFrameIndex
+            || Opc == ISD::TargetJumpTable
+            || Opc == ISD::TargetConstantPool
+            || Opc == ISD::TargetExternalSymbol
+            || Opc == SPUISD::DFormAddr);
+  }
+}
+
+SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
+  : TargetLowering(TM),
+    SPUTM(TM)
+{
+  // Fold away setcc operations if possible.
+  setPow2DivIsCheap();
+
+  // Use _setjmp/_longjmp instead of setjmp/longjmp.
+  setUseUnderscoreSetJmp(true);
+  setUseUnderscoreLongJmp(true);
+
+  // Set up the SPU's register classes:
+  // NOTE: i8 register class is not registered because we cannot determine when
+  // we need to zero or sign extend for custom-lowered loads and stores.
+  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
+  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
+  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
+  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
+  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
+  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+
+  // SPU has no sign or zero extended loads for i1, i8, i16:
+  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
+  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
+  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+  setStoreXAction(MVT::i1, Custom);
+
+  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
+  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
+  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+  setStoreXAction(MVT::i8, Custom);
+
+  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
+  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
+  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+
+  // SPU constant load actions are custom lowered:
+  setOperationAction(ISD::Constant, MVT::i64, Custom);
+  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+  // SPU's loads and stores have to be custom lowered:
+  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
+       ++sctype) {
+    setOperationAction(ISD::LOAD, sctype, Custom);
+    setOperationAction(ISD::STORE, sctype, Custom);
+  }
+
+  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
+  // into BR_CCs. BR_CC instructions are custom selected in
+  // SPUDAGToDAGISel.
+  setOperationAction(ISD::BRCOND, MVT::Other, Legal);
+
+  // Expand the jumptable branches
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+  // SPU has no intrinsics for these particular operations:
+  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
+  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
+  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
+
+  // SPU has no SREM/UREM instructions
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i64, Expand);
+  setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+  // We don't support sin/cos/sqrt/fmod
+  setOperationAction(ISD::FSIN , MVT::f64, Expand);
+  setOperationAction(ISD::FCOS , MVT::f64, Expand);
+  setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FSIN , MVT::f32, Expand);
+  setOperationAction(ISD::FCOS , MVT::f32, Expand);
+  setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+  // SPU has no hardware square root; expand FSQRT.
+  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+  // SPU can do rotate right and left, so legalize it... but customize for i8
+  // because instructions don't exist.
+ setOperationAction(ISD::ROTR, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i16, Legal); + setOperationAction(ISD::ROTR, MVT::i8, Custom); + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTL, MVT::i16, Legal); + setOperationAction(ISD::ROTL, MVT::i8, Custom); + // SPU has no native version of shift left/right for i8 + setOperationAction(ISD::SHL, MVT::i8, Custom); + setOperationAction(ISD::SRL, MVT::i8, Custom); + setOperationAction(ISD::SRA, MVT::i8, Custom); + + // Custom lower i32 multiplications + setOperationAction(ISD::MUL, MVT::i32, Custom); + + // Need to custom handle (some) common i8 math ops + setOperationAction(ISD::SUB, MVT::i8, Custom); + setOperationAction(ISD::MUL, MVT::i8, Custom); + + // SPU does not have BSWAP. It does have i32 support CTLZ. + // CTPOP has to be custom lowered. + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i8, Custom); + setOperationAction(ISD::CTPOP, MVT::i16, Custom); + setOperationAction(ISD::CTPOP, MVT::i32, Custom); + setOperationAction(ISD::CTPOP, MVT::i64, Custom); + + setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ , MVT::i64, Expand); + + setOperationAction(ISD::CTLZ , MVT::i32, Legal); + + // SPU does not have select or setcc + setOperationAction(ISD::SELECT, MVT::i1, Expand); + setOperationAction(ISD::SELECT, MVT::i8, Expand); + setOperationAction(ISD::SELECT, MVT::i16, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + + setOperationAction(ISD::SETCC, MVT::i1, Expand); + setOperationAction(ISD::SETCC, MVT::i8, Expand); + setOperationAction(ISD::SETCC, MVT::i16, Expand); + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::i64, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + + // SPU has a legal FP -> signed INT instruction + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + + // FDIV on SPU requires custom lowering + setOperationAction(ISD::FDIV, MVT::f32, Custom); + //setOperationAction(ISD::FDIV, MVT::f64, Custom); + + // SPU has [U|S]INT_TO_FP + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand); + + // We cannot sextinreg(i1). Expand to shifts. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // Support label based line numbers. 
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + + // We want to legalize GlobalAddress and ConstantPool nodes into the + // appropriate instructions to materialize the address. + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::f32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::f64, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + + // RET must be custom lowered, to meet ABI requirements + setOperationAction(ISD::RET, MVT::Other, Custom); + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex + setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // Use the default implementation. + setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); + + // Cell SPU has instructions for converting between i64 and fp. + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + + // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); + + // BUILD_PAIR can't be handled natively, and should be expanded to shl/or + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + + // First set operation action for all vector types to expand. Then we + // will selectively turn on ones that can be effectively codegen'd. + addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); + + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + // add/sub are legal for all supported vector VT's. + setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal); + // mul has to be custom lowered. 
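+    // (SPU multiplies are 16 x 16 -> 32 bit, so wider vector multiplies
+    // have to be assembled from partial products; see LowerVectorMUL below.)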
+ setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom); + + setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal); + setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal); + + // These operations need to be expanded: + setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom); + + // Custom lower build_vector, constant pool spills, insert and + // extract vector elements: + setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); + } + + setOperationAction(ISD::MUL, MVT::v16i8, Custom); + setOperationAction(ISD::AND, MVT::v16i8, Custom); + setOperationAction(ISD::OR, MVT::v16i8, Custom); + setOperationAction(ISD::XOR, MVT::v16i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); + + setSetCCResultType(MVT::i32); + setShiftAmountType(MVT::i32); + setSetCCResultContents(ZeroOrOneSetCCResult); + + setStackPointerRegisterToSaveRestore(SPU::R1); + + // We have target-specific dag combine patterns for the following nodes: + // e.g., setTargetDAGCombine(ISD::SUB); + + computeRegisterProperties(); +} + +const char * +SPUTargetLowering::getTargetNodeName(unsigned Opcode) const +{ + if (node_names.empty()) { + node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG"; + node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; + node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; + node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; + node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr"; + node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr"; + node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; + node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; + node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; + node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK"; + node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; + node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR"; + node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0"; + node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED"; + node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT"; + node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT"; + node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT"; + node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT"; + node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; + node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; + node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; + node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; + node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; + node_names[(unsigned) SPUISD::VEC_SRL] 
= "SPUISD::VEC_SRL"; + node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; + node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; + node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; + node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] = + "SPUISD::ROTBYTES_RIGHT_Z"; + node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] = + "SPUISD::ROTBYTES_RIGHT_S"; + node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; + node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] = + "SPUISD::ROTBYTES_LEFT_CHAINED"; + node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI"; + node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; + node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant"; + node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; + node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; + node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; + } + + std::map::iterator i = node_names.find(Opcode); + + return ((i != node_names.end()) ? i->second : 0); +} + +//===----------------------------------------------------------------------===// +// Calling convention code: +//===----------------------------------------------------------------------===// + +#include "SPUGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// LowerOperation implementation +//===----------------------------------------------------------------------===// + +/// Custom lower loads for CellSPU +/*! + All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements + within a 16-byte block, we have to rotate to extract the requested element. + */ +static SDOperand +LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + LoadSDNode *LN = cast(Op); + SDOperand basep = LN->getBasePtr(); + SDOperand the_chain = LN->getChain(); + MVT::ValueType VT = LN->getLoadedVT(); + MVT::ValueType OpVT = Op.Val->getValueType(0); + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + ISD::LoadExtType ExtType = LN->getExtensionType(); + unsigned alignment = LN->getAlignment(); + const valtype_map_s *vtm = getValueTypeMapEntry(VT); + SDOperand Ops[8]; + + // For an extending load of an i1 variable, just call it i8 (or whatever we + // were passed) and make it zero-extended: + if (VT == MVT::i1) { + VT = OpVT; + ExtType = ISD::ZEXTLOAD; + } + + switch (LN->getAddressingMode()) { + case ISD::UNINDEXED: { + SDOperand result; + SDOperand rot_op, rotamt; + SDOperand ptrp; + int c_offset; + int c_rotamt; + + // The vector type we really want to be when we load the 16-byte chunk + MVT::ValueType vecVT, opVecVT; + + if (VT != MVT::i1) + vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + else + vecVT = MVT::v16i8; + + opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT))); + + if (basep.getOpcode() == ISD::ADD) { + const ConstantSDNode *CN = cast(basep.Val->getOperand(1)); + + assert(CN != NULL + && "LowerLOAD: ISD::ADD operand 1 is not constant"); + + c_offset = (int) CN->getValue(); + c_rotamt = (int) (c_offset & 0xf); + + // Adjust the rotation amount to ensure that the final result ends up in + // the preferred slot: + c_rotamt -= vtm->prefslot_byte; + ptrp = basep.getOperand(0); + } else { + c_offset = 0; + c_rotamt = -vtm->prefslot_byte; + ptrp = basep; + } + + if (alignment == 16) { + // 16-byte aligned load into preferred slot, no rotation + if (c_rotamt == 0) { + if (isMemoryOperand(ptrp)) + // Return unchanged + return SDOperand(); + else { + // 
Return modified D-Form address for pointer: + ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT, + ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT)); + if (VT == OpVT) + return DAG.getLoad(VT, LN->getChain(), ptrp, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + else + return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(), + LN->getSrcValueOffset(), OpVT, + LN->isVolatile(), 16); + } + } else { + // Need to rotate... + if (c_rotamt < 0) + c_rotamt += 16; + // Realign the base pointer, with a D-Form address + if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp)) + basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, + ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32)); + else + basep = ptrp; + + // Rotate the load: + rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + the_chain = rot_op.getValue(1); + rotamt = DAG.getConstant(c_rotamt, MVT::i16); + + SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); + Ops[0] = the_chain; + Ops[1] = rot_op; + Ops[2] = rotamt; + + result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3); + the_chain = result.getValue(1); + + if (VT == OpVT || ExtType == ISD::EXTLOAD) { + SDVTList scalarvts; + Ops[0] = the_chain; + Ops[1] = result; + if (OpVT == VT) { + scalarvts = DAG.getVTList(VT, MVT::Other); + } else { + scalarvts = DAG.getVTList(OpVT, MVT::Other); + } + + result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT), + result); + Ops[0] = the_chain; + Ops[1] = result; + result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2); + the_chain = result.getValue(1); + } else { + // Handle the sign and zero-extending loads for i1 and i8: + unsigned NewOpC; + + if (ExtType == ISD::SEXTLOAD) { + NewOpC = (OpVT == MVT::i1 + ? SPUISD::EXTRACT_I1_SEXT + : SPUISD::EXTRACT_I8_SEXT); + } else if (ExtType == ISD::ZEXTLOAD) { + NewOpC = (OpVT == MVT::i1 + ? SPUISD::EXTRACT_I1_ZEXT + : SPUISD::EXTRACT_I8_ZEXT); + } + + result = DAG.getNode(NewOpC, OpVT, result); + } + + SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); + SDOperand retops[2] = { result, the_chain }; + + result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); + return result; + /*UNREACHED*/ + } + } else { + // Misaligned 16-byte load: + if (basep.getOpcode() == ISD::LOAD) { + LN = cast(basep); + if (LN->getAlignment() == 16) { + // We can verify that we're really loading from a 16-byte aligned + // chunk. 
Encapsulate basep as a D-Form address and return a new + // load: + basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep, + DAG.getConstant(0, PtrVT)); + if (OpVT == VT) + return DAG.getLoad(VT, LN->getChain(), basep, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + else + return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep, + LN->getSrcValue(), LN->getSrcValueOffset(), + OpVT, LN->isVolatile(), 16); + } + } + + // Catch all other cases where we can't guarantee that we have a + // 16-byte aligned entity, which means resorting to an X-form + // address scheme: + + SDOperand ZeroOffs = DAG.getConstant(0, PtrVT); + SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs); + SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs); + + ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp); + + SDOperand alignLoad = + DAG.getLoad(opVecVT, LN->getChain(), ptrp, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + + SDOperand insertEltOp = + DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp); + + result = DAG.getNode(SPUISD::SHUFB, opVecVT, + alignLoad, + alignLoad, + DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp)); + + result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result); + + SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); + SDOperand retops[2] = { result, the_chain }; + + result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); + return result; + } + break; + } + case ISD::PRE_INC: + case ISD::PRE_DEC: + case ISD::POST_INC: + case ISD::POST_DEC: + case ISD::LAST_INDEXED_MODE: + cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " + "UNINDEXED\n"; + cerr << (unsigned) LN->getAddressingMode() << "\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +/// Custom lower stores for CellSPU +/*! + All CellSPU stores are aligned to 16-byte boundaries, so for elements + within a 16-byte block, we have to generate a shuffle to insert the + requested element into its place, then store the resulting block. + */ +static SDOperand +LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + StoreSDNode *SN = cast(Op); + SDOperand Value = SN->getValue(); + MVT::ValueType VT = Value.getValueType(); + MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT()); + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDOperand the_chain = SN->getChain(); + unsigned alignment = SN->getAlignment(); + const valtype_map_s *vtm = getValueTypeMapEntry(VT); + + switch (SN->getAddressingMode()) { + case ISD::UNINDEXED: { + SDOperand basep = SN->getBasePtr(); + SDOperand ptrOp; + int offset; + + if (basep.getOpcode() == ISD::ADD) { + const ConstantSDNode *CN = cast(basep.Val->getOperand(1)); + assert(CN != NULL + && "LowerSTORE: ISD::ADD operand 1 is not constant"); + offset = unsigned(CN->getValue()); + ptrOp = basep.getOperand(0); + DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = " + << offset + << "\n"); + } else { + ptrOp = basep; + offset = 0; + } + + // The vector type we really want to load from the 16-byte chunk, except + // in the case of MVT::i1, which has to be v16i8. + unsigned vecVT, stVecVT; + + if (StVT != MVT::i1) + stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT))); + else + stVecVT = MVT::v16i8; + vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + + // Realign the pointer as a D-Form address (ptrOp is the pointer, + // to force a register load with the address; basep is the actual + // dform addr offs($reg). 
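+    // (For example, a store to $3 + 18 realigns to basep = 16($3), and the
+    // value is inserted at byte offset 2 within that quadword.)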
+    ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
+                        DAG.getConstant(0, PtrVT));
+    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
+                        ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));
+
+    // Create the 16-byte aligned vector load
+    SDOperand alignLoad =
+      DAG.getLoad(vecVT, the_chain, basep,
+                  SN->getSrcValue(), SN->getSrcValueOffset(),
+                  SN->isVolatile(), 16);
+    the_chain = alignLoad.getValue(1);
+
+    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
+    SDOperand theValue = SN->getValue();
+    SDOperand result;
+
+    if (StVT != VT
+        && (theValue.getOpcode() == ISD::AssertZext
+            || theValue.getOpcode() == ISD::AssertSext)) {
+      // Drill down and get the value for zero- and sign-extended
+      // quantities
+      theValue = theValue.getOperand(0);
+    }
+
+    SDOperand insertEltOp =
+      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
+                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
+                              ptrOp,
+                              DAG.getConstant((offset & 0xf), PtrVT)));
+
+    result = DAG.getNode(SPUISD::SHUFB, vecVT,
+                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
+                         alignLoad,
+                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
+
+    result = DAG.getStore(the_chain, result, basep,
+                          LN->getSrcValue(), LN->getSrcValueOffset(),
+                          LN->isVolatile(), LN->getAlignment());
+
+    return result;
+    /*UNREACHED*/
+  }
+  case ISD::PRE_INC:
+  case ISD::PRE_DEC:
+  case ISD::POST_INC:
+  case ISD::POST_DEC:
+  case ISD::LAST_INDEXED_MODE:
+    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
+            "UNINDEXED\n";
+    cerr << (unsigned) SN->getAddressingMode() << "\n";
+    abort();
+    /*NOTREACHED*/
+  }
+
+  return SDOperand();
+}
+
+/// Generate the address of a constant pool entry.
+static SDOperand
+LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  MVT::ValueType PtrVT = Op.getValueType();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  Constant *C = CP->getConstVal();
+  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+  const TargetMachine &TM = DAG.getTarget();
+  SDOperand Zero = DAG.getConstant(0, PtrVT);
+
+  if (TM.getRelocationModel() == Reloc::Static) {
+    if (!ST->usingLargeMem()) {
+      // Just return the SDOperand with the constant pool address in it.
+      return CPI;
+    } else {
+      // Generate hi/lo address pair
+      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
+      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
+
+      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
+    }
+  }
+
+  assert(0 &&
+         "LowerConstantPool: Relocation model other than static not supported.");
+  return SDOperand();
+}
+
+static SDOperand
+LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  MVT::ValueType PtrVT = Op.getValueType();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+  SDOperand Zero = DAG.getConstant(0, PtrVT);
+  const TargetMachine &TM = DAG.getTarget();
+
+  if (TM.getRelocationModel() == Reloc::Static) {
+    if (!ST->usingLargeMem()) {
+      // Just return the SDOperand with the jump table address in it.
+ return JTI; + } else { + // Generate hi/lo address pair + SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero); + SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero); + + return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); + } + } + + assert(0 && + "LowerJumpTable: Relocation model other than static not supported."); + return SDOperand(); +} + +static SDOperand +LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + MVT::ValueType PtrVT = Op.getValueType(); + GlobalAddressSDNode *GSDN = cast(Op); + GlobalValue *GV = GSDN->getGlobal(); + SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); + SDOperand Zero = DAG.getConstant(0, PtrVT); + const TargetMachine &TM = DAG.getTarget(); + + if (TM.getRelocationModel() == Reloc::Static) { + if (!ST->usingLargeMem()) { + // Generate a local store address + return GA; + } else { + // Generate hi/lo address pair + SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); + SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); + + return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); + } + } else { + cerr << "LowerGlobalAddress: Relocation model other than static not " + << "supported.\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +//! Custom lower i64 integer constants +/*! + This code inserts all of the necessary juggling that needs to occur to load + a 64-bit constant into a register. + */ +static SDOperand +LowerConstant(SDOperand Op, SelectionDAG &DAG) { + unsigned VT = Op.getValueType(); + ConstantSDNode *CN = cast(Op.Val); + + if (VT == MVT::i64) { + SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64); + return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); + + } else { + cerr << "LowerConstant: unhandled constant type " + << MVT::getValueTypeString(VT) + << "\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +//! Custom lower single precision floating point constants +/*! + "float" immediates can be lowered as if they were unsigned 32-bit integers. + The SPUISD::SFPConstant pseudo-instruction handles this in the instruction + target description. + */ +static SDOperand +LowerConstantFP(SDOperand Op, SelectionDAG &DAG) { + unsigned VT = Op.getValueType(); + ConstantFPSDNode *FP = cast(Op.Val); + + assert((FP != 0) && + "LowerConstantFP: Node is not ConstantFPSDNode"); + + const APFloat &apf = FP->getValueAPF(); + + if (VT == MVT::f32) { + return DAG.getNode(SPUISD::SFPConstant, VT, + DAG.getTargetConstantFP(apf.convertToFloat(), VT)); + } else if (VT == MVT::f64) { + uint64_t dbits = DoubleToBits(apf.convertToDouble()); + return DAG.getNode(ISD::BIT_CONVERT, VT, + LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG)); + } + + return SDOperand(); +} + +static SDOperand +LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex) +{ + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + SSARegMap *RegMap = MF.getSSARegMap(); + SmallVector ArgValues; + SDOperand Root = Op.getOperand(0); + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + + const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); + const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); + + unsigned ArgOffset = SPUFrameInfo::minStackSize(); + unsigned ArgRegIdx = 0; + unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); + + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + // Add DAG nodes to load the arguments or copy them out of registers. 
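+  // (For non-vararg functions, the first NumArgRegs arguments arrive in the
+  // registers returned by SPURegisterInfo::getArgRegs(); overflow arguments
+  // are reloaded from fixed stack slots starting at
+  // SPUFrameInfo::minStackSize().)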
+ for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { + SDOperand ArgVal; + bool needsLoad = false; + MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType(); + unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8; + + switch (ObjectVT) { + default: { + cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " + << MVT::getValueTypeString(ObjectVT) + << "\n"; + abort(); + } + case MVT::i8: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::i16: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::i32: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::i64: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::f32: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::f64: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::v2f64: + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + } + + // We need to load the argument to a virtual register if we determined above + // that we ran out of physical registers of the appropriate type + if (needsLoad) { + // If the argument is actually used, emit a load from the right stack + // slot. + if (!Op.Val->hasNUsesOfValue(0, ArgNo)) { + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); + ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); + } else { + // Don't emit a dead load. + ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT); + } + + ArgOffset += StackSlotSize; + } + + ArgValues.push_back(ArgVal); + } + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. 
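+  // (The VASTART custom lowering can then simply materialize this frame
+  // index when expanding llvm.va_start.)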
+ if (isVarArg) { + VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8, + ArgOffset); + SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + // If this function is vararg, store any remaining integer argument regs to + // their spots on the stack so that they may be loaded by deferencing the + // result of va_next. + SmallVector MemOps; + for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); + SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); + } + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size()); + } + + ArgValues.push_back(Root); + + // Return the new list of results. + std::vector RetVT(Op.Val->value_begin(), + Op.Val->value_end()); + return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size()); +} + +/// isLSAAddress - Return the immediate to use if the specified +/// value is representable as a LSA address. +static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) { + ConstantSDNode *C = dyn_cast(Op); + if (!C) return 0; + + int Addr = C->getValue(); + if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. + (Addr << 14 >> 14) != Addr) + return 0; // Top 14 bits have to be sext of immediate. + + return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val; +} + +static +SDOperand +LowerCALL(SDOperand Op, SelectionDAG &DAG) { + SDOperand Chain = Op.getOperand(0); +#if 0 + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; +#endif + SDOperand Callee = Op.getOperand(4); + unsigned NumOps = (Op.getNumOperands() - 5) / 2; + unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); + const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); + const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); + + // Handy pointer type + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + // Accumulate how many bytes are to be pushed on the stack, including the + // linkage area, and parameter passing area. According to the SPU ABI, + // we minimally need space for [LR] and [SP] + unsigned NumStackBytes = SPUFrameInfo::minStackSize(); + + // Set up a copy of the stack pointer for use loading and storing any + // arguments that may not fit in the registers available for argument + // passing. + SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32); + + // Figure out which arguments are going to go in registers, and which in + // memory. + unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR] + unsigned ArgRegIdx = 0; + + // Keep track of registers passing arguments + std::vector > RegsToPass; + // And the arguments passed on the stack + SmallVector MemOpChains; + + for (unsigned i = 0; i != NumOps; ++i) { + SDOperand Arg = Op.getOperand(5+2*i); + + // PtrOff will be used to store the current argument to the stack if a + // register cannot be found for it. 
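+    // (ArgOffset only advances when an argument actually spills to memory,
+    // so PtrOff always points at the next free stack slot.)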
+ SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff); + + switch (Arg.getValueType()) { + default: assert(0 && "Unexpected ValueType for argument!"); + case MVT::i32: + case MVT::i64: + case MVT::i128: + if (ArgRegIdx != NumArgRegs) { + RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + } else { + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += StackSlotSize; + } + break; + case MVT::f32: + case MVT::f64: + if (ArgRegIdx != NumArgRegs) { + RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + } else { + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += StackSlotSize; + } + break; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + if (ArgRegIdx != NumArgRegs) { + RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + } else { + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += StackSlotSize; + } + break; + } + } + + // Update number of stack bytes actually used, insert a call sequence start + NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize()); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT)); + + if (!MemOpChains.empty()) { + // Adjust the stack pointer for the stack arguments. + Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + } + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDOperand InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, + InFlag); + InFlag = Chain.getValue(1); + } + + std::vector NodeTys; + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + + SmallVector Ops; + unsigned CallOpc = SPUISD::CALL; + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + GlobalValue *GV = G->getGlobal(); + unsigned CalleeVT = Callee.getValueType(); + + // Turn calls to targets that are defined (i.e., have bodies) into BRSL + // style calls, otherwise, external symbols are BRASL calls. + // NOTE: + // This may be an unsafe assumption for JIT and really large compilation + // units. + if (GV->isDeclaration()) { + Callee = DAG.getGlobalAddress(GV, CalleeVT); + } else { + Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, + DAG.getTargetGlobalAddress(GV, CalleeVT), + DAG.getConstant(0, PtrVT)); + } + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) + Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); + else if (SDNode *Dest = isLSAAddress(Callee, DAG)) + // If this is an absolute destination address that appears to be a legal + // local store address, use the munged value. + Callee = SDOperand(Dest, 0); + + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are known live + // into the call. 
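+  // (Without these operands, the scheduler and register allocator could
+  // otherwise consider the argument registers dead across the call node.)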
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + if (InFlag.Val) + Ops.push_back(InFlag); + Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + SDOperand ResultVals[3]; + unsigned NumResults = 0; + NodeTys.clear(); + + // If the call has results, copy the values out of the ret val registers. + switch (Op.Val->getValueType(0)) { + default: assert(0 && "Unexpected ret value!"); + case MVT::Other: break; + case MVT::i32: + if (Op.Val->getValueType(1) == MVT::i32) { + Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, + Chain.getValue(2)).getValue(1); + ResultVals[1] = Chain.getValue(0); + NumResults = 2; + NodeTys.push_back(MVT::i32); + } else { + Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + } + NodeTys.push_back(MVT::i32); + break; + case MVT::i64: + Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + NodeTys.push_back(MVT::i64); + break; + case MVT::f32: + case MVT::f64: + Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0), + InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + NodeTys.push_back(Op.Val->getValueType(0)); + break; + case MVT::v2f64: + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0), + InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + NodeTys.push_back(Op.Val->getValueType(0)); + break; + } + + Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain, + DAG.getConstant(NumStackBytes, PtrVT)); + NodeTys.push_back(MVT::Other); + + // If the function returns void, just return the chain. + if (NumResults == 0) + return Chain; + + // Otherwise, merge everything together with a MERGE_VALUES node. + ResultVals[NumResults++] = Chain; + SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, + ResultVals, NumResults); + return Res.getValue(Op.ResNo); +} + +static SDOperand +LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) { + SmallVector RVLocs; + unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CCState CCInfo(CC, isVarArg, TM, RVLocs); + CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU); + + // If this is the first return lowered for this function, add the regs to the + // liveout set for the function. + if (DAG.getMachineFunction().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg()); + } + + SDOperand Chain = Op.getOperand(0); + SDOperand Flag; + + // Copy the result values into the output registers. 
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
+    Flag = Chain.getValue(1);
+  }
+
+  if (Flag.Val)
+    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
+  else
+    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering:
+//===----------------------------------------------------------------------===//
+
+static ConstantSDNode *
+getVecImm(SDNode *N) {
+  SDOperand OpVal(0, 0);
+
+  // Check to see if this buildvec has a single non-undef value in its elements.
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    if (OpVal.Val == 0)
+      OpVal = N->getOperand(i);
+    else if (OpVal != N->getOperand(i))
+      return 0;
+  }
+
+  if (OpVal.Val != 0) {
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+      return CN;
+    }
+  }
+
+  return 0; // All UNDEF: use implicit def; not a Constant node
+}
+
+/// get_vec_u18imm - Test if this vector is a vector filled with the same value
+/// and the value fits into an unsigned 18-bit constant, and if so, return the
+/// constant
+SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+                              MVT::ValueType ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    uint64_t Value = CN->getValue();
+    if (Value <= 0x3ffff)
+      return DAG.getConstant(Value, ValueType);
+  }
+
+  return SDOperand();
+}
+
+/// get_vec_i16imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 16-bit constant, and if so, return the
+/// constant
+SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+                              MVT::ValueType ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    if (ValueType == MVT::i32) {
+      int Value = (int) CN->getValue();
+      int SExtValue = ((Value & 0xffff) << 16) >> 16;
+
+      if (Value == SExtValue)
+        return DAG.getConstant(Value, ValueType);
+    } else if (ValueType == MVT::i16) {
+      short Value = (short) CN->getValue();
+      int SExtValue = ((int) Value << 16) >> 16;
+
+      if (Value == (short) SExtValue)
+        return DAG.getConstant(Value, ValueType);
+    } else if (ValueType == MVT::i64) {
+      int64_t Value = CN->getValue();
+      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
+
+      if (Value == SExtValue)
+        return DAG.getConstant(Value, ValueType);
+    }
+  }
+
+  return SDOperand();
+}
+
+/// get_vec_i10imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 10-bit constant, and if so, return the
+/// constant
+SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+                              MVT::ValueType ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    int Value = (int) CN->getValue();
+    if ((ValueType == MVT::i32 && isS10Constant(Value))
+        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
+      return DAG.getConstant(Value, ValueType);
+  }
+
+  return SDOperand();
+}
+
+/// get_vec_i8imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 8-bit constant, and if so, return the
+/// constant.
+///
+/// @note: The incoming vector is v16i8 because that's the only way we can load
+/// constant vectors. Thus, we test to see if the upper and lower bytes are the
+/// same value.
+SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+                             MVT::ValueType ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    int Value = (int) CN->getValue();
+    if (ValueType == MVT::i16
+        && Value <= 0xffff                 /* truncated from uint64_t */
+        && ((short) Value >> 8) == ((short) Value & 0xff))
+      return DAG.getConstant(Value & 0xff, ValueType);
+    else if (ValueType == MVT::i8
+             && (Value & 0xff) == Value)
+      return DAG.getConstant(Value, ValueType);
+  }
+
+  return SDOperand();
+}
+
+/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
+/// value and the value is suitable for an ILHU immediate (only the upper 16
+/// bits of each element are significant), and if so, return the constant
+/// shifted down into the lower 16 bits
+SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+                               MVT::ValueType ValueType) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    uint64_t Value = CN->getValue();
+    if ((ValueType == MVT::i32
+         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+      return DAG.getConstant(Value >> 16, ValueType);
+  }
+
+  return SDOperand();
+}
+
+/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
+SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
+  }
+
+  return SDOperand();
+}
+
+/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
+SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+  if (ConstantSDNode *CN = getVecImm(N)) {
+    return DAG.getConstant(CN->getValue(), MVT::i64);
+  }
+
+  return SDOperand();
+}
+
+// If this is a vector of constants or undefs, get the bits. A bit in
+// UndefBits is set if the corresponding element of the vector is an
+// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
+// zero. Return true if this is not an array of constants, false if it is.
+//
+static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
+                                       uint64_t UndefBits[2]) {
+  // Start with zero'd results.
+  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
+
+  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    SDOperand OpVal = BV->getOperand(i);
+
+    unsigned PartNo = i >= e/2;               // In the upper 64 bits?
+    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
+
+    uint64_t EltBits = 0;
+    if (OpVal.getOpcode() == ISD::UNDEF) {
+      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
+      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
+      continue;
+    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
+    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+      const APFloat &apf = CN->getValueAPF();
+      EltBits = (CN->getValueType(0) == MVT::f32
+                 ? FloatToBits(apf.convertToFloat())
+                 : DoubleToBits(apf.convertToDouble()));
+    } else {
+      // Nonconstant element.
+      return true;
+    }
+
+    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
+  }
+
+  //printf("%llx %llx %llx %llx\n",
+  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
+  return false;
+}
+
+/// If this is a splat (repetition) of a value across the whole vector, return
+/// the smallest size that splats it. For example, "0x01010101010101..." is a
+/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
+/// SplatSize = 1 byte.
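+/// The implementation repeatedly folds the 128-bit value in half (64 -> 32
+/// -> 16 bits), stopping at the smallest width at which both halves still
+/// agree once undef bits are masked out.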
+static bool isConstantSplat(const uint64_t Bits128[2], + const uint64_t Undef128[2], + int MinSplatBits, + uint64_t &SplatBits, uint64_t &SplatUndef, + int &SplatSize) { + // Don't let undefs prevent splats from matching. See if the top 64-bits are + // the same as the lower 64-bits, ignoring undefs. + uint64_t Bits64 = Bits128[0] | Bits128[1]; + uint64_t Undef64 = Undef128[0] & Undef128[1]; + uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); + uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); + uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); + uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); + + if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) { + if (MinSplatBits < 64) { + + // Check that the top 32-bits are the same as the lower 32-bits, ignoring + // undefs. + if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) { + if (MinSplatBits < 32) { + + // If the top 16-bits are different than the lower 16-bits, ignoring + // undefs, we have an i32 splat. + if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) { + if (MinSplatBits < 16) { + // If the top 8-bits are different than the lower 8-bits, ignoring + // undefs, we have an i16 splat. + if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) { + // Otherwise, we have an 8-bit splat. + SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); + SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); + SplatSize = 1; + return true; + } + } else { + SplatBits = Bits16; + SplatUndef = Undef16; + SplatSize = 2; + return true; + } + } + } else { + SplatBits = Bits32; + SplatUndef = Undef32; + SplatSize = 4; + return true; + } + } + } else { + SplatBits = Bits128[0]; + SplatUndef = Undef128[0]; + SplatSize = 8; + return true; + } + } + + return false; // Can't be a splat if two pieces don't match. +} + +// If this is a case we can't handle, return null and let the default +// expansion code take care of it. If we CAN select this case, and if it +// selects to a single instruction, return Op. Otherwise, if we can codegen +// this case more efficiently than a constant pool load, lower it to the +// sequence of ops that should be used. +static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { + MVT::ValueType VT = Op.getValueType(); + // If this is a vector of constants or undefs, get the bits. A bit in + // UndefBits is set if the corresponding element of the vector is an + // ISD::UNDEF value. For undefs, the corresponding VectorBits values are + // zero. + uint64_t VectorBits[2]; + uint64_t UndefBits[2]; + uint64_t SplatBits, SplatUndef; + int SplatSize; + if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits) + || !isConstantSplat(VectorBits, UndefBits, + MVT::getSizeInBits(MVT::getVectorElementType(VT)), + SplatBits, SplatUndef, SplatSize)) + return SDOperand(); // Not a constant vector, not a splat. + + switch (VT) { + default: + case MVT::v4f32: { + uint32_t Value32 = SplatBits; + assert(SplatSize == 4 + && "LowerBUILD_VECTOR: Unexpected floating point vector element."); + // NOTE: pretend the constant is an integer. LLVM won't load FP constants + SDOperand T = DAG.getConstant(Value32, MVT::i32); + return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T)); + break; + } + case MVT::v2f64: { + uint64_t f64val = SplatBits; + assert(SplatSize == 8 + && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size."); + // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants + SDOperand T = DAG.getConstant(f64val, MVT::i64); + return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); + break; + } + case MVT::v16i8: { + // 8-bit constants have to be expanded to 16-bits + unsigned short Value16 = SplatBits | (SplatBits << 8); + SDOperand Ops[8]; + for (int i = 0; i < 8; ++i) + Ops[i] = DAG.getConstant(Value16, MVT::i16); + return DAG.getNode(ISD::BIT_CONVERT, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8)); + } + case MVT::v8i16: { + unsigned short Value16; + if (SplatSize == 2) + Value16 = (unsigned short) (SplatBits & 0xffff); + else + Value16 = (unsigned short) (SplatBits | (SplatBits << 8)); + SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT)); + SDOperand Ops[8]; + for (int i = 0; i < 8; ++i) Ops[i] = T; + return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8); + } + case MVT::v4i32: { + unsigned int Value = SplatBits; + SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT)); + return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T); + } + case MVT::v2i64: { + uint64_t val = SplatBits; + uint32_t upper = uint32_t(val >> 32); + uint32_t lower = uint32_t(val); + + if (val != 0) { + SDOperand LO32; + SDOperand HI32; + SmallVector ShufBytes; + SDOperand Result; + bool upper_special, lower_special; + + // NOTE: This code creates common-case shuffle masks that can be easily + // detected as common expressions. It is not attempting to create highly + // specialized masks to replace any and all 0's, 0xff's and 0x80's. + + // Detect if the upper or lower half is a special shuffle mask pattern: + upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); + lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + + // Create lower vector if not a special pattern + if (!lower_special) { + SDOperand LO32C = DAG.getConstant(lower, MVT::i32); + LO32 = DAG.getNode(ISD::BIT_CONVERT, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + LO32C, LO32C, LO32C, LO32C)); + } + + // Create upper vector if not a special pattern + if (!upper_special) { + SDOperand HI32C = DAG.getConstant(upper, MVT::i32); + HI32 = DAG.getNode(ISD::BIT_CONVERT, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + HI32C, HI32C, HI32C, HI32C)); + } + + // If either upper or lower are special, then the two input operands are + // the same (basically, one of them is a "don't care") + if (lower_special) + LO32 = HI32; + if (upper_special) + HI32 = LO32; + if (lower_special && upper_special) { + // Unhappy situation... both upper and lower are special, so punt with + // a target constant: + SDOperand Zero = DAG.getConstant(0, MVT::i32); + HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, + Zero, Zero); + } + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + SDOperand V; + bool process_upper, process_lower; + uint64_t val; + + process_upper = (upper_special && (i & 1) == 0); + process_lower = (lower_special && (i & 1) == 1); + + if (process_upper || process_lower) { + if ((process_upper && upper == 0) + || (process_lower && lower == 0)) + val = 0x80; + else if ((process_upper && upper == 0xffffffff) + || (process_lower && lower == 0xffffffff)) + val = 0xc0; + else if ((process_upper && upper == 0x80000000) + || (process_lower && lower == 0x80000000)) + val = (j == 0 ? 
0xe0 : 0x80); + } else + val = i * 4 + j + ((i & 1) * 16); + + ShufBytes.push_back(DAG.getConstant(val, MVT::i8)); + } + } + + return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, + &ShufBytes[0], ShufBytes.size())); + } else { + // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR + SDOperand Zero = DAG.getConstant(0, MVT::i32); + return DAG.getNode(ISD::BIT_CONVERT, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + Zero, Zero, Zero, Zero)); + } + } + } + + return SDOperand(); +} + +/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on +/// which the Cell can operate. The code inspects V3 to ascertain whether the +/// permutation vector, V3, is monotonically increasing with one "exception" +/// element, e.g., (0, 1, _, 3). If this is the case, then generate a +/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. +/// In either case, the net result is going to eventually invoke SHUFB to +/// permute/shuffle the bytes from V1 and V2. +/// \note +/// INSERT_MASK is eventually selected as one of the C*D instructions, generate +/// control word for byte/halfword/word insertion. This takes care of a single +/// element move from V2 into V1. +/// \note +/// SPUISD::SHUFB is eventually selected as Cell's shufb instructions. +static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { + SDOperand V1 = Op.getOperand(0); + SDOperand V2 = Op.getOperand(1); + SDOperand PermMask = Op.getOperand(2); + + if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + + // If we have a single element being moved from V1 to V2, this can be handled + // using the C*[DX] compute mask instructions, but the vector elements have + // to be monotonically increasing with one exception element. + MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType()); + unsigned EltsFromV2 = 0; + unsigned V2Elt = 0; + unsigned V2EltIdx0 = 0; + unsigned CurrElt = 0; + bool monotonic = true; + if (EltVT == MVT::i8) + V2EltIdx0 = 16; + else if (EltVT == MVT::i16) + V2EltIdx0 = 8; + else if (EltVT == MVT::i32) + V2EltIdx0 = 4; + else + assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); + + for (unsigned i = 0, e = PermMask.getNumOperands(); + EltsFromV2 <= 1 && monotonic && i != e; + ++i) { + unsigned SrcElt; + if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) + SrcElt = 0; + else + SrcElt = cast(PermMask.getOperand(i))->getValue(); + + if (SrcElt >= V2EltIdx0) { + ++EltsFromV2; + V2Elt = (V2EltIdx0 - SrcElt) << 2; + } else if (CurrElt != SrcElt) { + monotonic = false; + } + + ++CurrElt; + } + + if (EltsFromV2 == 1 && monotonic) { + // Compute mask and shuffle + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass); + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Initialize temporary register to 0 + SDOperand InitTempReg = + DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT)); + // Copy register's contents as index in INSERT_MASK: + SDOperand ShufMaskOp = + DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(), + DAG.getTargetConstant(V2Elt, MVT::i32), + DAG.getCopyFromReg(InitTempReg, VReg, PtrVT)); + // Use shuffle mask in SHUFB synthetic instruction: + return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp); + } else { + // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes. 
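+    // (For example, a v4i32 mask entry of 5 selects element 1 of V2, which
+    // becomes bytes 20..23 of the concatenated V1||V2 byte vector.)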
+ unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; + + SmallVector ResultMask; + for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { + unsigned SrcElt; + if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) + SrcElt = 0; + else + SrcElt = cast(PermMask.getOperand(i))->getValue(); + + for (unsigned j = 0; j != BytesPerElement; ++j) { + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, + MVT::i8)); + } + } + + SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, + &ResultMask[0], ResultMask.size()); + return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask); + } +} + +static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { + SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar + + if (Op0.Val->getOpcode() == ISD::Constant) { + // For a constant, build the appropriate constant vector, which will + // eventually simplify to a vector register load. + + ConstantSDNode *CN = cast(Op0.Val); + SmallVector ConstVecValues; + MVT::ValueType VT; + size_t n_copies; + + // Create a constant vector: + switch (Op.getValueType()) { + default: assert(0 && "Unexpected constant value type in " + "LowerSCALAR_TO_VECTOR"); + case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; + case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; + case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; + case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; + case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; + case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; + } + + SDOperand CValue = DAG.getConstant(CN->getValue(), VT); + for (size_t j = 0; j < n_copies; ++j) + ConstVecValues.push_back(CValue); + + return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(), + &ConstVecValues[0], ConstVecValues.size()); + } else { + // Otherwise, copy the value from one register to another: + switch (Op0.getValueType()) { + default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + case MVT::f32: + case MVT::f64: + return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0); + } + } + + return SDOperand(); +} + +static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) { + switch (Op.getValueType()) { + case MVT::v4i32: { + SDOperand rA = Op.getOperand(0); + SDOperand rB = Op.getOperand(1); + SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB); + SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA); + SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB); + SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1); + + return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2); + break; + } + + // Multiply two v8i16 vectors (pipeline friendly version): + // a) multiply lower halves, mask off upper 16-bit of 32-bit product + // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes) + // c) Use SELB to select upper and lower halves from the intermediate results + // + // NOTE: We really want to move the FSMBI to earlier to actually get the + // dual-issue. 
This code does manage to do this, even if it's a little on + // the wacky side + case MVT::v8i16: { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + SDOperand Chain = Op.getOperand(0); + SDOperand rA = Op.getOperand(0); + SDOperand rB = Op.getOperand(1); + unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + + SDOperand FSMBOp = + DAG.getCopyToReg(Chain, FSMBIreg, + DAG.getNode(SPUISD::FSMBI, MVT::v8i16, + DAG.getConstant(0xcccc, MVT::i32))); + + SDOperand HHProd = + DAG.getCopyToReg(FSMBOp, HiProdReg, + DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); + + SDOperand HHProd_v4i32 = + DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, + DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); + + return DAG.getNode(SPUISD::SELB, MVT::v8i16, + DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), + DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), + DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, + HHProd_v4i32, + DAG.getConstant(16, MVT::i16))), + DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); + } + + // This M00sE is N@stI! (apologies to Monty Python) + // + // SPU doesn't know how to do any 8-bit multiplication, so the solution + // is to break it all apart, sign extend, and reassemble the various + // intermediate products. + case MVT::v16i8: { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + SDOperand Chain = Op.getOperand(0); + SDOperand rA = Op.getOperand(0); + SDOperand rB = Op.getOperand(1); + SDOperand c8 = DAG.getConstant(8, MVT::i8); + SDOperand c16 = DAG.getConstant(16, MVT::i8); + + unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + + SDOperand LLProd = + DAG.getNode(SPUISD::MPY, MVT::v8i16, + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); + + SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); + + SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); + + SDOperand LHProd = + DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, + DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); + + SDOperand FSMBdef_2222 = + DAG.getCopyToReg(Chain, FSMBreg_2222, + DAG.getNode(SPUISD::FSMBI, MVT::v8i16, + DAG.getConstant(0x2222, MVT::i32))); + + SDOperand FSMBuse_2222 = + DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32); + + SDOperand LoProd_1 = + DAG.getCopyToReg(Chain, LoProd_reg, + DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd, + FSMBuse_2222)); + + SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32); + + SDOperand LoProd = + DAG.getNode(ISD::AND, MVT::v4i32, + DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32), + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + LoProdMask, LoProdMask, + LoProdMask, LoProdMask)); + + SDOperand rAH = + DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, + DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); + + SDOperand rBH = + DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, + DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); + + SDOperand HLProd = + DAG.getNode(SPUISD::MPY, MVT::v8i16, + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); + + SDOperand HHProd_1 = + DAG.getNode(SPUISD::MPY, MVT::v8i16, + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, + DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)), + 
DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, + DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8))); + + SDOperand HHProd = + DAG.getCopyToReg(Chain, HiProd_reg, + DAG.getNode(SPUISD::SELB, MVT::v8i16, + HLProd, + DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), + FSMBuse_2222)); + + SDOperand HiProd = + DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, + DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16); + + return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, + DAG.getNode(ISD::OR, MVT::v4i32, + LoProd, HiProd)); + } + + default: + cerr << "CellSPU: Unknown vector multiplication, got " + << MVT::getValueTypeString(Op.getValueType()) + << "\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + + SDOperand A = Op.getOperand(0); + SDOperand B = Op.getOperand(1); + unsigned VT = Op.getValueType(); + + unsigned VRegBR, VRegC; + + if (VT == MVT::f32) { + VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass); + VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass); + } else { + VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + } + // TODO: make sure we're feeding FPInterp the right arguments + // Right now: fi B, frest(B) + + // Computes BRcpl = + // (Floating Interpolate (FP Reciprocal Estimate B)) + SDOperand BRcpl = + DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, + DAG.getNode(SPUISD::FPInterp, VT, B, + DAG.getNode(SPUISD::FPRecipEst, VT, B))); + + // Computes A * BRcpl and stores in a temporary register + SDOperand AxBRcpl = + DAG.getCopyToReg(BRcpl, VRegC, + DAG.getNode(ISD::FMUL, VT, A, + DAG.getCopyFromReg(BRcpl, VRegBR, VT))); + // What's the Chain variable do? It's magic! + // TODO: set Chain = Op(0).getEntryNode() + + return DAG.getNode(ISD::FADD, VT, + DAG.getCopyFromReg(AxBRcpl, VRegC, VT), + DAG.getNode(ISD::FMUL, VT, + DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), + DAG.getNode(ISD::FSUB, VT, A, + DAG.getNode(ISD::FMUL, VT, B, + DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); +} + +// Expands double-precision FDIV +// Expects two doubles as inputs X and Y, does a floating point +// reciprocal estimate, and three iterations of Newton-Raphson +// to increase accuracy. 
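+//
+// A sketch of the intended refinement (illustrative only; the helper below
+// is still commented out): with R0 = FPRecipEst(Y) as the initial estimate
+// (accurate to roughly 12 bits), each Newton-Raphson step computes
+//
+//   R_{n+1} = R_n * (2.0 - Y * R_n)
+//
+// roughly doubling the number of correct bits, so three steps suffice for
+// the 53-bit double-precision mantissa. The quotient is then X * R_3.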
+//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
+//  MachineFunction &MF = DAG.getMachineFunction();
+//  SSARegMap *RegMap = MF.getSSARegMap();
+//
+//  SDOperand X = Op.getOperand(0);
+//  SDOperand Y = Op.getOperand(1);
+//}
+
+static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
+  unsigned VT = Op.getValueType();
+  SDOperand N = Op.getOperand(0);
+  SDOperand Elt = Op.getOperand(1);
+  SDOperand ShufMask[16];
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
+
+  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
+
+  int EltNo = (int) C->getValue();
+
+  // sanity checks:
+  if (VT == MVT::i8 && EltNo >= 16)
+    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+  else if (VT == MVT::i16 && EltNo >= 8)
+    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+  else if (VT == MVT::i32 && EltNo >= 4)
+    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
+  else if (VT == MVT::i64 && EltNo >= 2)
+    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
+
+  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+    // i32 and i64: Element 0 is the preferred slot
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
+  }
+
+  // Need to generate shuffle mask and extract:
+  int prefslot_begin, prefslot_end;
+  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
+
+  switch (VT) {
+  case MVT::i8: {
+    prefslot_begin = prefslot_end = 3;
+    break;
+  }
+  case MVT::i16: {
+    prefslot_begin = 2; prefslot_end = 3;
+    break;
+  }
+  case MVT::i32: {
+    prefslot_begin = 0; prefslot_end = 3;
+    break;
+  }
+  case MVT::i64: {
+    prefslot_begin = 0; prefslot_end = 7;
+    break;
+  }
+  }
+
+  for (int i = 0; i < 16; ++i) {
+    // zero fill upper part of preferred slot, don't care about the
+    // other slots:
+    unsigned int mask_val;
+
+    if (i <= prefslot_end) {
+      mask_val =
+        ((i < prefslot_begin)
+         ?
0x80 + : elt_byte + (i - prefslot_begin)); + + ShufMask[i] = DAG.getConstant(mask_val, MVT::i16); + } else + ShufMask[i] = ShufMask[i % (prefslot_end + 1)]; + } + + SDOperand ShufMaskVec = + DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, + &ShufMask[0], + sizeof(ShufMask) / sizeof(ShufMask[0])); + + return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, + DAG.getNode(SPUISD::SHUFB, N.getValueType(), + N, N, ShufMaskVec)); + +} + +static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { + SDOperand VecOp = Op.getOperand(0); + SDOperand ValOp = Op.getOperand(1); + SDOperand IdxOp = Op.getOperand(2); + MVT::ValueType VT = Op.getValueType(); + + ConstantSDNode *CN = cast(IdxOp); + assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Use $2 because it's always 16-byte aligned and it's available: + SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT); + + SDOperand result = + DAG.getNode(SPUISD::SHUFB, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp), + VecOp, + DAG.getNode(SPUISD::INSERT_MASK, VT, + DAG.getNode(ISD::ADD, PtrVT, + PtrBase, + DAG.getConstant(CN->getValue(), + PtrVT)))); + + return result; +} + +static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) { + SDOperand N0 = Op.getOperand(0); // Everything has at least one operand + + assert(Op.getValueType() == MVT::i8); + switch (Opc) { + default: + assert(0 && "Unhandled i8 math operator"); + /*NOTREACHED*/ + break; + case ISD::SUB: { + // 8-bit subtraction: Promote the arguments up to 16-bits and truncate + // the result: + SDOperand N1 = Op.getOperand(1); + N0 = (N0.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); + N1 = (N1.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) + : DAG.getConstant(cast(N1)->getValue(), MVT::i16)); + return DAG.getNode(ISD::TRUNCATE, MVT::i8, + DAG.getNode(Opc, MVT::i16, N0, N1)); + } + case ISD::ROTR: + case ISD::ROTL: { + SDOperand N1 = Op.getOperand(1); + unsigned N1Opc; + N0 = (N0.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); + N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE); + N1 = (N1.getOpcode() != ISD::Constant + ? DAG.getNode(N1Opc, MVT::i16, N1) + : DAG.getConstant(cast(N1)->getValue(), MVT::i16)); + SDOperand ExpandArg = + DAG.getNode(ISD::OR, MVT::i16, N0, + DAG.getNode(ISD::SHL, MVT::i16, + N0, DAG.getConstant(8, MVT::i16))); + return DAG.getNode(ISD::TRUNCATE, MVT::i8, + DAG.getNode(Opc, MVT::i16, ExpandArg, N1)); + } + case ISD::SRL: + case ISD::SHL: { + SDOperand N1 = Op.getOperand(1); + unsigned N1Opc; + N0 = (N0.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); + N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE); + N1 = (N1.getOpcode() != ISD::Constant + ? DAG.getNode(N1Opc, MVT::i16, N1) + : DAG.getConstant(cast(N1)->getValue(), MVT::i16)); + return DAG.getNode(ISD::TRUNCATE, MVT::i8, + DAG.getNode(Opc, MVT::i16, N0, N1)); + } + case ISD::SRA: { + SDOperand N1 = Op.getOperand(1); + unsigned N1Opc; + N0 = (N0.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); + N1Opc = (N1.getValueType() < MVT::i16 ? 
ISD::SIGN_EXTEND : ISD::TRUNCATE);
+    N1 = (N1.getOpcode() != ISD::Constant
+          ? DAG.getNode(N1Opc, MVT::i16, N1)
+          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
+    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
+                       DAG.getNode(Opc, MVT::i16, N0, N1));
+  }
+  case ISD::MUL: {
+    SDOperand N1 = Op.getOperand(1);
+    unsigned N1Opc;
+    N0 = (N0.getOpcode() != ISD::Constant
+          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
+          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
+    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
+    N1 = (N1.getOpcode() != ISD::Constant
+          ? DAG.getNode(N1Opc, MVT::i16, N1)
+          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
+    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
+                       DAG.getNode(Opc, MVT::i16, N0, N1));
+  }
+  }
+
+  return SDOperand();
+}
+
+//! Lower byte immediate operations for v16i8 vectors:
+static SDOperand
+LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
+  SDOperand ConstVec;
+  SDOperand Arg;
+  MVT::ValueType VT = Op.getValueType();
+
+  ConstVec = Op.getOperand(0);
+  Arg = Op.getOperand(1);
+  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
+    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
+      ConstVec = ConstVec.getOperand(0);
+    } else {
+      ConstVec = Op.getOperand(1);
+      Arg = Op.getOperand(0);
+      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
+        ConstVec = ConstVec.getOperand(0);
+      }
+    }
+  }
+
+  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
+    uint64_t VectorBits[2];
+    uint64_t UndefBits[2];
+    uint64_t SplatBits, SplatUndef;
+    int SplatSize;
+
+    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
+        && isConstantSplat(VectorBits, UndefBits,
+                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
+                           SplatBits, SplatUndef, SplatSize)) {
+      SDOperand tcVec[16];
+      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
+
+      // Turn the BUILD_VECTOR into a set of target constants:
+      for (size_t i = 0; i < tcVecSize; ++i)
+        tcVec[i] = tc;
+
+      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
+                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
+    }
+  }
+
+  return SDOperand();
+}
+
+//! Lower i32 multiplication
+static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
+                          unsigned Opc) {
+  switch (VT) {
+  default:
+    cerr << "CellSPU: Unknown LowerMUL value type, got "
+         << MVT::getValueTypeString(Op.getValueType())
+         << "\n";
+    abort();
+    /*NOTREACHED*/
+
+  case MVT::i32: {
+    SDOperand rA = Op.getOperand(0);
+    SDOperand rB = Op.getOperand(1);
+
+    return DAG.getNode(ISD::ADD, MVT::i32,
+                       DAG.getNode(ISD::ADD, MVT::i32,
+                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
+                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
+                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
+  }
+  }
+
+  return SDOperand();
+}
+
+//! Custom lowering for CTPOP (count population)
+/*!
+  Custom lowering code that counts the number of ones in the input
+  operand. SPU has such an instruction, but it counts the number of
+  ones per byte, which then have to be accumulated.
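+
+  As an illustrative sketch of the accumulation (mirroring the i32 case
+  below), the per-byte counts from CNTB are folded down with shifts and
+  adds:
+
+    t   = CNTB(splat(x))    // per-byte population counts
+    t  += t >> 16           // fold upper halfword onto lower
+    t  += t >> 8            // fold remaining byte
+    pop = t & 0xff          // low byte holds the full count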
+*/ +static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) { + unsigned VT = Op.getValueType(); + unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + + switch (VT) { + case MVT::i8: { + SDOperand N = Op.getOperand(0); + SDOperand Elt0 = DAG.getConstant(0, MVT::i32); + + SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0); + } + + case MVT::i16: { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + + unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass); + + SDOperand N = Op.getOperand(0); + SDOperand Elt0 = DAG.getConstant(0, MVT::i16); + SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16); + SDOperand Shift1 = DAG.getConstant(8, MVT::i16); + + SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + + // CNTB_result becomes the chain to which all of the virtual registers + // CNTB_reg, SUM1_reg become associated: + SDOperand CNTB_result = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0); + + SDOperand CNTB_rescopy = + DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); + + SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16); + + return DAG.getNode(ISD::AND, MVT::i16, + DAG.getNode(ISD::ADD, MVT::i16, + DAG.getNode(ISD::SRL, MVT::i16, + Tmp1, Shift1), + Tmp1), + Mask0); + } + + case MVT::i32: { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + + unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass); + unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass); + + SDOperand N = Op.getOperand(0); + SDOperand Elt0 = DAG.getConstant(0, MVT::i32); + SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32); + SDOperand Shift1 = DAG.getConstant(16, MVT::i32); + SDOperand Shift2 = DAG.getConstant(8, MVT::i32); + + SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + + // CNTB_result becomes the chain to which all of the virtual registers + // CNTB_reg, SUM1_reg become associated: + SDOperand CNTB_result = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0); + + SDOperand CNTB_rescopy = + DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); + + SDOperand Comp1 = + DAG.getNode(ISD::SRL, MVT::i32, + DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1); + + SDOperand Sum1 = + DAG.getNode(ISD::ADD, MVT::i32, + Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32)); + + SDOperand Sum1_rescopy = + DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1); + + SDOperand Comp2 = + DAG.getNode(ISD::SRL, MVT::i32, + DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32), + Shift2); + SDOperand Sum2 = + DAG.getNode(ISD::ADD, MVT::i32, Comp2, + DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32)); + + return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0); + } + + case MVT::i64: + break; + } + + return SDOperand(); +} + +/// LowerOperation - Provide custom lowering hooks for some operations. 
+/// +SDOperand +SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) +{ + switch (Op.getOpcode()) { + default: { + cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; + cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n"; + cerr << "*Op.Val:\n"; + Op.Val->dump(); + abort(); + } + case ISD::LOAD: + case ISD::SEXTLOAD: + case ISD::ZEXTLOAD: + return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::STORE: + return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::Constant: + return LowerConstant(Op, DAG); + case ISD::ConstantFP: + return LowerConstantFP(Op, DAG); + case ISD::FORMAL_ARGUMENTS: + return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); + case ISD::CALL: + return LowerCALL(Op, DAG); + case ISD::RET: + return LowerRET(Op, DAG, getTargetMachine()); + + // i8 math ops: + case ISD::SUB: + case ISD::ROTR: + case ISD::ROTL: + case ISD::SRL: + case ISD::SHL: + case ISD::SRA: + return LowerI8Math(Op, DAG, Op.getOpcode()); + + // Vector-related lowering. + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::SCALAR_TO_VECTOR: + return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + + // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return LowerByteImmed(Op, DAG); + + // Vector and i8 multiply: + case ISD::MUL: + if (MVT::isVector(Op.getValueType())) + return LowerVectorMUL(Op, DAG); + else if (Op.getValueType() == MVT::i8) + return LowerI8Math(Op, DAG, Op.getOpcode()); + else + return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode()); + + case ISD::FDIV: + if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32) + return LowerFDIVf32(Op, DAG); +// else if (Op.getValueType() == MVT::f64) +// return LowerFDIVf64(Op, DAG); + else + assert(0 && "Calling FDIV on unsupported MVT"); + + case ISD::CTPOP: + return LowerCTPOP(Op, DAG); + } + + return SDOperand(); +} + +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, + MachineBasicBlock *BB) +{ + return BB; +} + +//===----------------------------------------------------------------------===// +// Target Optimization Hooks +//===----------------------------------------------------------------------===// + +SDOperand +SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const +{ +#if 0 + TargetMachine &TM = getTargetMachine(); + SelectionDAG &DAG = DCI.DAG; +#endif + SDOperand N0 = N->getOperand(0); // everything has at least one operand + + switch (N->getOpcode()) { + default: break; + + // Look for obvious optimizations for shift left: + // a) Replace 0 << V with 0 + // b) Replace V << 0 with V + // + // N.B: llvm will generate an undef node if the shift amount is greater than + // 15 (e.g.: V << 16), which will naturally trigger an assert. 
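+  //
+  // For example (illustrative IR, not taken from this patch), both of the
+  // following should fold away without emitting a shift:
+  //
+  //   %a = shl i32 0, %amt     ; --> 0
+  //   %b = shl i32 %val, 0     ; --> %val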
+  case SPU::SHLIr32:
+  case SPU::SHLHIr16:
+  case SPU::SHLQBIIvec:
+  case SPU::ROTHIr16:
+  case SPU::ROTHIr16_i32:
+  case SPU::ROTIr32:
+  case SPU::ROTIr32_i16:
+  case SPU::ROTQBYIvec:
+  case SPU::ROTQBYBIvec:
+  case SPU::ROTQBIIvec:
+  case SPU::ROTHMIr16:
+  case SPU::ROTMIr32:
+  case SPU::ROTQMBYIvec: {
+    if (N0.getOpcode() == ISD::Constant) {
+      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
+        if (C->getValue() == 0)   // 0 << V -> 0.
+          return N0;
+      }
+    }
+    SDOperand N1 = N->getOperand(1);
+    if (N1.getOpcode() == ISD::Constant) {
+      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
+        if (C->getValue() == 0)   // V << 0 -> V
+          return N0;
+      }
+    }
+    break;
+  }
+  }
+
+  return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SPUTargetLowering::ConstraintType
+SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
+  if (ConstraintLetter.size() == 1) {
+    switch (ConstraintLetter[0]) {
+    default: break;
+    case 'b':
+    case 'r':
+    case 'f':
+    case 'v':
+    case 'y':
+      return C_RegisterClass;
+    }
+  }
+  return TargetLowering::getConstraintType(ConstraintLetter);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+                                                MVT::ValueType VT) const
+{
+  if (Constraint.size() == 1) {
+    // GCC constraint letters (inherited from the RS6000 port)
+    switch (Constraint[0]) {
+    case 'b':   // R1-R31
+    case 'r':   // R0-R31
+      if (VT == MVT::i64)
+        return std::make_pair(0U, SPU::R64CRegisterClass);
+      return std::make_pair(0U, SPU::R32CRegisterClass);
+    case 'f':
+      if (VT == MVT::f32)
+        return std::make_pair(0U, SPU::R32FPRegisterClass);
+      else if (VT == MVT::f64)
+        return std::make_pair(0U, SPU::R64FPRegisterClass);
+      break;
+    case 'v':
+      return std::make_pair(0U, SPU::GPRCRegisterClass);
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+void
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+                                                  uint64_t Mask,
+                                                  uint64_t &KnownZero,
+                                                  uint64_t &KnownOne,
+                                                  const SelectionDAG &DAG,
+                                                  unsigned Depth) const {
+  KnownZero = 0;
+  KnownOne = 0;
+}
+
+// LowerAsmOperandForConstraint
+void
+SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
+                                                char ConstraintLetter,
+                                                std::vector<SDOperand> &Ops,
+                                                SelectionDAG &DAG) {
+  // Default, for the time being, to the base class handler
+  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode.
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
+  // SPU's addresses fit within the 256K local store:
+  return (V > -(1 << 18) && V < (1 << 18) - 1);
+}
+
+bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+  return false;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
new file mode 100644
index 00000000000..4e3ec3a2457
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -0,0 +1,139 @@
+//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by a team from the Computer Systems Research
+// Department at The Aerospace Corporation.
+//
+// See README.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Cell SPU uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_ISELLOWERING_H
+#define SPU_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SPU.h"
+
+namespace llvm {
+  namespace SPUISD {
+    enum NodeType {
+      // Start the numbering where the built-in ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END+SPU::INSTRUCTION_LIST_END,
+
+      // Pseudo instructions:
+      RET_FLAG,             ///< Return with flag, matched by bi instruction
+
+      Hi,                   ///< High address component (upper 16)
+      Lo,                   ///< Low address component (lower 16)
+      PCRelAddr,            ///< Program counter relative address
+      DFormAddr,            ///< D-Form address "imm($r)"
+      XFormAddr,            ///< X-Form address "$r1($r2)"
+
+      LDRESULT,             ///< Load result (value, chain)
+      CALL,                 ///< CALL instruction
+      SHUFB,                ///< Vector shuffle (permute)
+      INSERT_MASK,          ///< Insert element shuffle mask
+      CNTB,                 ///< Count ones in bytes
+      PROMOTE_SCALAR,       ///< Promote scalar->vector
+      EXTRACT_ELT0,         ///< Extract element 0
+      EXTRACT_ELT0_CHAINED, ///< Extract element 0, with chain
+      EXTRACT_I1_ZEXT,      ///< Extract element 0 as i1, zero extend
+      EXTRACT_I1_SEXT,      ///< Extract element 0 as i1, sign extend
+      EXTRACT_I8_ZEXT,      ///< Extract element 0 as i8, zero extend
+      EXTRACT_I8_SEXT,      ///< Extract element 0 as i8, sign extend
+      MPY,                  ///< 16-bit Multiply (low parts of a 32-bit)
+      MPYU,                 ///< Multiply Unsigned
+      MPYH,                 ///< Multiply High
+      MPYHH,                ///< Multiply High-High
+      VEC_SHL,              ///< Vector shift left
+      VEC_SRL,              ///< Vector shift right (logical)
+      VEC_SRA,              ///< Vector shift right (arithmetic)
+      VEC_ROTL,             ///< Vector rotate left
+      VEC_ROTR,             ///< Vector rotate right
+      ROTBYTES_RIGHT_Z,     ///< Vector rotate right, by bytes, zero fill
+      ROTBYTES_RIGHT_S,     ///< Vector rotate right, by bytes, sign fill
+      ROTBYTES_LEFT,        ///< Rotate bytes (loads -> ROTQBYI)
+      ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain
+      FSMBI,                ///< Form Select Mask for Bytes, Immediate
+      SELB,                 ///< Select bits -> (b & mask) | (a & ~mask)
+      SFPConstant,          ///< Single precision floating point constant
+      FPInterp,             ///< Floating point interpolate
+      FPRecipEst,           ///< Floating point reciprocal estimate
+      SEXT32TO64,           ///< Sign-extended 32-bit const -> 64-bits
+      LAST_SPUISD           ///< Last user-defined instruction
+    };
+  }
+
+  /// Predicates that are used for node matching:
+  namespace SPU {
+    SDOperand get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+                             MVT::ValueType ValueType);
+    SDOperand get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+                             MVT::ValueType ValueType);
+    SDOperand get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+                             MVT::ValueType ValueType);
+    SDOperand get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+                            MVT::ValueType ValueType);
+    SDOperand get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+                              MVT::ValueType ValueType);
+    SDOperand get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
+    SDOperand get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+  }
+
+  class SPUTargetMachine;            // forward dec'l.
+
+  class SPUTargetLowering :
+    public TargetLowering
+  {
+    int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
+    int ReturnAddrIndex;              // FrameIndex for return slot.
+    SPUTargetMachine &SPUTM;
+
+  public:
+    SPUTargetLowering(SPUTargetMachine &TM);
+
+    /// getTargetNodeName() - This method returns the name of a target specific
+    /// DAG node.
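+    /// (For example, a SPUISD::SHUFB node is expected to be reported as the
+    /// string "SPUISD::SHUFB" in DAG dumps.)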
+ virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// LowerOperation - Provide custom lowering hooks for some operations. + /// + virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG); + + virtual SDOperand PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual void computeMaskedBitsForTargetNode(const SDOperand Op, + uint64_t Mask, + uint64_t &KnownZero, + uint64_t &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const; + + virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI, + MachineBasicBlock *MBB); + + ConstraintType getConstraintType(const std::string &ConstraintLetter) const; + + std::pair + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT::ValueType VT) const; + + void LowerAsmOperandForConstraint(SDOperand Op, char ConstraintLetter, + std::vector &Ops, + SelectionDAG &DAG); + + /// isLegalAddressImmediate - Return true if the integer value can be used + /// as the offset of the target addressing mode. + virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const; + virtual bool isLegalAddressImmediate(GlobalValue *) const; + }; +} + +#endif diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h new file mode 100644 index 00000000000..58e455f9f19 --- /dev/null +++ b/lib/Target/CellSPU/SPUInstrBuilder.h @@ -0,0 +1,52 @@ +//==-- SPUInstrBuilder.h - Aides for building Cell SPU insts -----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to simplify generating frame and constant pool +// references. +// +// For reference, the order of operands for memory references is: +// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate +// Displacement. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_INSTRBUILDER_H +#define SPU_INSTRBUILDER_H + +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace llvm { + +/// addFrameReference - This function is used to add a reference to the base of +/// an abstract object on the stack frame of the current function. This +/// reference has base register as the FrameIndex offset until it is resolved. +/// This allows a constant offset to be specified as well... +/// +inline const MachineInstrBuilder& +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, + bool mem = true) { + if (mem) + return MIB.addImm(Offset).addFrameIndex(FI); + else + return MIB.addFrameIndex(FI).addImm(Offset); +} + +/// addConstantPoolReference - This function is used to add a reference to the +/// base of a constant value spilled to the per-function constant pool. The +/// reference has base register ConstantPoolIndex offset which is retained until +/// either machine code emission or assembly output. This allows an optional +/// offset to be added as well. 
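+///
+/// A hypothetical usage sketch (the opcode and registers are illustrative,
+/// not taken from this patch): loading a constant-pool entry,
+///
+///   addConstantPoolReference(BuildMI(MBB, TII.get(SPU::LQDr32), DestReg),
+///                            CPI);
+///
+/// appends the default immediate offset (0) followed by the constant-pool
+/// index, matching the operand order described in the file header.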
+/// +inline const MachineInstrBuilder& +addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, + int Offset = 0) { + return MIB.addImm(Offset).addConstantPoolIndex(CPI); +} + +} // End llvm namespace + +#endif diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td new file mode 100644 index 00000000000..8d3a500a513 --- /dev/null +++ b/lib/Target/CellSPU/SPUInstrFormats.td @@ -0,0 +1,308 @@ +//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by The Aerospace Corporation.... +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// Cell SPU instruction formats. Note that these are notationally similar to +// PowerPC, like "A-Form". But the sizes of operands and fields differ. + +// This was kiped from the PPC instruction formats (seemed like a good idea...) + +class I + : Instruction { + field bits<32> Inst; + + let Name = ""; + let Namespace = "SPU"; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; +} + +// RR Format +class RRForm opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<7> RA; + bits<7> RB; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-10} = opcode; + let Inst{11-17} = RB; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +let RB = 0 in { + // RR Format, where RB is zeroed (dont care): + class RRForm_1 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : RRForm + { } + + let RA = 0 in { + // RR Format, where RA and RB are zeroed (dont care): + // Used for reads from status control registers (see FPSCRRr32) + class RRForm_2 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : RRForm + { } + } +} + +let RT = 0 in { + // RR Format, where RT is zeroed (don't care), or as the instruction handbook + // says, "RT is a false target." 
Used in "Halt if" instructions + class RRForm_3 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : RRForm + { } +} + +// RRR Format +class RRRForm opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I +{ + bits<7> RA; + bits<7> RB; + bits<7> RC; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-3} = opcode; + let Inst{4-10} = RT; + let Inst{11-17} = RB; + let Inst{18-24} = RA; + let Inst{25-31} = RC; +} + +// RI7 Format +class RI7Form opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I +{ + bits<7> i7; + bits<7> RA; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-10} = opcode; + let Inst{11-17} = i7; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +// CVTIntFp Format +class CVTIntFPForm opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I +{ + bits<7> RA; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-9} = opcode; + let Inst{10-17} = 0; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +let RA = 0 in { + class BICondForm opcode, string asmstr, list pattern> + : RRForm + { } + + let RT = 0 in { + // Branch instruction format (without D/E flag settings) + class BRForm opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : RRForm + { } + + class BIForm opcode, string asmstr, list pattern> + : RRForm + { } + + let RB = 0 in { + // Return instruction (bi, branch indirect), RA is zero (LR): + class RETForm pattern> + : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv, + pattern> + { } + } + } +} + +// Branch indirect external data forms: +class BISLEDForm DE_flag, string asmstr, list pattern> + : I<(outs), (ins indcalltarget:$func), asmstr, BranchResolv> +{ + bits<7> Rcalldest; + + let Pattern = pattern; + + let Inst{0-10} = 0b11010101100; + let Inst{11} = 0; + let Inst{12-13} = DE_flag; + let Inst{14-17} = 0b0000; + let Inst{18-24} = Rcalldest; + let Inst{25-31} = 0b0000000; +} + +// RI10 Format +class RI10Form opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I +{ + bits<10> i10; + bits<7> RA; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-7} = opcode; + let Inst{8-17} = i10; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +// RI10 Format, where the constant is zero (or effectively ignored by the +// SPU) +class RI10Form_1 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I +{ + bits<7> RA; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-7} = opcode; + let Inst{8-17} = 0; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +// RI10 Format, where RT is ignored. +// This format is used primarily by the Halt If ... Immediate set of +// instructions +class RI10Form_2 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I +{ + bits<10> i10; + bits<7> RA; + + let Pattern = pattern; + + let Inst{0-7} = opcode; + let Inst{8-17} = i10; + let Inst{18-24} = RA; + let Inst{25-31} = 0; +} + +// RI16 Format +class RI16Form opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I +{ + bits<16> i16; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-8} = opcode; + let Inst{9-24} = i16; + let Inst{25-31} = RT; +} + +// Specialized version of the RI16 Format for unconditional branch relative and +// branch absolute, branch and set link. 
Note that for branch and set link, the +// link register doesn't have to be $lr, but this is actually hard coded into +// the instruction pattern. + +let RT = 0 in { + class UncondBranch opcode, dag OOL, dag IOL, string asmstr, + list pattern> + : RI16Form + { } + + class BranchSetLink opcode, dag OOL, dag IOL, string asmstr, + list pattern> + : RI16Form + { } +} + +// RI18 Format +class RI18Form opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I +{ + bits<18> i18; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-6} = opcode; + let Inst{7-24} = i18; + let Inst{25-31} = RT; +} + +//===----------------------------------------------------------------------===// +// Instruction formats for intrinsics: +//===----------------------------------------------------------------------===// + +// RI10 Format for v8i16 intrinsics +class RI10_Int_v8i16 opcode, string opc, InstrItinClass itin, + Intrinsic IntID> : + RI10Form; + +class RI10_Int_v4i32 opcode, string opc, InstrItinClass itin, + Intrinsic IntID> : + RI10Form; + +// RR Format for v8i16 intrinsics +class RR_Int_v8i16 opcode, string opc, InstrItinClass itin, + Intrinsic IntID> : + RRForm; + +// RR Format for v4i32 intrinsics +class RR_Int_v4i32 opcode, string opc, InstrItinClass itin, + Intrinsic IntID> : + RRForm; + +//===----------------------------------------------------------------------===// +// Pseudo instructions, like call frames: +//===----------------------------------------------------------------------===// + +class Pseudo pattern> + : I { + let Pattern = pattern; + let Inst{31-0} = 0; +}
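+
+// A hypothetical example of the Pseudo format in use (the real definitions
+// live in SPUInstrInfo.td; the operand list here is illustrative):
+//
+//   def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt),
+//                                 "${:comment} ADJCALLSTACKDOWN",
+//                                 [(callseq_start imm:$amt)]>;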