From 053c1da8d956a794d158ac906b3927c923f97c4d Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Tue, 29 Jan 2008 02:16:57 +0000 Subject: [PATCH] Overhaul Cell SPU's addressing mode internals so that there are now only two addressing mode nodes, SPUaform and SPUindirect (vice the three previous ones, SPUaform, SPUdform and SPUxform). This improves code somewhat because we now avoid using reg+reg addressing when it can be avoided. It also simplifies the address selection logic, which was the main point for doing this. Also, for various global variables that would be loaded using SPU's A-form addressing, prefer D-form offs[reg] addressing, keeping the base in a register if the variable is used more than once. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46483 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/SPUAsmPrinter.cpp | 14 +- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 406 +++++++--------- lib/Target/CellSPU/SPUISelLowering.cpp | 140 ++++-- lib/Target/CellSPU/SPUISelLowering.h | 3 +- lib/Target/CellSPU/SPUInstrFormats.td | 4 + lib/Target/CellSPU/SPUInstrInfo.td | 606 ++++++++++-------------- lib/Target/CellSPU/SPUNodes.td | 21 +- lib/Target/CellSPU/SPUTargetAsmInfo.cpp | 6 +- test/CodeGen/CellSPU/call_indirect.ll | 15 +- test/CodeGen/CellSPU/extract_elt.ll | 2 +- test/CodeGen/CellSPU/fcmp.ll | 2 +- test/CodeGen/CellSPU/struct_1.ll | 125 ++--- test/CodeGen/CellSPU/struct_2.ll | 122 ----- test/CodeGen/CellSPU/vec_const.ll | 9 +- 14 files changed, 623 insertions(+), 852 deletions(-) delete mode 100644 test/CodeGen/CellSPU/struct_2.ll diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp index 224616eb931..4bd8673a5b4 100644 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -195,12 +195,18 @@ namespace { void printAddr256K(const MachineInstr *MI, unsigned OpNo) { - /* Note: operand 1 is an offset or symbol name. Operand 2 is - ignored. 
*/ + /* Note: operand 1 is an offset or symbol name. */ if (MI->getOperand(OpNo).isImmediate()) { printS16ImmOperand(MI, OpNo); } else { printOp(MI->getOperand(OpNo)); + if (MI->getOperand(OpNo+1).isImmediate()) { + int displ = int(MI->getOperand(OpNo+1).getImm()); + if (displ > 0) + O << "+" << displ; + else if (displ < 0) + O << displ; + } } } @@ -309,7 +315,6 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO) { case MachineOperand::MO_JumpTableIndex: O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); - // FIXME: PIC relocation model return; case MachineOperand::MO_ConstantPoolIndex: O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() @@ -521,7 +526,8 @@ bool LinuxAsmPrinter::doFinalization(Module &M) { O << "\t.zero\t" << Size; } else if (I->hasInternalLinkage()) { SwitchToDataSection("\t.data", I); - O << TAI->getLCOMMDirective() << name << "," << Size << "," << Align; + O << ".local " << name << "\n"; + O << TAI->getCOMMDirective() << name << "," << Size << "," << Align << "\n"; } else { SwitchToDataSection("\t.data", I); O << ".comm " << name << "," << Size; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 3a50e3bcf04..3905d553029 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -150,6 +150,16 @@ namespace { return false; } + bool + isHighLow(const SDOperand &Op) + { + return (Op.getOpcode() == SPUISD::IndirectAddr + && ((Op.getOperand(0).getOpcode() == SPUISD::Hi + && Op.getOperand(1).getOpcode() == SPUISD::Lo) + || (Op.getOperand(0).getOpcode() == SPUISD::Lo + && Op.getOperand(1).getOpcode() == SPUISD::Hi))); + } + //===------------------------------------------------------------------===// //! 
MVT::ValueType to "useful stuff" mapping structure: @@ -157,40 +167,24 @@ namespace { MVT::ValueType VT; unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined) int prefslot_byte; /// Byte offset of the "preferred" slot - unsigned brcc_eq_ins; /// br_cc equal instruction - unsigned brcc_neq_ins; /// br_cc not equal instruction - unsigned load_aform; /// A-form load instruction for this VT - unsigned store_aform; /// A-form store instruction for this VT + unsigned insmask_ins; /// Insert mask instruction for a-form }; const valtype_map_s valtype_map[] = { - { MVT::i1, 0, 3, 0, 0, 0, - 0 }, - { MVT::i8, SPU::ORBIr8, 3, 0, 0, SPU::LQAr8, - SPU::STQAr8 }, - { MVT::i16, SPU::ORHIr16, 2, SPU::BRHZ, SPU::BRHNZ, SPU::LQAr16, - SPU::STQAr16 }, - { MVT::i32, SPU::ORIr32, 0, SPU::BRZ, SPU::BRNZ, SPU::LQAr32, - SPU::STQAr32 }, - { MVT::i64, SPU::ORIr64, 0, 0, 0, SPU::LQAr64, - SPU::STQAr64 }, - { MVT::f32, 0, 0, 0, 0, SPU::LQAf32, - SPU::STQAf32 }, - { MVT::f64, 0, 0, 0, 0, SPU::LQAf64, - SPU::STQAf64 }, + { MVT::i1, 0, 3, 0 }, + { MVT::i8, SPU::ORBIr8, 3, 0 }, + { MVT::i16, SPU::ORHIr16, 2, 0 }, + { MVT::i32, SPU::ORIr32, 0, 0 }, + { MVT::i64, SPU::ORIr64, 0, 0 }, + { MVT::f32, 0, 0, 0 }, + { MVT::f64, 0, 0, 0 }, // vector types... (sigh!) 
- { MVT::v16i8, 0, 0, 0, 0, SPU::LQAv16i8, - SPU::STQAv16i8 }, - { MVT::v8i16, 0, 0, 0, 0, SPU::LQAv8i16, - SPU::STQAv8i16 }, - { MVT::v4i32, 0, 0, 0, 0, SPU::LQAv4i32, - SPU::STQAv4i32 }, - { MVT::v2i64, 0, 0, 0, 0, SPU::LQAv2i64, - SPU::STQAv2i64 }, - { MVT::v4f32, 0, 0, 0, 0, SPU::LQAv4f32, - SPU::STQAv4f32 }, - { MVT::v2f64, 0, 0, 0, 0, SPU::LQAv2f64, - SPU::STQAv2f64 }, + { MVT::v16i8, 0, 0, SPU::CBD }, + { MVT::v8i16, 0, 0, SPU::CHD }, + { MVT::v4i32, 0, 0, SPU::CWD }, + { MVT::v2i64, 0, 0, 0 }, + { MVT::v4f32, 0, 0, SPU::CWD }, + { MVT::v2f64, 0, 0, 0 } }; const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); @@ -381,18 +375,20 @@ SPUDAGToDAGISel::SelectDForm2Addr(SDOperand Op, SDOperand N, SDOperand &Disp, Disp = CurDAG->getTargetConstant(0, VT); Base = N; return true; - } else if (Opc == SPUISD::DFormAddr) { - // D-Form address: This is pretty straightforward, naturally... - CN = cast(N.getOperand(1)); - assert(CN != 0 && "SelectDFormAddr/SPUISD::DForm2Addr expecting constant"); - Imm = unsigned(CN->getValue()); - if (Imm < 0xff) { - Disp = CurDAG->getTargetConstant(CN->getValue(), PtrVT); - Base = N.getOperand(0); - return true; + } else if (Opc == SPUISD::IndirectAddr) { + SDOperand Op1 = N.getOperand(1); + if (Op1.getOpcode() == ISD::TargetConstant + || Op1.getOpcode() == ISD::Constant) { + CN = cast(N.getOperand(1)); + assert(CN != 0 && "SelectIndirectAddr/SPUISD::DForm2Addr expecting constant"); + Imm = unsigned(CN->getValue()); + if (Imm < 0xff) { + Disp = CurDAG->getTargetConstant(CN->getValue(), PtrVT); + Base = N.getOperand(0); + return true; + } } } - return false; } @@ -407,7 +403,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, SDOperand &Index) { // These match the addr256k operand type: MVT::ValueType OffsVT = MVT::i16; - MVT::ValueType PtrVT = SPUtli.getPointerTy(); + SDOperand Zero = CurDAG->getTargetConstant(0, OffsVT); switch (N.getOpcode()) { case ISD::Constant: @@ -417,28 +413,40 @@ 
SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, abort(); /*NOTREACHED*/ - case ISD::TargetConstant: { - // Loading from a constant address. - ConstantSDNode *CN = dyn_cast(N); - int Imm = (int)CN->getValue(); - if (Imm < 0x3ffff && (Imm & 0x3) == 0) { - Base = CurDAG->getTargetConstant(Imm, PtrVT); - // Note that this operand will be ignored by the assembly printer... - Index = CurDAG->getTargetConstant(0, OffsVT); - return true; - } - } + case ISD::TargetConstant: case ISD::TargetGlobalAddress: - case ISD::TargetConstantPool: - case SPUISD::AFormAddr: { - // The address is in Base. N is a dummy that will be ignored by - // the assembly printer. - Base = N; - Index = CurDAG->getTargetConstant(0, OffsVT); - return true; - } - } + case ISD::TargetJumpTable: + cerr << "SPUSelectAFormAddr: Target Constant/Pool/Global not wrapped as " + << "A-form address.\n"; + abort(); + /*NOTREACHED*/ + case SPUISD::AFormAddr: + // Just load from memory if there's only a single use of the location, + // otherwise, this will get handled below with D-form offset addresses + if (N.hasOneUse()) { + SDOperand Op0 = N.getOperand(0); + switch (Op0.getOpcode()) { + case ISD::TargetConstantPool: + case ISD::TargetJumpTable: + Base = Op0; + Index = Zero; + return true; + + case ISD::TargetGlobalAddress: { + GlobalAddressSDNode *GSDN = cast(Op0); + GlobalValue *GV = GSDN->getGlobal(); + if (GV->getAlignment() == 16) { + Base = Op0; + Index = Zero; + return true; + } + break; + } + } + } + break; + } return false; } @@ -460,14 +468,11 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, unsigned Opc = N.getOpcode(); unsigned PtrTy = SPUtli.getPointerTy(); - if (Opc == ISD::Register) { - Base = N; - Index = CurDAG->getTargetConstant(0, PtrTy); - return true; - } else if (Opc == ISD::FrameIndex) { + if (Opc == ISD::FrameIndex) { + // Stack frame index must be less than 512 (divided by 16): FrameIndexSDNode *FI = dyn_cast(N); DEBUG(cerr << 
"SelectDFormAddr: ISD::FrameIndex = " - << FI->getIndex() << "\n"); + << FI->getIndex() << "\n"); if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { Base = CurDAG->getTargetConstant(0, PtrTy); Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); @@ -475,19 +480,20 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, } } else if (Opc == ISD::ADD) { // Generated by getelementptr - const SDOperand Op0 = N.getOperand(0); // Frame index/base - const SDOperand Op1 = N.getOperand(1); // Offset within base + const SDOperand Op0 = N.getOperand(0); + const SDOperand Op1 = N.getOperand(1); - if ((Op1.getOpcode() == ISD::Constant - || Op1.getOpcode() == ISD::TargetConstant) - && Op0.getOpcode() != SPUISD::XFormAddr) { + if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo) + || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) { + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = N; + return true; + } else if (Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) { ConstantSDNode *CN = dyn_cast(Op1); - assert(CN != 0 && "SelectDFormAddr: Expected a constant"); + int32_t offset = int32_t(CN->getSignExtended()); - int32_t offset = (int32_t) CN->getSignExtended(); - unsigned Opc0 = Op0.getOpcode(); - - if (Opc0 == ISD::FrameIndex) { + if (Op0.getOpcode() == ISD::FrameIndex) { FrameIndexSDNode *FI = dyn_cast(Op0); DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI->getIndex() << "\n"); @@ -500,51 +506,69 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, } else if (offset > SPUFrameInfo::minFrameOffset() && offset < SPUFrameInfo::maxFrameOffset()) { Base = CurDAG->getTargetConstant(offset, PtrTy); - if (Opc0 == ISD::GlobalAddress) { - // Convert global address to target global address - GlobalAddressSDNode *GV = dyn_cast(Op0); - Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy); - return true; - } else { - 
// Otherwise, just take operand 0 - Index = Op0; + Index = Op0; + return true; + } + } else if (Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = dyn_cast(Op0); + int32_t offset = int32_t(CN->getSignExtended()); + + if (Op1.getOpcode() == ISD::FrameIndex) { + FrameIndexSDNode *FI = dyn_cast(Op1); + DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset + << " frame index = " << FI->getIndex() << "\n"); + + if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); return true; } + } else if (offset > SPUFrameInfo::minFrameOffset() + && offset < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = Op1; + return true; } - } else - return false; - } else if (Opc == SPUISD::DFormAddr) { - // D-Form address: This is pretty straightforward, - // naturally... but make sure that this isn't a D-form address - // with a X-form address embedded within: - const SDOperand Op0 = N.getOperand(0); // Frame index/base - const SDOperand Op1 = N.getOperand(1); // Offset within base + } + } else if (Opc == SPUISD::IndirectAddr) { + // Indirect with constant offset -> D-Form address + const SDOperand Op0 = N.getOperand(0); + const SDOperand Op1 = N.getOperand(1); + SDOperand Zero = CurDAG->getTargetConstant(0, N.getValueType()); - if (Op0.getOpcode() == ISD::Constant - || Op0.getOpcode() == ISD::TargetConstant) { + if (Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) { ConstantSDNode *CN = cast(Op1); - assert(CN != 0 && "SelectDFormAddr/SPUISD::DFormAddr expecting constant"); - Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); - Index = Op0; - return true; - } - } else if (Opc == ISD::FrameIndex) { - // Stack frame index must be less than 512 (divided by 16): - FrameIndexSDNode *FI = dyn_cast(N); - DEBUG(cerr << "SelectDFormAddr: 
ISD::FrameIndex = " - << FI->getIndex() << "\n"); - if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { + int32_t offset = int32_t(CN->getSignExtended()); + if (offset > SPUFrameInfo::minFrameOffset() + && offset < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); + Index = Op0; + return true; + } + } else if (Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = cast(Op0); + int32_t offset = int32_t(CN->getSignExtended()); + if (offset > SPUFrameInfo::minFrameOffset() + && offset < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); + Index = Op1; + return true; + } + } else if (Op0.getOpcode() == SPUISD::Hi + && Op1.getOpcode() == SPUISD::Lo) { + // (SPUindirect (SPUhi , 0), (SPUlo , 0)) Base = CurDAG->getTargetConstant(0, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); + Index = N; return true; } - } else if (Opc == SPUISD::LDRESULT) { - // It's a load result dereference - Base = CurDAG->getTargetConstant(0, PtrTy); - Index = N.getOperand(0); + } else if (Opc == SPUISD::AFormAddr) { + Base = CurDAG->getTargetConstant(0, N.getValueType()); + Index = N; return true; } - return false; } @@ -565,108 +589,10 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, || SelectDFormAddr(Op, N, Base, Index)) return false; - unsigned Opc = N.getOpcode(); - - if (Opc == ISD::ADD) { - SDOperand N1 = N.getOperand(0); - SDOperand N2 = N.getOperand(1); - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; - } else if (Opc == SPUISD::XFormAddr) { - Base = N; - Index = N.getOperand(1); - return true; - } else if (Opc == SPUISD::DFormAddr) { - // Must be a D-form address with an X-form address embedded - // within: - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; - } else if (N.getNumOperands() == 2) { - SDOperand N1 = N.getOperand(0); - SDOperand N2 = N.getOperand(1); - 
unsigned N1Opc = N1.getOpcode(); - unsigned N2Opc = N2.getOpcode(); - - if ((N1Opc == ISD::CopyToReg || N1Opc == ISD::Register) - && (N2Opc == ISD::CopyToReg || N2Opc == ISD::Register)) { - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; - /*UNREACHED*/ - } else { - cerr << "SelectXFormAddr: 2-operand unhandled operand:\n"; - N.Val->dump(CurDAG); - cerr << "\n"; - abort(); - /*UNREACHED*/ - } - } else { - cerr << "SelectXFormAddr: Unhandled operand type:\n"; - N.Val->dump(CurDAG); - cerr << "\n"; - abort(); - /*UNREACHED*/ - } - - return false; -} - -//! Emit load for A-form addresses -/* - */ -SDNode * -Emit_LOAD_AFormAddr(SDOperand Op, SelectionDAG &CurDAG, SPUDAGToDAGISel &ISel) -{ - SDNode *Result; - MVT::ValueType OpVT = Op.getValueType(); - SDOperand Chain = Op.getOperand(0); - SDOperand Ptr = Op.getOperand(1); - SDOperand PtrArg = Ptr.getOperand(0); - SDOperand PtrOffs = Ptr.getOperand(1); - const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); - - if (PtrOffs.getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast(PtrOffs); - MVT::ValueType PVT = PtrOffs.getValueType(); - PtrOffs = CurDAG.getTargetConstant(CN->getValue(), PVT); - } - ISel.AddToISelQueue(PtrArg); - ISel.AddToISelQueue(PtrOffs); - ISel.AddToISelQueue(Chain); - Result = CurDAG.getTargetNode(vtm->load_aform, OpVT, MVT::Other, PtrArg, PtrOffs, Chain); - Chain = SDOperand(Result, 1); - return Result; -} - -//! 
Emit store for A-form addresses -/* - */ -SDNode * -Emit_STORE_AFormAddr(SDOperand Op, SelectionDAG &CurDAG, SPUDAGToDAGISel &ISel) -{ - SDNode *Result; - SDOperand Chain = Op.getOperand(0); - SDOperand Val = Op.getOperand(1); - SDOperand Ptr = Op.getOperand(2); - SDOperand PtrArg = Ptr.getOperand(0); - SDOperand PtrOffs = Ptr.getOperand(1); - const valtype_map_s *vtm = getValueTypeMapEntry(Val.getValueType()); - - if (PtrOffs.getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast(PtrOffs); - MVT::ValueType PVT = PtrOffs.getValueType(); - PtrOffs = CurDAG.getTargetConstant(CN->getValue(), PVT); - } - ISel.AddToISelQueue(Val); - ISel.AddToISelQueue(PtrArg); - ISel.AddToISelQueue(PtrOffs); - ISel.AddToISelQueue(Chain); - SDOperand Ops[4] = { Val, PtrArg, PtrOffs, Chain }; - Result = CurDAG.getTargetNode(vtm->store_aform, MVT::Other, Ops, 4); - Chain = SDOperand(Result, 1); - return Result; + // All else fails, punt and use an X-form address: + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; } //! 
Convert the operand from a target-independent to a target-specific node @@ -695,12 +621,6 @@ SPUDAGToDAGISel::Select(SDOperand Op) { Ops[0] = TFI; Ops[1] = Zero; n_ops = 2; - } else if (Opc == ISD::LOAD - && Op.getOperand(1).getOpcode() == SPUISD::AFormAddr) { - return Emit_LOAD_AFormAddr(Op, *CurDAG, *this); - } else if (Opc == ISD::STORE - && Op.getOperand(2).getOpcode() == SPUISD::AFormAddr) { - return Emit_STORE_AFormAddr(Op, *CurDAG, *this); } else if (Opc == ISD::ZERO_EXTEND) { // (zero_extend:i16 (and:i8 , )) const SDOperand &Op1 = N->getOperand(0); @@ -717,6 +637,38 @@ SPUDAGToDAGISel::Select(SDOperand Op) { n_ops = 2; } } + } else if (Opc == SPUISD::INSERT_MASK) { + SDOperand Op0 = Op.getOperand(0); + if (Op0.getOpcode() == SPUISD::AFormAddr) { + // (SPUvecinsmask (SPUaform , 0)) -> + // (CBD|CHD|CWD 0, arg) + const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); + ConstantSDNode *CN = cast(Op0.getOperand(1)); + assert(vtm->insmask_ins != 0 && "missing insert mask instruction"); + NewOpc = vtm->insmask_ins; + Ops[0] = CurDAG->getTargetConstant(CN->getValue(), Op0.getValueType()); + Ops[1] = Op0; + n_ops = 2; + + AddToISelQueue(Op0); + } else if (Op0.getOpcode() == ISD::FrameIndex) { + // (SPUvecinsmask ) -> + // (CBD|CHD|CWD 0, ) + const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); + NewOpc = vtm->insmask_ins; + Ops[0] = CurDAG->getTargetConstant(0, Op0.getValueType()); + Ops[1] = Op0; + n_ops = 2; + } else if (isHighLow(Op0)) { + // (SPUvecinsmask (SPUindirect (SPUhi , 0), (SPUlow , 0))) -> + // (CBD|CHD|CWD 0, arg) + const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); + NewOpc = vtm->insmask_ins; + Ops[0] = CurDAG->getTargetConstant(0, Op0.getValueType()); + Ops[1] = Op0; + n_ops = 2; + AddToISelQueue(Op0); + } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT unsigned VT = N->getValueType(0); @@ -748,19 +700,19 @@ SPUDAGToDAGISel::Select(SDOperand Op) { AddToISelQueue(Chain); return Result; - } else if (Opc == 
SPUISD::XFormAddr) { + } else if (Opc == SPUISD::IndirectAddr) { SDOperand Op0 = Op.getOperand(0); if (Op0.getOpcode() == SPUISD::LDRESULT || Op0.getOpcode() == SPUISD::AFormAddr) { - // (XFormAddr (LDRESULT|AFormAddr, imm)) + // (IndirectAddr (LDRESULT|AFormAddr, imm)) SDOperand Op1 = Op.getOperand(1); MVT::ValueType VT = Op.getValueType(); - DEBUG(cerr << "CellSPU: XFormAddr(" - << (Op0.getOpcode() == SPUISD::LDRESULT - ? "LDRESULT" - : "AFormAddr") - << ", imm):\nOp0 = "); + DEBUG(cerr << "CellSPU: IndirectAddr(" + << (Op0.getOpcode() == SPUISD::LDRESULT + ? "LDRESULT" + : "AFormAddr") + << ", imm):\nOp0 = "); DEBUG(Op.getOperand(0).Val->dump(CurDAG)); DEBUG(cerr << "\nOp1 = "); DEBUG(Op.getOperand(1).Val->dump(CurDAG)); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 0f1d0452804..33261a607b1 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -289,14 +289,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. 
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool, MVT::f32, Custom); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - setOperationAction(ISD::ConstantPool, MVT::f64, Custom); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); + for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128; + ++sctype) { + setOperationAction(ISD::GlobalAddress, sctype, Custom); + setOperationAction(ISD::ConstantPool, sctype, Custom); + setOperationAction(ISD::JumpTable, sctype, Custom); + } // RET must be custom lowered, to meet ABI requirements setOperationAction(ISD::RET, MVT::Other, Custom); @@ -377,7 +375,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setStackPointerRegisterToSaveRestore(SPU::R1); // We have target-specific dag combine patterns for the following nodes: - // e.g., setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::ADD); computeRegisterProperties(); } @@ -391,8 +389,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; - node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr"; - node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr"; + node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr"; node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; @@ -524,11 +521,12 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, // Unaligned load or we're using the "large memory" model, which means that // we have to be very pessimistic: if 
(isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) { - basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); } // Add the offset - basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT)); + basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, + DAG.getConstant((alignOffs & ~0xf), PtrVT)); was16aligned = false; return DAG.getLoad(MVT::v16i8, chain, basePtr, LSN->getSrcValue(), LSN->getSrcValueOffset(), @@ -706,21 +704,20 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DEBUG(basePtr.Val->dump(&DAG)); DEBUG(cerr << "\n"); - if (basePtr.getOpcode() == SPUISD::DFormAddr) { - // Hmmmm... do we ever actually hit this code? - insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, - basePtr.getOperand(0), - insertEltOffs); - } else if (basePtr.getOpcode() == SPUISD::XFormAddr || - (basePtr.getOpcode() == ISD::ADD - && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) { + if (basePtr.getOpcode() == SPUISD::IndirectAddr || + (basePtr.getOpcode() == ISD::ADD + && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) { insertEltPtr = basePtr; } else { - // $sp is always aligned, so use it instead of potentially loading an - // address into a new register: - insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, - DAG.getRegister(SPU::R1, PtrVT), - insertEltOffs); +#if 0 + // $sp is always aligned, so use it when necessary to avoid loading + // an address + SDOperand ptrP = + basePtr.Val->hasOneUse() ? 
DAG.getRegister(SPU::R1, PtrVT) : basePtr; + insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, ptrP, insertEltOffs); +#else + insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs); +#endif } insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr); @@ -772,7 +769,7 @@ LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); #else - return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, PtrVT, CPI, Zero); #endif } } @@ -791,9 +788,10 @@ LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { const TargetMachine &TM = DAG.getTarget(); if (TM.getRelocationModel() == Reloc::Static) { + SDOperand JmpAForm = DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero); return (!ST->usingLargeMem() - ? DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero) - : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero)); + ? JmpAForm + : DAG.getNode(SPUISD::IndirectAddr, PtrVT, JmpAForm, Zero)); } assert(0 && @@ -811,9 +809,13 @@ LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SDOperand Zero = DAG.getConstant(0, PtrVT); if (TM.getRelocationModel() == Reloc::Static) { - return (!ST->usingLargeMem() - ? 
DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero) - : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero)); + if (!ST->usingLargeMem()) { + return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero); + } else { + SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); + SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + } } else { cerr << "LowerGlobalAddress: Relocation model other than static not " << "supported.\n"; @@ -1202,7 +1204,7 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } else { // "Large memory" mode: Turn all calls into indirect calls with a X-form // address pairs: - Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero); + Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); @@ -2553,16 +2555,80 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { #if 0 TargetMachine &TM = getTargetMachine(); +#endif + const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); SelectionDAG &DAG = DCI.DAG; SDOperand N0 = N->getOperand(0); // everything has at least one operand switch (N->getOpcode()) { default: break; - // Do something creative here for ISD nodes that can be coalesced in unique - // ways. 
+ case SPUISD::IndirectAddr: { + if (!ST->usingLargeMem() && N0.getOpcode() == SPUISD::AFormAddr) { + ConstantSDNode *CN = cast(N->getOperand(1)); + if (CN->getValue() == 0) { + // (SPUindirect (SPUaform , 0), 0) -> + // (SPUaform , 0) + + DEBUG(cerr << "Replace: "); + DEBUG(N->dump(&DAG)); + DEBUG(cerr << "\nWith: "); + DEBUG(N0.Val->dump(&DAG)); + DEBUG(cerr << "\n"); + + return N0; + } + } + } + case ISD::ADD: { + SDOperand Op0 = N->getOperand(0); + SDOperand Op1 = N->getOperand(1); + + if ((Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) + && Op0.getOpcode() == SPUISD::IndirectAddr) { + SDOperand Op01 = Op0.getOperand(1); + if (Op01.getOpcode() == ISD::Constant + || Op01.getOpcode() == ISD::TargetConstant) { + // (add , (SPUindirect , )) -> + // (SPUindirect , ) + ConstantSDNode *CN0 = cast(Op1); + ConstantSDNode *CN1 = cast(Op01); + SDOperand combinedConst = + DAG.getConstant(CN0->getValue() + CN1->getValue(), + Op0.getValueType()); + + DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", " + << "(SPUindirect , " << CN1->getValue() << "))\n"); + DEBUG(cerr << "With: (SPUindirect , " + << CN0->getValue() + CN1->getValue() << ")\n"); + return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(), + Op0.getOperand(0), combinedConst); + } + } else if ((Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) + && Op1.getOpcode() == SPUISD::IndirectAddr) { + SDOperand Op11 = Op1.getOperand(1); + if (Op11.getOpcode() == ISD::Constant + || Op11.getOpcode() == ISD::TargetConstant) { + // (add (SPUindirect , ), ) -> + // (SPUindirect , ) + ConstantSDNode *CN0 = cast(Op0); + ConstantSDNode *CN1 = cast(Op11); + SDOperand combinedConst = + DAG.getConstant(CN0->getValue() + CN1->getValue(), + Op0.getValueType()); + + DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", " + << "(SPUindirect , " << CN1->getValue() << "))\n"); + DEBUG(cerr << "With: (SPUindirect , " + << CN0->getValue() + CN1->getValue() << ")\n"); + + 
return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(), + Op1.getOperand(0), combinedConst); + } + } + } } -#endif - // Otherwise, return unchanged. return SDOperand(); } diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index d9e4e7ed4ed..916f2c931fc 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -32,8 +32,7 @@ namespace llvm { Lo, ///< Low address component (lower 16) PCRelAddr, ///< Program counter relative address AFormAddr, ///< A-form address (local store) - DFormAddr, ///< D-Form address "imm($r)" - XFormAddr, ///< X-Form address "$r($r)" + IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)" LDRESULT, ///< Load result (value, chain) CALL, ///< CALL instruction diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td index 7221ab2dc8e..eda1ab3da47 100644 --- a/lib/Target/CellSPU/SPUInstrFormats.td +++ b/lib/Target/CellSPU/SPUInstrFormats.td @@ -247,6 +247,10 @@ let RT = 0 in { { } } +//===----------------------------------------------------------------------===// +// Specialized versions of RI16: +//===----------------------------------------------------------------------===// + // RI18 Format class RI18Form opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 71cb37dc08d..b21468a98a8 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -48,205 +48,109 @@ def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$fi //===----------------------------------------------------------------------===// let isSimpleLoad = 1 in { - def LQDv16i8: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v16i8 VECREG:$rT), (load dform_addr:$src))]>; + class LoadDFormVec + : RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), + 
"lqd\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load dform_addr:$src))]> + { } - def LQDv8i16: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v8i16 VECREG:$rT), (load dform_addr:$src))]>; + class LoadDForm + : RI10Form<0b00101100, (outs rclass:$rT), (ins memri10:$src), + "lqd\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load dform_addr:$src))]> + { } - def LQDv4i32: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v4i32 VECREG:$rT), (load dform_addr:$src))]>; + multiclass LoadDForms + { + def v16i8: LoadDFormVec; + def v8i16: LoadDFormVec; + def v4i32: LoadDFormVec; + def v2i64: LoadDFormVec; + def v4f32: LoadDFormVec; + def v2f64: LoadDFormVec; - def LQDv2i64: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v2i64 VECREG:$rT), (load dform_addr:$src))]>; + def r128: LoadDForm; + def r64: LoadDForm; + def r32: LoadDForm; + def f32: LoadDForm; + def f64: LoadDForm; + def r16: LoadDForm; + def r8: LoadDForm; + } - def LQDv4f32: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v4f32 VECREG:$rT), (load dform_addr:$src))]>; + class LoadAFormVec + : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load aform_addr:$src))]> + { } - def LQDv2f64: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v2f64 VECREG:$rT), (load dform_addr:$src))]>; + class LoadAForm + : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load aform_addr:$src))]> + { } - def LQDr128: - RI10Form<0b00101100, (outs GPRC:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set GPRC:$rT, (load dform_addr:$src))]>; + multiclass LoadAForms + { + def v16i8: LoadAFormVec; + def v8i16: 
LoadAFormVec<v8i16>; + def v4i32: LoadAFormVec<v4i32>; + def v2i64: LoadAFormVec<v2i64>; + def v4f32: LoadAFormVec<v4f32>; + def v2f64: LoadAFormVec<v2f64>; - def LQDr64: - RI10Form<0b00101100, (outs R64C:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R64C:$rT, (load dform_addr:$src))]>; + def r128: LoadAForm<GPRC>; + def r64: LoadAForm<R64C>; + def r32: LoadAForm<R32C>; + def f32: LoadAForm<R32FP>; + def f64: LoadAForm<R64FP>; + def r16: LoadAForm<R16C>; + def r8: LoadAForm<R8C>; + } - def LQDr32: - RI10Form<0b00101100, (outs R32C:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R32C:$rT, (load dform_addr:$src))]>; + class LoadXFormVec<ValueType vectype> + : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), + "lqx\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load xform_addr:$src))]> + { } - // Floating Point - def LQDf32: - RI10Form<0b00101100, (outs R32FP:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R32FP:$rT, (load dform_addr:$src))]>; + class LoadXForm<RegisterClass rclass> + : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src), + "lqx\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load xform_addr:$src))]> + { } - def LQDf64: - RI10Form<0b00101100, (outs R64FP:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R64FP:$rT, (load dform_addr:$src))]>; - // END Floating Point + multiclass LoadXForms + { + def v16i8: LoadXFormVec<v16i8>; + def v8i16: LoadXFormVec<v8i16>; + def v4i32: LoadXFormVec<v4i32>; + def v2i64: LoadXFormVec<v2i64>; + def v4f32: LoadXFormVec<v4f32>; + def v2f64: LoadXFormVec<v2f64>; - def LQDr16: - RI10Form<0b00101100, (outs R16C:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R16C:$rT, (load dform_addr:$src))]>; + def r128: LoadXForm<GPRC>; + def r64: LoadXForm<R64C>; + def r32: LoadXForm<R32C>; + def f32: LoadXForm<R32FP>; + def f64: LoadXForm<R64FP>; + def r16: LoadXForm<R16C>; + def r8: LoadXForm<R8C>; + } - def LQDr8: - RI10Form<0b00101100, (outs R8C:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R8C:$rT, (load dform_addr:$src))]>; - - def LQAv16i8: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
- "lqa\t$rT, $src", LoadStore, - [(set (v16i8 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv8i16: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v8i16 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv4i32: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v4i32 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv2i64: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v2i64 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv4f32: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v4f32 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv2f64: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v2f64 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAr128: - RI16Form<0b100001100, (outs GPRC:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set GPRC:$rT, (load aform_addr:$src))]>; - - def LQAr64: - RI16Form<0b100001100, (outs R64C:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R64C:$rT, (load aform_addr:$src))]>; - - def LQAr32: - RI16Form<0b100001100, (outs R32C:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R32C:$rT, (load aform_addr:$src))]>; - - def LQAf32: - RI16Form<0b100001100, (outs R32FP:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R32FP:$rT, (load aform_addr:$src))]>; - - def LQAf64: - RI16Form<0b100001100, (outs R64FP:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R64FP:$rT, (load aform_addr:$src))]>; - - def LQAr16: - RI16Form<0b100001100, (outs R16C:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R16C:$rT, (load aform_addr:$src))]>; - - def LQAr8: - RI16Form<0b100001100, (outs R8C:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R8C:$rT, (load 
aform_addr:$src))]>; - - def LQXv16i8: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v16i8 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv8i16: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v8i16 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv4i32: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v4i32 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv2i64: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v2i64 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv4f32: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v4f32 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv2f64: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v2f64 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXr128: - RRForm<0b00100011100, (outs GPRC:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set GPRC:$rT, (load xform_addr:$src))]>; - - def LQXr64: - RRForm<0b00100011100, (outs R64C:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R64C:$rT, (load xform_addr:$src))]>; - - def LQXr32: - RRForm<0b00100011100, (outs R32C:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R32C:$rT, (load xform_addr:$src))]>; - - def LQXf32: - RRForm<0b00100011100, (outs R32FP:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R32FP:$rT, (load xform_addr:$src))]>; - - def LQXf64: - RRForm<0b00100011100, (outs R64FP:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R64FP:$rT, (load xform_addr:$src))]>; - - def LQXr16: - RRForm<0b00100011100, (outs R16C:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R16C:$rT, (load xform_addr:$src))]>; - - def LQXr8: - RRForm<0b00100011100, (outs R8C:$rT), (ins memrr:$src), -
"lqx\t$rT, $src", LoadStore, - [(set R8C:$rT, (load xform_addr:$src))]>; + defm LQA : LoadAForms; + defm LQD : LoadDForms; + defm LQX : LoadXForms; /* Load quadword, PC relative: Not much use at this point in time. - Might be of use later for relocatable code. + Might be of use later for relocatable code. It's effectively the + same as LQA, but uses PC-relative addressing. def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp), "lqr\t$rT, $disp", LoadStore, [(set VECREG:$rT, (load iaddr:$disp))]>; @@ -256,174 +160,108 @@ let isSimpleLoad = 1 in { //===----------------------------------------------------------------------===// // Stores: //===----------------------------------------------------------------------===// +class StoreDFormVec<ValueType vectype> + : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), + "stqd\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), dform_addr:$src)]> +{ } -def STQDv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v16i8 VECREG:$rT), dform_addr:$src)]>; +class StoreDForm<RegisterClass rclass> + : RI10Form<0b00100100, (outs), (ins rclass:$rT, memri10:$src), + "stqd\t$rT, $src", + LoadStore, + [(store rclass:$rT, dform_addr:$src)]> +{ } -def STQDv8i16 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v8i16 VECREG:$rT), dform_addr:$src)]>; +multiclass StoreDForms +{ + def v16i8: StoreDFormVec<v16i8>; + def v8i16: StoreDFormVec<v8i16>; + def v4i32: StoreDFormVec<v4i32>; + def v2i64: StoreDFormVec<v2i64>; + def v4f32: StoreDFormVec<v4f32>; + def v2f64: StoreDFormVec<v2f64>; -def STQDv4i32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v4i32 VECREG:$rT), dform_addr:$src)]>; + def r128: StoreDForm<GPRC>; + def r64: StoreDForm<R64C>; + def r32: StoreDForm<R32C>; + def f32: StoreDForm<R32FP>; + def f64: StoreDForm<R64FP>; + def r16: StoreDForm<R16C>; + def r8: StoreDForm<R8C>; +} -def STQDv2i64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), -
"stqd\t$rT, $src", LoadStore, - [(store (v2i64 VECREG:$rT), dform_addr:$src)]>; +class StoreAFormVec<ValueType vectype> + : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src), + "stqa\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), aform_addr:$src)]> +{ } -def STQDv4f32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v4f32 VECREG:$rT), dform_addr:$src)]>; +class StoreAForm<RegisterClass rclass> + : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src), + "stqa\t$rT, $src", + LoadStore, + [(store rclass:$rT, aform_addr:$src)]> +{ } -def STQDv2f64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v2f64 VECREG:$rT), dform_addr:$src)]>; +multiclass StoreAForms +{ + def v16i8: StoreAFormVec<v16i8>; + def v8i16: StoreAFormVec<v8i16>; + def v4i32: StoreAFormVec<v4i32>; + def v2i64: StoreAFormVec<v2i64>; + def v4f32: StoreAFormVec<v4f32>; + def v2f64: StoreAFormVec<v2f64>; -def STQDr128 : RI10Form<0b00100100, (outs), (ins GPRC:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store GPRC:$rT, dform_addr:$src)]>; + def r128: StoreAForm<GPRC>; + def r64: StoreAForm<R64C>; + def r32: StoreAForm<R32C>; + def f32: StoreAForm<R32FP>; + def f64: StoreAForm<R64FP>; + def r16: StoreAForm<R16C>; + def r8: StoreAForm<R8C>; +} -def STQDr64 : RI10Form<0b00100100, (outs), (ins R64C:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R64C:$rT, dform_addr:$src)]>; +class StoreXFormVec<ValueType vectype> + : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), + "stqx\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), xform_addr:$src)]> +{ } -def STQDr32 : RI10Form<0b00100100, (outs), (ins R32C:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R32C:$rT, dform_addr:$src)]>; +class StoreXForm<RegisterClass rclass> + : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src), + "stqx\t$rT, $src", + LoadStore, + [(store rclass:$rT, xform_addr:$src)]> +{ } -// Floating Point -def STQDf32 : RI10Form<0b00100100, (outs), (ins R32FP:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, -
[(store R32FP:$rT, dform_addr:$src)]>; +multiclass StoreXForms +{ + def v16i8: StoreXFormVec<v16i8>; + def v8i16: StoreXFormVec<v8i16>; + def v4i32: StoreXFormVec<v4i32>; + def v2i64: StoreXFormVec<v2i64>; + def v4f32: StoreXFormVec<v4f32>; + def v2f64: StoreXFormVec<v2f64>; -def STQDf64 : RI10Form<0b00100100, (outs), (ins R64FP:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R64FP:$rT, dform_addr:$src)]>; + def r128: StoreXForm<GPRC>; + def r64: StoreXForm<R64C>; + def r32: StoreXForm<R32C>; + def f32: StoreXForm<R32FP>; + def f64: StoreXForm<R64FP>; + def r16: StoreXForm<R16C>; + def r8: StoreXForm<R8C>; +} -def STQDr16 : RI10Form<0b00100100, (outs), (ins R16C:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R16C:$rT, dform_addr:$src)]>; - -def STQDr8 : RI10Form<0b00100100, (outs), (ins R8C:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R8C:$rT, dform_addr:$src)]>; - -def STQAv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v16i8 VECREG:$rT), aform_addr:$src)]>; - -def STQAv8i16 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v8i16 VECREG:$rT), aform_addr:$src)]>; - -def STQAv4i32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v4i32 VECREG:$rT), aform_addr:$src)]>; - -def STQAv2i64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v2i64 VECREG:$rT), aform_addr:$src)]>; - -def STQAv4f32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v4f32 VECREG:$rT), aform_addr:$src)]>; - -def STQAv2f64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v2f64 VECREG:$rT), aform_addr:$src)]>; - -def STQAr128 : RI10Form<0b00100100, (outs), (ins GPRC:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store GPRC:$rT, aform_addr:$src)]>; - -def STQAr64 :
RI10Form<0b00100100, (outs), (ins R64C:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R64C:$rT, aform_addr:$src)]>; - -def STQAr32 : RI10Form<0b00100100, (outs), (ins R32C:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R32C:$rT, aform_addr:$src)]>; - -// Floating Point -def STQAf32 : RI10Form<0b00100100, (outs), (ins R32FP:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R32FP:$rT, aform_addr:$src)]>; - -def STQAf64 : RI10Form<0b00100100, (outs), (ins R64FP:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R64FP:$rT, aform_addr:$src)]>; - -def STQAr16 : RI10Form<0b00100100, (outs), (ins R16C:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R16C:$rT, aform_addr:$src)]>; - -def STQAr8 : RI10Form<0b00100100, (outs), (ins R8C:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R8C:$rT, aform_addr:$src)]>; - -def STQXv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v16i8 VECREG:$rT), xform_addr:$src)]>; - -def STQXv8i16 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v8i16 VECREG:$rT), xform_addr:$src)]>; - -def STQXv4i32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v4i32 VECREG:$rT), xform_addr:$src)]>; - -def STQXv2i64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v2i64 VECREG:$rT), xform_addr:$src)]>; - -def STQXv4f32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v4f32 VECREG:$rT), xform_addr:$src)]>; - -def STQXv2f64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v2f64 VECREG:$rT), xform_addr:$src)]>; - -def STQXr128 : RI10Form<0b00100100, (outs), (ins GPRC:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store GPRC:$rT, 
xform_addr:$src)]>; - -def STQXr64: - RI10Form<0b00100100, (outs), (ins R64C:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R64C:$rT, xform_addr:$src)]>; - -def STQXr32: - RI10Form<0b00100100, (outs), (ins R32C:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R32C:$rT, xform_addr:$src)]>; - -// Floating Point -def STQXf32: - RI10Form<0b00100100, (outs), (ins R32FP:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R32FP:$rT, xform_addr:$src)]>; - -def STQXf64: - RI10Form<0b00100100, (outs), (ins R64FP:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R64FP:$rT, xform_addr:$src)]>; - -def STQXr16: - RI10Form<0b00100100, (outs), (ins R16C:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R16C:$rT, xform_addr:$src)]>; - -def STQXr8: - RI10Form<0b00100100, (outs), (ins R8C:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R8C:$rT, xform_addr:$src)]>; +defm STQD : StoreDForms; +defm STQA : StoreAForms; +defm STQX : StoreXForms; /* Store quadword, PC relative: Not much use at this point in time. Might - be useful for relocatable code. + be useful for relocatable code. 
def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp), "stqr\t$rT, $disp", LoadStore, [(store VECREG:$rT, iaddr:$disp)]>; @@ -620,17 +458,22 @@ def IOHLlo: // Form select mask for bytes using immediate, used in conjunction with the // SELB instruction: -def FSMBIv16i8 : RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), - "fsmbi\t$rT, $val", SelectOp, - [(set (v16i8 VECREG:$rT), (SPUfsmbi_v16i8 immU16:$val))]>; +class FSMBIVec<ValueType vectype> + : RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), + "fsmbi\t$rT, $val", + SelectOp, + [(set (vectype VECREG:$rT), (SPUfsmbi immU16:$val))]> +{ } -def FSMBIv8i16 : RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), - "fsmbi\t$rT, $val", SelectOp, - [(set (v8i16 VECREG:$rT), (SPUfsmbi_v8i16 immU16:$val))]>; +multiclass FSMBIs +{ + def v16i8: FSMBIVec<v16i8>; + def v8i16: FSMBIVec<v8i16>; + def v4i32: FSMBIVec<v4i32>; + def v2i64: FSMBIVec<v2i64>; +} -def FSMBIvecv4i32 : RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), - "fsmbi\t$rT, $val", SelectOp, - [(set (v4i32 VECREG:$rT), (SPUfsmbi_v4i32 immU16:$val))]>; +defm FSMBI : FSMBIs; //===----------------------------------------------------------------------===// // Integer and Logical Operations: @@ -2762,17 +2605,17 @@ def CEQHIv8i16: def CEQr32: RRForm<0b00000011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "ceq\t$rT, $rA, $rB", ByteOp, - [/* no pattern to match: intrinsic */]>; + [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>; def CEQv4i32: RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "ceq\t$rT, $rA, $rB", ByteOp, - [/* no pattern to match: intrinsic */]>; + [(set (v4i32 VECREG:$rT), (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CEQIr32: - RI10Form<0b00111110, (outs R32C:$rT), (ins R32C:$rA, s10imm:$val), + RI10Form<0b00111110, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), "ceqi\t$rT, $rA, $val", ByteOp, - [/* no pattern to match: intrinsic */]>; + [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>; def CEQIv4i32:
RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), @@ -2872,18 +2715,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { } //===----------------------------------------------------------------------===// -// brcond predicates: +// setcc and brcond patterns: //===----------------------------------------------------------------------===// + def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), (BRHZ R16C:$rA, bb:$dest)>; -def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), - (BRHNZ R16C:$rA, bb:$dest)>; - def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), (BRZ R32C:$rA, bb:$dest)>; + +def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), + (BRHNZ R16C:$rA, bb:$dest)>; def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), (BRNZ R32C:$rA, bb:$dest)>; +def : Pat<(brcond (i16 (setne R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (BRHNZ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; +def : Pat<(brcond (i32 (setne R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (BRNZ (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; + +def : Pat<(brcond (i16 (setne R16C:$rA, R16C:$rB)), bb:$dest), + (BRHNZ (CEQHr16 R16C:$rA, R16C:$rB), bb:$dest)>; +def : Pat<(brcond (i32 (setne R32C:$rA, R32C:$rB)), bb:$dest), + (BRNZ (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>; + let isTerminator = 1, isBarrier = 1 in { let isReturn = 1 in { def RET: @@ -3460,24 +3314,42 @@ def : Pat<(i32 (anyext R16C:$rSrc)), (ORI2To4 R16C:$rSrc, 0)>; //===----------------------------------------------------------------------===// -// Address translation: SPU, like PPC, has to split addresses into high and +// Address generation: SPU, like PPC, has to split addresses into high and // low parts in order to load them into a register.
//===----------------------------------------------------------------------===// -def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; -def : Pat<(SPUxform tglobaladdr:$in, 0), +def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; +def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>; +def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; +def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; + +def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0), + (SPUlo tglobaladdr:$in, 0)), (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; -def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; -def : Pat<(SPUxform tjumptable:$in, 0), +def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0), + (SPUlo texternalsym:$in, 0)), + (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; + +def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0), + (SPUlo tjumptable:$in, 0)), (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; -def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>; -def : Pat<(SPUlo tconstpool:$in , 0), (ILAlsa tconstpool:$in)>; -def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; -// tblgen bug prevents this from working. 
-// def : Pat<(SPUxform tconstpool:$in, 0), -// (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; +def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), + (SPUlo tconstpool:$in, 0)), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; + +def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), + (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; + +def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)), + (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; + +def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)), + (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; + +def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; // Instrinsics: include "CellSDKIntrinsics.td" diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 84601301c33..c231befd352 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -58,14 +58,8 @@ def SPUv4i32_binop: SDTypeProfile<1, 2, [ // FSMBI type constraints: There are several variations for the various // vector types (this avoids having to bit_convert all over the place.) 
-def SPUfsmbi_type_v16i8: SDTypeProfile<1, 1, [ - SDTCisVT<0, v16i8>, SDTCisVT<1, i32>]>; - -def SPUfsmbi_type_v8i16: SDTypeProfile<1, 1, [ - SDTCisVT<0, v8i16>, SDTCisVT<1, i32>]>; - -def SPUfsmbi_type_v4i32: SDTypeProfile<1, 1, [ - SDTCisVT<0, v4i32>, SDTCisVT<1, i32>]>; +def SPUfsmbi_type: SDTypeProfile<1, 1, [ + SDTCisVT<1, i32>]>; // SELB type constraints: def SPUselb_type_v16i8: SDTypeProfile<1, 3, [ @@ -148,9 +142,7 @@ def SPUrotbytes_left_chained : SDNode<"SPUISD::ROTBYTES_LEFT_CHAINED", SPUvecshift_type_v16i8, [SDNPHasChain]>; // SPU form select mask for bytes, immediate -def SPUfsmbi_v16i8: SDNode<"SPUISD::FSMBI", SPUfsmbi_type_v16i8, []>; -def SPUfsmbi_v8i16: SDNode<"SPUISD::FSMBI", SPUfsmbi_type_v8i16, []>; -def SPUfsmbi_v4i32: SDNode<"SPUISD::FSMBI", SPUfsmbi_type_v4i32, []>; +def SPUfsmbi: SDNode<"SPUISD::FSMBI", SPUfsmbi_type, []>; // SPU select bits instruction def SPUselb_v16i8: SDNode<"SPUISD::SELB", SPUselb_type_v16i8, []>; @@ -189,11 +181,8 @@ def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; // A-Form local store addresses def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; -// D-Form "imm($reg)" addresses -def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>; - -// X-Form "$reg($reg)" addresses -def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>; +// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses +def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>; // Load result node def SPUload_result : SDTypeProfile<1, 3, []>; diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp index 2f63446c129..0d7aac15a05 100644 --- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp @@ -17,6 +17,7 @@ using namespace llvm; SPUTargetAsmInfo::SPUTargetAsmInfo(const SPUTargetMachine &TM) { + PCSymbol = "."; CommentString = "#"; GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; @@ -24,17 +25,14 @@ 
SPUTargetAsmInfo::SPUTargetAsmInfo(const SPUTargetMachine &TM) { SetDirective = "\t.set"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; - SwitchToSectionDirective = "\t.section\t"; + SwitchToSectionDirective = ".section\t"; ConstantPoolSection = "\t.const\t"; JumpTableDataSection = ".const"; CStringSection = "\t.cstring"; - LCOMMDirective = "\t.lcomm\t"; StaticCtorsSection = ".mod_init_func"; StaticDtorsSection = ".mod_term_func"; FourByteConstantSection = ".const"; SixteenByteConstantSection = "\t.section\t.rodata.cst16,\"aM\",@progbits,16"; - UsedDirective = "\t.no_dead_strip\t"; - WeakRefDirective = "\t.weak_reference\t"; InlineAsmStart = "# InlineAsm Start"; InlineAsmEnd = "# InlineAsm End"; diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll index f604f74ca10..11481edc12f 100644 --- a/test/CodeGen/CellSPU/call_indirect.ll +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -1,19 +1,18 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep bisl %t1.s | count 7 +; RUN: grep bisl %t1.s | count 7 ; RUN: grep ila %t1.s | count 1 ; RUN: grep rotqbyi %t1.s | count 4 -; RUN: grep lqa %t1.s | count 5 -; RUN: grep lqd %t1.s | count 6 -; RUN: grep dispatch_tab %t1.s | count 10 +; RUN: grep lqa %t1.s | count 1 +; RUN: grep lqd %t1.s | count 11 +; RUN: grep dispatch_tab %t1.s | count 6 ; RUN: grep bisl %t2.s | count 7 ; RUN: grep ilhu %t2.s | count 2 ; RUN: grep iohl %t2.s | count 2 ; RUN: grep rotqby %t2.s | count 6 -; RUN: grep lqd %t2.s | count 12 -; RUN: grep lqx %t2.s | count 8 -; RUN: grep il %t2.s | count 9 -; RUN: grep ai %t2.s | count 5 +; RUN: grep lqd %t2.s | count 17 +; RUN: grep il %t2.s | count 2 +; RUN: grep ai %t2.s | count 7 ; RUN: grep dispatch_tab %t2.s | count 7 ; ModuleID = 'call_indirect.bc' diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll index baa23bbc8ab..6e05686f408 100644 --- 
a/test/CodeGen/CellSPU/extract_elt.ll +++ b/test/CodeGen/CellSPU/extract_elt.ll @@ -2,7 +2,7 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s ; RUN: grep shufb %t1.s | count 27 ; RUN: grep lqa %t1.s | count 27 -; RUN: grep lqx %t2.s | count 27 +; RUN: grep lqd %t2.s | count 27 ; RUN: grep space %t1.s | count 8 ; RUN: grep byte %t1.s | count 424 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" diff --git a/test/CodeGen/CellSPU/fcmp.ll b/test/CodeGen/CellSPU/fcmp.ll index f4406d63dfb..d212bd51e40 100644 --- a/test/CodeGen/CellSPU/fcmp.ll +++ b/test/CodeGen/CellSPU/fcmp.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep fceq %t1.s | count 1 && +; RUN: grep fceq %t1.s | count 1 ; RUN: grep fcmeq %t1.s | count 1 ; ; This file includes standard floating point arithmetic instructions diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll index e5fa79e31d6..5d6daa2ddfe 100644 --- a/test/CodeGen/CellSPU/struct_1.ll +++ b/test/CodeGen/CellSPU/struct_1.ll @@ -1,27 +1,26 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep lqa %t1.s | count 10 -; RUN: grep lqd %t1.s | count 4 -; RUN: grep rotqbyi %t1.s | count 5 +; RUN: grep lqa %t1.s | count 5 +; RUN: grep lqd %t1.s | count 11 +; RUN: grep rotqbyi %t1.s | count 7 ; RUN: grep xshw %t1.s | count 1 -; RUN: grep andi %t1.s | count 4 +; RUN: grep andi %t1.s | count 5 ; RUN: grep cbd %t1.s | count 3 ; RUN: grep chd %t1.s | count 1 ; RUN: grep cwd %t1.s | count 3 ; RUN: grep shufb %t1.s | count 7 -; RUN: grep stqa %t1.s | count 5 -; RUN: grep iohl %t2.s | count 14 -; RUN: grep ilhu %t2.s | count 14 -; RUN: grep lqx %t2.s | count 14 -; RUN: grep rotqbyi %t2.s | count 5 +; RUN: grep stqd %t1.s | count 7 +; RUN: grep iohl %t2.s | count 16 +; RUN: grep ilhu %t2.s | count 16 +; RUN: 
grep lqd %t2.s | count 16 +; RUN: grep rotqbyi %t2.s | count 7 ; RUN: grep xshw %t2.s | count 1 -; RUN: grep andi %t2.s | count 4 -; RUN: grep cbx %t2.s | count 3 -; RUN: grep chx %t2.s | count 1 -; RUN: grep cwx %t2.s | count 1 -; RUN: grep cwd %t2.s | count 2 +; RUN: grep andi %t2.s | count 5 +; RUN: grep cbd %t2.s | count 3 +; RUN: grep chd %t2.s | count 1 +; RUN: grep cwd %t2.s | count 3 ; RUN: grep shufb %t2.s | count 7 -; RUN: grep stqx %t2.s | count 7 +; RUN: grep stqd %t2.s | count 7 ; ModuleID = 'struct_1.bc' target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" @@ -48,88 +47,98 @@ target triple = "spu" ; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } @state = global %struct.hackstate zeroinitializer, align 16 -define i8 @get_hackstate_c1() zeroext { +define i8 @get_hackstate_c1() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret i8 %tmp2 } -define i8 @get_hackstate_c2() zeroext { +define i8 @get_hackstate_c2() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret i8 %tmp2 } -define i8 @get_hackstate_c3() zeroext { +define i8 @get_hackstate_c3() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret i8 %tmp2 } -define i32 @get_hackstate_i1() { +define i32 @get_hackstate_i1() nounwind { entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 - ret i32 %tmp2 + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), 
align 16 + ret i32 %tmp2 } -define i16 @get_hackstate_s1() signext { +define i16 @get_hackstate_s1() signext nounwind { entry: - %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret i16 %tmp2 + %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret i16 %tmp2 } -define i8 @get_hackstate_c7() zeroext { +define i8 @get_hackstate_c6() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16 + ret i8 %tmp2 } -define i32 @get_hackstate_i6() zeroext { +define i8 @get_hackstate_c7() zeroext nounwind { entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret i32 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 + ret i8 %tmp2 } -define void @set_hackstate_c1(i8 zeroext %c) { +define i32 @get_hackstate_i3() nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret void + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret i32 %tmp2 } -define void @set_hackstate_c2(i8 zeroext %c) { +define i32 @get_hackstate_i6() nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret void + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret i32 %tmp2 } -define void @set_hackstate_c3(i8 zeroext %c) { +define void @set_hackstate_c1(i8 zeroext %c) nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret void } -define void @set_hackstate_i1(i32 %i) { +define void @set_hackstate_c2(i8 zeroext %c) nounwind { entry: - store i32 %i, i32* getelementptr 
(%struct.hackstate* @state, i32 0, i32 3), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret void } -define void @set_hackstate_s1(i16 signext %s) { +define void @set_hackstate_c3(i8 zeroext %c) nounwind { entry: - store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret void } -define void @set_hackstate_i3(i32 %i) { +define void @set_hackstate_i1(i32 %i) nounwind { entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 11), align 16 - ret void + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret void } - -define void @set_hackstate_i6(i32 %i) { +define void @set_hackstate_s1(i16 signext %s) nounwind { entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret void + store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret void } +define void @set_hackstate_i3(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret void +} + +define void @set_hackstate_i6(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret void +} diff --git a/test/CodeGen/CellSPU/struct_2.ll b/test/CodeGen/CellSPU/struct_2.ll deleted file mode 100644 index fee9c01dc68..00000000000 --- a/test/CodeGen/CellSPU/struct_2.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep lqx %t1.s | count 14 -; RUN: grep rotqby %t1.s | count 7 -; RUN: grep xshw %t1.s | count 1 -; RUN: grep andi %t1.s | count 4 -; RUN: grep cbx %t1.s | count 1 -; RUN: grep cbd %t1.s | count 2 -; RUN: grep chd %t1.s | count 1 -; RUN: grep cwd %t1.s | count 3 -; RUN: grep shufb %t1.s | count 7 -; RUN: grep stqx %t1.s | count 7 - 
-; ModuleID = 'struct_1.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; struct hackstate { -; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3) -; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3) -; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3) -; int i1; // offset 4 (rotate left by 4 bytes to byte 0) -; short s1; // offset 8 (rotate left by 6 bytes to byte 2) -; int i2; // offset 12 [ignored] -; unsigned char c4; // offset 16 [ignored] -; unsigned char c5; // offset 17 [ignored] -; unsigned char c6; // offset 18 [ignored] -; unsigned char c7; // offset 19 (no rotate, in preferred slot) -; int i3; // offset 20 [ignored] -; int i4; // offset 24 [ignored] -; int i5; // offset 28 [ignored] -; int i6; // offset 32 (no rotate, in preferred slot) -; } -%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 } - -; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } -@state = global %struct.hackstate zeroinitializer, align 4 - -define i8 @get_hackstate_c1() zeroext { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 4 - ret i8 %tmp2 -} - -define i8 @get_hackstate_c2() zeroext { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 4 - ret i8 %tmp2 -} - -define i8 @get_hackstate_c3() zeroext { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 4 - ret i8 %tmp2 -} - -define i32 @get_hackstate_i1() { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 4 - ret i32 %tmp2 -} - -define i16 @get_hackstate_s1() signext { -entry: - %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 4 - ret i16 %tmp2 -} - -define i8 @get_hackstate_c7() zeroext { -entry: - %tmp2 = load i8* 
getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 4 - ret i8 %tmp2 -} - -define i32 @get_hackstate_i6() zeroext { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 4 - ret i32 %tmp2 -} - -define void @set_hackstate_c1(i8 zeroext %c) { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 4 - ret void -} - -define void @set_hackstate_c2(i8 zeroext %c) { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 4 - ret void -} - -define void @set_hackstate_c3(i8 zeroext %c) { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 4 - ret void -} - -define void @set_hackstate_i1(i32 %i) { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 4 - ret void -} - -define void @set_hackstate_s1(i16 signext %s) { -entry: - store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 4 - ret void -} - -define void @set_hackstate_i3(i32 %i) { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 11), align 4 - ret void -} - - -define void @set_hackstate_i6(i32 %i) { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 4 - ret void -} - diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll index 46109e3dc1f..3f7eb626cbe 100644 --- a/test/CodeGen/CellSPU/vec_const.ll +++ b/test/CodeGen/CellSPU/vec_const.ll @@ -1,11 +1,11 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep il %t1.s | count 16 -; RUN: grep ilhu %t1.s | count 8 +; RUN: grep il %t1.s | count 16 +; RUN: grep ilhu %t1.s | count 8 ; RUN: grep ilh %t1.s | count 13 ; RUN: grep iohl %t1.s | count 7 ; RUN: grep lqa %t1.s | count 6 -; RUN: grep 24672 %t1.s | count 2 +; RUN: grep 24672 %t1.s | count 2 ; RUN: grep 16429 %t1.s | count 
1 ; RUN: grep 63572 %t1.s | count 1 ; RUN: grep 4660 %t1.s | count 1 @@ -17,8 +17,7 @@ ; RUN: grep 21572 %t1.s | count 1 ; RUN: grep 11544 %t1.s | count 1 ; RUN: grep 1311768467750121234 %t1.s | count 1 -; RUN: grep lqx %t2.s | count 6 -; RUN: grep ila %t2.s | count 6 +; RUN: grep lqd %t2.s | count 6 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" target triple = "spu-unknown-elf"