diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp index 224616eb931..4bd8673a5b4 100644 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -195,12 +195,18 @@ namespace { void printAddr256K(const MachineInstr *MI, unsigned OpNo) { - /* Note: operand 1 is an offset or symbol name. Operand 2 is - ignored. */ + /* Note: operand 1 is an offset or symbol name. */ if (MI->getOperand(OpNo).isImmediate()) { printS16ImmOperand(MI, OpNo); } else { printOp(MI->getOperand(OpNo)); + if (MI->getOperand(OpNo+1).isImmediate()) { + int displ = int(MI->getOperand(OpNo+1).getImm()); + if (displ > 0) + O << "+" << displ; + else if (displ < 0) + O << displ; + } } } @@ -309,7 +315,6 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO) { case MachineOperand::MO_JumpTableIndex: O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); - // FIXME: PIC relocation model return; case MachineOperand::MO_ConstantPoolIndex: O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() @@ -521,7 +526,8 @@ bool LinuxAsmPrinter::doFinalization(Module &M) { O << "\t.zero\t" << Size; } else if (I->hasInternalLinkage()) { SwitchToDataSection("\t.data", I); - O << TAI->getLCOMMDirective() << name << "," << Size << "," << Align; + O << ".local " << name << "\n"; + O << TAI->getCOMMDirective() << name << "," << Size << "," << Align << "\n"; } else { SwitchToDataSection("\t.data", I); O << ".comm " << name << "," << Size; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 3a50e3bcf04..3905d553029 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -150,6 +150,16 @@ namespace { return false; } + bool + isHighLow(const SDOperand &Op) + { + return (Op.getOpcode() == SPUISD::IndirectAddr + && ((Op.getOperand(0).getOpcode() == SPUISD::Hi + && Op.getOperand(1).getOpcode() == SPUISD::Lo) + || 
(Op.getOperand(0).getOpcode() == SPUISD::Lo + && Op.getOperand(1).getOpcode() == SPUISD::Hi))); + } + //===------------------------------------------------------------------===// //! MVT::ValueType to "useful stuff" mapping structure: @@ -157,40 +167,24 @@ namespace { MVT::ValueType VT; unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined) int prefslot_byte; /// Byte offset of the "preferred" slot - unsigned brcc_eq_ins; /// br_cc equal instruction - unsigned brcc_neq_ins; /// br_cc not equal instruction - unsigned load_aform; /// A-form load instruction for this VT - unsigned store_aform; /// A-form store instruction for this VT + unsigned insmask_ins; /// Insert mask instruction for a-form }; const valtype_map_s valtype_map[] = { - { MVT::i1, 0, 3, 0, 0, 0, - 0 }, - { MVT::i8, SPU::ORBIr8, 3, 0, 0, SPU::LQAr8, - SPU::STQAr8 }, - { MVT::i16, SPU::ORHIr16, 2, SPU::BRHZ, SPU::BRHNZ, SPU::LQAr16, - SPU::STQAr16 }, - { MVT::i32, SPU::ORIr32, 0, SPU::BRZ, SPU::BRNZ, SPU::LQAr32, - SPU::STQAr32 }, - { MVT::i64, SPU::ORIr64, 0, 0, 0, SPU::LQAr64, - SPU::STQAr64 }, - { MVT::f32, 0, 0, 0, 0, SPU::LQAf32, - SPU::STQAf32 }, - { MVT::f64, 0, 0, 0, 0, SPU::LQAf64, - SPU::STQAf64 }, + { MVT::i1, 0, 3, 0 }, + { MVT::i8, SPU::ORBIr8, 3, 0 }, + { MVT::i16, SPU::ORHIr16, 2, 0 }, + { MVT::i32, SPU::ORIr32, 0, 0 }, + { MVT::i64, SPU::ORIr64, 0, 0 }, + { MVT::f32, 0, 0, 0 }, + { MVT::f64, 0, 0, 0 }, // vector types... (sigh!) 
- { MVT::v16i8, 0, 0, 0, 0, SPU::LQAv16i8, - SPU::STQAv16i8 }, - { MVT::v8i16, 0, 0, 0, 0, SPU::LQAv8i16, - SPU::STQAv8i16 }, - { MVT::v4i32, 0, 0, 0, 0, SPU::LQAv4i32, - SPU::STQAv4i32 }, - { MVT::v2i64, 0, 0, 0, 0, SPU::LQAv2i64, - SPU::STQAv2i64 }, - { MVT::v4f32, 0, 0, 0, 0, SPU::LQAv4f32, - SPU::STQAv4f32 }, - { MVT::v2f64, 0, 0, 0, 0, SPU::LQAv2f64, - SPU::STQAv2f64 }, + { MVT::v16i8, 0, 0, SPU::CBD }, + { MVT::v8i16, 0, 0, SPU::CHD }, + { MVT::v4i32, 0, 0, SPU::CWD }, + { MVT::v2i64, 0, 0, 0 }, + { MVT::v4f32, 0, 0, SPU::CWD }, + { MVT::v2f64, 0, 0, 0 } }; const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); @@ -381,18 +375,20 @@ SPUDAGToDAGISel::SelectDForm2Addr(SDOperand Op, SDOperand N, SDOperand &Disp, Disp = CurDAG->getTargetConstant(0, VT); Base = N; return true; - } else if (Opc == SPUISD::DFormAddr) { - // D-Form address: This is pretty straightforward, naturally... - CN = cast(N.getOperand(1)); - assert(CN != 0 && "SelectDFormAddr/SPUISD::DForm2Addr expecting constant"); - Imm = unsigned(CN->getValue()); - if (Imm < 0xff) { - Disp = CurDAG->getTargetConstant(CN->getValue(), PtrVT); - Base = N.getOperand(0); - return true; + } else if (Opc == SPUISD::IndirectAddr) { + SDOperand Op1 = N.getOperand(1); + if (Op1.getOpcode() == ISD::TargetConstant + || Op1.getOpcode() == ISD::Constant) { + CN = cast(N.getOperand(1)); + assert(CN != 0 && "SelectIndirectAddr/SPUISD::DForm2Addr expecting constant"); + Imm = unsigned(CN->getValue()); + if (Imm < 0xff) { + Disp = CurDAG->getTargetConstant(CN->getValue(), PtrVT); + Base = N.getOperand(0); + return true; + } } } - return false; } @@ -407,7 +403,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, SDOperand &Index) { // These match the addr256k operand type: MVT::ValueType OffsVT = MVT::i16; - MVT::ValueType PtrVT = SPUtli.getPointerTy(); + SDOperand Zero = CurDAG->getTargetConstant(0, OffsVT); switch (N.getOpcode()) { case ISD::Constant: @@ -417,28 +413,40 @@ 
SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, abort(); /*NOTREACHED*/ - case ISD::TargetConstant: { - // Loading from a constant address. - ConstantSDNode *CN = dyn_cast(N); - int Imm = (int)CN->getValue(); - if (Imm < 0x3ffff && (Imm & 0x3) == 0) { - Base = CurDAG->getTargetConstant(Imm, PtrVT); - // Note that this operand will be ignored by the assembly printer... - Index = CurDAG->getTargetConstant(0, OffsVT); - return true; - } - } + case ISD::TargetConstant: case ISD::TargetGlobalAddress: - case ISD::TargetConstantPool: - case SPUISD::AFormAddr: { - // The address is in Base. N is a dummy that will be ignored by - // the assembly printer. - Base = N; - Index = CurDAG->getTargetConstant(0, OffsVT); - return true; - } - } + case ISD::TargetJumpTable: + cerr << "SPUSelectAFormAddr: Target Constant/Pool/Global not wrapped as " + << "A-form address.\n"; + abort(); + /*NOTREACHED*/ + case SPUISD::AFormAddr: + // Just load from memory if there's only a single use of the location, + // otherwise, this will get handled below with D-form offset addresses + if (N.hasOneUse()) { + SDOperand Op0 = N.getOperand(0); + switch (Op0.getOpcode()) { + case ISD::TargetConstantPool: + case ISD::TargetJumpTable: + Base = Op0; + Index = Zero; + return true; + + case ISD::TargetGlobalAddress: { + GlobalAddressSDNode *GSDN = cast(Op0); + GlobalValue *GV = GSDN->getGlobal(); + if (GV->getAlignment() == 16) { + Base = Op0; + Index = Zero; + return true; + } + break; + } + } + } + break; + } return false; } @@ -460,14 +468,11 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, unsigned Opc = N.getOpcode(); unsigned PtrTy = SPUtli.getPointerTy(); - if (Opc == ISD::Register) { - Base = N; - Index = CurDAG->getTargetConstant(0, PtrTy); - return true; - } else if (Opc == ISD::FrameIndex) { + if (Opc == ISD::FrameIndex) { + // Stack frame index must be less than 512 (divided by 16): FrameIndexSDNode *FI = dyn_cast(N); DEBUG(cerr << 
"SelectDFormAddr: ISD::FrameIndex = " - << FI->getIndex() << "\n"); + << FI->getIndex() << "\n"); if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { Base = CurDAG->getTargetConstant(0, PtrTy); Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); @@ -475,19 +480,20 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, } } else if (Opc == ISD::ADD) { // Generated by getelementptr - const SDOperand Op0 = N.getOperand(0); // Frame index/base - const SDOperand Op1 = N.getOperand(1); // Offset within base + const SDOperand Op0 = N.getOperand(0); + const SDOperand Op1 = N.getOperand(1); - if ((Op1.getOpcode() == ISD::Constant - || Op1.getOpcode() == ISD::TargetConstant) - && Op0.getOpcode() != SPUISD::XFormAddr) { + if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo) + || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) { + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = N; + return true; + } else if (Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) { ConstantSDNode *CN = dyn_cast(Op1); - assert(CN != 0 && "SelectDFormAddr: Expected a constant"); + int32_t offset = int32_t(CN->getSignExtended()); - int32_t offset = (int32_t) CN->getSignExtended(); - unsigned Opc0 = Op0.getOpcode(); - - if (Opc0 == ISD::FrameIndex) { + if (Op0.getOpcode() == ISD::FrameIndex) { FrameIndexSDNode *FI = dyn_cast(Op0); DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI->getIndex() << "\n"); @@ -500,51 +506,69 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, } else if (offset > SPUFrameInfo::minFrameOffset() && offset < SPUFrameInfo::maxFrameOffset()) { Base = CurDAG->getTargetConstant(offset, PtrTy); - if (Opc0 == ISD::GlobalAddress) { - // Convert global address to target global address - GlobalAddressSDNode *GV = dyn_cast(Op0); - Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy); - return true; - } else { - 
// Otherwise, just take operand 0 - Index = Op0; + Index = Op0; + return true; + } + } else if (Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = dyn_cast(Op0); + int32_t offset = int32_t(CN->getSignExtended()); + + if (Op1.getOpcode() == ISD::FrameIndex) { + FrameIndexSDNode *FI = dyn_cast(Op1); + DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset + << " frame index = " << FI->getIndex() << "\n"); + + if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); return true; } + } else if (offset > SPUFrameInfo::minFrameOffset() + && offset < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = Op1; + return true; } - } else - return false; - } else if (Opc == SPUISD::DFormAddr) { - // D-Form address: This is pretty straightforward, - // naturally... but make sure that this isn't a D-form address - // with a X-form address embedded within: - const SDOperand Op0 = N.getOperand(0); // Frame index/base - const SDOperand Op1 = N.getOperand(1); // Offset within base + } + } else if (Opc == SPUISD::IndirectAddr) { + // Indirect with constant offset -> D-Form address + const SDOperand Op0 = N.getOperand(0); + const SDOperand Op1 = N.getOperand(1); + SDOperand Zero = CurDAG->getTargetConstant(0, N.getValueType()); - if (Op0.getOpcode() == ISD::Constant - || Op0.getOpcode() == ISD::TargetConstant) { + if (Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) { ConstantSDNode *CN = cast(Op1); - assert(CN != 0 && "SelectDFormAddr/SPUISD::DFormAddr expecting constant"); - Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); - Index = Op0; - return true; - } - } else if (Opc == ISD::FrameIndex) { - // Stack frame index must be less than 512 (divided by 16): - FrameIndexSDNode *FI = dyn_cast(N); - DEBUG(cerr << "SelectDFormAddr: 
ISD::FrameIndex = " - << FI->getIndex() << "\n"); - if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { + int32_t offset = int32_t(CN->getSignExtended()); + if (offset > SPUFrameInfo::minFrameOffset() + && offset < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); + Index = Op0; + return true; + } + } else if (Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = cast(Op0); + int32_t offset = int32_t(CN->getSignExtended()); + if (offset > SPUFrameInfo::minFrameOffset() + && offset < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); + Index = Op1; + return true; + } + } else if (Op0.getOpcode() == SPUISD::Hi + && Op1.getOpcode() == SPUISD::Lo) { + // (SPUindirect (SPUhi , 0), (SPUlo , 0)) Base = CurDAG->getTargetConstant(0, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); + Index = N; return true; } - } else if (Opc == SPUISD::LDRESULT) { - // It's a load result dereference - Base = CurDAG->getTargetConstant(0, PtrTy); - Index = N.getOperand(0); + } else if (Opc == SPUISD::AFormAddr) { + Base = CurDAG->getTargetConstant(0, N.getValueType()); + Index = N; return true; } - return false; } @@ -565,108 +589,10 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, || SelectDFormAddr(Op, N, Base, Index)) return false; - unsigned Opc = N.getOpcode(); - - if (Opc == ISD::ADD) { - SDOperand N1 = N.getOperand(0); - SDOperand N2 = N.getOperand(1); - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; - } else if (Opc == SPUISD::XFormAddr) { - Base = N; - Index = N.getOperand(1); - return true; - } else if (Opc == SPUISD::DFormAddr) { - // Must be a D-form address with an X-form address embedded - // within: - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; - } else if (N.getNumOperands() == 2) { - SDOperand N1 = N.getOperand(0); - SDOperand N2 = N.getOperand(1); - 
unsigned N1Opc = N1.getOpcode(); - unsigned N2Opc = N2.getOpcode(); - - if ((N1Opc == ISD::CopyToReg || N1Opc == ISD::Register) - && (N2Opc == ISD::CopyToReg || N2Opc == ISD::Register)) { - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; - /*UNREACHED*/ - } else { - cerr << "SelectXFormAddr: 2-operand unhandled operand:\n"; - N.Val->dump(CurDAG); - cerr << "\n"; - abort(); - /*UNREACHED*/ - } - } else { - cerr << "SelectXFormAddr: Unhandled operand type:\n"; - N.Val->dump(CurDAG); - cerr << "\n"; - abort(); - /*UNREACHED*/ - } - - return false; -} - -//! Emit load for A-form addresses -/* - */ -SDNode * -Emit_LOAD_AFormAddr(SDOperand Op, SelectionDAG &CurDAG, SPUDAGToDAGISel &ISel) -{ - SDNode *Result; - MVT::ValueType OpVT = Op.getValueType(); - SDOperand Chain = Op.getOperand(0); - SDOperand Ptr = Op.getOperand(1); - SDOperand PtrArg = Ptr.getOperand(0); - SDOperand PtrOffs = Ptr.getOperand(1); - const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); - - if (PtrOffs.getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast(PtrOffs); - MVT::ValueType PVT = PtrOffs.getValueType(); - PtrOffs = CurDAG.getTargetConstant(CN->getValue(), PVT); - } - ISel.AddToISelQueue(PtrArg); - ISel.AddToISelQueue(PtrOffs); - ISel.AddToISelQueue(Chain); - Result = CurDAG.getTargetNode(vtm->load_aform, OpVT, MVT::Other, PtrArg, PtrOffs, Chain); - Chain = SDOperand(Result, 1); - return Result; -} - -//! 
Emit store for A-form addresses -/* - */ -SDNode * -Emit_STORE_AFormAddr(SDOperand Op, SelectionDAG &CurDAG, SPUDAGToDAGISel &ISel) -{ - SDNode *Result; - SDOperand Chain = Op.getOperand(0); - SDOperand Val = Op.getOperand(1); - SDOperand Ptr = Op.getOperand(2); - SDOperand PtrArg = Ptr.getOperand(0); - SDOperand PtrOffs = Ptr.getOperand(1); - const valtype_map_s *vtm = getValueTypeMapEntry(Val.getValueType()); - - if (PtrOffs.getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast(PtrOffs); - MVT::ValueType PVT = PtrOffs.getValueType(); - PtrOffs = CurDAG.getTargetConstant(CN->getValue(), PVT); - } - ISel.AddToISelQueue(Val); - ISel.AddToISelQueue(PtrArg); - ISel.AddToISelQueue(PtrOffs); - ISel.AddToISelQueue(Chain); - SDOperand Ops[4] = { Val, PtrArg, PtrOffs, Chain }; - Result = CurDAG.getTargetNode(vtm->store_aform, MVT::Other, Ops, 4); - Chain = SDOperand(Result, 1); - return Result; + // All else fails, punt and use an X-form address: + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; } //! 
Convert the operand from a target-independent to a target-specific node @@ -695,12 +621,6 @@ SPUDAGToDAGISel::Select(SDOperand Op) { Ops[0] = TFI; Ops[1] = Zero; n_ops = 2; - } else if (Opc == ISD::LOAD - && Op.getOperand(1).getOpcode() == SPUISD::AFormAddr) { - return Emit_LOAD_AFormAddr(Op, *CurDAG, *this); - } else if (Opc == ISD::STORE - && Op.getOperand(2).getOpcode() == SPUISD::AFormAddr) { - return Emit_STORE_AFormAddr(Op, *CurDAG, *this); } else if (Opc == ISD::ZERO_EXTEND) { // (zero_extend:i16 (and:i8 , )) const SDOperand &Op1 = N->getOperand(0); @@ -717,6 +637,38 @@ SPUDAGToDAGISel::Select(SDOperand Op) { n_ops = 2; } } + } else if (Opc == SPUISD::INSERT_MASK) { + SDOperand Op0 = Op.getOperand(0); + if (Op0.getOpcode() == SPUISD::AFormAddr) { + // (SPUvecinsmask (SPUaform , 0)) -> + // (CBD|CHD|CWD 0, arg) + const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); + ConstantSDNode *CN = cast(Op0.getOperand(1)); + assert(vtm->insmask_ins != 0 && "missing insert mask instruction"); + NewOpc = vtm->insmask_ins; + Ops[0] = CurDAG->getTargetConstant(CN->getValue(), Op0.getValueType()); + Ops[1] = Op0; + n_ops = 2; + + AddToISelQueue(Op0); + } else if (Op0.getOpcode() == ISD::FrameIndex) { + // (SPUvecinsmask ) -> + // (CBD|CHD|CWD 0, ) + const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); + NewOpc = vtm->insmask_ins; + Ops[0] = CurDAG->getTargetConstant(0, Op0.getValueType()); + Ops[1] = Op0; + n_ops = 2; + } else if (isHighLow(Op0)) { + // (SPUvecinsmask (SPUindirect (SPUhi , 0), (SPUlow , 0))) -> + // (CBD|CHD|CWD 0, arg) + const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); + NewOpc = vtm->insmask_ins; + Ops[0] = CurDAG->getTargetConstant(0, Op0.getValueType()); + Ops[1] = Op0; + n_ops = 2; + AddToISelQueue(Op0); + } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT unsigned VT = N->getValueType(0); @@ -748,19 +700,19 @@ SPUDAGToDAGISel::Select(SDOperand Op) { AddToISelQueue(Chain); return Result; - } else if (Opc == 
SPUISD::XFormAddr) { + } else if (Opc == SPUISD::IndirectAddr) { SDOperand Op0 = Op.getOperand(0); if (Op0.getOpcode() == SPUISD::LDRESULT || Op0.getOpcode() == SPUISD::AFormAddr) { - // (XFormAddr (LDRESULT|AFormAddr, imm)) + // (IndirectAddr (LDRESULT|AFormAddr, imm)) SDOperand Op1 = Op.getOperand(1); MVT::ValueType VT = Op.getValueType(); - DEBUG(cerr << "CellSPU: XFormAddr(" - << (Op0.getOpcode() == SPUISD::LDRESULT - ? "LDRESULT" - : "AFormAddr") - << ", imm):\nOp0 = "); + DEBUG(cerr << "CellSPU: IndirectAddr(" + << (Op0.getOpcode() == SPUISD::LDRESULT + ? "LDRESULT" + : "AFormAddr") + << ", imm):\nOp0 = "); DEBUG(Op.getOperand(0).Val->dump(CurDAG)); DEBUG(cerr << "\nOp1 = "); DEBUG(Op.getOperand(1).Val->dump(CurDAG)); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 0f1d0452804..33261a607b1 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -289,14 +289,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. 
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool, MVT::f32, Custom); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - setOperationAction(ISD::ConstantPool, MVT::f64, Custom); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); + for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128; + ++sctype) { + setOperationAction(ISD::GlobalAddress, sctype, Custom); + setOperationAction(ISD::ConstantPool, sctype, Custom); + setOperationAction(ISD::JumpTable, sctype, Custom); + } // RET must be custom lowered, to meet ABI requirements setOperationAction(ISD::RET, MVT::Other, Custom); @@ -377,7 +375,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setStackPointerRegisterToSaveRestore(SPU::R1); // We have target-specific dag combine patterns for the following nodes: - // e.g., setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::ADD); computeRegisterProperties(); } @@ -391,8 +389,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; - node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr"; - node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr"; + node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr"; node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; @@ -524,11 +521,12 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, // Unaligned load or we're using the "large memory" model, which means that // we have to be very pessimistic: if 
(isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) { - basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); } // Add the offset - basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT)); + basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, + DAG.getConstant((alignOffs & ~0xf), PtrVT)); was16aligned = false; return DAG.getLoad(MVT::v16i8, chain, basePtr, LSN->getSrcValue(), LSN->getSrcValueOffset(), @@ -706,21 +704,20 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DEBUG(basePtr.Val->dump(&DAG)); DEBUG(cerr << "\n"); - if (basePtr.getOpcode() == SPUISD::DFormAddr) { - // Hmmmm... do we ever actually hit this code? - insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, - basePtr.getOperand(0), - insertEltOffs); - } else if (basePtr.getOpcode() == SPUISD::XFormAddr || - (basePtr.getOpcode() == ISD::ADD - && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) { + if (basePtr.getOpcode() == SPUISD::IndirectAddr || + (basePtr.getOpcode() == ISD::ADD + && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) { insertEltPtr = basePtr; } else { - // $sp is always aligned, so use it instead of potentially loading an - // address into a new register: - insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, - DAG.getRegister(SPU::R1, PtrVT), - insertEltOffs); +#if 0 + // $sp is always aligned, so use it when necessary to avoid loading + // an address + SDOperand ptrP = + basePtr.Val->hasOneUse() ? 
DAG.getRegister(SPU::R1, PtrVT) : basePtr; + insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, ptrP, insertEltOffs); +#else + insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs); +#endif } insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr); @@ -772,7 +769,7 @@ LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); #else - return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, PtrVT, CPI, Zero); #endif } } @@ -791,9 +788,10 @@ LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { const TargetMachine &TM = DAG.getTarget(); if (TM.getRelocationModel() == Reloc::Static) { + SDOperand JmpAForm = DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero); return (!ST->usingLargeMem() - ? DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero) - : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero)); + ? JmpAForm + : DAG.getNode(SPUISD::IndirectAddr, PtrVT, JmpAForm, Zero)); } assert(0 && @@ -811,9 +809,13 @@ LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SDOperand Zero = DAG.getConstant(0, PtrVT); if (TM.getRelocationModel() == Reloc::Static) { - return (!ST->usingLargeMem() - ? 
DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero) - : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero)); + if (!ST->usingLargeMem()) { + return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero); + } else { + SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); + SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + } } else { cerr << "LowerGlobalAddress: Relocation model other than static not " << "supported.\n"; @@ -1202,7 +1204,7 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } else { // "Large memory" mode: Turn all calls into indirect calls with a X-form // address pairs: - Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero); + Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); @@ -2553,16 +2555,80 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { #if 0 TargetMachine &TM = getTargetMachine(); +#endif + const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); SelectionDAG &DAG = DCI.DAG; SDOperand N0 = N->getOperand(0); // everything has at least one operand switch (N->getOpcode()) { default: break; - // Do something creative here for ISD nodes that can be coalesced in unique - // ways. 
+ case SPUISD::IndirectAddr: { + if (!ST->usingLargeMem() && N0.getOpcode() == SPUISD::AFormAddr) { + ConstantSDNode *CN = cast(N->getOperand(1)); + if (CN->getValue() == 0) { + // (SPUindirect (SPUaform , 0), 0) -> + // (SPUaform , 0) + + DEBUG(cerr << "Replace: "); + DEBUG(N->dump(&DAG)); + DEBUG(cerr << "\nWith: "); + DEBUG(N0.Val->dump(&DAG)); + DEBUG(cerr << "\n"); + + return N0; + } + } + } + case ISD::ADD: { + SDOperand Op0 = N->getOperand(0); + SDOperand Op1 = N->getOperand(1); + + if ((Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) + && Op0.getOpcode() == SPUISD::IndirectAddr) { + SDOperand Op01 = Op0.getOperand(1); + if (Op01.getOpcode() == ISD::Constant + || Op01.getOpcode() == ISD::TargetConstant) { + // (add , (SPUindirect , )) -> + // (SPUindirect , ) + ConstantSDNode *CN0 = cast(Op1); + ConstantSDNode *CN1 = cast(Op01); + SDOperand combinedConst = + DAG.getConstant(CN0->getValue() + CN1->getValue(), + Op0.getValueType()); + + DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", " + << "(SPUindirect , " << CN1->getValue() << "))\n"); + DEBUG(cerr << "With: (SPUindirect , " + << CN0->getValue() + CN1->getValue() << ")\n"); + return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(), + Op0.getOperand(0), combinedConst); + } + } else if ((Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) + && Op1.getOpcode() == SPUISD::IndirectAddr) { + SDOperand Op11 = Op1.getOperand(1); + if (Op11.getOpcode() == ISD::Constant + || Op11.getOpcode() == ISD::TargetConstant) { + // (add (SPUindirect , ), ) -> + // (SPUindirect , ) + ConstantSDNode *CN0 = cast(Op0); + ConstantSDNode *CN1 = cast(Op11); + SDOperand combinedConst = + DAG.getConstant(CN0->getValue() + CN1->getValue(), + Op0.getValueType()); + + DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", " + << "(SPUindirect , " << CN1->getValue() << "))\n"); + DEBUG(cerr << "With: (SPUindirect , " + << CN0->getValue() + CN1->getValue() << ")\n"); + + 
return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(), + Op1.getOperand(0), combinedConst); + } + } + } } -#endif - // Otherwise, return unchanged. return SDOperand(); } diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index d9e4e7ed4ed..916f2c931fc 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -32,8 +32,7 @@ namespace llvm { Lo, ///< Low address component (lower 16) PCRelAddr, ///< Program counter relative address AFormAddr, ///< A-form address (local store) - DFormAddr, ///< D-Form address "imm($r)" - XFormAddr, ///< X-Form address "$r($r)" + IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)" LDRESULT, ///< Load result (value, chain) CALL, ///< CALL instruction diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td index 7221ab2dc8e..eda1ab3da47 100644 --- a/lib/Target/CellSPU/SPUInstrFormats.td +++ b/lib/Target/CellSPU/SPUInstrFormats.td @@ -247,6 +247,10 @@ let RT = 0 in { { } } +//===----------------------------------------------------------------------===// +// Specialized versions of RI16: +//===----------------------------------------------------------------------===// + // RI18 Format class RI18Form opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 71cb37dc08d..b21468a98a8 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -48,205 +48,109 @@ def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$fi //===----------------------------------------------------------------------===// let isSimpleLoad = 1 in { - def LQDv16i8: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v16i8 VECREG:$rT), (load dform_addr:$src))]>; + class LoadDFormVec + : RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), + 
"lqd\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load dform_addr:$src))]> + { } - def LQDv8i16: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v8i16 VECREG:$rT), (load dform_addr:$src))]>; + class LoadDForm + : RI10Form<0b00101100, (outs rclass:$rT), (ins memri10:$src), + "lqd\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load dform_addr:$src))]> + { } - def LQDv4i32: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v4i32 VECREG:$rT), (load dform_addr:$src))]>; + multiclass LoadDForms + { + def v16i8: LoadDFormVec; + def v8i16: LoadDFormVec; + def v4i32: LoadDFormVec; + def v2i64: LoadDFormVec; + def v4f32: LoadDFormVec; + def v2f64: LoadDFormVec; - def LQDv2i64: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v2i64 VECREG:$rT), (load dform_addr:$src))]>; + def r128: LoadDForm; + def r64: LoadDForm; + def r32: LoadDForm; + def f32: LoadDForm; + def f64: LoadDForm; + def r16: LoadDForm; + def r8: LoadDForm; + } - def LQDv4f32: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v4f32 VECREG:$rT), (load dform_addr:$src))]>; + class LoadAFormVec + : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load aform_addr:$src))]> + { } - def LQDv2f64: - RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set (v2f64 VECREG:$rT), (load dform_addr:$src))]>; + class LoadAForm + : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load aform_addr:$src))]> + { } - def LQDr128: - RI10Form<0b00101100, (outs GPRC:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set GPRC:$rT, (load dform_addr:$src))]>; + multiclass LoadAForms + { + def v16i8: LoadAFormVec; + def v8i16: 
LoadAFormVec; + def v4i32: LoadAFormVec; + def v2i64: LoadAFormVec; + def v4f32: LoadAFormVec; + def v2f64: LoadAFormVec; - def LQDr64: - RI10Form<0b00101100, (outs R64C:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R64C:$rT, (load dform_addr:$src))]>; + def r128: LoadAForm; + def r64: LoadAForm; + def r32: LoadAForm; + def f32: LoadAForm; + def f64: LoadAForm; + def r16: LoadAForm; + def r8: LoadAForm; + } - def LQDr32: - RI10Form<0b00101100, (outs R32C:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R32C:$rT, (load dform_addr:$src))]>; + class LoadXFormVec + : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), + "lqx\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load xform_addr:$src))]> + { } - // Floating Point - def LQDf32: - RI10Form<0b00101100, (outs R32FP:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R32FP:$rT, (load dform_addr:$src))]>; + class LoadXForm + : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src), + "lqx\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load xform_addr:$src))]> + { } - def LQDf64: - RI10Form<0b00101100, (outs R64FP:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R64FP:$rT, (load dform_addr:$src))]>; - // END Floating Point + multiclass LoadXForms + { + def v16i8: LoadXFormVec; + def v8i16: LoadXFormVec; + def v4i32: LoadXFormVec; + def v2i64: LoadXFormVec; + def v4f32: LoadXFormVec; + def v2f64: LoadXFormVec; - def LQDr16: - RI10Form<0b00101100, (outs R16C:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R16C:$rT, (load dform_addr:$src))]>; + def r128: LoadXForm; + def r64: LoadXForm; + def r32: LoadXForm; + def f32: LoadXForm; + def f64: LoadXForm; + def r16: LoadXForm; + def r8: LoadXForm; + } - def LQDr8: - RI10Form<0b00101100, (outs R8C:$rT), (ins memri10:$src), - "lqd\t$rT, $src", LoadStore, - [(set R8C:$rT, (load dform_addr:$src))]>; - - def LQAv16i8: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), 
- "lqa\t$rT, $src", LoadStore, - [(set (v16i8 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv8i16: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v8i16 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv4i32: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v4i32 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv2i64: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v2i64 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv4f32: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v4f32 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAv2f64: - RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set (v2f64 VECREG:$rT), (load aform_addr:$src))]>; - - def LQAr128: - RI16Form<0b100001100, (outs GPRC:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set GPRC:$rT, (load aform_addr:$src))]>; - - def LQAr64: - RI16Form<0b100001100, (outs R64C:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R64C:$rT, (load aform_addr:$src))]>; - - def LQAr32: - RI16Form<0b100001100, (outs R32C:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R32C:$rT, (load aform_addr:$src))]>; - - def LQAf32: - RI16Form<0b100001100, (outs R32FP:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R32FP:$rT, (load aform_addr:$src))]>; - - def LQAf64: - RI16Form<0b100001100, (outs R64FP:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R64FP:$rT, (load aform_addr:$src))]>; - - def LQAr16: - RI16Form<0b100001100, (outs R16C:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R16C:$rT, (load aform_addr:$src))]>; - - def LQAr8: - RI16Form<0b100001100, (outs R8C:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", LoadStore, - [(set R8C:$rT, (load 
aform_addr:$src))]>; - - def LQXv16i8: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v16i8 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv8i16: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v8i16 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv4i32: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v4i32 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv2i64: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v2i64 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv4f32: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v4f32 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXv2f64: - RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set (v2f64 VECREG:$rT), (load xform_addr:$src))]>; - - def LQXr128: - RRForm<0b00100011100, (outs GPRC:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set GPRC:$rT, (load xform_addr:$src))]>; - - def LQXr64: - RRForm<0b00100011100, (outs R64C:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R64C:$rT, (load xform_addr:$src))]>; - - def LQXr32: - RRForm<0b00100011100, (outs R32C:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R32C:$rT, (load xform_addr:$src))]>; - - def LQXf32: - RRForm<0b00100011100, (outs R32FP:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R32FP:$rT, (load xform_addr:$src))]>; - - def LQXf64: - RRForm<0b00100011100, (outs R64FP:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R64FP:$rT, (load xform_addr:$src))]>; - - def LQXr16: - RRForm<0b00100011100, (outs R16C:$rT), (ins memrr:$src), - "lqx\t$rT, $src", LoadStore, - [(set R16C:$rT, (load xform_addr:$src))]>; - - def LQXr8: - RRForm<0b00100011100, (outs R8C:$rT), (ins memrr:$src), - 
"lqx\t$rT, $src", LoadStore, - [(set R8C:$rT, (load xform_addr:$src))]>; + defm LQA : LoadAForms; + defm LQD : LoadDForms; + defm LQX : LoadXForms; /* Load quadword, PC relative: Not much use at this point in time. - Might be of use later for relocatable code. + Might be of use later for relocatable code. It's effectively the + same as LQA, but uses PC-relative addressing. def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp), "lqr\t$rT, $disp", LoadStore, [(set VECREG:$rT, (load iaddr:$disp))]>; @@ -256,174 +160,108 @@ let isSimpleLoad = 1 in { //===----------------------------------------------------------------------===// // Stores: //===----------------------------------------------------------------------===// +class StoreDFormVec + : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), + "stqd\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), dform_addr:$src)]> +{ } -def STQDv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v16i8 VECREG:$rT), dform_addr:$src)]>; +class StoreDForm + : RI10Form<0b00100100, (outs), (ins rclass:$rT, memri10:$src), + "stqd\t$rT, $src", + LoadStore, + [(store rclass:$rT, dform_addr:$src)]> +{ } -def STQDv8i16 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v8i16 VECREG:$rT), dform_addr:$src)]>; +multiclass StoreDForms +{ + def v16i8: StoreDFormVec; + def v8i16: StoreDFormVec; + def v4i32: StoreDFormVec; + def v2i64: StoreDFormVec; + def v4f32: StoreDFormVec; + def v2f64: StoreDFormVec; -def STQDv4i32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v4i32 VECREG:$rT), dform_addr:$src)]>; + def r128: StoreDForm; + def r64: StoreDForm; + def r32: StoreDForm; + def f32: StoreDForm; + def f64: StoreDForm; + def r16: StoreDForm; + def r8: StoreDForm; +} -def STQDv2i64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - 
"stqd\t$rT, $src", LoadStore, - [(store (v2i64 VECREG:$rT), dform_addr:$src)]>; +class StoreAFormVec + : RI16Form<0b100000100, (outs), (ins VECREG:$rT, addr256k:$src), + "stqa\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), aform_addr:$src)]> +{ } -def STQDv4f32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v4f32 VECREG:$rT), dform_addr:$src)]>; +class StoreAForm + : RI16Form<0b100000100, (outs), (ins rclass:$rT, addr256k:$src), + "stqa\t$rT, $src", + LoadStore, + [(store rclass:$rT, aform_addr:$src)]> +{ } -def STQDv2f64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store (v2f64 VECREG:$rT), dform_addr:$src)]>; +multiclass StoreAForms +{ + def v16i8: StoreAFormVec; + def v8i16: StoreAFormVec; + def v4i32: StoreAFormVec; + def v2i64: StoreAFormVec; + def v4f32: StoreAFormVec; + def v2f64: StoreAFormVec; -def STQDr128 : RI10Form<0b00100100, (outs), (ins GPRC:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store GPRC:$rT, dform_addr:$src)]>; + def r128: StoreAForm; + def r64: StoreAForm; + def r32: StoreAForm; + def f32: StoreAForm; + def f64: StoreAForm; + def r16: StoreAForm; + def r8: StoreAForm; +} -def STQDr64 : RI10Form<0b00100100, (outs), (ins R64C:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R64C:$rT, dform_addr:$src)]>; +class StoreXFormVec + : RRForm<0b00100010100, (outs), (ins VECREG:$rT, memrr:$src), + "stqx\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), xform_addr:$src)]> +{ } -def STQDr32 : RI10Form<0b00100100, (outs), (ins R32C:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R32C:$rT, dform_addr:$src)]>; +class StoreXForm + : RRForm<0b00100010100, (outs), (ins rclass:$rT, memrr:$src), + "stqx\t$rT, $src", + LoadStore, + [(store rclass:$rT, xform_addr:$src)]> +{ } -// Floating Point -def STQDf32 : RI10Form<0b00100100, (outs), (ins R32FP:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, -
[(store R32FP:$rT, dform_addr:$src)]>; +multiclass StoreXForms +{ + def v16i8: StoreXFormVec; + def v8i16: StoreXFormVec; + def v4i32: StoreXFormVec; + def v2i64: StoreXFormVec; + def v4f32: StoreXFormVec; + def v2f64: StoreXFormVec; -def STQDf64 : RI10Form<0b00100100, (outs), (ins R64FP:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R64FP:$rT, dform_addr:$src)]>; + def r128: StoreXForm; + def r64: StoreXForm; + def r32: StoreXForm; + def f32: StoreXForm; + def f64: StoreXForm; + def r16: StoreXForm; + def r8: StoreXForm; +} -def STQDr16 : RI10Form<0b00100100, (outs), (ins R16C:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R16C:$rT, dform_addr:$src)]>; - -def STQDr8 : RI10Form<0b00100100, (outs), (ins R8C:$rT, memri10:$src), - "stqd\t$rT, $src", LoadStore, - [(store R8C:$rT, dform_addr:$src)]>; - -def STQAv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v16i8 VECREG:$rT), aform_addr:$src)]>; - -def STQAv8i16 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v8i16 VECREG:$rT), aform_addr:$src)]>; - -def STQAv4i32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v4i32 VECREG:$rT), aform_addr:$src)]>; - -def STQAv2i64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v2i64 VECREG:$rT), aform_addr:$src)]>; - -def STQAv4f32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v4f32 VECREG:$rT), aform_addr:$src)]>; - -def STQAv2f64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store (v2f64 VECREG:$rT), aform_addr:$src)]>; - -def STQAr128 : RI10Form<0b00100100, (outs), (ins GPRC:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store GPRC:$rT, aform_addr:$src)]>; - -def STQAr64 : 
RI10Form<0b00100100, (outs), (ins R64C:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R64C:$rT, aform_addr:$src)]>; - -def STQAr32 : RI10Form<0b00100100, (outs), (ins R32C:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R32C:$rT, aform_addr:$src)]>; - -// Floating Point -def STQAf32 : RI10Form<0b00100100, (outs), (ins R32FP:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R32FP:$rT, aform_addr:$src)]>; - -def STQAf64 : RI10Form<0b00100100, (outs), (ins R64FP:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R64FP:$rT, aform_addr:$src)]>; - -def STQAr16 : RI10Form<0b00100100, (outs), (ins R16C:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R16C:$rT, aform_addr:$src)]>; - -def STQAr8 : RI10Form<0b00100100, (outs), (ins R8C:$rT, addr256k:$src), - "stqa\t$rT, $src", LoadStore, - [(store R8C:$rT, aform_addr:$src)]>; - -def STQXv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v16i8 VECREG:$rT), xform_addr:$src)]>; - -def STQXv8i16 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v8i16 VECREG:$rT), xform_addr:$src)]>; - -def STQXv4i32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v4i32 VECREG:$rT), xform_addr:$src)]>; - -def STQXv2i64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v2i64 VECREG:$rT), xform_addr:$src)]>; - -def STQXv4f32 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v4f32 VECREG:$rT), xform_addr:$src)]>; - -def STQXv2f64 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store (v2f64 VECREG:$rT), xform_addr:$src)]>; - -def STQXr128 : RI10Form<0b00100100, (outs), (ins GPRC:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store GPRC:$rT, 
xform_addr:$src)]>; - -def STQXr64: - RI10Form<0b00100100, (outs), (ins R64C:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R64C:$rT, xform_addr:$src)]>; - -def STQXr32: - RI10Form<0b00100100, (outs), (ins R32C:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R32C:$rT, xform_addr:$src)]>; - -// Floating Point -def STQXf32: - RI10Form<0b00100100, (outs), (ins R32FP:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R32FP:$rT, xform_addr:$src)]>; - -def STQXf64: - RI10Form<0b00100100, (outs), (ins R64FP:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R64FP:$rT, xform_addr:$src)]>; - -def STQXr16: - RI10Form<0b00100100, (outs), (ins R16C:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R16C:$rT, xform_addr:$src)]>; - -def STQXr8: - RI10Form<0b00100100, (outs), (ins R8C:$rT, memrr:$src), - "stqx\t$rT, $src", LoadStore, - [(store R8C:$rT, xform_addr:$src)]>; +defm STQD : StoreDForms; +defm STQA : StoreAForms; +defm STQX : StoreXForms; /* Store quadword, PC relative: Not much use at this point in time. Might - be useful for relocatable code. + be useful for relocatable code. 
def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp), "stqr\t$rT, $disp", LoadStore, [(store VECREG:$rT, iaddr:$disp)]>; @@ -620,17 +458,22 @@ def IOHLlo: // Form select mask for bytes using immediate, used in conjunction with the // SELB instruction: -def FSMBIv16i8 : RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), - "fsmbi\t$rT, $val", SelectOp, - [(set (v16i8 VECREG:$rT), (SPUfsmbi_v16i8 immU16:$val))]>; +class FSMBIVec + : RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), + "fsmbi\t$rT, $val", + SelectOp, + [(set (vectype VECREG:$rT), (SPUfsmbi immU16:$val))]> +{ } -def FSMBIv8i16 : RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), - "fsmbi\t$rT, $val", SelectOp, - [(set (v8i16 VECREG:$rT), (SPUfsmbi_v8i16 immU16:$val))]>; +multiclass FSMBIs +{ + def v16i8: FSMBIVec; + def v8i16: FSMBIVec; + def v4i32: FSMBIVec; + def v2i64: FSMBIVec; +} -def FSMBIvecv4i32 : RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), - "fsmbi\t$rT, $val", SelectOp, - [(set (v4i32 VECREG:$rT), (SPUfsmbi_v4i32 immU16:$val))]>; +defm FSMBI : FSMBIs; //===----------------------------------------------------------------------===// // Integer and Logical Operations: @@ -2762,17 +2605,17 @@ def CEQHIv8i16: def CEQr32: RRForm<0b00000011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "ceq\t$rT, $rA, $rB", ByteOp, - [/* no pattern to match: intrinsic */]>; + [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>; def CEQv4i32: RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "ceq\t$rT, $rA, $rB", ByteOp, - [/* no pattern to match: intrinsic */]>; + [(set (v4i32 VECREG:$rT), (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CEQIr32: - RI10Form<0b00111110, (outs R32C:$rT), (ins R32C:$rA, s10imm:$val), + RI10Form<0b00111110, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), "ceqi\t$rT, $rA, $val", ByteOp, - [/* no pattern to match: intrinsic */]>; + [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>; def CEQIv4i32: 
RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), @@ -2872,18 +2715,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { } //===----------------------------------------------------------------------===// -// brcond predicates: +// setcc and brcond patterns: //===----------------------------------------------------------------------===// + def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), (BRHZ R16C:$rA, bb:$dest)>; -def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), - (BRHNZ R16C:$rA, bb:$dest)>; - def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), (BRZ R32C:$rA, bb:$dest)>; + +def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), + (BRHNZ R16C:$rA, bb:$dest)>; def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), (BRNZ R32C:$rA, bb:$dest)>; +def : Pat<(brcond (i16 (setne R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (BRHNZ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; +def : Pat<(brcond (i32 (setne R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (BRNZ (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; + +def : Pat<(brcond (i16 (setne R16C:$rA, R16C:$rB)), bb:$dest), + (BRHNZ (CEQHr16 R16C:$rA, R16C:$rB), bb:$dest)>; +def : Pat<(brcond (i32 (setne R32C:$rA, R32C:$rB)), bb:$dest), + (BRNZ (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>; + let isTerminator = 1, isBarrier = 1 in { let isReturn = 1 in { def RET: @@ -3460,24 +3314,42 @@ def : Pat<(i32 (anyext R16C:$rSrc)), (ORI2To4 R16C:$rSrc, 0)>; //===----------------------------------------------------------------------===// -// Address translation: SPU, like PPC, has to split addresses into high and +// Address generation: SPU, like PPC, has to split addresses into high and // low parts in order to load them into a register.
//===----------------------------------------------------------------------===// -def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; -def : Pat<(SPUxform tglobaladdr:$in, 0), +def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; +def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>; +def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; +def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; + +def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0), + (SPUlo tglobaladdr:$in, 0)), (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; -def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; -def : Pat<(SPUxform tjumptable:$in, 0), +def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0), + (SPUlo texternalsym:$in, 0)), + (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; + +def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0), + (SPUlo tjumptable:$in, 0)), (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; -def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>; -def : Pat<(SPUlo tconstpool:$in , 0), (ILAlsa tconstpool:$in)>; -def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; -// tblgen bug prevents this from working. 
-// def : Pat<(SPUxform tconstpool:$in, 0), -// (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; +def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), + (SPUlo tconstpool:$in, 0)), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; + +def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), + (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; + +def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)), + (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; + +def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)), + (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; + +def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; // Instrinsics: include "CellSDKIntrinsics.td" diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 84601301c33..c231befd352 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -58,14 +58,8 @@ def SPUv4i32_binop: SDTypeProfile<1, 2, [ // FSMBI type constraints: There are several variations for the various // vector types (this avoids having to bit_convert all over the place.) 
-def SPUfsmbi_type_v16i8: SDTypeProfile<1, 1, [ - SDTCisVT<0, v16i8>, SDTCisVT<1, i32>]>; - -def SPUfsmbi_type_v8i16: SDTypeProfile<1, 1, [ - SDTCisVT<0, v8i16>, SDTCisVT<1, i32>]>; - -def SPUfsmbi_type_v4i32: SDTypeProfile<1, 1, [ - SDTCisVT<0, v4i32>, SDTCisVT<1, i32>]>; +def SPUfsmbi_type: SDTypeProfile<1, 1, [ + SDTCisVT<1, i32>]>; // SELB type constraints: def SPUselb_type_v16i8: SDTypeProfile<1, 3, [ @@ -148,9 +142,7 @@ def SPUrotbytes_left_chained : SDNode<"SPUISD::ROTBYTES_LEFT_CHAINED", SPUvecshift_type_v16i8, [SDNPHasChain]>; // SPU form select mask for bytes, immediate -def SPUfsmbi_v16i8: SDNode<"SPUISD::FSMBI", SPUfsmbi_type_v16i8, []>; -def SPUfsmbi_v8i16: SDNode<"SPUISD::FSMBI", SPUfsmbi_type_v8i16, []>; -def SPUfsmbi_v4i32: SDNode<"SPUISD::FSMBI", SPUfsmbi_type_v4i32, []>; +def SPUfsmbi: SDNode<"SPUISD::FSMBI", SPUfsmbi_type, []>; // SPU select bits instruction def SPUselb_v16i8: SDNode<"SPUISD::SELB", SPUselb_type_v16i8, []>; @@ -189,11 +181,8 @@ def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; // A-Form local store addresses def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; -// D-Form "imm($reg)" addresses -def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>; - -// X-Form "$reg($reg)" addresses -def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>; +// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses +def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>; // Load result node def SPUload_result : SDTypeProfile<1, 3, []>; diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp index 2f63446c129..0d7aac15a05 100644 --- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp @@ -17,6 +17,7 @@ using namespace llvm; SPUTargetAsmInfo::SPUTargetAsmInfo(const SPUTargetMachine &TM) { + PCSymbol = "."; CommentString = "#"; GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; @@ -24,17 +25,14 @@ 
SPUTargetAsmInfo::SPUTargetAsmInfo(const SPUTargetMachine &TM) { SetDirective = "\t.set"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; - SwitchToSectionDirective = "\t.section\t"; + SwitchToSectionDirective = ".section\t"; ConstantPoolSection = "\t.const\t"; JumpTableDataSection = ".const"; CStringSection = "\t.cstring"; - LCOMMDirective = "\t.lcomm\t"; StaticCtorsSection = ".mod_init_func"; StaticDtorsSection = ".mod_term_func"; FourByteConstantSection = ".const"; SixteenByteConstantSection = "\t.section\t.rodata.cst16,\"aM\",@progbits,16"; - UsedDirective = "\t.no_dead_strip\t"; - WeakRefDirective = "\t.weak_reference\t"; InlineAsmStart = "# InlineAsm Start"; InlineAsmEnd = "# InlineAsm End"; diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll index f604f74ca10..11481edc12f 100644 --- a/test/CodeGen/CellSPU/call_indirect.ll +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -1,19 +1,18 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep bisl %t1.s | count 7 +; RUN: grep bisl %t1.s | count 7 ; RUN: grep ila %t1.s | count 1 ; RUN: grep rotqbyi %t1.s | count 4 -; RUN: grep lqa %t1.s | count 5 -; RUN: grep lqd %t1.s | count 6 -; RUN: grep dispatch_tab %t1.s | count 10 +; RUN: grep lqa %t1.s | count 1 +; RUN: grep lqd %t1.s | count 11 +; RUN: grep dispatch_tab %t1.s | count 6 ; RUN: grep bisl %t2.s | count 7 ; RUN: grep ilhu %t2.s | count 2 ; RUN: grep iohl %t2.s | count 2 ; RUN: grep rotqby %t2.s | count 6 -; RUN: grep lqd %t2.s | count 12 -; RUN: grep lqx %t2.s | count 8 -; RUN: grep il %t2.s | count 9 -; RUN: grep ai %t2.s | count 5 +; RUN: grep lqd %t2.s | count 17 +; RUN: grep il %t2.s | count 2 +; RUN: grep ai %t2.s | count 7 ; RUN: grep dispatch_tab %t2.s | count 7 ; ModuleID = 'call_indirect.bc' diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll index baa23bbc8ab..6e05686f408 100644 --- 
a/test/CodeGen/CellSPU/extract_elt.ll +++ b/test/CodeGen/CellSPU/extract_elt.ll @@ -2,7 +2,7 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s ; RUN: grep shufb %t1.s | count 27 ; RUN: grep lqa %t1.s | count 27 -; RUN: grep lqx %t2.s | count 27 +; RUN: grep lqd %t2.s | count 27 ; RUN: grep space %t1.s | count 8 ; RUN: grep byte %t1.s | count 424 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" diff --git a/test/CodeGen/CellSPU/fcmp.ll b/test/CodeGen/CellSPU/fcmp.ll index f4406d63dfb..d212bd51e40 100644 --- a/test/CodeGen/CellSPU/fcmp.ll +++ b/test/CodeGen/CellSPU/fcmp.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep fceq %t1.s | count 1 && +; RUN: grep fceq %t1.s | count 1 ; RUN: grep fcmeq %t1.s | count 1 ; ; This file includes standard floating point arithmetic instructions diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll index e5fa79e31d6..5d6daa2ddfe 100644 --- a/test/CodeGen/CellSPU/struct_1.ll +++ b/test/CodeGen/CellSPU/struct_1.ll @@ -1,27 +1,26 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep lqa %t1.s | count 10 -; RUN: grep lqd %t1.s | count 4 -; RUN: grep rotqbyi %t1.s | count 5 +; RUN: grep lqa %t1.s | count 5 +; RUN: grep lqd %t1.s | count 11 +; RUN: grep rotqbyi %t1.s | count 7 ; RUN: grep xshw %t1.s | count 1 -; RUN: grep andi %t1.s | count 4 +; RUN: grep andi %t1.s | count 5 ; RUN: grep cbd %t1.s | count 3 ; RUN: grep chd %t1.s | count 1 ; RUN: grep cwd %t1.s | count 3 ; RUN: grep shufb %t1.s | count 7 -; RUN: grep stqa %t1.s | count 5 -; RUN: grep iohl %t2.s | count 14 -; RUN: grep ilhu %t2.s | count 14 -; RUN: grep lqx %t2.s | count 14 -; RUN: grep rotqbyi %t2.s | count 5 +; RUN: grep stqd %t1.s | count 7 +; RUN: grep iohl %t2.s | count 16 +; RUN: grep ilhu %t2.s | count 16 +; RUN: 
grep lqd %t2.s | count 16 +; RUN: grep rotqbyi %t2.s | count 7 ; RUN: grep xshw %t2.s | count 1 -; RUN: grep andi %t2.s | count 4 -; RUN: grep cbx %t2.s | count 3 -; RUN: grep chx %t2.s | count 1 -; RUN: grep cwx %t2.s | count 1 -; RUN: grep cwd %t2.s | count 2 +; RUN: grep andi %t2.s | count 5 +; RUN: grep cbd %t2.s | count 3 +; RUN: grep chd %t2.s | count 1 +; RUN: grep cwd %t2.s | count 3 ; RUN: grep shufb %t2.s | count 7 -; RUN: grep stqx %t2.s | count 7 +; RUN: grep stqd %t2.s | count 7 ; ModuleID = 'struct_1.bc' target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" @@ -48,88 +47,98 @@ target triple = "spu" ; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } @state = global %struct.hackstate zeroinitializer, align 16 -define i8 @get_hackstate_c1() zeroext { +define i8 @get_hackstate_c1() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret i8 %tmp2 } -define i8 @get_hackstate_c2() zeroext { +define i8 @get_hackstate_c2() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret i8 %tmp2 } -define i8 @get_hackstate_c3() zeroext { +define i8 @get_hackstate_c3() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret i8 %tmp2 } -define i32 @get_hackstate_i1() { +define i32 @get_hackstate_i1() nounwind { entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 - ret i32 %tmp2 + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), 
align 16 + ret i32 %tmp2 } -define i16 @get_hackstate_s1() signext { +define i16 @get_hackstate_s1() signext nounwind { entry: - %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret i16 %tmp2 + %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret i16 %tmp2 } -define i8 @get_hackstate_c7() zeroext { +define i8 @get_hackstate_c6() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16 + ret i8 %tmp2 } -define i32 @get_hackstate_i6() zeroext { +define i8 @get_hackstate_c7() zeroext nounwind { entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret i32 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 + ret i8 %tmp2 } -define void @set_hackstate_c1(i8 zeroext %c) { +define i32 @get_hackstate_i3() nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret void + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret i32 %tmp2 } -define void @set_hackstate_c2(i8 zeroext %c) { +define i32 @get_hackstate_i6() nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret void + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret i32 %tmp2 } -define void @set_hackstate_c3(i8 zeroext %c) { +define void @set_hackstate_c1(i8 zeroext %c) nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret void } -define void @set_hackstate_i1(i32 %i) { +define void @set_hackstate_c2(i8 zeroext %c) nounwind { entry: - store i32 %i, i32* getelementptr 
(%struct.hackstate* @state, i32 0, i32 3), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret void } -define void @set_hackstate_s1(i16 signext %s) { +define void @set_hackstate_c3(i8 zeroext %c) nounwind { entry: - store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret void } -define void @set_hackstate_i3(i32 %i) { +define void @set_hackstate_i1(i32 %i) nounwind { entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 11), align 16 - ret void + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret void } - -define void @set_hackstate_i6(i32 %i) { +define void @set_hackstate_s1(i16 signext %s) nounwind { entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret void + store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret void } +define void @set_hackstate_i3(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret void +} + +define void @set_hackstate_i6(i32 %i) nounwind { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret void +} diff --git a/test/CodeGen/CellSPU/struct_2.ll b/test/CodeGen/CellSPU/struct_2.ll deleted file mode 100644 index fee9c01dc68..00000000000 --- a/test/CodeGen/CellSPU/struct_2.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep lqx %t1.s | count 14 -; RUN: grep rotqby %t1.s | count 7 -; RUN: grep xshw %t1.s | count 1 -; RUN: grep andi %t1.s | count 4 -; RUN: grep cbx %t1.s | count 1 -; RUN: grep cbd %t1.s | count 2 -; RUN: grep chd %t1.s | count 1 -; RUN: grep cwd %t1.s | count 3 -; RUN: grep shufb %t1.s | count 7 -; RUN: grep stqx %t1.s | count 7 - 
-; ModuleID = 'struct_1.bc' -target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" -target triple = "spu" - -; struct hackstate { -; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3) -; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3) -; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3) -; int i1; // offset 4 (rotate left by 4 bytes to byte 0) -; short s1; // offset 8 (rotate left by 6 bytes to byte 2) -; int i2; // offset 12 [ignored] -; unsigned char c4; // offset 16 [ignored] -; unsigned char c5; // offset 17 [ignored] -; unsigned char c6; // offset 18 [ignored] -; unsigned char c7; // offset 19 (no rotate, in preferred slot) -; int i3; // offset 20 [ignored] -; int i4; // offset 24 [ignored] -; int i5; // offset 28 [ignored] -; int i6; // offset 32 (no rotate, in preferred slot) -; } -%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 } - -; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } -@state = global %struct.hackstate zeroinitializer, align 4 - -define i8 @get_hackstate_c1() zeroext { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 4 - ret i8 %tmp2 -} - -define i8 @get_hackstate_c2() zeroext { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 4 - ret i8 %tmp2 -} - -define i8 @get_hackstate_c3() zeroext { -entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 4 - ret i8 %tmp2 -} - -define i32 @get_hackstate_i1() { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 4 - ret i32 %tmp2 -} - -define i16 @get_hackstate_s1() signext { -entry: - %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 4 - ret i16 %tmp2 -} - -define i8 @get_hackstate_c7() zeroext { -entry: - %tmp2 = load i8* 
getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 4 - ret i8 %tmp2 -} - -define i32 @get_hackstate_i6() zeroext { -entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 4 - ret i32 %tmp2 -} - -define void @set_hackstate_c1(i8 zeroext %c) { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 4 - ret void -} - -define void @set_hackstate_c2(i8 zeroext %c) { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 4 - ret void -} - -define void @set_hackstate_c3(i8 zeroext %c) { -entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 4 - ret void -} - -define void @set_hackstate_i1(i32 %i) { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 4 - ret void -} - -define void @set_hackstate_s1(i16 signext %s) { -entry: - store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 4 - ret void -} - -define void @set_hackstate_i3(i32 %i) { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 11), align 4 - ret void -} - - -define void @set_hackstate_i6(i32 %i) { -entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 4 - ret void -} - diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll index 46109e3dc1f..3f7eb626cbe 100644 --- a/test/CodeGen/CellSPU/vec_const.ll +++ b/test/CodeGen/CellSPU/vec_const.ll @@ -1,11 +1,11 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep il %t1.s | count 16 -; RUN: grep ilhu %t1.s | count 8 +; RUN: grep il %t1.s | count 16 +; RUN: grep ilhu %t1.s | count 8 ; RUN: grep ilh %t1.s | count 13 ; RUN: grep iohl %t1.s | count 7 ; RUN: grep lqa %t1.s | count 6 -; RUN: grep 24672 %t1.s | count 2 +; RUN: grep 24672 %t1.s | count 2 ; RUN: grep 16429 %t1.s | count 
1 ; RUN: grep 63572 %t1.s | count 1 ; RUN: grep 4660 %t1.s | count 1 @@ -17,8 +17,7 @@ ; RUN: grep 21572 %t1.s | count 1 ; RUN: grep 11544 %t1.s | count 1 ; RUN: grep 1311768467750121234 %t1.s | count 1 -; RUN: grep lqx %t2.s | count 6 -; RUN: grep ila %t2.s | count 6 +; RUN: grep lqd %t2.s | count 6 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" target triple = "spu-unknown-elf"