diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index e222e1b4636..2d801a1f8b9 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1454,11 +1454,63 @@ public: } }; +/// LSBaseSDNode - Base class for LoadSDNode and StoreSDNode +/// +class LSBaseSDNode : public SDNode { +private: + //! SrcValue - Memory location for alias analysis. + const Value *SrcValue; + + //! SVOffset - Memory location offset. + int SVOffset; + + //! Alignment - Alignment of memory location in bytes. + unsigned Alignment; + + //! IsVolatile - True if the store is volatile. + bool IsVolatile; +protected: + //! Operand array for load and store + /*! + \note Moving this array to the base class captures more + common functionality shared between LoadSDNode and + StoreSDNode + */ + SDOperand Ops[4]; +public: + LSBaseSDNode(ISD::NodeType NodeTy, SDVTList VTs, const Value *SV, int SVO, + unsigned Align, bool Vol) + : SDNode(NodeTy, VTs), + SrcValue(SV), SVOffset(SVO), Alignment(Align), IsVolatile(Vol) + { } + + const SDOperand getChain() const { + return getOperand(0); + } + const SDOperand getBasePtr() const { + return getOperand(getOpcode() == ISD::LOAD ? 1 : 2); + } + const SDOperand getOffset() const { + return getOperand(getOpcode() == ISD::LOAD ? 2 : 3); + } + const SDOperand getValue() const { + assert(getOpcode() == ISD::STORE); + return getOperand(1); + } + + const Value *getSrcValue() const { return SrcValue; } + int getSrcValueOffset() const { return SVOffset; } + unsigned getAlignment() const { return Alignment; } + bool isVolatile() const { return IsVolatile; } + + static bool classof(const LSBaseSDNode *N) { return true; } + static bool classof(const SDNode *N) { return true; } +}; + /// LoadSDNode - This class is used to represent ISD::LOAD nodes. 
/// -class LoadSDNode : public SDNode { +class LoadSDNode : public LSBaseSDNode { virtual void ANCHOR(); // Out-of-line virtual method to give class a home. - SDOperand Ops[3]; // AddrMode - unindexed, pre-indexed, post-indexed. ISD::MemIndexedMode AddrMode; @@ -1468,26 +1520,13 @@ class LoadSDNode : public SDNode { // LoadedVT - VT of loaded value before extension. MVT::ValueType LoadedVT; - - // SrcValue - Memory location for alias analysis. - const Value *SrcValue; - - // SVOffset - Memory location offset. - int SVOffset; - - // Alignment - Alignment of memory location in bytes. - unsigned Alignment; - - // IsVolatile - True if the load is volatile. - bool IsVolatile; protected: friend class SelectionDAG; LoadSDNode(SDOperand *ChainPtrOff, SDVTList VTs, ISD::MemIndexedMode AM, ISD::LoadExtType ETy, MVT::ValueType LVT, const Value *SV, int O=0, unsigned Align=0, bool Vol=false) - : SDNode(ISD::LOAD, VTs), - AddrMode(AM), ExtType(ETy), LoadedVT(LVT), SrcValue(SV), SVOffset(O), - Alignment(Align), IsVolatile(Vol) { + : LSBaseSDNode(ISD::LOAD, VTs, SV, O, Align, Vol), + AddrMode(AM), ExtType(ETy), LoadedVT(LVT) { Ops[0] = ChainPtrOff[0]; // Chain Ops[1] = ChainPtrOff[1]; // Ptr Ops[2] = ChainPtrOff[2]; // Off @@ -1499,18 +1538,12 @@ protected: } public: - const SDOperand getChain() const { return getOperand(0); } - const SDOperand getBasePtr() const { return getOperand(1); } - const SDOperand getOffset() const { return getOperand(2); } ISD::MemIndexedMode getAddressingMode() const { return AddrMode; } ISD::LoadExtType getExtensionType() const { return ExtType; } MVT::ValueType getLoadedVT() const { return LoadedVT; } - const Value *getSrcValue() const { return SrcValue; } - int getSrcValueOffset() const { return SVOffset; } - unsigned getAlignment() const { return Alignment; } - bool isVolatile() const { return IsVolatile; } static bool classof(const LoadSDNode *) { return true; } + static bool classof(const LSBaseSDNode *N) { return true; } static bool 
classof(const SDNode *N) { return N->getOpcode() == ISD::LOAD; } @@ -1518,9 +1551,8 @@ public: /// StoreSDNode - This class is used to represent ISD::STORE nodes. /// -class StoreSDNode : public SDNode { +class StoreSDNode : public LSBaseSDNode { virtual void ANCHOR(); // Out-of-line virtual method to give class a home. - SDOperand Ops[4]; // AddrMode - unindexed, pre-indexed, post-indexed. ISD::MemIndexedMode AddrMode; @@ -1530,26 +1562,13 @@ class StoreSDNode : public SDNode { // StoredVT - VT of the value after truncation. MVT::ValueType StoredVT; - - // SrcValue - Memory location for alias analysis. - const Value *SrcValue; - - // SVOffset - Memory location offset. - int SVOffset; - - // Alignment - Alignment of memory location in bytes. - unsigned Alignment; - - // IsVolatile - True if the store is volatile. - bool IsVolatile; protected: friend class SelectionDAG; StoreSDNode(SDOperand *ChainValuePtrOff, SDVTList VTs, ISD::MemIndexedMode AM, bool isTrunc, MVT::ValueType SVT, const Value *SV, int O=0, unsigned Align=0, bool Vol=false) - : SDNode(ISD::STORE, VTs), - AddrMode(AM), IsTruncStore(isTrunc), StoredVT(SVT), SrcValue(SV), - SVOffset(O), Alignment(Align), IsVolatile(Vol) { + : LSBaseSDNode(ISD::STORE, VTs, SV, O, Align, Vol), + AddrMode(AM), IsTruncStore(isTrunc), StoredVT(SVT) { Ops[0] = ChainValuePtrOff[0]; // Chain Ops[1] = ChainValuePtrOff[1]; // Value Ops[2] = ChainValuePtrOff[2]; // Ptr @@ -1562,19 +1581,12 @@ protected: } public: - const SDOperand getChain() const { return getOperand(0); } - const SDOperand getValue() const { return getOperand(1); } - const SDOperand getBasePtr() const { return getOperand(2); } - const SDOperand getOffset() const { return getOperand(3); } ISD::MemIndexedMode getAddressingMode() const { return AddrMode; } bool isTruncatingStore() const { return IsTruncStore; } MVT::ValueType getStoredVT() const { return StoredVT; } - const Value *getSrcValue() const { return SrcValue; } - int getSrcValueOffset() const { return 
SVOffset; } - unsigned getAlignment() const { return Alignment; } - bool isVolatile() const { return IsVolatile; } static bool classof(const StoreSDNode *) { return true; } + static bool classof(const LSBaseSDNode *N) { return true; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::STORE; } diff --git a/include/llvm/IntrinsicsCellSPU.td b/include/llvm/IntrinsicsCellSPU.td index 0e257c150ae..7030278708e 100644 --- a/include/llvm/IntrinsicsCellSPU.td +++ b/include/llvm/IntrinsicsCellSPU.td @@ -17,8 +17,8 @@ //===----------------------------------------------------------------------===// // 7-bit integer type, used as an immediate: -def cell_i7_ty: LLVMType; // Note: This was i8 -def cell_i8_ty: LLVMType; // Note: This was i8 +def cell_i7_ty: LLVMType; +def cell_i8_ty: LLVMType; class v16i8_u7imm : GCCBuiltin, @@ -27,7 +27,7 @@ class v16i8_u7imm : class v16i8_u8imm : GCCBuiltin, - Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; class v16i8_s10imm : diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td index cfa0089d848..2f453b1feb1 100644 --- a/lib/Target/CellSPU/CellSDKIntrinsics.td +++ b/lib/Target/CellSPU/CellSDKIntrinsics.td @@ -108,18 +108,18 @@ def CellSDKmpyhhau: def CellSDKand: RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "add\t $rT, $rA, $rB", IntegerOp, + "and\t $rT, $rA, $rB", IntegerOp, [(set (v4i32 VECREG:$rT), (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CellSDKandc: RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "addc\t $rT, $rA, $rB", IntegerOp, + "andc\t $rT, $rA, $rB", IntegerOp, [(set (v4i32 VECREG:$rT), (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CellSDKandbi: - RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "andbi\t $rT, 
$rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -149,7 +149,7 @@ def CellSDKorc: (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CellSDKorbi: - RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "orbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -173,7 +173,7 @@ def CellSDKxor: (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CellSDKxorbi: - RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "xorbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -248,7 +248,7 @@ def CellSDKceqb: (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; def CellSDKceqbi: - RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "ceqbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -294,7 +294,7 @@ def CellSDKcgtb: (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; def CellSDKcgtbi: - RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "cgtbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -329,7 +329,7 @@ def CellSDKclgtb: (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; def CellSDKclgtbi: - RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "clgtbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_clgtbi (v16i8 
VECREG:$rA), immU8:$val))]>; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 73e46fff1be..bb3b100d577 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -384,11 +384,17 @@ bool SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, SDOperand &Index) { // These match the addr256k operand type: - MVT::ValueType PtrVT = SPUtli.getPointerTy(); MVT::ValueType OffsVT = MVT::i16; + MVT::ValueType PtrVT = SPUtli.getPointerTy(); switch (N.getOpcode()) { case ISD::Constant: + case ISD::ConstantPool: + case ISD::GlobalAddress: + cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n"; + abort(); + /*NOTREACHED*/ + case ISD::TargetConstant: { // Loading from a constant address. ConstantSDNode *CN = dyn_cast(N); @@ -400,23 +406,15 @@ SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, return true; } } - case ISD::ConstantPool: - case ISD::TargetConstantPool: { - // The constant pool address is N. Base is a dummy that will be ignored by + case ISD::TargetGlobalAddress: + case ISD::TargetConstantPool: + case SPUISD::AFormAddr: { + // The address is in Base. N is a dummy that will be ignored by // the assembly printer. Base = N; Index = CurDAG->getTargetConstant(0, OffsVT); return true; } - - case ISD::GlobalAddress: - case ISD::TargetGlobalAddress: { - // The global address is N. Base is a dummy that is ignored by the - // assembly printer. 
- Base = N; - Index = CurDAG->getTargetConstant(0, OffsVT); - return true; - } } return false; @@ -445,10 +443,9 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, Index = CurDAG->getTargetConstant(0, PtrTy); return true; } else if (Opc == ISD::FrameIndex) { - // Stack frame index must be less than 512 (divided by 16): FrameIndexSDNode *FI = dyn_cast(N); DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = " - << FI->getIndex() << "\n"); + << FI->getIndex() << "\n"); if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { Base = CurDAG->getTargetConstant(0, PtrTy); Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); @@ -458,45 +455,49 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, // Generated by getelementptr const SDOperand Op0 = N.getOperand(0); // Frame index/base const SDOperand Op1 = N.getOperand(1); // Offset within base - ConstantSDNode *CN = dyn_cast(Op1); - // Not a constant? - if (CN == 0) + if (Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = dyn_cast(Op1); + assert(CN != 0 && "SelectDFormAddr: Expected a constant"); + + int32_t offset = (int32_t) CN->getSignExtended(); + unsigned Opc0 = Op0.getOpcode(); + + if ((offset & 0xf) != 0) { + // Unaligned offset: punt and let X-form address handle it. + // NOTE: This really doesn't have to be strictly 16-byte aligned, + // since the load/store quadword instructions will implicitly + // zero the lower 4 bits of the resulting address. 
+ return false; + } + + if (Opc0 == ISD::FrameIndex) { + FrameIndexSDNode *FI = dyn_cast(Op0); + DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset + << " frame index = " << FI->getIndex() << "\n"); + + if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); + return true; + } + } else if (offset > SPUFrameInfo::minFrameOffset() + && offset < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + if (Opc0 == ISD::GlobalAddress) { + // Convert global address to target global address + GlobalAddressSDNode *GV = dyn_cast(Op0); + Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy); + return true; + } else { + // Otherwise, just take operand 0 + Index = Op0; + return true; + } + } + } else return false; - - int32_t offset = (int32_t) CN->getSignExtended(); - unsigned Opc0 = Op0.getOpcode(); - - if ((offset & 0xf) != 0) { - cerr << "SelectDFormAddr: unaligned offset = " << offset << "\n"; - abort(); - /*NOTREACHED*/ - } - - if (Opc0 == ISD::FrameIndex) { - FrameIndexSDNode *FI = dyn_cast(Op0); - DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset - << " frame index = " << FI->getIndex() << "\n"); - - if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); - return true; - } - } else if (offset > SPUFrameInfo::minFrameOffset() - && offset < SPUFrameInfo::maxFrameOffset()) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - if (Opc0 == ISD::GlobalAddress) { - // Convert global address to target global address - GlobalAddressSDNode *GV = dyn_cast(Op0); - Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy); - return true; - } else { - // Otherwise, just take operand 0 - Index = Op0; - return true; - } - } } else if (Opc == SPUISD::DFormAddr) { // D-Form address: This is pretty 
straightforward, naturally... ConstantSDNode *CN = cast(N.getOperand(1)); @@ -504,6 +505,16 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); Index = N.getOperand(0); return true; + } else if (Opc == ISD::FrameIndex) { + // Stack frame index must be less than 512 (divided by 16): + FrameIndexSDNode *FI = dyn_cast(N); + DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = " + << FI->getIndex() << "\n"); + if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); + return true; + } } return false; @@ -535,7 +546,8 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, unsigned N2Opc = N2.getOpcode(); if ((N1Opc == SPUISD::Hi && N2Opc == SPUISD::Lo) - || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)) { + || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi) + || (N1Opc == SPUISD::XFormAddr)) { Base = N.getOperand(0); Index = N.getOperand(1); return true; @@ -548,6 +560,10 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, abort(); /*UNREACHED*/ } + } else if (Opc == SPUISD::XFormAddr) { + Base = N; + Index = N.getOperand(1); + return true; } else if (N.getNumOperands() == 2) { SDOperand N1 = N.getOperand(0); SDOperand N2 = N.getOperand(1); @@ -591,11 +607,14 @@ SPUDAGToDAGISel::Select(SDOperand Op) { } else if (Opc == ISD::FrameIndex) { // Selects to AIr32 FI, 0 which in turn will become AIr32 SP, imm. 
int FI = cast(N)->getIndex(); - SDOperand TFI = CurDAG->getTargetFrameIndex(FI, SPUtli.getPointerTy()); + MVT::ValueType PtrVT = SPUtli.getPointerTy(); + SDOperand Zero = CurDAG->getTargetConstant(0, PtrVT); + SDOperand TFI = CurDAG->getTargetFrameIndex(FI, PtrVT); DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AI32 , 0\n"); - return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, - CurDAG->getTargetConstant(0, MVT::i32)); + if (N->hasOneUse()) + return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, Zero); + CurDAG->getTargetNode(SPU::AIr32, Op.getValueType(), TFI, Zero); } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT unsigned VT = N->getValueType(0); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 7893e677fbb..59e2068a7a0 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -82,7 +82,7 @@ namespace { /*! \arg Op Operand to test \return true if the operand is a memory target (i.e., global - address, external symbol, constant pool) or an existing D-Form + address, external symbol, constant pool) or an A-form address. 
*/ bool isMemoryOperand(const SDOperand &Op) @@ -90,17 +90,17 @@ namespace { const unsigned Opc = Op.getOpcode(); return (Opc == ISD::GlobalAddress || Opc == ISD::GlobalTLSAddress - || Opc == ISD::FrameIndex + /* || Opc == ISD::FrameIndex */ || Opc == ISD::JumpTable || Opc == ISD::ConstantPool || Opc == ISD::ExternalSymbol || Opc == ISD::TargetGlobalAddress || Opc == ISD::TargetGlobalTLSAddress - || Opc == ISD::TargetFrameIndex + /* || Opc == ISD::TargetFrameIndex */ || Opc == ISD::TargetJumpTable || Opc == ISD::TargetConstantPool || Opc == ISD::TargetExternalSymbol - || Opc == SPUISD::DFormAddr); + || Opc == SPUISD::AFormAddr); } } @@ -356,7 +356,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::OR, MVT::v16i8, Custom); setOperationAction(ISD::XOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); - + setSetCCResultType(MVT::i32); setShiftAmountType(MVT::i32); setSetCCResultContents(ZeroOrOneSetCCResult); @@ -377,6 +377,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; + node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr"; node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr"; node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; @@ -430,6 +431,105 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const // LowerOperation implementation //===----------------------------------------------------------------------===// +/// Aligned load common code for CellSPU +/*! 
+ \param[in] Op The SelectionDAG load or store operand + \param[in] DAG The selection DAG + \param[in] ST CellSPU subtarget information structure + \param[in,out] alignment Caller initializes this to the load or store node's + value from getAlignment(), may be updated while generating the aligned load + \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned + offset (divisible by 16, modulo 16 == 0) + \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the + offset of the preferred slot (modulo 16 != 0) + \param[in,out] VT Caller initializes this value type to the load or store + node's loaded or stored value type; may be updated if an i1-extended load or + store. + \param[out] was16aligned true if the base pointer had 16-byte alignment, + otherwise false. Can help to determine if the chunk needs to be rotated. + + Both load and store lowering load a block of data aligned on a 16-byte + boundary. This is the common aligned load code shared between both. 
+ */ +static SDOperand +AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, + LSBaseSDNode *LSN, + unsigned &alignment, int &alignOffs, int &prefSlotOffs, + unsigned &VT, bool &was16aligned) +{ + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + const valtype_map_s *vtm = getValueTypeMapEntry(VT); + SDOperand basePtr = LSN->getBasePtr(); + SDOperand chain = LSN->getChain(); + + if (basePtr.getOpcode() == ISD::ADD) { + SDOperand Op1 = basePtr.Val->getOperand(1); + + if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) { + const ConstantSDNode *CN = cast(basePtr.Val->getOperand(1)); + + alignOffs = (int) CN->getValue(); + prefSlotOffs = (int) (alignOffs & 0xf); + + // Adjust the rotation amount to ensure that the final result ends up in + // the preferred slot: + prefSlotOffs -= vtm->prefslot_byte; + basePtr = basePtr.getOperand(0); + + // Modify alignment, since the ADD is likely from getElementPtr: + switch (basePtr.getOpcode()) { + case ISD::GlobalAddress: + case ISD::TargetGlobalAddress: { + GlobalAddressSDNode *GN = cast(basePtr.Val); + const GlobalValue *GV = GN->getGlobal(); + alignment = GV->getAlignment(); + break; + } + } + } else { + alignOffs = 0; + prefSlotOffs = -vtm->prefslot_byte; + } + } else { + alignOffs = 0; + prefSlotOffs = -vtm->prefslot_byte; + } + + if (alignment == 16) { + // Realign the base pointer as a D-Form address: + if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) { + if (isMemoryOperand(basePtr)) { + SDOperand Zero = DAG.getConstant(0, PtrVT); + unsigned Opc = (!ST->usingLargeMem() + ? 
SPUISD::AFormAddr + : SPUISD::XFormAddr); + basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero); + } + basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, + basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT)); + } + + // Emit the vector load: + was16aligned = true; + return DAG.getLoad(MVT::v16i8, chain, basePtr, + LSN->getSrcValue(), LSN->getSrcValueOffset(), + LSN->isVolatile(), 16); + } + + // Unaligned load or we're using the "large memory" model, which means that + // we have to be very pessimistic: + if (isMemoryOperand(basePtr)) { + basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); + } + + // Add the offset + basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT)); + was16aligned = false; + return DAG.getLoad(MVT::v16i8, chain, basePtr, + LSN->getSrcValue(), LSN->getSrcValueOffset(), + LSN->isVolatile(), 16); +} + /// Custom lower loads for CellSPU /*! All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements @@ -438,22 +538,13 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const static SDOperand LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { LoadSDNode *LN = cast(Op); - SDOperand basep = LN->getBasePtr(); SDOperand the_chain = LN->getChain(); - MVT::ValueType BasepOpc = basep.Val->getOpcode(); MVT::ValueType VT = LN->getLoadedVT(); MVT::ValueType OpVT = Op.Val->getValueType(0); - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); ISD::LoadExtType ExtType = LN->getExtensionType(); unsigned alignment = LN->getAlignment(); - const valtype_map_s *vtm = getValueTypeMapEntry(VT); SDOperand Ops[8]; - if (BasepOpc == ISD::FrameIndex) { - // Loading from a frame index is always properly aligned. Always. 
- return SDOperand(); - } - // For an extending load of an i1 variable, just call it i8 (or whatever we // were passed) and make it zero-extended: if (VT == MVT::i1) { @@ -463,178 +554,76 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { switch (LN->getAddressingMode()) { case ISD::UNINDEXED: { - SDOperand result; - SDOperand rot_op, rotamt; - SDOperand ptrp; - int c_offset; - int c_rotamt; + int offset, rotamt; + bool was16aligned; + SDOperand result = + AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned); - // The vector type we really want to be when we load the 16-byte chunk - MVT::ValueType vecVT, opVecVT; - - vecVT = MVT::v16i8; - if (VT != MVT::i1) - vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); - opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT))); - - if (basep.getOpcode() == ISD::ADD) { - const ConstantSDNode *CN = cast(basep.Val->getOperand(1)); - - assert(CN != NULL - && "LowerLOAD: ISD::ADD operand 1 is not constant"); - - c_offset = (int) CN->getValue(); - c_rotamt = (int) (c_offset & 0xf); - - // Adjust the rotation amount to ensure that the final result ends up in - // the preferred slot: - c_rotamt -= vtm->prefslot_byte; - ptrp = basep.getOperand(0); - } else { - c_offset = 0; - c_rotamt = -vtm->prefslot_byte; - ptrp = basep; - } - - if (alignment == 16) { - // 16-byte aligned load into preferred slot, no rotation - if (c_rotamt == 0) { - if (isMemoryOperand(ptrp)) - // Return unchanged - return SDOperand(); - else { - // Return modified D-Form address for pointer: - ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT, - ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT)); - if (VT == OpVT) - return DAG.getLoad(VT, LN->getChain(), ptrp, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); - else - return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(), - LN->getSrcValueOffset(), OpVT, - LN->isVolatile(), 16); - } - } else { - // Need to 
rotate... - if (c_rotamt < 0) - c_rotamt += 16; - // Realign the base pointer, with a D-Form address - if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp)) - basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, - ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32)); - else - basep = ptrp; - - // Rotate the load: - rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); - the_chain = rot_op.getValue(1); - rotamt = DAG.getConstant(c_rotamt, MVT::i16); - - SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); - Ops[0] = the_chain; - Ops[1] = rot_op; - Ops[2] = rotamt; - - result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3); - the_chain = result.getValue(1); - - if (VT == OpVT || ExtType == ISD::EXTLOAD) { - SDVTList scalarvts; - Ops[0] = the_chain; - Ops[1] = result; - if (OpVT == VT) { - scalarvts = DAG.getVTList(VT, MVT::Other); - } else { - scalarvts = DAG.getVTList(OpVT, MVT::Other); - } - - result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT), - result); - Ops[0] = the_chain; - Ops[1] = result; - result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2); - the_chain = result.getValue(1); - } else { - // Handle the sign and zero-extending loads for i1 and i8: - unsigned NewOpC; - - if (ExtType == ISD::SEXTLOAD) { - NewOpC = (OpVT == MVT::i1 - ? SPUISD::EXTRACT_I1_SEXT - : SPUISD::EXTRACT_I8_SEXT); - } else { - assert(ExtType == ISD::ZEXTLOAD); - NewOpC = (OpVT == MVT::i1 - ? 
SPUISD::EXTRACT_I1_ZEXT - : SPUISD::EXTRACT_I8_ZEXT); - } - - result = DAG.getNode(NewOpC, OpVT, result); - } - - SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); - SDOperand retops[2] = { result, the_chain }; - - result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); - return result; - /*UNREACHED*/ - } - } else { - // Misaligned 16-byte load: - if (basep.getOpcode() == ISD::LOAD) { - LN = cast(basep); - if (LN->getAlignment() == 16) { - // We can verify that we're really loading from a 16-byte aligned - // chunk. Encapsulate basep as a D-Form address and return a new - // load: - basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep, - DAG.getConstant(0, PtrVT)); - if (OpVT == VT) - return DAG.getLoad(VT, LN->getChain(), basep, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); - else - return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep, - LN->getSrcValue(), LN->getSrcValueOffset(), - OpVT, LN->isVolatile(), 16); - } - } - - // Catch all other cases where we can't guarantee that we have a - // 16-byte aligned entity, which means resorting to an X-form - // address scheme: - - SDOperand ZeroOffs = DAG.getConstant(0, PtrVT); - SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs); - SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs); - - ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp); - - SDOperand alignLoad = - DAG.getLoad(opVecVT, LN->getChain(), ptrp, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); - - SDOperand insertEltOp = - DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp); - - result = DAG.getNode(SPUISD::SHUFB, opVecVT, - alignLoad, - alignLoad, - DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp)); - - result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result); - - SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); - SDOperand retops[2] = { result, the_chain }; - - result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); + if (result.Val == 0) return result; + + the_chain = 
result.getValue(1); + // Rotate the chunk if necessary + if (rotamt < 0) + rotamt += 16; + if (rotamt != 0) { + SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); + + if (was16aligned) { + Ops[0] = the_chain; + Ops[1] = result; + Ops[2] = DAG.getConstant(rotamt, MVT::i16); + } else { + LoadSDNode *LN1 = cast(result); + Ops[0] = the_chain; + Ops[1] = result; + Ops[2] = LN1->getBasePtr(); + } + + result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3); + the_chain = result.getValue(1); } - break; + + if (VT == OpVT || ExtType == ISD::EXTLOAD) { + SDVTList scalarvts; + MVT::ValueType vecVT = MVT::v16i8; + + // Convert the loaded v16i8 vector to the appropriate vector type + // specified by the operand: + if (OpVT == VT) { + if (VT != MVT::i1) + vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + } else + vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT))); + + Ops[0] = the_chain; + Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result); + scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other); + result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2); + the_chain = result.getValue(1); + } else { + // Handle the sign and zero-extending loads for i1 and i8: + unsigned NewOpC; + + if (ExtType == ISD::SEXTLOAD) { + NewOpC = (OpVT == MVT::i1 + ? SPUISD::EXTRACT_I1_SEXT + : SPUISD::EXTRACT_I8_SEXT); + } else { + assert(ExtType == ISD::ZEXTLOAD); + NewOpC = (OpVT == MVT::i1 + ? SPUISD::EXTRACT_I1_ZEXT + : SPUISD::EXTRACT_I8_ZEXT); + } + + result = DAG.getNode(NewOpC, OpVT, result); + } + + SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); + SDOperand retops[2] = { result, the_chain }; + + result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); + return result; } case ISD::PRE_INC: case ISD::PRE_DEC: @@ -664,58 +653,31 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { MVT::ValueType VT = Value.getValueType(); MVT::ValueType StVT = (!SN->isTruncatingStore() ? 
VT : SN->getStoredVT()); MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDOperand the_chain = SN->getChain(); - //unsigned alignment = SN->getAlignment(); - //const valtype_map_s *vtm = getValueTypeMapEntry(VT); + unsigned alignment = SN->getAlignment(); switch (SN->getAddressingMode()) { case ISD::UNINDEXED: { - SDOperand basep = SN->getBasePtr(); - SDOperand ptrOp; - int offset; - - if (basep.getOpcode() == ISD::FrameIndex) { - // FrameIndex nodes are always properly aligned. Really. - return SDOperand(); - } - - if (basep.getOpcode() == ISD::ADD) { - const ConstantSDNode *CN = cast(basep.Val->getOperand(1)); - assert(CN != NULL - && "LowerSTORE: ISD::ADD operand 1 is not constant"); - offset = unsigned(CN->getValue()); - ptrOp = basep.getOperand(0); - DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = " - << offset - << "\n"); - } else { - ptrOp = basep; - offset = 0; - } + int chunk_offset, slot_offset; + bool was16aligned; // The vector type we really want to load from the 16-byte chunk, except // in the case of MVT::i1, which has to be v16i8. - unsigned vecVT, stVecVT; - + unsigned vecVT, stVecVT = MVT::v16i8; + if (StVT != MVT::i1) stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT))); - else - stVecVT = MVT::v16i8; vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); - // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is - // the actual dform addr offs($reg). 
- basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp, - DAG.getConstant((offset & ~0xf), PtrVT)); + SDOperand alignLoadVec = + AlignedLoad(Op, DAG, ST, SN, alignment, + chunk_offset, slot_offset, VT, was16aligned); - // Create the 16-byte aligned vector load - SDOperand alignLoad = - DAG.getLoad(vecVT, the_chain, basep, - SN->getSrcValue(), SN->getSrcValueOffset(), - SN->isVolatile(), 16); - the_chain = alignLoad.getValue(1); + if (alignLoadVec.Val == 0) + return alignLoadVec; - LoadSDNode *LN = cast(alignLoad); + LoadSDNode *LN = cast(alignLoadVec); + SDOperand basePtr = LN->getBasePtr(); + SDOperand the_chain = alignLoadVec.getValue(1); SDOperand theValue = SN->getValue(); SDOperand result; @@ -727,18 +689,34 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { theValue = theValue.getOperand(0); } - SDOperand insertEltOp = - DAG.getNode(SPUISD::INSERT_MASK, stVecVT, - DAG.getNode(SPUISD::DFormAddr, PtrVT, - ptrOp, - DAG.getConstant((offset & 0xf), PtrVT))); + chunk_offset &= 0xf; + chunk_offset /= (MVT::getSizeInBits(StVT == MVT::i1 ? (unsigned) MVT::i8 : StVT) / 8); + SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT); + SDOperand insertEltPtr; + SDOperand insertEltOp; + + // If the base pointer is already a D-form address, then just create + // a new D-form address with a slot offset and the original base pointer. + // Otherwise generate a D-form address with the slot offset relative + // to the stack pointer, which is always aligned. 
+ if (basePtr.getOpcode() == SPUISD::DFormAddr) { + insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, + basePtr.getOperand(0), + insertEltOffs); + } else { + insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + insertEltOffs); + } + + insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr); result = DAG.getNode(SPUISD::SHUFB, vecVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue), - alignLoad, + alignLoadVec, DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp)); - result = DAG.getStore(the_chain, result, basep, + result = DAG.getStore(the_chain, result, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), LN->isVolatile(), LN->getAlignment()); @@ -767,19 +745,23 @@ LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { ConstantPoolSDNode *CP = cast(Op); Constant *C = CP->getConstVal(); SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); - const TargetMachine &TM = DAG.getTarget(); SDOperand Zero = DAG.getConstant(0, PtrVT); + const TargetMachine &TM = DAG.getTarget(); if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { // Just return the SDOperand with the constant pool address in it. return CPI; } else { +#if 1 // Generate hi/lo address pair SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero); SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero); return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); +#else + return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero); +#endif } } @@ -797,16 +779,9 @@ LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { const TargetMachine &TM = DAG.getTarget(); if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - // Just return the SDOperand with the jump table address in it. 
- return JTI; - } else { - // Generate hi/lo address pair - SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero); - SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero); - - return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); - } + return (!ST->usingLargeMem() + ? JTI + : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero)); } assert(0 && @@ -820,20 +795,13 @@ LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { GlobalAddressSDNode *GSDN = cast(Op); GlobalValue *GV = GSDN->getGlobal(); SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); - SDOperand Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); + SDOperand Zero = DAG.getConstant(0, PtrVT); if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - // Generate a local store address - return GA; - } else { - // Generate hi/lo address pair - SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); - SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); - - return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); - } + return (!ST->usingLargeMem() + ? 
GA + : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero)); } else { cerr << "LowerGlobalAddress: Relocation model other than static not " << "supported.\n"; @@ -1074,7 +1042,7 @@ static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) { static SDOperand -LowerCALL(SDOperand Op, SelectionDAG &DAG) { +LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SDOperand Chain = Op.getOperand(0); #if 0 bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; @@ -1184,25 +1152,35 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) { GlobalValue *GV = G->getGlobal(); unsigned CalleeVT = Callee.getValueType(); + SDOperand Zero = DAG.getConstant(0, PtrVT); + SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT); - // Turn calls to targets that are defined (i.e., have bodies) into BRSL - // style calls, otherwise, external symbols are BRASL calls. - // NOTE: - // This may be an unsafe assumption for JIT and really large compilation - // units. - if (GV->isDeclaration()) { - Callee = DAG.getGlobalAddress(GV, CalleeVT); + if (!ST->usingLargeMem()) { + // Turn calls to targets that are defined (i.e., have bodies) into BRSL + // style calls, otherwise, external symbols are BRASL calls. This assumes + // that declared/defined symbols are in the same compilation unit and can + // be reached through PC-relative jumps. + // + // NOTE: + // This may be an unsafe assumption for JIT and really large compilation + // units. 
+ if (GV->isDeclaration()) { + Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero); + } else { + Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero); + } } else { - Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, - DAG.getTargetGlobalAddress(GV, CalleeVT), - DAG.getConstant(0, PtrVT)); + // "Large memory" mode: Turn all calls into indirect calls with a X-form + // address pairs: + Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isLSAAddress(Callee, DAG)) + else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { // If this is an absolute destination address that appears to be a legal // local store address, use the munged value. Callee = SDOperand(Dest, 0); + } Ops.push_back(Chain); Ops.push_back(Callee); @@ -2468,7 +2446,7 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); case ISD::CALL: - return LowerCALL(Op, DAG); + return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl()); case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index b15aed6c0cb..d9e4e7ed4ed 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -31,8 +31,9 @@ namespace llvm { Hi, ///< High address component (upper 16) Lo, ///< Low address component (lower 16) PCRelAddr, ///< Program counter relative address + AFormAddr, ///< A-form address (local store) DFormAddr, ///< D-Form address "imm($r)" - XFormAddr, ///< X-Form address "$r1($r2)" + XFormAddr, ///< X-Form address "$r($r)" LDRESULT, ///< Load result (value, chain) CALL, ///< CALL instruction diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 2a0eef7f091..94aa390fe9d 100644 --- 
a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -158,7 +158,7 @@ let isSimpleLoad = 1 in { def LQAr32: RI16Form<0b100001100, (outs R32C:$rT), (ins addr256k:$src), "lqa\t$rT, $src", LoadStore, - [(set R32C:$rT, (load aform_addr:$src))]>; + [(set R32C:$rT, (load aform_addr:$src))]>; def LQAf32: RI16Form<0b100001100, (outs R32FP:$rT), (ins addr256k:$src), @@ -610,6 +610,13 @@ def IOHLf32: RegConstraint<"$rS = $rT">, NoEncode<"$rS">; +def IOHLlo: + RI16Form<0b100000110, (outs R32C:$rT), (ins R32C:$rS, symbolLo:$val), + "iohl\t$rT, $val", ImmLoad, + [/* no pattern */]>, + RegConstraint<"$rS = $rT">, + NoEncode<"$rS">; + // Form select mask for bytes using immediate, used in conjunction with the // SELB instruction: @@ -2367,12 +2374,12 @@ def ROTIr32_i8: // are used here for type checking (instances where ROTQBI is used actually // use vector registers) def ROTQBYvec: - RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), "rotqby\t$rT, $rA, $rB", RotateShift, - [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R16C:$rB))]>; + [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R32C:$rB))]>; -def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R16C:$rB), - (ROTQBYvec VECREG:$rA, R16C:$rB)>; +def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R32C:$rB), + (ROTQBYvec VECREG:$rA, R32C:$rB)>; // See ROTQBY note above. 
def ROTQBYIvec: @@ -2720,12 +2727,12 @@ def CEQBv16i8: [/* no pattern to match: intrinsic */]>; def CEQBIr8: - RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm:$val), + RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm_i8:$val), "ceqbi\t$rT, $rA, $val", ByteOp, [/* no pattern to match: intrinsic */]>; def CEQBIv16i8: - RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm:$val), + RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm_i8:$val), "ceqbi\t$rT, $rA, $val", ByteOp, [/* no pattern to match: intrinsic */]>; @@ -2793,7 +2800,7 @@ let isCall = 1, def BRASL: BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops), "brasl\t$$lr, $func", - [(SPUcall tglobaladdr:$func)]>; + [(SPUcall (SPUaform tglobaladdr:$func, 0))]>; // Branch indirect and set link if external data. These instructions are not // actually generated, matched by an intrinsic: @@ -3468,20 +3475,21 @@ def : Pat<(i32 (anyext R16C:$rSrc)), // low parts in order to load them into a register. 
//===----------------------------------------------------------------------===// -def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>; -def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>; -def : Pat<(SPUdform tglobaladdr:$in, imm:$imm), (ILAlsa tglobaladdr:$in)>; -def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>; -def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>; -def : Pat<(SPUdform tconstpool:$in, imm:$imm), (ILAlsa tconstpool:$in)>; -def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>; -def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>; -def : Pat<(SPUdform tjumptable:$in, imm:$imm), (ILAlsa tjumptable:$in)>; +def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>; +def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>; +def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; +def : Pat<(SPUxform tglobaladdr:$in, 0), + (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; +def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>; +def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>; +def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; +def : Pat<(SPUxform tjumptable:$in, 0), + (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; +def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>; +def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>; +def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; +/* def : Pat<(SPUxform tconstpool:$in, 0), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; */ -// Force load of global address to a register. 
These forms show up in -// SPUISD::DFormAddr pseudo instructions: -def : Pat<(add tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; -def : Pat<(add tconstpool:$in, 0), (ILAlsa tglobaladdr:$in)>; -def : Pat<(add tjumptable:$in, 0), (ILAlsa tglobaladdr:$in)>; // Instrinsics: include "CellSDKIntrinsics.td" diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 4e78bee9550..ae513d22663 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -186,9 +186,15 @@ def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>; // PC-relative address def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; +// A-Form local store addresses +def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; + // D-Form "imm($reg)" addresses def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>; +// X-Form "$reg($reg)" addresses +def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>; + // SPU 32-bit sign-extension to 64-bits def SPUsext32_to_64: SDNode<"SPUISD::SEXT32TO64", SDTIntExtendOp, []>; diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td index a9ca3c2f971..2a3551d4f64 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -140,6 +140,17 @@ def imm18 : PatLeaf<(imm), [{ return ((Value & ((1 << 19) - 1)) == Value); }]>; +def lo16 : PatLeaf<(imm), [{ + // lo16 predicate - returns true if the immediate is a 32-bit constant + // whose value fits entirely within the low order 16 bits: + if (N->getValueType(0) == MVT::i32) { + uint32_t val = N->getValue(); + return ((val & 0x0000ffff) == val); + } + + return false; +}], LO16>; + def hi16 : PatLeaf<(imm), [{ // hi16 predicate - returns true if the immediate has all zeros in the // low order bits and is a 32-bit constant: @@ -411,7 +422,11 @@ def v2i64Imm: PatLeaf<(build_vector), [{ //===----------------------------------------------------------------------===// // Operand Definitions. 
-def s7imm: Operand { +def s7imm: Operand { + let PrintMethod = "printS7ImmOperand"; +} + +def s7imm_i8: Operand { let PrintMethod = "printS7ImmOperand"; } diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll index f23355ee53c..6858dbabe64 100644 --- a/test/CodeGen/CellSPU/and_ops.ll +++ b/test/CodeGen/CellSPU/and_ops.ll @@ -4,6 +4,8 @@ ; RUN: grep andi %t1.s | count 36 ; RUN: grep andhi %t1.s | count 30 ; RUN: grep andbi %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" ; AND instruction generation: define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll new file mode 100644 index 00000000000..7aa8abc0040 --- /dev/null +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -0,0 +1,29 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep bisl %t1.s | count 6 && +; RUN: grep ila %t1.s | count 1 && +; RUN: grep rotqbyi %t1.s | count 4 && +; RUN: grep lqa %t1.s | count 4 && +; RUN: grep lqd %t1.s | count 6 && +; RUN: grep dispatch_tab %t1.s | count 10 +; ModuleID = 'call_indirect.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" +target triple = "spu-unknown-elf" + +@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16 + +define void @dispatcher(i32 %i_arg, float %f_arg) { +entry: + %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16 + tail call void %tmp2( i32 %i_arg, float %f_arg ) + %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4 + tail call void %tmp2.1( i32 %i_arg, float %f_arg ) + %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, 
i32 0, i32 2), align 4 + tail call void %tmp2.2( i32 %i_arg, float %f_arg ) + %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4 + tail call void %tmp2.3( i32 %i_arg, float %f_arg ) + %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4 + tail call void %tmp2.4( i32 %i_arg, float %f_arg ) + %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4 + tail call void %tmp2.5( i32 %i_arg, float %f_arg ) + ret void +} diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll index 3e2bc64f4d8..406a20accc8 100644 --- a/test/CodeGen/CellSPU/ctpop.ll +++ b/test/CodeGen/CellSPU/ctpop.ll @@ -3,6 +3,8 @@ ; RUN: grep andi %t1.s | count 3 && ; RUN: grep rotmi %t1.s | count 2 && ; RUN: grep rothmi %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" declare i32 @llvm.ctpop.i8(i8) declare i32 @llvm.ctpop.i16(i16) diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll index 58c56e14705..5cdb33ee681 100644 --- a/test/CodeGen/CellSPU/dp_farith.ll +++ b/test/CodeGen/CellSPU/dp_farith.ll @@ -7,6 +7,8 @@ ; RUN: grep dfnms %t1.s | count 4 ; ; This file includes double precision floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define double @fadd(double %arg1, double %arg2) { %A = add double %arg1, %arg2 diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll index a4d6dbbbd4e..0f02180b226 100644 --- a/test/CodeGen/CellSPU/eqv.ll +++ b/test/CodeGen/CellSPU/eqv.ll @@ -10,6 +10,8 @@ ; Alternatively, a ^ ~b, which the compiler will also match. 
; ModuleID = 'eqv.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll index ab485a81fd3..f9cc32e8f29 100644 --- a/test/CodeGen/CellSPU/extract_elt.ll +++ b/test/CodeGen/CellSPU/extract_elt.ll @@ -5,6 +5,8 @@ ; RUN: grep lqx %t2.s | count 27 && ; RUN: grep space %t1.s | count 8 && ; RUN: grep byte %t1.s | count 424 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define i32 @i32_extract_0(<4 x i32> %v) { entry: diff --git a/test/CodeGen/CellSPU/fcmp.ll b/test/CodeGen/CellSPU/fcmp.ll index 8ae97e6ff59..f4406d63dfb 100644 --- a/test/CodeGen/CellSPU/fcmp.ll +++ b/test/CodeGen/CellSPU/fcmp.ll @@ -3,6 +3,8 @@ ; RUN: grep fcmeq %t1.s | count 1 ; ; This file includes standard floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" declare double @fabs(double) declare float @fabsf(float) diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll index d55b12b9f51..a107bbe1f73 100644 --- a/test/CodeGen/CellSPU/fdiv.ll +++ b/test/CodeGen/CellSPU/fdiv.ll @@ -6,6 +6,8 @@ ; RUN: grep fnms %t1.s | count 2 ; ; This file includes standard floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define float @fdiv32(float %arg1, float %arg2) { %A = fdiv float %arg1, %arg2 diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll 
b/test/CodeGen/CellSPU/fneg-fabs.ll index 1abdcf6a34d..a183483cded 100644 --- a/test/CodeGen/CellSPU/fneg-fabs.ll +++ b/test/CodeGen/CellSPU/fneg-fabs.ll @@ -4,6 +4,8 @@ ; RUN: grep xor %t1.s | count 4 && ; RUN: grep and %t1.s | count 5 && ; RUN: grep andbi %t1.s | count 3 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define double @fneg_dp(double %X) { %Y = sub double -0.000000e+00, %X diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll index 19cabc4e94c..603ec058e97 100644 --- a/test/CodeGen/CellSPU/immed16.ll +++ b/test/CodeGen/CellSPU/immed16.ll @@ -1,5 +1,7 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: grep "ilh" %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define i16 @test_1() { %x = alloca i16, align 16 diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll index 6a5a3615619..4bf5bbd517a 100644 --- a/test/CodeGen/CellSPU/immed32.ll +++ b/test/CodeGen/CellSPU/immed32.ll @@ -12,6 +12,8 @@ ; RUN: grep 49077 %t1.s | count 1 && ; RUN: grep 1267 %t1.s | count 2 && ; RUN: grep 16309 %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define i32 @test_1() { ret i32 4784128 ;; ILHU via pattern (0x49000) diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll index c4eec8ba81e..4d388b1d223 100644 --- a/test/CodeGen/CellSPU/immed64.ll +++ b/test/CodeGen/CellSPU/immed64.ll @@ -11,6 +11,9 @@ ; RUN: grep 128 %t1.s | count 30 && ; RUN: grep 224 %t1.s | count 2 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" 
+target triple = "spu" + ; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202) ; 18446744073709551591 => 0x ffffffff ffffffe7 (-25) ; 18446744073708516742 => 0x ffffffff fff03586 (-1034874) diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll index 95a498428ec..b4cfea8a0b4 100644 --- a/test/CodeGen/CellSPU/int2fp.ll +++ b/test/CodeGen/CellSPU/int2fp.ll @@ -7,6 +7,9 @@ ; RUN: grep andi %t1.s | count 1 && ; RUN: grep ila %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + define float @sitofp_i32(i32 %arg1) { %A = sitofp i32 %arg1 to float ; [#uses=1] ret float %A diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll new file mode 100644 index 00000000000..5051cd56994 --- /dev/null +++ b/test/CodeGen/CellSPU/intrinsics_branch.ll @@ -0,0 +1,150 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ceq %t1.s | count 30 && +; RUN: grep ceqb %t1.s | count 10 && +; RUN: grep ceqhi %t1.s | count 5 && +; RUN: grep ceqi %t1.s | count 5 && +; RUN: grep cgt %t1.s | count 30 && +; RUN: grep cgtb %t1.s | count 10 && +; RUN: grep cgthi %t1.s | count 5 && +; RUN: grep cgti %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) + +declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>) 
+declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8) + + + +define <4 x i32> @test(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @ceqitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @ceqhitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @ceqbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> 
@cgthtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @cgtitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @cgthitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @cgtbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @clgtitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @clgthitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @clgtbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll 
b/test/CodeGen/CellSPU/intrinsics_float.ll new file mode 100644 index 00000000000..f5a192a0a7b --- /dev/null +++ b/test/CodeGen/CellSPU/intrinsics_float.ll @@ -0,0 +1,94 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep fa %t1.s | count 5 && +; RUN: grep fs %t1.s | count 5 && +; RUN: grep fm %t1.s | count 15 && +; RUN: grep fceq %t1.s | count 5 && +; RUN: grep fcmeq %t1.s | count 5 && +; RUN: grep fcgt %t1.s | count 5 && +; RUN: grep fcmgt %t1.s | count 5 && +; RUN: grep fma %t1.s | count 5 && +; RUN: grep fnms %t1.s | count 5 && +; RUN: grep fms %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) + +declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>) + +define <4 x i32> @test(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B) + %Y = 
bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} \ No newline at end of file diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll new file mode 100644 index 00000000000..e43558c0f25 --- /dev/null +++ 
b/test/CodeGen/CellSPU/intrinsics_logical.ll @@ -0,0 +1,49 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 20 && +; RUN: grep andc %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>) + +define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @anditest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @andhitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} diff --git a/test/CodeGen/CellSPU/nand.ll 
b/test/CodeGen/CellSPU/nand.ll index 091f4b2edcc..841a3ec54d6 100644 --- a/test/CodeGen/CellSPU/nand.ll +++ b/test/CodeGen/CellSPU/nand.ll @@ -3,6 +3,8 @@ ; RUN: grep and %t1.s | count 94 ; RUN: grep xsbh %t1.s | count 2 ; RUN: grep xshw %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll index 6c46b413871..91e3e2145ab 100644 --- a/test/CodeGen/CellSPU/or_ops.ll +++ b/test/CodeGen/CellSPU/or_ops.ll @@ -4,6 +4,8 @@ ; RUN: grep ori %t1.s | count 30 ; RUN: grep orhi %t1.s | count 30 ; RUN: grep orbi %t1.s | count 15 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" ; OR instruction generation: define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll index 6983c184c3c..0386838a555 100644 --- a/test/CodeGen/CellSPU/rotate_ops.ll +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -8,6 +8,8 @@ ; RUN grep rothi.*,.3 %t1.s | count 1 ; RUN: grep andhi %t1.s | count 4 ; RUN: grep shlhi %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" ; Vector rotates are not currently supported in gcc or llvm assembly. These are ; not tested. 
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll index 3cbb7a06dc7..b1600bf8f2b 100644 --- a/test/CodeGen/CellSPU/select_bits.ll +++ b/test/CodeGen/CellSPU/select_bits.ll @@ -3,6 +3,8 @@ ; RUN: grep and %t1.s | count 2 ; RUN: grep xsbh %t1.s | count 1 ; RUN: grep xshw %t1.s | count 2 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index 162ca16776b..4256d91fdb3 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -5,6 +5,8 @@ ; RUN: grep shli %t1.s | count 51 ; RUN: grep xshw %t1.s | count 5 ; RUN: grep and %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" ; Vector shifts are not currently supported in gcc or llvm assembly. These are ; not tested. 
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll index c7e719982d6..473e9a3ecec 100644 --- a/test/CodeGen/CellSPU/sp_farith.ll +++ b/test/CodeGen/CellSPU/sp_farith.ll @@ -8,6 +8,8 @@ ; ; This file includes standard floating point arithmetic instructions ; NOTE fdiv is tested separately since it is a compound operation +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define float @fp_add(float %arg1, float %arg2) { %A = add float %arg1, %arg2 ; [#uses=1] diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll new file mode 100644 index 00000000000..1159b55fab1 --- /dev/null +++ b/test/CodeGen/CellSPU/struct_1.ll @@ -0,0 +1,107 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep lqa %t1.s | count 10 && +; RUN: grep lqd %t1.s | count 2 && +; RUN: grep rotqbyi %t1.s | count 5 && +; RUN: grep xshw %t1.s | count 1 && +; RUN: grep andi %t1.s | count 4 && +; RUN: grep cbd %t1.s | count 3 && +; RUN: grep chd %t1.s | count 1 && +; RUN: grep cwd %t1.s | count 1 && +; RUN: grep shufb %t1.s | count 5 && +; RUN: grep stqa %t1.s | count 5 +; ModuleID = 'struct_1.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; struct hackstate { +; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3) +; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3) +; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3) +; int i1; // offset 4 (rotate left by 4 bytes to byte 0) +; short s1; // offset 8 (rotate left by 6 bytes to byte 2) +; int i2; // offset 12 [ignored] +; unsigned char c4; // offset 16 [ignored] +; unsigned char c5; // offset 17 [ignored] +; unsigned char c6; // offset 18 [ignored] +; unsigned char c7; // offset 19 (no rotate, in 
preferred slot) +; int i3; // offset 20 [ignored] +; int i4; // offset 24 [ignored] +; int i5; // offset 28 [ignored] +; int i6; // offset 32 (no rotate, in preferred slot) +; } +%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 } + +; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +@state = global %struct.hackstate zeroinitializer, align 16 + +define i8 @get_hackstate_c1() zeroext { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret i8 %tmp2 +} + +define i8 @get_hackstate_c2() zeroext { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret i8 %tmp2 +} + +define i8 @get_hackstate_c3() zeroext { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret i8 %tmp2 +} + +define i32 @get_hackstate_i1() { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret i32 %tmp2 +} + +define i16 @get_hackstate_s1() signext { +entry: + %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret i16 %tmp2 +} + +define i8 @get_hackstate_c7() zeroext { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 + ret i8 %tmp2 +} + +define i32 @get_hackstate_i6() zeroext { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret i32 %tmp2 +} + +define void @set_hackstate_c1(i8 zeroext %c) { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret void +} + +define void @set_hackstate_c2(i8 zeroext %c) { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret void +} + +define void @set_hackstate_c3(i8 zeroext %c) { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret void +} + +define void @set_hackstate_i1(i32 %i) { 
+entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret void +} + +define void @set_hackstate_s1(i16 signext %s) { +entry: + store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret void +}