Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-10-30 16:17:05 +00:00)
			
		
		
		
- Start moving target-dependent nodes that could be represented by an
  instruction sequence and cannot ordinarily be simplified by DAGCombine
  into the various target description files or SPUDAGToDAGISel.cpp. This
  makes some 64-bit operations legal.
- Eliminate target-dependent ISD enums.
- Update tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61508 91177308-0d34-0410-b5e6-96231b3b80d8

This commit is contained in:
@@ -15,6 +15,13 @@
 //
 include "llvm/Target/Target.td"
 
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+  dag Fragment = frag;
+}
+
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//

@@ -1,8 +1,17 @@
+//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//
+//                     Cell SPU 64-bit operations
+//
+// Primary author: Scott Michel (scottm@aero.org)
+//===----------------------------------------------------------------------===//
+
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // 64-bit comparisons:
 //
 // 1. The instruction sequences for vector vice scalar differ by a
-//    constant.
+//    constant. In the scalar case, we're only interested in the
+//    top two 32-bit slots, whereas we're interested in an exact
+//    all-four-slot match in the vector case.
 //
 // 2. There are no "immediate" forms, since loading 64-bit constants
 //    could be a constant pool load.
@@ -10,10 +19,10 @@
 // 3. i64 setcc results are i32, which are subsequently converted to a FSM
 //    mask when used in a select pattern.
 //
-// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask
-//    (TODO)
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+//    [Note: this may be moot, since gb produces v4i32 or r32.]
 //
-// M00$E Kan be Pretty N@sTi!!!!! (appologies to Monty!)
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (appologies to Monty!)
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 
 // selb instruction definition for i64. Note that the selection mask is
@@ -22,17 +31,15 @@ def SELBr64_cond:
   SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
            [/* no pattern */]>;
 
-class CodeFrag<dag frag> {
-  dag Fragment = frag;
-}
-
-class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>:
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
   Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
-      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
+      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
 
-class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>:
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
   Pat<(cond R64C:$rA, R64C:$rB),
-      (XORIr32 cmpare.Fragment, -1)>;
+      (XORIr32 compare.Fragment, -1)>;
 
 // The i64 seteq fragment that does the scalar->vector conversion and
 // comparison:
@@ -64,14 +71,13 @@ multiclass CompareEqual64 {
 defm I64EQ: CompareEqual64;
 
 def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
 
-def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-          I64EQv2i64.Fragment>;
-
-def I64Select:
-    Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
-        (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
+def : Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
+          (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
 
+// i64 setne:
 def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
 
-def : I64SELECTNegCond<setne, I64EQr64>;
+// i64 setugt:

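Editor's note: the select patterns above route an i32 comparison result through FSMr32 to build a per-bit mask and then through SELBr64_cond to pick between the two i64 operands, while I64SETCCNegCond/I64SELECTNegCond reuse the same equality fragment and just invert it with XORIr32 ..., -1. The sketch below is plain C++ with invented helper names (fsm_mask64, selb), assuming SELB's usual rt = (ra & ~mask) | (rb & mask) behavior; it is not backend code, only a model of the data flow.

#include <cassert>
#include <cstdint>

// Invented helpers modeling the assumed semantics; not LLVM or SPU library code.
static uint64_t fsm_mask64(uint32_t cond) {
  // FSM forms a select mask from the condition word.  With the
  // ZeroOrNegativeOneBooleanContent convention the i32 setcc result is 0 or -1,
  // so the formed 64-bit mask is simply all-zeros or all-ones (the per-slot bit
  // mapping of the real instruction is glossed over here).
  return cond ? ~0ull : 0ull;
}

static uint64_t selb(uint64_t ra, uint64_t rb, uint64_t mask) {
  // SELB: result bits come from rb where the mask bit is 1, from ra elsewhere.
  return (ra & ~mask) | (rb & mask);
}

int main() {
  uint64_t t = 0x1111111111111111ull, f = 0x2222222222222222ull;
  // select(cond, t, f) is matched as (SELBr64_cond f, t, (FSMr32 cond)):
  assert(selb(f, t, fsm_mask64(0xFFFFFFFFu)) == t);   // condition true
  assert(selb(f, t, fsm_mask64(0x0u)) == f);          // condition false
  return 0;
}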
| @@ -149,7 +149,7 @@ namespace { | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   bool |   bool | ||||||
|   isHighLow(const SDValue &Op)  |   isHighLow(const SDValue &Op) | ||||||
|   { |   { | ||||||
|     return (Op.getOpcode() == SPUISD::IndirectAddr |     return (Op.getOpcode() == SPUISD::IndirectAddr | ||||||
|             && ((Op.getOperand(0).getOpcode() == SPUISD::Hi |             && ((Op.getOperand(0).getOpcode() == SPUISD::Hi | ||||||
| @@ -229,14 +229,14 @@ public: | |||||||
|     TM(tm), |     TM(tm), | ||||||
|     SPUtli(*tm.getTargetLowering()) |     SPUtli(*tm.getTargetLowering()) | ||||||
|   {} |   {} | ||||||
|      |  | ||||||
|   virtual bool runOnFunction(Function &Fn) { |   virtual bool runOnFunction(Function &Fn) { | ||||||
|     // Make sure we re-emit a set of the global base reg if necessary |     // Make sure we re-emit a set of the global base reg if necessary | ||||||
|     GlobalBaseReg = 0; |     GlobalBaseReg = 0; | ||||||
|     SelectionDAGISel::runOnFunction(Fn); |     SelectionDAGISel::runOnFunction(Fn); | ||||||
|     return true; |     return true; | ||||||
|   } |   } | ||||||
|     |  | ||||||
|   /// getI32Imm - Return a target constant with the specified value, of type |   /// getI32Imm - Return a target constant with the specified value, of type | ||||||
|   /// i32. |   /// i32. | ||||||
|   inline SDValue getI32Imm(uint32_t Imm) { |   inline SDValue getI32Imm(uint32_t Imm) { | ||||||
| @@ -248,7 +248,7 @@ public: | |||||||
|   inline SDValue getI64Imm(uint64_t Imm) { |   inline SDValue getI64Imm(uint64_t Imm) { | ||||||
|     return CurDAG->getTargetConstant(Imm, MVT::i64); |     return CurDAG->getTargetConstant(Imm, MVT::i64); | ||||||
|   } |   } | ||||||
|      |  | ||||||
|   /// getSmallIPtrImm - Return a target constant of pointer type. |   /// getSmallIPtrImm - Return a target constant of pointer type. | ||||||
|   inline SDValue getSmallIPtrImm(unsigned Imm) { |   inline SDValue getSmallIPtrImm(unsigned Imm) { | ||||||
|     return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); |     return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); | ||||||
@@ -258,6 +258,15 @@ public:
   /// target-specific node if it hasn't already been changed.
   SDNode *Select(SDValue Op);
 
+  //! Emit the instruction sequence for i64 shl
+  SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
+
+  //! Emit the instruction sequence for i64 srl
+  SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
+
+  //! Emit the instruction sequence for i64 sra
+  SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
+
   //! Returns true if the address N is an A-form (local store) address
   bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
                        SDValue &Index);
| @@ -287,7 +296,7 @@ public: | |||||||
|     switch (ConstraintCode) { |     switch (ConstraintCode) { | ||||||
|     default: return true; |     default: return true; | ||||||
|     case 'm':   // memory |     case 'm':   // memory | ||||||
|       if (!SelectDFormAddr(Op, Op, Op0, Op1)  |       if (!SelectDFormAddr(Op, Op, Op0, Op1) | ||||||
|           && !SelectAFormAddr(Op, Op, Op0, Op1)) |           && !SelectAFormAddr(Op, Op, Op0, Op1)) | ||||||
|         SelectXFormAddr(Op, Op, Op0, Op1); |         SelectXFormAddr(Op, Op, Op0, Op1); | ||||||
|       break; |       break; | ||||||
| @@ -306,7 +315,7 @@ public: | |||||||
| #endif | #endif | ||||||
|       break; |       break; | ||||||
|     } |     } | ||||||
|        |  | ||||||
|     OutOps.push_back(Op0); |     OutOps.push_back(Op0); | ||||||
|     OutOps.push_back(Op1); |     OutOps.push_back(Op1); | ||||||
|     return false; |     return false; | ||||||
| @@ -318,14 +327,14 @@ public: | |||||||
|  |  | ||||||
|   virtual const char *getPassName() const { |   virtual const char *getPassName() const { | ||||||
|     return "Cell SPU DAG->DAG Pattern Instruction Selection"; |     return "Cell SPU DAG->DAG Pattern Instruction Selection"; | ||||||
|   }  |   } | ||||||
|      |  | ||||||
|   /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for |   /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for | ||||||
|   /// this target when scheduling the DAG. |   /// this target when scheduling the DAG. | ||||||
|   virtual HazardRecognizer *CreateTargetHazardRecognizer() { |   virtual HazardRecognizer *CreateTargetHazardRecognizer() { | ||||||
|     const TargetInstrInfo *II = TM.getInstrInfo(); |     const TargetInstrInfo *II = TM.getInstrInfo(); | ||||||
|     assert(II && "No InstrInfo?"); |     assert(II && "No InstrInfo?"); | ||||||
|     return new SPUHazardRecognizer(*II);  |     return new SPUHazardRecognizer(*II); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // Include the pieces autogenerated from the target description. |   // Include the pieces autogenerated from the target description. | ||||||
| @@ -375,7 +384,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, | |||||||
|     abort(); |     abort(); | ||||||
|     /*NOTREACHED*/ |     /*NOTREACHED*/ | ||||||
|  |  | ||||||
|   case SPUISD::AFormAddr:  |   case SPUISD::AFormAddr: | ||||||
|     // Just load from memory if there's only a single use of the location, |     // Just load from memory if there's only a single use of the location, | ||||||
|     // otherwise, this will get handled below with D-form offset addresses |     // otherwise, this will get handled below with D-form offset addresses | ||||||
|     if (N.hasOneUse()) { |     if (N.hasOneUse()) { | ||||||
| @@ -404,7 +413,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, | |||||||
|   return false; |   return false; | ||||||
| } | } | ||||||
|  |  | ||||||
| bool  | bool | ||||||
| SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, | SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, | ||||||
|                                   SDValue &Base) { |                                   SDValue &Base) { | ||||||
|   const int minDForm2Offset = -(1 << 7); |   const int minDForm2Offset = -(1 << 7); | ||||||
| @@ -527,7 +536,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, | |||||||
|         ConstantSDNode *CN = cast<ConstantSDNode>(Op0); |         ConstantSDNode *CN = cast<ConstantSDNode>(Op0); | ||||||
|         offset = int32_t(CN->getSExtValue()); |         offset = int32_t(CN->getSExtValue()); | ||||||
|         idxOp = Op1; |         idxOp = Op1; | ||||||
|       }  |       } | ||||||
|  |  | ||||||
|       if (offset >= minOffset && offset <= maxOffset) { |       if (offset >= minOffset && offset <= maxOffset) { | ||||||
|         Base = CurDAG->getTargetConstant(offset, PtrTy); |         Base = CurDAG->getTargetConstant(offset, PtrTy); | ||||||
@@ -622,27 +631,20 @@ SPUDAGToDAGISel::Select(SDValue Op) {
   if (N->isMachineOpcode()) {
     return NULL;   // Already selected.
   } else if (Opc == ISD::FrameIndex) {
-    // Selects to (add $sp, FI * stackSlotSize)
-    int FI =
-      SPUFrameInfo::FItoStackOffset(cast<FrameIndexSDNode>(N)->getIndex());
-    MVT PtrVT = SPUtli.getPointerTy();
+    int FI = cast<FrameIndexSDNode>(N)->getIndex();
+    SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
+    SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
 
-    // Adjust stack slot to actual offset in frame:
-    if (isS10Constant(FI)) {
-      DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AIr32 $sp, "
-                 << FI
-                 << "\n");
+    if (FI < 128) {
       NewOpc = SPU::AIr32;
-      Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT);
-      Ops[1] = CurDAG->getTargetConstant(FI, PtrVT);
+      Ops[0] = TFI;
+      Ops[1] = Imm0;
       n_ops = 2;
     } else {
-      DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with Ar32 $sp, "
-                 << FI
-                 << "\n");
       NewOpc = SPU::Ar32;
-      Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT);
-      Ops[1] = CurDAG->getConstant(FI, PtrVT);
+      Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
+      Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, Op.getValueType(),
+                                             TFI, Imm0), 0);
       n_ops = 2;
     }
   } else if (Opc == ISD::ZERO_EXTEND) {
@@ -661,6 +663,18 @@ SPUDAGToDAGISel::Select(SDValue Op) {
         n_ops = 2;
       }
     }
+  } else if (Opc == ISD::SHL) {
+    if (OpVT == MVT::i64) {
+      return SelectSHLi64(Op, OpVT);
+    }
+  } else if (Opc == ISD::SRL) {
+    if (OpVT == MVT::i64) {
+      return SelectSRLi64(Op, OpVT);
+    }
+  } else if (Opc == ISD::SRA) {
+    if (OpVT == MVT::i64) {
+      return SelectSRAi64(Op, OpVT);
+    }
   } else if (Opc == SPUISD::LDRESULT) {
     // Custom select instructions for LDRESULT
     MVT VT = N->getValueType(0);
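Editor's note: the SelectSHLi64/SelectSRLi64 routines this hunk dispatches to (added further down in the file) split the shift amount into a byte count and a bit count — bytes via a right shift by 3 (ROTMIr32 ..., 3) and bits via a mask with 7 (ANDIr32 ..., 7) — and then apply a quadword shift by bytes followed by one by bits. The identity behind the split is just 8*(amt >> 3) + (amt & 7) == amt. The standalone check below is not LLVM code; it models the value as a plain 64-bit integer and omits the zero-fill of the low quadword slots that the real sequence performs.

#include <cassert>
#include <cstdint>

// Shift by whole bytes first, then by the leftover 0-7 bits, mirroring the
// SHLQBYI/SHLQBII (and ROTQMBYI/ROTQMBII) pairing in the selected code.
static uint64_t shl_by_bytes_then_bits(uint64_t x, unsigned amt) {
  unsigned bytes = amt >> 3;   // cf. the rotate-by-3 step
  unsigned bits  = amt & 7;    // cf. the and-with-7 step
  uint64_t r = x;
  if (bytes) r <<= 8 * bytes;
  if (bits)  r <<= bits;
  return r;
}

int main() {
  for (unsigned amt = 0; amt < 64; ++amt)
    for (uint64_t x : {0x0123456789abcdefull, ~0ull, 1ull})
      assert(shl_by_bytes_then_bits(x, amt) == x << amt);
  return 0;
}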
@@ -713,7 +727,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
       n_ops = 2;
     }
   }
-  
+
   if (n_ops > 0) {
     if (N->hasOneUse())
       return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
| @@ -723,7 +737,213 @@ SPUDAGToDAGISel::Select(SDValue Op) { | |||||||
|     return SelectCode(Op); |     return SelectCode(Op); | ||||||
| } | } | ||||||
|  |  | ||||||
| /// createPPCISelDag - This pass converts a legalized DAG into a  | /*! | ||||||
|  |  * Emit the instruction sequence for i64 left shifts. The basic algorithm | ||||||
|  |  * is to fill the bottom two word slots with zeros so that zeros are shifted | ||||||
|  |  * in as the entire quadword is shifted left. | ||||||
|  |  * | ||||||
|  |  * \note This code could also be used to implement v2i64 shl. | ||||||
|  |  * | ||||||
|  |  * @param Op The shl operand | ||||||
|  |  * @param OpVT Op's machine value value type (doesn't need to be passed, but | ||||||
|  |  * makes life easier.) | ||||||
|  |  * @return The SDNode with the entire instruction sequence | ||||||
|  |  */ | ||||||
|  | SDNode * | ||||||
|  | SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) { | ||||||
|  |   SDValue Op0 = Op.getOperand(0); | ||||||
|  |   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||||
|  |   SDValue ShiftAmt = Op.getOperand(1); | ||||||
|  |   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||||
|  |   SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0; | ||||||
|  |   SDValue SelMaskVal; | ||||||
|  |  | ||||||
|  |   VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0); | ||||||
|  |   SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); | ||||||
|  |   SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, SelMaskVal); | ||||||
|  |   ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, VecVT, | ||||||
|  |                                    CurDAG->getTargetConstant(0, OpVT)); | ||||||
|  |   VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, VecVT, | ||||||
|  |                                  SDValue(ZeroFill, 0), | ||||||
|  |                                  SDValue(VecOp0, 0), | ||||||
|  |                                  SDValue(SelMask, 0)); | ||||||
|  |  | ||||||
|  |   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||||
|  |     unsigned bytes = unsigned(CN->getZExtValue()) >> 3; | ||||||
|  |     unsigned bits = unsigned(CN->getZExtValue()) & 7; | ||||||
|  |  | ||||||
|  |     if (bytes > 0) { | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::SHLQBYIv2i64, VecVT, | ||||||
|  |                               SDValue(VecOp0, 0), | ||||||
|  |                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (bits > 0) { | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::SHLQBIIv2i64, VecVT, | ||||||
|  |                               SDValue((Shift != 0 ? Shift : VecOp0), 0), | ||||||
|  |                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     SDNode *Bytes = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, | ||||||
|  |                             CurDAG->getTargetConstant(3, ShiftAmtVT)); | ||||||
|  |     SDNode *Bits = | ||||||
|  |       CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, | ||||||
|  |                             CurDAG->getTargetConstant(7, ShiftAmtVT)); | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::SHLQBYv2i64, VecVT, | ||||||
|  |                             SDValue(VecOp0, 0), SDValue(Bytes, 0)); | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::SHLQBIv2i64, VecVT, | ||||||
|  |                             SDValue(Shift, 0), SDValue(Bits, 0)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /*! | ||||||
|  |  * Emit the instruction sequence for i64 logical right shifts. | ||||||
|  |  * | ||||||
|  |  * @param Op The shl operand | ||||||
|  |  * @param OpVT Op's machine value value type (doesn't need to be passed, but | ||||||
|  |  * makes life easier.) | ||||||
|  |  * @return The SDNode with the entire instruction sequence | ||||||
|  |  */ | ||||||
|  | SDNode * | ||||||
|  | SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) { | ||||||
|  |   SDValue Op0 = Op.getOperand(0); | ||||||
|  |   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||||
|  |   SDValue ShiftAmt = Op.getOperand(1); | ||||||
|  |   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||||
|  |   SDNode *VecOp0, *Shift = 0; | ||||||
|  |  | ||||||
|  |   VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0); | ||||||
|  |  | ||||||
|  |   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||||
|  |     unsigned bytes = unsigned(CN->getZExtValue()) >> 3; | ||||||
|  |     unsigned bits = unsigned(CN->getZExtValue()) & 7; | ||||||
|  |  | ||||||
|  |     if (bytes > 0) { | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, VecVT, | ||||||
|  |                               SDValue(VecOp0, 0), | ||||||
|  |                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (bits > 0) { | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, VecVT, | ||||||
|  |                               SDValue((Shift != 0 ? Shift : VecOp0), 0), | ||||||
|  |                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     SDNode *Bytes = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, | ||||||
|  |                             CurDAG->getTargetConstant(3, ShiftAmtVT)); | ||||||
|  |     SDNode *Bits = | ||||||
|  |       CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, | ||||||
|  |                             CurDAG->getTargetConstant(7, ShiftAmtVT)); | ||||||
|  |  | ||||||
|  |     // Ensure that the shift amounts are negated! | ||||||
|  |     Bytes = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||||
|  |                                   SDValue(Bytes, 0), | ||||||
|  |                                   CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||||
|  |  | ||||||
|  |     Bits = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||||
|  |                                  SDValue(Bits, 0), | ||||||
|  |                                  CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||||
|  |  | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTQMBYv2i64, VecVT, | ||||||
|  |                             SDValue(VecOp0, 0), SDValue(Bytes, 0)); | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTQMBIv2i64, VecVT, | ||||||
|  |                             SDValue(Shift, 0), SDValue(Bits, 0)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /*! | ||||||
|  |  * Emit the instruction sequence for i64 arithmetic right shifts. | ||||||
|  |  * | ||||||
|  |  * @param Op The shl operand | ||||||
|  |  * @param OpVT Op's machine value value type (doesn't need to be passed, but | ||||||
|  |  * makes life easier.) | ||||||
|  |  * @return The SDNode with the entire instruction sequence | ||||||
|  |  */ | ||||||
|  | SDNode * | ||||||
|  | SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { | ||||||
|  |   // Promote Op0 to vector | ||||||
|  |   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||||
|  |   SDValue ShiftAmt = Op.getOperand(1); | ||||||
|  |   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||||
|  |  | ||||||
|  |   SDNode *VecOp0 = | ||||||
|  |     CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op.getOperand(0)); | ||||||
|  |  | ||||||
|  |   SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); | ||||||
|  |   SDNode *SignRot = | ||||||
|  |     CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, MVT::v2i64, | ||||||
|  |                           SDValue(VecOp0, 0), SignRotAmt); | ||||||
|  |   SDNode *UpperHalfSign = | ||||||
|  |     CurDAG->getTargetNode(SPU::ORi32_v4i32, MVT::i32, SDValue(SignRot, 0)); | ||||||
|  |  | ||||||
|  |   SDNode *UpperHalfSignMask = | ||||||
|  |     CurDAG->getTargetNode(SPU::FSM64r32, VecVT, SDValue(UpperHalfSign, 0)); | ||||||
|  |   SDNode *UpperLowerMask = | ||||||
|  |     CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, | ||||||
|  |                           CurDAG->getTargetConstant(0xff00ULL, MVT::i16)); | ||||||
|  |   SDNode *UpperLowerSelect = | ||||||
|  |     CurDAG->getTargetNode(SPU::SELBv2i64, VecVT, | ||||||
|  |                           SDValue(UpperHalfSignMask, 0), | ||||||
|  |                           SDValue(VecOp0, 0), | ||||||
|  |                           SDValue(UpperLowerMask, 0)); | ||||||
|  |  | ||||||
|  |   SDNode *Shift = 0; | ||||||
|  |  | ||||||
|  |   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||||
|  |     unsigned bytes = unsigned(CN->getZExtValue()) >> 3; | ||||||
|  |     unsigned bits = unsigned(CN->getZExtValue()) & 7; | ||||||
|  |  | ||||||
|  |     if (bytes > 0) { | ||||||
|  |       bytes = 31 - bytes; | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::ROTQBYIv2i64, VecVT, | ||||||
|  |                               SDValue(UpperLowerSelect, 0), | ||||||
|  |                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (bits > 0) { | ||||||
|  |       bits = 8 - bits; | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::ROTQBIIv2i64, VecVT, | ||||||
|  |                               SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0), | ||||||
|  |                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     SDNode *NegShift = | ||||||
|  |       CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||||
|  |  | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, VecVT, | ||||||
|  |                             SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0)); | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTQBIv2i64, VecVT, | ||||||
|  |                             SDValue(Shift, 0), SDValue(NegShift, 0)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// createSPUISelDag - This pass converts a legalized DAG into a | ||||||
| /// SPU-specific DAG, ready for instruction scheduling. | /// SPU-specific DAG, ready for instruction scheduling. | ||||||
| /// | /// | ||||||
| FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { | FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { | ||||||
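Editor's note on the SelectSRAi64 sequence added above (this is a reading of the emitted instructions, not code from the tree): the i64 value sits in the upper doubleword of a quadword, the lower doubleword is filled with copies of the sign (the FSMBIv2i64 0xff00 / SELBv2i64 step), and the quadword is then rotated by the negated shift amount — the variable-amount paths subtract the count from zero with SFIr32 before the rotate — so sign bits rotate in from below. The model below checks that this reproduces a 64-bit arithmetic right shift; it assumes GCC/Clang's unsigned __int128, and both helper names are invented for the sketch.

#include <cassert>
#include <cstdint>

// Portable reference for arithmetic right shift of a 64-bit value.
static int64_t sra_ref(int64_t x, unsigned n) {
  uint64_t u = static_cast<uint64_t>(x);
  return static_cast<int64_t>(x < 0 ? ~(~u >> n) : u >> n);
}

// Model of the quadword trick: x in the upper doubleword, sign copies in the
// lower doubleword, rotate the 128-bit quantity right by n, keep the upper half.
static int64_t sra_via_quadword(int64_t x, unsigned n) {
  unsigned __int128 quad =
      (static_cast<unsigned __int128>(static_cast<uint64_t>(x)) << 64) |
      (x < 0 ? ~0ull : 0ull);
  if (n) quad = (quad >> n) | (quad << (128 - n));  // rotate right by n bits
  return static_cast<int64_t>(static_cast<uint64_t>(quad >> 64));
}

int main() {
  for (unsigned n = 0; n < 64; ++n)
    for (int64_t x : {int64_t(0x0123456789abcdef), int64_t(-2), int64_t(1)})
      assert(sra_via_quadword(x, n) == sra_ref(x, n));
  return 0;
}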
@@ -204,10 +204,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 
-  // SPU needs custom lowering for shift left/right for i64
-  setOperationAction(ISD::SHL,  MVT::i64,    Custom);
-  setOperationAction(ISD::SRL,  MVT::i64,    Custom);
-  setOperationAction(ISD::SRA,  MVT::i64,    Custom);
+  // Make these operations legal and handle them during instruction selection:
+  setOperationAction(ISD::SHL,  MVT::i64,    Legal);
+  setOperationAction(ISD::SRL,  MVT::i64,    Legal);
+  setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 
   // Custom lower i8, i32 and i64 multiplications
   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
@@ -215,6 +215,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::MUL,  MVT::i64,    Expand);   // libcall
 
   // Need to custom handle (some) common i8, i64 math ops
+  setOperationAction(ISD::ADD,  MVT::i8,     Custom);
   setOperationAction(ISD::ADD,  MVT::i64,    Custom);
   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
   setOperationAction(ISD::SUB,  MVT::i64,    Custom);
@@ -249,7 +250,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   // Zero extension and sign extension for i64 have to be
   // custom legalized
   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
-  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
 
   // Custom lower i128 -> i64 truncates
@@ -262,7 +262,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 
   // FDIV on SPU requires custom lowering
-  setOperationAction(ISD::FDIV, MVT::f32, Custom);
   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // libcall
 
   // SPU has [U|S]INT_TO_FP
@@ -340,7 +339,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
     setOperationAction(ISD::ADD , VT, Legal);
     setOperationAction(ISD::SUB , VT, Legal);
     // mul has to be custom lowered.
-    setOperationAction(ISD::MUL , VT, Custom);
+    // TODO: v2i64 vector multiply
+    setOperationAction(ISD::MUL , VT, Legal);
 
     setOperationAction(ISD::AND   , VT, Legal);
     setOperationAction(ISD::OR    , VT, Legal);
@@ -354,7 +354,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
     setOperationAction(ISD::SREM, VT, Expand);
     setOperationAction(ISD::UDIV, VT, Expand);
     setOperationAction(ISD::UREM, VT, Expand);
-    setOperationAction(ISD::FDIV, VT, Custom);
 
     // Custom lower build_vector, constant pool spills, insert and
     // extract vector elements:
@@ -371,9 +370,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 
-  // FIXME: This is only temporary until I put all vector multiplications in
-  // SPUInstrInfo.td:
-  setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 
   setShiftAmountType(MVT::i32);
   setBooleanContents(ZeroOrNegativeOneBooleanContent);
| @@ -411,10 +408,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const | |||||||
|     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; |     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; | ||||||
|     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; |     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; | ||||||
|     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; |     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; | ||||||
|     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; |  | ||||||
|     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; |  | ||||||
|     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; |  | ||||||
|     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; |  | ||||||
|     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; |     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; | ||||||
|     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; |     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; | ||||||
|     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; |     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; | ||||||
| @@ -422,21 +415,12 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const | |||||||
|     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; |     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; | ||||||
|     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; |     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; | ||||||
|     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; |     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; | ||||||
|     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] = |  | ||||||
|       "SPUISD::ROTQUAD_RZ_BYTES"; |  | ||||||
|     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] = |  | ||||||
|       "SPUISD::ROTQUAD_RZ_BITS"; |  | ||||||
|     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; |  | ||||||
|     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = |  | ||||||
|       "SPUISD::ROTBYTES_LEFT_BITS"; |  | ||||||
|     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; |     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; | ||||||
|     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; |     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; | ||||||
|     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; |     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; | ||||||
|     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; |     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; | ||||||
|     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; |     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; | ||||||
|     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; |     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; | ||||||
|     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; |  | ||||||
|     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; |  | ||||||
|     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; |     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -1922,182 +1906,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { | |||||||
|   return SDValue(); |   return SDValue(); | ||||||
| } | } | ||||||
|  |  | ||||||
| static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) { |  | ||||||
|   switch (Op.getValueType().getSimpleVT()) { |  | ||||||
|   default: |  | ||||||
|     cerr << "CellSPU: Unknown vector multiplication, got " |  | ||||||
|          << Op.getValueType().getMVTString() |  | ||||||
|          << "\n"; |  | ||||||
|     abort(); |  | ||||||
|     /*NOTREACHED*/ |  | ||||||
|  |  | ||||||
|   case MVT::v4i32: |  | ||||||
| 	  break; |  | ||||||
|  |  | ||||||
|   // Multiply two v8i16 vectors (pipeline friendly version): |  | ||||||
|   // a) multiply lower halves, mask off upper 16-bit of 32-bit product |  | ||||||
|   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes) |  | ||||||
|   // c) Use SELB to select upper and lower halves from the intermediate results |  | ||||||
|   // |  | ||||||
|   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the |  | ||||||
|   // dual-issue. This code does manage to do this, even if it's a little on |  | ||||||
|   // the wacky side |  | ||||||
|   case MVT::v8i16: { |  | ||||||
|     MachineFunction &MF = DAG.getMachineFunction(); |  | ||||||
|     MachineRegisterInfo &RegInfo = MF.getRegInfo(); |  | ||||||
|     SDValue Chain = Op.getOperand(0); |  | ||||||
|     SDValue rA = Op.getOperand(0); |  | ||||||
|     SDValue rB = Op.getOperand(1); |  | ||||||
|     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); |  | ||||||
|     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); |  | ||||||
|  |  | ||||||
|     SDValue FSMBOp = |  | ||||||
|       DAG.getCopyToReg(Chain, FSMBIreg, |  | ||||||
|                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, |  | ||||||
|                                    DAG.getConstant(0xcccc, MVT::i16))); |  | ||||||
|  |  | ||||||
|     SDValue HHProd = |  | ||||||
|       DAG.getCopyToReg(FSMBOp, HiProdReg, |  | ||||||
|                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); |  | ||||||
|  |  | ||||||
|     SDValue HHProd_v4i32 = |  | ||||||
|       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, |  | ||||||
|                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(SPUISD::SELB, MVT::v8i16, |  | ||||||
|                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), |  | ||||||
|                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), |  | ||||||
|                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, |  | ||||||
|                                                HHProd_v4i32, |  | ||||||
|                                                DAG.getConstant(16, MVT::i16))), |  | ||||||
|                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // This M00sE is N@stI! (apologies to Monty Python) |  | ||||||
|   // |  | ||||||
|   // SPU doesn't know how to do any 8-bit multiplication, so the solution |  | ||||||
|   // is to break it all apart, sign extend, and reassemble the various |  | ||||||
|   // intermediate products. |  | ||||||
|   case MVT::v16i8: { |  | ||||||
|     SDValue rA = Op.getOperand(0); |  | ||||||
|     SDValue rB = Op.getOperand(1); |  | ||||||
|     SDValue c8 = DAG.getConstant(8, MVT::i32); |  | ||||||
|     SDValue c16 = DAG.getConstant(16, MVT::i32); |  | ||||||
|  |  | ||||||
|     SDValue LLProd = |  | ||||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); |  | ||||||
|  |  | ||||||
|     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); |  | ||||||
|  |  | ||||||
|     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); |  | ||||||
|  |  | ||||||
|     SDValue LHProd = |  | ||||||
|       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, |  | ||||||
|                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); |  | ||||||
|  |  | ||||||
|     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, |  | ||||||
|                                      DAG.getConstant(0x2222, MVT::i16)); |  | ||||||
|  |  | ||||||
|     SDValue LoProdParts = |  | ||||||
|       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, |  | ||||||
|                   DAG.getNode(SPUISD::SELB, MVT::v8i16, |  | ||||||
|                               LLProd, LHProd, FSMBmask)); |  | ||||||
|  |  | ||||||
|     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32); |  | ||||||
|  |  | ||||||
|     SDValue LoProd = |  | ||||||
|       DAG.getNode(ISD::AND, MVT::v4i32, |  | ||||||
|                   LoProdParts, |  | ||||||
|                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, |  | ||||||
|                               LoProdMask, LoProdMask, |  | ||||||
|                               LoProdMask, LoProdMask)); |  | ||||||
|  |  | ||||||
|     SDValue rAH = |  | ||||||
|       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); |  | ||||||
|  |  | ||||||
|     SDValue rBH = |  | ||||||
|       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); |  | ||||||
|  |  | ||||||
|     SDValue HLProd = |  | ||||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); |  | ||||||
|  |  | ||||||
|     SDValue HHProd_1 = |  | ||||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, |  | ||||||
|                               DAG.getNode(SPUISD::VEC_SRA, |  | ||||||
|                                           MVT::v4i32, rAH, c8)), |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, |  | ||||||
|                               DAG.getNode(SPUISD::VEC_SRA, |  | ||||||
|                                           MVT::v4i32, rBH, c8))); |  | ||||||
|  |  | ||||||
|     SDValue HHProd = |  | ||||||
|       DAG.getNode(SPUISD::SELB, MVT::v8i16, |  | ||||||
|                   HLProd, |  | ||||||
|                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), |  | ||||||
|                   FSMBmask); |  | ||||||
|  |  | ||||||
|     SDValue HiProd = |  | ||||||
|       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, |  | ||||||
|                        DAG.getNode(ISD::OR, MVT::v4i32, |  | ||||||
|                                    LoProd, HiProd)); |  | ||||||
|   } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   return SDValue(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) { |  | ||||||
|   MachineFunction &MF = DAG.getMachineFunction(); |  | ||||||
|   MachineRegisterInfo &RegInfo = MF.getRegInfo(); |  | ||||||
|  |  | ||||||
|   SDValue A = Op.getOperand(0); |  | ||||||
|   SDValue B = Op.getOperand(1); |  | ||||||
|   MVT VT = Op.getValueType(); |  | ||||||
|  |  | ||||||
|   unsigned VRegBR, VRegC; |  | ||||||
|  |  | ||||||
|   if (VT == MVT::f32) { |  | ||||||
|     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); |  | ||||||
|     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); |  | ||||||
|   } else { |  | ||||||
|     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); |  | ||||||
|     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); |  | ||||||
|   } |  | ||||||
|   // TODO: make sure we're feeding FPInterp the right arguments |  | ||||||
|   // Right now: fi B, frest(B) |  | ||||||
|  |  | ||||||
|   // Computes BRcpl = |  | ||||||
|   // (Floating Interpolate (FP Reciprocal Estimate B)) |  | ||||||
|   SDValue BRcpl = |  | ||||||
|       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, |  | ||||||
|                        DAG.getNode(SPUISD::FPInterp, VT, B, |  | ||||||
|                                 DAG.getNode(SPUISD::FPRecipEst, VT, B))); |  | ||||||
|  |  | ||||||
|   // Computes A * BRcpl and stores in a temporary register |  | ||||||
|   SDValue AxBRcpl = |  | ||||||
|       DAG.getCopyToReg(BRcpl, VRegC, |  | ||||||
|                  DAG.getNode(ISD::FMUL, VT, A, |  | ||||||
|                         DAG.getCopyFromReg(BRcpl, VRegBR, VT))); |  | ||||||
|   // What's the Chain variable do? It's magic! |  | ||||||
|   // TODO: set Chain = Op(0).getEntryNode() |  | ||||||
|  |  | ||||||
|   return DAG.getNode(ISD::FADD, VT, |  | ||||||
|                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT), |  | ||||||
|                 DAG.getNode(ISD::FMUL, VT, |  | ||||||
|                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), |  | ||||||
|                         DAG.getNode(ISD::FSUB, VT, A, |  | ||||||
|                             DAG.getNode(ISD::FMUL, VT, B, |  | ||||||
|                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { | static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { | ||||||
|   MVT VT = Op.getValueType(); |   MVT VT = Op.getValueType(); | ||||||
|   SDValue N = Op.getOperand(0); |   SDValue N = Op.getOperand(0); | ||||||
@@ -2296,18 +2104,23 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
     assert(0 && "Unhandled i8 math operator");
     /*NOTREACHED*/
     break;
+  case ISD::ADD: {
+    // 8-bit addition: Promote the arguments up to 16-bits and truncate
+    // the result:
+    SDValue N1 = Op.getOperand(1);
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
+    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
+                       DAG.getNode(Opc, MVT::i16, N0, N1));
+
+  }
+
   case ISD::SUB: {
     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
     // the result:
     SDValue N1 = Op.getOperand(1);
-    N0 = (N0.getOpcode() != ISD::Constant
-          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
-                            MVT::i16));
-    N1 = (N1.getOpcode() != ISD::Constant
-          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
-                            MVT::i16));
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                        DAG.getNode(Opc, MVT::i16, N0, N1));
   }
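Editor's note: the new ISD::ADD case uses the same trick as the existing SUB case — sign-extend both i8 operands to i16, do the 16-bit operation, truncate back to i8. Because only the low 8 bits of the wide result survive the truncate, the choice of sign- versus zero-extension cannot change the result for add or sub. A small standalone check (not LLVM code):

#include <cassert>
#include <cstdint>

// Promote-to-i16, operate, truncate: modular 8-bit addition is preserved
// because the low 8 bits of the 16-bit sum do not depend on how the operands
// were extended.
static uint8_t add_i8_via_i16(int8_t a, int8_t b) {
  int16_t wa = a;  // sign-extend, as the lowering does
  int16_t wb = b;
  return static_cast<uint8_t>(static_cast<int16_t>(wa + wb));  // truncate
}

int main() {
  for (int a = -128; a < 128; ++a)
    for (int b = -128; b < 128; ++b)
      assert(add_i8_via_i16(static_cast<int8_t>(a), static_cast<int8_t>(b)) ==
             static_cast<uint8_t>(static_cast<uint8_t>(a) +
                                  static_cast<uint8_t>(b)));
  return 0;
}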
@@ -2397,7 +2210,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
 
   switch (Opc) {
   case ISD::ZERO_EXTEND:
-  case ISD::SIGN_EXTEND:
   case ISD::ANY_EXTEND: {
     MVT Op0VT = Op0.getValueType();
     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
@@ -2410,39 +2222,16 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
     SDValue PromoteScalar =
             DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
 
-    if (Opc != ISD::SIGN_EXTEND) {
-      // Use a shuffle to zero extend the i32 to i64 directly:
-      SDValue shufMask =
-              DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
-                          DAG.getConstant(0x80808080, MVT::i32),
-                          DAG.getConstant(0x00010203, MVT::i32),
-                          DAG.getConstant(0x80808080, MVT::i32),
-                          DAG.getConstant(0x08090a0b, MVT::i32));
-      SDValue zextShuffle =
-              DAG.getNode(SPUISD::SHUFB, Op0VecVT,
-                          PromoteScalar, PromoteScalar, shufMask);
+    // Use a shuffle to zero extend the i32 to i64 directly:
+    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
+        DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
+            MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
+            0x08090a0b, MVT::i32));
+    SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
+        PromoteScalar, shufMask);
 
-      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                         DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
-    } else {
-      // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
-      // right and propagate the sign bit) instruction.
-      SDValue RotQuad =
-              DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
-                          PromoteScalar, DAG.getConstant(4, MVT::i32));
-      SDValue SignQuad =
-              DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
-                          PromoteScalar, DAG.getConstant(32, MVT::i32));
-      SDValue SelMask =
-              DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
-                          DAG.getConstant(0xf0f0, MVT::i16));
-      SDValue CombineQuad =
-              DAG.getNode(SPUISD::SELB, Op0VecVT,
-                          SignQuad, RotQuad, SelMask);
-
-      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                         DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
-    }
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
+        VecVT, zextShuffle));
   }
 
   case ISD::ADD: {
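Editor's note: the zero-extend path above builds a SHUFB control word from the constants 0x80808080, 0x00010203, 0x80808080, 0x08090a0b. Read as 16 control bytes, 0x80 inserts a zero byte while 0x00-0x0b select that byte from the source, so the shuffle yields { 0, word 0, 0, word 2 } — each 32-bit word widened to a zero-extended 64-bit slot. The emulation below is a sketch under that assumed (partial) SHUFB encoding; shufb_emulate is an invented helper, not an LLVM or SPU library function, and the special 0xFF/0x80-producing encodings of the real instruction are ignored.

#include <array>
#include <cassert>
#include <cstdint>

// Partial model: control bytes 0x00-0x1f pick a byte from the concatenation of
// the two sources; bytes of the form 10xxxxxx produce 0x00.
static std::array<uint8_t, 16> shufb_emulate(const std::array<uint8_t, 16>& a,
                                             const std::array<uint8_t, 16>& b,
                                             const std::array<uint8_t, 16>& ctl) {
  std::array<uint8_t, 16> out{};
  for (int i = 0; i < 16; ++i) {
    uint8_t c = ctl[i];
    if ((c & 0xC0) == 0x80)
      out[i] = 0x00;
    else
      out[i] = (c & 0x10) ? b[c & 0x0F] : a[c & 0x0F];
  }
  return out;
}

int main() {
  // Source quadword with the i32 value 0xAABBCCDD in word 0 (big-endian bytes).
  std::array<uint8_t, 16> src = {0xAA, 0xBB, 0xCC, 0xDD, 0, 0, 0, 0,
                                 0x11, 0x22, 0x33, 0x44, 0, 0, 0, 0};
  // The mask from the lowering code, written out byte by byte.
  std::array<uint8_t, 16> ctl = {0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03,
                                 0x80, 0x80, 0x80, 0x80, 0x08, 0x09, 0x0a, 0x0b};
  std::array<uint8_t, 16> res = shufb_emulate(src, src, ctl);
  // Doubleword 0 of the result is 0x00000000AABBCCDD: the zero-extended i32.
  std::array<uint8_t, 16> expect = {0, 0, 0, 0, 0xAA, 0xBB, 0xCC, 0xDD,
                                    0, 0, 0, 0, 0x11, 0x22, 0x33, 0x44};
  assert(res == expect);
  return 0;
}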
| @@ -2502,88 +2291,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) | |||||||
|                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, |                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, | ||||||
|                                    Op0, Op1, ShiftedBorrow)); |                                    Op0, Op1, ShiftedBorrow)); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   case ISD::SHL: { |  | ||||||
|     SDValue ShiftAmt = Op.getOperand(1); |  | ||||||
|     MVT ShiftAmtVT = ShiftAmt.getValueType(); |  | ||||||
|     SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0); |  | ||||||
|     SDValue MaskLower = |  | ||||||
|       DAG.getNode(SPUISD::SELB, VecVT, |  | ||||||
|                   Op0Vec, |  | ||||||
|                   DAG.getConstant(0, VecVT), |  | ||||||
|                   DAG.getNode(SPUISD::SELECT_MASK, VecVT, |  | ||||||
|                               DAG.getConstant(0xff00ULL, MVT::i16))); |  | ||||||
|     SDValue ShiftAmtBytes = |  | ||||||
|       DAG.getNode(ISD::SRL, ShiftAmtVT, |  | ||||||
|                   ShiftAmt, |  | ||||||
|                   DAG.getConstant(3, ShiftAmtVT)); |  | ||||||
|     SDValue ShiftAmtBits = |  | ||||||
|       DAG.getNode(ISD::AND, ShiftAmtVT, |  | ||||||
|                   ShiftAmt, |  | ||||||
|                   DAG.getConstant(7, ShiftAmtVT)); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, |  | ||||||
|                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT, |  | ||||||
|                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, |  | ||||||
|                                                MaskLower, ShiftAmtBytes), |  | ||||||
|                                    ShiftAmtBits)); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   case ISD::SRL: { |  | ||||||
|     MVT VT = Op.getValueType(); |  | ||||||
|     SDValue ShiftAmt = Op.getOperand(1); |  | ||||||
|     MVT ShiftAmtVT = ShiftAmt.getValueType(); |  | ||||||
|     SDValue ShiftAmtBytes = |  | ||||||
|       DAG.getNode(ISD::SRL, ShiftAmtVT, |  | ||||||
|                   ShiftAmt, |  | ||||||
|                   DAG.getConstant(3, ShiftAmtVT)); |  | ||||||
|     SDValue ShiftAmtBits = |  | ||||||
|       DAG.getNode(ISD::AND, ShiftAmtVT, |  | ||||||
|                   ShiftAmt, |  | ||||||
|                   DAG.getConstant(7, ShiftAmtVT)); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT, |  | ||||||
|                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT, |  | ||||||
|                                    Op0, ShiftAmtBytes), |  | ||||||
|                        ShiftAmtBits); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   case ISD::SRA: { |  | ||||||
|     // Promote Op0 to vector |  | ||||||
|     SDValue Op0 = |  | ||||||
|       DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); |  | ||||||
|     SDValue ShiftAmt = Op.getOperand(1); |  | ||||||
|     MVT ShiftVT = ShiftAmt.getValueType(); |  | ||||||
|  |  | ||||||
|     // Negate variable shift amounts |  | ||||||
|     if (!isa<ConstantSDNode>(ShiftAmt)) { |  | ||||||
|       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT, |  | ||||||
|                              DAG.getConstant(0, ShiftVT), ShiftAmt); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     SDValue UpperHalfSign = |  | ||||||
|       DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, |  | ||||||
|                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64, |  | ||||||
|                                           Op0, DAG.getConstant(31, MVT::i32)))); |  | ||||||
|     SDValue UpperHalfSignMask = |  | ||||||
|       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign); |  | ||||||
|     SDValue UpperLowerMask = |  | ||||||
|       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, |  | ||||||
|                   DAG.getConstant(0xff00, MVT::i16)); |  | ||||||
|     SDValue UpperLowerSelect = |  | ||||||
|       DAG.getNode(SPUISD::SELB, MVT::v2i64, |  | ||||||
|                   UpperHalfSignMask, Op0, UpperLowerMask); |  | ||||||
|     SDValue RotateLeftBytes = |  | ||||||
|       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64, |  | ||||||
|                   UpperLowerSelect, ShiftAmt); |  | ||||||
|     SDValue RotateLeftBits = |  | ||||||
|       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64, |  | ||||||
|                   RotateLeftBytes, ShiftAmt); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, |  | ||||||
|                        RotateLeftBits); |  | ||||||
|   } |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   return SDValue(); |   return SDValue(); | ||||||
| @@ -2890,10 +2597,11 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) | |||||||
|     return LowerRET(Op, DAG, getTargetMachine()); |     return LowerRET(Op, DAG, getTargetMachine()); | ||||||
|  |  | ||||||
|  |  | ||||||
|   // i8, i64 math ops: |  | ||||||
|   case ISD::ZERO_EXTEND: |   case ISD::ZERO_EXTEND: | ||||||
|   case ISD::SIGN_EXTEND: |  | ||||||
|   case ISD::ANY_EXTEND: |   case ISD::ANY_EXTEND: | ||||||
|  |     return LowerI64Math(Op, DAG, Opc); | ||||||
|  |  | ||||||
|  |   // i8, i64 math ops: | ||||||
|   case ISD::ADD: |   case ISD::ADD: | ||||||
|   case ISD::SUB: |   case ISD::SUB: | ||||||
|   case ISD::ROTR: |   case ISD::ROTR: | ||||||
| @@ -2928,22 +2636,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) | |||||||
|  |  | ||||||
|   // Vector and i8 multiply: |   // Vector and i8 multiply: | ||||||
|   case ISD::MUL: |   case ISD::MUL: | ||||||
|     if (VT.isVector()) |     if (VT == MVT::i8) | ||||||
|       return LowerVectorMUL(Op, DAG); |  | ||||||
|     else if (VT == MVT::i8) |  | ||||||
|       return LowerI8Math(Op, DAG, Opc, *this); |       return LowerI8Math(Op, DAG, Opc, *this); | ||||||
|  |  | ||||||
|   case ISD::FDIV: |  | ||||||
|     if (VT == MVT::f32 || VT == MVT::v4f32) |  | ||||||
|       return LowerFDIVf32(Op, DAG); |  | ||||||
| #if 0 |  | ||||||
|     // This is probably a libcall |  | ||||||
|     else if (Op.getValueType() == MVT::f64) |  | ||||||
|       return LowerFDIVf64(Op, DAG); |  | ||||||
| #endif |  | ||||||
|     else |  | ||||||
|       assert(0 && "Calling FDIV on unsupported MVT"); |  | ||||||
|  |  | ||||||
|   case ISD::CTPOP: |   case ISD::CTPOP: | ||||||
|     return LowerCTPOP(Op, DAG); |     return LowerCTPOP(Op, DAG); | ||||||
|  |  | ||||||
| @@ -3119,8 +2814,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const | |||||||
|   case SPUISD::VEC_SHL: |   case SPUISD::VEC_SHL: | ||||||
|   case SPUISD::VEC_SRL: |   case SPUISD::VEC_SRL: | ||||||
|   case SPUISD::VEC_SRA: |   case SPUISD::VEC_SRA: | ||||||
|   case SPUISD::ROTQUAD_RZ_BYTES: |  | ||||||
|   case SPUISD::ROTQUAD_RZ_BITS: |  | ||||||
|   case SPUISD::ROTBYTES_LEFT: { |   case SPUISD::ROTBYTES_LEFT: { | ||||||
|     SDValue Op1 = N->getOperand(1); |     SDValue Op1 = N->getOperand(1); | ||||||
|  |  | ||||||
| @@ -3268,10 +2961,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, | |||||||
|   } |   } | ||||||
|  |  | ||||||
| #if 0 | #if 0 | ||||||
|   case MPY: |  | ||||||
|   case MPYU: |  | ||||||
|   case MPYH: |  | ||||||
|   case MPYHH: |  | ||||||
|   case SPUISD::SHLQUAD_L_BITS: |   case SPUISD::SHLQUAD_L_BITS: | ||||||
|   case SPUISD::SHLQUAD_L_BYTES: |   case SPUISD::SHLQUAD_L_BYTES: | ||||||
|   case SPUISD::VEC_SHL: |   case SPUISD::VEC_SHL: | ||||||
| @@ -3279,18 +2968,14 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, | |||||||
|   case SPUISD::VEC_SRA: |   case SPUISD::VEC_SRA: | ||||||
|   case SPUISD::VEC_ROTL: |   case SPUISD::VEC_ROTL: | ||||||
|   case SPUISD::VEC_ROTR: |   case SPUISD::VEC_ROTR: | ||||||
|   case SPUISD::ROTQUAD_RZ_BYTES: |  | ||||||
|   case SPUISD::ROTQUAD_RZ_BITS: |  | ||||||
|   case SPUISD::ROTBYTES_LEFT: |   case SPUISD::ROTBYTES_LEFT: | ||||||
|   case SPUISD::SELECT_MASK: |   case SPUISD::SELECT_MASK: | ||||||
|   case SPUISD::SELB: |   case SPUISD::SELB: | ||||||
|   case SPUISD::FPInterp: |  | ||||||
|   case SPUISD::FPRecipEst: |  | ||||||
|   case SPUISD::SEXT32TO64: |   case SPUISD::SEXT32TO64: | ||||||
| #endif | #endif | ||||||
|   } |   } | ||||||
| } | } | ||||||
|    |  | ||||||
| unsigned | unsigned | ||||||
| SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, | SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, | ||||||
|                                                    unsigned Depth) const { |                                                    unsigned Depth) const { | ||||||
|   | |||||||
| @@ -24,10 +24,10 @@ namespace llvm { | |||||||
|     enum NodeType { |     enum NodeType { | ||||||
|       // Start the numbering where the built-in ops and target ops leave off. |       // Start the numbering where the built-in ops and target ops leave off. |
|       FIRST_NUMBER = ISD::BUILTIN_OP_END, |       FIRST_NUMBER = ISD::BUILTIN_OP_END, | ||||||
|        |  | ||||||
|       // Pseudo instructions: |       // Pseudo instructions: | ||||||
|       RET_FLAG,                 ///< Return with flag, matched by bi instruction |       RET_FLAG,                 ///< Return with flag, matched by bi instruction | ||||||
|        |  | ||||||
|       Hi,                       ///< High address component (upper 16) |       Hi,                       ///< High address component (upper 16) | ||||||
|       Lo,                       ///< Low address component (lower 16) |       Lo,                       ///< Low address component (lower 16) | ||||||
|       PCRelAddr,                ///< Program counter relative address |       PCRelAddr,                ///< Program counter relative address | ||||||
| @@ -41,10 +41,6 @@ namespace llvm { | |||||||
|       CNTB,                     ///< Count leading ones in bytes |       CNTB,                     ///< Count leading ones in bytes | ||||||
|       PREFSLOT2VEC,             ///< Promote scalar->vector |       PREFSLOT2VEC,             ///< Promote scalar->vector | ||||||
|       VEC2PREFSLOT,             ///< Extract element 0 |       VEC2PREFSLOT,             ///< Extract element 0 | ||||||
|       MPY,                      ///< 16-bit Multiply (low parts of a 32-bit) |  | ||||||
|       MPYU,                     ///< Multiply Unsigned |  | ||||||
|       MPYH,                     ///< Multiply High |  | ||||||
|       MPYHH,                    ///< Multiply High-High |  | ||||||
|       SHLQUAD_L_BITS,           ///< Rotate quad left, by bits |       SHLQUAD_L_BITS,           ///< Rotate quad left, by bits | ||||||
|       SHLQUAD_L_BYTES,          ///< Rotate quad left, by bytes |       SHLQUAD_L_BYTES,          ///< Rotate quad left, by bytes | ||||||
|       VEC_SHL,                  ///< Vector shift left |       VEC_SHL,                  ///< Vector shift left | ||||||
| @@ -52,8 +48,6 @@ namespace llvm { | |||||||
|       VEC_SRA,                  ///< Vector shift right (arithmetic) |       VEC_SRA,                  ///< Vector shift right (arithmetic) | ||||||
|       VEC_ROTL,                 ///< Vector rotate left |       VEC_ROTL,                 ///< Vector rotate left | ||||||
|       VEC_ROTR,                 ///< Vector rotate right |       VEC_ROTR,                 ///< Vector rotate right | ||||||
|       ROTQUAD_RZ_BYTES,         ///< Rotate quad right, by bytes, zero fill |  | ||||||
|       ROTQUAD_RZ_BITS,          ///< Rotate quad right, by bits, zero fill |  | ||||||
|       ROTBYTES_LEFT,            ///< Rotate bytes (loads -> ROTQBYI) |       ROTBYTES_LEFT,            ///< Rotate bytes (loads -> ROTQBYI) | ||||||
|       ROTBYTES_LEFT_BITS,       ///< Rotate bytes left by bit shift count |       ROTBYTES_LEFT_BITS,       ///< Rotate bytes left by bit shift count | ||||||
|       SELECT_MASK,              ///< Select Mask (FSM, FSMB, FSMH, FSMBI) |       SELECT_MASK,              ///< Select Mask (FSM, FSMB, FSMH, FSMBI) | ||||||
| @@ -63,8 +57,6 @@ namespace llvm { | |||||||
|       CARRY_GENERATE,           ///< Carry generate for ADD_EXTENDED |       CARRY_GENERATE,           ///< Carry generate for ADD_EXTENDED | ||||||
|       SUB_EXTENDED,             ///< Subtract extended, with borrow |       SUB_EXTENDED,             ///< Subtract extended, with borrow | ||||||
|       BORROW_GENERATE,          ///< Borrow generate for SUB_EXTENDED |       BORROW_GENERATE,          ///< Borrow generate for SUB_EXTENDED | ||||||
|       FPInterp,                 ///< Floating point interpolate |  | ||||||
|       FPRecipEst,               ///< Floating point reciprocal estimate |  | ||||||
|       SEXT32TO64,               ///< Sign-extended 32-bit const -> 64-bits |       SEXT32TO64,               ///< Sign-extended 32-bit const -> 64-bits | ||||||
|       LAST_SPUISD               ///< Last user-defined instruction |       LAST_SPUISD               ///< Last user-defined instruction | ||||||
|     }; |     }; | ||||||
| @@ -87,7 +79,7 @@ namespace llvm { | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   class SPUTargetMachine;            // forward dec'l. |   class SPUTargetMachine;            // forward dec'l. | ||||||
|    |  | ||||||
|   class SPUTargetLowering : |   class SPUTargetLowering : | ||||||
|     public TargetLowering |     public TargetLowering | ||||||
|   { |   { | ||||||
| @@ -97,14 +89,14 @@ namespace llvm { | |||||||
|  |  | ||||||
|   public: |   public: | ||||||
|     SPUTargetLowering(SPUTargetMachine &TM); |     SPUTargetLowering(SPUTargetMachine &TM); | ||||||
|      |  | ||||||
|     /// getTargetNodeName() - This method returns the name of a target specific |     /// getTargetNodeName() - This method returns the name of a target specific | ||||||
|     /// DAG node. |     /// DAG node. | ||||||
|     virtual const char *getTargetNodeName(unsigned Opcode) const; |     virtual const char *getTargetNodeName(unsigned Opcode) const; | ||||||
|  |  | ||||||
|     /// getSetCCResultType - Return the ValueType for ISD::SETCC |     /// getSetCCResultType - Return the ValueType for ISD::SETCC | ||||||
|     virtual MVT getSetCCResultType(const SDValue &) const; |     virtual MVT getSetCCResultType(const SDValue &) const; | ||||||
|      |  | ||||||
|     //! Custom lowering hooks |     //! Custom lowering hooks | ||||||
|     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); |     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); | ||||||
|  |  | ||||||
| @@ -116,7 +108,7 @@ namespace llvm { | |||||||
|  |  | ||||||
|     virtual void computeMaskedBitsForTargetNode(const SDValue Op, |     virtual void computeMaskedBitsForTargetNode(const SDValue Op, | ||||||
|                                                 const APInt &Mask, |                                                 const APInt &Mask, | ||||||
|                                                 APInt &KnownZero,  |                                                 APInt &KnownZero, | ||||||
|                                                 APInt &KnownOne, |                                                 APInt &KnownOne, | ||||||
|                                                 const SelectionDAG &DAG, |                                                 const SelectionDAG &DAG, | ||||||
|                                                 unsigned Depth = 0) const; |                                                 unsigned Depth = 0) const; | ||||||
| @@ -126,12 +118,12 @@ namespace llvm { | |||||||
|  |  | ||||||
|     ConstraintType getConstraintType(const std::string &ConstraintLetter) const; |     ConstraintType getConstraintType(const std::string &ConstraintLetter) const; | ||||||
|  |  | ||||||
|     std::pair<unsigned, const TargetRegisterClass*>  |     std::pair<unsigned, const TargetRegisterClass*> | ||||||
|       getRegForInlineAsmConstraint(const std::string &Constraint, |       getRegForInlineAsmConstraint(const std::string &Constraint, | ||||||
|                                    MVT VT) const; |                                    MVT VT) const; | ||||||
|  |  | ||||||
|     void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, |     void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, | ||||||
|                                       bool hasMemory,  |                                       bool hasMemory, | ||||||
|                                       std::vector<SDValue> &Ops, |                                       std::vector<SDValue> &Ops, | ||||||
|                                       SelectionDAG &DAG) const; |                                       SelectionDAG &DAG) const; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -82,7 +82,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, | |||||||
|   case SPU::ORIi8i32: |   case SPU::ORIi8i32: | ||||||
|   case SPU::AHIvec: |   case SPU::AHIvec: | ||||||
|   case SPU::AHIr16: |   case SPU::AHIr16: | ||||||
|   case SPU::AIvec: |   case SPU::AIv4i32: | ||||||
|     assert(MI.getNumOperands() == 3 && |     assert(MI.getNumOperands() == 3 && | ||||||
|            MI.getOperand(0).isReg() && |            MI.getOperand(0).isReg() && | ||||||
|            MI.getOperand(1).isReg() && |            MI.getOperand(1).isReg() && | ||||||
| @@ -98,8 +98,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, | |||||||
|     assert(MI.getNumOperands() == 3 && |     assert(MI.getNumOperands() == 3 && | ||||||
|            "wrong number of operands to AIr32"); |            "wrong number of operands to AIr32"); | ||||||
|     if (MI.getOperand(0).isReg() && |     if (MI.getOperand(0).isReg() && | ||||||
|         (MI.getOperand(1).isReg() || |         MI.getOperand(1).isReg() && | ||||||
|          MI.getOperand(1).isFI()) && |  | ||||||
|         (MI.getOperand(2).isImm() && |         (MI.getOperand(2).isImm() && | ||||||
|          MI.getOperand(2).getImm() == 0)) { |          MI.getOperand(2).getImm() == 0)) { | ||||||
|       sourceReg = MI.getOperand(1).getReg(); |       sourceReg = MI.getOperand(1).getReg(); | ||||||
| @@ -265,7 +264,7 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, | |||||||
|   // reg class to any other reg class containing R3.  This is required because |   // reg class to any other reg class containing R3.  This is required because | ||||||
|   // we instruction select bitconvert i64 -> f64 as a noop for example, so our |   // we instruction select bitconvert i64 -> f64 as a noop for example, so our | ||||||
|   // types have no specific meaning. |   // types have no specific meaning. | ||||||
|    |  | ||||||
|   if (DestRC == SPU::R8CRegisterClass) { |   if (DestRC == SPU::R8CRegisterClass) { | ||||||
|     BuildMI(MBB, MI, get(SPU::ORBIr8), DestReg).addReg(SrcReg).addImm(0); |     BuildMI(MBB, MI, get(SPU::ORBIr8), DestReg).addReg(SrcReg).addImm(0); | ||||||
|   } else if (DestRC == SPU::R16CRegisterClass) { |   } else if (DestRC == SPU::R16CRegisterClass) { | ||||||
| @@ -291,7 +290,7 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, | |||||||
|     // Attempt to copy unknown/unsupported register class! |     // Attempt to copy unknown/unsupported register class! | ||||||
|     return false; |     return false; | ||||||
|   } |   } | ||||||
|    |  | ||||||
|   return true; |   return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -464,7 +463,7 @@ SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, | |||||||
|   unsigned OpNum = Ops[0]; |   unsigned OpNum = Ops[0]; | ||||||
|   unsigned Opc = MI->getOpcode(); |   unsigned Opc = MI->getOpcode(); | ||||||
|   MachineInstr *NewMI = 0; |   MachineInstr *NewMI = 0; | ||||||
|    |  | ||||||
|   if ((Opc == SPU::ORr32 |   if ((Opc == SPU::ORr32 | ||||||
|        || Opc == SPU::ORv4i32) |        || Opc == SPU::ORv4i32) | ||||||
|        && MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { |        && MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { | ||||||
| @@ -508,7 +507,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, | |||||||
|  |  | ||||||
|   // Get the last instruction in the block. |   // Get the last instruction in the block. | ||||||
|   MachineInstr *LastInst = I; |   MachineInstr *LastInst = I; | ||||||
|    |  | ||||||
|   // If there is only one terminator instruction, process it. |   // If there is only one terminator instruction, process it. | ||||||
|   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { |   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { | ||||||
|     if (isUncondBranch(LastInst)) { |     if (isUncondBranch(LastInst)) { | ||||||
| @@ -524,7 +523,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, | |||||||
|     // Otherwise, don't know what this is. |     // Otherwise, don't know what this is. | ||||||
|     return true; |     return true; | ||||||
|   } |   } | ||||||
|    |  | ||||||
|   // Get the instruction before it if it's a terminator. |   // Get the instruction before it if it's a terminator. | ||||||
|   MachineInstr *SecondLastInst = I; |   MachineInstr *SecondLastInst = I; | ||||||
|  |  | ||||||
| @@ -532,7 +531,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, | |||||||
|   if (SecondLastInst && I != MBB.begin() && |   if (SecondLastInst && I != MBB.begin() && | ||||||
|       isUnpredicatedTerminator(--I)) |       isUnpredicatedTerminator(--I)) | ||||||
|     return true; |     return true; | ||||||
|    |  | ||||||
|   // If the block ends with a conditional and unconditional branch, handle it. |   // If the block ends with a conditional and unconditional branch, handle it. | ||||||
|   if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) { |   if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) { | ||||||
|     TBB =  SecondLastInst->getOperand(1).getMBB(); |     TBB =  SecondLastInst->getOperand(1).getMBB(); | ||||||
| @@ -541,7 +540,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, | |||||||
|     FBB = LastInst->getOperand(0).getMBB(); |     FBB = LastInst->getOperand(0).getMBB(); | ||||||
|     return false; |     return false; | ||||||
|   } |   } | ||||||
|    |  | ||||||
|   // If the block ends with two unconditional branches, handle it.  The second |   // If the block ends with two unconditional branches, handle it.  The second | ||||||
|   // one is not executed, so remove it. |   // one is not executed, so remove it. | ||||||
|   if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) { |   if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) { | ||||||
| @@ -554,7 +553,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, | |||||||
|   // Otherwise, can't handle this. |   // Otherwise, can't handle this. | ||||||
|   return true; |   return true; | ||||||
| } | } | ||||||
|      |  | ||||||
| unsigned | unsigned | ||||||
| SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { | SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { | ||||||
|   MachineBasicBlock::iterator I = MBB.end(); |   MachineBasicBlock::iterator I = MBB.end(); | ||||||
| @@ -578,16 +577,16 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { | |||||||
|   I->eraseFromParent(); |   I->eraseFromParent(); | ||||||
|   return 2; |   return 2; | ||||||
| } | } | ||||||
|      |  | ||||||
| unsigned | unsigned | ||||||
| SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, | SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, | ||||||
| 			   MachineBasicBlock *FBB, | 			   MachineBasicBlock *FBB, | ||||||
| 			   const SmallVectorImpl<MachineOperand> &Cond) const { | 			   const SmallVectorImpl<MachineOperand> &Cond) const { | ||||||
|   // Shouldn't be a fall through. |   // Shouldn't be a fall through. | ||||||
|   assert(TBB && "InsertBranch must not be told to insert a fallthrough"); |   assert(TBB && "InsertBranch must not be told to insert a fallthrough"); | ||||||
|   assert((Cond.size() == 2 || Cond.size() == 0) &&  |   assert((Cond.size() == 2 || Cond.size() == 0) && | ||||||
|          "SPU branch conditions have two components!"); |          "SPU branch conditions have two components!"); | ||||||
|    |  | ||||||
|   // One-way branch. |   // One-way branch. | ||||||
|   if (FBB == 0) { |   if (FBB == 0) { | ||||||
|     if (Cond.empty())   // Unconditional branch |     if (Cond.empty())   // Unconditional branch | ||||||
| @@ -600,7 +599,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, | |||||||
|     } |     } | ||||||
|     return 1; |     return 1; | ||||||
|   } |   } | ||||||
|    |  | ||||||
|   // Two-way Conditional Branch. |   // Two-way Conditional Branch. | ||||||
| #if 0 | #if 0 | ||||||
|   BuildMI(&MBB, get(SPU::BRNZ)) |   BuildMI(&MBB, get(SPU::BRNZ)) | ||||||
|   | |||||||
| @@ -583,7 +583,9 @@ def AHIvec: | |||||||
| def AHIr16: | def AHIr16: | ||||||
|   RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), |   RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), | ||||||
|     "ahi\t$rT, $rA, $val", IntegerOp, |     "ahi\t$rT, $rA, $val", IntegerOp, | ||||||
|     [(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>; |     [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>; | ||||||
|  |  | ||||||
|  | // v4i32, i32 add instruction: | ||||||
|  |  | ||||||
| class AInst<dag OOL, dag IOL, list<dag> pattern>: | class AInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
|   RRForm<0b00000011000, OOL, IOL, |   RRForm<0b00000011000, OOL, IOL, | ||||||
| @@ -604,21 +606,42 @@ multiclass AddInstruction { | |||||||
|   def v16i8: AVecInst<v16i8>; |   def v16i8: AVecInst<v16i8>; | ||||||
|    |    | ||||||
|   def r32:   ARegInst<R32C>; |   def r32:   ARegInst<R32C>; | ||||||
|   def r8:    AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>;  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| defm A : AddInstruction; | defm A : AddInstruction; | ||||||
|  |  | ||||||
| def AIvec: | class AIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
|     RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), |     RI10Form<0b00111000, OOL, IOL, | ||||||
|       "ai\t$rT, $rA, $val", IntegerOp, | 	     "ai\t$rT, $rA, $val", IntegerOp, | ||||||
|       [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA), | 	     pattern>; | ||||||
|                                       v4i32SExt10Imm:$val))]>; |  | ||||||
|  |  | ||||||
| def AIr32: | class AIVecInst<ValueType vectype, PatLeaf immpred>: | ||||||
|     RI10Form<0b00111000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), |     AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||||
|       "ai\t$rT, $rA, $val", IntegerOp, | 	    [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>; | ||||||
|       [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>; |  | ||||||
|  | class AIFPVecInst<ValueType vectype, PatLeaf immpred>: | ||||||
|  |     AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||||
|  | 	    [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | class AIRegInst<RegisterClass rclass, PatLeaf immpred>: | ||||||
|  |     AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), | ||||||
|  | 	   [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>; | ||||||
|  |  | ||||||
|  | // This is used to add epsilons to floating point numbers in the f32 fdiv code: | ||||||
|  | class AIFPInst<RegisterClass rclass, PatLeaf immpred>: | ||||||
|  |     AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), | ||||||
|  | 	   [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | multiclass AddImmediate { | ||||||
|  |   def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>; | ||||||
|  |  | ||||||
|  |   def r32: AIRegInst<R32C, i32ImmSExt10>; | ||||||
|  |  | ||||||
|  |   def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>; | ||||||
|  |   def f32: AIFPInst<R32FP, i32ImmSExt10>; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | defm AI : AddImmediate; | ||||||
|  |  | ||||||
| def SFHvec: | def SFHvec: | ||||||
|     RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
| @@ -795,8 +818,7 @@ def BGXvec: | |||||||
| def MPYv8i16: | def MPYv8i16: | ||||||
|   RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |   RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|     "mpy\t$rT, $rA, $rB", IntegerMulDiv, |     "mpy\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|     [(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA), |     [/* no pattern */]>; | ||||||
|                                           (v8i16 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def MPYr16: | def MPYr16: | ||||||
|   RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), |   RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), | ||||||
| @@ -812,8 +834,7 @@ class MPYUInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|  |  | ||||||
| def MPYUv4i32: | def MPYUv4i32: | ||||||
|   MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |   MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|            [(set (v4i32 VECREG:$rT), |            [/* no pattern */]>; | ||||||
|                  (SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def MPYUr16: | def MPYUr16: | ||||||
|   MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), |   MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), | ||||||
| @@ -821,7 +842,7 @@ def MPYUr16: | |||||||
|  |  | ||||||
| def MPYUr32: | def MPYUr32: | ||||||
|   MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |   MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||||
|            [(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>; |            [/* no pattern */]>; | ||||||
|  |  | ||||||
| // mpyi: multiply 16 x s10imm -> 32 result. | // mpyi: multiply 16 x s10imm -> 32 result. | ||||||
|  |  | ||||||
| @@ -892,87 +913,78 @@ class MPYHInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|           |           | ||||||
| def MPYHv4i32: | def MPYHv4i32: | ||||||
|     MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|              [(set (v4i32 VECREG:$rT), |              [/* no pattern */]>; | ||||||
|                    (SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def MPYHr32: | def MPYHr32: | ||||||
|     MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||||
|              [(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>; |              [/* no pattern */]>; | ||||||
|  |  | ||||||
| // mpys: multiply high and shift right (returns the top half of | // mpys: multiply high and shift right (returns the top half of | ||||||
| // a 16-bit multiply, sign extended to 32 bits.) | // a 16-bit multiply, sign extended to 32 bits.) | ||||||
| def MPYSvec: |  | ||||||
|     RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |  | ||||||
|       "mpys\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| def MPYSr16: | class MPYSInst<dag OOL, dag IOL>: | ||||||
|     RRForm<0b11100011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB), |     RRForm<0b11100011110, OOL, IOL,  | ||||||
|       "mpys\t$rT, $rA, $rB", IntegerMulDiv, |       "mpys\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|       []>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def MPYSvec: | ||||||
|  |     MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|  |      | ||||||
|  | def MPYSr16: | ||||||
|  |     MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>; | ||||||
|  |  | ||||||
| // mpyhh: multiply high-high (returns the 32-bit result from multiplying | // mpyhh: multiply high-high (returns the 32-bit result from multiplying | ||||||
| // the top 16 bits of the $rA, $rB) | // the top 16 bits of the $rA, $rB) | ||||||
|  |  | ||||||
|  | class MPYHHInst<dag OOL, dag IOL>: | ||||||
|  |   RRForm<0b01100011110, OOL, IOL, | ||||||
|  |         "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|  |         [/* no pattern */]>; | ||||||
|  |          | ||||||
| def MPYHHv8i16: | def MPYHHv8i16: | ||||||
|     RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|       "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       [(set (v8i16 VECREG:$rT), |  | ||||||
|             (SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def MPYHHr32: | def MPYHHr32: | ||||||
|     RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||||
|       "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| // mpyhha: Multiply high-high, add to $rT: | // mpyhha: Multiply high-high, add to $rT: | ||||||
| def MPYHHAvec: |  | ||||||
|     RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |  | ||||||
|       "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| def MPYHHAr32: | class MPYHHAInst<dag OOL, dag IOL>: | ||||||
|     RRForm<0b01100010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     RRForm<0b01100010110, OOL, IOL, | ||||||
|       "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, |       "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|       []>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def MPYHHAvec: | ||||||
|  |     MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|  |      | ||||||
|  | def MPYHHAr32: | ||||||
|  |     MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||||
|  |  | ||||||
| // mpyhhu: Multiply high-high, unsigned | // mpyhhu: Multiply high-high, unsigned | ||||||
| def MPYHHUvec: |  | ||||||
|     RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |  | ||||||
|       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| def MPYHHUr32: | class MPYHHUInst<dag OOL, dag IOL>: | ||||||
|     RRForm<0b01110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     RRForm<0b01110011110, OOL, IOL, | ||||||
|       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, |       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|       []>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def MPYHHUvec: | ||||||
|  |     MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|  |      | ||||||
|  | def MPYHHUr32: | ||||||
|  |     MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||||
|  |  | ||||||
| // mpyhhau: Multiply high-high and add, unsigned | // mpyhhau: Multiply high-high and add, unsigned |
|  |  | ||||||
|  | class MPYHHAUInst<dag OOL, dag IOL>: | ||||||
|  |     RRForm<0b01110010110, OOL, IOL, | ||||||
|  |       "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|  |       [/* no pattern */]>; | ||||||
|  |  | ||||||
| def MPYHHAUvec: | def MPYHHAUvec: | ||||||
|     RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|       "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, |      | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| def MPYHHAUr32: | def MPYHHAUr32: | ||||||
|     RRForm<0b01110010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||||
|       "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
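The one-line comments above only hint at the scalar semantics. A hedged C++ model of what mpys and mpyhh compute per 32-bit word, based on those comments rather than on the ISA document:

#include <cstdint>

// mpys: signed lo16 x lo16 product, shifted right 16 bits with sign extension.
int32_t mpys(uint32_t a, uint32_t b) {
  return ((int16_t)(a & 0xffff) * (int16_t)(b & 0xffff)) >> 16;
}

// mpyhh: signed hi16 x hi16 product; mpyhha is the same product accumulated
// into $rT, and the *u forms treat the halfwords as unsigned.
int32_t mpyhh(uint32_t a, uint32_t b) {
  return (int16_t)(a >> 16) * (int16_t)(b >> 16);
}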
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |  | ||||||
| // v4i32, i32 multiply instruction sequence: |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |  | ||||||
| def MPYv4i32: |  | ||||||
|   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), |  | ||||||
|       (Av4i32 |  | ||||||
|         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB), |  | ||||||
|                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)), |  | ||||||
|         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>; |  | ||||||
|  |  | ||||||
| def MPYi32: |  | ||||||
|   Pat<(mul R32C:$rA, R32C:$rB), |  | ||||||
|       (Ar32 |  | ||||||
|         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), |  | ||||||
|               (MPYHr32 R32C:$rB, R32C:$rA)), |  | ||||||
|         (MPYUr32 R32C:$rA, R32C:$rB))>; |  | ||||||
|  |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
| // clz: Count leading zeroes | // clz: Count leading zeroes | ||||||
| @@ -983,7 +995,7 @@ class CLZInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|  |  | ||||||
| class CLZRegInst<RegisterClass rclass>: | class CLZRegInst<RegisterClass rclass>: | ||||||
|     CLZInst<(outs rclass:$rT), (ins rclass:$rA), |     CLZInst<(outs rclass:$rT), (ins rclass:$rA), | ||||||
| 	    [(set rclass:$rT, (ctlz rclass:$rA))]>; |             [(set rclass:$rT, (ctlz rclass:$rA))]>; | ||||||
|  |  | ||||||
| class CLZVecInst<ValueType vectype>: | class CLZVecInst<ValueType vectype>: | ||||||
|     CLZInst<(outs VECREG:$rT), (ins VECREG:$rA), |     CLZInst<(outs VECREG:$rT), (ins VECREG:$rA), | ||||||
| @@ -1424,7 +1436,7 @@ multiclass BitwiseOr | |||||||
|   def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), |   def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), | ||||||
|                   [/* no pattern */]>; |                   [/* no pattern */]>; | ||||||
|  |  | ||||||
|   // scalar->vector promotion: |   // scalar->vector promotion, prefslot2vec: | ||||||
|   def v16i8_i8:  ORPromoteScalar<R8C>; |   def v16i8_i8:  ORPromoteScalar<R8C>; | ||||||
|   def v8i16_i16: ORPromoteScalar<R16C>; |   def v8i16_i16: ORPromoteScalar<R16C>; | ||||||
|   def v4i32_i32: ORPromoteScalar<R32C>; |   def v4i32_i32: ORPromoteScalar<R32C>; | ||||||
| @@ -1432,7 +1444,7 @@ multiclass BitwiseOr | |||||||
|   def v4f32_f32: ORPromoteScalar<R32FP>; |   def v4f32_f32: ORPromoteScalar<R32FP>; | ||||||
|   def v2f64_f64: ORPromoteScalar<R64FP>; |   def v2f64_f64: ORPromoteScalar<R64FP>; | ||||||
|  |  | ||||||
|   // extract element 0: |   // vector->scalar demotion, vec2prefslot: | ||||||
|   def i8_v16i8:  ORExtractElt<R8C>; |   def i8_v16i8:  ORExtractElt<R8C>; | ||||||
|   def i16_v8i16: ORExtractElt<R16C>; |   def i16_v8i16: ORExtractElt<R16C>; | ||||||
|   def i32_v4i32: ORExtractElt<R32C>; |   def i32_v4i32: ORExtractElt<R32C>; | ||||||
| @@ -1831,6 +1843,13 @@ class SELBVecInst<ValueType vectype>: | |||||||
|                      (and (vnot (vectype VECREG:$rC)), |                      (and (vnot (vectype VECREG:$rC)), | ||||||
|                           (vectype VECREG:$rA))))]>; |                           (vectype VECREG:$rA))))]>; | ||||||
|  |  | ||||||
|  | class SELBVecVCondInst<ValueType vectype>: | ||||||
|  |   SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), | ||||||
|  |            [(set (vectype VECREG:$rT), | ||||||
|  |                  (select (vectype VECREG:$rC), | ||||||
|  |                          (vectype VECREG:$rB), | ||||||
|  |                          (vectype VECREG:$rA)))]>; | ||||||
|  |  | ||||||
| class SELBVecCondInst<ValueType vectype>: | class SELBVecCondInst<ValueType vectype>: | ||||||
|   SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), |   SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), | ||||||
|            [(set (vectype VECREG:$rT), |            [(set (vectype VECREG:$rT), | ||||||
| @@ -1867,8 +1886,21 @@ multiclass SelectBits | |||||||
|   def v4i32_cond: SELBVecCondInst<v4i32>; |   def v4i32_cond: SELBVecCondInst<v4i32>; | ||||||
|   def v2i64_cond: SELBVecCondInst<v2i64>; |   def v2i64_cond: SELBVecCondInst<v2i64>; | ||||||
|  |  | ||||||
|  |   def v16i8_vcond: SELBVecVCondInst<v16i8>; | 
|  |   def v8i16_vcond: SELBVecVCondInst<v8i16>; | 
|  |   def v4i32_vcond: SELBVecVCondInst<v4i32>; | 
|  |   def v2i64_vcond: SELBVecVCondInst<v2i64>; | 
|  |  | ||||||
|  |   def v4f32_cond: | ||||||
|  | 	SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), | ||||||
|  | 		 [(set (v4f32 VECREG:$rT), | ||||||
|  | 		       (select (v4i32 VECREG:$rC), | ||||||
|  | 			       (v4f32 VECREG:$rB), | ||||||
|  | 			       (v4f32 VECREG:$rA)))]>; | ||||||
|  |  | ||||||
|   // SELBr64_cond is defined further down, look for i64 comparisons |   // SELBr64_cond is defined further down, look for i64 comparisons | ||||||
|   def r32_cond:   SELBRegCondInst<R32C, R32C>; |   def r32_cond:   SELBRegCondInst<R32C, R32C>; | ||||||
|  |   def f32_cond:   SELBRegCondInst<R32C, R32FP>; | ||||||
|   def r16_cond:   SELBRegCondInst<R16C, R16C>; |   def r16_cond:   SELBRegCondInst<R16C, R16C>; | ||||||
|   def r8_cond:    SELBRegCondInst<R8C,  R8C>; |   def r8_cond:    SELBRegCondInst<R8C,  R8C>; | ||||||
| } | } | ||||||
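All SELB variants in this multiclass encode the same bitwise blend; only the type of the mask operand differs. For reference, a one-line C++ model of the (or (and $rC, $rB), (and (vnot $rC), $rA)) pattern used above:

#include <cstdint>

// selb: take bits of b where the mask c is 1, bits of a where it is 0.
uint64_t selb(uint64_t a, uint64_t b, uint64_t c) {
  return (c & b) | (~c & a);
}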
| @@ -2454,11 +2486,11 @@ class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|            RotateShift, pattern>; |            RotateShift, pattern>; | ||||||
|  |  | ||||||
| class ROTQBIVecInst<ValueType vectype>: | class ROTQBIVecInst<ValueType vectype>: | ||||||
|     ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), | ||||||
|                [/* no pattern yet */]>; |                [/* no pattern yet */]>; | ||||||
|  |  | ||||||
| class ROTQBIRegInst<RegisterClass rclass>: | class ROTQBIRegInst<RegisterClass rclass>: | ||||||
|     ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), |     ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), | ||||||
|                [/* no pattern yet */]>; |                [/* no pattern yet */]>; | ||||||
|  |  | ||||||
| multiclass RotateQuadByBitCount | multiclass RotateQuadByBitCount | ||||||
| @@ -2645,9 +2677,6 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)), | |||||||
| // ROTQMBYvec: This is a vector form merely so that when used in an | // ROTQMBYvec: This is a vector form merely so that when used in an | ||||||
| // instruction pattern, type checking will succeed. This instruction assumes | // instruction pattern, type checking will succeed. This instruction assumes | ||||||
| // that the user knows to negate $rB. | // that the user knows to negate $rB. |
| // |  | ||||||
| // Using the SPUrotquad_rz_bytes target-specific DAG node, the patterns |  | ||||||
| // ensure that $rB is negated. |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
| class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>: | class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
| @@ -2660,8 +2689,7 @@ class ROTQMBYVecInst<ValueType vectype>: | |||||||
|  |  | ||||||
| class ROTQMBYRegInst<RegisterClass rclass>: | class ROTQMBYRegInst<RegisterClass rclass>: | ||||||
|     ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), |     ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), | ||||||
|                 [(set rclass:$rT, |                 [/* no pattern */]>; | ||||||
|                       (SPUrotquad_rz_bytes rclass:$rA, R32C:$rB))]>; |  | ||||||
|  |  | ||||||
| multiclass RotateQuadBytes | multiclass RotateQuadBytes | ||||||
| { | { | ||||||
| @@ -2676,32 +2704,17 @@ multiclass RotateQuadBytes | |||||||
|  |  | ||||||
| defm ROTQMBY : RotateQuadBytes; | defm ROTQMBY : RotateQuadBytes; | ||||||
|  |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes (v16i8 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBYv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes (v8i16 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBYv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes (v4i32 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBYv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes (v2i64 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBYv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes GPRC:$rA, R32C:$rB), |  | ||||||
|           (ROTQMBYr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes R64C:$rA, R32C:$rB), |  | ||||||
|           (ROTQMBYr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
|  |  | ||||||
| class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>: | class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
|     RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", |     RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", | ||||||
|             RotateShift, pattern>; |             RotateShift, pattern>; | ||||||
|  |  | ||||||
| class ROTQMBYIVecInst<ValueType vectype>: | class ROTQMBYIVecInst<ValueType vectype>: | ||||||
|     ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), |     ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), | ||||||
|                  [(set (vectype VECREG:$rT), |                  [/* no pattern */]>; | ||||||
|                        (SPUrotquad_rz_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>; |  | ||||||
|  |  | ||||||
| class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>: | class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>: | ||||||
|     ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), |     ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), | ||||||
|                  [(set rclass:$rT, |                  [/* no pattern */]>; | ||||||
|                        (SPUrotquad_rz_bytes rclass:$rA, (inttype pred:$val)))]>; |  | ||||||
|  |  | ||||||
| multiclass RotateQuadBytesImm | multiclass RotateQuadBytesImm | ||||||
| { | { | ||||||
| @@ -2725,8 +2738,8 @@ class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|            RotateShift, pattern>; |            RotateShift, pattern>; | ||||||
|  |  | ||||||
| class ROTQMBYBIVecInst<ValueType vectype>: | class ROTQMBYBIVecInst<ValueType vectype>: | ||||||
|     ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), | ||||||
|                   [/* no pattern, intrinsic? */]>; |                   [/* no pattern */]>; | 
|  |  | ||||||
| multiclass RotateMaskQuadByBitCount | multiclass RotateMaskQuadByBitCount | ||||||
| { | { | ||||||
| @@ -2768,19 +2781,6 @@ multiclass RotateMaskQuadByBits | |||||||
|  |  | ||||||
| defm ROTQMBI: RotateMaskQuadByBits; | defm ROTQMBI: RotateMaskQuadByBits; | ||||||
|  |  | ||||||
| def : Pat<(SPUrotquad_rz_bits (v16i8 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBIv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits (v8i16 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBIv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits (v4i32 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBIv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits (v2i64 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBIv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits GPRC:$rA, R32C:$rB), |  | ||||||
|           (ROTQMBIr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits R64C:$rA, R32C:$rB), |  | ||||||
|           (ROTQMBIr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
|  |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
| // Rotate quad and mask by bits, immediate | // Rotate quad and mask by bits, immediate | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
| @@ -2791,13 +2791,11 @@ class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|  |  | ||||||
| class ROTQMBIIVecInst<ValueType vectype>: | class ROTQMBIIVecInst<ValueType vectype>: | ||||||
|    ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), |    ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), | ||||||
|                  [(set (vectype VECREG:$rT), |                  [/* no pattern */]>; | ||||||
|                        (SPUrotquad_rz_bits (vectype VECREG:$rA), (i32 uimm7:$val)))]>; |  | ||||||
|  |  | ||||||
| class ROTQMBIIRegInst<RegisterClass rclass>: | class ROTQMBIIRegInst<RegisterClass rclass>: | ||||||
|    ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), |    ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), | ||||||
|                  [(set rclass:$rT, |                  [/* no pattern */]>; | ||||||
|                        (SPUrotquad_rz_bits rclass:$rA, (i32 uimm7:$val)))]>; |  | ||||||
|  |  | ||||||
| multiclass RotateMaskQuadByBitsImm | multiclass RotateMaskQuadByBitsImm | ||||||
| { | { | ||||||
| @@ -3142,6 +3140,15 @@ multiclass CmpGtrWordImm | |||||||
|  |  | ||||||
|   def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), |   def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), | ||||||
|                     [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; |                     [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; | ||||||
|  |  | ||||||
|  |   // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence: | ||||||
|  |   def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||||
|  |                        [(set (v4i32 VECREG:$rT), | ||||||
|  |                              (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))), | ||||||
|  |                                     (v4i32 v4i32SExt16Imm:$val)))]>; | ||||||
|  |  | ||||||
|  |   def f32:   CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val), | ||||||
|  |   		      [/* no pattern */]>; | ||||||
| } | } | ||||||
|  |  | ||||||
| class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> : | class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> : | ||||||
| @@ -3750,62 +3757,63 @@ let isTerminator = 1, isBarrier = 1 in { | |||||||
|  |  | ||||||
| class FAInst<dag OOL, dag IOL, list<dag> pattern>: | class FAInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
|     RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB", |     RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB", | ||||||
| 	   SPrecFP, pattern>; |            SPrecFP, pattern>; | ||||||
|  |  | ||||||
| class FAVecInst<ValueType vectype>: | class FAVecInst<ValueType vectype>: | ||||||
|     FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|              [(set (vectype VECREG:$rT), |              [(set (vectype VECREG:$rT), | ||||||
| 		   (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; |                    (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; | ||||||
|  |  | ||||||
| multiclass SFPAdd | multiclass SFPAdd | ||||||
| { | { | ||||||
|   def v4f32: FAVecInst<v4f32>; |   def v4f32: FAVecInst<v4f32>; | ||||||
|   def r32:   FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), |   def f32:   FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||||
| 		    [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; |                     [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; | ||||||
| } | } | ||||||
|  |  | ||||||
| defm FA : SFPAdd; | defm FA : SFPAdd; | ||||||
|  |  | ||||||
| class FSInst<dag OOL, dag IOL, list<dag> pattern>: | class FSInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
|     RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB", |     RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB", | ||||||
| 	   SPrecFP, pattern>; |            SPrecFP, pattern>; | ||||||
|  |  | ||||||
| class FSVecInst<ValueType vectype>: | class FSVecInst<ValueType vectype>: | ||||||
|     FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|     	   [(set (vectype VECREG:$rT), |            [(set (vectype VECREG:$rT), | ||||||
| 	         (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; |                  (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; | ||||||
|  |  | ||||||
| multiclass SFPSub | multiclass SFPSub | ||||||
| { | { | ||||||
|   def v4f32: FSVecInst<v4f32>; |   def v4f32: FSVecInst<v4f32>; | ||||||
|   def r32:   FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), |   def f32:   FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||||
| 		    [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>; |                     [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>; | ||||||
| } | } | ||||||
|  |  | ||||||
| defm FS : SFPSub; | defm FS : SFPSub; | ||||||
|  |  | ||||||
| // Floating point reciprocal estimate | // Floating point reciprocal estimate | ||||||
| def FREv4f32 : |  | ||||||
|     RRForm_1<0b00011101100, (outs VECREG:$rT), (ins VECREG:$rA), |  | ||||||
|       "frest\t$rT, $rA", SPrecFP, |  | ||||||
|       [(set (v4f32 VECREG:$rT), (SPUreciprocalEst (v4f32 VECREG:$rA)))]>; |  | ||||||
|  |  | ||||||
| def FREf32 : | class FRESTInst<dag OOL, dag IOL>: | ||||||
|     RRForm_1<0b00011101100, (outs R32FP:$rT), (ins R32FP:$rA), |   RRForm_1<0b00110111000, OOL, IOL, | ||||||
|       "frest\t$rT, $rA", SPrecFP, |            "frest\t$rT, $rA", SPrecFP, | ||||||
|       [(set R32FP:$rT, (SPUreciprocalEst R32FP:$rA))]>; |            [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def FRESTv4f32 : | ||||||
|  |     FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>; | ||||||
|  |  | ||||||
|  | def FRESTf32 : | ||||||
|  |     FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>; | ||||||
|  |  | ||||||
| // Floating point interpolate (used in conjunction with reciprocal estimate) | // Floating point interpolate (used in conjunction with reciprocal estimate) | ||||||
| def FIv4f32 : | def FIv4f32 : | ||||||
|     RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|       "fi\t$rT, $rA, $rB", SPrecFP, |       "fi\t$rT, $rA, $rB", SPrecFP, | ||||||
|       [(set (v4f32 VECREG:$rT), (SPUinterpolate (v4f32 VECREG:$rA), |       [/* no pattern */]>; | ||||||
|                                                 (v4f32 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def FIf32 : | def FIf32 : | ||||||
|     RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), |     RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||||
|       "fi\t$rT, $rA, $rB", SPrecFP, |       "fi\t$rT, $rA, $rB", SPrecFP, | ||||||
|       [(set R32FP:$rT, (SPUinterpolate R32FP:$rA, R32FP:$rB))]>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
| //-------------------------------------------------------------------------- | //-------------------------------------------------------------------------- | ||||||
| // Basic single precision floating point comparisons: | // Basic single precision floating point comparisons: | ||||||
| @@ -4445,12 +4453,14 @@ def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), | |||||||
|                        (SPUlo tconstpool:$in, 0)), |                        (SPUlo tconstpool:$in, 0)), | ||||||
|           (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; |           (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; | ||||||
|  |  | ||||||
|  | /* | ||||||
| def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm), | def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm), | ||||||
|           (AIr32 R32C:$sp, i32ImmSExt10:$imm)>; |           (AIr32 R32C:$sp, i32ImmSExt10:$imm)>; | ||||||
|  |  | ||||||
| def : Pat<(SPUindirect R32C:$sp, imm:$imm), | def : Pat<(SPUindirect R32C:$sp, imm:$imm), | ||||||
|           (Ar32 R32C:$sp, |           (Ar32 R32C:$sp, | ||||||
|                 (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>; |                 (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>; | ||||||
|  |  */ | ||||||
|  |  | ||||||
| def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), | def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), | ||||||
|           (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; |           (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; | ||||||
| @@ -4466,5 +4476,7 @@ def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), | |||||||
|  |  | ||||||
| // Intrinsics: | // Intrinsics: | 
| include "CellSDKIntrinsics.td" | include "CellSDKIntrinsics.td" | ||||||
|  | // Various math operator instruction sequences | ||||||
|  | include "SPUMathInstr.td" | ||||||
| // 64-bit "instructions"/support | // 64-bit "instructions"/support | ||||||
| include "SPU64InstrInfo.td" | include "SPU64InstrInfo.td" | ||||||
|   | |||||||
							
								
								
									
										99
									
								
								lib/Target/CellSPU/SPUMathInstr.td
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								lib/Target/CellSPU/SPUMathInstr.td
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | |||||||
|  | //======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*---======// | 
|  | // | ||||||
|  | //                     Cell SPU math operations | ||||||
|  | // | ||||||
|  | // This target description file contains instruction sequences for various | ||||||
|  | // math operations, such as vector multiplies, i32 multiply, etc., for the | ||||||
|  | // SPU's i32, i16, and i8 types and their corresponding vector types. | 
|  | // | ||||||
|  | // Any resemblance to libsimdmath or the Cell SDK simdmath library is | ||||||
|  | // purely and completely coincidental. | ||||||
|  | // | ||||||
|  | // Primary author: Scott Michel (scottm@aero.org) | ||||||
|  | //===----------------------------------------------------------------------===// | ||||||
|  |  | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  | // v16i8 multiply instruction sequence: | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
|  | def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), | ||||||
|  |           (ORv4i32 | ||||||
|  |            (ANDv4i32 | ||||||
|  |             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), | ||||||
|  |                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), | ||||||
|  |                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8), | ||||||
|  |                        (FSMBIv8i16 0x2222)), | ||||||
|  |             (ILAv4i32 0x0000ffff)), | ||||||
|  |            (SHLIv4i32 | ||||||
|  |             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), | ||||||
|  |                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)), | ||||||
|  |                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), | ||||||
|  |                                              (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), | ||||||
|  |                        (FSMBIv8i16 0x2222)), 16))>; | ||||||
|  |                          | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  | // v8i16 multiply instruction sequence: | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
|  | def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), | ||||||
|  |           (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), | ||||||
|  |                      (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), | ||||||
|  |                      (FSMBIv8i16 0xcccc))>; | ||||||
|  |                   | ||||||
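A hedged C++ model of what the v8i16 pattern above computes for one 32-bit word (two i16 lanes): MPY yields the correct low halfword, MPYHH shifted left 16 yields the correct high halfword, and the FSMBI 0xcccc mask tells SELB which halfword to take from each.

#include <cstdint>

// One 32-bit word holds two 16-bit lanes: [hi | lo].
uint32_t mul_two_i16_lanes(uint32_t a, uint32_t b) {
  uint32_t lo = (uint32_t)((int16_t)a * (int16_t)b);                  // MPYv8i16: lo16 x lo16
  uint32_t hi = (uint32_t)((int16_t)(a >> 16) * (int16_t)(b >> 16));  // MPYHHv8i16: hi16 x hi16
  // SELB with the 0xcccc byte mask keeps hi's low 16 bits in the upper
  // halfword and lo's low 16 bits in the lower halfword.
  return (hi << 16) | (lo & 0xffffu);
}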
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  | // v4i32, i32 multiply instruction sequence: | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
|  | def MPYv4i32: | ||||||
|  |   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), | ||||||
|  |       (Av4i32 | ||||||
|  |         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB), | ||||||
|  |                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)), | ||||||
|  |         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>; | ||||||
|  |  | ||||||
|  | def MPYi32: | ||||||
|  |   Pat<(mul R32C:$rA, R32C:$rB), | ||||||
|  |       (Ar32 | ||||||
|  |         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), | ||||||
|  |               (MPYHr32 R32C:$rB, R32C:$rA)), | ||||||
|  |         (MPYUr32 R32C:$rA, R32C:$rB))>; | ||||||
|  |  | ||||||
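MPYv4i32 and MPYi32 above rebuild a full 32-bit multiply from the SPU's 16-bit multipliers: MPYH forms the product of one operand's high halfword with the other's low halfword, already positioned in the upper half of the word, MPYU forms the unsigned product of the two low halfwords, and the high-by-high term is congruent to 0 mod 2^32 so it drops out. A minimal scalar C sketch of that decomposition (the helper names mpyh, mpyu, and mul32 are invented for illustration):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* mpyh(a,b): (high halfword of a) * (low halfword of b), shifted left 16. */
static uint32_t mpyh(uint32_t a, uint32_t b) { return ((a >> 16) * (b & 0xffff)) << 16; }
/* mpyu(a,b): unsigned product of the two low halfwords. */
static uint32_t mpyu(uint32_t a, uint32_t b) { return (a & 0xffff) * (b & 0xffff); }

/* 32-bit multiply rebuilt from 16-bit multiplies, as in the MPYi32 pattern. */
static uint32_t mul32(uint32_t a, uint32_t b) {
  return mpyh(a, b) + mpyh(b, a) + mpyu(a, b);
}

int main(void) {
  uint32_t a = 0x12345678, b = 0x9abcdef0;
  assert(mul32(a, b) == a * b);   /* the high*high term vanishes mod 2^32 */
  printf("0x%08x\n", (unsigned)mul32(a, b));
  return 0;
}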
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  | // f32, v4f32 divide instruction sequence: | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
|  | // Reciprocal estimate and interpolation | ||||||
|  | def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; | ||||||
|  | // Division estimate | ||||||
|  | def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; | ||||||
|  | // Newton-Raphson iteration | ||||||
|  | def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), | ||||||
|  | 		  	       Interpf32.Fragment, | ||||||
|  | 	  	  	       DivEstf32.Fragment)>; | ||||||
|  | // Epsilon addition | ||||||
|  | def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; | ||||||
|  |  | ||||||
|  | def : Pat<(fdiv R32FP:$rA, R32FP:$rB), | ||||||
|  | 	  (SELBf32_cond NRaphf32.Fragment, | ||||||
|  | 			Epsilonf32.Fragment, | ||||||
|  | 			(CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; | ||||||
|  |  | ||||||
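The f32 pattern above implements division as a reciprocal estimate (FREST refined by FI), a quotient estimate, one Newton-Raphson correction of that quotient, and a final step that bumps the result by one ulp whenever the remainder a - b*q stays non-negative (the CGTI test on the raw bits feeding SELB). A hedged scalar sketch of that sequence: recip_estimate stands in for FREST/FI, add_one_ulp assumes AIf32 does a wordwise integer add on the float register, and the closing comparison only approximates the bit-pattern CGTI test.

#include <stdio.h>
#include <string.h>

/* Stand-in for FREST + FI: a refined reciprocal estimate of b. */
static float recip_estimate(float b) { return 1.0f / b; }

/* Assumed effect of AIf32 ..., 1: add 1 to the bit pattern, i.e. +1 ulp. */
static float add_one_ulp(float x) {
  unsigned u;
  memcpy(&u, &x, sizeof u);
  u += 1;
  memcpy(&x, &u, sizeof x);
  return x;
}

/* Sketch of the fdiv lowering: estimate, one Newton-Raphson refinement of
 * the quotient, then pick the quotient bumped by 1 ulp whenever the
 * remainder a - b*q is still non-negative (the CGTI/SELB step). */
static float fdiv_spu(float a, float b) {
  float interp = recip_estimate(b);       /* FREST + FI         */
  float q0 = a * interp;                  /* division estimate  */
  float q1 = (a - q0 * b) * interp + q0;  /* FNMS + FMA         */
  float q2 = add_one_ulp(q1);             /* epsilon addition   */
  return (a - q2 * b >= 0.0f) ? q2 : q1;  /* CGTI ... -1 + SELB */
}

int main(void) {
  printf("%g\n", fdiv_spu(1.0f, 3.0f));
  return 0;
}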
|  | // Reciprocal estimate and interpolation | ||||||
|  | def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; | ||||||
|  | // Division estimate | ||||||
|  | def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; | ||||||
|  | // Newton-Raphson iteration | ||||||
|  | def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, | ||||||
|  | 					      (v4f32 VECREG:$rB), | ||||||
|  | 					      (v4f32 VECREG:$rA)), | ||||||
|  | 		  	           Interpv4f32.Fragment, | ||||||
|  | 	  	  	           DivEstv4f32.Fragment)>; | ||||||
|  | // Epsilon addition | ||||||
|  | def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; | ||||||
|  |  | ||||||
|  | def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), | ||||||
|  | 	  (SELBv4f32_cond NRaphv4f32.Fragment, | ||||||
|  | 			Epsilonv4f32.Fragment, | ||||||
|  | 			(CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), | ||||||
|  | 					      Epsilonv4f32.Fragment, | ||||||
|  | 					      (v4f32 VECREG:$rA)), -1))>; | ||||||
| @@ -87,24 +87,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; | |||||||
| // SPUISelLowering.h): | // SPUISelLowering.h): | ||||||
| def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; | def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; | ||||||
|  |  | ||||||
| // SPU 16-bit multiply |  | ||||||
| def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>; |  | ||||||
|  |  | ||||||
| // SPU multiply unsigned, used in instruction lowering for v4i32 |  | ||||||
| // multiplies: |  | ||||||
| def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>; |  | ||||||
| def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>; |  | ||||||
|  |  | ||||||
| // SPU 16-bit multiply high x low, shift result 16-bits |  | ||||||
| // Used to compute intermediate products for 32-bit multiplies |  | ||||||
| def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>; |  | ||||||
| def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>; |  | ||||||
|  |  | ||||||
| // SPU 16-bit multiply high x high, 32-bit product |  | ||||||
| // Used to compute intermediate products for 16-bit multiplies |  | ||||||
| def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>; |  | ||||||
| def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>; |  | ||||||
|  |  | ||||||
| // Shift left quadword by bits and bytes | // Shift left quadword by bits and bytes | ||||||
| def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>; | def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>; | ||||||
| def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>; | def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>; | ||||||
| @@ -117,11 +99,6 @@ def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>; | |||||||
| def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; | def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; | ||||||
| def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; | def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; | ||||||
|  |  | ||||||
| def SPUrotquad_rz_bytes: SDNode<"SPUISD::ROTQUAD_RZ_BYTES", |  | ||||||
|                                     SPUvecshift_type, []>; |  | ||||||
| def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS", |  | ||||||
|                                     SPUvecshift_type, []>; |  | ||||||
|  |  | ||||||
| // Vector rotate left, bits shifted out of the left are rotated in on the right | // Vector rotate left, bits shifted out of the left are rotated in on the right | ||||||
| def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", | def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", | ||||||
|                              SPUvecshift_type, []>; |                              SPUvecshift_type, []>; | ||||||
| @@ -141,12 +118,6 @@ def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; | |||||||
| // SPU gather bits instruction: | // SPU gather bits instruction: | ||||||
| def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>; | def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>; | ||||||
|  |  | ||||||
| // SPU floating point interpolate |  | ||||||
| def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>; |  | ||||||
|  |  | ||||||
| // SPU floating point reciprocal estimate (used for fdiv) |  | ||||||
| def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>; |  | ||||||
|  |  | ||||||
| def SDTprefslot2vec: SDTypeProfile<1, 1, []>; | def SDTprefslot2vec: SDTypeProfile<1, 1, []>; | ||||||
| def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; | def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -238,7 +238,7 @@ SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const | |||||||
|     SPU::R0,    /* link register */ |     SPU::R0,    /* link register */ | ||||||
|     0 /* end */ |     0 /* end */ | ||||||
|   }; |   }; | ||||||
|    |  | ||||||
|   return SPU_CalleeSaveRegs; |   return SPU_CalleeSaveRegs; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -268,7 +268,7 @@ SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const | |||||||
|     &SPU::GPRCRegClass, /* link register */ |     &SPU::GPRCRegClass, /* link register */ | ||||||
|     0 /* end */ |     0 /* end */ | ||||||
|   }; |   }; | ||||||
|   |  | ||||||
|   return SPU_CalleeSaveRegClasses; |   return SPU_CalleeSaveRegClasses; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -339,10 +339,13 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, | |||||||
|   // Now add the frame object offset to the offset from r1. |   // Now add the frame object offset to the offset from r1. | ||||||
|   int Offset = MFI->getObjectOffset(FrameIndex); |   int Offset = MFI->getObjectOffset(FrameIndex); | ||||||
|  |  | ||||||
|   // Most instructions, except for generated FrameIndex additions using AIr32, |   // Most instructions, except for generated FrameIndex additions using AIr32 | ||||||
|   // have the immediate in operand 1. AIr32, in this case, has the immediate |   // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the | ||||||
|   // in operand 2. |   // immediate in operand 2. | ||||||
|   unsigned OpNo = (MI.getOpcode() != SPU::AIr32 ? 1 : 2); |   unsigned OpNo = 1; | ||||||
|  |   if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32) | ||||||
|  |     OpNo = 2; | ||||||
|  |  | ||||||
|   MachineOperand &MO = MI.getOperand(OpNo); |   MachineOperand &MO = MI.getOperand(OpNo); | ||||||
|  |  | ||||||
|   // Offset is biased by $lr's slot at the bottom. |   // Offset is biased by $lr's slot at the bottom. | ||||||
| @@ -355,7 +358,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, | |||||||
|   if (Offset > SPUFrameInfo::maxFrameOffset() |   if (Offset > SPUFrameInfo::maxFrameOffset() | ||||||
|       || Offset < SPUFrameInfo::minFrameOffset()) { |       || Offset < SPUFrameInfo::minFrameOffset()) { | ||||||
|     cerr << "Large stack adjustment (" |     cerr << "Large stack adjustment (" | ||||||
|          << Offset  |          << Offset | ||||||
|          << ") in SPURegisterInfo::eliminateFrameIndex."; |          << ") in SPURegisterInfo::eliminateFrameIndex."; | ||||||
|   } else { |   } else { | ||||||
|     MO.ChangeToImmediate(Offset); |     MO.ChangeToImmediate(Offset); | ||||||
| @@ -371,7 +374,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const | |||||||
|  |  | ||||||
|   // Get the number of bytes to allocate from the FrameInfo |   // Get the number of bytes to allocate from the FrameInfo | ||||||
|   unsigned FrameSize = MFI->getStackSize(); |   unsigned FrameSize = MFI->getStackSize(); | ||||||
|    |  | ||||||
|   // Get the alignments provided by the target, and the maximum alignment |   // Get the alignments provided by the target, and the maximum alignment | ||||||
|   // (if any) of the fixed frame objects. |   // (if any) of the fixed frame objects. | ||||||
|   unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); |   unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); | ||||||
| @@ -381,7 +384,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const | |||||||
|  |  | ||||||
|   // Get the maximum call frame size of all the calls. |   // Get the maximum call frame size of all the calls. | ||||||
|   unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); |   unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); | ||||||
|      |  | ||||||
|   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so |   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so | ||||||
|   // that allocations will be aligned. |   // that allocations will be aligned. | ||||||
|   if (MFI->hasVarSizedObjects()) |   if (MFI->hasVarSizedObjects()) | ||||||
| @@ -389,7 +392,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const | |||||||
|  |  | ||||||
|   // Update maximum call frame size. |   // Update maximum call frame size. | ||||||
|   MFI->setMaxCallFrameSize(maxCallFrameSize); |   MFI->setMaxCallFrameSize(maxCallFrameSize); | ||||||
|    |  | ||||||
|   // Include call frame size in total. |   // Include call frame size in total. | ||||||
|   FrameSize += maxCallFrameSize; |   FrameSize += maxCallFrameSize; | ||||||
|  |  | ||||||
| @@ -418,18 +421,18 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const | |||||||
|   MachineBasicBlock::iterator MBBI = MBB.begin(); |   MachineBasicBlock::iterator MBBI = MBB.begin(); | ||||||
|   MachineFrameInfo *MFI = MF.getFrameInfo(); |   MachineFrameInfo *MFI = MF.getFrameInfo(); | ||||||
|   MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); |   MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); | ||||||
|    |  | ||||||
|   // Prepare for debug frame info. |   // Prepare for debug frame info. | ||||||
|   bool hasDebugInfo = MMI && MMI->hasDebugInfo(); |   bool hasDebugInfo = MMI && MMI->hasDebugInfo(); | ||||||
|   unsigned FrameLabelId = 0; |   unsigned FrameLabelId = 0; | ||||||
|    |  | ||||||
|   // Move MBBI back to the beginning of the function. |   // Move MBBI back to the beginning of the function. | ||||||
|   MBBI = MBB.begin(); |   MBBI = MBB.begin(); | ||||||
|    |  | ||||||
|   // Work out frame sizes. |   // Work out frame sizes. | ||||||
|   determineFrameLayout(MF); |   determineFrameLayout(MF); | ||||||
|   int FrameSize = MFI->getStackSize(); |   int FrameSize = MFI->getStackSize(); | ||||||
|    |  | ||||||
|   assert((FrameSize & 0xf) == 0 |   assert((FrameSize & 0xf) == 0 | ||||||
|          && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); |          && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); | ||||||
|  |  | ||||||
| @@ -440,7 +443,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const | |||||||
|       FrameLabelId = MMI->NextLabelID(); |       FrameLabelId = MMI->NextLabelID(); | ||||||
|       BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId); |       BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId); | ||||||
|     } |     } | ||||||
|    |  | ||||||
|     // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) |     // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) | ||||||
|     // for the ABI |     // for the ABI | ||||||
|     BuildMI(MBB, MBBI, TII.get(SPU::STQDr32), SPU::R0).addImm(16) |     BuildMI(MBB, MBBI, TII.get(SPU::STQDr32), SPU::R0).addImm(16) | ||||||
| @@ -476,15 +479,15 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const | |||||||
|       cerr << "Unhandled frame size: " << FrameSize << "\n"; |       cerr << "Unhandled frame size: " << FrameSize << "\n"; | ||||||
|       abort(); |       abort(); | ||||||
|     } |     } | ||||||
|   |  | ||||||
|     if (hasDebugInfo) { |     if (hasDebugInfo) { | ||||||
|       std::vector<MachineMove> &Moves = MMI->getFrameMoves(); |       std::vector<MachineMove> &Moves = MMI->getFrameMoves(); | ||||||
|      |  | ||||||
|       // Show update of SP. |       // Show update of SP. | ||||||
|       MachineLocation SPDst(MachineLocation::VirtualFP); |       MachineLocation SPDst(MachineLocation::VirtualFP); | ||||||
|       MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize); |       MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize); | ||||||
|       Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); |       Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); | ||||||
|      |  | ||||||
|       // Add callee saved registers to move list. |       // Add callee saved registers to move list. | ||||||
|       const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); |       const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); | ||||||
|       for (unsigned I = 0, E = CSI.size(); I != E; ++I) { |       for (unsigned I = 0, E = CSI.size(); I != E; ++I) { | ||||||
| @@ -495,11 +498,11 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const | |||||||
|         MachineLocation CSSrc(Reg); |         MachineLocation CSSrc(Reg); | ||||||
|         Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc)); |         Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc)); | ||||||
|       } |       } | ||||||
|      |  | ||||||
|       // Mark effective beginning of when frame pointer is ready. |       // Mark effective beginning of when frame pointer is ready. | ||||||
|       unsigned ReadyLabelId = MMI->NextLabelID(); |       unsigned ReadyLabelId = MMI->NextLabelID(); | ||||||
|       BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId); |       BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId); | ||||||
|      |  | ||||||
|       MachineLocation FPDst(SPU::R1); |       MachineLocation FPDst(SPU::R1); | ||||||
|       MachineLocation FPSrc(MachineLocation::VirtualFP); |       MachineLocation FPSrc(MachineLocation::VirtualFP); | ||||||
|       Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); |       Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); | ||||||
|   | |||||||
| @@ -1,9 +1,11 @@ | |||||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||||
| ; RUN: grep frest    %t1.s | count 2  | ; RUN: grep frest    %t1.s | count 2  | ||||||
| ; RUN: grep -w fi    %t1.s | count 2  | ; RUN: grep -w fi    %t1.s | count 2  | ||||||
| ; RUN: grep fm       %t1.s | count 4  | ; RUN: grep -w fm    %t1.s | count 2 | ||||||
| ; RUN: grep fma      %t1.s | count 2  | ; RUN: grep fma      %t1.s | count 2  | ||||||
| ; RUN: grep fnms     %t1.s | count 2 | ; RUN: grep fnms     %t1.s | count 4 | ||||||
|  | ; RUN: grep cgti     %t1.s | count 2 | ||||||
|  | ; RUN: grep selb     %t1.s | count 2 | ||||||
| ; | ; | ||||||
| ; This file includes standard floating point arithmetic instructions | ; This file includes standard floating point arithmetic instructions | ||||||
| target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | ||||||
|   | |||||||
| @@ -1,8 +1,5 @@ | |||||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||||
| ; RUN: grep {fsmbi.*61680}   %t1.s | count 1 | ; RUN: grep xswd	     %t1.s | count 1 | ||||||
| ; RUN: grep rotqmbyi         %t1.s | count 1 |  | ||||||
| ; RUN: grep rotmai           %t1.s | count 1 |  | ||||||
| ; RUN: grep selb             %t1.s | count 1 |  | ||||||
| ; RUN: grep shufb            %t1.s | count 2 | ; RUN: grep shufb            %t1.s | count 2 | ||||||
| ; RUN: grep cg               %t1.s | count 1 | ; RUN: grep cg               %t1.s | count 1 | ||||||
| ; RUN: grep addx             %t1.s | count 1 | ; RUN: grep addx             %t1.s | count 1 | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ | |||||||
| ; RUN: grep and     %t1.s | count 2 | ; RUN: grep and     %t1.s | count 2 | ||||||
| ; RUN: grep selb    %t1.s | count 6 | ; RUN: grep selb    %t1.s | count 6 | ||||||
| ; RUN: grep fsmbi   %t1.s | count 4 | ; RUN: grep fsmbi   %t1.s | count 4 | ||||||
| ; RUN: grep shli    %t1.s | count 2 | ; RUN: grep shli    %t1.s | count 4 | ||||||
| ; RUN: grep shlhi   %t1.s | count 4 | ; RUN: grep shlhi   %t1.s | count 4 | ||||||
| ; RUN: grep ila     %t1.s | count 2 | ; RUN: grep ila     %t1.s | count 2 | ||||||
| ; RUN: grep xsbh    %t1.s | count 4 | ; RUN: grep xsbh    %t1.s | count 4 | ||||||
|   | |||||||
| @@ -1,10 +1,21 @@ | |||||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||||
| ; RUN: grep shlh   %t1.s | count 84 | ; RUN: grep -w shlh      %t1.s | count 9 | ||||||
| ; RUN: grep shlhi  %t1.s | count 51 | ; RUN: grep -w shlhi     %t1.s | count 3 | ||||||
| ; RUN: grep shl    %t1.s | count 168 | ; RUN: grep -w shl       %t1.s | count 9 | ||||||
| ; RUN: grep shli   %t1.s | count 51 | ; RUN: grep -w shli      %t1.s | count 3 | ||||||
| ; RUN: grep xshw   %t1.s | count 5 | ; RUN: grep -w xshw      %t1.s | count 5 | ||||||
| ; RUN: grep and    %t1.s | count 5 | ; RUN: grep -w and       %t1.s | count 5 | ||||||
|  | ; RUN: grep -w andi      %t1.s | count 2 | ||||||
|  | ; RUN: grep -w rotmi     %t1.s | count 2 | ||||||
|  | ; RUN: grep -w rotqmbyi  %t1.s | count 1 | ||||||
|  | ; RUN: grep -w rotqmbii  %t1.s | count 2 | ||||||
|  | ; RUN: grep -w rotqmby   %t1.s | count 1 | ||||||
|  | ; RUN: grep -w rotqmbi   %t1.s | count 1 | ||||||
|  | ; RUN: grep -w rotqbyi   %t1.s | count 1 | ||||||
|  | ; RUN: grep -w rotqbii   %t1.s | count 2 | ||||||
|  | ; RUN: grep -w rotqbybi  %t1.s | count 1 | ||||||
|  | ; RUN: grep -w sfi       %t1.s | count 3 | ||||||
|  |  | ||||||
| target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | ||||||
| target triple = "spu" | target triple = "spu" | ||||||
|  |  | ||||||
| @@ -210,3 +221,57 @@ define i32 @shli_i32_12(i32 zeroext %arg1) zeroext { | |||||||
|         %A = shl i32 0, %arg1 |         %A = shl i32 0, %arg1 | ||||||
|         ret i32 %A |         ret i32 %A | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ;; i64 shift left | ||||||
|  |  | ||||||
|  | define i64 @shl_i64_1(i64 %arg1) { | ||||||
|  | 	%A = shl i64 %arg1, 9 | ||||||
|  | 	ret i64 %A | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @shl_i64_2(i64 %arg1) { | ||||||
|  | 	%A = shl i64 %arg1, 3 | ||||||
|  | 	ret i64 %A | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @shl_i64_3(i64 %arg1, i32 %shift) { | ||||||
|  | 	%1 = zext i32 %shift to i64 | ||||||
|  | 	%2 = shl i64 %arg1, %1 | ||||||
|  | 	ret i64 %2 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ;; i64 shift right logical (zeros shifted in at the left) | ||||||
|  |  | ||||||
|  | define i64 @lshr_i64_1(i64 %arg1) { | ||||||
|  | 	%1 = lshr i64 %arg1, 9 | ||||||
|  | 	ret i64 %1 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @lshr_i64_2(i64 %arg1) { | ||||||
|  | 	%1 = lshr i64 %arg1, 3 | ||||||
|  | 	ret i64 %1 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @lshr_i64_3(i64 %arg1, i32 %shift) { | ||||||
|  | 	%1 = zext i32 %shift to i64 | ||||||
|  | 	%2 = lshr i64 %arg1, %1 | ||||||
|  | 	ret i64 %2 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ;; i64 shift right arithmetic (sign bit shifted in at the left) | ||||||
|  |  | ||||||
|  | define i64 @ashr_i64_1(i64 %arg) { | ||||||
|  | 	%1 = ashr i64 %arg, 9 | ||||||
|  | 	ret i64 %1 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @ashr_i64_2(i64 %arg) { | ||||||
|  | 	%1 = ashr i64 %arg, 3 | ||||||
|  | 	ret i64 %1 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @ashr_i64_3(i64 %arg1, i32 %shift) { | ||||||
|  | 	%1 = zext i32 %shift to i64 | ||||||
|  | 	%2 = ashr i64 %arg1, %1 | ||||||
|  | 	ret i64 %2 | ||||||
|  | } | ||||||
|   | |||||||
| @@ -34,19 +34,45 @@ struct pred_s preds[] = { | |||||||
|   { "neq", i64_neq, i64_neq_select } |   { "neq", i64_neq, i64_neq_select } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | uint64_t i64_shl_const(uint64_t a) { | ||||||
|  |   return a << 10; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | uint64_t i64_shl(uint64_t a, int amt) { | ||||||
|  |   return a << amt; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | uint64_t i64_srl_const(uint64_t a) { | ||||||
|  |   return a >> 10; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | uint64_t i64_srl(uint64_t a, int amt) { | ||||||
|  |   return a >> amt; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | int64_t i64_sra_const(int64_t a) { | ||||||
|  |   return a >> 10; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | int64_t i64_sra(int64_t a, int amt) { | ||||||
|  |   return a >> amt; | ||||||
|  | } | ||||||
|  |  | ||||||
| int main(void) { | int main(void) { | ||||||
|   int i; |   int i; | ||||||
|   int64_t a = 1234567890000LL; |   int64_t a =  1234567890003LL; | ||||||
|   int64_t b = 2345678901234LL; |   int64_t b =  2345678901235LL; | ||||||
|   int64_t c = 1234567890001LL; |   int64_t c =  1234567890001LL; | ||||||
|   int64_t d =         10001LL; |   int64_t d =          10001LL; | ||||||
|   int64_t e =         10000LL; |   int64_t e =          10000LL; | ||||||
|  |   int64_t f = -1068103409991LL; | ||||||
|  |  | ||||||
|   printf("a = %16lld (0x%016llx)\n", a, a); |   printf("a = %16lld (0x%016llx)\n", a, a); | ||||||
|   printf("b = %16lld (0x%016llx)\n", b, b); |   printf("b = %16lld (0x%016llx)\n", b, b); | ||||||
|   printf("c = %16lld (0x%016llx)\n", c, c); |   printf("c = %16lld (0x%016llx)\n", c, c); | ||||||
|   printf("d = %16lld (0x%016llx)\n", d, d); |   printf("d = %16lld (0x%016llx)\n", d, d); | ||||||
|   printf("e = %16lld (0x%016llx)\n", e, e); |   printf("e = %16lld (0x%016llx)\n", e, e); | ||||||
|  |   printf("f = %16lld (0x%016llx)\n", f, f); | ||||||
|   printf("----------------------------------------\n"); |   printf("----------------------------------------\n"); | ||||||
|  |  | ||||||
|   for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { |   for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { | ||||||
| @@ -64,5 +90,23 @@ int main(void) { | |||||||
|     printf("----------------------------------------\n"); |     printf("----------------------------------------\n"); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   printf("a                = 0x%016llx\n", a); | ||||||
|  |   printf("i64_shl_const(a) = 0x%016llx\n", i64_shl_const(a)); | ||||||
|  |   printf("i64_shl(a)       = 0x%016llx\n", i64_shl(a, 5)); | ||||||
|  |   printf("i64_srl_const(a) = 0x%016llx\n", i64_srl_const(a)); | ||||||
|  |   printf("i64_srl(a)       = 0x%016llx\n", i64_srl(a, 5)); | ||||||
|  |   printf("i64_sra_const(a) = 0x%016llx\n", i64_sra_const(a)); | ||||||
|  |   printf("i64_sra(a)       = 0x%016llx\n", i64_sra(a, 5)); | ||||||
|  |   printf("----------------------------------------\n"); | ||||||
|  |  | ||||||
|  |   printf("f                = 0x%016llx\n", f); | ||||||
|  |   printf("i64_shl_const(f) = 0x%016llx\n", i64_shl_const(f)); | ||||||
|  |   printf("i64_shl(f)       = 0x%016llx\n", i64_shl(f, 10)); | ||||||
|  |   printf("i64_srl_const(f) = 0x%016llx\n", i64_srl_const(f)); | ||||||
|  |   printf("i64_srl(f)       = 0x%016llx\n", i64_srl(f, 10)); | ||||||
|  |   printf("i64_sra_const(f) = 0x%016llx\n", i64_sra_const(f)); | ||||||
|  |   printf("i64_sra(f)       = 0x%016llx\n", i64_sra(f, 10)); | ||||||
|  |   printf("----------------------------------------\n"); | ||||||
|  |  | ||||||
|   return 0; |   return 0; | ||||||
| } | } | ||||||
|   | |||||||