mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-31 08:16:47 +00:00 
			
		
		
		
	- Start moving target-dependent nodes that could be represented by an
instruction sequence and cannot ordinarily be simplified by DAGcombine into the various target description files or SPUDAGToDAGISel.cpp. This makes some 64-bit operations legal. - Eliminate target-dependent ISD enums. - Update tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61508 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -15,6 +15,13 @@ | ||||
| // | ||||
| include "llvm/Target/Target.td" | ||||
|  | ||||
| // Holder of code fragments: wraps a dag so that it can be handed to other | ||||
| // classes as a parameter and read back through the Fragment field. | ||||
|  | ||||
| class CodeFrag<dag frag> { | ||||
|   dag Fragment = frag; | ||||
| } | ||||
|  | ||||
| //===----------------------------------------------------------------------===// | ||||
| // Register File Description | ||||
| //===----------------------------------------------------------------------===// | ||||
|   | ||||
| @@ -1,8 +1,17 @@ | ||||
| //====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====// | ||||
| // | ||||
| //                     Cell SPU 64-bit operations | ||||
| // | ||||
| // Primary author: Scott Michel (scottm@aero.org) | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| // 64-bit comparisons: | ||||
| // | ||||
| // 1. The instruction sequences for vector vice scalar differ by a | ||||
| //    constant. | ||||
| //    constant. In the scalar case, we're only interested in the | ||||
| //    top two 32-bit slots, whereas we're interested in an exact | ||||
| //    all-four-slot match in the vector case. | ||||
| // | ||||
| // 2. There are no "immediate" forms, since loading 64-bit constants | ||||
| //    could be a constant pool load. | ||||
| @@ -10,10 +19,10 @@ | ||||
| // 3. i64 setcc results are i32, which are subsequently converted to a FSM | ||||
| //    mask when used in a select pattern. | ||||
| // | ||||
| // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask | ||||
| //    (TODO) | ||||
| // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO) | ||||
| //    [Note: this may be moot, since gb produces v4i32 or r32.] | ||||
| // | ||||
| // M00$E Kan be Pretty N@sTi!!!!! (apologies to Monty!) | ||||
| // M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!) | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
|  | ||||
| // selb instruction definition for i64. Note that the selection mask is | ||||
| @@ -22,17 +31,15 @@ def SELBr64_cond: | ||||
|    SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), | ||||
|             [/* no pattern */]>; | ||||
|  | ||||
| class CodeFrag<dag frag> { | ||||
|   dag Fragment = frag; | ||||
| } | ||||
|  | ||||
| class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>: | ||||
| // select the negative condition: | ||||
| class I64SELECTNegCond<PatFrag cond, CodeFrag compare>: | ||||
|   Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse), | ||||
|       (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>; | ||||
|       (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>; | ||||
|  | ||||
| class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>: | ||||
| // setcc the negative condition: | ||||
| class I64SETCCNegCond<PatFrag cond, CodeFrag compare>: | ||||
|   Pat<(cond R64C:$rA, R64C:$rB), | ||||
|       (XORIr32 cmpare.Fragment, -1)>; | ||||
|       (XORIr32 compare.Fragment, -1)>; | ||||
|  | ||||
| // The i64 seteq fragment that does the scalar->vector conversion and | ||||
| // comparison: | ||||
| @@ -64,14 +71,13 @@ multiclass CompareEqual64 { | ||||
| defm I64EQ: CompareEqual64; | ||||
|  | ||||
| def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>; | ||||
| def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>; | ||||
|  | ||||
| def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), | ||||
|           I64EQv2i64.Fragment>; | ||||
|  | ||||
| def I64Select: | ||||
|     Pat<(select R32C:$rC, R64C:$rB, R64C:$rA), | ||||
| def : Pat<(select R32C:$rC, R64C:$rB, R64C:$rA), | ||||
|           (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>; | ||||
|  | ||||
| // i64 setne: | ||||
| def : I64SETCCNegCond<setne, I64EQr64>; | ||||
|  | ||||
| def : I64SELECTNegCond<setne, I64EQr64>; | ||||
|  | ||||
| // i64 setugt: | ||||
|   | ||||
| @@ -258,6 +258,15 @@ public: | ||||
|   /// target-specific node if it hasn't already been changed. | ||||
|   SDNode *Select(SDValue Op); | ||||
|  | ||||
|   //! Emit the instruction sequence for i64 shl | ||||
|   SDNode *SelectSHLi64(SDValue &Op, MVT OpVT); | ||||
|  | ||||
|   //! Emit the instruction sequence for i64 srl | ||||
|   SDNode *SelectSRLi64(SDValue &Op, MVT OpVT); | ||||
|  | ||||
|   //! Emit the instruction sequence for i64 sra | ||||
|   SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); | ||||
|  | ||||
|   //! Returns true if the address N is an A-form (local store) address | ||||
|   bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, | ||||
|                        SDValue &Index); | ||||
| @@ -622,27 +631,20 @@ SPUDAGToDAGISel::Select(SDValue Op) { | ||||
|   if (N->isMachineOpcode()) { | ||||
|     return NULL;   // Already selected. | ||||
|   } else if (Opc == ISD::FrameIndex) { | ||||
|     // Selects to (add $sp, FI * stackSlotSize) | ||||
|     int FI = | ||||
|       SPUFrameInfo::FItoStackOffset(cast<FrameIndexSDNode>(N)->getIndex()); | ||||
|     MVT PtrVT = SPUtli.getPointerTy(); | ||||
|     int FI = cast<FrameIndexSDNode>(N)->getIndex(); | ||||
|     SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); | ||||
|     SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType()); | ||||
|  | ||||
|     // Adjust stack slot to actual offset in frame: | ||||
|     if (isS10Constant(FI)) { | ||||
|       DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AIr32 $sp, " | ||||
|                  << FI | ||||
|                  << "\n"); | ||||
|     if (FI < 128) { | ||||
|       NewOpc = SPU::AIr32; | ||||
|       Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT); | ||||
|       Ops[1] = CurDAG->getTargetConstant(FI, PtrVT); | ||||
|       Ops[0] = TFI; | ||||
|       Ops[1] = Imm0; | ||||
|       n_ops = 2; | ||||
|     } else { | ||||
|       DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with Ar32 $sp, " | ||||
|                  << FI | ||||
|                  << "\n"); | ||||
|       NewOpc = SPU::Ar32; | ||||
|       Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT); | ||||
|       Ops[1] = CurDAG->getConstant(FI, PtrVT); | ||||
|       Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType()); | ||||
|       Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, Op.getValueType(), | ||||
|                                              TFI, Imm0), 0); | ||||
|       n_ops = 2; | ||||
|     } | ||||
|   } else if (Opc == ISD::ZERO_EXTEND) { | ||||
| @@ -661,6 +663,18 @@ SPUDAGToDAGISel::Select(SDValue Op) { | ||||
|         n_ops = 2; | ||||
|       } | ||||
|     } | ||||
|   } else if (Opc == ISD::SHL) { | ||||
|     if (OpVT == MVT::i64) { | ||||
|       return SelectSHLi64(Op, OpVT); | ||||
|     } | ||||
|   } else if (Opc == ISD::SRL) { | ||||
|     if (OpVT == MVT::i64) { | ||||
|       return SelectSRLi64(Op, OpVT); | ||||
|     } | ||||
|   } else if (Opc == ISD::SRA) { | ||||
|     if (OpVT == MVT::i64) { | ||||
|       return SelectSRAi64(Op, OpVT); | ||||
|     } | ||||
|   } else if (Opc == SPUISD::LDRESULT) { | ||||
|     // Custom select instructions for LDRESULT | ||||
|     MVT VT = N->getValueType(0); | ||||
| @@ -723,7 +737,213 @@ SPUDAGToDAGISel::Select(SDValue Op) { | ||||
|     return SelectCode(Op); | ||||
| } | ||||
|  | ||||
| /// createPPCISelDag - This pass converts a legalized DAG into a  | ||||
| /*! | ||||
|  * Emit the instruction sequence for i64 left shifts. The basic algorithm | ||||
|  * is to fill the bottom two word slots with zeros so that zeros are shifted | ||||
|  * in as the entire quadword is shifted left. | ||||
|  * | ||||
|  * \note This code could also be used to implement v2i64 shl. | ||||
|  * | ||||
|  * \note A constant shift amount of zero emits no shift instruction; the | ||||
|  * final ORi64_v2i64 node is built directly on the zero-filled operand. | ||||
|  * | ||||
|  * @param Op The shl node: operand 0 is the value to shift, operand 1 is the | ||||
|  * shift amount | ||||
|  * @param OpVT Op's machine value type (doesn't need to be passed, but | ||||
|  * makes life easier.) | ||||
|  * @return The SDNode with the entire instruction sequence | ||||
|  */ | ||||
| SDNode * | ||||
| SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) { | ||||
|   SDValue Op0 = Op.getOperand(0); | ||||
|   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||
|   SDValue ShiftAmt = Op.getOperand(1); | ||||
|   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||
|   SDNode *VecOp0, *SelMask, *ZeroFill, *Shift; | ||||
|   SDValue SelMaskVal; | ||||
|  | ||||
|   // Promote Op0 to a vector, then select between it and an all-zero vector | ||||
|   // so that (per the description above) zeros sit below the value. | ||||
|   VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0); | ||||
|   SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); | ||||
|   SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, SelMaskVal); | ||||
|   ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, VecVT, | ||||
|                                    CurDAG->getTargetConstant(0, OpVT)); | ||||
|   VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, VecVT, | ||||
|                                  SDValue(ZeroFill, 0), | ||||
|                                  SDValue(VecOp0, 0), | ||||
|                                  SDValue(SelMask, 0)); | ||||
|  | ||||
|   // Seed the result with the unshifted value so that a constant shift of | ||||
|   // zero (no byte part, no bit part) never leaves Shift as a null node. | ||||
|   Shift = VecOp0; | ||||
|  | ||||
|   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||
|     // Constant amount: split into whole bytes and the remaining 0..7 bits, | ||||
|     // and only emit the immediate-form shifts that are actually needed. | ||||
|     unsigned bytes = unsigned(CN->getZExtValue()) >> 3; | ||||
|     unsigned bits = unsigned(CN->getZExtValue()) & 7; | ||||
|  | ||||
|     if (bytes > 0) { | ||||
|       Shift = | ||||
|         CurDAG->getTargetNode(SPU::SHLQBYIv2i64, VecVT, | ||||
|                               SDValue(Shift, 0), | ||||
|                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||
|     } | ||||
|  | ||||
|     if (bits > 0) { | ||||
|       Shift = | ||||
|         CurDAG->getTargetNode(SPU::SHLQBIIv2i64, VecVT, | ||||
|                               SDValue(Shift, 0), | ||||
|                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||
|     } | ||||
|   } else { | ||||
|     // Variable amount: compute the byte count (amount / 8) and the bit count | ||||
|     // (amount & 7) at run time and apply both register-form shifts. | ||||
|     SDNode *Bytes = | ||||
|       CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT, | ||||
|                             ShiftAmt, | ||||
|                             CurDAG->getTargetConstant(3, ShiftAmtVT)); | ||||
|     SDNode *Bits = | ||||
|       CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT, | ||||
|                             ShiftAmt, | ||||
|                             CurDAG->getTargetConstant(7, ShiftAmtVT)); | ||||
|     Shift = | ||||
|       CurDAG->getTargetNode(SPU::SHLQBYv2i64, VecVT, | ||||
|                             SDValue(Shift, 0), SDValue(Bytes, 0)); | ||||
|     Shift = | ||||
|       CurDAG->getTargetNode(SPU::SHLQBIv2i64, VecVT, | ||||
|                             SDValue(Shift, 0), SDValue(Bits, 0)); | ||||
|   } | ||||
|  | ||||
|   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||
| } | ||||
|  | ||||
| /*! | ||||
|  * Emit the instruction sequence for i64 logical right shifts. | ||||
|  * | ||||
|  * \note A constant shift amount of zero emits no shift instruction; the | ||||
|  * final ORi64_v2i64 node is built directly on the promoted operand. | ||||
|  * | ||||
|  * @param Op The srl node: operand 0 is the value to shift, operand 1 is the | ||||
|  * shift amount | ||||
|  * @param OpVT Op's machine value type (doesn't need to be passed, but | ||||
|  * makes life easier.) | ||||
|  * @return The SDNode with the entire instruction sequence | ||||
|  */ | ||||
| SDNode * | ||||
| SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) { | ||||
|   SDValue Op0 = Op.getOperand(0); | ||||
|   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||
|   SDValue ShiftAmt = Op.getOperand(1); | ||||
|   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||
|   SDNode *VecOp0, *Shift; | ||||
|  | ||||
|   // Promote Op0 to a vector so the quadword shift instructions can be used. | ||||
|   VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0); | ||||
|  | ||||
|   // Seed the result with the unshifted value so that a constant shift of | ||||
|   // zero (no byte part, no bit part) never leaves Shift as a null node. | ||||
|   Shift = VecOp0; | ||||
|  | ||||
|   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||
|     // Constant amount: split into whole bytes and the remaining 0..7 bits, | ||||
|     // and only emit the immediate-form shifts that are actually needed. | ||||
|     unsigned bytes = unsigned(CN->getZExtValue()) >> 3; | ||||
|     unsigned bits = unsigned(CN->getZExtValue()) & 7; | ||||
|  | ||||
|     if (bytes > 0) { | ||||
|       Shift = | ||||
|         CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, VecVT, | ||||
|                               SDValue(Shift, 0), | ||||
|                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||
|     } | ||||
|  | ||||
|     if (bits > 0) { | ||||
|       Shift = | ||||
|         CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, VecVT, | ||||
|                               SDValue(Shift, 0), | ||||
|                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||
|     } | ||||
|   } else { | ||||
|     // Variable amount: compute the byte count (amount / 8) and the bit count | ||||
|     // (amount & 7) at run time. | ||||
|     SDNode *Bytes = | ||||
|       CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT, | ||||
|                             ShiftAmt, | ||||
|                             CurDAG->getTargetConstant(3, ShiftAmtVT)); | ||||
|     SDNode *Bits = | ||||
|       CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT, | ||||
|                             ShiftAmt, | ||||
|                             CurDAG->getTargetConstant(7, ShiftAmtVT)); | ||||
|  | ||||
|     // Ensure that the shift amounts are negated! | ||||
|     Bytes = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||
|                                   SDValue(Bytes, 0), | ||||
|                                   CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||
|  | ||||
|     Bits = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||
|                                  SDValue(Bits, 0), | ||||
|                                  CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||
|  | ||||
|     Shift = | ||||
|       CurDAG->getTargetNode(SPU::ROTQMBYv2i64, VecVT, | ||||
|                             SDValue(Shift, 0), SDValue(Bytes, 0)); | ||||
|     Shift = | ||||
|       CurDAG->getTargetNode(SPU::ROTQMBIv2i64, VecVT, | ||||
|                             SDValue(Shift, 0), SDValue(Bits, 0)); | ||||
|   } | ||||
|  | ||||
|   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||
| } | ||||
|  | ||||
| /*! | ||||
|  * Emit the instruction sequence for i64 arithmetic right shifts. The operand | ||||
|  * is combined with a mask derived from its sign, and the combined quadword | ||||
|  * is then rotated so that the sign fill enters from the top. | ||||
|  * | ||||
|  * \note A constant shift amount of zero emits no rotate instruction; the | ||||
|  * final ORi64_v2i64 node is built directly on the sign-filled operand. | ||||
|  * | ||||
|  * @param Op The sra node: operand 0 is the value to shift, operand 1 is the | ||||
|  * shift amount | ||||
|  * @param OpVT Op's machine value type (doesn't need to be passed, but | ||||
|  * makes life easier.) | ||||
|  * @return The SDNode with the entire instruction sequence | ||||
|  */ | ||||
| SDNode * | ||||
| SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { | ||||
|   // Promote Op0 to vector | ||||
|   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||
|   SDValue ShiftAmt = Op.getOperand(1); | ||||
|   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||
|  | ||||
|   SDNode *VecOp0 = | ||||
|     CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op.getOperand(0)); | ||||
|  | ||||
|   // Arithmetic-shift by 31 to smear the sign, then pull the result out as an | ||||
|   // i32 so it can be expanded into a select mask. | ||||
|   SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); | ||||
|   SDNode *SignRot = | ||||
|     CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, MVT::v2i64, | ||||
|                           SDValue(VecOp0, 0), SignRotAmt); | ||||
|   SDNode *UpperHalfSign = | ||||
|     CurDAG->getTargetNode(SPU::ORi32_v4i32, MVT::i32, SDValue(SignRot, 0)); | ||||
|  | ||||
|   // Merge the sign mask with the promoted operand using the same 0xff00 | ||||
|   // byte-select mask that the left-shift sequence uses. | ||||
|   SDNode *UpperHalfSignMask = | ||||
|     CurDAG->getTargetNode(SPU::FSM64r32, VecVT, SDValue(UpperHalfSign, 0)); | ||||
|   SDNode *UpperLowerMask = | ||||
|     CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, | ||||
|                           CurDAG->getTargetConstant(0xff00ULL, MVT::i16)); | ||||
|   SDNode *UpperLowerSelect = | ||||
|     CurDAG->getTargetNode(SPU::SELBv2i64, VecVT, | ||||
|                           SDValue(UpperHalfSignMask, 0), | ||||
|                           SDValue(VecOp0, 0), | ||||
|                           SDValue(UpperLowerMask, 0)); | ||||
|  | ||||
|   // Seed the result with the unrotated value so that a constant shift of | ||||
|   // zero never leaves Shift as a null node. | ||||
|   SDNode *Shift = UpperLowerSelect; | ||||
|  | ||||
|   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||
|     // The rotate instructions used here rotate left (the variable-amount path | ||||
|     // below feeds them the negated shift amount), so a right shift by n is a | ||||
|     // left rotate by (128 - n) bits, split into whole bytes plus 0..7 bits. | ||||
|     // Deriving both parts from that single total keeps amounts that are an | ||||
|     // exact multiple of 8, or smaller than 8, consistent with the variable | ||||
|     // path; computing them independently as (31 - bytes) and (8 - bits) only | ||||
|     // adds up to (128 - n) when both parts are non-zero. | ||||
|     unsigned amount = unsigned(CN->getZExtValue()); | ||||
|     unsigned rotl = (128 - amount) & 127; | ||||
|     unsigned bytes = rotl >> 3; | ||||
|     unsigned bits = rotl & 7; | ||||
|  | ||||
|     if (bytes > 0) { | ||||
|       Shift = | ||||
|         CurDAG->getTargetNode(SPU::ROTQBYIv2i64, VecVT, | ||||
|                               SDValue(Shift, 0), | ||||
|                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||
|     } | ||||
|  | ||||
|     if (bits > 0) { | ||||
|       Shift = | ||||
|         CurDAG->getTargetNode(SPU::ROTQBIIv2i64, VecVT, | ||||
|                               SDValue(Shift, 0), | ||||
|                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||
|     } | ||||
|   } else { | ||||
|     // Variable amount: negate it once (0 - ShiftAmt) and let the byte-wise | ||||
|     // and bit-wise rotates each consume that same negated value. | ||||
|     SDNode *NegShift = | ||||
|       CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||
|                             ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||
|  | ||||
|     Shift = | ||||
|       CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, VecVT, | ||||
|                             SDValue(Shift, 0), SDValue(NegShift, 0)); | ||||
|     Shift = | ||||
|       CurDAG->getTargetNode(SPU::ROTQBIv2i64, VecVT, | ||||
|                             SDValue(Shift, 0), SDValue(NegShift, 0)); | ||||
|   } | ||||
|  | ||||
|   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||
| } | ||||
|  | ||||
| /// createSPUISelDag - This pass converts a legalized DAG into a | ||||
| /// SPU-specific DAG, ready for instruction scheduling. | ||||
| /// | ||||
| FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { | ||||
|   | ||||
| @@ -204,10 +204,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | ||||
|   setOperationAction(ISD::SRL,  MVT::i8,     Custom); | ||||
|   setOperationAction(ISD::SRA,  MVT::i8,     Custom); | ||||
|  | ||||
|   // SPU needs custom lowering for shift left/right for i64 | ||||
|   setOperationAction(ISD::SHL,  MVT::i64,    Custom); | ||||
|   setOperationAction(ISD::SRL,  MVT::i64,    Custom); | ||||
|   setOperationAction(ISD::SRA,  MVT::i64,    Custom); | ||||
|   // Make these operations legal and handle them during instruction selection: | ||||
|   setOperationAction(ISD::SHL,  MVT::i64,    Legal); | ||||
|   setOperationAction(ISD::SRL,  MVT::i64,    Legal); | ||||
|   setOperationAction(ISD::SRA,  MVT::i64,    Legal); | ||||
|  | ||||
|   // Custom lower i8, i32 and i64 multiplications | ||||
|   setOperationAction(ISD::MUL,  MVT::i8,     Custom); | ||||
| @@ -215,6 +215,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | ||||
|   setOperationAction(ISD::MUL,  MVT::i64,    Expand);   // libcall | ||||
|  | ||||
|   // Need to custom handle (some) common i8, i64 math ops | ||||
|   setOperationAction(ISD::ADD,  MVT::i8,     Custom); | ||||
|   setOperationAction(ISD::ADD,  MVT::i64,    Custom); | ||||
|   setOperationAction(ISD::SUB,  MVT::i8,     Custom); | ||||
|   setOperationAction(ISD::SUB,  MVT::i64,    Custom); | ||||
| @@ -249,7 +250,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | ||||
|   // Zero extension and sign extension for i64 have to be | ||||
|   // custom legalized | ||||
|   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); | ||||
|   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); | ||||
|   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom); | ||||
|  | ||||
|   // Custom lower i128 -> i64 truncates | ||||
| @@ -262,7 +262,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | ||||
|   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); | ||||
|  | ||||
|   // FDIV on SPU requires custom lowering | ||||
|   setOperationAction(ISD::FDIV, MVT::f32, Custom); | ||||
|   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // libcall | ||||
|  | ||||
|   // SPU has [U|S]INT_TO_FP | ||||
| @@ -340,7 +339,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | ||||
|     setOperationAction(ISD::ADD , VT, Legal); | ||||
|     setOperationAction(ISD::SUB , VT, Legal); | ||||
|     // mul has to be custom lowered. | ||||
|     setOperationAction(ISD::MUL , VT, Custom); | ||||
|     // TODO: v2i64 vector multiply | ||||
|     setOperationAction(ISD::MUL , VT, Legal); | ||||
|  | ||||
|     setOperationAction(ISD::AND   , VT, Legal); | ||||
|     setOperationAction(ISD::OR    , VT, Legal); | ||||
| @@ -354,7 +354,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | ||||
|     setOperationAction(ISD::SREM, VT, Expand); | ||||
|     setOperationAction(ISD::UDIV, VT, Expand); | ||||
|     setOperationAction(ISD::UREM, VT, Expand); | ||||
|     setOperationAction(ISD::FDIV, VT, Custom); | ||||
|  | ||||
|     // Custom lower build_vector, constant pool spills, insert and | ||||
|     // extract vector elements: | ||||
| @@ -371,9 +370,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | ||||
|   setOperationAction(ISD::XOR, MVT::v16i8, Custom); | ||||
|   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); | ||||
|  | ||||
|   // FIXME: This is only temporary until I put all vector multiplications in | ||||
|   // SPUInstrInfo.td: | ||||
|   setOperationAction(ISD::MUL, MVT::v4i32, Legal); | ||||
|   setOperationAction(ISD::FDIV, MVT::v4f32, Legal); | ||||
|  | ||||
|   setShiftAmountType(MVT::i32); | ||||
|   setBooleanContents(ZeroOrNegativeOneBooleanContent); | ||||
| @@ -411,10 +408,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const | ||||
|     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; | ||||
|     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; | ||||
|     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; | ||||
|     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; | ||||
|     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; | ||||
|     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; | ||||
|     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; | ||||
|     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; | ||||
|     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; | ||||
|     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; | ||||
| @@ -422,21 +415,12 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const | ||||
|     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; | ||||
|     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; | ||||
|     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; | ||||
|     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] = | ||||
|       "SPUISD::ROTQUAD_RZ_BYTES"; | ||||
|     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] = | ||||
|       "SPUISD::ROTQUAD_RZ_BITS"; | ||||
|     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; | ||||
|     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = | ||||
|       "SPUISD::ROTBYTES_LEFT_BITS"; | ||||
|     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; | ||||
|     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; | ||||
|     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; | ||||
|     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; | ||||
|     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; | ||||
|     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; | ||||
|     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; | ||||
|     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; | ||||
|     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; | ||||
|   } | ||||
|  | ||||
| @@ -1922,182 +1906,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { | ||||
|   return SDValue(); | ||||
| } | ||||
|  | ||||
| static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) { | ||||
|   switch (Op.getValueType().getSimpleVT()) { | ||||
|   default: | ||||
|     cerr << "CellSPU: Unknown vector multiplication, got " | ||||
|          << Op.getValueType().getMVTString() | ||||
|          << "\n"; | ||||
|     abort(); | ||||
|     /*NOTREACHED*/ | ||||
|  | ||||
|   case MVT::v4i32: | ||||
| 	  break; | ||||
|  | ||||
|   // Multiply two v8i16 vectors (pipeline friendly version): | ||||
|   // a) multiply lower halves, mask off upper 16-bit of 32-bit product | ||||
|   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes) | ||||
|   // c) Use SELB to select upper and lower halves from the intermediate results | ||||
|   // | ||||
|   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the | ||||
|   // dual-issue. This code does manage to do this, even if it's a little on | ||||
|   // the wacky side | ||||
|   case MVT::v8i16: { | ||||
|     MachineFunction &MF = DAG.getMachineFunction(); | ||||
|     MachineRegisterInfo &RegInfo = MF.getRegInfo(); | ||||
|     SDValue Chain = Op.getOperand(0); | ||||
|     SDValue rA = Op.getOperand(0); | ||||
|     SDValue rB = Op.getOperand(1); | ||||
|     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); | ||||
|     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); | ||||
|  | ||||
|     SDValue FSMBOp = | ||||
|       DAG.getCopyToReg(Chain, FSMBIreg, | ||||
|                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, | ||||
|                                    DAG.getConstant(0xcccc, MVT::i16))); | ||||
|  | ||||
|     SDValue HHProd = | ||||
|       DAG.getCopyToReg(FSMBOp, HiProdReg, | ||||
|                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); | ||||
|  | ||||
|     SDValue HHProd_v4i32 = | ||||
|       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, | ||||
|                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); | ||||
|  | ||||
|     return DAG.getNode(SPUISD::SELB, MVT::v8i16, | ||||
|                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), | ||||
|                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), | ||||
|                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, | ||||
|                                                HHProd_v4i32, | ||||
|                                                DAG.getConstant(16, MVT::i16))), | ||||
|                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); | ||||
|   } | ||||
|  | ||||
|   // This M00sE is N@stI! (apologies to Monty Python) | ||||
|   // | ||||
|   // SPU doesn't know how to do any 8-bit multiplication, so the solution | ||||
|   // is to break it all apart, sign extend, and reassemble the various | ||||
|   // intermediate products. | ||||
|   case MVT::v16i8: { | ||||
|     SDValue rA = Op.getOperand(0); | ||||
|     SDValue rB = Op.getOperand(1); | ||||
|     SDValue c8 = DAG.getConstant(8, MVT::i32); | ||||
|     SDValue c16 = DAG.getConstant(16, MVT::i32); | ||||
|  | ||||
|     SDValue LLProd = | ||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); | ||||
|  | ||||
|     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); | ||||
|  | ||||
|     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); | ||||
|  | ||||
|     SDValue LHProd = | ||||
|       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, | ||||
|                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); | ||||
|  | ||||
|     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, | ||||
|                                      DAG.getConstant(0x2222, MVT::i16)); | ||||
|  | ||||
|     SDValue LoProdParts = | ||||
|       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, | ||||
|                   DAG.getNode(SPUISD::SELB, MVT::v8i16, | ||||
|                               LLProd, LHProd, FSMBmask)); | ||||
|  | ||||
|     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32); | ||||
|  | ||||
|     SDValue LoProd = | ||||
|       DAG.getNode(ISD::AND, MVT::v4i32, | ||||
|                   LoProdParts, | ||||
|                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, | ||||
|                               LoProdMask, LoProdMask, | ||||
|                               LoProdMask, LoProdMask)); | ||||
|  | ||||
|     SDValue rAH = | ||||
|       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); | ||||
|  | ||||
|     SDValue rBH = | ||||
|       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); | ||||
|  | ||||
|     SDValue HLProd = | ||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); | ||||
|  | ||||
|     SDValue HHProd_1 = | ||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, | ||||
|                               DAG.getNode(SPUISD::VEC_SRA, | ||||
|                                           MVT::v4i32, rAH, c8)), | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, | ||||
|                               DAG.getNode(SPUISD::VEC_SRA, | ||||
|                                           MVT::v4i32, rBH, c8))); | ||||
|  | ||||
|     SDValue HHProd = | ||||
|       DAG.getNode(SPUISD::SELB, MVT::v8i16, | ||||
|                   HLProd, | ||||
|                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), | ||||
|                   FSMBmask); | ||||
|  | ||||
|     SDValue HiProd = | ||||
|       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16); | ||||
|  | ||||
|     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, | ||||
|                        DAG.getNode(ISD::OR, MVT::v4i32, | ||||
|                                    LoProd, HiProd)); | ||||
|   } | ||||
|   } | ||||
|  | ||||
|   return SDValue(); | ||||
| } | ||||
|  | ||||
| static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) { | ||||
|   MachineFunction &MF = DAG.getMachineFunction(); | ||||
|   MachineRegisterInfo &RegInfo = MF.getRegInfo(); | ||||
|  | ||||
|   SDValue A = Op.getOperand(0); | ||||
|   SDValue B = Op.getOperand(1); | ||||
|   MVT VT = Op.getValueType(); | ||||
|  | ||||
|   unsigned VRegBR, VRegC; | ||||
|  | ||||
|   if (VT == MVT::f32) { | ||||
|     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); | ||||
|     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); | ||||
|   } else { | ||||
|     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); | ||||
|     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); | ||||
|   } | ||||
|   // TODO: make sure we're feeding FPInterp the right arguments. | ||||
|   // Right now the node is built as FPInterp(B, FPRecipEst(B)), i.e. "fi B, frest(B)". | ||||
|  | ||||
|   // BRcpl: copies FPInterp(B, FPRecipEst(B)) -- the interpolated reciprocal | ||||
|   // estimate of B -- into VRegBR, chained from the DAG entry node. | ||||
|   SDValue BRcpl = | ||||
|       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, | ||||
|                        DAG.getNode(SPUISD::FPInterp, VT, B, | ||||
|                                 DAG.getNode(SPUISD::FPRecipEst, VT, B))); | ||||
|  | ||||
|   // AxBRcpl: copies A * VRegBR into VRegC; chained after BRcpl so VRegBR is written first. | ||||
|   SDValue AxBRcpl = | ||||
|       DAG.getCopyToReg(BRcpl, VRegC, | ||||
|                  DAG.getNode(ISD::FMUL, VT, A, | ||||
|                         DAG.getCopyFromReg(BRcpl, VRegBR, VT))); | ||||
|   // The value returned below is C + BR * (A - B * C), where C = VRegC and BR = VRegBR; | ||||
|   // both are read through AxBRcpl's chain. TODO: set Chain = Op(0).getEntryNode() | ||||
|  | ||||
|   return DAG.getNode(ISD::FADD, VT, | ||||
|                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT), | ||||
|                 DAG.getNode(ISD::FMUL, VT, | ||||
|                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), | ||||
|                         DAG.getNode(ISD::FSUB, VT, A, | ||||
|                             DAG.getNode(ISD::FMUL, VT, B, | ||||
|                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); | ||||
| } | ||||
|  | ||||
| static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { | ||||
|   MVT VT = Op.getValueType(); | ||||
|   SDValue N = Op.getOperand(0); | ||||
| @@ -2296,18 +2104,23 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, | ||||
|     assert(0 && "Unhandled i8 math operator"); | ||||
|     /*NOTREACHED*/ | ||||
|     break; | ||||
|   case ISD::ADD: { | ||||
|     // 8-bit addition: Promote the arguments up to 16-bits and truncate | ||||
|     // the result: | ||||
|     SDValue N1 = Op.getOperand(1); | ||||
|     N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0); | ||||
|     N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1); | ||||
|     return DAG.getNode(ISD::TRUNCATE, MVT::i8, | ||||
|                        DAG.getNode(Opc, MVT::i16, N0, N1)); | ||||
|  | ||||
|   } | ||||
|  | ||||
|   case ISD::SUB: { | ||||
|     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate | ||||
|     // the result: | ||||
|     SDValue N1 = Op.getOperand(1); | ||||
|     N0 = (N0.getOpcode() != ISD::Constant | ||||
|           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) | ||||
|           : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(), | ||||
|                             MVT::i16)); | ||||
|     N1 = (N1.getOpcode() != ISD::Constant | ||||
|           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) | ||||
|           : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(), | ||||
|                             MVT::i16)); | ||||
|     N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0); | ||||
|     N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1); | ||||
|     return DAG.getNode(ISD::TRUNCATE, MVT::i8, | ||||
|                        DAG.getNode(Opc, MVT::i16, N0, N1)); | ||||
|   } | ||||
| @@ -2397,7 +2210,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) | ||||
|  | ||||
|   switch (Opc) { | ||||
|   case ISD::ZERO_EXTEND: | ||||
|   case ISD::SIGN_EXTEND: | ||||
|   case ISD::ANY_EXTEND: { | ||||
|     MVT Op0VT = Op0.getValueType(); | ||||
|     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); | ||||
| @@ -2410,39 +2222,16 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) | ||||
|     SDValue PromoteScalar = | ||||
|             DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0); | ||||
|  | ||||
|     if (Opc != ISD::SIGN_EXTEND) { | ||||
|     // Use a shuffle to zero extend the i32 to i64 directly: | ||||
|       SDValue shufMask = | ||||
|               DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT, | ||||
|                           DAG.getConstant(0x80808080, MVT::i32), | ||||
|                           DAG.getConstant(0x00010203, MVT::i32), | ||||
|                           DAG.getConstant(0x80808080, MVT::i32), | ||||
|                           DAG.getConstant(0x08090a0b, MVT::i32)); | ||||
|       SDValue zextShuffle = | ||||
|               DAG.getNode(SPUISD::SHUFB, Op0VecVT, | ||||
|                           PromoteScalar, PromoteScalar, shufMask); | ||||
|     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT, | ||||
|         DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203, | ||||
|             MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant( | ||||
|             0x08090a0b, MVT::i32)); | ||||
|     SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar, | ||||
|         PromoteScalar, shufMask); | ||||
|  | ||||
|       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, | ||||
|                          DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle)); | ||||
|     } else { | ||||
|       // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift | ||||
|       // right and propagate the sign bit) instruction. | ||||
|       SDValue RotQuad = | ||||
|               DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT, | ||||
|                           PromoteScalar, DAG.getConstant(4, MVT::i32)); | ||||
|       SDValue SignQuad = | ||||
|               DAG.getNode(SPUISD::VEC_SRA, Op0VecVT, | ||||
|                           PromoteScalar, DAG.getConstant(32, MVT::i32)); | ||||
|       SDValue SelMask = | ||||
|               DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT, | ||||
|                           DAG.getConstant(0xf0f0, MVT::i16)); | ||||
|       SDValue CombineQuad = | ||||
|               DAG.getNode(SPUISD::SELB, Op0VecVT, | ||||
|                           SignQuad, RotQuad, SelMask); | ||||
|  | ||||
|       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, | ||||
|                          DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad)); | ||||
|     } | ||||
|     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT, | ||||
|         VecVT, zextShuffle)); | ||||
|   } | ||||
|  | ||||
|   case ISD::ADD: { | ||||
| @@ -2502,88 +2291,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) | ||||
|                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, | ||||
|                                    Op0, Op1, ShiftedBorrow)); | ||||
|   } | ||||
|  | ||||
|   case ISD::SHL: { | ||||
|     SDValue ShiftAmt = Op.getOperand(1); | ||||
|     MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||
|     SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0); | ||||
|     SDValue MaskLower = | ||||
|       DAG.getNode(SPUISD::SELB, VecVT, | ||||
|                   Op0Vec, | ||||
|                   DAG.getConstant(0, VecVT), | ||||
|                   DAG.getNode(SPUISD::SELECT_MASK, VecVT, | ||||
|                               DAG.getConstant(0xff00ULL, MVT::i16))); | ||||
|     SDValue ShiftAmtBytes = | ||||
|       DAG.getNode(ISD::SRL, ShiftAmtVT, | ||||
|                   ShiftAmt, | ||||
|                   DAG.getConstant(3, ShiftAmtVT)); | ||||
|     SDValue ShiftAmtBits = | ||||
|       DAG.getNode(ISD::AND, ShiftAmtVT, | ||||
|                   ShiftAmt, | ||||
|                   DAG.getConstant(7, ShiftAmtVT)); | ||||
|  | ||||
|     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, | ||||
|                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT, | ||||
|                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, | ||||
|                                                MaskLower, ShiftAmtBytes), | ||||
|                                    ShiftAmtBits)); | ||||
|   } | ||||
|  | ||||
|   case ISD::SRL: { | ||||
|     MVT VT = Op.getValueType(); | ||||
|     SDValue ShiftAmt = Op.getOperand(1); | ||||
|     MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||
|     SDValue ShiftAmtBytes = | ||||
|       DAG.getNode(ISD::SRL, ShiftAmtVT, | ||||
|                   ShiftAmt, | ||||
|                   DAG.getConstant(3, ShiftAmtVT)); | ||||
|     SDValue ShiftAmtBits = | ||||
|       DAG.getNode(ISD::AND, ShiftAmtVT, | ||||
|                   ShiftAmt, | ||||
|                   DAG.getConstant(7, ShiftAmtVT)); | ||||
|  | ||||
|     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT, | ||||
|                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT, | ||||
|                                    Op0, ShiftAmtBytes), | ||||
|                        ShiftAmtBits); | ||||
|   } | ||||
|  | ||||
|   case ISD::SRA: { | ||||
|     // Promote Op0 to vector | ||||
|     SDValue Op0 = | ||||
|       DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); | ||||
|     SDValue ShiftAmt = Op.getOperand(1); | ||||
|     MVT ShiftVT = ShiftAmt.getValueType(); | ||||
|  | ||||
|     // Negate variable shift amounts | ||||
|     if (!isa<ConstantSDNode>(ShiftAmt)) { | ||||
|       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT, | ||||
|                              DAG.getConstant(0, ShiftVT), ShiftAmt); | ||||
|     } | ||||
|  | ||||
|     SDValue UpperHalfSign = | ||||
|       DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32, | ||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, | ||||
|                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64, | ||||
|                                           Op0, DAG.getConstant(31, MVT::i32)))); | ||||
|     SDValue UpperHalfSignMask = | ||||
|       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign); | ||||
|     SDValue UpperLowerMask = | ||||
|       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, | ||||
|                   DAG.getConstant(0xff00, MVT::i16)); | ||||
|     SDValue UpperLowerSelect = | ||||
|       DAG.getNode(SPUISD::SELB, MVT::v2i64, | ||||
|                   UpperHalfSignMask, Op0, UpperLowerMask); | ||||
|     SDValue RotateLeftBytes = | ||||
|       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64, | ||||
|                   UpperLowerSelect, ShiftAmt); | ||||
|     SDValue RotateLeftBits = | ||||
|       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64, | ||||
|                   RotateLeftBytes, ShiftAmt); | ||||
|  | ||||
|     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, | ||||
|                        RotateLeftBits); | ||||
|   } | ||||
|   } | ||||
|  | ||||
|   return SDValue(); | ||||
| @@ -2890,10 +2597,11 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) | ||||
|     return LowerRET(Op, DAG, getTargetMachine()); | ||||
|  | ||||
|  | ||||
|   // i8, i64 math ops: | ||||
|   case ISD::ZERO_EXTEND: | ||||
|   case ISD::SIGN_EXTEND: | ||||
|   case ISD::ANY_EXTEND: | ||||
|     return LowerI64Math(Op, DAG, Opc); | ||||
|  | ||||
|   // i8, i64 math ops: | ||||
|   case ISD::ADD: | ||||
|   case ISD::SUB: | ||||
|   case ISD::ROTR: | ||||
| @@ -2928,22 +2636,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) | ||||
|  | ||||
|   // Vector and i8 multiply: | ||||
|   case ISD::MUL: | ||||
|     if (VT.isVector()) | ||||
|       return LowerVectorMUL(Op, DAG); | ||||
|     else if (VT == MVT::i8) | ||||
|     if (VT == MVT::i8) | ||||
|       return LowerI8Math(Op, DAG, Opc, *this); | ||||
|  | ||||
|   case ISD::FDIV: | ||||
|     if (VT == MVT::f32 || VT == MVT::v4f32) | ||||
|       return LowerFDIVf32(Op, DAG); | ||||
| #if 0 | ||||
|     // This is probably a libcall | ||||
|     else if (Op.getValueType() == MVT::f64) | ||||
|       return LowerFDIVf64(Op, DAG); | ||||
| #endif | ||||
|     else | ||||
|       assert(0 && "Calling FDIV on unsupported MVT"); | ||||
|  | ||||
|   case ISD::CTPOP: | ||||
|     return LowerCTPOP(Op, DAG); | ||||
|  | ||||
| @@ -3119,8 +2814,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const | ||||
|   case SPUISD::VEC_SHL: | ||||
|   case SPUISD::VEC_SRL: | ||||
|   case SPUISD::VEC_SRA: | ||||
|   case SPUISD::ROTQUAD_RZ_BYTES: | ||||
|   case SPUISD::ROTQUAD_RZ_BITS: | ||||
|   case SPUISD::ROTBYTES_LEFT: { | ||||
|     SDValue Op1 = N->getOperand(1); | ||||
|  | ||||
| @@ -3268,10 +2961,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, | ||||
|   } | ||||
|  | ||||
| #if 0 | ||||
|   case MPY: | ||||
|   case MPYU: | ||||
|   case MPYH: | ||||
|   case MPYHH: | ||||
|   case SPUISD::SHLQUAD_L_BITS: | ||||
|   case SPUISD::SHLQUAD_L_BYTES: | ||||
|   case SPUISD::VEC_SHL: | ||||
| @@ -3279,13 +2968,9 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, | ||||
|   case SPUISD::VEC_SRA: | ||||
|   case SPUISD::VEC_ROTL: | ||||
|   case SPUISD::VEC_ROTR: | ||||
|   case SPUISD::ROTQUAD_RZ_BYTES: | ||||
|   case SPUISD::ROTQUAD_RZ_BITS: | ||||
|   case SPUISD::ROTBYTES_LEFT: | ||||
|   case SPUISD::SELECT_MASK: | ||||
|   case SPUISD::SELB: | ||||
|   case SPUISD::FPInterp: | ||||
|   case SPUISD::FPRecipEst: | ||||
|   case SPUISD::SEXT32TO64: | ||||
| #endif | ||||
|   } | ||||
|   | ||||
| @@ -41,10 +41,6 @@ namespace llvm { | ||||
|       CNTB,                     ///< Count ones in each byte | ||||
|       PREFSLOT2VEC,             ///< Promote scalar->vector | ||||
|       VEC2PREFSLOT,             ///< Extract element 0 | ||||
|       MPY,                      ///< 16-bit Multiply (low parts of a 32-bit) | ||||
|       MPYU,                     ///< Multiply Unsigned | ||||
|       MPYH,                     ///< Multiply High | ||||
|       MPYHH,                    ///< Multiply High-High | ||||
|       SHLQUAD_L_BITS,           ///< Shift quad left, by bits | ||||
|       SHLQUAD_L_BYTES,          ///< Shift quad left, by bytes | ||||
|       VEC_SHL,                  ///< Vector shift left | ||||
| @@ -52,8 +48,6 @@ namespace llvm { | ||||
|       VEC_SRA,                  ///< Vector shift right (arithmetic) | ||||
|       VEC_ROTL,                 ///< Vector rotate left | ||||
|       VEC_ROTR,                 ///< Vector rotate right | ||||
|       ROTQUAD_RZ_BYTES,         ///< Rotate quad right, by bytes, zero fill | ||||
|       ROTQUAD_RZ_BITS,          ///< Rotate quad right, by bits, zero fill | ||||
|       ROTBYTES_LEFT,            ///< Rotate bytes (loads -> ROTQBYI) | ||||
|       ROTBYTES_LEFT_BITS,       ///< Rotate bytes left by bit shift count | ||||
|       SELECT_MASK,              ///< Select Mask (FSM, FSMB, FSMH, FSMBI) | ||||
| @@ -63,8 +57,6 @@ namespace llvm { | ||||
|       CARRY_GENERATE,           ///< Carry generate for ADD_EXTENDED | ||||
|       SUB_EXTENDED,             ///< Subtract extended, with borrow | ||||
|       BORROW_GENERATE,          ///< Borrow generate for SUB_EXTENDED | ||||
|       FPInterp,                 ///< Floating point interpolate | ||||
|       FPRecipEst,               ///< Floating point reciprocal estimate | ||||
|       SEXT32TO64,               ///< Sign-extended 32-bit const -> 64-bits | ||||
|       LAST_SPUISD               ///< Last user-defined instruction | ||||
|     }; | ||||
|   | ||||
| @@ -82,7 +82,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, | ||||
|   case SPU::ORIi8i32: | ||||
|   case SPU::AHIvec: | ||||
|   case SPU::AHIr16: | ||||
|   case SPU::AIvec: | ||||
|   case SPU::AIv4i32: | ||||
|     assert(MI.getNumOperands() == 3 && | ||||
|            MI.getOperand(0).isReg() && | ||||
|            MI.getOperand(1).isReg() && | ||||
| @@ -98,8 +98,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, | ||||
|     assert(MI.getNumOperands() == 3 && | ||||
|            "wrong number of operands to AIr32"); | ||||
|     if (MI.getOperand(0).isReg() && | ||||
|         (MI.getOperand(1).isReg() || | ||||
|          MI.getOperand(1).isFI()) && | ||||
|         MI.getOperand(1).isReg() && | ||||
|         (MI.getOperand(2).isImm() && | ||||
|          MI.getOperand(2).getImm() == 0)) { | ||||
|       sourceReg = MI.getOperand(1).getReg(); | ||||
|   | ||||
| @@ -583,7 +583,9 @@ def AHIvec: | ||||
| def AHIr16: | ||||
|   RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), | ||||
|     "ahi\t$rT, $rA, $val", IntegerOp, | ||||
|     [(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>; | ||||
|     [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>; | ||||
|  | ||||
| // v4i32, i32 add instruction: | ||||
|  | ||||
| class AInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
|   RRForm<0b00000011000, OOL, IOL, | ||||
| @@ -604,21 +606,42 @@ multiclass AddInstruction { | ||||
|   def v16i8: AVecInst<v16i8>; | ||||
|    | ||||
|   def r32:   ARegInst<R32C>; | ||||
|   def r8:    AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>;  | ||||
| } | ||||
|  | ||||
| defm A : AddInstruction; | ||||
|  | ||||
| def AIvec: | ||||
|     RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||
| class AIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
|     RI10Form<0b00111000, OOL, IOL, | ||||
| 	     "ai\t$rT, $rA, $val", IntegerOp, | ||||
|       [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA), | ||||
|                                       v4i32SExt10Imm:$val))]>; | ||||
| 	     pattern>; | ||||
|  | ||||
| def AIr32: | ||||
|     RI10Form<0b00111000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), | ||||
|       "ai\t$rT, $rA, $val", IntegerOp, | ||||
|       [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>; | ||||
| class AIVecInst<ValueType vectype, PatLeaf immpred>: | ||||
|     AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||
| 	    [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>; | ||||
|  | ||||
| class AIFPVecInst<ValueType vectype, PatLeaf immpred>: | ||||
|     AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||
| 	    [/* no pattern */]>; | ||||
|  | ||||
| class AIRegInst<RegisterClass rclass, PatLeaf immpred>: | ||||
|     AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), | ||||
| 	   [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>; | ||||
|  | ||||
| // This is used to add epsilons to floating point numbers in the f32 fdiv code: | ||||
| class AIFPInst<RegisterClass rclass, PatLeaf immpred>: | ||||
|     AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), | ||||
| 	   [/* no pattern */]>; | ||||
|  | ||||
| multiclass AddImmediate { | ||||
|   def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>; | ||||
|  | ||||
|   def r32: AIRegInst<R32C, i32ImmSExt10>; | ||||
|  | ||||
|   def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>; | ||||
|   def f32: AIFPInst<R32FP, i32ImmSExt10>; | ||||
| } | ||||
|  | ||||
| defm AI : AddImmediate; | ||||
|  | ||||
| def SFHvec: | ||||
|     RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
| @@ -795,8 +818,7 @@ def BGXvec: | ||||
| def MPYv8i16: | ||||
|   RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|     "mpy\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|     [(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA), | ||||
|                                           (v8i16 VECREG:$rB)))]>; | ||||
|     [/* no pattern */]>; | ||||
|  | ||||
| def MPYr16: | ||||
|   RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), | ||||
| @@ -812,8 +834,7 @@ class MPYUInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
|  | ||||
| def MPYUv4i32: | ||||
|   MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|            [(set (v4i32 VECREG:$rT), | ||||
|                  (SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; | ||||
|            [/* no pattern */]>; | ||||
|  | ||||
| def MPYUr16: | ||||
|   MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), | ||||
| @@ -821,7 +842,7 @@ def MPYUr16: | ||||
|  | ||||
| def MPYUr32: | ||||
|   MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||
|            [(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>; | ||||
|            [/* no pattern */]>; | ||||
|  | ||||
| // mpyi: multiply 16 x s10imm -> 32 result. | ||||
|  | ||||
| @@ -892,87 +913,78 @@ class MPYHInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
|           | ||||
| def MPYHv4i32: | ||||
|     MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|              [(set (v4i32 VECREG:$rT), | ||||
|                    (SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; | ||||
|              [/* no pattern */]>; | ||||
|  | ||||
| def MPYHr32: | ||||
|     MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||
|              [(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>; | ||||
|              [/* no pattern */]>; | ||||
|  | ||||
| // mpys: multiply high and shift right (returns the top half of | ||||
| // a 16-bit multiply, sign extended to 32 bits.) | ||||
| def MPYSvec: | ||||
|     RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|  | ||||
| class MPYSInst<dag OOL, dag IOL>: | ||||
|     RRForm<0b11100011110, OOL, IOL,  | ||||
|       "mpys\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|       [/* no pattern */]>; | ||||
|  | ||||
| def MPYSvec: | ||||
|     MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||
|      | ||||
| def MPYSr16: | ||||
|     RRForm<0b11100011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB), | ||||
|       "mpys\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|     MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>; | ||||
|  | ||||
| // mpyhh: multiply high-high (returns the 32-bit result from multiplying | ||||
| // the top 16 bits of $rA and $rB) | ||||
| def MPYHHv8i16: | ||||
|     RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|  | ||||
| class MPYHHInst<dag OOL, dag IOL>: | ||||
|   RRForm<0b01100011110, OOL, IOL, | ||||
|         "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       [(set (v8i16 VECREG:$rT), | ||||
|             (SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; | ||||
|         [/* no pattern */]>; | ||||
|          | ||||
| def MPYHHv8i16: | ||||
|     MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||
|  | ||||
| def MPYHHr32: | ||||
|     RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||
|       "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|     MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||
|  | ||||
| // mpyhha: Multiply high-high, add to $rT: | ||||
| def MPYHHAvec: | ||||
|     RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|  | ||||
| class MPYHHAInst<dag OOL, dag IOL>: | ||||
|     RRForm<0b01100010110, OOL, IOL, | ||||
|       "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|       [/* no pattern */]>; | ||||
|  | ||||
| def MPYHHAvec: | ||||
|     MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||
|      | ||||
| def MPYHHAr32: | ||||
|     RRForm<0b01100010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||
|       "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|     MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||
|  | ||||
| // mpyhhu: Multiply high-high, unsigned | ||||
| def MPYHHUvec: | ||||
|     RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|  | ||||
| class MPYHHUInst<dag OOL, dag IOL>: | ||||
|     RRForm<0b01110011110, OOL, IOL, | ||||
|       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|       [/* no pattern */]>; | ||||
|  | ||||
| def MPYHHUvec: | ||||
|     MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||
|      | ||||
| def MPYHHUr32: | ||||
|     RRForm<0b01110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||
|       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|     MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||
|  | ||||
| // mpyhhau: Multiply high-high, unsigned, add to $rT: | ||||
| def MPYHHAUvec: | ||||
|     RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|  | ||||
| class MPYHHAUInst<dag OOL, dag IOL>: | ||||
|     RRForm<0b01110010110, OOL, IOL, | ||||
|       "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|       [/* no pattern */]>; | ||||
|  | ||||
| def MPYHHAUvec: | ||||
|     MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||
|      | ||||
| def MPYHHAUr32: | ||||
|     RRForm<0b01110010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||
|       "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, | ||||
|       []>; | ||||
|  | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| // v4i32, i32 multiply instruction sequence: | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| def MPYv4i32: | ||||
|   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), | ||||
|       (Av4i32 | ||||
|         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB), | ||||
|                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)), | ||||
|         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>; | ||||
|  | ||||
| def MPYi32: | ||||
|   Pat<(mul R32C:$rA, R32C:$rB), | ||||
|       (Ar32 | ||||
|         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), | ||||
|               (MPYHr32 R32C:$rB, R32C:$rA)), | ||||
|         (MPYUr32 R32C:$rA, R32C:$rB))>; | ||||
|     MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||
|  | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| // clz: Count leading zeroes | ||||
| @@ -1424,7 +1436,7 @@ multiclass BitwiseOr | ||||
|   def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), | ||||
|                   [/* no pattern */]>; | ||||
|  | ||||
|   // scalar->vector promotion: | ||||
|   // scalar->vector promotion, prefslot2vec: | ||||
|   def v16i8_i8:  ORPromoteScalar<R8C>; | ||||
|   def v8i16_i16: ORPromoteScalar<R16C>; | ||||
|   def v4i32_i32: ORPromoteScalar<R32C>; | ||||
| @@ -1432,7 +1444,7 @@ multiclass BitwiseOr | ||||
|   def v4f32_f32: ORPromoteScalar<R32FP>; | ||||
|   def v2f64_f64: ORPromoteScalar<R64FP>; | ||||
|  | ||||
|   // extract element 0: | ||||
|   // vector->scalar demotion, vec2prefslot: | ||||
|   def i8_v16i8:  ORExtractElt<R8C>; | ||||
|   def i16_v8i16: ORExtractElt<R16C>; | ||||
|   def i32_v4i32: ORExtractElt<R32C>; | ||||
| @@ -1831,6 +1843,13 @@ class SELBVecInst<ValueType vectype>: | ||||
|                      (and (vnot (vectype VECREG:$rC)), | ||||
|                           (vectype VECREG:$rA))))]>; | ||||
|  | ||||
| class SELBVecVCondInst<ValueType vectype>: | ||||
|   SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), | ||||
|            [(set (vectype VECREG:$rT), | ||||
|                  (select (vectype VECREG:$rC), | ||||
|                          (vectype VECREG:$rB), | ||||
|                          (vectype VECREG:$rA)))]>; | ||||
|  | ||||
| class SELBVecCondInst<ValueType vectype>: | ||||
|   SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), | ||||
|            [(set (vectype VECREG:$rT), | ||||
| @@ -1867,8 +1886,21 @@ multiclass SelectBits | ||||
|   def v4i32_cond: SELBVecCondInst<v4i32>; | ||||
|   def v2i64_cond: SELBVecCondInst<v2i64>; | ||||
|  | ||||
|   def v16i8_vcond: SELBVecCondInst<v16i8>; | ||||
|   def v8i16_vcond: SELBVecCondInst<v8i16>; | ||||
|   def v4i32_vcond: SELBVecCondInst<v4i32>; | ||||
|   def v2i64_vcond: SELBVecCondInst<v2i64>; | ||||
|  | ||||
|   def v4f32_cond: | ||||
| 	SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), | ||||
| 		 [(set (v4f32 VECREG:$rT), | ||||
| 		       (select (v4i32 VECREG:$rC), | ||||
| 			       (v4f32 VECREG:$rB), | ||||
| 			       (v4f32 VECREG:$rA)))]>; | ||||
|  | ||||
|   // SELBr64_cond is defined further down, look for i64 comparisons | ||||
|   def r32_cond:   SELBRegCondInst<R32C, R32C>; | ||||
|   def f32_cond:   SELBRegCondInst<R32C, R32FP>; | ||||
|   def r16_cond:   SELBRegCondInst<R16C, R16C>; | ||||
|   def r8_cond:    SELBRegCondInst<R8C,  R8C>; | ||||
| } | ||||
| @@ -2454,11 +2486,11 @@ class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
|            RotateShift, pattern>; | ||||
|  | ||||
| class ROTQBIVecInst<ValueType vectype>: | ||||
|     ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|     ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), | ||||
|                [/* no pattern yet */]>; | ||||
|  | ||||
| class ROTQBIRegInst<RegisterClass rclass>: | ||||
|     ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), | ||||
|     ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), | ||||
|                [/* no pattern yet */]>; | ||||
|  | ||||
| multiclass RotateQuadByBitCount | ||||
| @@ -2645,9 +2677,6 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)), | ||||
| // ROTQMBYvec: This is a vector form merely so that when used in an | ||||
| // instruction pattern, type checking will succeed. This instruction assumes | ||||
| // that the user knew to negate $rB. | ||||
| // | ||||
| // Using the SPUrotquad_rz_bytes target-specific DAG node, the patterns | ||||
| // ensure that $rB is negated. | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
|  | ||||
| class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
| @@ -2660,8 +2689,7 @@ class ROTQMBYVecInst<ValueType vectype>: | ||||
|  | ||||
| class ROTQMBYRegInst<RegisterClass rclass>: | ||||
|     ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), | ||||
|                 [(set rclass:$rT, | ||||
|                       (SPUrotquad_rz_bytes rclass:$rA, R32C:$rB))]>; | ||||
|                 [/* no pattern */]>; | ||||
|  | ||||
| multiclass RotateQuadBytes | ||||
| { | ||||
| @@ -2676,32 +2704,17 @@ multiclass RotateQuadBytes | ||||
|  | ||||
| defm ROTQMBY : RotateQuadBytes; | ||||
|  | ||||
| def : Pat<(SPUrotquad_rz_bytes (v16i8 VECREG:$rA), R32C:$rB), | ||||
|           (ROTQMBYv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bytes (v8i16 VECREG:$rA), R32C:$rB), | ||||
|           (ROTQMBYv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bytes (v4i32 VECREG:$rA), R32C:$rB), | ||||
|           (ROTQMBYv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bytes (v2i64 VECREG:$rA), R32C:$rB), | ||||
|           (ROTQMBYv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bytes GPRC:$rA, R32C:$rB), | ||||
|           (ROTQMBYr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bytes R64C:$rA, R32C:$rB), | ||||
|           (ROTQMBYr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
|  | ||||
| class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
|     RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", | ||||
|             RotateShift, pattern>; | ||||
|  | ||||
| class ROTQMBYIVecInst<ValueType vectype>: | ||||
|     ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), | ||||
|                  [(set (vectype VECREG:$rT), | ||||
|                        (SPUrotquad_rz_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>; | ||||
|                  [/* no pattern */]>; | ||||
|  | ||||
| class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>: | ||||
|     ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), | ||||
|                  [(set rclass:$rT, | ||||
|                        (SPUrotquad_rz_bytes rclass:$rA, (inttype pred:$val)))]>; | ||||
|                  [/* no pattern */]>; | ||||
|  | ||||
| multiclass RotateQuadBytesImm | ||||
| { | ||||
| @@ -2725,8 +2738,8 @@ class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
|            RotateShift, pattern>; | ||||
|  | ||||
| class ROTQMBYBIVecInst<ValueType vectype>: | ||||
|     ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|                   [/* no pattern, intrinsic? */]>; | ||||
|     ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), | ||||
|                   [/* no pattern, */]>; | ||||
|  | ||||
| multiclass RotateMaskQuadByBitCount | ||||
| { | ||||
| @@ -2768,19 +2781,6 @@ multiclass RotateMaskQuadByBits | ||||
|  | ||||
| defm ROTQMBI: RotateMaskQuadByBits; | ||||
|  | ||||
| def : Pat<(SPUrotquad_rz_bits (v16i8 VECREG:$rA), R32C:$rB), | ||||
|           (ROTQMBIv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bits (v8i16 VECREG:$rA), R32C:$rB), | ||||
|           (ROTQMBIv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bits (v4i32 VECREG:$rA), R32C:$rB), | ||||
|           (ROTQMBIv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bits (v2i64 VECREG:$rA), R32C:$rB), | ||||
|           (ROTQMBIv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bits GPRC:$rA, R32C:$rB), | ||||
|           (ROTQMBIr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
| def : Pat<(SPUrotquad_rz_bits R64C:$rA, R32C:$rB), | ||||
|           (ROTQMBIr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>; | ||||
|  | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| // Rotate quad and mask by bits, immediate | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| @@ -2791,13 +2791,11 @@ class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||
|  | ||||
| class ROTQMBIIVecInst<ValueType vectype>: | ||||
|    ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), | ||||
|                  [(set (vectype VECREG:$rT), | ||||
|                        (SPUrotquad_rz_bits (vectype VECREG:$rA), (i32 uimm7:$val)))]>; | ||||
|                  [/* no pattern */]>; | ||||
|  | ||||
| class ROTQMBIIRegInst<RegisterClass rclass>: | ||||
|    ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), | ||||
|                  [(set rclass:$rT, | ||||
|                        (SPUrotquad_rz_bits rclass:$rA, (i32 uimm7:$val)))]>; | ||||
|                  [/* no pattern */]>; | ||||
|  | ||||
| multiclass RotateMaskQuadByBitsImm | ||||
| { | ||||
| @@ -3142,6 +3140,15 @@ multiclass CmpGtrWordImm | ||||
|  | ||||
|   def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), | ||||
|                     [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; | ||||
|  | ||||
|   // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence: | ||||
|   def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||
|                        [(set (v4i32 VECREG:$rT), | ||||
|                              (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))), | ||||
|                                     (v4i32 v4i32SExt16Imm:$val)))]>; | ||||
|  | ||||
|   def f32:   CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val), | ||||
|   		      [/* no pattern */]>; | ||||
| } | ||||
|  | ||||
| class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> : | ||||
| @@ -3760,7 +3767,7 @@ class FAVecInst<ValueType vectype>: | ||||
| multiclass SFPAdd | ||||
| { | ||||
|   def v4f32: FAVecInst<v4f32>; | ||||
|   def r32:   FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||
|   def f32:   FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||
|                     [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; | ||||
| } | ||||
|  | ||||
| @@ -3778,34 +3785,35 @@ class FSVecInst<ValueType vectype>: | ||||
| multiclass SFPSub | ||||
| { | ||||
|   def v4f32: FSVecInst<v4f32>; | ||||
|   def r32:   FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||
|   def f32:   FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||
|                     [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>; | ||||
| } | ||||
|  | ||||
| defm FS : SFPSub; | ||||
|  | ||||
| // Floating point reciprocal estimate | ||||
| def FREv4f32 : | ||||
|     RRForm_1<0b00011101100, (outs VECREG:$rT), (ins VECREG:$rA), | ||||
|       "frest\t$rT, $rA", SPrecFP, | ||||
|       [(set (v4f32 VECREG:$rT), (SPUreciprocalEst (v4f32 VECREG:$rA)))]>; | ||||
|  | ||||
| def FREf32 : | ||||
|     RRForm_1<0b00011101100, (outs R32FP:$rT), (ins R32FP:$rA), | ||||
| class FRESTInst<dag OOL, dag IOL>: | ||||
|   RRForm_1<0b00110111000, OOL, IOL, | ||||
|            "frest\t$rT, $rA", SPrecFP, | ||||
|       [(set R32FP:$rT, (SPUreciprocalEst R32FP:$rA))]>; | ||||
|            [/* no pattern */]>; | ||||
|  | ||||
| def FRESTv4f32 : | ||||
|     FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>; | ||||
|  | ||||
| def FRESTf32 : | ||||
|     FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>; | ||||
|  | ||||
| // Floating point interpolate (used in conjunction with reciprocal estimate) | ||||
| def FIv4f32 : | ||||
|     RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||
|       "fi\t$rT, $rA, $rB", SPrecFP, | ||||
|       [(set (v4f32 VECREG:$rT), (SPUinterpolate (v4f32 VECREG:$rA), | ||||
|                                                 (v4f32 VECREG:$rB)))]>; | ||||
|       [/* no pattern */]>; | ||||
|  | ||||
| def FIf32 : | ||||
|     RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||
|       "fi\t$rT, $rA, $rB", SPrecFP, | ||||
|       [(set R32FP:$rT, (SPUinterpolate R32FP:$rA, R32FP:$rB))]>; | ||||
|       [/* no pattern */]>; | ||||
|  | ||||
| //-------------------------------------------------------------------------- | ||||
| // Basic single precision floating point comparisons: | ||||
| @@ -4445,12 +4453,14 @@ def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), | ||||
|                        (SPUlo tconstpool:$in, 0)), | ||||
|           (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; | ||||
|  | ||||
| /* | ||||
| def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm), | ||||
|           (AIr32 R32C:$sp, i32ImmSExt10:$imm)>; | ||||
|  | ||||
| def : Pat<(SPUindirect R32C:$sp, imm:$imm), | ||||
|           (Ar32 R32C:$sp, | ||||
|                 (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>; | ||||
|  */ | ||||
|  | ||||
| def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), | ||||
|           (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; | ||||
| @@ -4466,5 +4476,7 @@ def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), | ||||
|  | ||||
| // Intrinsics: | ||||
| include "CellSDKIntrinsics.td" | ||||
| // Various math operator instruction sequences | ||||
| include "SPUMathInstr.td" | ||||
| // 64-bit "instructions"/support | ||||
| include "SPU64InstrInfo.td" | ||||
|   | ||||
							
								
								
									
										99
									
								
								lib/Target/CellSPU/SPUMathInstr.td
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								lib/Target/CellSPU/SPUMathInstr.td
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | ||||
| //======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*---======// | ||||
| // | ||||
| //                     Cell SPU math operations | ||||
| // | ||||
| // This target description file contains instruction sequences for various | ||||
| // math operations, such as vector multiplies, i32 multiply, etc., for the | ||||
| // SPU's i32, i16, i8 and corresponding vector types. | ||||
| // | ||||
| // Any resemblance to libsimdmath or the Cell SDK simdmath library is | ||||
| // purely and completely coincidental. | ||||
| // | ||||
| // Primary author: Scott Michel (scottm@aero.org) | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| // v16i8 multiply instruction sequence: | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
|  | ||||
| def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), | ||||
|           (ORv4i32 | ||||
|            (ANDv4i32 | ||||
|             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), | ||||
|                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), | ||||
|                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8), | ||||
|                        (FSMBIv8i16 0x2222)), | ||||
|             (ILAv4i32 0x0000ffff)), | ||||
|            (SHLIv4i32 | ||||
|             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), | ||||
|                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)), | ||||
|                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), | ||||
|                                              (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), | ||||
|                        (FSMBIv8i16 0x2222)), 16))>; | ||||
|                          | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| // v8i16 multiply instruction sequence: | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
|  | ||||
| def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), | ||||
|           (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), | ||||
|                      (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), | ||||
|                      (FSMBIv8i16 0xcccc))>; | ||||
|                   | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| // v4i32, i32 multiply instruction sequence: | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
|  | ||||
| def MPYv4i32: | ||||
|   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), | ||||
|       (Av4i32 | ||||
|         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB), | ||||
|                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)), | ||||
|         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>; | ||||
|  | ||||
| def MPYi32: | ||||
|   Pat<(mul R32C:$rA, R32C:$rB), | ||||
|       (Ar32 | ||||
|         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), | ||||
|               (MPYHr32 R32C:$rB, R32C:$rA)), | ||||
|         (MPYUr32 R32C:$rA, R32C:$rB))>; | ||||
|  | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
| // f32, v4f32 divide instruction sequence: | ||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||
|  | ||||
| // Reciprocal estimate and interpolation | ||||
| def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; | ||||
| // Division estimate | ||||
| def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; | ||||
| // Newton-Raphson iteration | ||||
| def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), | ||||
| 		  	       Interpf32.Fragment, | ||||
| 	  	  	       DivEstf32.Fragment)>; | ||||
| // Epsilon addition | ||||
| def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; | ||||
|  | ||||
| def : Pat<(fdiv R32FP:$rA, R32FP:$rB), | ||||
| 	  (SELBf32_cond NRaphf32.Fragment, | ||||
| 			Epsilonf32.Fragment, | ||||
| 			(CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; | ||||
|  | ||||
| // Reciprocal estimate and interpolation | ||||
| def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; | ||||
| // Division estimate | ||||
| def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; | ||||
| // Newton-Raphson iteration | ||||
| def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, | ||||
| 					      (v4f32 VECREG:$rB), | ||||
| 					      (v4f32 VECREG:$rA)), | ||||
| 		  	           Interpv4f32.Fragment, | ||||
| 	  	  	           DivEstv4f32.Fragment)>; | ||||
| // Epsilon addition | ||||
| def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; | ||||
|  | ||||
| def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), | ||||
| 	  (SELBv4f32_cond NRaphv4f32.Fragment, | ||||
| 			Epsilonv4f32.Fragment, | ||||
| 			(CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), | ||||
| 					      Epsilonv4f32.Fragment, | ||||
| 					      (v4f32 VECREG:$rA)), -1))>; | ||||
| @@ -87,24 +87,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; | ||||
| // SPUISelLowering.h): | ||||
| def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; | ||||
|  | ||||
| // SPU 16-bit multiply | ||||
| def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>; | ||||
|  | ||||
| // SPU multiply unsigned, used in instruction lowering for v4i32 | ||||
| // multiplies: | ||||
| def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>; | ||||
| def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>; | ||||
|  | ||||
| // SPU 16-bit multiply high x low, shift result 16-bits | ||||
| // Used to compute intermediate products for 32-bit multiplies | ||||
| def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>; | ||||
| def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>; | ||||
|  | ||||
| // SPU 16-bit multiply high x high, 32-bit product | ||||
| // Used to compute intermediate products for 16-bit multiplies | ||||
| def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>; | ||||
| def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>; | ||||
|  | ||||
| // Shift left quadword by bits and bytes | ||||
| def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>; | ||||
| def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>; | ||||
| @@ -117,11 +99,6 @@ def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>; | ||||
| def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; | ||||
| def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; | ||||
|  | ||||
| def SPUrotquad_rz_bytes: SDNode<"SPUISD::ROTQUAD_RZ_BYTES", | ||||
|                                     SPUvecshift_type, []>; | ||||
| def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS", | ||||
|                                     SPUvecshift_type, []>; | ||||
|  | ||||
| // Vector rotate left, bits shifted out of the left are rotated in on the right | ||||
| def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", | ||||
|                              SPUvecshift_type, []>; | ||||
| @@ -141,12 +118,6 @@ def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; | ||||
| // SPU gather bits instruction: | ||||
| def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>; | ||||
|  | ||||
| // SPU floating point interpolate | ||||
| def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>; | ||||
|  | ||||
| // SPU floating point reciprocal estimate (used for fdiv) | ||||
| def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>; | ||||
|  | ||||
| def SDTprefslot2vec: SDTypeProfile<1, 1, []>; | ||||
| def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; | ||||
|  | ||||
|   | ||||
| @@ -339,10 +339,13 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, | ||||
|   // Now add the frame object offset to the offset from r1. | ||||
|   int Offset = MFI->getObjectOffset(FrameIndex); | ||||
|  | ||||
|   // Most instructions, except for generated FrameIndex additions using AIr32, | ||||
|   // have the immediate in operand 1. AIr32, in this case, has the immediate | ||||
|   // in operand 2. | ||||
|   unsigned OpNo = (MI.getOpcode() != SPU::AIr32 ? 1 : 2); | ||||
|   // Most instructions, except for generated FrameIndex additions using AIr32 | ||||
|   // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the | ||||
|   // immediate in operand 2. | ||||
|   unsigned OpNo = 1; | ||||
|   if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32) | ||||
|     OpNo = 2; | ||||
|  | ||||
|   MachineOperand &MO = MI.getOperand(OpNo); | ||||
|  | ||||
|   // Offset is biased by $lr's slot at the bottom. | ||||
|   | ||||
| @@ -1,9 +1,11 @@ | ||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||
| ; RUN: grep frest    %t1.s | count 2  | ||||
| ; RUN: grep -w fi    %t1.s | count 2  | ||||
| ; RUN: grep fm       %t1.s | count 4  | ||||
| ; RUN: grep -w fm    %t1.s | count 2 | ||||
| ; RUN: grep fma      %t1.s | count 2  | ||||
| ; RUN: grep fnms     %t1.s | count 2 | ||||
| ; RUN: grep fnms     %t1.s | count 4 | ||||
| ; RUN: grep cgti     %t1.s | count 2 | ||||
| ; RUN: grep selb     %t1.s | count 2 | ||||
| ; | ||||
| ; This file includes standard floating point arithmetic instructions | ||||
| target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | ||||
|   | ||||
| @@ -1,8 +1,5 @@ | ||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||
| ; RUN: grep {fsmbi.*61680}   %t1.s | count 1 | ||||
| ; RUN: grep rotqmbyi         %t1.s | count 1 | ||||
| ; RUN: grep rotmai           %t1.s | count 1 | ||||
| ; RUN: grep selb             %t1.s | count 1 | ||||
| ; RUN: grep xswd	     %t1.s | count 1 | ||||
| ; RUN: grep shufb            %t1.s | count 2 | ||||
| ; RUN: grep cg               %t1.s | count 1 | ||||
| ; RUN: grep addx             %t1.s | count 1 | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
| ; RUN: grep and     %t1.s | count 2 | ||||
| ; RUN: grep selb    %t1.s | count 6 | ||||
| ; RUN: grep fsmbi   %t1.s | count 4 | ||||
| ; RUN: grep shli    %t1.s | count 2 | ||||
| ; RUN: grep shli    %t1.s | count 4 | ||||
| ; RUN: grep shlhi   %t1.s | count 4 | ||||
| ; RUN: grep ila     %t1.s | count 2 | ||||
| ; RUN: grep xsbh    %t1.s | count 4 | ||||
|   | ||||
| @@ -1,10 +1,21 @@ | ||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||
| ; RUN: grep shlh   %t1.s | count 84 | ||||
| ; RUN: grep shlhi  %t1.s | count 51 | ||||
| ; RUN: grep shl    %t1.s | count 168 | ||||
| ; RUN: grep shli   %t1.s | count 51 | ||||
| ; RUN: grep xshw   %t1.s | count 5 | ||||
| ; RUN: grep and    %t1.s | count 5 | ||||
| ; RUN: grep -w shlh      %t1.s | count 9 | ||||
| ; RUN: grep -w shlhi     %t1.s | count 3 | ||||
| ; RUN: grep -w shl       %t1.s | count 9 | ||||
| ; RUN: grep -w shli      %t1.s | count 3 | ||||
| ; RUN: grep -w xshw      %t1.s | count 5 | ||||
| ; RUN: grep -w and       %t1.s | count 5 | ||||
| ; RUN: grep -w andi      %t1.s | count 2 | ||||
| ; RUN: grep -w rotmi     %t1.s | count 2 | ||||
| ; RUN: grep -w rotqmbyi  %t1.s | count 1 | ||||
| ; RUN: grep -w rotqmbii  %t1.s | count 2 | ||||
| ; RUN: grep -w rotqmby   %t1.s | count 1 | ||||
| ; RUN: grep -w rotqmbi   %t1.s | count 1 | ||||
| ; RUN: grep -w rotqbyi   %t1.s | count 1 | ||||
| ; RUN: grep -w rotqbii   %t1.s | count 2 | ||||
| ; RUN: grep -w rotqbybi  %t1.s | count 1 | ||||
| ; RUN: grep -w sfi       %t1.s | count 3 | ||||
|  | ||||
| target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | ||||
| target triple = "spu" | ||||
|  | ||||
| @@ -210,3 +221,57 @@ define i32 @shli_i32_12(i32 zeroext %arg1) zeroext { | ||||
|         %A = shl i32 0, %arg1 | ||||
|         ret i32 %A | ||||
| } | ||||
|  | ||||
| ;; i64 shift left | ||||
|  | ||||
| define i64 @shl_i64_1(i64 %arg1) { | ||||
| 	%A = shl i64 %arg1, 9 | ||||
| 	ret i64 %A | ||||
| } | ||||
|  | ||||
| define i64 @shl_i64_2(i64 %arg1) { | ||||
| 	%A = shl i64 %arg1, 3 | ||||
| 	ret i64 %A | ||||
| } | ||||
|  | ||||
| define i64 @shl_i64_3(i64 %arg1, i32 %shift) { | ||||
| 	%1 = zext i32 %shift to i64 | ||||
| 	%2 = shl i64 %arg1, %1 | ||||
| 	ret i64 %2 | ||||
| } | ||||
|  | ||||
| ;; i64 shift right logical (shift 0s in from the left) | ||||
|  | ||||
| define i64 @lshr_i64_1(i64 %arg1) { | ||||
| 	%1 = lshr i64 %arg1, 9 | ||||
| 	ret i64 %1 | ||||
| } | ||||
|  | ||||
| define i64 @lshr_i64_2(i64 %arg1) { | ||||
| 	%1 = lshr i64 %arg1, 3 | ||||
| 	ret i64 %1 | ||||
| } | ||||
|  | ||||
| define i64 @lshr_i64_3(i64 %arg1, i32 %shift) { | ||||
| 	%1 = zext i32 %shift to i64 | ||||
| 	%2 = lshr i64 %arg1, %1 | ||||
| 	ret i64 %2 | ||||
| } | ||||
|  | ||||
| ;; i64 shift right arithmetic (shift copies of the sign bit in from the left) | ||||
|  | ||||
| define i64 @ashr_i64_1(i64 %arg) { | ||||
| 	%1 = ashr i64 %arg, 9 | ||||
| 	ret i64 %1 | ||||
| } | ||||
|  | ||||
| define i64 @ashr_i64_2(i64 %arg) { | ||||
| 	%1 = ashr i64 %arg, 3 | ||||
| 	ret i64 %1 | ||||
| } | ||||
|  | ||||
| define i64 @ashr_i64_3(i64 %arg1, i32 %shift) { | ||||
| 	%1 = zext i32 %shift to i64 | ||||
| 	%2 = ashr i64 %arg1, %1 | ||||
| 	ret i64 %2 | ||||
| } | ||||
|   | ||||
| @@ -34,19 +34,45 @@ struct pred_s preds[] = { | ||||
|   { "neq", i64_neq, i64_neq_select } | ||||
| }; | ||||
|  | ||||
| uint64_t i64_shl_const(uint64_t a) { | ||||
|   return a << 10; | ||||
| } | ||||
|  | ||||
| uint64_t i64_shl(uint64_t a, int amt) { | ||||
|   return a << amt; | ||||
| } | ||||
|  | ||||
| uint64_t i64_srl_const(uint64_t a) { | ||||
|   return a >> 10; | ||||
| } | ||||
|  | ||||
| uint64_t i64_srl(uint64_t a, int amt) { | ||||
|   return a >> amt; | ||||
| } | ||||
|  | ||||
| int64_t i64_sra_const(int64_t a) { | ||||
|   return a >> 10; | ||||
| } | ||||
|  | ||||
| int64_t i64_sra(int64_t a, int amt) { | ||||
|   return a >> amt; | ||||
| } | ||||
|  | ||||
| int main(void) { | ||||
|   int i; | ||||
|   int64_t a = 1234567890000LL; | ||||
|   int64_t b = 2345678901234LL; | ||||
|   int64_t a =  1234567890003LL; | ||||
|   int64_t b =  2345678901235LL; | ||||
|   int64_t c =  1234567890001LL; | ||||
|   int64_t d =          10001LL; | ||||
|   int64_t e =          10000LL; | ||||
|   int64_t f = -1068103409991LL; | ||||
|  | ||||
|   printf("a = %16lld (0x%016llx)\n", a, a); | ||||
|   printf("b = %16lld (0x%016llx)\n", b, b); | ||||
|   printf("c = %16lld (0x%016llx)\n", c, c); | ||||
|   printf("d = %16lld (0x%016llx)\n", d, d); | ||||
|   printf("e = %16lld (0x%016llx)\n", e, e); | ||||
|   printf("f = %16lld (0x%016llx)\n", f, f); | ||||
|   printf("----------------------------------------\n"); | ||||
|  | ||||
|   for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { | ||||
| @@ -64,5 +90,23 @@ int main(void) { | ||||
|     printf("----------------------------------------\n"); | ||||
|   } | ||||
|  | ||||
|   printf("a                = 0x%016llx\n", a); | ||||
|   printf("i64_shl_const(a) = 0x%016llx\n", i64_shl_const(a)); | ||||
|   printf("i64_shl(a)       = 0x%016llx\n", i64_shl(a, 5)); | ||||
|   printf("i64_srl_const(a) = 0x%016llx\n", i64_srl_const(a)); | ||||
|   printf("i64_srl(a)       = 0x%016llx\n", i64_srl(a, 5)); | ||||
|   printf("i64_sra_const(a) = 0x%016llx\n", i64_sra_const(a)); | ||||
|   printf("i64_sra(a)       = 0x%016llx\n", i64_sra(a, 5)); | ||||
|   printf("----------------------------------------\n"); | ||||
|  | ||||
|   printf("f                = 0x%016llx\n", f); | ||||
|   printf("i64_shl_const(f) = 0x%016llx\n", i64_shl_const(f)); | ||||
|   printf("i64_shl(f)       = 0x%016llx\n", i64_shl(f, 10)); | ||||
|   printf("i64_srl_const(f) = 0x%016llx\n", i64_srl_const(f)); | ||||
|   printf("i64_srl(f)       = 0x%016llx\n", i64_srl(f, 10)); | ||||
|   printf("i64_sra_const(f) = 0x%016llx\n", i64_sra_const(f)); | ||||
|   printf("i64_sra(f)       = 0x%016llx\n", i64_sra(f, 10)); | ||||
|   printf("----------------------------------------\n"); | ||||
|  | ||||
|   return 0; | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user