Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-10-31 08:16:47 +00:00)

- Start moving target-dependent nodes that could be represented by an
  instruction sequence and cannot ordinarily be simplified by DAGcombine
  into the various target description files or SPUDAGToDAGISel.cpp.
  This makes some 64-bit operations legal.
- Eliminate target-dependent ISD enums.
- Update tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61508 91177308-0d34-0410-b5e6-96231b3b80d8
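The phrase "makes some 64-bit operations legal" describes a specific mechanism that the hunks below implement: instead of marking i64 shifts Custom and expanding them in SPUTargetLowering through target-specific ISD nodes, the operations are declared Legal and SPUDAGToDAGISel emits the machine-instruction sequence itself during instruction selection. The following is a condensed sketch of that control flow, pieced together from the hunks below; it abbreviates the surrounding code and is not a compilable excerpt.

```cpp
// Condensed from the hunks below; abbreviated, not a standalone translation unit.

// In SPUTargetLowering's constructor -- stop custom-lowering i64 shifts:
//   setOperationAction(ISD::SHL, MVT::i64, Legal);   // was Custom
//   setOperationAction(ISD::SRL, MVT::i64, Legal);
//   setOperationAction(ISD::SRA, MVT::i64, Legal);

// In SPUDAGToDAGISel -- intercept the now-legal nodes at isel time and
// build the machine-instruction sequence by hand:
SDNode *SPUDAGToDAGISel::Select(SDValue Op) {
  unsigned Opc = Op.getNode()->getOpcode();
  MVT OpVT = Op.getValueType();

  if (Opc == ISD::SHL && OpVT == MVT::i64)
    return SelectSHLi64(Op, OpVT);   // FSMBI/SELB zero-fill, then SHLQBY/SHLQBI
  if (Opc == ISD::SRL && OpVT == MVT::i64)
    return SelectSRLi64(Op, OpVT);   // ROTQMBY/ROTQMBI with negated amounts
  if (Opc == ISD::SRA && OpVT == MVT::i64)
    return SelectSRAi64(Op, OpVT);   // sign-fill via ROTMAI/FSM, then rotate

  // ... other cases elided ...
  return SelectCode(Op);             // fall back to the TableGen-generated matcher
}
```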
		| @@ -15,6 +15,13 @@ | |||||||
| // | // | ||||||
| include "llvm/Target/Target.td" | include "llvm/Target/Target.td" | ||||||
|  |  | ||||||
|  | // Holder of code fragments (you'd think this'd already be in | ||||||
|  | // a td file somewhere... :-) | ||||||
|  |  | ||||||
|  | class CodeFrag<dag frag> { | ||||||
|  |   dag Fragment = frag; | ||||||
|  | } | ||||||
|  |  | ||||||
| //===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||||
| // Register File Description | // Register File Description | ||||||
| //===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||||
|   | |||||||
| @@ -1,8 +1,17 @@ | |||||||
|  | //====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====// | ||||||
|  | // | ||||||
|  | //                     Cell SPU 64-bit operations | ||||||
|  | // | ||||||
|  | // Primary author: Scott Michel (scottm@aero.org) | ||||||
|  | //===----------------------------------------------------------------------===// | ||||||
|  |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
| // 64-bit comparisons: | // 64-bit comparisons: | ||||||
| // | // | ||||||
| // 1. The instruction sequences for vector vice scalar differ by a | // 1. The instruction sequences for vector vice scalar differ by a | ||||||
| //    constant. | //    constant. In the scalar case, we're only interested in the | ||||||
|  | //    top two 32-bit slots, whereas we're interested in an exact | ||||||
|  | //    all-four-slot match in the vector case. | ||||||
| // | // | ||||||
| // 2. There are no "immediate" forms, since loading 64-bit constants | // 2. There are no "immediate" forms, since loading 64-bit constants | ||||||
| //    could be a constant pool load. | //    could be a constant pool load. | ||||||
| @@ -10,10 +19,10 @@ | |||||||
| // 3. i64 setcc results are i32, which are subsequently converted to a FSM | // 3. i64 setcc results are i32, which are subsequently converted to a FSM | ||||||
| //    mask when used in a select pattern. | //    mask when used in a select pattern. | ||||||
| // | // | ||||||
| // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask | // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO) | ||||||
| //    (TODO) | //    [Note: this may be moot, since gb produces v4i32 or r32.] | ||||||
| // | // | ||||||
| // M00$E Kan be Pretty N@sTi!!!!! (appologies to Monty!) | // M00$E B!tes Kan be Pretty N@sTi!!!!! (appologies to Monty!) | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
| // selb instruction definition for i64. Note that the selection mask is | // selb instruction definition for i64. Note that the selection mask is | ||||||
| @@ -22,17 +31,15 @@ def SELBr64_cond: | |||||||
|    SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), |    SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), | ||||||
|             [/* no pattern */]>; |             [/* no pattern */]>; | ||||||
|  |  | ||||||
| class CodeFrag<dag frag> { | // select the negative condition: | ||||||
|   dag Fragment = frag; | class I64SELECTNegCond<PatFrag cond, CodeFrag compare>: | ||||||
| } |  | ||||||
|  |  | ||||||
| class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>: |  | ||||||
|   Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse), |   Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse), | ||||||
|       (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>; |       (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>; | ||||||
|  |  | ||||||
| class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>: | // setcc the negative condition: | ||||||
|  | class I64SETCCNegCond<PatFrag cond, CodeFrag compare>: | ||||||
|   Pat<(cond R64C:$rA, R64C:$rB), |   Pat<(cond R64C:$rA, R64C:$rB), | ||||||
|       (XORIr32 cmpare.Fragment, -1)>; |       (XORIr32 compare.Fragment, -1)>; | ||||||
|  |  | ||||||
| // The i64 seteq fragment that does the scalar->vector conversion and | // The i64 seteq fragment that does the scalar->vector conversion and | ||||||
| // comparison: | // comparison: | ||||||
| @@ -64,14 +71,13 @@ multiclass CompareEqual64 { | |||||||
| defm I64EQ: CompareEqual64; | defm I64EQ: CompareEqual64; | ||||||
|  |  | ||||||
| def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>; | def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>; | ||||||
|  | def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>; | ||||||
|  |  | ||||||
| def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), | def : Pat<(select R32C:$rC, R64C:$rB, R64C:$rA), | ||||||
|           I64EQv2i64.Fragment>; |  | ||||||
|  |  | ||||||
| def I64Select: |  | ||||||
|     Pat<(select R32C:$rC, R64C:$rB, R64C:$rA), |  | ||||||
|           (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>; |           (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>; | ||||||
|  |  | ||||||
|  | // i64 setne: | ||||||
| def : I64SETCCNegCond<setne, I64EQr64>; | def : I64SETCCNegCond<setne, I64EQr64>; | ||||||
|  |  | ||||||
| def : I64SELECTNegCond<setne, I64EQr64>; | def : I64SELECTNegCond<setne, I64EQr64>; | ||||||
|  |  | ||||||
|  | // i64 setugt: | ||||||
|   | |||||||
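The comparison patterns above build everything from the i64 seteq fragment: setne is seteq followed by an XOR with -1 (`XORIr32 compare.Fragment, -1`), and a select on the negative condition expands the i32 result into a select mask (`FSMr32`) that `SELBr64_cond` uses to pick between the true and false operands. Below is a standalone C++ model of that composition; `seteq64` is a stand-in for the CompareEqual64 fragment (which really compares word slots and gathers the result), and only the observable 0 / all-ones behaviour is modelled.

```cpp
// Standalone model (not SPU code) of the "negative condition" classes above.
#include <cstdint>
#include <cassert>

// Stand-in for the i64 seteq fragment: yields 0 or all-ones, as the
// ZeroOrNegativeOneBooleanContent setting elsewhere in this commit expects.
uint32_t seteq64(uint64_t a, uint64_t b) { return a == b ? 0xFFFFFFFFu : 0u; }

uint32_t setne64(uint64_t a, uint64_t b) {
  return seteq64(a, b) ^ 0xFFFFFFFFu;            // the XORIr32 ..., -1 step
}

// select (setne a, b), t, f  ==>  SELB(t, f, FSM(seteq(a, b)))
uint64_t select_ne64(uint64_t a, uint64_t b, uint64_t t, uint64_t f) {
  uint64_t mask = seteq64(a, b) ? ~0ULL : 0ULL;  // FSMr32: condition -> full mask
  return (f & mask) | (t & ~mask);               // SELB: set mask bits take input #2
}

int main() {
  assert(setne64(1, 2) == 0xFFFFFFFFu && setne64(3, 3) == 0u);
  assert(select_ne64(1, 2, 10, 20) == 10);       // a != b -> true operand
  assert(select_ne64(3, 3, 10, 20) == 20);       // a == b -> false operand
}
```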
| @@ -258,6 +258,15 @@ public: | |||||||
|   /// target-specific node if it hasn't already been changed. |   /// target-specific node if it hasn't already been changed. | ||||||
|   SDNode *Select(SDValue Op); |   SDNode *Select(SDValue Op); | ||||||
|  |  | ||||||
|  |   //! Emit the instruction sequence for i64 shl | ||||||
|  |   SDNode *SelectSHLi64(SDValue &Op, MVT OpVT); | ||||||
|  |  | ||||||
|  |   //! Emit the instruction sequence for i64 srl | ||||||
|  |   SDNode *SelectSRLi64(SDValue &Op, MVT OpVT); | ||||||
|  |  | ||||||
|  |   //! Emit the instruction sequence for i64 sra | ||||||
|  |   SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); | ||||||
|  |  | ||||||
|   //! Returns true if the address N is an A-form (local store) address |   //! Returns true if the address N is an A-form (local store) address | ||||||
|   bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, |   bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, | ||||||
|                        SDValue &Index); |                        SDValue &Index); | ||||||
| @@ -622,27 +631,20 @@ SPUDAGToDAGISel::Select(SDValue Op) { | |||||||
|   if (N->isMachineOpcode()) { |   if (N->isMachineOpcode()) { | ||||||
|     return NULL;   // Already selected. |     return NULL;   // Already selected. | ||||||
|   } else if (Opc == ISD::FrameIndex) { |   } else if (Opc == ISD::FrameIndex) { | ||||||
|     // Selects to (add $sp, FI * stackSlotSize) |     int FI = cast<FrameIndexSDNode>(N)->getIndex(); | ||||||
|     int FI = |     SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); | ||||||
|       SPUFrameInfo::FItoStackOffset(cast<FrameIndexSDNode>(N)->getIndex()); |     SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType()); | ||||||
|     MVT PtrVT = SPUtli.getPointerTy(); |  | ||||||
|  |  | ||||||
|     // Adjust stack slot to actual offset in frame: |     if (FI < 128) { | ||||||
|     if (isS10Constant(FI)) { |  | ||||||
|       DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AIr32 $sp, " |  | ||||||
|                  << FI |  | ||||||
|                  << "\n"); |  | ||||||
|       NewOpc = SPU::AIr32; |       NewOpc = SPU::AIr32; | ||||||
|       Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT); |       Ops[0] = TFI; | ||||||
|       Ops[1] = CurDAG->getTargetConstant(FI, PtrVT); |       Ops[1] = Imm0; | ||||||
|       n_ops = 2; |       n_ops = 2; | ||||||
|     } else { |     } else { | ||||||
|       DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with Ar32 $sp, " |  | ||||||
|                  << FI |  | ||||||
|                  << "\n"); |  | ||||||
|       NewOpc = SPU::Ar32; |       NewOpc = SPU::Ar32; | ||||||
|       Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT); |       Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType()); | ||||||
|       Ops[1] = CurDAG->getConstant(FI, PtrVT); |       Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, Op.getValueType(), | ||||||
|  |                                              TFI, Imm0), 0); | ||||||
|       n_ops = 2; |       n_ops = 2; | ||||||
|     } |     } | ||||||
|   } else if (Opc == ISD::ZERO_EXTEND) { |   } else if (Opc == ISD::ZERO_EXTEND) { | ||||||
| @@ -661,6 +663,18 @@ SPUDAGToDAGISel::Select(SDValue Op) { | |||||||
|         n_ops = 2; |         n_ops = 2; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |   } else if (Opc == ISD::SHL) { | ||||||
|  |     if (OpVT == MVT::i64) { | ||||||
|  |       return SelectSHLi64(Op, OpVT); | ||||||
|  |     } | ||||||
|  |   } else if (Opc == ISD::SRL) { | ||||||
|  |     if (OpVT == MVT::i64) { | ||||||
|  |       return SelectSRLi64(Op, OpVT); | ||||||
|  |     } | ||||||
|  |   } else if (Opc == ISD::SRA) { | ||||||
|  |     if (OpVT == MVT::i64) { | ||||||
|  |       return SelectSRAi64(Op, OpVT); | ||||||
|  |     } | ||||||
|   } else if (Opc == SPUISD::LDRESULT) { |   } else if (Opc == SPUISD::LDRESULT) { | ||||||
|     // Custom select instructions for LDRESULT |     // Custom select instructions for LDRESULT | ||||||
|     MVT VT = N->getValueType(0); |     MVT VT = N->getValueType(0); | ||||||
| @@ -723,7 +737,213 @@ SPUDAGToDAGISel::Select(SDValue Op) { | |||||||
|     return SelectCode(Op); |     return SelectCode(Op); | ||||||
| } | } | ||||||
|  |  | ||||||
| /// createPPCISelDag - This pass converts a legalized DAG into a  | /*! | ||||||
|  |  * Emit the instruction sequence for i64 left shifts. The basic algorithm | ||||||
|  |  * is to fill the bottom two word slots with zeros so that zeros are shifted | ||||||
|  |  * in as the entire quadword is shifted left. | ||||||
|  |  * | ||||||
|  |  * \note This code could also be used to implement v2i64 shl. | ||||||
|  |  * | ||||||
|  |  * @param Op The shl operand | ||||||
|  |  * @param OpVT Op's machine value value type (doesn't need to be passed, but | ||||||
|  |  * makes life easier.) | ||||||
|  |  * @return The SDNode with the entire instruction sequence | ||||||
|  |  */ | ||||||
|  | SDNode * | ||||||
|  | SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) { | ||||||
|  |   SDValue Op0 = Op.getOperand(0); | ||||||
|  |   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||||
|  |   SDValue ShiftAmt = Op.getOperand(1); | ||||||
|  |   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||||
|  |   SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0; | ||||||
|  |   SDValue SelMaskVal; | ||||||
|  |  | ||||||
|  |   VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0); | ||||||
|  |   SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); | ||||||
|  |   SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, SelMaskVal); | ||||||
|  |   ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, VecVT, | ||||||
|  |                                    CurDAG->getTargetConstant(0, OpVT)); | ||||||
|  |   VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, VecVT, | ||||||
|  |                                  SDValue(ZeroFill, 0), | ||||||
|  |                                  SDValue(VecOp0, 0), | ||||||
|  |                                  SDValue(SelMask, 0)); | ||||||
|  |  | ||||||
|  |   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||||
|  |     unsigned bytes = unsigned(CN->getZExtValue()) >> 3; | ||||||
|  |     unsigned bits = unsigned(CN->getZExtValue()) & 7; | ||||||
|  |  | ||||||
|  |     if (bytes > 0) { | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::SHLQBYIv2i64, VecVT, | ||||||
|  |                               SDValue(VecOp0, 0), | ||||||
|  |                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (bits > 0) { | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::SHLQBIIv2i64, VecVT, | ||||||
|  |                               SDValue((Shift != 0 ? Shift : VecOp0), 0), | ||||||
|  |                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     SDNode *Bytes = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, | ||||||
|  |                             CurDAG->getTargetConstant(3, ShiftAmtVT)); | ||||||
|  |     SDNode *Bits = | ||||||
|  |       CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, | ||||||
|  |                             CurDAG->getTargetConstant(7, ShiftAmtVT)); | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::SHLQBYv2i64, VecVT, | ||||||
|  |                             SDValue(VecOp0, 0), SDValue(Bytes, 0)); | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::SHLQBIv2i64, VecVT, | ||||||
|  |                             SDValue(Shift, 0), SDValue(Bits, 0)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /*! | ||||||
|  |  * Emit the instruction sequence for i64 logical right shifts. | ||||||
|  |  * | ||||||
|  |  * @param Op The shl operand | ||||||
|  |  * @param OpVT Op's machine value value type (doesn't need to be passed, but | ||||||
|  |  * makes life easier.) | ||||||
|  |  * @return The SDNode with the entire instruction sequence | ||||||
|  |  */ | ||||||
|  | SDNode * | ||||||
|  | SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) { | ||||||
|  |   SDValue Op0 = Op.getOperand(0); | ||||||
|  |   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||||
|  |   SDValue ShiftAmt = Op.getOperand(1); | ||||||
|  |   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||||
|  |   SDNode *VecOp0, *Shift = 0; | ||||||
|  |  | ||||||
|  |   VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0); | ||||||
|  |  | ||||||
|  |   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||||
|  |     unsigned bytes = unsigned(CN->getZExtValue()) >> 3; | ||||||
|  |     unsigned bits = unsigned(CN->getZExtValue()) & 7; | ||||||
|  |  | ||||||
|  |     if (bytes > 0) { | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, VecVT, | ||||||
|  |                               SDValue(VecOp0, 0), | ||||||
|  |                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (bits > 0) { | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, VecVT, | ||||||
|  |                               SDValue((Shift != 0 ? Shift : VecOp0), 0), | ||||||
|  |                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     SDNode *Bytes = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, | ||||||
|  |                             CurDAG->getTargetConstant(3, ShiftAmtVT)); | ||||||
|  |     SDNode *Bits = | ||||||
|  |       CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, | ||||||
|  |                             CurDAG->getTargetConstant(7, ShiftAmtVT)); | ||||||
|  |  | ||||||
|  |     // Ensure that the shift amounts are negated! | ||||||
|  |     Bytes = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||||
|  |                                   SDValue(Bytes, 0), | ||||||
|  |                                   CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||||
|  |  | ||||||
|  |     Bits = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||||
|  |                                  SDValue(Bits, 0), | ||||||
|  |                                  CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||||
|  |  | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTQMBYv2i64, VecVT, | ||||||
|  |                             SDValue(VecOp0, 0), SDValue(Bytes, 0)); | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTQMBIv2i64, VecVT, | ||||||
|  |                             SDValue(Shift, 0), SDValue(Bits, 0)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /*! | ||||||
|  |  * Emit the instruction sequence for i64 arithmetic right shifts. | ||||||
|  |  * | ||||||
|  |  * @param Op The shl operand | ||||||
|  |  * @param OpVT Op's machine value value type (doesn't need to be passed, but | ||||||
|  |  * makes life easier.) | ||||||
|  |  * @return The SDNode with the entire instruction sequence | ||||||
|  |  */ | ||||||
|  | SDNode * | ||||||
|  | SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { | ||||||
|  |   // Promote Op0 to vector | ||||||
|  |   MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); | ||||||
|  |   SDValue ShiftAmt = Op.getOperand(1); | ||||||
|  |   MVT ShiftAmtVT = ShiftAmt.getValueType(); | ||||||
|  |  | ||||||
|  |   SDNode *VecOp0 = | ||||||
|  |     CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op.getOperand(0)); | ||||||
|  |  | ||||||
|  |   SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); | ||||||
|  |   SDNode *SignRot = | ||||||
|  |     CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, MVT::v2i64, | ||||||
|  |                           SDValue(VecOp0, 0), SignRotAmt); | ||||||
|  |   SDNode *UpperHalfSign = | ||||||
|  |     CurDAG->getTargetNode(SPU::ORi32_v4i32, MVT::i32, SDValue(SignRot, 0)); | ||||||
|  |  | ||||||
|  |   SDNode *UpperHalfSignMask = | ||||||
|  |     CurDAG->getTargetNode(SPU::FSM64r32, VecVT, SDValue(UpperHalfSign, 0)); | ||||||
|  |   SDNode *UpperLowerMask = | ||||||
|  |     CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, | ||||||
|  |                           CurDAG->getTargetConstant(0xff00ULL, MVT::i16)); | ||||||
|  |   SDNode *UpperLowerSelect = | ||||||
|  |     CurDAG->getTargetNode(SPU::SELBv2i64, VecVT, | ||||||
|  |                           SDValue(UpperHalfSignMask, 0), | ||||||
|  |                           SDValue(VecOp0, 0), | ||||||
|  |                           SDValue(UpperLowerMask, 0)); | ||||||
|  |  | ||||||
|  |   SDNode *Shift = 0; | ||||||
|  |  | ||||||
|  |   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { | ||||||
|  |     unsigned bytes = unsigned(CN->getZExtValue()) >> 3; | ||||||
|  |     unsigned bits = unsigned(CN->getZExtValue()) & 7; | ||||||
|  |  | ||||||
|  |     if (bytes > 0) { | ||||||
|  |       bytes = 31 - bytes; | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::ROTQBYIv2i64, VecVT, | ||||||
|  |                               SDValue(UpperLowerSelect, 0), | ||||||
|  |                               CurDAG->getTargetConstant(bytes, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (bits > 0) { | ||||||
|  |       bits = 8 - bits; | ||||||
|  |       Shift = | ||||||
|  |         CurDAG->getTargetNode(SPU::ROTQBIIv2i64, VecVT, | ||||||
|  |                               SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0), | ||||||
|  |                               CurDAG->getTargetConstant(bits, ShiftAmtVT)); | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     SDNode *NegShift = | ||||||
|  |       CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT, | ||||||
|  |                             ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT)); | ||||||
|  |  | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, VecVT, | ||||||
|  |                             SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0)); | ||||||
|  |     Shift = | ||||||
|  |       CurDAG->getTargetNode(SPU::ROTQBIv2i64, VecVT, | ||||||
|  |                             SDValue(Shift, 0), SDValue(NegShift, 0)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// createSPUISelDag - This pass converts a legalized DAG into a | ||||||
| /// SPU-specific DAG, ready for instruction scheduling. | /// SPU-specific DAG, ready for instruction scheduling. | ||||||
| /// | /// | ||||||
| FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { | FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { | ||||||
|   | |||||||
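SelectSHLi64 and SelectSRLi64 above both split the shift amount into a quadword-byte count (amount >> 3, the SHLQBYI / ROTQMBYI step) and a residual bit count (amount & 7, the SHLQBII / ROTQMBII step). SelectSHLi64 first zero-fills the lower word slots (the FSMBI 0xff00 / SELB preamble) so that zeros are shifted in as the whole quadword moves left, and SelectSRLi64 negates the amounts with SFI because the ROTQM* forms take negative counts. The following standalone check (not SPU code) verifies just the byte/bit decomposition that the constant-amount paths compute.

```cpp
#include <cstdint>
#include <cassert>

uint64_t shl64_decomposed(uint64_t x, unsigned amt) {   // amt in [0, 63]
  unsigned bytes = amt >> 3;      // whole-byte part   (SHLQBYI step)
  unsigned bits  = amt & 7;       // residual bit part (SHLQBII step)
  if (bytes) x <<= 8 * bytes;
  if (bits)  x <<= bits;
  return x;
}

uint64_t srl64_decomposed(uint64_t x, unsigned amt) {   // ROTQMBYI / ROTQMBII
  unsigned bytes = amt >> 3, bits = amt & 7;
  if (bytes) x >>= 8 * bytes;
  if (bits)  x >>= bits;
  return x;
}

int main() {
  const uint64_t v = 0x0123456789abcdefULL;
  for (unsigned amt = 0; amt < 64; ++amt) {
    assert(shl64_decomposed(v, amt) == v << amt);
    assert(srl64_decomposed(v, amt) == v >> amt);
  }
}
```

SelectSRAi64 uses the same byte/bit split but implements the right shift as a left rotate by the complementary amount, after first filling the lower doubleword with the sign of the upper word (ROTMAI by 31, FSM64r32, SELB with the 0xff00 mask), so that sign bits rather than zeros rotate in.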
| @@ -204,10 +204,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | |||||||
|   setOperationAction(ISD::SRL,  MVT::i8,     Custom); |   setOperationAction(ISD::SRL,  MVT::i8,     Custom); | ||||||
|   setOperationAction(ISD::SRA,  MVT::i8,     Custom); |   setOperationAction(ISD::SRA,  MVT::i8,     Custom); | ||||||
|  |  | ||||||
|   // SPU needs custom lowering for shift left/right for i64 |   // Make these operations legal and handle them during instruction selection: | ||||||
|   setOperationAction(ISD::SHL,  MVT::i64,    Custom); |   setOperationAction(ISD::SHL,  MVT::i64,    Legal); | ||||||
|   setOperationAction(ISD::SRL,  MVT::i64,    Custom); |   setOperationAction(ISD::SRL,  MVT::i64,    Legal); | ||||||
|   setOperationAction(ISD::SRA,  MVT::i64,    Custom); |   setOperationAction(ISD::SRA,  MVT::i64,    Legal); | ||||||
|  |  | ||||||
|   // Custom lower i8, i32 and i64 multiplications |   // Custom lower i8, i32 and i64 multiplications | ||||||
|   setOperationAction(ISD::MUL,  MVT::i8,     Custom); |   setOperationAction(ISD::MUL,  MVT::i8,     Custom); | ||||||
| @@ -215,6 +215,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | |||||||
|   setOperationAction(ISD::MUL,  MVT::i64,    Expand);   // libcall |   setOperationAction(ISD::MUL,  MVT::i64,    Expand);   // libcall | ||||||
|  |  | ||||||
|   // Need to custom handle (some) common i8, i64 math ops |   // Need to custom handle (some) common i8, i64 math ops | ||||||
|  |   setOperationAction(ISD::ADD,  MVT::i8,     Custom); | ||||||
|   setOperationAction(ISD::ADD,  MVT::i64,    Custom); |   setOperationAction(ISD::ADD,  MVT::i64,    Custom); | ||||||
|   setOperationAction(ISD::SUB,  MVT::i8,     Custom); |   setOperationAction(ISD::SUB,  MVT::i8,     Custom); | ||||||
|   setOperationAction(ISD::SUB,  MVT::i64,    Custom); |   setOperationAction(ISD::SUB,  MVT::i64,    Custom); | ||||||
| @@ -249,7 +250,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | |||||||
|   // Zero extension and sign extension for i64 have to be |   // Zero extension and sign extension for i64 have to be | ||||||
|   // custom legalized |   // custom legalized | ||||||
|   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); |   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); | ||||||
|   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); |  | ||||||
|   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom); |   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom); | ||||||
|  |  | ||||||
|   // Custom lower i128 -> i64 truncates |   // Custom lower i128 -> i64 truncates | ||||||
| @@ -262,7 +262,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | |||||||
|   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); | ||||||
|  |  | ||||||
|   // FDIV on SPU requires custom lowering |   // FDIV on SPU requires custom lowering | ||||||
|   setOperationAction(ISD::FDIV, MVT::f32, Custom); |  | ||||||
|   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // libcall |   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // libcall | ||||||
|  |  | ||||||
|   // SPU has [U|S]INT_TO_FP |   // SPU has [U|S]INT_TO_FP | ||||||
| @@ -340,7 +339,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | |||||||
|     setOperationAction(ISD::ADD , VT, Legal); |     setOperationAction(ISD::ADD , VT, Legal); | ||||||
|     setOperationAction(ISD::SUB , VT, Legal); |     setOperationAction(ISD::SUB , VT, Legal); | ||||||
|     // mul has to be custom lowered. |     // mul has to be custom lowered. | ||||||
|     setOperationAction(ISD::MUL , VT, Custom); |     // TODO: v2i64 vector multiply | ||||||
|  |     setOperationAction(ISD::MUL , VT, Legal); | ||||||
|  |  | ||||||
|     setOperationAction(ISD::AND   , VT, Legal); |     setOperationAction(ISD::AND   , VT, Legal); | ||||||
|     setOperationAction(ISD::OR    , VT, Legal); |     setOperationAction(ISD::OR    , VT, Legal); | ||||||
| @@ -354,7 +354,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | |||||||
|     setOperationAction(ISD::SREM, VT, Expand); |     setOperationAction(ISD::SREM, VT, Expand); | ||||||
|     setOperationAction(ISD::UDIV, VT, Expand); |     setOperationAction(ISD::UDIV, VT, Expand); | ||||||
|     setOperationAction(ISD::UREM, VT, Expand); |     setOperationAction(ISD::UREM, VT, Expand); | ||||||
|     setOperationAction(ISD::FDIV, VT, Custom); |  | ||||||
|  |  | ||||||
|     // Custom lower build_vector, constant pool spills, insert and |     // Custom lower build_vector, constant pool spills, insert and | ||||||
|     // extract vector elements: |     // extract vector elements: | ||||||
| @@ -371,9 +370,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) | |||||||
|   setOperationAction(ISD::XOR, MVT::v16i8, Custom); |   setOperationAction(ISD::XOR, MVT::v16i8, Custom); | ||||||
|   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); |   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); | ||||||
|  |  | ||||||
|   // FIXME: This is only temporary until I put all vector multiplications in |   setOperationAction(ISD::FDIV, MVT::v4f32, Legal); | ||||||
|   // SPUInstrInfo.td: |  | ||||||
|   setOperationAction(ISD::MUL, MVT::v4i32, Legal); |  | ||||||
|  |  | ||||||
|   setShiftAmountType(MVT::i32); |   setShiftAmountType(MVT::i32); | ||||||
|   setBooleanContents(ZeroOrNegativeOneBooleanContent); |   setBooleanContents(ZeroOrNegativeOneBooleanContent); | ||||||
| @@ -411,10 +408,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const | |||||||
|     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; |     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; | ||||||
|     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; |     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; | ||||||
|     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; |     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; | ||||||
|     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; |  | ||||||
|     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; |  | ||||||
|     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; |  | ||||||
|     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; |  | ||||||
|     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; |     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; | ||||||
|     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; |     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; | ||||||
|     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; |     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; | ||||||
| @@ -422,21 +415,12 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const | |||||||
|     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; |     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; | ||||||
|     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; |     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; | ||||||
|     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; |     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; | ||||||
|     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] = |  | ||||||
|       "SPUISD::ROTQUAD_RZ_BYTES"; |  | ||||||
|     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] = |  | ||||||
|       "SPUISD::ROTQUAD_RZ_BITS"; |  | ||||||
|     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; |  | ||||||
|     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = |  | ||||||
|       "SPUISD::ROTBYTES_LEFT_BITS"; |  | ||||||
|     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; |     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; | ||||||
|     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; |     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; | ||||||
|     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; |     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; | ||||||
|     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; |     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; | ||||||
|     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; |     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; | ||||||
|     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; |     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; | ||||||
|     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; |  | ||||||
|     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; |  | ||||||
|     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; |     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -1922,182 +1906,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { | |||||||
|   return SDValue(); |   return SDValue(); | ||||||
| } | } | ||||||
|  |  | ||||||
| static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) { |  | ||||||
|   switch (Op.getValueType().getSimpleVT()) { |  | ||||||
|   default: |  | ||||||
|     cerr << "CellSPU: Unknown vector multiplication, got " |  | ||||||
|          << Op.getValueType().getMVTString() |  | ||||||
|          << "\n"; |  | ||||||
|     abort(); |  | ||||||
|     /*NOTREACHED*/ |  | ||||||
|  |  | ||||||
|   case MVT::v4i32: |  | ||||||
| 	  break; |  | ||||||
|  |  | ||||||
|   // Multiply two v8i16 vectors (pipeline friendly version): |  | ||||||
|   // a) multiply lower halves, mask off upper 16-bit of 32-bit product |  | ||||||
|   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes) |  | ||||||
|   // c) Use SELB to select upper and lower halves from the intermediate results |  | ||||||
|   // |  | ||||||
|   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the |  | ||||||
|   // dual-issue. This code does manage to do this, even if it's a little on |  | ||||||
|   // the wacky side |  | ||||||
|   case MVT::v8i16: { |  | ||||||
|     MachineFunction &MF = DAG.getMachineFunction(); |  | ||||||
|     MachineRegisterInfo &RegInfo = MF.getRegInfo(); |  | ||||||
|     SDValue Chain = Op.getOperand(0); |  | ||||||
|     SDValue rA = Op.getOperand(0); |  | ||||||
|     SDValue rB = Op.getOperand(1); |  | ||||||
|     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); |  | ||||||
|     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); |  | ||||||
|  |  | ||||||
|     SDValue FSMBOp = |  | ||||||
|       DAG.getCopyToReg(Chain, FSMBIreg, |  | ||||||
|                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, |  | ||||||
|                                    DAG.getConstant(0xcccc, MVT::i16))); |  | ||||||
|  |  | ||||||
|     SDValue HHProd = |  | ||||||
|       DAG.getCopyToReg(FSMBOp, HiProdReg, |  | ||||||
|                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); |  | ||||||
|  |  | ||||||
|     SDValue HHProd_v4i32 = |  | ||||||
|       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, |  | ||||||
|                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(SPUISD::SELB, MVT::v8i16, |  | ||||||
|                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), |  | ||||||
|                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), |  | ||||||
|                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, |  | ||||||
|                                                HHProd_v4i32, |  | ||||||
|                                                DAG.getConstant(16, MVT::i16))), |  | ||||||
|                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // This M00sE is N@stI! (apologies to Monty Python) |  | ||||||
|   // |  | ||||||
|   // SPU doesn't know how to do any 8-bit multiplication, so the solution |  | ||||||
|   // is to break it all apart, sign extend, and reassemble the various |  | ||||||
|   // intermediate products. |  | ||||||
|   case MVT::v16i8: { |  | ||||||
|     SDValue rA = Op.getOperand(0); |  | ||||||
|     SDValue rB = Op.getOperand(1); |  | ||||||
|     SDValue c8 = DAG.getConstant(8, MVT::i32); |  | ||||||
|     SDValue c16 = DAG.getConstant(16, MVT::i32); |  | ||||||
|  |  | ||||||
|     SDValue LLProd = |  | ||||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); |  | ||||||
|  |  | ||||||
|     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); |  | ||||||
|  |  | ||||||
|     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); |  | ||||||
|  |  | ||||||
|     SDValue LHProd = |  | ||||||
|       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, |  | ||||||
|                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); |  | ||||||
|  |  | ||||||
|     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, |  | ||||||
|                                      DAG.getConstant(0x2222, MVT::i16)); |  | ||||||
|  |  | ||||||
|     SDValue LoProdParts = |  | ||||||
|       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, |  | ||||||
|                   DAG.getNode(SPUISD::SELB, MVT::v8i16, |  | ||||||
|                               LLProd, LHProd, FSMBmask)); |  | ||||||
|  |  | ||||||
|     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32); |  | ||||||
|  |  | ||||||
|     SDValue LoProd = |  | ||||||
|       DAG.getNode(ISD::AND, MVT::v4i32, |  | ||||||
|                   LoProdParts, |  | ||||||
|                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, |  | ||||||
|                               LoProdMask, LoProdMask, |  | ||||||
|                               LoProdMask, LoProdMask)); |  | ||||||
|  |  | ||||||
|     SDValue rAH = |  | ||||||
|       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); |  | ||||||
|  |  | ||||||
|     SDValue rBH = |  | ||||||
|       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); |  | ||||||
|  |  | ||||||
|     SDValue HLProd = |  | ||||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); |  | ||||||
|  |  | ||||||
|     SDValue HHProd_1 = |  | ||||||
|       DAG.getNode(SPUISD::MPY, MVT::v8i16, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, |  | ||||||
|                               DAG.getNode(SPUISD::VEC_SRA, |  | ||||||
|                                           MVT::v4i32, rAH, c8)), |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, |  | ||||||
|                               DAG.getNode(SPUISD::VEC_SRA, |  | ||||||
|                                           MVT::v4i32, rBH, c8))); |  | ||||||
|  |  | ||||||
|     SDValue HHProd = |  | ||||||
|       DAG.getNode(SPUISD::SELB, MVT::v8i16, |  | ||||||
|                   HLProd, |  | ||||||
|                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), |  | ||||||
|                   FSMBmask); |  | ||||||
|  |  | ||||||
|     SDValue HiProd = |  | ||||||
|       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, |  | ||||||
|                        DAG.getNode(ISD::OR, MVT::v4i32, |  | ||||||
|                                    LoProd, HiProd)); |  | ||||||
|   } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   return SDValue(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) { |  | ||||||
|   MachineFunction &MF = DAG.getMachineFunction(); |  | ||||||
|   MachineRegisterInfo &RegInfo = MF.getRegInfo(); |  | ||||||
|  |  | ||||||
|   SDValue A = Op.getOperand(0); |  | ||||||
|   SDValue B = Op.getOperand(1); |  | ||||||
|   MVT VT = Op.getValueType(); |  | ||||||
|  |  | ||||||
|   unsigned VRegBR, VRegC; |  | ||||||
|  |  | ||||||
|   if (VT == MVT::f32) { |  | ||||||
|     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); |  | ||||||
|     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); |  | ||||||
|   } else { |  | ||||||
|     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); |  | ||||||
|     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); |  | ||||||
|   } |  | ||||||
|   // TODO: make sure we're feeding FPInterp the right arguments |  | ||||||
|   // Right now: fi B, frest(B) |  | ||||||
|  |  | ||||||
|   // Computes BRcpl = |  | ||||||
|   // (Floating Interpolate (FP Reciprocal Estimate B)) |  | ||||||
|   SDValue BRcpl = |  | ||||||
|       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, |  | ||||||
|                        DAG.getNode(SPUISD::FPInterp, VT, B, |  | ||||||
|                                 DAG.getNode(SPUISD::FPRecipEst, VT, B))); |  | ||||||
|  |  | ||||||
|   // Computes A * BRcpl and stores in a temporary register |  | ||||||
|   SDValue AxBRcpl = |  | ||||||
|       DAG.getCopyToReg(BRcpl, VRegC, |  | ||||||
|                  DAG.getNode(ISD::FMUL, VT, A, |  | ||||||
|                         DAG.getCopyFromReg(BRcpl, VRegBR, VT))); |  | ||||||
|   // What's the Chain variable do? It's magic! |  | ||||||
|   // TODO: set Chain = Op(0).getEntryNode() |  | ||||||
|  |  | ||||||
|   return DAG.getNode(ISD::FADD, VT, |  | ||||||
|                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT), |  | ||||||
|                 DAG.getNode(ISD::FMUL, VT, |  | ||||||
|                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), |  | ||||||
|                         DAG.getNode(ISD::FSUB, VT, A, |  | ||||||
|                             DAG.getNode(ISD::FMUL, VT, B, |  | ||||||
|                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { | static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { | ||||||
|   MVT VT = Op.getValueType(); |   MVT VT = Op.getValueType(); | ||||||
|   SDValue N = Op.getOperand(0); |   SDValue N = Op.getOperand(0); | ||||||
| @@ -2296,18 +2104,23 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, | |||||||
|     assert(0 && "Unhandled i8 math operator"); |     assert(0 && "Unhandled i8 math operator"); | ||||||
|     /*NOTREACHED*/ |     /*NOTREACHED*/ | ||||||
|     break; |     break; | ||||||
|  |   case ISD::ADD: { | ||||||
|  |     // 8-bit addition: Promote the arguments up to 16-bits and truncate | ||||||
|  |     // the result: | ||||||
|  |     SDValue N1 = Op.getOperand(1); | ||||||
|  |     N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0); | ||||||
|  |     N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1); | ||||||
|  |     return DAG.getNode(ISD::TRUNCATE, MVT::i8, | ||||||
|  |                        DAG.getNode(Opc, MVT::i16, N0, N1)); | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
|   case ISD::SUB: { |   case ISD::SUB: { | ||||||
|     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate |     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate | ||||||
|     // the result: |     // the result: | ||||||
|     SDValue N1 = Op.getOperand(1); |     SDValue N1 = Op.getOperand(1); | ||||||
|     N0 = (N0.getOpcode() != ISD::Constant |     N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0); | ||||||
|           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) |     N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1); | ||||||
|           : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(), |  | ||||||
|                             MVT::i16)); |  | ||||||
|     N1 = (N1.getOpcode() != ISD::Constant |  | ||||||
|           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) |  | ||||||
|           : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(), |  | ||||||
|                             MVT::i16)); |  | ||||||
|     return DAG.getNode(ISD::TRUNCATE, MVT::i8, |     return DAG.getNode(ISD::TRUNCATE, MVT::i8, | ||||||
|                        DAG.getNode(Opc, MVT::i16, N0, N1)); |                        DAG.getNode(Opc, MVT::i16, N0, N1)); | ||||||
|   } |   } | ||||||
| @@ -2397,7 +2210,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) | |||||||
|  |  | ||||||
|   switch (Opc) { |   switch (Opc) { | ||||||
|   case ISD::ZERO_EXTEND: |   case ISD::ZERO_EXTEND: | ||||||
|   case ISD::SIGN_EXTEND: |  | ||||||
|   case ISD::ANY_EXTEND: { |   case ISD::ANY_EXTEND: { | ||||||
|     MVT Op0VT = Op0.getValueType(); |     MVT Op0VT = Op0.getValueType(); | ||||||
|     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); |     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); | ||||||
| @@ -2410,39 +2222,16 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) | |||||||
|     SDValue PromoteScalar = |     SDValue PromoteScalar = | ||||||
|             DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0); |             DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0); | ||||||
|  |  | ||||||
|     if (Opc != ISD::SIGN_EXTEND) { |  | ||||||
|     // Use a shuffle to zero extend the i32 to i64 directly: |     // Use a shuffle to zero extend the i32 to i64 directly: | ||||||
|       SDValue shufMask = |     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT, | ||||||
|               DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT, |         DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203, | ||||||
|                           DAG.getConstant(0x80808080, MVT::i32), |             MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant( | ||||||
|                           DAG.getConstant(0x00010203, MVT::i32), |             0x08090a0b, MVT::i32)); | ||||||
|                           DAG.getConstant(0x80808080, MVT::i32), |     SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar, | ||||||
|                           DAG.getConstant(0x08090a0b, MVT::i32)); |         PromoteScalar, shufMask); | ||||||
|       SDValue zextShuffle = |  | ||||||
|               DAG.getNode(SPUISD::SHUFB, Op0VecVT, |  | ||||||
|                           PromoteScalar, PromoteScalar, shufMask); |  | ||||||
|  |  | ||||||
|       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, |     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT, | ||||||
|                          DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle)); |         VecVT, zextShuffle)); | ||||||
|     } else { |  | ||||||
|       // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift |  | ||||||
|       // right and propagate the sign bit) instruction. |  | ||||||
|       SDValue RotQuad = |  | ||||||
|               DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT, |  | ||||||
|                           PromoteScalar, DAG.getConstant(4, MVT::i32)); |  | ||||||
|       SDValue SignQuad = |  | ||||||
|               DAG.getNode(SPUISD::VEC_SRA, Op0VecVT, |  | ||||||
|                           PromoteScalar, DAG.getConstant(32, MVT::i32)); |  | ||||||
|       SDValue SelMask = |  | ||||||
|               DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT, |  | ||||||
|                           DAG.getConstant(0xf0f0, MVT::i16)); |  | ||||||
|       SDValue CombineQuad = |  | ||||||
|               DAG.getNode(SPUISD::SELB, Op0VecVT, |  | ||||||
|                           SignQuad, RotQuad, SelMask); |  | ||||||
|  |  | ||||||
|       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, |  | ||||||
|                          DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad)); |  | ||||||
|     } |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   case ISD::ADD: { |   case ISD::ADD: { | ||||||
| @@ -2502,88 +2291,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) | |||||||
|                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, |                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, | ||||||
|                                    Op0, Op1, ShiftedBorrow)); |                                    Op0, Op1, ShiftedBorrow)); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   case ISD::SHL: { |  | ||||||
|     SDValue ShiftAmt = Op.getOperand(1); |  | ||||||
|     MVT ShiftAmtVT = ShiftAmt.getValueType(); |  | ||||||
|     SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0); |  | ||||||
|     SDValue MaskLower = |  | ||||||
|       DAG.getNode(SPUISD::SELB, VecVT, |  | ||||||
|                   Op0Vec, |  | ||||||
|                   DAG.getConstant(0, VecVT), |  | ||||||
|                   DAG.getNode(SPUISD::SELECT_MASK, VecVT, |  | ||||||
|                               DAG.getConstant(0xff00ULL, MVT::i16))); |  | ||||||
|     SDValue ShiftAmtBytes = |  | ||||||
|       DAG.getNode(ISD::SRL, ShiftAmtVT, |  | ||||||
|                   ShiftAmt, |  | ||||||
|                   DAG.getConstant(3, ShiftAmtVT)); |  | ||||||
|     SDValue ShiftAmtBits = |  | ||||||
|       DAG.getNode(ISD::AND, ShiftAmtVT, |  | ||||||
|                   ShiftAmt, |  | ||||||
|                   DAG.getConstant(7, ShiftAmtVT)); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, |  | ||||||
|                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT, |  | ||||||
|                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, |  | ||||||
|                                                MaskLower, ShiftAmtBytes), |  | ||||||
|                                    ShiftAmtBits)); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   case ISD::SRL: { |  | ||||||
|     MVT VT = Op.getValueType(); |  | ||||||
|     SDValue ShiftAmt = Op.getOperand(1); |  | ||||||
|     MVT ShiftAmtVT = ShiftAmt.getValueType(); |  | ||||||
|     SDValue ShiftAmtBytes = |  | ||||||
|       DAG.getNode(ISD::SRL, ShiftAmtVT, |  | ||||||
|                   ShiftAmt, |  | ||||||
|                   DAG.getConstant(3, ShiftAmtVT)); |  | ||||||
|     SDValue ShiftAmtBits = |  | ||||||
|       DAG.getNode(ISD::AND, ShiftAmtVT, |  | ||||||
|                   ShiftAmt, |  | ||||||
|                   DAG.getConstant(7, ShiftAmtVT)); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT, |  | ||||||
|                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT, |  | ||||||
|                                    Op0, ShiftAmtBytes), |  | ||||||
|                        ShiftAmtBits); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   case ISD::SRA: { |  | ||||||
|     // Promote Op0 to vector |  | ||||||
|     SDValue Op0 = |  | ||||||
|       DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); |  | ||||||
|     SDValue ShiftAmt = Op.getOperand(1); |  | ||||||
|     MVT ShiftVT = ShiftAmt.getValueType(); |  | ||||||
|  |  | ||||||
|     // Negate variable shift amounts |  | ||||||
|     if (!isa<ConstantSDNode>(ShiftAmt)) { |  | ||||||
|       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT, |  | ||||||
|                              DAG.getConstant(0, ShiftVT), ShiftAmt); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     SDValue UpperHalfSign = |  | ||||||
|       DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32, |  | ||||||
|                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, |  | ||||||
|                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64, |  | ||||||
|                                           Op0, DAG.getConstant(31, MVT::i32)))); |  | ||||||
|     SDValue UpperHalfSignMask = |  | ||||||
|       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign); |  | ||||||
|     SDValue UpperLowerMask = |  | ||||||
|       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, |  | ||||||
|                   DAG.getConstant(0xff00, MVT::i16)); |  | ||||||
|     SDValue UpperLowerSelect = |  | ||||||
|       DAG.getNode(SPUISD::SELB, MVT::v2i64, |  | ||||||
|                   UpperHalfSignMask, Op0, UpperLowerMask); |  | ||||||
|     SDValue RotateLeftBytes = |  | ||||||
|       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64, |  | ||||||
|                   UpperLowerSelect, ShiftAmt); |  | ||||||
|     SDValue RotateLeftBits = |  | ||||||
|       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64, |  | ||||||
|                   RotateLeftBytes, ShiftAmt); |  | ||||||
|  |  | ||||||
|     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, |  | ||||||
|                        RotateLeftBits); |  | ||||||
|   } |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   return SDValue(); |   return SDValue(); | ||||||
| @@ -2890,10 +2597,11 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) | |||||||
|     return LowerRET(Op, DAG, getTargetMachine()); |     return LowerRET(Op, DAG, getTargetMachine()); | ||||||
|  |  | ||||||
|  |  | ||||||
|   // i8, i64 math ops: |  | ||||||
|   case ISD::ZERO_EXTEND: |   case ISD::ZERO_EXTEND: | ||||||
|   case ISD::SIGN_EXTEND: |  | ||||||
|   case ISD::ANY_EXTEND: |   case ISD::ANY_EXTEND: | ||||||
|  |     return LowerI64Math(Op, DAG, Opc); | ||||||
|  |  | ||||||
|  |   // i8, i64 math ops: | ||||||
|   case ISD::ADD: |   case ISD::ADD: | ||||||
|   case ISD::SUB: |   case ISD::SUB: | ||||||
|   case ISD::ROTR: |   case ISD::ROTR: | ||||||
| @@ -2928,22 +2636,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) | |||||||
|  |  | ||||||
|   // Vector and i8 multiply: |   // Vector and i8 multiply: | ||||||
|   case ISD::MUL: |   case ISD::MUL: | ||||||
|     if (VT.isVector()) |     if (VT == MVT::i8) | ||||||
|       return LowerVectorMUL(Op, DAG); |  | ||||||
|     else if (VT == MVT::i8) |  | ||||||
|       return LowerI8Math(Op, DAG, Opc, *this); |       return LowerI8Math(Op, DAG, Opc, *this); | ||||||
|  |  | ||||||
|   case ISD::FDIV: |  | ||||||
|     if (VT == MVT::f32 || VT == MVT::v4f32) |  | ||||||
|       return LowerFDIVf32(Op, DAG); |  | ||||||
| #if 0 |  | ||||||
|     // This is probably a libcall |  | ||||||
|     else if (Op.getValueType() == MVT::f64) |  | ||||||
|       return LowerFDIVf64(Op, DAG); |  | ||||||
| #endif |  | ||||||
|     else |  | ||||||
|       assert(0 && "Calling FDIV on unsupported MVT"); |  | ||||||
|  |  | ||||||
|   case ISD::CTPOP: |   case ISD::CTPOP: | ||||||
|     return LowerCTPOP(Op, DAG); |     return LowerCTPOP(Op, DAG); | ||||||
|  |  | ||||||
| @@ -3119,8 +2814,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const | |||||||
|   case SPUISD::VEC_SHL: |   case SPUISD::VEC_SHL: | ||||||
|   case SPUISD::VEC_SRL: |   case SPUISD::VEC_SRL: | ||||||
|   case SPUISD::VEC_SRA: |   case SPUISD::VEC_SRA: | ||||||
|   case SPUISD::ROTQUAD_RZ_BYTES: |  | ||||||
|   case SPUISD::ROTQUAD_RZ_BITS: |  | ||||||
|   case SPUISD::ROTBYTES_LEFT: { |   case SPUISD::ROTBYTES_LEFT: { | ||||||
|     SDValue Op1 = N->getOperand(1); |     SDValue Op1 = N->getOperand(1); | ||||||
|  |  | ||||||
| @@ -3268,10 +2961,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, | |||||||
|   } |   } | ||||||
|  |  | ||||||
| #if 0 | #if 0 | ||||||
|   case MPY: |  | ||||||
|   case MPYU: |  | ||||||
|   case MPYH: |  | ||||||
|   case MPYHH: |  | ||||||
|   case SPUISD::SHLQUAD_L_BITS: |   case SPUISD::SHLQUAD_L_BITS: | ||||||
|   case SPUISD::SHLQUAD_L_BYTES: |   case SPUISD::SHLQUAD_L_BYTES: | ||||||
|   case SPUISD::VEC_SHL: |   case SPUISD::VEC_SHL: | ||||||
| @@ -3279,13 +2968,9 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, | |||||||
|   case SPUISD::VEC_SRA: |   case SPUISD::VEC_SRA: | ||||||
|   case SPUISD::VEC_ROTL: |   case SPUISD::VEC_ROTL: | ||||||
|   case SPUISD::VEC_ROTR: |   case SPUISD::VEC_ROTR: | ||||||
|   case SPUISD::ROTQUAD_RZ_BYTES: |  | ||||||
|   case SPUISD::ROTQUAD_RZ_BITS: |  | ||||||
|   case SPUISD::ROTBYTES_LEFT: |   case SPUISD::ROTBYTES_LEFT: | ||||||
|   case SPUISD::SELECT_MASK: |   case SPUISD::SELECT_MASK: | ||||||
|   case SPUISD::SELB: |   case SPUISD::SELB: | ||||||
|   case SPUISD::FPInterp: |  | ||||||
|   case SPUISD::FPRecipEst: |  | ||||||
|   case SPUISD::SEXT32TO64: |   case SPUISD::SEXT32TO64: | ||||||
| #endif | #endif | ||||||
|   } |   } | ||||||
|   | |||||||
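The new ISD::ADD case in LowerI8Math (and the simplified SUB case) handles 8-bit arithmetic the SPU lacks by sign-extending both operands to i16, performing the 16-bit operation, and truncating back to i8; because the result is truncated, it would not matter whether the operands were sign- or zero-extended. A standalone model of that sequence, showing it yields the ordinary wrapping 8-bit result (two's-complement narrowing assumed, as on every common implementation and guaranteed since C++20):

```cpp
#include <cstdint>
#include <cassert>

int8_t add_i8_via_i16(int8_t a, int8_t b) {
  int16_t wide = int16_t(a) + int16_t(b);   // ISD::SIGN_EXTEND + 16-bit ADD
  return int8_t(wide);                      // ISD::TRUNCATE back to i8
}

int main() {
  assert(add_i8_via_i16(5, 7) == 12);
  assert(add_i8_via_i16(100, 100) == -56);  // 200 wraps to -56 in 8 bits
  assert(add_i8_via_i16(-128, -1) == 127);  // wraps around the other way
}
```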
| @@ -41,10 +41,6 @@ namespace llvm { | |||||||
|       CNTB,                     ///< Count leading ones in bytes |       CNTB,                     ///< Count leading ones in bytes | ||||||
|       PREFSLOT2VEC,             ///< Promote scalar->vector |       PREFSLOT2VEC,             ///< Promote scalar->vector | ||||||
|       VEC2PREFSLOT,             ///< Extract element 0 |       VEC2PREFSLOT,             ///< Extract element 0 | ||||||
|       MPY,                      ///< 16-bit Multiply (low parts of a 32-bit) |  | ||||||
|       MPYU,                     ///< Multiply Unsigned |  | ||||||
|       MPYH,                     ///< Multiply High |  | ||||||
|       MPYHH,                    ///< Multiply High-High |  | ||||||
|       SHLQUAD_L_BITS,           ///< Rotate quad left, by bits |       SHLQUAD_L_BITS,           ///< Rotate quad left, by bits | ||||||
|       SHLQUAD_L_BYTES,          ///< Rotate quad left, by bytes |       SHLQUAD_L_BYTES,          ///< Rotate quad left, by bytes | ||||||
|       VEC_SHL,                  ///< Vector shift left |       VEC_SHL,                  ///< Vector shift left | ||||||
| @@ -52,8 +48,6 @@ namespace llvm { | |||||||
|       VEC_SRA,                  ///< Vector shift right (arithmetic) |       VEC_SRA,                  ///< Vector shift right (arithmetic) | ||||||
|       VEC_ROTL,                 ///< Vector rotate left |       VEC_ROTL,                 ///< Vector rotate left | ||||||
|       VEC_ROTR,                 ///< Vector rotate right |       VEC_ROTR,                 ///< Vector rotate right | ||||||
|       ROTQUAD_RZ_BYTES,         ///< Rotate quad right, by bytes, zero fill |  | ||||||
|       ROTQUAD_RZ_BITS,          ///< Rotate quad right, by bits, zero fill |  | ||||||
|       ROTBYTES_LEFT,            ///< Rotate bytes (loads -> ROTQBYI) |       ROTBYTES_LEFT,            ///< Rotate bytes (loads -> ROTQBYI) | ||||||
|       ROTBYTES_LEFT_BITS,       ///< Rotate bytes left by bit shift count |       ROTBYTES_LEFT_BITS,       ///< Rotate bytes left by bit shift count | ||||||
|       SELECT_MASK,              ///< Select Mask (FSM, FSMB, FSMH, FSMBI) |       SELECT_MASK,              ///< Select Mask (FSM, FSMB, FSMH, FSMBI) | ||||||
| @@ -63,8 +57,6 @@ namespace llvm { | |||||||
|       CARRY_GENERATE,           ///< Carry generate for ADD_EXTENDED |       CARRY_GENERATE,           ///< Carry generate for ADD_EXTENDED | ||||||
|       SUB_EXTENDED,             ///< Subtract extended, with borrow |       SUB_EXTENDED,             ///< Subtract extended, with borrow | ||||||
|       BORROW_GENERATE,          ///< Borrow generate for SUB_EXTENDED |       BORROW_GENERATE,          ///< Borrow generate for SUB_EXTENDED | ||||||
|       FPInterp,                 ///< Floating point interpolate |  | ||||||
|       FPRecipEst,               ///< Floating point reciprocal estimate |  | ||||||
|       SEXT32TO64,               ///< Sign-extended 32-bit const -> 64-bits |       SEXT32TO64,               ///< Sign-extended 32-bit const -> 64-bits | ||||||
|       LAST_SPUISD               ///< Last user-defined instruction |       LAST_SPUISD               ///< Last user-defined instruction | ||||||
|     }; |     }; | ||||||
|   | |||||||
| @@ -82,7 +82,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, | |||||||
|   case SPU::ORIi8i32: |   case SPU::ORIi8i32: | ||||||
|   case SPU::AHIvec: |   case SPU::AHIvec: | ||||||
|   case SPU::AHIr16: |   case SPU::AHIr16: | ||||||
|   case SPU::AIvec: |   case SPU::AIv4i32: | ||||||
|     assert(MI.getNumOperands() == 3 && |     assert(MI.getNumOperands() == 3 && | ||||||
|            MI.getOperand(0).isReg() && |            MI.getOperand(0).isReg() && | ||||||
|            MI.getOperand(1).isReg() && |            MI.getOperand(1).isReg() && | ||||||
| @@ -98,8 +98,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, | |||||||
|     assert(MI.getNumOperands() == 3 && |     assert(MI.getNumOperands() == 3 && | ||||||
|            "wrong number of operands to AIr32"); |            "wrong number of operands to AIr32"); | ||||||
|     if (MI.getOperand(0).isReg() && |     if (MI.getOperand(0).isReg() && | ||||||
|         (MI.getOperand(1).isReg() || |         MI.getOperand(1).isReg() && | ||||||
|          MI.getOperand(1).isFI()) && |  | ||||||
|         (MI.getOperand(2).isImm() && |         (MI.getOperand(2).isImm() && | ||||||
|          MI.getOperand(2).getImm() == 0)) { |          MI.getOperand(2).getImm() == 0)) { | ||||||
|       sourceReg = MI.getOperand(1).getReg(); |       sourceReg = MI.getOperand(1).getReg(); | ||||||
|   | |||||||
| @@ -583,7 +583,9 @@ def AHIvec: | |||||||
| def AHIr16: | def AHIr16: | ||||||
|   RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), |   RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), | ||||||
|     "ahi\t$rT, $rA, $val", IntegerOp, |     "ahi\t$rT, $rA, $val", IntegerOp, | ||||||
|     [(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>; |     [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>; | ||||||
|  |  | ||||||
|  | // v4i32, i32 add instruction: | ||||||
|  |  | ||||||
| class AInst<dag OOL, dag IOL, list<dag> pattern>: | class AInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
|   RRForm<0b00000011000, OOL, IOL, |   RRForm<0b00000011000, OOL, IOL, | ||||||
| @@ -604,21 +606,42 @@ multiclass AddInstruction { | |||||||
|   def v16i8: AVecInst<v16i8>; |   def v16i8: AVecInst<v16i8>; | ||||||
|    |    | ||||||
|   def r32:   ARegInst<R32C>; |   def r32:   ARegInst<R32C>; | ||||||
|   def r8:    AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>;  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| defm A : AddInstruction; | defm A : AddInstruction; | ||||||
|  |  | ||||||
| def AIvec: | class AIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
|     RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), |     RI10Form<0b00111000, OOL, IOL, | ||||||
| 	     "ai\t$rT, $rA, $val", IntegerOp, | 	     "ai\t$rT, $rA, $val", IntegerOp, | ||||||
|       [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA), | 	     pattern>; | ||||||
|                                       v4i32SExt10Imm:$val))]>; |  | ||||||
|  |  | ||||||
| def AIr32: | class AIVecInst<ValueType vectype, PatLeaf immpred>: | ||||||
|     RI10Form<0b00111000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), |     AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||||
|       "ai\t$rT, $rA, $val", IntegerOp, | 	    [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>; | ||||||
|       [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>; |  | ||||||
|  | class AIFPVecInst<ValueType vectype, PatLeaf immpred>: | ||||||
|  |     AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||||
|  | 	    [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | class AIRegInst<RegisterClass rclass, PatLeaf immpred>: | ||||||
|  |     AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), | ||||||
|  | 	   [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>; | ||||||
|  |  | ||||||
|  | // This is used to add epsilons to floating point numbers in the f32 fdiv code: | ||||||
|  | class AIFPInst<RegisterClass rclass, PatLeaf immpred>: | ||||||
|  |     AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), | ||||||
|  | 	   [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | multiclass AddImmediate { | ||||||
|  |   def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>; | ||||||
|  |  | ||||||
|  |   def r32: AIRegInst<R32C, i32ImmSExt10>; | ||||||
|  |  | ||||||
|  |   def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>; | ||||||
|  |   def f32: AIFPInst<R32FP, i32ImmSExt10>; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | defm AI : AddImmediate; | ||||||
|  |  | ||||||
| def SFHvec: | def SFHvec: | ||||||
|     RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
| @@ -795,8 +818,7 @@ def BGXvec: | |||||||
| def MPYv8i16: | def MPYv8i16: | ||||||
|   RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |   RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|     "mpy\t$rT, $rA, $rB", IntegerMulDiv, |     "mpy\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|     [(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA), |     [/* no pattern */]>; | ||||||
|                                           (v8i16 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def MPYr16: | def MPYr16: | ||||||
|   RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), |   RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), | ||||||
| @@ -812,8 +834,7 @@ class MPYUInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|  |  | ||||||
| def MPYUv4i32: | def MPYUv4i32: | ||||||
|   MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |   MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|            [(set (v4i32 VECREG:$rT), |            [/* no pattern */]>; | ||||||
|                  (SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def MPYUr16: | def MPYUr16: | ||||||
|   MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), |   MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), | ||||||
| @@ -821,7 +842,7 @@ def MPYUr16: | |||||||
|  |  | ||||||
| def MPYUr32: | def MPYUr32: | ||||||
|   MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |   MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||||
|            [(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>; |            [/* no pattern */]>; | ||||||
|  |  | ||||||
| // mpyi: multiply 16 x s10imm -> 32 result. | // mpyi: multiply 16 x s10imm -> 32 result. | ||||||
|  |  | ||||||
| @@ -892,87 +913,78 @@ class MPYHInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|           |           | ||||||
| def MPYHv4i32: | def MPYHv4i32: | ||||||
|     MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|              [(set (v4i32 VECREG:$rT), |              [/* no pattern */]>; | ||||||
|                    (SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def MPYHr32: | def MPYHr32: | ||||||
|     MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), | ||||||
|              [(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>; |              [/* no pattern */]>; | ||||||
|  |  | ||||||
| // mpys: multiply high and shift right (returns the top half of | // mpys: multiply high and shift right (returns the top half of | ||||||
| // a 16-bit multiply, sign extended to 32 bits.) | // a 16-bit multiply, sign extended to 32 bits.) | ||||||
| def MPYSvec: |  | ||||||
|     RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | class MPYSInst<dag OOL, dag IOL>: | ||||||
|  |     RRForm<0b11100011110, OOL, IOL,  | ||||||
|       "mpys\t$rT, $rA, $rB", IntegerMulDiv, |       "mpys\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|       []>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def MPYSvec: | ||||||
|  |     MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|      |      | ||||||
| def MPYSr16: | def MPYSr16: | ||||||
|     RRForm<0b11100011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB), |     MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>; | ||||||
|       "mpys\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| // mpyhh: multiply high-high (returns the 32-bit result from multiplying | // mpyhh: multiply high-high (returns the 32-bit result from multiplying | ||||||
| // the top 16 bits of the $rA, $rB) | // the top 16 bits of the $rA, $rB) | ||||||
| def MPYHHv8i16: |  | ||||||
|     RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | class MPYHHInst<dag OOL, dag IOL>: | ||||||
|  |   RRForm<0b01100011110, OOL, IOL, | ||||||
|         "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, |         "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|       [(set (v8i16 VECREG:$rT), |         [/* no pattern */]>; | ||||||
|             (SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; |          | ||||||
|  | def MPYHHv8i16: | ||||||
|  |     MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|  |  | ||||||
| def MPYHHr32: | def MPYHHr32: | ||||||
|     RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||||
|       "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| // mpyhha: Multiply high-high, add to $rT: | // mpyhha: Multiply high-high, add to $rT: | ||||||
| def MPYHHAvec: |  | ||||||
|     RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | class MPYHHAInst<dag OOL, dag IOL>: | ||||||
|  |     RRForm<0b01100010110, OOL, IOL, | ||||||
|       "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, |       "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|       []>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def MPYHHAvec: | ||||||
|  |     MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|      |      | ||||||
| def MPYHHAr32: | def MPYHHAr32: | ||||||
|     RRForm<0b01100010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||||
|       "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| // mpyhhu: Multiply high-high, unsigned | // mpyhhu: Multiply high-high, unsigned | ||||||
| def MPYHHUvec: |  | ||||||
|     RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | class MPYHHUInst<dag OOL, dag IOL>: | ||||||
|  |     RRForm<0b01110011110, OOL, IOL, | ||||||
|       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, |       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|       []>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def MPYHHUvec: | ||||||
|  |     MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|      |      | ||||||
| def MPYHHUr32: | def MPYHHUr32: | ||||||
|     RRForm<0b01110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||||
|       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| // mpyhhau: Multiply high-high, unsigned, add to $rT: | // mpyhhau: Multiply high-high, unsigned, add to $rT: | ||||||
| def MPYHHAUvec: |  | ||||||
|     RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | class MPYHHAUInst<dag OOL, dag IOL>: | ||||||
|  |     RRForm<0b01110010110, OOL, IOL, | ||||||
|       "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, |       "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, | ||||||
|       []>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def MPYHHAUvec: | ||||||
|  |     MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; | ||||||
|      |      | ||||||
| def MPYHHAUr32: | def MPYHHAUr32: | ||||||
|     RRForm<0b01110010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), |     MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; | ||||||
|       "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, |  | ||||||
|       []>; |  | ||||||
|  |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |  | ||||||
| // v4i32, i32 multiply instruction sequence: |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ |  | ||||||
| def MPYv4i32: |  | ||||||
|   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), |  | ||||||
|       (Av4i32 |  | ||||||
|         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB), |  | ||||||
|                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)), |  | ||||||
|         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>; |  | ||||||
|  |  | ||||||
| def MPYi32: |  | ||||||
|   Pat<(mul R32C:$rA, R32C:$rB), |  | ||||||
|       (Ar32 |  | ||||||
|         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), |  | ||||||
|               (MPYHr32 R32C:$rB, R32C:$rA)), |  | ||||||
|         (MPYUr32 R32C:$rA, R32C:$rB))>; |  | ||||||
|  |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
| // clz: Count leading zeroes | // clz: Count leading zeroes | ||||||
| @@ -1424,7 +1436,7 @@ multiclass BitwiseOr | |||||||
|   def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), |   def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), | ||||||
|                   [/* no pattern */]>; |                   [/* no pattern */]>; | ||||||
|  |  | ||||||
|   // scalar->vector promotion: |   // scalar->vector promotion, prefslot2vec: | ||||||
|   def v16i8_i8:  ORPromoteScalar<R8C>; |   def v16i8_i8:  ORPromoteScalar<R8C>; | ||||||
|   def v8i16_i16: ORPromoteScalar<R16C>; |   def v8i16_i16: ORPromoteScalar<R16C>; | ||||||
|   def v4i32_i32: ORPromoteScalar<R32C>; |   def v4i32_i32: ORPromoteScalar<R32C>; | ||||||
| @@ -1432,7 +1444,7 @@ multiclass BitwiseOr | |||||||
|   def v4f32_f32: ORPromoteScalar<R32FP>; |   def v4f32_f32: ORPromoteScalar<R32FP>; | ||||||
|   def v2f64_f64: ORPromoteScalar<R64FP>; |   def v2f64_f64: ORPromoteScalar<R64FP>; | ||||||
|  |  | ||||||
|   // extract element 0: |   // vector->scalar demotion, vec2prefslot: | ||||||
|   def i8_v16i8:  ORExtractElt<R8C>; |   def i8_v16i8:  ORExtractElt<R8C>; | ||||||
|   def i16_v8i16: ORExtractElt<R16C>; |   def i16_v8i16: ORExtractElt<R16C>; | ||||||
|   def i32_v4i32: ORExtractElt<R32C>; |   def i32_v4i32: ORExtractElt<R32C>; | ||||||
| @@ -1831,6 +1843,13 @@ class SELBVecInst<ValueType vectype>: | |||||||
|                      (and (vnot (vectype VECREG:$rC)), |                      (and (vnot (vectype VECREG:$rC)), | ||||||
|                           (vectype VECREG:$rA))))]>; |                           (vectype VECREG:$rA))))]>; | ||||||
|  |  | ||||||
|  | class SELBVecVCondInst<ValueType vectype>: | ||||||
|  |   SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), | ||||||
|  |            [(set (vectype VECREG:$rT), | ||||||
|  |                  (select (vectype VECREG:$rC), | ||||||
|  |                          (vectype VECREG:$rB), | ||||||
|  |                          (vectype VECREG:$rA)))]>; | ||||||
|  |  | ||||||
| class SELBVecCondInst<ValueType vectype>: | class SELBVecCondInst<ValueType vectype>: | ||||||
|   SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), |   SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), | ||||||
|            [(set (vectype VECREG:$rT), |            [(set (vectype VECREG:$rT), | ||||||
| @@ -1867,8 +1886,21 @@ multiclass SelectBits | |||||||
|   def v4i32_cond: SELBVecCondInst<v4i32>; |   def v4i32_cond: SELBVecCondInst<v4i32>; | ||||||
|   def v2i64_cond: SELBVecCondInst<v2i64>; |   def v2i64_cond: SELBVecCondInst<v2i64>; | ||||||
|  |  | ||||||
|  |   def v16i8_vcond: SELBVecCondInst<v16i8>; | ||||||
|  |   def v8i16_vcond: SELBVecCondInst<v8i16>; | ||||||
|  |   def v4i32_vcond: SELBVecCondInst<v4i32>; | ||||||
|  |   def v2i64_vcond: SELBVecCondInst<v2i64>; | ||||||
|  |  | ||||||
|  |   def v4f32_cond: | ||||||
|  | 	SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), | ||||||
|  | 		 [(set (v4f32 VECREG:$rT), | ||||||
|  | 		       (select (v4i32 VECREG:$rC), | ||||||
|  | 			       (v4f32 VECREG:$rB), | ||||||
|  | 			       (v4f32 VECREG:$rA)))]>; | ||||||
|  |  | ||||||
|   // SELBr64_cond is defined further down, look for i64 comparisons |   // SELBr64_cond is defined further down, look for i64 comparisons | ||||||
|   def r32_cond:   SELBRegCondInst<R32C, R32C>; |   def r32_cond:   SELBRegCondInst<R32C, R32C>; | ||||||
|  |   def f32_cond:   SELBRegCondInst<R32C, R32FP>; | ||||||
|   def r16_cond:   SELBRegCondInst<R16C, R16C>; |   def r16_cond:   SELBRegCondInst<R16C, R16C>; | ||||||
|   def r8_cond:    SELBRegCondInst<R8C,  R8C>; |   def r8_cond:    SELBRegCondInst<R8C,  R8C>; | ||||||
| } | } | ||||||
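For readers unfamiliar with selb: it is a pure bitwise select, so every pattern in this hunk ultimately takes each result bit from $rB where the corresponding bit of the $rC mask is set and from $rA where it is clear. A minimal C model of that behaviour, written only for illustration (the helper name is not part of the target description):

    #include <stdint.h>

    /* Bitwise select, mirroring the (or (and rC, rB), (and (vnot rC), rA))
       pattern used by the SELB instruction definitions above. */
    static uint32_t selb_model(uint32_t rA, uint32_t rB, uint32_t rC) {
        return (rC & rB) | (~rC & rA);
    }

The vector select patterns added in this hunk rely on the condition operand already being a full-width per-element mask (e.g. the output of a compare), so the bitwise select behaves as an element-wise select.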
| @@ -2454,11 +2486,11 @@ class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|            RotateShift, pattern>; |            RotateShift, pattern>; | ||||||
|  |  | ||||||
| class ROTQBIVecInst<ValueType vectype>: | class ROTQBIVecInst<ValueType vectype>: | ||||||
|     ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), | ||||||
|                [/* no pattern yet */]>; |                [/* no pattern yet */]>; | ||||||
|  |  | ||||||
| class ROTQBIRegInst<RegisterClass rclass>: | class ROTQBIRegInst<RegisterClass rclass>: | ||||||
|     ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), |     ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), | ||||||
|                [/* no pattern yet */]>; |                [/* no pattern yet */]>; | ||||||
|  |  | ||||||
| multiclass RotateQuadByBitCount | multiclass RotateQuadByBitCount | ||||||
| @@ -2645,9 +2677,6 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)), | |||||||
| // ROTQMBYvec: This is a vector form merely so that when used in an | // ROTQMBYvec: This is a vector form merely so that when used in an | ||||||
| // instruction pattern, type checking will succeed. This instruction assumes | // instruction pattern, type checking will succeed. This instruction assumes | ||||||
| // that the user knew to negate $rB. | // that the user knew to negate $rB. | ||||||
| // |  | ||||||
| // Using the SPUrotquad_rz_bytes target-specific DAG node, the patterns |  | ||||||
| // ensure that $rB is negated. |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
| class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>: | class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
| @@ -2660,8 +2689,7 @@ class ROTQMBYVecInst<ValueType vectype>: | |||||||
|  |  | ||||||
| class ROTQMBYRegInst<RegisterClass rclass>: | class ROTQMBYRegInst<RegisterClass rclass>: | ||||||
|     ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), |     ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), | ||||||
|                 [(set rclass:$rT, |                 [/* no pattern */]>; | ||||||
|                       (SPUrotquad_rz_bytes rclass:$rA, R32C:$rB))]>; |  | ||||||
|  |  | ||||||
| multiclass RotateQuadBytes | multiclass RotateQuadBytes | ||||||
| { | { | ||||||
| @@ -2676,32 +2704,17 @@ multiclass RotateQuadBytes | |||||||
|  |  | ||||||
| defm ROTQMBY : RotateQuadBytes; | defm ROTQMBY : RotateQuadBytes; | ||||||
|  |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes (v16i8 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBYv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes (v8i16 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBYv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes (v4i32 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBYv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes (v2i64 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBYv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes GPRC:$rA, R32C:$rB), |  | ||||||
|           (ROTQMBYr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bytes R64C:$rA, R32C:$rB), |  | ||||||
|           (ROTQMBYr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
|  |  | ||||||
| class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>: | class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>: | ||||||
|     RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", |     RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", | ||||||
|             RotateShift, pattern>; |             RotateShift, pattern>; | ||||||
|  |  | ||||||
| class ROTQMBYIVecInst<ValueType vectype>: | class ROTQMBYIVecInst<ValueType vectype>: | ||||||
|     ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), |     ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), | ||||||
|                  [(set (vectype VECREG:$rT), |                  [/* no pattern */]>; | ||||||
|                        (SPUrotquad_rz_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>; |  | ||||||
|  |  | ||||||
| class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>: | class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>: | ||||||
|     ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), |     ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), | ||||||
|                  [(set rclass:$rT, |                  [/* no pattern */]>; | ||||||
|                        (SPUrotquad_rz_bytes rclass:$rA, (inttype pred:$val)))]>; |  | ||||||
|  |  | ||||||
| multiclass RotateQuadBytesImm | multiclass RotateQuadBytesImm | ||||||
| { | { | ||||||
| @@ -2725,8 +2738,8 @@ class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|            RotateShift, pattern>; |            RotateShift, pattern>; | ||||||
|  |  | ||||||
| class ROTQMBYBIVecInst<ValueType vectype>: | class ROTQMBYBIVecInst<ValueType vectype>: | ||||||
|     ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), | ||||||
|                   [/* no pattern, intrinsic? */]>; |                   [/* no pattern */]>; | ||||||
|  |  | ||||||
| multiclass RotateMaskQuadByBitCount | multiclass RotateMaskQuadByBitCount | ||||||
| { | { | ||||||
| @@ -2768,19 +2781,6 @@ multiclass RotateMaskQuadByBits | |||||||
|  |  | ||||||
| defm ROTQMBI: RotateMaskQuadByBits; | defm ROTQMBI: RotateMaskQuadByBits; | ||||||
|  |  | ||||||
| def : Pat<(SPUrotquad_rz_bits (v16i8 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBIv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits (v8i16 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBIv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits (v4i32 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBIv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits (v2i64 VECREG:$rA), R32C:$rB), |  | ||||||
|           (ROTQMBIv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits GPRC:$rA, R32C:$rB), |  | ||||||
|           (ROTQMBIr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
| def : Pat<(SPUrotquad_rz_bits R64C:$rA, R32C:$rB), |  | ||||||
|           (ROTQMBIr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>; |  | ||||||
|  |  | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
| // Rotate quad and mask by bits, immediate | // Rotate quad and mask by bits, immediate | ||||||
| //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
| @@ -2791,13 +2791,11 @@ class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>: | |||||||
|  |  | ||||||
| class ROTQMBIIVecInst<ValueType vectype>: | class ROTQMBIIVecInst<ValueType vectype>: | ||||||
|    ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), |    ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), | ||||||
|                  [(set (vectype VECREG:$rT), |                  [/* no pattern */]>; | ||||||
|                        (SPUrotquad_rz_bits (vectype VECREG:$rA), (i32 uimm7:$val)))]>; |  | ||||||
|  |  | ||||||
| class ROTQMBIIRegInst<RegisterClass rclass>: | class ROTQMBIIRegInst<RegisterClass rclass>: | ||||||
|    ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), |    ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), | ||||||
|                  [(set rclass:$rT, |                  [/* no pattern */]>; | ||||||
|                        (SPUrotquad_rz_bits rclass:$rA, (i32 uimm7:$val)))]>; |  | ||||||
|  |  | ||||||
| multiclass RotateMaskQuadByBitsImm | multiclass RotateMaskQuadByBitsImm | ||||||
| { | { | ||||||
| @@ -3142,6 +3140,15 @@ multiclass CmpGtrWordImm | |||||||
|  |  | ||||||
|   def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), |   def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), | ||||||
|                     [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; |                     [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; | ||||||
|  |  | ||||||
|  |   // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence: | ||||||
|  |   def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), | ||||||
|  |                        [(set (v4i32 VECREG:$rT), | ||||||
|  |                              (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))), | ||||||
|  |                                     (v4i32 v4i32SExt16Imm:$val)))]>; | ||||||
|  |  | ||||||
|  |   def f32:   CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val), | ||||||
|  |   		      [/* no pattern */]>; | ||||||
| } | } | ||||||
|  |  | ||||||
| class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> : | class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> : | ||||||
| @@ -3760,7 +3767,7 @@ class FAVecInst<ValueType vectype>: | |||||||
| multiclass SFPAdd | multiclass SFPAdd | ||||||
| { | { | ||||||
|   def v4f32: FAVecInst<v4f32>; |   def v4f32: FAVecInst<v4f32>; | ||||||
|   def r32:   FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), |   def f32:   FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||||
|                     [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; |                     [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -3778,34 +3785,35 @@ class FSVecInst<ValueType vectype>: | |||||||
| multiclass SFPSub | multiclass SFPSub | ||||||
| { | { | ||||||
|   def v4f32: FSVecInst<v4f32>; |   def v4f32: FSVecInst<v4f32>; | ||||||
|   def r32:   FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), |   def f32:   FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||||
|                     [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>; |                     [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>; | ||||||
| } | } | ||||||
|  |  | ||||||
| defm FS : SFPSub; | defm FS : SFPSub; | ||||||
|  |  | ||||||
| // Floating point reciprocal estimate | // Floating point reciprocal estimate | ||||||
| def FREv4f32 : |  | ||||||
|     RRForm_1<0b00011101100, (outs VECREG:$rT), (ins VECREG:$rA), |  | ||||||
|       "frest\t$rT, $rA", SPrecFP, |  | ||||||
|       [(set (v4f32 VECREG:$rT), (SPUreciprocalEst (v4f32 VECREG:$rA)))]>; |  | ||||||
|  |  | ||||||
| def FREf32 : | class FRESTInst<dag OOL, dag IOL>: | ||||||
|     RRForm_1<0b00011101100, (outs R32FP:$rT), (ins R32FP:$rA), |   RRForm_1<0b00110111000, OOL, IOL, | ||||||
|            "frest\t$rT, $rA", SPrecFP, |            "frest\t$rT, $rA", SPrecFP, | ||||||
|       [(set R32FP:$rT, (SPUreciprocalEst R32FP:$rA))]>; |            [/* no pattern */]>; | ||||||
|  |  | ||||||
|  | def FRESTv4f32 : | ||||||
|  |     FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>; | ||||||
|  |  | ||||||
|  | def FRESTf32 : | ||||||
|  |     FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>; | ||||||
|  |  | ||||||
| // Floating point interpolate (used in conjunction with reciprocal estimate) | // Floating point interpolate (used in conjunction with reciprocal estimate) | ||||||
| def FIv4f32 : | def FIv4f32 : | ||||||
|     RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), |     RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), | ||||||
|       "fi\t$rT, $rA, $rB", SPrecFP, |       "fi\t$rT, $rA, $rB", SPrecFP, | ||||||
|       [(set (v4f32 VECREG:$rT), (SPUinterpolate (v4f32 VECREG:$rA), |       [/* no pattern */]>; | ||||||
|                                                 (v4f32 VECREG:$rB)))]>; |  | ||||||
|  |  | ||||||
| def FIf32 : | def FIf32 : | ||||||
|     RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), |     RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), | ||||||
|       "fi\t$rT, $rA, $rB", SPrecFP, |       "fi\t$rT, $rA, $rB", SPrecFP, | ||||||
|       [(set R32FP:$rT, (SPUinterpolate R32FP:$rA, R32FP:$rB))]>; |       [/* no pattern */]>; | ||||||
|  |  | ||||||
| //-------------------------------------------------------------------------- | //-------------------------------------------------------------------------- | ||||||
| // Basic single precision floating point comparisons: | // Basic single precision floating point comparisons: | ||||||
| @@ -4445,12 +4453,14 @@ def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), | |||||||
|                        (SPUlo tconstpool:$in, 0)), |                        (SPUlo tconstpool:$in, 0)), | ||||||
|           (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; |           (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; | ||||||
|  |  | ||||||
|  | /* | ||||||
| def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm), | def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm), | ||||||
|           (AIr32 R32C:$sp, i32ImmSExt10:$imm)>; |           (AIr32 R32C:$sp, i32ImmSExt10:$imm)>; | ||||||
|  |  | ||||||
| def : Pat<(SPUindirect R32C:$sp, imm:$imm), | def : Pat<(SPUindirect R32C:$sp, imm:$imm), | ||||||
|           (Ar32 R32C:$sp, |           (Ar32 R32C:$sp, | ||||||
|                 (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>; |                 (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>; | ||||||
|  |  */ | ||||||
|  |  | ||||||
| def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), | def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), | ||||||
|           (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; |           (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; | ||||||
| @@ -4466,5 +4476,7 @@ def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), | |||||||
|  |  | ||||||
| // Intrinsics: | // Intrinsics: | ||||||
| include "CellSDKIntrinsics.td" | include "CellSDKIntrinsics.td" | ||||||
|  | // Various math operator instruction sequences | ||||||
|  | include "SPUMathInstr.td" | ||||||
| // 64-bit "instructions"/support | // 64-bit "instructions"/support | ||||||
| include "SPU64InstrInfo.td" | include "SPU64InstrInfo.td" | ||||||
|   | |||||||
							
								
								
									
lib/Target/CellSPU/SPUMathInstr.td (new file, 99 lines added)
									
								
							| @@ -0,0 +1,99 @@ | |||||||
|  | //======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*---======// | ||||||
|  | // | ||||||
|  | //                     Cell SPU math operations | ||||||
|  | // | ||||||
|  | // This target description file contains instruction sequences for various | ||||||
|  | // math operations, such as vector multiplies, i32 multiply, etc., for the | ||||||
|  | // SPU's i32, i16, i8 and corresponding vector types. | ||||||
|  | // | ||||||
|  | // Any resemblance to libsimdmath or the Cell SDK simdmath library is | ||||||
|  | // purely and completely coincidental. | ||||||
|  | // | ||||||
|  | // Primary author: Scott Michel (scottm@aero.org) | ||||||
|  | //===----------------------------------------------------------------------===// | ||||||
|  |  | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  | // v16i8 multiply instruction sequence: | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
|  | def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), | ||||||
|  |           (ORv4i32 | ||||||
|  |            (ANDv4i32 | ||||||
|  |             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), | ||||||
|  |                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), | ||||||
|  |                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8), | ||||||
|  |                        (FSMBIv8i16 0x2222)), | ||||||
|  |             (ILAv4i32 0x0000ffff)), | ||||||
|  |            (SHLIv4i32 | ||||||
|  |             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), | ||||||
|  |                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)), | ||||||
|  |                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), | ||||||
|  |                                              (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), | ||||||
|  |                        (FSMBIv8i16 0x2222)), 16))>; | ||||||
|  |                          | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  | // v8i16 multiply instruction sequence: | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
|  | def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), | ||||||
|  |           (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), | ||||||
|  |                      (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), | ||||||
|  |                      (FSMBIv8i16 0xcccc))>; | ||||||
|  |                   | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  | // v4i32, i32 multiply instruction sequence: | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
|  | def MPYv4i32: | ||||||
|  |   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), | ||||||
|  |       (Av4i32 | ||||||
|  |         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB), | ||||||
|  |                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)), | ||||||
|  |         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>; | ||||||
|  |  | ||||||
|  | def MPYi32: | ||||||
|  |   Pat<(mul R32C:$rA, R32C:$rB), | ||||||
|  |       (Ar32 | ||||||
|  |         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), | ||||||
|  |               (MPYHr32 R32C:$rB, R32C:$rA)), | ||||||
|  |         (MPYUr32 R32C:$rA, R32C:$rB))>; | ||||||
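The MPYv4i32 and MPYi32 patterns above rebuild a full 32-bit multiply out of the SPU's 16-bit multiply units: mpyu produces the unsigned product of the two low halfwords, and each mpyh produces a high-halfword-times-low-halfword partial product already shifted left by 16 bits. A small C sketch of the identity the patterns rely on (the helper names are illustrative, not SPU intrinsics):

    #include <stdint.h>

    /* mpyu: product of the low 16 bits of a and b (full 32-bit result). */
    static uint32_t mpyu(uint32_t a, uint32_t b) {
        return (a & 0xffffu) * (b & 0xffffu);
    }

    /* mpyh: high 16 bits of a times low 16 bits of b, shifted left by 16. */
    static uint32_t mpyh(uint32_t a, uint32_t b) {
        return ((a >> 16) * (b & 0xffffu)) << 16;
    }

    /* 32-bit multiply modulo 2^32, matching the MPYi32 pattern:
       a * b == mpyh(a, b) + mpyh(b, a) + mpyu(a, b). */
    static uint32_t mul32(uint32_t a, uint32_t b) {
        return mpyh(a, b) + mpyh(b, a) + mpyu(a, b);
    }

The ah*bh partial product is not needed because it only contributes bits at or above bit 32, which are discarded in a 32-bit result.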
|  |  | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  | // f32, v4f32 divide instruction sequence: | ||||||
|  | //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ | ||||||
|  |  | ||||||
|  | // Reciprocal estimate and interpolation | ||||||
|  | def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; | ||||||
|  | // Division estimate | ||||||
|  | def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; | ||||||
|  | // Newton-Raphson iteration | ||||||
|  | def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), | ||||||
|  | 		  	       Interpf32.Fragment, | ||||||
|  | 	  	  	       DivEstf32.Fragment)>; | ||||||
|  | // Epsilon addition | ||||||
|  | def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; | ||||||
|  |  | ||||||
|  | def : Pat<(fdiv R32FP:$rA, R32FP:$rB), | ||||||
|  | 	  (SELBf32_cond NRaphf32.Fragment, | ||||||
|  | 			Epsilonf32.Fragment, | ||||||
|  | 			(CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; | ||||||
|  |  | ||||||
|  | // Reciprocal estimate and interpolation | ||||||
|  | def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; | ||||||
|  | // Division estimate | ||||||
|  | def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; | ||||||
|  | // Newton-Raphson iteration | ||||||
|  | def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, | ||||||
|  | 					      (v4f32 VECREG:$rB), | ||||||
|  | 					      (v4f32 VECREG:$rA)), | ||||||
|  | 		  	           Interpv4f32.Fragment, | ||||||
|  | 	  	  	           DivEstv4f32.Fragment)>; | ||||||
|  | // Epsilon addition | ||||||
|  | def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; | ||||||
|  |  | ||||||
|  | def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), | ||||||
|  | 	  (SELBv4f32_cond NRaphv4f32.Fragment, | ||||||
|  | 			Epsilonv4f32.Fragment, | ||||||
|  | 			(CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), | ||||||
|  | 					      Epsilonv4f32.Fragment, | ||||||
|  | 					      (v4f32 VECREG:$rA)), -1))>; | ||||||
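Both fdiv sequences in this new file follow the same shape: frest gives a reciprocal estimate of $rB, fi refines that estimate by interpolation, fm forms a first quotient estimate, one fused Newton-Raphson step (fnms followed by fma) sharpens it, ai bumps the result by one unit in the last place, and cgti plus selb keeps the bumped quotient only when it does not overshoot. A rough C sketch of the scalar control flow, with frest/fi modelled as a plain reciprocal approximation purely for illustration (the real hardware uses a table-based estimate, and cgti actually compares the raw bit pattern, so NaN and -0.0 corner cases are glossed over here):

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Stand-in for frest + fi: some approximation of 1/b. */
    static float recip_est(float b) {
        return 1.0f / b;
    }

    /* Model of "ai ..., 1" on a float register: add one to the bit pattern,
       i.e. nudge the value up by one unit in the last place. */
    static float add_one_ulp(float x) {
        uint32_t bits;
        memcpy(&bits, &x, sizeof bits);
        bits += 1u;
        memcpy(&x, &bits, sizeof x);
        return x;
    }

    static float fdiv_model(float a, float b) {
        float x  = recip_est(b);              /* frest + fi                */
        float q0 = a * x;                     /* fm: initial quotient      */
        float q1 = fmaf(a - q0 * b, x, q0);   /* fnms + fma: one N-R step  */
        float q2 = add_one_ulp(q1);           /* ai: epsilon adjustment    */
        /* cgti + selb: take q2 unless b * q2 already exceeds a. */
        return (a - b * q2 >= 0.0f) ? q2 : q1;
    }

The v4f32 version is the same computation applied per vector slot.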
| @@ -87,24 +87,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; | |||||||
| // SPUISelLowering.h): | // SPUISelLowering.h): | ||||||
| def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; | def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; | ||||||
|  |  | ||||||
| // SPU 16-bit multiply |  | ||||||
| def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>; |  | ||||||
|  |  | ||||||
| // SPU multiply unsigned, used in instruction lowering for v4i32 |  | ||||||
| // multiplies: |  | ||||||
| def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>; |  | ||||||
| def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>; |  | ||||||
|  |  | ||||||
| // SPU 16-bit multiply high x low, shift result 16-bits |  | ||||||
| // Used to compute intermediate products for 32-bit multiplies |  | ||||||
| def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>; |  | ||||||
| def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>; |  | ||||||
|  |  | ||||||
| // SPU 16-bit multiply high x high, 32-bit product |  | ||||||
| // Used to compute intermediate products for 16-bit multiplies |  | ||||||
| def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>; |  | ||||||
| def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>; |  | ||||||
|  |  | ||||||
| // Shift left quadword by bits and bytes | // Shift left quadword by bits and bytes | ||||||
| def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>; | def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>; | ||||||
| def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>; | def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>; | ||||||
| @@ -117,11 +99,6 @@ def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>; | |||||||
| def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; | def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; | ||||||
| def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; | def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; | ||||||
|  |  | ||||||
| def SPUrotquad_rz_bytes: SDNode<"SPUISD::ROTQUAD_RZ_BYTES", |  | ||||||
|                                     SPUvecshift_type, []>; |  | ||||||
| def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS", |  | ||||||
|                                     SPUvecshift_type, []>; |  | ||||||
|  |  | ||||||
| // Vector rotate left, bits shifted out of the left are rotated in on the right | // Vector rotate left, bits shifted out of the left are rotated in on the right | ||||||
| def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", | def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", | ||||||
|                              SPUvecshift_type, []>; |                              SPUvecshift_type, []>; | ||||||
| @@ -141,12 +118,6 @@ def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; | |||||||
| // SPU gather bits instruction: | // SPU gather bits instruction: | ||||||
| def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>; | def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>; | ||||||
|  |  | ||||||
| // SPU floating point interpolate |  | ||||||
| def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>; |  | ||||||
|  |  | ||||||
| // SPU floating point reciprocal estimate (used for fdiv) |  | ||||||
| def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>; |  | ||||||
|  |  | ||||||
| def SDTprefslot2vec: SDTypeProfile<1, 1, []>; | def SDTprefslot2vec: SDTypeProfile<1, 1, []>; | ||||||
| def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; | def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -339,10 +339,13 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, | |||||||
|   // Now add the frame object offset to the offset from r1. |   // Now add the frame object offset to the offset from r1. | ||||||
|   int Offset = MFI->getObjectOffset(FrameIndex); |   int Offset = MFI->getObjectOffset(FrameIndex); | ||||||
|  |  | ||||||
|   // Most instructions, except for generated FrameIndex additions using AIr32, |   // Most instructions, except for generated FrameIndex additions using AIr32 | ||||||
|   // have the immediate in operand 1. AIr32, in this case, has the immediate |   // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the | ||||||
|   // in operand 2. |   // immediate in operand 2. | ||||||
|   unsigned OpNo = (MI.getOpcode() != SPU::AIr32 ? 1 : 2); |   unsigned OpNo = 1; | ||||||
|  |   if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32) | ||||||
|  |     OpNo = 2; | ||||||
|  |  | ||||||
|   MachineOperand &MO = MI.getOperand(OpNo); |   MachineOperand &MO = MI.getOperand(OpNo); | ||||||
|  |  | ||||||
|   // Offset is biased by $lr's slot at the bottom. |   // Offset is biased by $lr's slot at the bottom. | ||||||
|   | |||||||
| @@ -1,9 +1,11 @@ | |||||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||||
| ; RUN: grep frest    %t1.s | count 2  | ; RUN: grep frest    %t1.s | count 2  | ||||||
| ; RUN: grep -w fi    %t1.s | count 2  | ; RUN: grep -w fi    %t1.s | count 2  | ||||||
| ; RUN: grep fm       %t1.s | count 4  | ; RUN: grep -w fm    %t1.s | count 2 | ||||||
| ; RUN: grep fma      %t1.s | count 2  | ; RUN: grep fma      %t1.s | count 2  | ||||||
| ; RUN: grep fnms     %t1.s | count 2 | ; RUN: grep fnms     %t1.s | count 4 | ||||||
|  | ; RUN: grep cgti     %t1.s | count 2 | ||||||
|  | ; RUN: grep selb     %t1.s | count 2 | ||||||
| ; | ; | ||||||
| ; This file includes standard floating point arithmetic instructions | ; This file includes standard floating point arithmetic instructions | ||||||
| target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | ||||||
|   | |||||||
| @@ -1,8 +1,5 @@ | |||||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||||
| ; RUN: grep {fsmbi.*61680}   %t1.s | count 1 | ; RUN: grep xswd	     %t1.s | count 1 | ||||||
| ; RUN: grep rotqmbyi         %t1.s | count 1 |  | ||||||
| ; RUN: grep rotmai           %t1.s | count 1 |  | ||||||
| ; RUN: grep selb             %t1.s | count 1 |  | ||||||
| ; RUN: grep shufb            %t1.s | count 2 | ; RUN: grep shufb            %t1.s | count 2 | ||||||
| ; RUN: grep cg               %t1.s | count 1 | ; RUN: grep cg               %t1.s | count 1 | ||||||
| ; RUN: grep addx             %t1.s | count 1 | ; RUN: grep addx             %t1.s | count 1 | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ | |||||||
| ; RUN: grep and     %t1.s | count 2 | ; RUN: grep and     %t1.s | count 2 | ||||||
| ; RUN: grep selb    %t1.s | count 6 | ; RUN: grep selb    %t1.s | count 6 | ||||||
| ; RUN: grep fsmbi   %t1.s | count 4 | ; RUN: grep fsmbi   %t1.s | count 4 | ||||||
| ; RUN: grep shli    %t1.s | count 2 | ; RUN: grep shli    %t1.s | count 4 | ||||||
| ; RUN: grep shlhi   %t1.s | count 4 | ; RUN: grep shlhi   %t1.s | count 4 | ||||||
| ; RUN: grep ila     %t1.s | count 2 | ; RUN: grep ila     %t1.s | count 2 | ||||||
| ; RUN: grep xsbh    %t1.s | count 4 | ; RUN: grep xsbh    %t1.s | count 4 | ||||||
|   | |||||||
| @@ -1,10 +1,21 @@ | |||||||
| ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s | ||||||
| ; RUN: grep shlh   %t1.s | count 84 | ; RUN: grep -w shlh      %t1.s | count 9 | ||||||
| ; RUN: grep shlhi  %t1.s | count 51 | ; RUN: grep -w shlhi     %t1.s | count 3 | ||||||
| ; RUN: grep shl    %t1.s | count 168 | ; RUN: grep -w shl       %t1.s | count 9 | ||||||
| ; RUN: grep shli   %t1.s | count 51 | ; RUN: grep -w shli      %t1.s | count 3 | ||||||
| ; RUN: grep xshw   %t1.s | count 5 | ; RUN: grep -w xshw      %t1.s | count 5 | ||||||
| ; RUN: grep and    %t1.s | count 5 | ; RUN: grep -w and       %t1.s | count 5 | ||||||
|  | ; RUN: grep -w andi      %t1.s | count 2 | ||||||
|  | ; RUN: grep -w rotmi     %t1.s | count 2 | ||||||
|  | ; RUN: grep -w rotqmbyi  %t1.s | count 1 | ||||||
|  | ; RUN: grep -w rotqmbii  %t1.s | count 2 | ||||||
|  | ; RUN: grep -w rotqmby   %t1.s | count 1 | ||||||
|  | ; RUN: grep -w rotqmbi   %t1.s | count 1 | ||||||
|  | ; RUN: grep -w rotqbyi   %t1.s | count 1 | ||||||
|  | ; RUN: grep -w rotqbii   %t1.s | count 2 | ||||||
|  | ; RUN: grep -w rotqbybi  %t1.s | count 1 | ||||||
|  | ; RUN: grep -w sfi       %t1.s | count 3 | ||||||
|  |  | ||||||
| target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" | ||||||
| target triple = "spu" | target triple = "spu" | ||||||
|  |  | ||||||
| @@ -210,3 +221,57 @@ define i32 @shli_i32_12(i32 zeroext %arg1) zeroext { | |||||||
|         %A = shl i32 0, %arg1 |         %A = shl i32 0, %arg1 | ||||||
|         ret i32 %A |         ret i32 %A | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ;; i64 shift left | ||||||
|  |  | ||||||
|  | define i64 @shl_i64_1(i64 %arg1) { | ||||||
|  | 	%A = shl i64 %arg1, 9 | ||||||
|  | 	ret i64 %A | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @shl_i64_2(i64 %arg1) { | ||||||
|  | 	%A = shl i64 %arg1, 3 | ||||||
|  | 	ret i64 %A | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @shl_i64_3(i64 %arg1, i32 %shift) { | ||||||
|  | 	%1 = zext i32 %shift to i64 | ||||||
|  | 	%2 = shl i64 %arg1, %1 | ||||||
|  | 	ret i64 %2 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ;; i64 shift right logical (shift 0s from the right) | ||||||
|  |  | ||||||
|  | define i64 @lshr_i64_1(i64 %arg1) { | ||||||
|  | 	%1 = lshr i64 %arg1, 9 | ||||||
|  | 	ret i64 %1 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @lshr_i64_2(i64 %arg1) { | ||||||
|  | 	%1 = lshr i64 %arg1, 3 | ||||||
|  | 	ret i64 %1 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @lshr_i64_3(i64 %arg1, i32 %shift) { | ||||||
|  | 	%1 = zext i32 %shift to i64 | ||||||
|  | 	%2 = lshr i64 %arg1, %1 | ||||||
|  | 	ret i64 %2 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ;; i64 shift right arithmetic (shift 1s from the right) | ||||||
|  |  | ||||||
|  | define i64 @ashr_i64_1(i64 %arg) { | ||||||
|  | 	%1 = ashr i64 %arg, 9 | ||||||
|  | 	ret i64 %1 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @ashr_i64_2(i64 %arg) { | ||||||
|  | 	%1 = ashr i64 %arg, 3 | ||||||
|  | 	ret i64 %1 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @ashr_i64_3(i64 %arg1, i32 %shift) { | ||||||
|  | 	%1 = zext i32 %shift to i64 | ||||||
|  | 	%2 = ashr i64 %arg1, %1 | ||||||
|  | 	ret i64 %2 | ||||||
|  | } | ||||||
|   | |||||||
| @@ -34,19 +34,45 @@ struct pred_s preds[] = { | |||||||
|   { "neq", i64_neq, i64_neq_select } |   { "neq", i64_neq, i64_neq_select } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | uint64_t i64_shl_const(uint64_t a) { | ||||||
|  |   return a << 10; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | uint64_t i64_shl(uint64_t a, int amt) { | ||||||
|  |   return a << amt; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | uint64_t i64_srl_const(uint64_t a) { | ||||||
|  |   return a >> 10; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | uint64_t i64_srl(uint64_t a, int amt) { | ||||||
|  |   return a >> amt; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | int64_t i64_sra_const(int64_t a) { | ||||||
|  |   return a >> 10; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | int64_t i64_sra(int64_t a, int amt) { | ||||||
|  |   return a >> amt; | ||||||
|  | } | ||||||
|  |  | ||||||
| int main(void) { | int main(void) { | ||||||
|   int i; |   int i; | ||||||
|   int64_t a = 1234567890000LL; |   int64_t a =  1234567890003LL; | ||||||
|   int64_t b = 2345678901234LL; |   int64_t b =  2345678901235LL; | ||||||
|   int64_t c =  1234567890001LL; |   int64_t c =  1234567890001LL; | ||||||
|   int64_t d =          10001LL; |   int64_t d =          10001LL; | ||||||
|   int64_t e =          10000LL; |   int64_t e =          10000LL; | ||||||
|  |   int64_t f = -1068103409991LL; | ||||||
|  |  | ||||||
|   printf("a = %16lld (0x%016llx)\n", a, a); |   printf("a = %16lld (0x%016llx)\n", a, a); | ||||||
|   printf("b = %16lld (0x%016llx)\n", b, b); |   printf("b = %16lld (0x%016llx)\n", b, b); | ||||||
|   printf("c = %16lld (0x%016llx)\n", c, c); |   printf("c = %16lld (0x%016llx)\n", c, c); | ||||||
|   printf("d = %16lld (0x%016llx)\n", d, d); |   printf("d = %16lld (0x%016llx)\n", d, d); | ||||||
|   printf("e = %16lld (0x%016llx)\n", e, e); |   printf("e = %16lld (0x%016llx)\n", e, e); | ||||||
|  |   printf("f = %16lld (0x%016llx)\n", f, f); | ||||||
|   printf("----------------------------------------\n"); |   printf("----------------------------------------\n"); | ||||||
|  |  | ||||||
|   for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { |   for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { | ||||||
| @@ -64,5 +90,23 @@ int main(void) { | |||||||
|     printf("----------------------------------------\n"); |     printf("----------------------------------------\n"); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   printf("a                = 0x%016llx\n", a); | ||||||
|  |   printf("i64_shl_const(a) = 0x%016llx\n", i64_shl_const(a)); | ||||||
|  |   printf("i64_shl(a)       = 0x%016llx\n", i64_shl(a, 5)); | ||||||
|  |   printf("i64_srl_const(a) = 0x%016llx\n", i64_srl_const(a)); | ||||||
|  |   printf("i64_srl(a)       = 0x%016llx\n", i64_srl(a, 5)); | ||||||
|  |   printf("i64_sra_const(a) = 0x%016llx\n", i64_sra_const(a)); | ||||||
|  |   printf("i64_sra(a)       = 0x%016llx\n", i64_sra(a, 5)); | ||||||
|  |   printf("----------------------------------------\n"); | ||||||
|  |  | ||||||
|  |   printf("f                = 0x%016llx\n", f); | ||||||
|  |   printf("i64_shl_const(f) = 0x%016llx\n", i64_shl_const(f)); | ||||||
|  |   printf("i64_shl(f)       = 0x%016llx\n", i64_shl(f, 10)); | ||||||
|  |   printf("i64_srl_const(f) = 0x%016llx\n", i64_srl_const(f)); | ||||||
|  |   printf("i64_srl(f)       = 0x%016llx\n", i64_srl(f, 10)); | ||||||
|  |   printf("i64_sra_const(f) = 0x%016llx\n", i64_sra_const(f)); | ||||||
|  |   printf("i64_sra(f)       = 0x%016llx\n", i64_sra(f, 10)); | ||||||
|  |   printf("----------------------------------------\n"); | ||||||
|  |  | ||||||
|   return 0; |   return 0; | ||||||
| } | } | ||||||
|   | |||||||