diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 1c5e739ef84..8b418a08cbe 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -322,6 +322,9 @@ namespace {
     /// target-specific node if it hasn't already been changed.
     SDNode *Select(SDValue Op);
 
+    //! Emit the instruction sequence for i128 sext
+    SDNode *SelectSEXTi128(SDValue &Op, EVT OpVT);
+
     //! Emit the instruction sequence for i64 shl
     SDNode *SelectSHLi64(SDValue &Op, EVT OpVT);
 
@@ -833,6 +836,10 @@ SPUDAGToDAGISel::Select(SDValue Op) {
         }
       }
     }
+  } else if (Opc == ISD::SIGN_EXTEND) {
+    if (OpVT == MVT::i128) {
+      return SelectSEXTi128(Op, OpVT);
+    }
   } else if (Opc == ISD::SHL) {
     if (OpVT == MVT::i64) {
       return SelectSHLi64(Op, OpVT);
@@ -956,6 +963,58 @@ SPUDAGToDAGISel::Select(SDValue Op) {
   return SelectCode(Op);
 }
 
+/*!
+ * Emit the instruction sequence for i64 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64 into the rightmost words using shufb.
+ *
+ * @param Op The sign-extend node; its operand 0 is the i64 value to extend
+ * @param OpVT The type to extend to
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSEXTi128(SDValue &Op, EVT OpVT)
+{
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Type to extend from
+  SDValue Op0 = Op.getOperand(0);
+  EVT Op0VT = Op0.getValueType();
+
+  assert((OpVT == MVT::i128 && Op0VT == MVT::i64) &&
+         "SelectSEXTi128: input and/or output operand have wrong size");
+
+  // Create shuffle mask (selectors 0x00-0x0f: Op0, 0x10-0x1f: sraVal)
+  unsigned mask1 = 0x10101010; // bytes 0 - 3 and 4 - 7: sign byte
+  unsigned mask2 = 0x00010203; // bytes 8 - 11: input bytes 0 - 3
+  unsigned mask3 = 0x04050607; // bytes 12 - 15: input bytes 4 - 7
+  SDValue shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                     CurDAG->getConstant(mask1, MVT::i32),
+                                     CurDAG->getConstant(mask1, MVT::i32),
+                                     CurDAG->getConstant(mask2, MVT::i32),
+                                     CurDAG->getConstant(mask3, MVT::i32));
+  SDNode *shufMaskLoad = emitBuildVector(shufMask);
+
+  // Word wise arithmetic right shift to generate at least one byte
+  // that contains sign bits.
+  SDNode *PromoteScalar = SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
+                                                     MVT::v2i64, Op0, Op0));
+  SDNode *sraVal = SelectCode(CurDAG->getNode(ISD::SRA, dl, MVT::v2i64,
+                                              SDValue(PromoteScalar, 0),
+                                              CurDAG->getConstant(31, MVT::i32)));
+
+  // Shuffle bytes - Copy the sign bits into the upper 64 bits
+  // and the input value into the lower 64 bits.
+  SDNode *extShuffle = SelectCode(CurDAG->getNode(SPUISD::SHUFB, dl,
+                                                  MVT::v2i64, Op0,
+                                                  SDValue(sraVal, 0),
+                                                  SDValue(shufMaskLoad, 0)));
+
+  return SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, MVT::i128,
+                                    SDValue(extShuffle, 0)));
+}
+
 /*!
  * Emit the instruction sequence for i64 left shifts. The basic algorithm
  * is to fill the bottom two word slots with zeros so that zeros are shifted
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index e8b5ae6123c..edbceddaeda 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -350,6 +350,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   // Custom lower i128 -> i64 truncates
   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 
+  // Custom lower i64 -> i128 sign extend
+  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
@@ -511,9 +514,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
-    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
-    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
-    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
@@ -2610,6 +2610,45 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
   return SDValue(); // Leave the truncate unmolested
 }
 
+//! Custom lower ISD::SIGN_EXTEND (i64 -> i128 only)
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+  // Type to extend to
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Type to extend from
+  SDValue Op0 = Op.getOperand(0);
+  EVT Op0VT = Op0.getValueType();
+
+  assert((VT == MVT::i128 && Op0VT == MVT::i64) &&
+         "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+
+  // Create shuffle mask (selectors 0x00-0x0f: Op0, 0x10-0x1f: sraVal)
+  unsigned mask1 = 0x10101010; // bytes 0 - 3 and 4 - 7: sign byte
+  unsigned mask2 = 0x00010203; // bytes 8 - 11: input bytes 0 - 3
+  unsigned mask3 = 0x04050607; // bytes 12 - 15: input bytes 4 - 7
+  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 DAG.getConstant(mask1, MVT::i32),
+                                 DAG.getConstant(mask1, MVT::i32),
+                                 DAG.getConstant(mask2, MVT::i32),
+                                 DAG.getConstant(mask3, MVT::i32));
+
+  // Word wise arithmetic right shift to generate a byte that contains sign bits
+  SDValue sraVal = DAG.getNode(ISD::SRA,
+                               dl,
+                               MVT::v2i64,
+                               DAG.getNode(SPUISD::PREFSLOT2VEC, dl, MVT::v2i64, Op0, Op0),
+                               DAG.getConstant(31, MVT::i32));
+
+  // shuffle bytes - copies the sign bits into the upper 64 bits
+  // and the input value into the lower 64 bits
+  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, MVT::v2i64,
+                                   Op0, sraVal, shufMask);
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+}
+
 //! Custom (target-specific) lowering entry point
 /*!
   This is where LLVM's DAG selection process calls to do target-specific
@@ -2702,6 +2741,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
 
   case ISD::TRUNCATE:
     return LowerTRUNCATE(Op, DAG);
+
+  case ISD::SIGN_EXTEND:
+    return LowerSIGN_EXTEND(Op, DAG);
   }
 
   return SDValue();
@@ -2864,9 +2906,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
   }
   case SPUISD::SHLQUAD_L_BITS:
   case SPUISD::SHLQUAD_L_BYTES:
-  case SPUISD::VEC_SHL:
-  case SPUISD::VEC_SRL:
-  case SPUISD::VEC_SRA:
   case SPUISD::ROTBYTES_LEFT: {
     SDValue Op1 = N->getOperand(1);
 
@@ -2994,9 +3033,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   case SPUISD::VEC2PREFSLOT:
   case SPUISD::SHLQUAD_L_BITS:
   case SPUISD::SHLQUAD_L_BYTES:
-  case SPUISD::VEC_SHL:
-  case SPUISD::VEC_SRL:
-  case SPUISD::VEC_SRA:
   case SPUISD::VEC_ROTL:
   case SPUISD::VEC_ROTR:
   case SPUISD::ROTBYTES_LEFT:
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index b0a118a2a82..ca1a66c922c 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -43,9 +43,6 @@ namespace llvm {
       VEC2PREFSLOT,             ///< Extract element 0
       SHLQUAD_L_BITS,           ///< Rotate quad left, by bits
       SHLQUAD_L_BYTES,          ///< Rotate quad left, by bytes
-      VEC_SHL,                  ///< Vector shift left
-      VEC_SRL,                  ///< Vector shift right (logical)
-      VEC_SRA,                  ///< Vector shift right (arithmetic)
       VEC_ROTL,                 ///< Vector rotate left
       VEC_ROTR,                 ///< Vector rotate right
       ROTBYTES_LEFT,            ///< Rotate bytes (loads -> ROTQBYI)
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 87c4115d1b1..c722e4b006e 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -87,9 +87,9 @@ def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
 def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
 
-// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
-def SPUvec_shl: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type, []>;
-def SPUvec_srl: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type, []>;
-def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>;
+// Vector shifts (pattern nodes for the generic ISD::SHL, SRL and SRA):
+def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
+def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
+def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
 def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
 def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
 
diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll
new file mode 100644
index 00000000000..729daf726f9
--- /dev/null
+++ b/test/CodeGen/CellSPU/sext128.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep {long.*269488144} %t1.s | count 2
+; RUN: grep {long.*66051} %t1.s | count 1
+; RUN: grep {long.*67438087} %t1.s | count 1
+; RUN: grep {rotmai} %t1.s | count 1
+; RUN: grep {lqa} %t1.s | count 1
+; RUN: grep {shufb} %t1.s | count 1
+
+; ModuleID = 'sext128.bc'
+target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128"
+target triple = "spu"
+
+define i128 @sext_i64_i128(i64 %a) {
+entry:
+  %0 = sext i64 %a to i128
+  ret i128 %0
+}