diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 3814aa0d528..1289d7bfc75 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -987,7 +987,12 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
 
 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
 /// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
+/// The ShuffleKind distinguishes between big-endian operations with two
+/// different inputs (0), either-endian operations with two identical inputs
+/// (1), and little-endian operations with two different inputs (2). For the
+/// latter, the input operands are swapped (see PPCInstrAltivec.td).
+int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+                             SelectionDAG &DAG) {
   if (N->getValueType(0) != MVT::v16i8) return -1;
 
@@ -1006,18 +1011,24 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
   if (ShiftAmt < i) return -1;
   ShiftAmt -= i;
+  bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
+    isLittleEndian();
 
-  if (!isUnary) {
+  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
     // Check the rest of the elements to see if they are consecutive.
     for (++i; i != 16; ++i)
       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
         return -1;
-  } else {
+  } else if (ShuffleKind == 1) {
     // Check the rest of the elements to see if they are consecutive.
     for (++i; i != 16; ++i)
       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
         return -1;
-  }
+  } else
+    return -1;
+
+  if (ShuffleKind == 2 && isLE)
+    ShiftAmt = 16 - ShiftAmt;
 
   return ShiftAmt;
 }
@@ -6032,7 +6043,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
         PPC::isSplatShuffleMask(SVOp, 4) ||
         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
-        PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
+        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
@@ -6049,7 +6060,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
-      PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
+      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index cda376d0abe..c9394dd12e7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -315,9 +315,10 @@ namespace llvm {
     bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG);
 
-    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
-    /// amount, otherwise return -1.
-    int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG);
+    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
+    /// shift amount, otherwise return -1.
+    int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+                            SelectionDAG &DAG);
 
     /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
     /// specifies a splat of a single element that is suitable for input to
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index ee9942c5a6c..b271b5d5aa2 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -129,25 +129,36 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
 def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
-  return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG));
+  return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG));
 }]>;
 def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                              (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1;
+  return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1;
 }], VSLDOI_get_imm>;
 
 /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
 /// vector_shuffle(X,undef,mask) by the dag combiner.
 def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
-  return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG));
+  return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG));
 }]>;
 def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1;
+  return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1;
 }], VSLDOI_unary_get_imm>;
 
+/// VSLDOI_swapped* - These fragments are provided for little-endian, where
+/// the inputs must be swapped for correct semantics.
+def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
+  return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG));
+}]>;
+def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;
+}], VSLDOI_get_imm>;
+
+
 // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
 def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
@@ ... @@
-// Match vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
-// These fragments are matched for little-endian, where the
-// inputs must be swapped for correct semantics.
+// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
+// These fragments are matched for little-endian, where the inputs must
+// be swapped for correct semantics.
+def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB),
+        (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>;
 def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB),
         (VPKUWUM $vB, $vA)>;
 def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB),
diff --git a/test/CodeGen/PowerPC/vec_shuffle_le.ll b/test/CodeGen/PowerPC/vec_shuffle_le.ll
index efebece09f4..a4b2119f6eb 100644
--- a/test/CodeGen/PowerPC/vec_shuffle_le.ll
+++ b/test/CodeGen/PowerPC/vec_shuffle_le.ll
@@ -190,7 +190,9 @@ entry:
   %tmp = load <16 x i8>* %A
   %tmp2 = load <16 x i8>* %B
   %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
-; CHECK: vsldoi
+; CHECK: lvx [[REG1:[0-9]+]]
+; CHECK: lvx [[REG2:[0-9]+]]
+; CHECK: vsldoi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 4
   store <16 x i8> %tmp3, <16 x i8>* %A
   ret void
 }
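As an aside for readers tracing the arithmetic: the little-endian handling above amounts to matching the usual consecutive shuffle mask <s, s+1, ..., s+15> over the two concatenated inputs and then emitting vsldoi with the operands swapped and the immediate mirrored to 16 - s. The sketch below is a standalone model of that logic, not LLVM code; it ignores undef mask elements and the ShuffleKind plumbing, and the helper name matchVSLDOI is invented for illustration.

#include <array>
#include <cstdio>

// Return the vsldoi shift amount for Mask, or -1 if Mask is not a
// consecutive <s, s+1, ..., s+15> pattern.  SwapForLE models the
// little-endian case, where the operands are swapped and the immediate
// becomes 16 - s.
static int matchVSLDOI(const std::array<int, 16> &Mask, bool SwapForLE) {
  int Shift = Mask[0];
  if (Shift < 0 || Shift > 31)
    return -1;
  for (int i = 1; i != 16; ++i)
    if (Mask[i] != Shift + i)   // elements must be consecutive
      return -1;
  return SwapForLE ? 16 - Shift : Shift;
}

int main() {
  std::array<int, 16> Mask;
  for (int i = 0; i != 16; ++i)
    Mask[i] = 12 + i;           // the mask from the test case: <12, ..., 27>
  std::printf("BE imm: %d\n", matchVSLDOI(Mask, false));  // prints 12
  std::printf("LE imm: %d\n", matchVSLDOI(Mask, true));   // prints 4, as CHECKed
  return 0;
}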