[PPC] Implement vmrgew and vmrgow instructions

This patch adds support for the vector merge even word and vector merge odd word
instructions introduced in POWER8.

Phabricator review: http://reviews.llvm.org/D10704


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240650 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kit Barton 2015-06-25 15:17:40 +00:00
parent 39b2e22f00
commit 1ebbc68719
6 changed files with 269 additions and 4 deletions

View File

@ -1279,6 +1279,99 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
}
}
/**
* \brief Common function used to match vmrgew and vmrgow shuffles
*
* The IndexOffset determines whether to look for even or odd words in
* the shuffle mask. This is based on the endianness of the target
* machine.
* - Little Endian:
* - Use offset of 0 to check for odd elements
* - Use offset of 4 to check for even elements
* - Big Endian:
* - Use offset of 0 to check for even elements
* - Use offset of 4 to check for odd elements
* A detailed description of the vector element ordering for little endian and
* big endian can be found at <a
* href="http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html">
* Targeting your applications - what little endian and big endian IBM XL C/C++
* compiler differences mean to you </a>
*
* The mask to the shuffle vector instruction specifies the indices of the
* elements from the two input vectors to place in the result. The elements are
* numbered in array-access order, starting with the first vector. These vectors
* are always of type v16i8, thus each vector will contain 16 elements of size
* 8. More info on the shuffle vector can be found in the <a
* href="http://llvm.org/docs/LangRef.html#shufflevector-instruction">Language
* Reference</a>.
*
* The RHSStartValue indicates whether the same input vectors are used (unary)
* or two different input vectors are used, based on the following:
* - If the instruction uses the same vector for both inputs, the range of the
* indices will be 0 to 15. In this case, the RHSStartValue passed should
* be 0.
* - If the instruction has two different vectors then the range of the
* indices will be 0 to 31. In this case, the RHSStartValue passed should
* be 16 (indices 0-15 specify elements in the first vector while indices 16
* to 31 specify elements in the second vector).
*
* \param[in] N The shuffle vector SD Node to analyze
* \param[in] IndexOffset Specifies whether to look for even or odd elements
* \param[in] RHSStartValue Specifies the starting index for the righthand input
* vector to the shuffle_vector instruction
* \return true iff this shuffle vector represents an even or odd word merge
*/
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  // Only byte-granular 128-bit shuffles are candidates.
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  // Result words 0 and 1 (and, 8 bytes further on, words 2 and 3) are checked
  // together. Word 0/2 must come from the first input; word 1/3 must come from
  // the input that starts at mask index RHSStartValue.
  for (unsigned Word = 0; Word != 2; ++Word) {
    const unsigned MaskBase = Word * 4;
    const unsigned SourceBase = Word * RHSStartValue + IndexOffset;
    for (unsigned Byte = 0; Byte != 4; ++Byte) {
      const unsigned Expected = SourceBase + Byte;
      // Low half of the result (bytes 0-7).
      if (!isConstantOrUndef(N->getMaskElt(MaskBase + Byte), Expected))
        return false;
      // High half of the result (bytes 8-15) uses the same pattern shifted by
      // two words.
      if (!isConstantOrUndef(N->getMaskElt(MaskBase + Byte + 8), Expected + 8))
        return false;
    }
  }
  return true;
}
/**
* \brief Determine if the specified shuffle mask is suitable for the vmrgew or
* vmrgow instructions.
*
* \param[in] N The shuffle vector SD Node to analyze
* \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
* \param[in] ShuffleKind Identify the type of merge:
* - 0 = big-endian merge with two different inputs;
* - 1 = either-endian merge with two identical inputs;
* - 2 = little-endian merge with two different inputs (inputs are swapped for
* little-endian merges).
* \param[in] DAG The current SelectionDAG
* \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
*/
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  const bool IsLittleEndian =
      DAG.getTarget().getDataLayout()->isLittleEndian();

  // Little endian looks for even words at byte offset 4 and odd words at 0;
  // big endian is the mirror image. That is an offset of 4 exactly when
  // CheckEven and IsLittleEndian agree.
  const unsigned IndexOffset = (CheckEven == IsLittleEndian) ? 4 : 0;

  // The only two-input kind accepted is the swapped merge (2) on little
  // endian and the normal merge (0) on big endian.
  const unsigned TwoInputKind = IsLittleEndian ? 2 : 0;

  if (ShuffleKind == 1) // Unary: both words come from the same input.
    return isVMerge(N, IndexOffset, 0);
  if (ShuffleKind == TwoInputKind) // Second input starts at mask index 16.
    return isVMerge(N, IndexOffset, 16);
  return false;
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
@ -7046,7 +7139,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
return Op;
}
}
@ -7064,7 +7159,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our

View File

@ -382,6 +382,11 @@ namespace llvm {
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG);
/// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGEW or VMRGOW instruction. CheckEven selects an even-word merge
/// (true) or an odd-word merge (false). ShuffleKind is 0 for a big-endian
/// merge with two different inputs, 1 for a merge with two identical inputs,
/// and 2 for a little-endian merge with two different (swapped) inputs.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,

View File

@ -155,6 +155,33 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
}]>;
// Matches a vector_shuffle whose mask is accepted by
// PPC::isVMRGEOShuffleMask with CheckEven = true and ShuffleKind = 0
// (big-endian even-word merge with two different inputs).
def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG);
}]>;
// Matches a vector_shuffle whose mask is accepted by
// PPC::isVMRGEOShuffleMask with CheckEven = false and ShuffleKind = 0
// (big-endian odd-word merge with two different inputs).
def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG);
}]>;
// Matches a vector_shuffle whose mask is accepted by
// PPC::isVMRGEOShuffleMask with CheckEven = true and ShuffleKind = 1
// (even-word merge where both inputs are the same vector).
def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG);
}]>;
// Matches a vector_shuffle whose mask is accepted by
// PPC::isVMRGEOShuffleMask with CheckEven = false and ShuffleKind = 1
// (odd-word merge where both inputs are the same vector).
def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG);
}]>;
// Matches a vector_shuffle whose mask is accepted by
// PPC::isVMRGEOShuffleMask with CheckEven = true and ShuffleKind = 2
// (little-endian even-word merge with two different, swapped inputs).
def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG);
}]>;
// Matches a vector_shuffle whose mask is accepted by
// PPC::isVMRGEOShuffleMask with CheckEven = false and ShuffleKind = 2
// (little-endian odd-word merge with two different, swapped inputs).
def vmrgow_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG);
}]>;
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N));
}]>;
@ -1008,6 +1035,29 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
} // isCommutable
// Vector merge
// vmrgew (vector merge even word): selected for v16i8 shuffles matched by
// vmrgew_shuffle, with $vA and $vB in their original order.
// NOTE(review): 1932 (0x78c) is presumably the extended opcode field of
// VXForm_1 - confirm against the class definition.
def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrgew $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>;
// vmrgow (vector merge odd word): selected for v16i8 shuffles matched by
// vmrgow_shuffle, with $vA and $vB in their original order.
// NOTE(review): 1676 (0x68c) is presumably the extended opcode field of
// VXForm_1 - confirm against the class definition.
def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrgow $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>;
// Match vmrgew(x,x) and vmrgow(x,x): a unary shuffle (second operand undef)
// is selected by passing the single input $vA as both source operands.
def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef),
(VMRGEW $vA, $vA)>;
def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef),
(VMRGOW $vA, $vA)>;
// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands. These fragments
// are matched for little-endian, where the inputs must be swapped for correct
// semantics.
// Swapped-operand selection: the shuffle's second input ($vB) becomes the
// instruction's first source and the first input ($vA) becomes the second.
def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
(VMRGEW $vB, $vA)>;
def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
(VMRGOW $vB, $vA)>;
// Vector shifts
def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),

View File

@ -0,0 +1,101 @@
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-LE
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-BE
; Check for a vector merge instruction using two inputs
; The shufflevector specifies the even elements, using big endian element
; ordering. If run on a big endian machine, this should produce the vmrgew
; instruction. If run on a little endian machine, this should produce the
; vmrgow instruction. Note that on little endian the input registers are
; also swapped.
define void @check_merge_even_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK-LE-LABEL: @check_merge_even_xy
; CHECK-BE-LABEL: @check_merge_even_xy
%tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
; Mask indices 0-15 select bytes of %tmp and 16-31 select bytes of %tmp2, so
; the result is word 0 of %tmp, word 0 of %tmp2, word 2 of %tmp, word 2 of
; %tmp2 (words numbered in array order).
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2,
<16 x i32> <i32 0, i32 1, i32 2, i32 3,
i32 16, i32 17, i32 18, i32 19,
i32 8, i32 9, i32 10, i32 11,
i32 24, i32 25, i32 26, i32 27>
; CHECK-LE: vmrgow 2, 3, 2
; CHECK-BE: vmrgew 2, 2, 3
store <16 x i8> %tmp3, <16 x i8>* %A
ret void
; CHECK-LE: blr
; CHECK-BE: blr
}
; Check for a vector merge instruction using a single input.
; The shufflevector specifies the even elements, using big endian element
; ordering. If run on a big endian machine, this should produce the vmrgew
; instruction. If run on a little endian machine, this should produce the
; vmrgow instruction.
define void @check_merge_even_xx(<16 x i8>* %A) {
entry:
; CHECK-LE-LABEL: @check_merge_even_xx
; CHECK-BE-LABEL: @check_merge_even_xx
%tmp = load <16 x i8>, <16 x i8>* %A
; Both operands are %tmp and every mask index is below 16, so the result is
; words 0, 0, 2, 2 of %tmp (words numbered in array order).
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp,
<16 x i32> <i32 0, i32 1, i32 2, i32 3,
i32 0, i32 1, i32 2, i32 3,
i32 8, i32 9, i32 10, i32 11,
i32 8, i32 9, i32 10, i32 11>
; CHECK-LE: vmrgow 2, 2, 2
; CHECK-BE: vmrgew 2, 2, 2
store <16 x i8> %tmp2, <16 x i8>* %A
ret void
; CHECK-LE: blr
; CHECK-BE: blr
}
; Check for a vector merge instruction using two inputs.
; The shufflevector specifies the odd elements, using big endian element
; ordering. If run on a big endian machine, this should produce the vmrgow
; instruction. If run on a little endian machine, this should produce the
; vmrgew instruction. Note that on little endian the input registers are
; also swapped.
define void @check_merge_odd_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK-LE-LABEL: @check_merge_odd_xy
; CHECK-BE-LABEL: @check_merge_odd_xy
%tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
; Mask indices 0-15 select bytes of %tmp and 16-31 select bytes of %tmp2, so
; the result is word 1 of %tmp, word 1 of %tmp2, word 3 of %tmp, word 3 of
; %tmp2 (words numbered in array order).
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2,
<16 x i32> <i32 4, i32 5, i32 6, i32 7,
i32 20, i32 21, i32 22, i32 23,
i32 12, i32 13, i32 14, i32 15,
i32 28, i32 29, i32 30, i32 31>
; CHECK-LE: vmrgew 2, 3, 2
; CHECK-BE: vmrgow 2, 2, 3
store <16 x i8> %tmp3, <16 x i8>* %A
ret void
; CHECK-LE: blr
; CHECK-BE: blr
}
; Check for a vector merge instruction using a single input.
; The shufflevector specifies the odd elements, using big endian element
; ordering. If run on a big endian machine, this should produce the vmrgow
; instruction. If run on a little endian machine, this should produce the
; vmrgew instruction.
define void @check_merge_odd_xx(<16 x i8>* %A) {
entry:
; CHECK-LE-LABEL: @check_merge_odd_xx
; CHECK-BE-LABEL: @check_merge_odd_xx
%tmp = load <16 x i8>, <16 x i8>* %A
; Both operands are %tmp and every mask index is below 16, so the result is
; words 1, 1, 3, 3 of %tmp (words numbered in array order).
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp,
<16 x i32> <i32 4, i32 5, i32 6, i32 7,
i32 4, i32 5, i32 6, i32 7,
i32 12, i32 13, i32 14, i32 15,
i32 12, i32 13, i32 14, i32 15>
; CHECK-LE: vmrgew 2, 2, 2
; CHECK-BE: vmrgow 2, 2, 2
store <16 x i8> %tmp2, <16 x i8>* %A
ret void
; CHECK-LE: blr
; CHECK-BE: blr
}

View File

@ -99,6 +99,12 @@
# CHECK: vmrglw 2, 3, 4
0x10 0x43 0x21 0x8c
# CHECK: vmrgew 2, 3, 4
0x10 0x43 0x27 0x8c
# CHECK: vmrgow 2, 3, 4
0x10 0x43 0x26 0x8c
# CHECK: vspltb 2, 3, 1
0x10 0x41 0x1a 0x0c

View File

@ -1,5 +1,5 @@
# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
# Vector facility
@ -110,7 +110,13 @@
# CHECK-BE: vmrglw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x8c]
# CHECK-LE: vmrglw 2, 3, 4 # encoding: [0x8c,0x21,0x43,0x10]
vmrglw 2, 3, 4
# CHECK-BE: vmrgew 2, 3, 4 # encoding: [0x10,0x43,0x27,0x8c]
# CHECK-LE: vmrgew 2, 3, 4 # encoding: [0x8c,0x27,0x43,0x10]
vmrgew 2, 3, 4
# CHECK-BE: vmrgow 2, 3, 4 # encoding: [0x10,0x43,0x26,0x8c]
# CHECK-LE: vmrgow 2, 3, 4 # encoding: [0x8c,0x26,0x43,0x10]
vmrgow 2, 3, 4
# CHECK-BE: vspltb 2, 3, 1 # encoding: [0x10,0x41,0x1a,0x0c]
# CHECK-LE: vspltb 2, 3, 1 # encoding: [0x0c,0x1a,0x41,0x10]
vspltb 2, 3, 1