From af98f76fb5ec9ac2a392b32cf40b31f67e0bfd44 Mon Sep 17 00:00:00 2001
From: Adam Nemet
Date: Tue, 5 Aug 2014 17:22:59 +0000
Subject: [PATCH] [X86] Add lowering to VALIGN

This was previously part of lowering to PALIGNR with some special-casing to
make interlane shifting work.  Since AVX512F has interlane alignr (valignd/q)
and AVX512BW has vpalignr, we need to support both of these *at the same
time*, e.g. for SKX.

This patch breaks out the common code and then adds support to check both of
these lowering options from LowerVECTOR_SHUFFLE.

I also added some FIXMEs where I think the AVX512BW and AVX512VL additions
should probably go.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214888 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 55 ++++++++++++++++++++++++------
 lib/Target/X86/X86InstrAVX512.td   | 14 ++++----
 2 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 10867ef3111..c92fc8460bc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3464,6 +3464,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
   switch(Opc) {
   default: llvm_unreachable("Unknown x86 shuffle node");
   case X86ISD::PALIGNR:
+  case X86ISD::VALIGN:
   case X86ISD::SHUFP:
   case X86ISD::VPERM2X128:
     return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -3802,16 +3803,9 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
   return true;
 }
 
-/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
-                          const X86Subtarget *Subtarget) {
-  if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
-      (VT.is256BitVector() && !Subtarget->hasInt256()))
-    return false;
-
+static bool isAlignrMask(ArrayRef<int> Mask, MVT VT, bool InterLane) {
   unsigned NumElts = VT.getVectorNumElements();
-  unsigned NumLanes = VT.is512BitVector() ? 1: VT.getSizeInBits()/128;
+  unsigned NumLanes = InterLane ? 1: VT.getSizeInBits()/128;
   unsigned NumLaneElts = NumElts/NumLanes;
 
   // Do not handle 64-bit element shuffles with palignr.
@@ -3875,6 +3869,28 @@ static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
   return true;
 }
 
+/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PALIGNR.
+static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
+                          const X86Subtarget *Subtarget) {
+  if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
+      (VT.is256BitVector() && !Subtarget->hasInt256()))
+    // FIXME: Add AVX512BW.
+    return false;
+
+  return isAlignrMask(Mask, VT, false);
+}
+
+/// isVALIGNMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to VALIGN.
+static bool isVALIGNMask(ArrayRef<int> Mask, MVT VT,
+                         const X86Subtarget *Subtarget) {
+  // FIXME: Add AVX512VL.
+  if (!VT.is512BitVector() || !Subtarget->hasAVX512())
+    return false;
+  return isAlignrMask(Mask, VT, true);
+}
+
 /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
 /// the two vector operands have swapped position.
 static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
@@ -4701,9 +4717,10 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
 
 /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
 /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+static unsigned getShuffleAlignrImmediate(ShuffleVectorSDNode *SVOp,
+                                          bool InterLane) {
   MVT VT = SVOp->getSimpleValueType(0);
-  unsigned EltSize = VT.is512BitVector() ? 1 :
+  unsigned EltSize = InterLane ? 1 :
                      VT.getVectorElementType().getSizeInBits() >> 3;
 
   unsigned NumElts = VT.getVectorNumElements();
@@ -4724,6 +4741,17 @@ static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
   return (Val - i) * EltSize;
 }
 
+/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+  return getShuffleAlignrImmediate(SVOp, false);
+}
+
+static unsigned getShuffleVALIGNImmediate(ShuffleVectorSDNode *SVOp) {
+  return getShuffleAlignrImmediate(SVOp, true);
+}
+
+
 static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
   assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
   if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
@@ -9609,6 +9637,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
                                 getShufflePALIGNRImmediate(SVOp),
                                 DAG);
 
+  if (isVALIGNMask(M, VT, Subtarget))
+    return getTargetShuffleNode(X86ISD::VALIGN, dl, VT, V1, V2,
+                                getShuffleVALIGNImmediate(SVOp),
+                                DAG);
+
   // Check if this can be converted into a logical shift.
   bool isLeft = false;
   unsigned ShAmt = 0;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 995c5e40d5f..7328d992c1f 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -4461,20 +4461,20 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
                    (memopv8i64 addr:$src2), (i8 imm:$imm))),
           (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
 
-multiclass avx512_alignr<string Suffix, RegisterClass RC,
-                         X86MemOperand x86memop, ValueType IntVT,
-                         ValueType FloatVT> {
+multiclass avx512_valign<string Suffix, RegisterClass RC,
+                         X86MemOperand x86memop, ValueType IntVT,
+                         ValueType FloatVT> {
   def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
                        (ins RC:$src1, RC:$src2, i8imm:$src3),
                        !strconcat("valign"##Suffix,
                                   " \t{$src3, $src2, $src1, $dst|"
                                       "$dst, $src1, $src2, $src3}"),
                        [(set RC:$dst,
-                             (IntVT (X86PAlignr RC:$src2, RC:$src1,
+                             (IntVT (X86VAlign RC:$src2, RC:$src1,
                                                 (i8 imm:$src3))))]>, EVEX_4V;
 
   // Also match valign of packed floats.
-  def : Pat<(FloatVT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$imm))),
+  def : Pat<(FloatVT (X86VAlign RC:$src1, RC:$src2, (i8 imm:$imm))),
             (!cast<Instruction>(NAME##rri) RC:$src2, RC:$src1, imm:$imm)>;
 
   let mayLoad = 1 in
@@ -4485,9 +4485,9 @@ multiclass avx512_alignr<string Suffix, RegisterClass RC,
                        []>, EVEX_4V;
 }
 
-defm VALIGND : avx512_alignr<"d", VR512, i512mem, v16i32, v16f32>,
+defm VALIGND : avx512_valign<"d", VR512, i512mem, v16i32, v16f32>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_alignr<"q", VR512, i512mem, v8i64, v8f64>,
+defm VALIGNQ : avx512_valign<"q", VR512, i512mem, v8i64, v8f64>,
               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
 
 // Helper fragments to match sext vXi1 to vXiY.
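As an illustration of the new lowering (not part of this patch), a cross-lane
element rotate of a 512-bit vector such as the one below should now be matched
by isVALIGNMask and selected as valignd on an AVX512F target.  The function
name is made up for the example and the exact register allocation shown is an
assumption; only the instruction shape follows from the lowering added here:

    ; Take elements 2..17 of the 32-element concatenation <%b:%a>, i.e. rotate
    ; the pair down by two i32 elements.
    define <16 x i32> @rotate_v16i32(<16 x i32> %a, <16 x i32> %b) {
      %r = shufflevector <16 x i32> %a, <16 x i32> %b,
             <16 x i32> <i32 2,  i32 3,  i32 4,  i32 5,  i32 6,  i32 7,
                         i32 8,  i32 9,  i32 10, i32 11, i32 12, i32 13,
                         i32 14, i32 15, i32 16, i32 17>
      ret <16 x i32> %r
    }
    ; Expected (approximate) codegen with -mattr=+avx512f:
    ;   valignd $2, %zmm0, %zmm1, %zmm0

getShuffleAlignrImmediate returns 2 here: with InterLane set, EltSize is 1, the
first defined mask element is 2 at position 0, so the immediate is (2 - 0) * 1.
A PALIGNR-only lowering cannot use a single instruction for this mask because
elements cross 128-bit lanes, whereas valignd/q shift the concatenated sources
across the whole 512-bit register; that is why isAlignrMask takes the InterLane
flag.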