mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-23 20:29:30 +00:00
[X86] Emulate AVX 256bit MIN/MAX support by splitting the vector.
In AVX, 256-bit vectors are legal vector types, and therefore the Type Legalizer doesn't split the VSELECT and SETCC nodes. However, AVX (without AVX2) only supports integer MIN/MAX on 128-bit vectors, so this fix enables vector splitting for this special case in the X86 DAG Combiner. This fix is related to PR16695, PR17002, and <rdar://problem/14594431>. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191131 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1941431f8a
commit
fcfc234130
@ -16371,24 +16371,28 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
|
/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
|
||||||
static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
|
static std::pair<unsigned, bool>
|
||||||
SDValue RHS, SelectionDAG &DAG,
|
matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
|
||||||
const X86Subtarget *Subtarget) {
|
SelectionDAG &DAG, const X86Subtarget *Subtarget) {
|
||||||
if (!VT.isVector())
|
if (!VT.isVector())
|
||||||
return 0;
|
return std::make_pair(0, false);
|
||||||
|
|
||||||
|
bool NeedSplit = false;
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
switch (VT.getSimpleVT().SimpleTy) {
|
||||||
default: return 0;
|
default: return std::make_pair(0, false);
|
||||||
case MVT::v32i8:
|
case MVT::v32i8:
|
||||||
case MVT::v16i16:
|
case MVT::v16i16:
|
||||||
case MVT::v8i32:
|
case MVT::v8i32:
|
||||||
if (!Subtarget->hasAVX2())
|
if (!Subtarget->hasAVX2())
|
||||||
return 0;
|
NeedSplit = true;
|
||||||
|
if (!Subtarget->hasAVX())
|
||||||
|
return std::make_pair(0, false);
|
||||||
|
break;
|
||||||
case MVT::v16i8:
|
case MVT::v16i8:
|
||||||
case MVT::v8i16:
|
case MVT::v8i16:
|
||||||
case MVT::v4i32:
|
case MVT::v4i32:
|
||||||
if (!Subtarget->hasSSE2())
|
if (!Subtarget->hasSSE2())
|
||||||
return 0;
|
return std::make_pair(0, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// SSE2 has only a small subset of the operations.
|
// SSE2 has only a small subset of the operations.
|
||||||
@ -16399,6 +16403,7 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
|
|||||||
|
|
||||||
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
|
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
|
||||||
|
|
||||||
|
unsigned Opc = 0;
|
||||||
// Check for x CC y ? x : y.
|
// Check for x CC y ? x : y.
|
||||||
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
|
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
|
||||||
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
|
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
|
||||||
@ -16406,16 +16411,16 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
|
|||||||
default: break;
|
default: break;
|
||||||
case ISD::SETULT:
|
case ISD::SETULT:
|
||||||
case ISD::SETULE:
|
case ISD::SETULE:
|
||||||
return hasUnsigned ? X86ISD::UMIN : 0;
|
Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
|
||||||
case ISD::SETUGT:
|
case ISD::SETUGT:
|
||||||
case ISD::SETUGE:
|
case ISD::SETUGE:
|
||||||
return hasUnsigned ? X86ISD::UMAX : 0;
|
Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
|
||||||
case ISD::SETLT:
|
case ISD::SETLT:
|
||||||
case ISD::SETLE:
|
case ISD::SETLE:
|
||||||
return hasSigned ? X86ISD::SMIN : 0;
|
Opc = hasSigned ? X86ISD::SMIN : 0; break;
|
||||||
case ISD::SETGT:
|
case ISD::SETGT:
|
||||||
case ISD::SETGE:
|
case ISD::SETGE:
|
||||||
return hasSigned ? X86ISD::SMAX : 0;
|
Opc = hasSigned ? X86ISD::SMAX : 0; break;
|
||||||
}
|
}
|
||||||
// Check for x CC y ? y : x -- a min/max with reversed arms.
|
// Check for x CC y ? y : x -- a min/max with reversed arms.
|
||||||
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
|
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
|
||||||
@ -16424,20 +16429,20 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
|
|||||||
default: break;
|
default: break;
|
||||||
case ISD::SETULT:
|
case ISD::SETULT:
|
||||||
case ISD::SETULE:
|
case ISD::SETULE:
|
||||||
return hasUnsigned ? X86ISD::UMAX : 0;
|
Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
|
||||||
case ISD::SETUGT:
|
case ISD::SETUGT:
|
||||||
case ISD::SETUGE:
|
case ISD::SETUGE:
|
||||||
return hasUnsigned ? X86ISD::UMIN : 0;
|
Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
|
||||||
case ISD::SETLT:
|
case ISD::SETLT:
|
||||||
case ISD::SETLE:
|
case ISD::SETLE:
|
||||||
return hasSigned ? X86ISD::SMAX : 0;
|
Opc = hasSigned ? X86ISD::SMAX : 0; break;
|
||||||
case ISD::SETGT:
|
case ISD::SETGT:
|
||||||
case ISD::SETGE:
|
case ISD::SETGE:
|
||||||
return hasSigned ? X86ISD::SMIN : 0;
|
Opc = hasSigned ? X86ISD::SMIN : 0; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return std::make_pair(Opc, NeedSplit);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
|
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
|
||||||
@ -16795,9 +16800,30 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Try to match a min/max vector operation.
|
// Try to match a min/max vector operation.
|
||||||
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
|
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
|
||||||
if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
|
unsigned Opc;
|
||||||
return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
|
bool NeedSplit;
|
||||||
|
std::tie(Opc, NeedSplit) = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
|
||||||
|
|
||||||
|
if (Opc && NeedSplit) {
|
||||||
|
unsigned NumElems = VT.getVectorNumElements();
|
||||||
|
// Extract the LHS vectors
|
||||||
|
SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
|
||||||
|
SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
|
||||||
|
|
||||||
|
// Extract the RHS vectors
|
||||||
|
SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
|
||||||
|
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
|
||||||
|
|
||||||
|
// Create min/max for each subvector
|
||||||
|
LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
|
||||||
|
RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
|
||||||
|
|
||||||
|
// Merge the result
|
||||||
|
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
|
||||||
|
} else if (Opc)
|
||||||
|
return DAG.getNode(Opc, DL, VT, LHS, RHS);
|
||||||
|
}
|
||||||
|
|
||||||
// Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
|
// Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
|
||||||
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
|
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
|
||||||
|
@ -1,10 +1,14 @@
|
|||||||
; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
|
; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
|
||||||
|
; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
|
||||||
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
|
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
|
||||||
|
|
||||||
define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) {
|
define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) {
|
||||||
; SSE4-LABEL: split16:
|
; SSE4-LABEL: split16:
|
||||||
; SSE4: pminuw
|
; SSE4: pminuw
|
||||||
; SSE4: pminuw
|
; SSE4: pminuw
|
||||||
|
; AVX1-LABEL: split16:
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: vpminuw
|
||||||
; AVX2-LABEL: split16:
|
; AVX2-LABEL: split16:
|
||||||
; AVX2: vpminuw
|
; AVX2: vpminuw
|
||||||
; AVX2: ret
|
; AVX2: ret
|
||||||
@ -19,6 +23,11 @@ define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) {
|
|||||||
; SSE4: pminuw
|
; SSE4: pminuw
|
||||||
; SSE4: pminuw
|
; SSE4: pminuw
|
||||||
; SSE4: pminuw
|
; SSE4: pminuw
|
||||||
|
; AVX1-LABEL: split32:
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: vpminuw
|
||||||
|
; AVX1: vpminuw
|
||||||
; AVX2-LABEL: split32:
|
; AVX2-LABEL: split32:
|
||||||
; AVX2: vpminuw
|
; AVX2: vpminuw
|
||||||
; AVX2: vpminuw
|
; AVX2: vpminuw
|
||||||
|
Loading…
x
Reference in New Issue
Block a user