mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Fix PR12312
- Add a target-specific DAG optimization to recognize a PTEST-able pattern. Such a pattern is an OR'd tree with X86ISD::OR as the root node. When the X86ISD::OR node has only its flag result used, as a boolean value, and all its leaves are extracted from the same vector, it can be folded into an X86ISD::PTEST node. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162735 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
325907d086
commit
dbf8b5be97
@ -14036,7 +14036,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
|
||||
//
|
||||
// where Op could be BRCOND or CMOV.
|
||||
//
|
||||
static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
|
||||
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
|
||||
// Quit if not CMP and SUB with its value result used.
|
||||
if (Cmp.getOpcode() != X86ISD::CMP &&
|
||||
(Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
|
||||
@ -14087,7 +14087,85 @@ static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
|
||||
return SetCC.getOperand(1);
|
||||
}
|
||||
|
||||
static bool IsValidFCMOVCondition(X86::CondCode CC) {
|
||||
/// checkFlaggedOrCombine - DAG combination on X86ISD::OR, i.e. with EFLAGS
|
||||
/// updated. If only flag result is used and the result is evaluated from a
|
||||
/// series of element extraction, try to combine it into a PTEST.
|
||||
static SDValue checkFlaggedOrCombine(SDValue Or, X86::CondCode &CC,
|
||||
SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
SDNode *N = Or.getNode();
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
|
||||
// Only SSE4.1 and beyond supports PTEST or like.
|
||||
if (!Subtarget->hasSSE41())
|
||||
return SDValue();
|
||||
|
||||
if (N->getOpcode() != X86ISD::OR)
|
||||
return SDValue();
|
||||
|
||||
// Quit if the value result of OR is used.
|
||||
if (N->hasAnyUseOfValue(0))
|
||||
return SDValue();
|
||||
|
||||
// Quit if not used as a boolean value.
|
||||
if (CC != X86::COND_E && CC != X86::COND_NE)
|
||||
return SDValue();
|
||||
|
||||
SmallVector<SDValue, 8> Opnds;
|
||||
SDValue VecIn;
|
||||
EVT VT = MVT::Other;
|
||||
unsigned Mask = 0;
|
||||
|
||||
// Recognize a special case where a vector is casted into wide integer to
|
||||
// test all 0s.
|
||||
Opnds.push_back(N->getOperand(0));
|
||||
Opnds.push_back(N->getOperand(1));
|
||||
|
||||
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
|
||||
SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot;
|
||||
// BFS traverse all OR'd operands.
|
||||
if (I->getOpcode() == ISD::OR) {
|
||||
Opnds.push_back(I->getOperand(0));
|
||||
Opnds.push_back(I->getOperand(1));
|
||||
// Re-evaluate the number of nodes to be traversed.
|
||||
e = Opnds.size();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Quit if a non-EXTRACT_VECTOR_ELT
|
||||
if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
|
||||
return SDValue();
|
||||
|
||||
// Quit if without a constant index.
|
||||
SDValue Idx = I->getOperand(1);
|
||||
if (!isa<ConstantSDNode>(Idx))
|
||||
return SDValue();
|
||||
|
||||
// Check if all elements are extracted from the same vector.
|
||||
SDValue ExtractedFromVec = I->getOperand(0);
|
||||
if (VecIn.getNode() == 0) {
|
||||
VT = ExtractedFromVec.getValueType();
|
||||
// FIXME: only 128-bit vector is supported so far.
|
||||
if (!VT.is128BitVector())
|
||||
return SDValue();
|
||||
VecIn = ExtractedFromVec;
|
||||
} else if (VecIn != ExtractedFromVec)
|
||||
return SDValue();
|
||||
|
||||
// Record the constant index.
|
||||
Mask |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
}
|
||||
|
||||
assert(VT.is128BitVector() && "Only 128-bit vector PTEST is supported so far.");
|
||||
|
||||
// Quit if not all elements are used.
|
||||
if (Mask != (1U << VT.getVectorNumElements()) - 1U)
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIn, VecIn);
|
||||
}
|
||||
|
||||
static bool isValidFCMOVCondition(X86::CondCode CC) {
|
||||
switch (CC) {
|
||||
default:
|
||||
return false;
|
||||
@ -14105,7 +14183,8 @@ static bool IsValidFCMOVCondition(X86::CondCode CC) {
|
||||
|
||||
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
|
||||
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
|
||||
// If the flag operand isn't dead, don't touch this CMOV.
|
||||
@ -14130,10 +14209,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
SDValue Flags;
|
||||
|
||||
Flags = BoolTestSetCCCombine(Cond, CC);
|
||||
Flags = checkBoolTestSetCCCombine(Cond, CC);
|
||||
if (Flags.getNode() &&
|
||||
// Extra check as FCMOV only supports a subset of X86 cond.
|
||||
(FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) {
|
||||
(FalseOp.getValueType() != MVT::f80 || isValidFCMOVCondition(CC))) {
|
||||
SDValue Ops[] = { FalseOp, TrueOp,
|
||||
DAG.getConstant(CC, MVT::i8), Flags };
|
||||
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
|
||||
Ops, array_lengthof(Ops));
|
||||
}
|
||||
|
||||
Flags = checkFlaggedOrCombine(Cond, CC, DAG, Subtarget);
|
||||
if (Flags.getNode()) {
|
||||
SDValue Ops[] = { FalseOp, TrueOp,
|
||||
DAG.getConstant(CC, MVT::i8), Flags };
|
||||
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
|
||||
@ -15641,7 +15728,9 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
}
|
||||
|
||||
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
|
||||
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
|
||||
SDValue EFLAGS = N->getOperand(1);
|
||||
@ -15657,7 +15746,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
|
||||
SDValue Flags;
|
||||
|
||||
Flags = BoolTestSetCCCombine(EFLAGS, CC);
|
||||
Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
|
||||
if (Flags.getNode()) {
|
||||
SDValue Cond = DAG.getConstant(CC, MVT::i8);
|
||||
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
|
||||
}
|
||||
|
||||
Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget);
|
||||
if (Flags.getNode()) {
|
||||
SDValue Cond = DAG.getConstant(CC, MVT::i8);
|
||||
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
|
||||
@ -15679,7 +15774,14 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
SDValue Flags;
|
||||
|
||||
Flags = BoolTestSetCCCombine(EFLAGS, CC);
|
||||
Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
|
||||
if (Flags.getNode()) {
|
||||
SDValue Cond = DAG.getConstant(CC, MVT::i8);
|
||||
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
|
||||
Flags);
|
||||
}
|
||||
|
||||
Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget);
|
||||
if (Flags.getNode()) {
|
||||
SDValue Cond = DAG.getConstant(CC, MVT::i8);
|
||||
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
|
||||
@ -15874,7 +15976,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
|
||||
case ISD::VSELECT:
|
||||
case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
|
||||
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
|
||||
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
|
||||
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
|
||||
@ -15904,7 +16006,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
|
||||
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
|
||||
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
|
||||
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::SHUFP: // Handle all target specific shuffles
|
||||
case X86ISD::PALIGN:
|
||||
|
48
test/CodeGen/X86/pr12312.ll
Normal file
48
test/CodeGen/X86/pr12312.ll
Normal file
@ -0,0 +1,48 @@
|
||||
; RUN: llc -march=x86-64 -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41
|
||||
; RUN: llc -march=x86-64 -mattr=+avx < %s | FileCheck %s --check-prefix AVX
|
||||
|
||||
; veccond - bitcasts the <4 x i32> argument to i128, compares it against zero
; and branches on the result: returns 0 when the value is non-zero and 1
; otherwise. The check lines at the end of the body expect a ptest (vptest in
; the AVX run) between the function label and the ret.
define i32 @veccond(<4 x i32> %input) {
|
||||
entry:
|
||||
%0 = bitcast <4 x i32> %input to i128
|
||||
%1 = icmp ne i128 %0, 0
|
||||
br i1 %1, label %if-true-block, label %endif-block
|
||||
|
||||
if-true-block: ; preds = %entry
|
||||
ret i32 0
|
||||
endif-block: ; preds = %entry,
|
||||
ret i32 1
|
||||
; SSE41: veccond
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: veccond
|
||||
; AVX: vptest
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
; vectest - bitcasts the <4 x i32> argument to i128, compares it against zero
; and returns the i1 result zero-extended to i32 (1 when non-zero, else 0).
; The check lines at the end of the body expect a ptest (vptest in the AVX
; run) between the function label and the ret.
define i32 @vectest(<4 x i32> %input) {
|
||||
entry:
|
||||
%0 = bitcast <4 x i32> %input to i128
|
||||
%1 = icmp ne i128 %0, 0
|
||||
%2 = zext i1 %1 to i32
|
||||
ret i32 %2
|
||||
; SSE41: vectest
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: vectest
|
||||
; AVX: vptest
|
||||
; AVX: ret
|
||||
}
|
||||
|
||||
; vecsel - bitcasts the <4 x i32> argument to i128, compares it against zero
; and uses the result to select between the two scalar arguments: %a when the
; value is non-zero, %b otherwise. The check lines at the end of the body
; expect a ptest (vptest in the AVX run) between the function label and the
; ret.
define i32 @vecsel(<4 x i32> %input, i32 %a, i32 %b) {
|
||||
entry:
|
||||
%0 = bitcast <4 x i32> %input to i128
|
||||
%1 = icmp ne i128 %0, 0
|
||||
%2 = select i1 %1, i32 %a, i32 %b
|
||||
ret i32 %2
|
||||
; SSE41: vecsel
|
||||
; SSE41: ptest
|
||||
; SSE41: ret
|
||||
; AVX: vecsel
|
||||
; AVX: vptest
|
||||
; AVX: ret
|
||||
}
|
Loading…
Reference in New Issue
Block a user