mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-05-22 19:38:40 +00:00
AVX-512: simplified BUILD_VECTOR for masks; fixed cmp/test sequence
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201487 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8e810aeec3
commit
f280c65b32
@ -5802,16 +5802,20 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
|
|
||||||
bool AllContants = true;
|
bool AllContants = true;
|
||||||
uint64_t Immediate = 0;
|
uint64_t Immediate = 0;
|
||||||
|
int NonConstIdx = -1;
|
||||||
|
bool IsSplat = true;
|
||||||
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
|
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
|
||||||
SDValue In = Op.getOperand(idx);
|
SDValue In = Op.getOperand(idx);
|
||||||
if (In.getOpcode() == ISD::UNDEF)
|
if (In.getOpcode() == ISD::UNDEF)
|
||||||
continue;
|
continue;
|
||||||
if (!isa<ConstantSDNode>(In)) {
|
if (!isa<ConstantSDNode>(In)) {
|
||||||
AllContants = false;
|
AllContants = false;
|
||||||
break;
|
NonConstIdx = idx;
|
||||||
}
|
}
|
||||||
if (cast<ConstantSDNode>(In)->getZExtValue())
|
else if (cast<ConstantSDNode>(In)->getZExtValue())
|
||||||
Immediate |= (1ULL << idx);
|
Immediate |= (1ULL << idx);
|
||||||
|
if (In != Op.getOperand(0))
|
||||||
|
IsSplat = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (AllContants) {
|
if (AllContants) {
|
||||||
@ -5821,63 +5825,19 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
DAG.getIntPtrConstant(0));
|
DAG.getIntPtrConstant(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Splat vector (with undefs)
|
if (!IsSplat && (NonConstIdx != 0))
|
||||||
SDValue In = Op.getOperand(0);
|
llvm_unreachable("Unsupported BUILD_VECTOR operation");
|
||||||
for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) {
|
MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
|
||||||
if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF)
|
SDValue Select;
|
||||||
llvm_unreachable("Unsupported predicate operation");
|
if (IsSplat)
|
||||||
}
|
Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
|
||||||
|
DAG.getConstant(-1, SelectVT),
|
||||||
SDValue EFLAGS, X86CC;
|
DAG.getConstant(0, SelectVT));
|
||||||
if (In.getOpcode() == ISD::SETCC) {
|
else
|
||||||
SDValue Op0 = In.getOperand(0);
|
Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
|
||||||
SDValue Op1 = In.getOperand(1);
|
DAG.getConstant((Immediate | 1), SelectVT),
|
||||||
ISD::CondCode CC = cast<CondCodeSDNode>(In.getOperand(2))->get();
|
DAG.getConstant(Immediate, SelectVT));
|
||||||
bool isFP = Op1.getValueType().isFloatingPoint();
|
return DAG.getNode(ISD::BITCAST, dl, VT, Select);
|
||||||
unsigned X86CCVal = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
|
|
||||||
|
|
||||||
assert(X86CCVal != X86::COND_INVALID && "Unsupported predicate operation");
|
|
||||||
|
|
||||||
X86CC = DAG.getConstant(X86CCVal, MVT::i8);
|
|
||||||
EFLAGS = EmitCmp(Op0, Op1, X86CCVal, DAG);
|
|
||||||
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
|
|
||||||
} else if (In.getOpcode() == X86ISD::SETCC) {
|
|
||||||
X86CC = In.getOperand(0);
|
|
||||||
EFLAGS = In.getOperand(1);
|
|
||||||
} else {
|
|
||||||
// The algorithm:
|
|
||||||
// Bit1 = In & 0x1
|
|
||||||
// if (Bit1 != 0)
|
|
||||||
// ZF = 0
|
|
||||||
// else
|
|
||||||
// ZF = 1
|
|
||||||
// if (ZF == 0)
|
|
||||||
// res = allOnes ### CMOVNE -1, %res
|
|
||||||
// else
|
|
||||||
// res = allZero
|
|
||||||
MVT InVT = In.getSimpleValueType();
|
|
||||||
SDValue Bit1 = DAG.getNode(ISD::AND, dl, InVT, In, DAG.getConstant(1, InVT));
|
|
||||||
EFLAGS = EmitTest(Bit1, X86::COND_NE, DAG);
|
|
||||||
X86CC = DAG.getConstant(X86::COND_NE, MVT::i8);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (VT == MVT::v16i1) {
|
|
||||||
SDValue Cst1 = DAG.getConstant(-1, MVT::i16);
|
|
||||||
SDValue Cst0 = DAG.getConstant(0, MVT::i16);
|
|
||||||
SDValue CmovOp = DAG.getNode(X86ISD::CMOV, dl, MVT::i16,
|
|
||||||
Cst0, Cst1, X86CC, EFLAGS);
|
|
||||||
return DAG.getNode(ISD::BITCAST, dl, VT, CmovOp);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (VT == MVT::v8i1) {
|
|
||||||
SDValue Cst1 = DAG.getConstant(-1, MVT::i32);
|
|
||||||
SDValue Cst0 = DAG.getConstant(0, MVT::i32);
|
|
||||||
SDValue CmovOp = DAG.getNode(X86ISD::CMOV, dl, MVT::i32,
|
|
||||||
Cst0, Cst1, X86CC, EFLAGS);
|
|
||||||
CmovOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CmovOp);
|
|
||||||
return DAG.getNode(ISD::BITCAST, dl, VT, CmovOp);
|
|
||||||
}
|
|
||||||
llvm_unreachable("Unsupported predicate operation");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
@ -9808,12 +9768,8 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
|
|||||||
if (C->getAPIntValue() == 0)
|
if (C->getAPIntValue() == 0)
|
||||||
return EmitTest(Op0, X86CC, DAG);
|
return EmitTest(Op0, X86CC, DAG);
|
||||||
|
|
||||||
if (Op0.getValueType() == MVT::i1) {
|
if (Op0.getValueType() == MVT::i1)
|
||||||
// invert the value
|
llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
|
||||||
Op0 = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0,
|
|
||||||
DAG.getConstant(-1, MVT::i1));
|
|
||||||
return EmitTest(Op0, X86CC, DAG);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
|
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
|
||||||
@ -10302,6 +10258,13 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return SetCC;
|
return SetCC;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if ((Op0.getValueType() == MVT::i1) && (Op1.getOpcode() == ISD::Constant) &&
|
||||||
|
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1) &&
|
||||||
|
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
||||||
|
|
||||||
|
ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true);
|
||||||
|
return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, MVT::i1), NewCC);
|
||||||
|
}
|
||||||
|
|
||||||
bool isFP = Op1.getSimpleValueType().isFloatingPoint();
|
bool isFP = Op1.getSimpleValueType().isFloatingPoint();
|
||||||
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
|
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
|
||||||
|
@ -1063,11 +1063,6 @@ def : Pat<(or VK1:$src1, VK1:$src2),
|
|||||||
(COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
|
(COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
|
||||||
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
|
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
|
||||||
|
|
||||||
def : Pat<(not VK1:$src),
|
|
||||||
(COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src, VK16),
|
|
||||||
(COPY_TO_REGCLASS (VCMPSSZrr (f32 (IMPLICIT_DEF)),
|
|
||||||
(f32 (IMPLICIT_DEF)), (i8 0)), VK16)), VK1)>;
|
|
||||||
|
|
||||||
def : Pat<(and VK1:$src1, VK1:$src2),
|
def : Pat<(and VK1:$src1, VK1:$src2),
|
||||||
(COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
|
(COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
|
||||||
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
|
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
|
||||||
|
@ -39,25 +39,6 @@ define i32 @test3(float %a, float %b) {
|
|||||||
ret i32 %conv11.i
|
ret i32 %conv11.i
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: test4
|
|
||||||
; CHECK: kortestw
|
|
||||||
; CHECK: jne
|
|
||||||
; CHECK: ret
|
|
||||||
declare i32 @llvm.x86.avx512.kortestz.w(i16, i16)
|
|
||||||
|
|
||||||
define i16 @test4(i16 %a, i16 %b) {
|
|
||||||
%kortz = call i32 @llvm.x86.avx512.kortestz.w(i16 %a, i16 %b)
|
|
||||||
%t1 = and i32 %kortz, 1
|
|
||||||
%res = icmp eq i32 %t1, 0
|
|
||||||
br i1 %res, label %A, label %B
|
|
||||||
|
|
||||||
A: ret i16 %a
|
|
||||||
B:
|
|
||||||
%b1 = add i16 %a, %b
|
|
||||||
ret i16 %b1
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
; CHECK-LABEL: test5
|
; CHECK-LABEL: test5
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define float @test5(float %p) #0 {
|
define float @test5(float %p) #0 {
|
||||||
@ -104,4 +85,4 @@ define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
|
|||||||
%tmp5 = or i1 %tmp3, %tmp4
|
%tmp5 = or i1 %tmp3, %tmp4
|
||||||
%res = select i1 %tmp5, i32 1, i32 %a3
|
%res = select i1 %tmp5, i32 1, i32 %a3
|
||||||
ret i32 %res
|
ret i32 %res
|
||||||
}
|
}
|
||||||
|
@ -102,9 +102,8 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
|
|||||||
;CHECK: vpcmpltud
|
;CHECK: vpcmpltud
|
||||||
;CKECK: kshiftlw $11
|
;CKECK: kshiftlw $11
|
||||||
;CKECK: kshiftrw $15
|
;CKECK: kshiftrw $15
|
||||||
;CHECK: kxorw
|
|
||||||
;CHECK: kortestw
|
;CHECK: kortestw
|
||||||
;CHECK: jne
|
;CHECK: je
|
||||||
;CHECK: ret
|
;CHECK: ret
|
||||||
;CHECK: ret
|
;CHECK: ret
|
||||||
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
|
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
|
||||||
@ -133,4 +132,17 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
|
|||||||
ret i64 %res
|
ret i64 %res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;CHECK-LABEL: test13
|
||||||
|
;CHECK: cmpl
|
||||||
|
;CHECK: sbbl
|
||||||
|
;CKECK: orl $65532
|
||||||
|
;CHECK: ret
|
||||||
|
define i16 @test13(i32 %a, i32 %b) {
|
||||||
|
%cmp_res = icmp ult i32 %a, %b
|
||||||
|
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
|
||||||
|
%res = bitcast <16 x i1> %maskv to i16
|
||||||
|
ret i16 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user