From f280c65b32358025fd64495c016ccb6f3f70427c Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 16 Feb 2014 11:34:23 +0000 Subject: [PATCH] AVX-512: simplified BUILD_VECTOR for masks; fixed cmp/test sequence git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201487 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 93 +++++++---------------- lib/Target/X86/X86InstrAVX512.td | 5 -- test/CodeGen/X86/avx512-cmp.ll | 21 +---- test/CodeGen/X86/avx512-insert-extract.ll | 16 +++- 4 files changed, 43 insertions(+), 92 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4ce2ea36c00..e35aa8fa208 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5802,16 +5802,20 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { bool AllContants = true; uint64_t Immediate = 0; + int NonConstIdx = -1; + bool IsSplat = true; for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { SDValue In = Op.getOperand(idx); if (In.getOpcode() == ISD::UNDEF) continue; if (!isa(In)) { AllContants = false; - break; + NonConstIdx = idx; } - if (cast(In)->getZExtValue()) + else if (cast(In)->getZExtValue()) Immediate |= (1ULL << idx); + if (In != Op.getOperand(0)) + IsSplat = false; } if (AllContants) { @@ -5821,63 +5825,19 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { DAG.getIntPtrConstant(0)); } - // Splat vector (with undefs) - SDValue In = Op.getOperand(0); - for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) { - if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF) - llvm_unreachable("Unsupported predicate operation"); - } - - SDValue EFLAGS, X86CC; - if (In.getOpcode() == ISD::SETCC) { - SDValue Op0 = In.getOperand(0); - SDValue Op1 = In.getOperand(1); - ISD::CondCode CC = cast(In.getOperand(2))->get(); - bool isFP = Op1.getValueType().isFloatingPoint(); - unsigned 
X86CCVal = TranslateX86CC(CC, isFP, Op0, Op1, DAG); - - assert(X86CCVal != X86::COND_INVALID && "Unsupported predicate operation"); - - X86CC = DAG.getConstant(X86CCVal, MVT::i8); - EFLAGS = EmitCmp(Op0, Op1, X86CCVal, DAG); - EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); - } else if (In.getOpcode() == X86ISD::SETCC) { - X86CC = In.getOperand(0); - EFLAGS = In.getOperand(1); - } else { - // The algorithm: - // Bit1 = In & 0x1 - // if (Bit1 != 0) - // ZF = 0 - // else - // ZF = 1 - // if (ZF == 0) - // res = allOnes ### CMOVNE -1, %res - // else - // res = allZero - MVT InVT = In.getSimpleValueType(); - SDValue Bit1 = DAG.getNode(ISD::AND, dl, InVT, In, DAG.getConstant(1, InVT)); - EFLAGS = EmitTest(Bit1, X86::COND_NE, DAG); - X86CC = DAG.getConstant(X86::COND_NE, MVT::i8); - } - - if (VT == MVT::v16i1) { - SDValue Cst1 = DAG.getConstant(-1, MVT::i16); - SDValue Cst0 = DAG.getConstant(0, MVT::i16); - SDValue CmovOp = DAG.getNode(X86ISD::CMOV, dl, MVT::i16, - Cst0, Cst1, X86CC, EFLAGS); - return DAG.getNode(ISD::BITCAST, dl, VT, CmovOp); - } - - if (VT == MVT::v8i1) { - SDValue Cst1 = DAG.getConstant(-1, MVT::i32); - SDValue Cst0 = DAG.getConstant(0, MVT::i32); - SDValue CmovOp = DAG.getNode(X86ISD::CMOV, dl, MVT::i32, - Cst0, Cst1, X86CC, EFLAGS); - CmovOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CmovOp); - return DAG.getNode(ISD::BITCAST, dl, VT, CmovOp); - } - llvm_unreachable("Unsupported predicate operation"); + if (!IsSplat && (NonConstIdx != 0)) + llvm_unreachable("Unsupported BUILD_VECTOR operation"); + MVT SelectVT = (VT == MVT::v16i1)? 
MVT::i16 : MVT::i8; + SDValue Select; + if (IsSplat) + Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0), + DAG.getConstant(-1, SelectVT), + DAG.getConstant(0, SelectVT)); + else + Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0), + DAG.getConstant((Immediate | 1), SelectVT), + DAG.getConstant(Immediate, SelectVT)); + return DAG.getNode(ISD::BITCAST, dl, VT, Select); } SDValue @@ -9808,12 +9768,8 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, if (C->getAPIntValue() == 0) return EmitTest(Op0, X86CC, DAG); - if (Op0.getValueType() == MVT::i1) { - // invert the value - Op0 = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0, - DAG.getConstant(-1, MVT::i1)); - return EmitTest(Op0, X86CC, DAG); - } + if (Op0.getValueType() == MVT::i1) + llvm_unreachable("Unexpected comparison operation for MVT::i1 operands"); } if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || @@ -10302,6 +10258,13 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } } + if ((Op0.getValueType() == MVT::i1) && (Op1.getOpcode() == ISD::Constant) && + (cast(Op1)->getZExtValue() == 1) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + + ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true); + return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, MVT::i1), NewCC); + } bool isFP = Op1.getSimpleValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 7990d6abcd2..8da347bc077 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1063,11 +1063,6 @@ def : Pat<(or VK1:$src1, VK1:$src2), (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16), (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; -def : Pat<(not VK1:$src), - (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src, VK16), - (COPY_TO_REGCLASS (VCMPSSZrr (f32 (IMPLICIT_DEF)), - (f32 
(IMPLICIT_DEF)), (i8 0)), VK16)), VK1)>; - def : Pat<(and VK1:$src1, VK1:$src2), (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16), (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll index 3515529c89c..47e50a93796 100644 --- a/test/CodeGen/X86/avx512-cmp.ll +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -39,25 +39,6 @@ define i32 @test3(float %a, float %b) { ret i32 %conv11.i } -; CHECK-LABEL: test4 -; CHECK: kortestw -; CHECK: jne -; CHECK: ret -declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) - -define i16 @test4(i16 %a, i16 %b) { - %kortz = call i32 @llvm.x86.avx512.kortestz.w(i16 %a, i16 %b) - %t1 = and i32 %kortz, 1 - %res = icmp eq i32 %t1, 0 - br i1 %res, label %A, label %B - - A: ret i16 %a - B: - %b1 = add i16 %a, %b - ret i16 %b1 - -} - ; CHECK-LABEL: test5 ; CHECK: ret define float @test5(float %p) #0 { @@ -104,4 +85,4 @@ define i32 @test8(i32 %a1, i32 %a2, i32 %a3) { %tmp5 = or i1 %tmp3, %tmp4 %res = select i1 %tmp5, i32 1, i32 %a3 ret i32 %res - } \ No newline at end of file +} diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index f8a84bb1f2f..4fca78fe72e 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -102,9 +102,8 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { ;CHECK: vpcmpltud ;CKECK: kshiftlw $11 ;CKECK: kshiftrw $15 -;CHECK: kxorw ;CHECK: kortestw -;CHECK: jne +;CHECK: je ;CHECK: ret ;CHECK: ret define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) { @@ -133,4 +132,17 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) { ret i64 %res } +;CHECK-LABEL: test13 +;CHECK: cmpl +;CHECK: sbbl +;CKECK: orl $65532 +;CHECK: ret +define i16 @test13(i32 %a, i32 %b) { + %cmp_res = icmp ult i32 %a, %b + %maskv = insertelement <16 x i1> , i1 %cmp_res, i32 0 + %res = bitcast <16 x i1> %maskv to i16 + ret i16 %res +} + +