AVX-512: insert element to mask vector; store i1 data

Implemented the INSERT_VECTOR_ELT operation for v16i1 and v8i1 vectors.
Implemented "store" for the i1 type.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205850 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2014-04-09 12:37:50 +00:00
parent d97cbff528
commit 0d5d656524
5 changed files with 124 additions and 1 deletions

View File

@ -1394,6 +1394,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
@ -5811,6 +5813,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
uint64_t Immediate = 0;
int NonConstIdx = -1;
bool IsSplat = true;
unsigned NumNonConsts = 0;
unsigned NumConsts = 0;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
@ -5818,9 +5822,13 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(In)) {
AllContants = false;
NonConstIdx = idx;
NumNonConsts++;
}
else if (cast<ConstantSDNode>(In)->getZExtValue())
else {
NumConsts++;
if (cast<ConstantSDNode>(In)->getZExtValue())
Immediate |= (1ULL << idx);
}
if (In != Op.getOperand(0))
IsSplat = false;
}
@ -5832,6 +5840,19 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
DAG.getIntPtrConstant(0));
}
if (NumNonConsts == 1 && NonConstIdx != 0) {
SDValue DstVec;
if (NumConsts) {
SDValue VecAsImm = DAG.getConstant(Immediate,
MVT::getIntegerVT(VT.getSizeInBits()));
DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
}
else
DstVec = DAG.getUNDEF(VT);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(NonConstIdx),
DAG.getIntPtrConstant(NonConstIdx));
}
if (!IsSplat && (NonConstIdx != 0))
llvm_unreachable("Unsupported BUILD_VECTOR operation");
MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
@ -7948,10 +7969,47 @@ static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
/// Insert one bit into a mask vector (v16i1 or v8i1). AVX-512 feature.
///
/// \p Op is the INSERT_VECTOR_ELT node being lowered: operand 0 is the
/// destination mask vector, operand 1 the element to insert and operand 2 the
/// element index.
///
/// Three cases are handled:
///  - Non-constant index: the vector is zero-extended to v8i64 / v16i32 and
///    the element to i64 / i32, the insertion is done on the wide type, and
///    the result is truncated back to the mask type.
///  - Constant index, undef destination: the element is converted to a mask
///    vector with SCALAR_TO_VECTOR and shifted left by the index.
///  - Constant index, defined destination: the element's bit is isolated at
///    the requested lane with a shift-left / shift-right pair, the old bit in
///    that lane of the destination is cleared, and the two values are OR'ed.
///
/// \returns the lowered value, of the same mask vector type as operand 0.
SDValue
X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  MVT VecVT = Vec.getSimpleValueType();

  if (!isa<ConstantSDNode>(Idx)) {
    // Non constant index. Extend source and destination,
    // insert element and then truncate the result.
    MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
    MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
    SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
      DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
      DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
    return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
  }

  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
  SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);

  // Nothing to preserve in an undef destination: just move the bit to its
  // lane.
  if (Vec.getOpcode() == ISD::UNDEF)
    return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
                       DAG.getConstant(IdxVal, MVT::i8));

  // Shift the element all the way to the top bit of the register and back
  // down to lane IdxVal, so that only the inserted bit survives in EltInVec.
  const TargetRegisterClass *RC = getRegClassFor(VecVT);
  unsigned MaxShift = RC->getSize() * 8 - 1;
  EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
                         DAG.getConstant(MaxShift, MVT::i8));
  EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec,
                         DAG.getConstant(MaxShift - IdxVal, MVT::i8));

  // An OR can only set a bit. Clear lane IdxVal of the destination first,
  // otherwise inserting 0 over an existing 1 would leave the lane at 1.
  // The mask is limited to the vector's element count so the constant fits
  // the integer type it is created with.
  unsigned NumElts = VecVT.getVectorNumElements();
  uint64_t KeepBits = (1ULL << NumElts) - 1;
  if (IdxVal < NumElts)
    KeepBits &= ~(1ULL << IdxVal);
  SDValue KeepMask = DAG.getNode(ISD::BITCAST, dl, VecVT,
      DAG.getConstant(KeepBits, MVT::getIntegerVT(VecVT.getSizeInBits())));
  Vec = DAG.getNode(ISD::AND, dl, VecVT, Vec, KeepMask);

  return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
if (EltVT == MVT::i1)
return InsertBitToMaskVector(Op, DAG);
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);

View File

@ -874,6 +874,8 @@ namespace llvm {
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;

View File

@ -984,6 +984,10 @@ let Predicates = [HasAVX512] in {
(EXTRACT_SUBREG
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
sub_16bit)>;
// A scalar_to_vector of a value held in VK1 into a v16i1 mask is selected as
// a plain register-class copy to VK16.
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, VK16)>;
// Same for a v8i1 mask, copying to VK8.
def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, VK8)>;
}
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
@ -1356,6 +1360,14 @@ defm VMOVDQU64: avx512_load<0x6F, VR512, VK8WM, i512mem, load,
"vmovdqu64", SSEPackedInt, v8i64>,
XS, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Masked unaligned-load intrinsics whose pass-through operand is all zeros:
// the GR16 / GR8 mask is copied to the VK16WM / VK8WM register class and the
// load is selected as the "rmkz" form of VMOVDQU32 / VMOVDQU64.
// NOTE(review): "kz" presumably names the zero-masking variant - confirm
// against the avx512_load multiclass.
def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
(v16i32 immAllZerosV), GR16:$mask)),
(VMOVDQU32rmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
(bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
(VMOVDQU64rmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
let AddedComplexity = 20 in {
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
(bc_v8i64 (v16i32 immAllZerosV)))),
@ -4211,3 +4223,7 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
GR8:$mask),
(VPCONFLICTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
// Stores of i1 constants are selected as a byte store (MOV8mi): true, whether
// spelled as -1 or as 1, is written as the byte 1, and false as the byte 0.
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;

View File

@ -158,3 +158,41 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
%res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
ret i64 %res
}
; test15: load an i1, insert it at lane 10 of an undef <16 x i1> vector and
; return the resulting mask as an i16.
;CHECK-LABEL: test15
;CHECK: kshiftlw
;CHECK: kmovw
;CHECK: ret
define i16 @test15(i1 *%addr) {
%x = load i1 * %addr, align 128
%x1 = insertelement <16 x i1> undef, i1 %x, i32 10
%x2 = bitcast <16 x i1>%x1 to i16
ret i16 %x2
}
; test16: load an i1 and insert it at lane 10 of a <16 x i1> mask obtained by
; bitcasting the i16 argument, then return the mask as an i16. Unlike test15
; the destination vector is defined, so its other lanes must be kept.
;CHECK-LABEL: test16
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
%x = load i1 * %addr, align 128
%a1 = bitcast i16 %a to <16 x i1>
%x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
%x2 = bitcast <16 x i1>%x1 to i16
ret i16 %x2
}
; test17: load an i1 and insert it into a <8 x i1> mask obtained by
; bitcasting the i8 argument, then return the mask as an i8.
; The lane index has to be in range for the vector type (0..7 here); an
; out-of-range insertelement index gives an undefined result, so it cannot
; serve as a meaningful codegen test.
;CHECK-LABEL: test17
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: korw
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
  %x = load i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}

View File

@ -153,3 +153,12 @@ define void @test18(i8 * %addr, <8 x i64> %data) {
ret void
}
; store_i1: store the i1 constants true and false through undef pointers in
; address spaces 3 and 2; each store is matched as a byte move.
; CHECK-LABEL: store_i1
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @store_i1() {
store i1 true, i1 addrspace(3)* undef, align 128
store i1 false, i1 addrspace(2)* undef, align 128
ret void
}