diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index ec10968874e..e45ca4dbc0d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4110,6 +4110,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
 // shuffle in combination with VEXTs.
 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
   SDLoc dl(Op);
   EVT VT = Op.getValueType();
   unsigned NumElts = VT.getVectorNumElements();
@@ -4158,35 +4159,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
 
   SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
   int VEXTOffsets[2] = { 0, 0 };
+  int OffsetMultipliers[2] = { 1, 1 };
 
   // This loop extracts the usage patterns of the source vectors
   // and prepares appropriate SDValues for a shuffle if possible.
   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
-    if (SourceVecs[i].getValueType() == VT) {
+    unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
+    SDValue CurSource = SourceVecs[i];
+    if (SourceVecs[i].getValueType().getVectorElementType() !=
+        VT.getVectorElementType()) {
+      // We may hit this case if SourceVecs[i] is AssertSext/AssertZext.
+      // Then bitcast it to the vector which holds the asserted element type,
+      // and record the multiplier of element width between SourceVecs and
+      // Build_vector, which is needed to extract the correct lanes later.
+      EVT CastVT =
+          EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+                           SourceVecs[i].getValueSizeInBits() /
+                               VT.getVectorElementType().getSizeInBits());
+
+      CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
+      OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
+      NumSrcElts *= OffsetMultipliers[i];
+      MaxElts[i] *= OffsetMultipliers[i];
+      MinElts[i] *= OffsetMultipliers[i];
+    }
+
+    if (CurSource.getValueType() == VT) {
       // No VEXT necessary
-      ShuffleSrcs[i] = SourceVecs[i];
+      ShuffleSrcs[i] = CurSource;
       VEXTOffsets[i] = 0;
       continue;
-    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+    } else if (NumSrcElts < NumElts) {
       // We can pad out the smaller vector for free, so if it's part of a
       // shuffle...
-      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
-                                   DAG.getUNDEF(SourceVecs[i].getValueType()));
+      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
+                                   DAG.getUNDEF(CurSource.getValueType()));
       continue;
     }
 
-    // Don't attempt to extract subvectors from BUILD_VECTOR sources
-    // that expand or trunc the original value.
-    // TODO: We can try to bitcast and ANY_EXTEND the result but
-    // we need to consider the cost of vector ANY_EXTEND, and the
-    // legality of all the types.
-    if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType())
-      return SDValue();
-
     // Since only 64-bit and 128-bit vectors are legal on ARM and
     // we've eliminated the other cases...
-    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+    assert(NumSrcElts == 2 * NumElts &&
            "unexpected vector sizes in ReconstructShuffle");
 
     if (MaxElts[i] - MinElts[i] >= NumElts) {
@@ -4197,22 +4210,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     if (MinElts[i] >= NumElts) {
       // The extraction can just take the second half
       VEXTOffsets[i] = NumElts;
-      ShuffleSrcs[i] =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
-                      DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                   DAG.getIntPtrConstant(NumElts));
     } else if (MaxElts[i] < NumElts) {
       // The extraction can just take the first half
       VEXTOffsets[i] = 0;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
-                                   SourceVecs[i], DAG.getIntPtrConstant(0));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                   DAG.getIntPtrConstant(0));
     } else {
       // An actual VEXT is needed
       VEXTOffsets[i] = MinElts[i];
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
-                                     SourceVecs[i], DAG.getIntPtrConstant(0));
-      SDValue VEXTSrc2 =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
-                      DAG.getIntPtrConstant(NumElts));
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                     DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                     DAG.getIntPtrConstant(NumElts));
       unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
       ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
                                    DAG.getConstant(Imm, MVT::i32));
@@ -4232,9 +4243,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
       int ExtractElt =
           cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
       if (ExtractVec == SourceVecs[0]) {
-        Mask.push_back(ExtractElt - VEXTOffsets[0]);
+        Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
       } else {
-        Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+        Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
+                       VEXTOffsets[1]);
       }
     }
 
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
new file mode 100644
index 00000000000..6bfb0a54ea3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+
+define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i16
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
+; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+  %tmp1 = load <4 x double>* %ptr
+  %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i8
+; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
+; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
+; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
+; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
+; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+  %tmp1 = load <8 x double>* %ptr
+  %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
+  ret <8 x i8> %tmp2
+}
+
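The heart of the fix is the lane-index rescaling: once a mismatched source is bitcast to a vector with the BUILD_VECTOR's element type, every recorded lane index (Mask, MinElts, MaxElts) must be scaled by the ratio of the two element counts. A minimal standalone sketch of that arithmetic follows, assuming a 128-bit v2i64 source feeding a v4i16 build_vector; the shapes are illustrative, not taken from the patch, and the names only loosely mirror the patch's locals.

#include <cstdio>

int main() {
  // Assumed shapes: a 128-bit v2i64 source feeds a v4i16 BUILD_VECTOR.
  unsigned SrcBits = 128, NumSrcElts = 2; // v2i64 source (assumption)
  unsigned EltBits = 16;                  // BUILD_VECTOR element width (i16)
  // The patch bitcasts the source to v(SrcBits / EltBits)i16, i.e. v8i16,
  // so each original lane now spans OffsetMultiplier lanes of the cast.
  unsigned CastElts = SrcBits / EltBits;             // 8 lanes after bitcast
  unsigned OffsetMultiplier = CastElts / NumSrcElts; // == 4
  // A value extracted as lane k of the original source therefore lives at
  // lane k * OffsetMultiplier of the cast vector, which is the scaling the
  // patch applies when building the shuffle mask.
  for (unsigned k = 0; k < NumSrcElts; ++k)
    printf("source lane %u -> cast-vector lane %u\n", k, k * OffsetMultiplier);
  return 0;
}

Before the fix, these element-type mismatches (as produced by AssertSext/AssertZext wrappers in the fptosi tests above) made ReconstructShuffle bail out with SDValue(), so the scaling lets the shuffle path handle them instead.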