[X86] Improve buildFromShuffleMostly for AVX

For a 256-bit BUILD_VECTOR consisting mostly of shuffles of 256-bit vectors,
both the BUILD_VECTOR and its operands may need to be legalized in multiple
steps.  Consider:

(v8f32 (BUILD_VECTOR (extract_vector_elt (v8f32 %vreg0,) Constant<1>),
                     (extract_vector_elt %vreg0, Constant<2>),
                     (extract_vector_elt %vreg0, Constant<3>),
                     (extract_vector_elt %vreg0, Constant<4>),
                     (extract_vector_elt %vreg0, Constant<5>),
                     (extract_vector_elt %vreg0, Constant<6>),
                     (extract_vector_elt %vreg0, Constant<7>),
                     %vreg1))

a. We can't build a 256-bit vector efficiently so, we need to split it into
two 128-bit vecs and combine them with VINSERTX128.

b. Operands like (extract_vector_elt (v8f32 %vreg0), Constant<7>) needs to be
split into a VEXTRACTX128 and a further extract_vector_elt from the
resulting 128-bit vector.

c. The extract_vector_elt from b. is lowered into a shuffle to the first
element and a movss.

Depending on the order in which we legalize the BUILD_VECTOR and its
operands[1], buildFromShuffleMostly may be faced with:

(v4f32 (BUILD_VECTOR (extract_vector_elt
                      (vector_shuffle<1,u,u,u> (extract_subvector %vreg0, Constant<4>), undef),
                      Constant<0>),
                     (extract_vector_elt
                      (vector_shuffle<2,u,u,u> (extract_subvector %vreg0, Constant<4>), undef),
                      Constant<0>),
                     (extract_vector_elt
                      (vector_shuffle<3,u,u,u> (extract_subvector %vreg0, Constant<4>), undef),
                      Constant<0>),
                     %vreg1))

In order to figure out the underlying vector and their identity we need to see
through the shuffles.

[1] Note that the order in which operations and their operands are legalized is
only guaranteed in the first iteration of LegalizeDAG.

Fixes <rdar://problem/16296956>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206634 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Adam Nemet 2014-04-18 19:44:16 +00:00
parent 842c27189a
commit d290fa608f
2 changed files with 60 additions and 6 deletions

View File

@ -5703,6 +5703,41 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
return SDValue();
}
/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
/// underlying vector and index.
///
/// Modifies \p ExtractedFromVec to the real vector and returns the real
/// index.
static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
SDValue ExtIdx) {
int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
return Idx;
// For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
// lowered this:
// (extract_vector_elt (v8f32 %vreg1), Constant<6>)
// to:
// (extract_vector_elt (vector_shuffle<2,u,u,u>
// (extract_subvector (v8f32 %vreg0), Constant<4>),
// undef)
// Constant<0>)
// In this case the vector is the extract_subvector expression and the index
// is 2, as specified by the shuffle.
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
SDValue ShuffleVec = SVOp->getOperand(0);
MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
assert(ShuffleVecVT.getVectorElementType() ==
ExtractedFromVec.getSimpleValueType().getVectorElementType());
int ShuffleIdx = SVOp->getMaskElt(Idx);
if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
ExtractedFromVec = ShuffleVec;
return ShuffleIdx;
}
return Idx;
}
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
@ -5736,15 +5771,15 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
// Quit if extracted from vector of different type.
if (ExtractedFromVec.getValueType() != VT)
return SDValue();
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
if (VecIn1.getNode() == 0)
VecIn1 = ExtractedFromVec;
else if (VecIn1 != ExtractedFromVec) {
@ -5755,8 +5790,6 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
if (ExtractedFromVec == VecIn1)
Mask[i] = Idx;
else if (ExtractedFromVec == VecIn2)

View File

@ -0,0 +1,21 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
; Use buildFromShuffleMostly which allows this to be generated as two 128-bit
; shuffles and an insert.
; This is the (somewhat questionable) LLVM IR that is generated for:
; x8.s0123456 = x8.s1234567; // x8 is a <8 x float> type
; x8.s7 = f; // f is float
define <8 x float> @test1(<8 x float> %a, float %b) {
; CHECK-LABEL: test1:
; CHECK: vinsertps
; CHECK-NOT: vinsertps
entry:
%shift = shufflevector <8 x float> %a, <8 x float> undef, <7 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%extend = shufflevector <7 x float> %shift, <7 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
%insert = insertelement <8 x float> %extend, float %b, i32 7
ret <8 x float> %insert
}