mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
[x86] Generalize the single-element insertion lowering to work with
floating point types and use it for both v2f64 and v2i64 single-element insertion lowering. This fixes the last non-AVX performance regression test case I've got for the new vector shuffle lowering. There is an obvious analogous lowering for v4f32 that I'll add in a follow-up patch (because with INSERTPS, v4f32 requires special treatment). After that, it's AVX stuff. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218175 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8924ed3db4
commit
cc62abbe39
@ -7553,7 +7553,7 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
|
|||||||
///
|
///
|
||||||
/// This is a common pattern that we have especially efficient patterns to lower
|
/// This is a common pattern that we have especially efficient patterns to lower
|
||||||
/// across all subtarget feature sets.
|
/// across all subtarget feature sets.
|
||||||
static SDValue lowerIntegerElementInsertionVectorShuffle(
|
static SDValue lowerVectorShuffleAsElementInsertion(
|
||||||
MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
|
MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
|
||||||
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
|
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
|
||||||
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
|
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
|
||||||
@ -7561,10 +7561,30 @@ static SDValue lowerIntegerElementInsertionVectorShuffle(
|
|||||||
int V2Index = std::find_if(Mask.begin(), Mask.end(),
|
int V2Index = std::find_if(Mask.begin(), Mask.end(),
|
||||||
[&Mask](int M) { return M >= (int)Mask.size(); }) -
|
[&Mask](int M) { return M >= (int)Mask.size(); }) -
|
||||||
Mask.begin();
|
Mask.begin();
|
||||||
|
if (Mask.size() == 2) {
|
||||||
|
if (!Zeroable[V2Index ^ 1]) {
|
||||||
|
// For 2-wide masks we may be able to just invert the inputs. We use an xor
|
||||||
|
// with 2 to flip from {2,3} to {0,1} and vice versa.
|
||||||
|
int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
|
||||||
|
Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
|
||||||
|
if (Zeroable[V2Index])
|
||||||
|
return lowerVectorShuffleAsElementInsertion(VT, DL, V2, V1, InverseMask,
|
||||||
|
Subtarget, DAG);
|
||||||
|
else
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0, Size = Mask.size(); i < Size; ++i)
|
||||||
|
if (i != V2Index && !Zeroable[i])
|
||||||
|
return SDValue(); // Not inserting into a zero vector.
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0, Size = Mask.size(); i < Size; ++i)
|
// Step over any bitcasts on either input so we can scan the actual
|
||||||
if (i != V2Index && !Zeroable[i])
|
// BUILD_VECTOR nodes.
|
||||||
return SDValue(); // Not inserting into a zero vector.
|
while (V1.getOpcode() == ISD::BITCAST)
|
||||||
|
V1 = V1.getOperand(0);
|
||||||
|
while (V2.getOpcode() == ISD::BITCAST)
|
||||||
|
V2 = V2.getOperand(0);
|
||||||
|
|
||||||
// Check for a single input from a SCALAR_TO_VECTOR node.
|
// Check for a single input from a SCALAR_TO_VECTOR node.
|
||||||
// FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
|
// FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
|
||||||
@ -7579,10 +7599,9 @@ static SDValue lowerIntegerElementInsertionVectorShuffle(
|
|||||||
SDValue V2S = V2.getOperand(Mask[V2Index] - Mask.size());
|
SDValue V2S = V2.getOperand(Mask[V2Index] - Mask.size());
|
||||||
|
|
||||||
// First, we need to zext the scalar if it is smaller than an i32.
|
// First, we need to zext the scalar if it is smaller than an i32.
|
||||||
MVT EltVT = VT.getVectorElementType();
|
|
||||||
assert(EltVT == V2S.getSimpleValueType() &&
|
|
||||||
"Different scalar and element types!");
|
|
||||||
MVT ExtVT = VT;
|
MVT ExtVT = VT;
|
||||||
|
MVT EltVT = VT.getVectorElementType();
|
||||||
|
V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
|
||||||
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
|
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
|
||||||
// Zero-extend directly to i32.
|
// Zero-extend directly to i32.
|
||||||
ExtVT = MVT::v4i32;
|
ExtVT = MVT::v4i32;
|
||||||
@ -7650,6 +7669,12 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
|||||||
if (isShuffleEquivalent(Mask, 1, 3))
|
if (isShuffleEquivalent(Mask, 1, 3))
|
||||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
|
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
|
||||||
|
|
||||||
|
// If we have a single input, insert that into V1 if we can do so cheaply.
|
||||||
|
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1)
|
||||||
|
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
|
||||||
|
MVT::v2f64, DL, V1, V2, Mask, Subtarget, DAG))
|
||||||
|
return Insertion;
|
||||||
|
|
||||||
if (Subtarget->hasSSE41())
|
if (Subtarget->hasSSE41())
|
||||||
if (SDValue Blend =
|
if (SDValue Blend =
|
||||||
lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, DAG))
|
lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, DAG))
|
||||||
@ -7697,6 +7722,13 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
|||||||
if (isShuffleEquivalent(Mask, 1, 3))
|
if (isShuffleEquivalent(Mask, 1, 3))
|
||||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
|
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
|
||||||
|
|
||||||
|
// If we have a single input from V2 insert that into V1 if we can do so
|
||||||
|
// cheaply.
|
||||||
|
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1)
|
||||||
|
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
|
||||||
|
MVT::v2i64, DL, V1, V2, Mask, Subtarget, DAG))
|
||||||
|
return Insertion;
|
||||||
|
|
||||||
if (Subtarget->hasSSE41())
|
if (Subtarget->hasSSE41())
|
||||||
if (SDValue Blend =
|
if (SDValue Blend =
|
||||||
lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, DAG))
|
lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, DAG))
|
||||||
@ -7923,8 +7955,8 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
|||||||
|
|
||||||
// There are special ways we can lower some single-element blends.
|
// There are special ways we can lower some single-element blends.
|
||||||
if (NumV2Elements == 1)
|
if (NumV2Elements == 1)
|
||||||
if (SDValue V = lowerIntegerElementInsertionVectorShuffle(
|
if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4i32, DL, V1, V2,
|
||||||
MVT::v4i32, DL, V1, V2, Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return V;
|
return V;
|
||||||
|
|
||||||
if (Subtarget->hasSSE41())
|
if (Subtarget->hasSSE41())
|
||||||
@ -8604,8 +8636,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
|||||||
|
|
||||||
// There are special ways we can lower some single-element blends.
|
// There are special ways we can lower some single-element blends.
|
||||||
if (NumV2Inputs == 1)
|
if (NumV2Inputs == 1)
|
||||||
if (SDValue V = lowerIntegerElementInsertionVectorShuffle(
|
if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v8i16, DL, V1, V2,
|
||||||
MVT::v8i16, DL, V1, V2, Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return V;
|
return V;
|
||||||
|
|
||||||
if (Subtarget->hasSSE41())
|
if (Subtarget->hasSSE41())
|
||||||
@ -8920,8 +8952,8 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
|||||||
|
|
||||||
// There are special ways we can lower some single-element blends.
|
// There are special ways we can lower some single-element blends.
|
||||||
if (NumV2Elements == 1)
|
if (NumV2Elements == 1)
|
||||||
if (SDValue V = lowerIntegerElementInsertionVectorShuffle(
|
if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v16i8, DL, V1, V2,
|
||||||
MVT::v16i8, DL, V1, V2, Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return V;
|
return V;
|
||||||
|
|
||||||
// Check whether a compaction lowering can be done. This handles shuffles
|
// Check whether a compaction lowering can be done. This handles shuffles
|
||||||
|
@ -400,6 +400,44 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
|
||||||
|
; ALL-LABEL: @insert_reg_and_zero_v2i64
|
||||||
|
; ALL: movd %rdi, %xmm0
|
||||||
|
; ALL-NEXT: retq
|
||||||
|
%v = insertelement <2 x i64> undef, i64 %a, i32 0
|
||||||
|
%shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||||
|
ret <2 x i64> %shuffle
|
||||||
|
}
|
||||||
|
|
||||||
|
define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
|
||||||
|
; ALL-LABEL: @insert_mem_and_zero_v2i64
|
||||||
|
; ALL: movq (%rdi), %xmm0
|
||||||
|
; ALL-NEXT: retq
|
||||||
|
%a = load i64* %ptr
|
||||||
|
%v = insertelement <2 x i64> undef, i64 %a, i32 0
|
||||||
|
%shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||||
|
ret <2 x i64> %shuffle
|
||||||
|
}
|
||||||
|
|
||||||
|
define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
|
||||||
|
; ALL-LABEL: @insert_reg_and_zero_v2f64
|
||||||
|
; ALL: movq %xmm0, %xmm0
|
||||||
|
; ALL-NEXT: retq
|
||||||
|
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||||
|
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||||
|
ret <2 x double> %shuffle
|
||||||
|
}
|
||||||
|
|
||||||
|
define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
|
||||||
|
; ALL-LABEL: @insert_mem_and_zero_v2f64
|
||||||
|
; ALL: movsd (%rdi), %xmm0
|
||||||
|
; ALL-NEXT: retq
|
||||||
|
%a = load double* %ptr
|
||||||
|
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||||
|
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||||
|
ret <2 x double> %shuffle
|
||||||
|
}
|
||||||
|
|
||||||
define <2 x double> @insert_dup_reg_v2f64(double %a) {
|
define <2 x double> @insert_dup_reg_v2f64(double %a) {
|
||||||
; SSE2-LABEL: @insert_dup_reg_v2f64
|
; SSE2-LABEL: @insert_dup_reg_v2f64
|
||||||
; SSE2: movlhps {{.*}} # xmm0 = xmm0[0,0]
|
; SSE2: movlhps {{.*}} # xmm0 = xmm0[0,0]
|
||||||
|
Loading…
Reference in New Issue
Block a user