Fixed a bug in lowering shuffle_vectors to insertps

Summary:
isINSERTPSMask was too strict: it did not account for undef mask elements, which can be treated as coming from either input vector.
Added a test case and updated an existing one whose codegen improved.

Reviewers: grosbach, nadav, delena

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D4039

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210361 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Filipe Cabecinhas 2014-06-06 18:07:06 +00:00
parent 64d39d3281
commit 78cf19b9b9
3 changed files with 35 additions and 11 deletions

View File

@ -3964,14 +3964,22 @@ static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
unsigned CorrectPosV1 = 0;
unsigned CorrectPosV2 = 0;
for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i)
for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
if (Mask[i] == -1) {
++CorrectPosV1;
++CorrectPosV2;
continue;
}
if (Mask[i] == i)
++CorrectPosV1;
else if (Mask[i] == i + 4)
++CorrectPosV2;
}
if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
// We have 3 elements from one vector, and one from another.
// We have 3 elements (undefs count as elements from any vector) from one
// vector, and one from another.
return true;
return false;
@ -7462,8 +7470,9 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
"unsupported vector type for insertps/pinsrd");
int FromV1 = std::count_if(Mask.begin(), Mask.end(),
[](const int &i) { return i < 4; });
auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
auto FromV2Predicate = [](const int &i) { return i >= 4; };
int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
SDValue From;
SDValue To;
@ -7471,15 +7480,17 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
if (FromV1 == 1) {
From = V1;
To = V2;
DestIndex = std::find_if(Mask.begin(), Mask.end(),
[](const int &i) { return i < 4; }) -
DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
Mask.begin();
} else {
assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
"More than one element from V1 and from V2, or no elements from one "
"of the vectors. This case should not have returned true from "
"isINSERTPSMask");
From = V2;
To = V1;
DestIndex = std::find_if(Mask.begin(), Mask.end(),
[](const int &i) { return i >= 4; }) -
Mask.begin();
DestIndex =
std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
}
if (MayFoldLoad(From)) {

View File

@ -5,8 +5,10 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
%b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef>
ret <4 x float> %b
; CHECK-LABEL: test1:
; CHECK: vshufps
; CHECK: vpshufd
;; TODO: This test could be improved by removing the xor instruction and
;; having vinsertps zero out the needed elements.
; CHECK: vxorps
; CHECK: vinsertps
}
; rdar://10538417

View File

@ -692,3 +692,14 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
%13 = fadd <4 x float> %11, %12
ret <4 x float> %13
}
;; Regression test: a shuffle whose mask contains an undef element is expected
;; to lower to a single insertps instead of a shufps-based sequence.
;;
;; %2 holds the float loaded from %b in lane 0; its other lanes are undef.
;; The shuffle mask <4, undef, 0, 7> then builds the result as:
;;   lane 0 <- lane 0 of %2 (the loaded float)
;;   lane 1 <- undef
;;   lane 2 <- lane 0 of %a
;;   lane 3 <- lane 3 of %2 (undef, since only lane 0 of %2 was written)
;; so only one defined lane comes from %a and the rest can come from %2.
;;
;; NOTE(review): the immediate $32 (0x20) presumably encodes "source lane 0 ->
;; destination lane 2, no zero mask"; confirm against the INSERTPS encoding.
define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
; CHECK-LABEL: insertps_with_undefs:
; CHECK-NOT: shufps
; CHECK: insertps $32, %xmm0
; CHECK: ret
%1 = load float* %b, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
ret <4 x float> %result
}