mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-09-30 04:56:49 +00:00
1. Remove the part of r153848 which optimizes shuffle-of-shuffle into a new shuffle node, because it could introduce new shuffle nodes that were not supported efficiently by the target.
2. Add a more restrictive shuffle-of-shuffle optimization for cases where the second shuffle reverses the transformation of the first shuffle.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154266 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
961d666be4
commit
d16c8d0d33
@ -7795,19 +7795,20 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
}
|
||||
|
||||
// If this shuffle node is simply a swizzle of another shuffle node,
|
||||
// optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y).
|
||||
// and it reverses the swizzle of the previous shuffle then we can
|
||||
// optimize shuffle(shuffle(x, undef), undef) -> x.
|
||||
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
|
||||
N1.getOpcode() == ISD::UNDEF) {
|
||||
|
||||
SmallVector<int, 8> NewMask;
|
||||
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
|
||||
|
||||
// If the source shuffle has more than one user then do not try to optimize
|
||||
// it because it may generate a more complex shuffle node. However, if the
|
||||
// source shuffle is also a swizzle (a single source shuffle), our
|
||||
// transformation is still likely to reduce the number of shuffles and only
|
||||
// generate a simple shuffle node.
|
||||
if (N0.getOperand(1).getOpcode() != ISD::UNDEF && !N0.hasOneUse())
|
||||
// Shuffle nodes can only reverse shuffles with a single non-undef value.
|
||||
if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
|
||||
return SDValue();
|
||||
|
||||
// The incoming shuffle must be of the same type as the result of the current
|
||||
// shuffle.
|
||||
if (OtherSV->getOperand(0).getValueType() != VT)
|
||||
return SDValue();
|
||||
|
||||
EVT InVT = N0.getValueType();
|
||||
@ -7824,11 +7825,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
if (Idx >= 0)
|
||||
Idx = OtherSV->getMaskElt(Idx);
|
||||
|
||||
NewMask.push_back(Idx);
|
||||
// The combined shuffle must map each index to itself.
|
||||
if (Idx != i && Idx != -1)
|
||||
return SDValue();
|
||||
}
|
||||
assert(NewMask.size() == VT.getVectorNumElements() && "Invalid mask size");
|
||||
return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
|
||||
OtherSV->getOperand(1), &NewMask[0]);
|
||||
|
||||
return OtherSV->getOperand(0);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -march=cellspu -o %t1.s
|
||||
; RUN: grep rot %t1.s | count 85
|
||||
; RUN: grep rot %t1.s | count 86
|
||||
; RUN: grep roth %t1.s | count 8
|
||||
; RUN: grep roti.*5 %t1.s | count 1
|
||||
; RUN: grep roti.*27 %t1.s | count 1
|
||||
|
@ -4,13 +4,13 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
;CHECK: ltstore
|
||||
;CHECK: movq
|
||||
;CHECK-NEXT: movq
|
||||
;CHECK-NEXT: ret
|
||||
define void @ltstore(<4 x i32>* %pIn, <2 x i32>* %pOut) {
|
||||
;CHECK: movq
|
||||
;CHECK: ret
|
||||
define void @ltstore(<4 x i32>* %pA, <2 x i32>* %pB) {
|
||||
entry:
|
||||
%in = load <4 x i32>* %pIn
|
||||
%in = load <4 x i32>* %pA
|
||||
%j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
store <2 x i32> %j, <2 x i32>* %pOut
|
||||
store <2 x i32> %j, <2 x i32>* %pB
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -41,3 +41,28 @@ define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
|
||||
store <4 x i8> %C, <4 x i8>* %pA
|
||||
ret <4 x i8> %C
|
||||
}
|
||||
|
||||
|
||||
|
||||
; CHECK: reverse_1
|
||||
; CHECK-NOT: shuf
|
||||
; CHECK: ret
|
||||
define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
|
||||
%A = load <4 x i32>* %pA
|
||||
%B = load <4 x i32>* %pB
|
||||
%S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
%S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
ret <4 x i32> %S1
|
||||
}
|
||||
|
||||
|
||||
; CHECK: no_reverse_shuff
|
||||
; CHECK: shuf
|
||||
; CHECK: ret
|
||||
define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
|
||||
%A = load <4 x i32>* %pA
|
||||
%B = load <4 x i32>* %pB
|
||||
%S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
|
||||
%S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
|
||||
ret <4 x i32> %S1
|
||||
}
|
||||
|
@ -10,8 +10,10 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
|
||||
entry:
|
||||
; CHECK: cfi_def_cfa_offset
|
||||
; CHECK-NOT: set
|
||||
; CHECK: pcmpgt
|
||||
; CHECK: blendvps
|
||||
; CHECK: movzwl
|
||||
; CHECK: movzwl
|
||||
; CHECK: pshufd
|
||||
; CHECK: pshufb
|
||||
%shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
|
||||
%cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
|
||||
%sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
|
||||
|
@ -27,11 +27,11 @@ entry:
|
||||
define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
|
||||
entry:
|
||||
; CHECK: t02
|
||||
; CHECK: mov
|
||||
; CHECK-NEXT: mov
|
||||
; CHECK-NEXT: mov
|
||||
; CHECK-NEXT: mov
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK: movaps
|
||||
; CHECK: shufps
|
||||
; CHECK: pshufd
|
||||
; CHECK: movq
|
||||
; CHECK: ret
|
||||
%0 = bitcast <8 x i32>* %source to <4 x i32>*
|
||||
%arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
|
||||
%tmp2 = load <4 x i32>* %arrayidx, align 16
|
||||
|
@ -33,7 +33,7 @@ entry:
|
||||
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
|
||||
entry:
|
||||
; CHECK: shuf3:
|
||||
; CHECK: shufd
|
||||
; CHECK: shufps
|
||||
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
%tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
Loading…
Reference in New Issue
Block a user