mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-17 03:30:28 +00:00
This patch optimizes a specific shuffle instruction, generating 2 instructions instead of 4.
Since this specific shuffle is widely used in many workloads, we see a ~10% performance improvement on them.

Example shuffle:
shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>

Before:
vmovaps (%rdx), %ymm0
vshufps $8, %ymm0, %ymm0, %ymm0
vmovaps (%rcx), %ymm1
vshufps $8, %ymm0, %ymm1, %ymm1
vunpcklps %ymm0, %ymm1, %ymm0

After:
vmovaps (%rcx), %ymm0
vmovsldup (%rdx), %ymm1
vblendps $85, %ymm0, %ymm1, %ymm0

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163134 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7765492a7a
commit
3251020738
@ -3506,25 +3506,26 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
|
||||
if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
|
||||
MatchOddMask = false;
|
||||
}
|
||||
static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
|
||||
static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
|
||||
|
||||
const int *CompactionMask;
|
||||
if (MatchEvenMask)
|
||||
CompactionMask = CompactionMaskEven;
|
||||
else if (MatchOddMask)
|
||||
CompactionMask = CompactionMaskOdd;
|
||||
else
|
||||
if (!MatchEvenMask && !MatchOddMask)
|
||||
return SDValue();
|
||||
|
||||
|
||||
SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
|
||||
|
||||
SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
|
||||
UndefNode, CompactionMask);
|
||||
SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
|
||||
UndefNode, CompactionMask);
|
||||
static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
|
||||
return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
|
||||
SDValue Op0 = SVOp->getOperand(0);
|
||||
SDValue Op1 = SVOp->getOperand(1);
|
||||
|
||||
if (MatchEvenMask) {
|
||||
// Shift the second operand right by 32 bits.
|
||||
static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
|
||||
Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
|
||||
} else {
|
||||
// Shift the first operand left by 32 bits.
|
||||
static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
|
||||
Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
|
||||
}
|
||||
static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
|
||||
return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
|
||||
}
|
||||
|
||||
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
|
@ -229,9 +229,8 @@ define <8 x float> @test17(<4 x float> %y) {
|
||||
}
|
||||
|
||||
; CHECK: test18
|
||||
; CHECK: vshufps
|
||||
; CHECK: vshufps
|
||||
; CHECK: vunpcklps
|
||||
; CHECK: vmovshdup
|
||||
; CHECK: vblendps
|
||||
; CHECK: ret
|
||||
define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
|
||||
%S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
|
||||
@ -239,9 +238,8 @@ define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
|
||||
}
|
||||
|
||||
; CHECK: test19
|
||||
; CHECK: vshufps
|
||||
; CHECK: vshufps
|
||||
; CHECK: vunpcklps
|
||||
; CHECK: vmovsldup
|
||||
; CHECK: vblendps
|
||||
; CHECK: ret
|
||||
define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
|
||||
%S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
|
||||
|
Loading…
Reference in New Issue
Block a user