mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
[X86, AVX] instcombine vperm2 intrinsics with zero inputs into shuffles
This is the IR optimizer follow-on patch for D8563: the x86 backend patch that converts this kind of shuffle back into a vperm2. This is also a continuation of the transform that started in D8486. In that patch, Andrea suggested that we could convert vperm2 intrinsics that use zero masks into a single shuffle. This is an implementation of that suggestion.

Differential Revision: http://reviews.llvm.org/D8567

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233110 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
71be19dff2
commit
5e0ce9d13a
@ -204,7 +204,7 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
|
||||
InstCombiner::BuilderTy &Builder) {
|
||||
if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
|
||||
VectorType *VecTy = cast<VectorType>(II.getType());
|
||||
uint8_t Imm = CInt->getZExtValue();
|
||||
ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
|
||||
|
||||
// The immediate permute control byte looks like this:
|
||||
// [1:0] - select 128 bits from sources for low half of destination
|
||||
@ -213,37 +213,51 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
|
||||
// [5:4] - select 128 bits from sources for high half of destination
|
||||
// [6] - ignore
|
||||
// [7] - zero high half of destination
|
||||
|
||||
uint8_t Imm = CInt->getZExtValue();
|
||||
|
||||
bool LowHalfZero = Imm & 0x08;
|
||||
bool HighHalfZero = Imm & 0x80;
|
||||
|
||||
// If both zero mask bits are set, this was just a weird way to
|
||||
// generate a zero vector.
|
||||
if (LowHalfZero && HighHalfZero)
|
||||
return ZeroVector;
|
||||
|
||||
// If 0 or 1 zero mask bits are set, this is a simple shuffle.
|
||||
unsigned NumElts = VecTy->getNumElements();
|
||||
unsigned HalfSize = NumElts / 2;
|
||||
SmallVector<int, 8> ShuffleMask(NumElts);
|
||||
|
||||
// The high bit of the selection field chooses the 1st or 2nd operand.
|
||||
bool LowInputSelect = Imm & 0x02;
|
||||
bool HighInputSelect = Imm & 0x20;
|
||||
|
||||
if ((Imm & 0x88) == 0x88) {
|
||||
// If both zero mask bits are set, this was just a weird way to
|
||||
// generate a zero vector.
|
||||
return ConstantAggregateZero::get(VecTy);
|
||||
}
|
||||
// The low bit of the selection field chooses the low or high half
|
||||
// of the selected operand.
|
||||
bool LowHalfSelect = Imm & 0x01;
|
||||
bool HighHalfSelect = Imm & 0x10;
|
||||
|
||||
// TODO: If a single zero bit is set, replace one of the source operands
|
||||
// with a zero vector and use the same mask generation logic as below.
|
||||
|
||||
if ((Imm & 0x88) == 0x00) {
|
||||
// If neither zero mask bit is set, this is a simple shuffle.
|
||||
unsigned NumElts = VecTy->getNumElements();
|
||||
unsigned HalfSize = NumElts / 2;
|
||||
unsigned HalfBegin;
|
||||
SmallVector<int, 8> ShuffleMask(NumElts);
|
||||
|
||||
// Permute low half of result.
|
||||
HalfBegin = (Imm & 0x3) * HalfSize;
|
||||
for (unsigned i = 0; i != HalfSize; ++i)
|
||||
ShuffleMask[i] = HalfBegin + i;
|
||||
// Determine which operand(s) are actually in use for this instruction.
|
||||
Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
|
||||
Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
|
||||
|
||||
// Permute high half of result.
|
||||
HalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
|
||||
for (unsigned i = HalfSize; i != NumElts; ++i)
|
||||
ShuffleMask[i] = HalfBegin + i - HalfSize;
|
||||
// If needed, replace operands based on zero mask.
|
||||
V0 = LowHalfZero ? ZeroVector : V0;
|
||||
V1 = HighHalfZero ? ZeroVector : V1;
|
||||
|
||||
// Permute low half of result.
|
||||
unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
|
||||
for (unsigned i = 0; i < HalfSize; ++i)
|
||||
ShuffleMask[i] = StartIndex + i;
|
||||
|
||||
Value *Op0 = II.getArgOperand(0);
|
||||
Value *Op1 = II.getArgOperand(1);
|
||||
return Builder.CreateShuffleVector(Op0, Op1, ShuffleMask);
|
||||
}
|
||||
// Permute high half of result.
|
||||
StartIndex = HighHalfSelect ? HalfSize : 0;
|
||||
StartIndex += NumElts;
|
||||
for (unsigned i = 0; i < HalfSize; ++i)
|
||||
ShuffleMask[i + HalfSize] = StartIndex + i;
|
||||
|
||||
return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -76,7 +76,7 @@ define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
|
||||
ret <4 x double> %res
|
||||
|
||||
; CHECK-LABEL: @perm2pd_0x02
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
; CHECK-NEXT: ret <4 x double> %1
|
||||
}
|
||||
|
||||
@ -85,7 +85,7 @@ define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
|
||||
ret <4 x double> %res
|
||||
|
||||
; CHECK-LABEL: @perm2pd_0x03
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
|
||||
; CHECK-NEXT: ret <4 x double> %1
|
||||
}
|
||||
|
||||
@ -111,7 +111,7 @@ define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
|
||||
ret <4 x double> %res
|
||||
|
||||
; CHECK-LABEL: @perm2pd_0x12
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||
; CHECK-NEXT: ret <4 x double> %1
|
||||
}
|
||||
|
||||
@ -120,7 +120,7 @@ define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
|
||||
ret <4 x double> %res
|
||||
|
||||
; CHECK-LABEL: @perm2pd_0x13
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
|
||||
; CHECK-NEXT: ret <4 x double> %1
|
||||
}
|
||||
|
||||
@ -207,26 +207,41 @@ define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
|
||||
}
|
||||
|
||||
|
||||
; Confirm that when a single zero mask bit is set, we do nothing.
|
||||
; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
|
||||
|
||||
define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
|
||||
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
|
||||
ret <4 x double> %res
|
||||
|
||||
; CHECK-LABEL: @perm2pd_0x81
|
||||
; CHECK-NEXT: shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}<4 x i32> <i32 2, i32 3, i32 4, i32 5>
|
||||
; CHECK-NEXT: ret <4 x double>
|
||||
}
|
||||
|
||||
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
|
||||
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
|
||||
ret <4 x double> %res
|
||||
|
||||
; CHECK-LABEL: @perm2pd_0x83
|
||||
; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 -125)
|
||||
; CHECK-NEXT: shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
|
||||
; CHECK-NEXT: ret <4 x double>
|
||||
}
|
||||
|
||||
|
||||
; Confirm that when the other zero mask bit is set, we do nothing. Also confirm that an ignored bit has no effect.
|
||||
|
||||
define <4 x double> @perm2pd_0x48(<4 x double> %a0, <4 x double> %a1) {
|
||||
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 72)
|
||||
define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
|
||||
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
|
||||
ret <4 x double> %res
|
||||
|
||||
; CHECK-LABEL: @perm2pd_0x48
|
||||
; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 72)
|
||||
; CHECK-LABEL: @perm2pd_0x28
|
||||
; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
; CHECK-NEXT: ret <4 x double>
|
||||
}
|
||||
|
||||
define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
|
||||
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
|
||||
ret <4 x double> %res
|
||||
|
||||
; CHECK-LABEL: @perm2pd_0x08
|
||||
; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
; CHECK-NEXT: ret <4 x double>
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user