mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-31 10:34:17 +00:00
optimize the AVX2 (integer) version of vperm2 into a shuffle
...because this is what happens when an instruction set puts its underwear on after its pants. This is an extension of r232852, r233100, and r233110: http://llvm.org/viewvc/llvm-project?view=revision&revision=232852 http://llvm.org/viewvc/llvm-project?view=revision&revision=233100 http://llvm.org/viewvc/llvm-project?view=revision&revision=233110 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233127 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
dad4e9cde6
commit
14c1d068a3
@ -972,7 +972,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
case Intrinsic::x86_avx_vperm2f128_pd_256:
|
||||
case Intrinsic::x86_avx_vperm2f128_ps_256:
|
||||
case Intrinsic::x86_avx_vperm2f128_si_256:
|
||||
// TODO: Add the AVX2 version of this instruction.
|
||||
case Intrinsic::x86_avx2_vperm2i128:
|
||||
if (Value *V = SimplifyX86vperm2(*II, *Builder))
|
||||
return ReplaceInstUsesWith(*II, V);
|
||||
break;
|
||||
|
@ -12,7 +12,7 @@ define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i
|
||||
}
|
||||
|
||||
|
||||
; In the following 3 tests, both zero mask bits of the immediate are set.
|
||||
; In the following 4 tests, both zero mask bits of the immediate are set.
|
||||
|
||||
define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
|
||||
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
|
||||
@ -38,6 +38,14 @@ define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK-NEXT: ret <8 x i32> zeroinitializer
|
||||
}
|
||||
|
||||
define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
%res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
|
||||
ret <4 x i64> %res
|
||||
|
||||
; CHECK-LABEL: @perm2i_0x88
|
||||
; CHECK-NEXT: ret <4 x i64> zeroinitializer
|
||||
}
|
||||
|
||||
|
||||
; The other control bits are ignored when zero mask bits of the immediate are set.
|
||||
|
||||
@ -207,6 +215,18 @@ define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
|
||||
}
|
||||
|
||||
|
||||
; Confirm that the AVX2 version works the same.
|
||||
|
||||
define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
%res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
|
||||
ret <4 x i64> %res
|
||||
|
||||
; CHECK-LABEL: @perm2i_0x33
|
||||
; CHECK-NEXT: %1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
|
||||
; CHECK-NEXT: ret <4 x i64> %1
|
||||
}
|
||||
|
||||
|
||||
; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
|
||||
|
||||
define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
|
||||
@ -245,7 +265,19 @@ define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
|
||||
; CHECK-NEXT: ret <4 x double>
|
||||
}
|
||||
|
||||
; Check one more with the AVX2 version.
|
||||
|
||||
define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
%res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
|
||||
ret <4 x i64> %res
|
||||
|
||||
; CHECK-LABEL: @perm2i_0x28
|
||||
; CHECK-NEXT: shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
; CHECK-NEXT: ret <4 x i64>
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
|
||||
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
|
||||
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user