mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-02 22:23:10 +00:00
[AVX512] Bring back vector-shuffle lowering support through broadcasts
After the commit at rev219046, lowering of 512-bit broadcasts became non-optimal. Most of the tests on broadcasting and embedded broadcasting were changed, and they no longer produce efficient code.
The example below is taken from that commit’s changes (it’s the first test in test/CodeGen/X86/avx512-vbroadcast.ll):
define <16 x i32> @_inreg16xi32(i32 %a) {
; CHECK-LABEL: _inreg16xi32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastd %edi, %zmm0
+; CHECK-NEXT: vmovd %edi, %xmm0
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
+; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = insertelement <16 x i32> undef, i32 %a, i32 0
%c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32> %c
}
Here, a 256-bit broadcast was generated instead of a 512-bit one.
In this patch
1) I added vector-shuffle lowering through broadcasts
2) Removed asserts and branches like the following, because they are incorrect:
- assert(Subtarget->hasDQI() && "We can only lower v8i64 with AVX-512-DQI");
3) Fixed lowering tests
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220774 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -453,10 +453,7 @@ entry:
|
||||
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
|
||||
; CHECK-LABEL: andqbrst:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: vmovq (%rdi), %xmm1
|
||||
; CHECK-NEXT: vpbroadcastq %xmm1, %ymm1
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%a = load i64* %ap, align 8
|
||||
|
||||
@@ -3,9 +3,7 @@
|
||||
define <16 x i32> @_inreg16xi32(i32 %a) {
|
||||
; CHECK-LABEL: _inreg16xi32:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovd %edi, %xmm0
|
||||
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpbroadcastd %edi, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = insertelement <16 x i32> undef, i32 %a, i32 0
|
||||
%c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
@@ -15,9 +13,7 @@ define <16 x i32> @_inreg16xi32(i32 %a) {
|
||||
define <8 x i64> @_inreg8xi64(i64 %a) {
|
||||
; CHECK-LABEL: _inreg8xi64:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovq %rdi, %xmm0
|
||||
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = insertelement <8 x i64> undef, i64 %a, i32 0
|
||||
%c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||
@@ -27,9 +23,7 @@ define <8 x i64> @_inreg8xi64(i64 %a) {
|
||||
define <16 x float> @_inreg16xfloat(float %a) {
|
||||
; CHECK-LABEL: _inreg16xfloat:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: ## kill: XMM0<def> XMM0<kill> ZMM0<def>
|
||||
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
|
||||
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = insertelement <16 x float> undef, float %a, i32 0
|
||||
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
|
||||
@@ -39,9 +33,7 @@ define <16 x float> @_inreg16xfloat(float %a) {
|
||||
define <8 x double> @_inreg8xdouble(double %a) {
|
||||
; CHECK-LABEL: _inreg8xdouble:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: ## kill: XMM0<def> XMM0<kill> ZMM0<def>
|
||||
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
|
||||
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = insertelement <8 x double> undef, double %a, i32 0
|
||||
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
|
||||
@@ -51,8 +43,7 @@ define <8 x double> @_inreg8xdouble(double %a) {
|
||||
define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
|
||||
; CHECK-LABEL: _xmm16xi32:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i32> %b
|
||||
@@ -61,8 +52,7 @@ define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
|
||||
define <16 x float> @_xmm16xfloat(<16 x float> %a) {
|
||||
; CHECK-LABEL: _xmm16xfloat:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
|
||||
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x float> %b
|
||||
|
||||
@@ -312,10 +312,7 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16
|
||||
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
|
||||
; CHECK-LABEL: test24:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovq (%rdi), %xmm2
|
||||
; CHECK-NEXT: vpbroadcastq %xmm2, %ymm2
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
|
||||
; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
|
||||
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
@@ -330,10 +327,7 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
|
||||
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
|
||||
; CHECK-LABEL: test25:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovd (%rdi), %xmm2
|
||||
; CHECK-NEXT: vpbroadcastd %xmm2, %ymm2
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
|
||||
; CHECK-NEXT: vpcmpled %zmm2, %zmm0, %k1
|
||||
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
|
||||
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
@@ -348,11 +342,8 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind
|
||||
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
|
||||
; CHECK-LABEL: test26:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovd (%rdi), %xmm3
|
||||
; CHECK-NEXT: vpbroadcastd %xmm3, %ymm3
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm3, %zmm3, %zmm3
|
||||
; CHECK-NEXT: vpcmpgtd %zmm3, %zmm0, %k1
|
||||
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1}
|
||||
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
|
||||
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
@@ -369,11 +360,8 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32
|
||||
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
|
||||
; CHECK-LABEL: test27:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovq (%rdi), %xmm3
|
||||
; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm3, %zmm3, %zmm3
|
||||
; CHECK-NEXT: vpcmpleq %zmm3, %zmm0, %k1
|
||||
; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1}
|
||||
; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
|
||||
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovaps %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
@@ -6,8 +6,7 @@ target triple = "x86_64-unknown-unknown"
|
||||
define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_00000000:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <8 x double> %shuffle
|
||||
@@ -729,8 +728,7 @@ define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
|
||||
define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
|
||||
; ALL-LABEL: shuffle_v8i64_00000000:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
|
||||
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpbroadcastq %xmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <8 x i64> %shuffle
|
||||
|
||||
Reference in New Issue
Block a user