mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-02 07:17:36 +00:00
[X86][SSE] Missing SSE/AVX1 memory folding integer instructions
Added most of the missing integer vector folding patterns for SSE (to SSE42) and AVX1. The most useful of these are probably the i32/i64 extraction, i8/i16/i32/i64 insertions, zero/sign extension, unsigned saturation subtractions, i64 subtractions and the variable mask blends (pblendvb) - others include CLMUL, SSE42 string comparisons and bit tests. Differential Revision: http://reviews.llvm.org/D7094 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226745 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -2,13 +2,13 @@
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
|
||||
target triple = "x86_64-apple-macosx10.6.6"
|
||||
|
||||
; Test that the order of operands is correct
|
||||
; CHECK: select_func
|
||||
; CHECK: pblendvb %xmm1, %xmm2
|
||||
; CHECK: ret
|
||||
|
||||
define void @select_func(<8 x i16> %in) {
|
||||
|
||||
; Test that the order of operands is correct
|
||||
; CHECK: select_func
|
||||
; CHECK: pblendvb {{LCPI0_[0-9]*}}(%rip), %xmm1
|
||||
; CHECK: ret
|
||||
|
||||
define void @select_func(<8 x i16> %in) {
|
||||
entry:
|
||||
%c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
|
||||
%and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
|
||||
|
||||
@@ -23,19 +23,19 @@ vector.body: ; preds = %vector.body, %vecto
|
||||
br i1 %6, label %for.end, label %vector.body
|
||||
|
||||
for.end: ; preds = %vector.body
|
||||
ret void
|
||||
|
||||
; SSE2: @test1
|
||||
; SSE2: psubusw LCPI0_0(%rip), %xmm0
|
||||
|
||||
; AVX1: @test1
|
||||
; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
|
||||
|
||||
; AVX2: @test1
|
||||
; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
|
||||
}
|
||||
|
||||
define void @test2(i16* nocapture %head) nounwind {
|
||||
ret void
|
||||
|
||||
; SSE2: @test1
|
||||
; SSE2: psubusw %xmm0, %xmm1
|
||||
|
||||
; AVX1: @test1
|
||||
; AVX1: vpsubusw %xmm0, %xmm1, %xmm1
|
||||
|
||||
; AVX2: @test1
|
||||
; AVX2: vpsubusw %xmm0, %xmm1, %xmm1
|
||||
}
|
||||
|
||||
define void @test2(i16* nocapture %head) nounwind {
|
||||
vector.ph:
|
||||
br label %vector.body
|
||||
|
||||
@@ -53,19 +53,19 @@ vector.body: ; preds = %vector.body, %vecto
|
||||
br i1 %6, label %for.end, label %vector.body
|
||||
|
||||
for.end: ; preds = %vector.body
|
||||
ret void
|
||||
|
||||
; SSE2: @test2
|
||||
; SSE2: psubusw LCPI1_0(%rip), %xmm0
|
||||
|
||||
; AVX1: @test2
|
||||
; AVX1: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
|
||||
|
||||
; AVX2: @test2
|
||||
; AVX2: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
|
||||
}
|
||||
|
||||
define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
|
||||
ret void
|
||||
|
||||
; SSE2: @test2
|
||||
; SSE2: psubusw %xmm0, %xmm1
|
||||
|
||||
; AVX1: @test2
|
||||
; AVX1: vpsubusw %xmm0, %xmm1, %xmm1
|
||||
|
||||
; AVX2: @test2
|
||||
; AVX2: vpsubusw %xmm0, %xmm1, %xmm1
|
||||
}
|
||||
|
||||
define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
|
||||
vector.ph:
|
||||
%0 = insertelement <8 x i16> undef, i16 %w, i32 0
|
||||
%broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
@@ -115,19 +115,19 @@ vector.body: ; preds = %vector.body, %vecto
|
||||
br i1 %6, label %for.end, label %vector.body
|
||||
|
||||
for.end: ; preds = %vector.body
|
||||
ret void
|
||||
|
||||
; SSE2: @test4
|
||||
; SSE2: psubusb LCPI3_0(%rip), %xmm0
|
||||
|
||||
; AVX1: @test4
|
||||
; AVX1: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
|
||||
|
||||
; AVX2: @test4
|
||||
; AVX2: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
|
||||
}
|
||||
|
||||
define void @test5(i8* nocapture %head) nounwind {
|
||||
ret void
|
||||
|
||||
; SSE2: @test4
|
||||
; SSE2: psubusb %xmm0, %xmm1
|
||||
|
||||
; AVX1: @test4
|
||||
; AVX1: vpsubusb %xmm0, %xmm1, %xmm1
|
||||
|
||||
; AVX2: @test4
|
||||
; AVX2: vpsubusb %xmm0, %xmm1, %xmm1
|
||||
}
|
||||
|
||||
define void @test5(i8* nocapture %head) nounwind {
|
||||
vector.ph:
|
||||
br label %vector.body
|
||||
|
||||
@@ -145,19 +145,19 @@ vector.body: ; preds = %vector.body, %vecto
|
||||
br i1 %6, label %for.end, label %vector.body
|
||||
|
||||
for.end: ; preds = %vector.body
|
||||
ret void
|
||||
|
||||
; SSE2: @test5
|
||||
; SSE2: psubusb LCPI4_0(%rip), %xmm0
|
||||
|
||||
; AVX1: @test5
|
||||
; AVX1: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
|
||||
|
||||
; AVX2: @test5
|
||||
; AVX2: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
|
||||
}
|
||||
|
||||
define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
|
||||
ret void
|
||||
|
||||
; SSE2: @test5
|
||||
; SSE2: psubusb %xmm0, %xmm1
|
||||
|
||||
; AVX1: @test5
|
||||
; AVX1: vpsubusb %xmm0, %xmm1, %xmm1
|
||||
|
||||
; AVX2: @test5
|
||||
; AVX2: vpsubusb %xmm0, %xmm1, %xmm1
|
||||
}
|
||||
|
||||
define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
|
||||
vector.ph:
|
||||
%0 = insertelement <16 x i8> undef, i8 %w, i32 0
|
||||
%broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+aes < %s | FileCheck %s
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+aes,+pclmul < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
@@ -62,6 +62,38 @@ define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_movd_load(i32 %a0) {
|
||||
;CHECK-LABEL: stack_fold_movd_load
|
||||
;CHECK: movd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define i32 @stack_fold_movd_store(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movd_store
|
||||
;CHECK: movd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
|
||||
%1 = extractelement <4 x i32> %a0, i32 0
|
||||
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movq_load
|
||||
;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
|
||||
define i64 @stack_fold_movq_store(<2 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movq_store
|
||||
;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
|
||||
%1 = extractelement <2 x i64> %a0, i32 0
|
||||
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsb
|
||||
;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
@@ -240,7 +272,13 @@ define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pblendvb
|
||||
define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) {
|
||||
;CHECK-LABEL: stack_fold_pblendvb
|
||||
;CHECK: vpblendvb {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
@@ -252,7 +290,13 @@ define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pclmulqdq
|
||||
define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pclmulqdq
|
||||
;CHECK: vpclmulqdq $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
@@ -291,10 +335,22 @@ define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
; TODO stack_fold_pcmpestri
|
||||
define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpestri
|
||||
;CHECK: vpcmpestri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
|
||||
%2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
|
||||
ret i32 %2
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pcmpestrm
|
||||
define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpestrm
|
||||
;CHECK: vpcmpestrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
@@ -333,15 +389,44 @@ define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
; TODO stack_fold_pcmpistri
|
||||
define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpistri
|
||||
;CHECK: vpcmpistri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
|
||||
ret i32 %2
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pcmpistrm
|
||||
define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpistrm
|
||||
;CHECK: vpcmpistrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pextrb
|
||||
; TODO stack_fold_pextrd
|
||||
; TODO stack_fold_pextrq
|
||||
|
||||
define i32 @stack_fold_pextrd(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pextrd
|
||||
;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
|
||||
;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
|
||||
%1 = extractelement <4 x i32> %a0, i32 1
|
||||
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @stack_fold_pextrq(<2 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pextrq
|
||||
;CHECK: pextrq $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
|
||||
;CHECK: movq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Reload
|
||||
%1 = extractelement <2 x i64> %a0, i32 1
|
||||
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; TODO stack_fold_pextrw
|
||||
|
||||
define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
@@ -371,7 +456,13 @@ define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_phminposuw
|
||||
define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_phminposuw
|
||||
;CHECK: vphminposuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
@@ -401,10 +492,37 @@ define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pinsrb
|
||||
; TODO stack_fold_pinsrd
|
||||
; TODO stack_fold_pinsrq
|
||||
; TODO stack_fold_pinsrw
|
||||
define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) {
|
||||
;CHECK-LABEL: stack_fold_pinsrb
|
||||
;CHECK: vpinsrb $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <16 x i8> %a0, i8 %a1, i32 1
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) {
|
||||
;CHECK-LABEL: stack_fold_pinsrd
|
||||
;CHECK: vpinsrd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <4 x i32> %a0, i32 %a1, i32 1
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) {
|
||||
;CHECK-LABEL: stack_fold_pinsrq
|
||||
;CHECK: vpinsrq $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <2 x i64> %a0, i64 %a1, i32 1
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
|
||||
define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) {
|
||||
;CHECK-LABEL: stack_fold_pinsrw
|
||||
;CHECK: vpinsrw $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <8 x i16> %a0, i16 %a1, i32 1
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaddubsw
|
||||
@@ -532,40 +650,112 @@ define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxbd
|
||||
define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxbd
|
||||
;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxbq
|
||||
define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxbq
|
||||
;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxbw
|
||||
define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxbw
|
||||
;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxdq
|
||||
define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxdq
|
||||
;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxwd
|
||||
define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxwd
|
||||
;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxwq
|
||||
define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxwq
|
||||
;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxbd
|
||||
define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxbd
|
||||
;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxbq
|
||||
define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxbq
|
||||
;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxbw
|
||||
define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxbw
|
||||
;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxdq
|
||||
define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxdq
|
||||
;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxwd
|
||||
define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxwd
|
||||
;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxwq
|
||||
define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxwq
|
||||
;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
@@ -796,7 +986,13 @@ define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
; TODO stack_fold_psubq
|
||||
define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubq
|
||||
;CHECK: vpsubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = sub <2 x i64> %a0, %a1
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
|
||||
define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubsb
|
||||
@@ -816,10 +1012,22 @@ define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_psubusb
|
||||
define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubusb
|
||||
;CHECK: vpsubusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_psubusw
|
||||
define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubusw
|
||||
;CHECK: vpsubusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
@@ -830,7 +1038,13 @@ define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
; TODO stack_fold_ptest
|
||||
define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_ptest
|
||||
;CHECK: vptest {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
|
||||
ret i32 %2
|
||||
}
|
||||
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2,+aes < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
; Stack reload folding tests.
|
||||
;
|
||||
@@ -59,12 +59,44 @@ define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsb
|
||||
;CHECK: pabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_movd_load(i32 %a0) {
|
||||
;CHECK-LABEL: stack_fold_movd_load
|
||||
;CHECK: movd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define i32 @stack_fold_movd_store(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movd_store
|
||||
;CHECK: movd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
|
||||
%1 = extractelement <4 x i32> %a0, i32 0
|
||||
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movq_load
|
||||
;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
|
||||
define i64 @stack_fold_movq_store(<2 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_movq_store
|
||||
;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
|
||||
%1 = extractelement <2 x i64> %a0, i32 0
|
||||
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pabsb
|
||||
;CHECK: pabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
|
||||
ret <16 x i8> %2
|
||||
@@ -237,25 +269,37 @@ define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pblendvb
|
||||
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) {
|
||||
;CHECK-LABEL: stack_fold_pblendvb
|
||||
;CHECK: pblendvb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pblendw
|
||||
;CHECK: pblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pclmulqdq
|
||||
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pclmulqdq
|
||||
;CHECK: pclmulqdq $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpeqb
|
||||
;CHECK: pcmpeqb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
@@ -288,16 +332,28 @@ define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = icmp eq <8 x i16> %a0, %a1
|
||||
%3 = sext <8 x i1> %2 to <8 x i16>
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
; TODO stack_fold_pcmpestri
|
||||
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pcmpestrm
|
||||
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpestri
|
||||
;CHECK: pcmpestri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
|
||||
%2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
|
||||
ret i32 %2
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpestrm
|
||||
;CHECK: pcmpestrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpgtb
|
||||
;CHECK: pcmpgtb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
@@ -330,21 +386,50 @@ define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = icmp sgt <8 x i16> %a0, %a1
|
||||
%3 = sext <8 x i1> %2 to <8 x i16>
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
; TODO stack_fold_pcmpistri
|
||||
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pcmpistrm
|
||||
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pextrb
|
||||
; TODO stack_fold_pextrd
|
||||
; TODO stack_fold_pextrq
|
||||
; TODO stack_fold_pextrw
|
||||
|
||||
define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
ret <8 x i16> %3
|
||||
}
|
||||
|
||||
define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpistri
|
||||
;CHECK: pcmpistri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
|
||||
ret i32 %2
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pcmpistrm
|
||||
;CHECK: pcmpistrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pextrb
|
||||
|
||||
define i32 @stack_fold_pextrd(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pextrd
|
||||
;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
|
||||
;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
|
||||
%1 = extractelement <4 x i32> %a0, i32 1
|
||||
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @stack_fold_pextrq(<2 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pextrq
|
||||
;CHECK: pextrq $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
|
||||
;CHECK: movq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Reload
|
||||
%1 = extractelement <2 x i64> %a0, i32 1
|
||||
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; TODO stack_fold_pextrw
|
||||
|
||||
define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_phaddd
|
||||
;CHECK: phaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
@@ -368,13 +453,19 @@ define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_phminposuw
|
||||
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_phminposuw
|
||||
;CHECK: phminposuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_phsubd
|
||||
;CHECK: phsubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
@@ -398,16 +489,43 @@ define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pinsrb
|
||||
; TODO stack_fold_pinsrd
|
||||
; TODO stack_fold_pinsrq
|
||||
; TODO stack_fold_pinsrw
|
||||
|
||||
define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaddubsw
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) {
|
||||
;CHECK-LABEL: stack_fold_pinsrb
|
||||
;CHECK: pinsrb $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <16 x i8> %a0, i8 %a1, i32 1
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) {
|
||||
;CHECK-LABEL: stack_fold_pinsrd
|
||||
;CHECK: pinsrd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <4 x i32> %a0, i32 %a1, i32 1
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) {
|
||||
;CHECK-LABEL: stack_fold_pinsrq
|
||||
;CHECK: pinsrq $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <2 x i64> %a0, i64 %a1, i32 1
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
|
||||
define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) {
|
||||
;CHECK-LABEL: stack_fold_pinsrw
|
||||
;CHECK: pinsrw $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
||||
%2 = insertelement <8 x i16> %a0, i16 %a1, i32 1
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmaddubsw
|
||||
;CHECK: pmaddubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
|
||||
@@ -529,46 +647,118 @@ define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxbd
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxbq
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxbw
|
||||
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxdq
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxwd
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovsxwq
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxbd
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxbq
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxbw
|
||||
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxdq
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxwd
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_pmovzxwq
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxbd
|
||||
;CHECK: pmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxbq
|
||||
;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxbw
|
||||
;CHECK: pmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxdq
|
||||
;CHECK: pmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxwd
|
||||
;CHECK: pmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovsxwq
|
||||
;CHECK: pmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxbd
|
||||
;CHECK: pmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxbq
|
||||
;CHECK: pmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxbw
|
||||
;CHECK: pmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxdq
|
||||
;CHECK: pmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
|
||||
|
||||
define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxwd
|
||||
;CHECK: pmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
|
||||
;CHECK-LABEL: stack_fold_pmovzxwq
|
||||
;CHECK: pmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0)
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
|
||||
|
||||
define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_pmuldq
|
||||
;CHECK: pmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
@@ -793,13 +983,19 @@ define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
;CHECK: psubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = sub <4 x i32> %a0, %a1
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
; TODO stack_fold_psubq
|
||||
|
||||
define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubsb
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubq
|
||||
;CHECK: psubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = sub <2 x i64> %a0, %a1
|
||||
ret <2 x i64> %2
|
||||
}
|
||||
|
||||
define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubsb
|
||||
;CHECK: psubsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
|
||||
@@ -813,27 +1009,45 @@ define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_psubusb
|
||||
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
; TODO stack_fold_psubusw
|
||||
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubusb
|
||||
;CHECK: psubusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubusw
|
||||
;CHECK: psubusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_psubw
|
||||
;CHECK: psubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = sub <8 x i16> %a0, %a1
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
; TODO stack_fold_ptest
|
||||
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_ptest
|
||||
;CHECK: ptest {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
|
||||
ret i32 %2
|
||||
}
|
||||
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
||||
define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_punpckhbw
|
||||
;CHECK: punpckhbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
|
||||
Reference in New Issue
Block a user