mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-21 03:32:21 +00:00
2dbe929685
The stack realignment code was fixed to work when there is stack realignment and a dynamic alloca is present so this shouldn't cause correctness issues anymore. Note that this also enables generation of AVX instructions for memset under the assumptions: - Unaligned loads/stores are always fast on CPUs supporting AVX - AVX is not slower than SSE We may need some tweaked heuristics if one of those assumptions turns out not to be true. Effectively reverts r58317. Part of PR2962. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167967 91177308-0d34-0410-b5e6-96231b3b80d8
78 lines
1.8 KiB
LLVM
78 lines
1.8 KiB
LLVM
; Make sure that we realign the stack. Mingw32 uses 4-byte stack alignment, we
; need 16 bytes for SSE and 32 bytes for AVX.
|
|
|
|
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s -check-prefix=NOSSE
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s -check-prefix=SSE1
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s -check-prefix=SSE2
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX1
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
|
|
|
|
; test1 - zero a 32-byte static stack buffer (declared with align 4) via the
; memset intrinsic, then create a dynamically sized alloca of %t bytes and hand
; it to @dummy. The function therefore contains both a wide store to the stack
; and a variable-sized stack object.
;
; What the directives inside the body require of the llc output:
;   * NOSSE prefix - after the function label, no text matching "and" may
;     appear before the first "movl $0" (i.e. no stack realignment mask is
;     expected and the buffer is cleared with scalar stores).
;   * SSE1 and SSE2 prefixes - "andl $-16", then "movl %esp, %esi", then
;     "movaps", in that order.
;   * AVX1 and AVX2 prefixes - "andl $-32", then "movl %esp, %esi", then
;     "vmovaps %ymm", in that order.
; NOTE(review): the "movl %esp, %esi" match presumably pins the copy of the
; realigned stack pointer into a base register, needed because the dynamic
; alloca moves %esp - confirm against the X86 frame lowering code.
define void @test1(i32 %t) nounwind {
  ; 32 bytes of stack storage; only 4-byte alignment is requested in the IR.
  %tmp1210 = alloca i8, i32 32, align 4
  ; Clear all 32 bytes (value 0, length 32, i32 operand 4, i1 operand false).
  call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
  ; Variable-sized stack allocation whose size is the function argument.
  %x = alloca i8, i32 %t
  call void @dummy(i8* %x)
  ret void

; NOSSE: test1:
; NOSSE-NOT: and
; NOSSE: movl $0

; SSE1: test1:
; SSE1: andl $-16
; SSE1: movl %esp, %esi
; SSE1: movaps

; SSE2: test1:
; SSE2: andl $-16
; SSE2: movl %esp, %esi
; SSE2: movaps

; AVX1: test1:
; AVX1: andl $-32
; AVX1: movl %esp, %esi
; AVX1: vmovaps %ymm

; AVX2: test1:
; AVX2: andl $-32
; AVX2: movl %esp, %esi
; AVX2: vmovaps %ymm

}
|
|
|
|
; test2 - same shape as the 32-byte variant in this file, but the static buffer
; and the memset length are 16 bytes: zero the buffer via the memset intrinsic,
; then create a dynamically sized alloca of %t bytes and hand it to @dummy.
;
; What the directives inside the body require of the llc output:
;   * NOSSE prefix - after the function label, no text matching "and" may
;     appear before the first "movl $0".
;   * SSE1 and SSE2 prefixes - "andl $-16", then "movl %esp, %esi", then
;     "movaps", in that order.
;   * AVX1 and AVX2 prefixes - "andl $-16" (not -32), then "movl %esp, %esi",
;     then "vmovaps %xmm", in that order; a 16-byte clear is expected to use a
;     single xmm-width store even when AVX is available.
; NOTE(review): the "movl %esp, %esi" match presumably pins the copy of the
; realigned stack pointer into a base register, needed because the dynamic
; alloca moves %esp - confirm against the X86 frame lowering code.
define void @test2(i32 %t) nounwind {
  ; 16 bytes of stack storage; only 4-byte alignment is requested in the IR.
  %tmp1210 = alloca i8, i32 16, align 4
  ; Clear all 16 bytes (value 0, length 16, i32 operand 4, i1 operand false).
  call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 16, i32 4, i1 false)
  ; Variable-sized stack allocation whose size is the function argument.
  %x = alloca i8, i32 %t
  call void @dummy(i8* %x)
  ret void

; NOSSE: test2:
; NOSSE-NOT: and
; NOSSE: movl $0

; SSE1: test2:
; SSE1: andl $-16
; SSE1: movl %esp, %esi
; SSE1: movaps

; SSE2: test2:
; SSE2: andl $-16
; SSE2: movl %esp, %esi
; SSE2: movaps

; AVX1: test2:
; AVX1: andl $-16
; AVX1: movl %esp, %esi
; AVX1: vmovaps %xmm

; AVX2: test2:
; AVX2: andl $-16
; AVX2: movl %esp, %esi
; AVX2: vmovaps %xmm
}
|
|
|
|
; External function that receives the pointer to the dynamically sized alloca
; in each test function; it is only declared here and has no body in this file.
declare void @dummy(i8*)
|
|
|
|
; Declaration of the memset intrinsic called by the test functions above.
; NOTE(review): the operands are presumably (destination, fill byte, length,
; alignment, volatile flag), matching the five-operand form of the intrinsic -
; confirm against the LangRef for this LLVM revision.
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|