mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
47 lines
1.3 KiB
LLVM
47 lines
1.3 KiB
LLVM
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s --check-prefix=SANDYB
|
||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx-i | FileCheck %s --check-prefix=SANDYB
|
||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
|
||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s --check-prefix=HASWELL
|
||
|
|
||
|
; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte load
|
||
|
; because that is slower than two 16-byte loads.
|
||
|
; Other AVX-capable chips don't have that problem.
|
||
|
|
||
|
define <8 x float> @load32bytes(<8 x float>* %Ap) {
  ; Every RUN line selects a per-CPU check prefix and none enables the default
  ; prefix, so the function label must be anchored once per active prefix;
  ; a label written with the default prefix would silently never be checked.
  ; SANDYB-LABEL: load32bytes:
  ; BTVER2-LABEL: load32bytes:
  ; HASWELL-LABEL: load32bytes:

  ; Sandy Bridge / Ivy Bridge: expect the access split into 16-byte pieces,
  ; an aligned move followed by a 128-bit insert.
  ; SANDYB: vmovaps
  ; SANDYB: vinsertf128
  ; SANDYB: retq

  ; Jaguar: a single unaligned 32-byte move is expected.
  ; BTVER2: vmovups
  ; BTVER2: retq

  ; Haswell: a single unaligned 32-byte move is expected.
  ; HASWELL: vmovups
  ; HASWELL: retq

  ; The pointer is only 16-byte aligned, so this 32-byte load is unaligned.
  %A = load <8 x float>* %Ap, align 16
  ret <8 x float> %A
}
|
||
|
|
||
|
; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte store
|
||
|
; because that is slower than two 16-byte stores.
|
||
|
; Other AVX-capable chips don't have that problem.
|
||
|
|
||
|
define void @store32bytes(<8 x float> %A, <8 x float>* %P) {
  ; Every RUN line selects a per-CPU check prefix and none enables the default
  ; prefix, so the function label must be anchored once per active prefix;
  ; a label written with the default prefix would silently never be checked.
  ; SANDYB-LABEL: store32bytes:
  ; BTVER2-LABEL: store32bytes:
  ; HASWELL-LABEL: store32bytes:

  ; Sandy Bridge / Ivy Bridge: expect the access split into 16-byte pieces,
  ; a 128-bit extract of the upper half plus an aligned move.
  ; SANDYB: vextractf128
  ; SANDYB: vmovaps
  ; SANDYB: retq

  ; Jaguar: a single unaligned 32-byte move is expected.
  ; BTVER2: vmovups
  ; BTVER2: retq

  ; Haswell: a single unaligned 32-byte move is expected.
  ; HASWELL: vmovups
  ; HASWELL: retq

  ; The pointer is only 16-byte aligned, so this 32-byte store is unaligned.
  store <8 x float> %A, <8 x float>* %P, align 16
  ret void
}
|