llvm-6502/test/CodeGen/X86/unaligned-32-byte-memops.ll

47 lines
1.3 KiB
LLVM
Raw Normal View History

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s --check-prefix=SANDYB
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx-i | FileCheck %s --check-prefix=SANDYB
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s --check-prefix=HASWELL
; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte load
; because that is slower than two 16-byte loads.
; Other AVX-capable chips don't have that problem.
define <8 x float> @load32bytes(<8 x float>* %Ap) {
; Loads a 32-byte vector through a pointer that is only known to be 16-byte
; aligned and returns it.
;
; Every RUN line selects its own check prefix, so the function label has to be
; anchored once per prefix; a label written under the default prefix is never
; evaluated and would leave the instruction checks below unscoped.
; SANDYB-LABEL: load32bytes
; BTVER2-LABEL: load32bytes
; HASWELL-LABEL: load32bytes
;
; corei7-avx and core-avx-i runs expect the access split into 16-byte halves
; SANDYB: vmovaps
; SANDYB: vinsertf128
; SANDYB: retq
;
; btver2 run expects one unaligned 32-byte move
; BTVER2: vmovups
; BTVER2: retq
;
; core-avx2 run expects one unaligned 32-byte move
; HASWELL: vmovups
; HASWELL: retq
  %A = load <8 x float>* %Ap, align 16
  ret <8 x float> %A
}
; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte store
; because that is slowerthan two 16-byte stores.
; Other AVX-capable chips don't have that problem.
define void @store32bytes(<8 x float> %A, <8 x float>* %P) {
; Stores a 32-byte vector through a pointer that is only known to be 16-byte
; aligned.
;
; Every RUN line selects its own check prefix, so the function label has to be
; anchored once per prefix; a label written under the default prefix is never
; evaluated and would leave the instruction checks below unscoped.
; SANDYB-LABEL: store32bytes
; BTVER2-LABEL: store32bytes
; HASWELL-LABEL: store32bytes
;
; corei7-avx and core-avx-i runs expect the access split into 16-byte halves
; SANDYB: vextractf128
; SANDYB: vmovaps
; SANDYB: retq
;
; btver2 run expects one unaligned 32-byte move
; BTVER2: vmovups
; BTVER2: retq
;
; core-avx2 run expects one unaligned 32-byte move
; HASWELL: vmovups
; HASWELL: retq
  store <8 x float> %A, <8 x float>* %P, align 16
  ret void
}