mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
28660d4b2f
This patch adds a feature flag to avoid unaligned 32-byte load/store AVX codegen for Sandy Bridge and Ivy Bridge. There is no functionality change intended for those chips. Previously, the absence of AVX2 was being used as a proxy to detect this feature. But that hindered codegen for AVX-enabled AMD chips such as btver2 that do not have the 32-byte unaligned access slowdown. Performance measurements are included in PR21541 ( http://llvm.org/bugs/show_bug.cgi?id=21541 ). Differential Revision: http://reviews.llvm.org/D6355 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222544 91177308-0d34-0410-b5e6-96231b3b80d8
47 lines
1.3 KiB
LLVM
47 lines
1.3 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s --check-prefix=SANDYB
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx-i | FileCheck %s --check-prefix=SANDYB
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s --check-prefix=HASWELL
|
; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte load
; because that is slower than two 16-byte loads.
; Other AVX-capable chips don't have that problem.
; Load 32 bytes from a 16-byte-aligned pointer. On Sandy Bridge / Ivy Bridge
; this should be split into two 16-byte loads (vmovaps + vinsertf128) because
; an unaligned 32-byte vmovups is slow there; btver2 and Haswell keep the
; single 32-byte unaligned load.
define <8 x float> @load32bytes(<8 x float>* %Ap) {
; CHECK-LABEL: load32bytes

; SANDYB: vmovaps
; SANDYB: vinsertf128
; SANDYB: retq

; BTVER2: vmovups
; BTVER2: retq

; HASWELL: vmovups
; HASWELL: retq

  ; align 16 (not 32) is what forces the "unaligned 32-byte access" decision.
  ; NOTE: pre-LLVM-3.7 load syntax (no explicit result type operand) — this
  ; tree predates the typed-load change, so keep it as-is.
  %A = load <8 x float>* %Ap, align 16
  ret <8 x float> %A
}
|
|
|
|
; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte store
; because that is slower than two 16-byte stores.
; Other AVX-capable chips don't have that problem.
; Store 32 bytes through a 16-byte-aligned pointer. On Sandy Bridge / Ivy
; Bridge this should be split into two 16-byte stores (vextractf128 + vmovaps)
; because an unaligned 32-byte vmovups is slow there; btver2 and Haswell keep
; the single 32-byte unaligned store.
define void @store32bytes(<8 x float> %A, <8 x float>* %P) {
; CHECK-LABEL: store32bytes

; SANDYB: vextractf128
; SANDYB: vmovaps
; SANDYB: retq

; BTVER2: vmovups
; BTVER2: retq

; HASWELL: vmovups
; HASWELL: retq

  ; align 16 (not 32) is what forces the "unaligned 32-byte access" decision.
  store <8 x float> %A, <8 x float>* %P, align 16
  ret void
}
|