Mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2025-02-04 23:32:00 +00:00
Commit d6fb53adb1:
…register. In most cases we actually compare or select YMM-sized registers, and mixing the two types creates horrible code. This commit optimizes some of the transition sequences. PR14657.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171148 91177308-0d34-0410-b5e6-96231b3b80d8
39 lines · 1.1 KiB · LLVM
; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
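; PR14657: on AVX, vector compares such as vcmpps write an all-ones/all-zeros
; mask per lane into a YMM register. The <8 x i1> masks below should therefore
; be kept YMM-sized throughout; mixing XMM- and YMM-sized values forces costly
; transition sequences between the two register widths.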
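; and_masks: two olt compares each produce an <8 x i1> mask; the masks are
; and'ed together and zero-extended to <8 x i32> before the store. The CHECK
; lines expect the whole sequence to lower to 256-bit AVX instructions.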
;CHECK: and_masks
;CHECK: vmovups
;CHECK-NEXT: vcmpltp
;CHECK-NEXT: vandps
;CHECK-NEXT: vmovups
;CHECK: ret
define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
%v0 = load <8 x float>* %a, align 16
%v1 = load <8 x float>* %b, align 16
%m0 = fcmp olt <8 x float> %v1, %v0
%v2 = load <8 x float>* %c, align 16
%m1 = fcmp olt <8 x float> %v2, %v0
%mand = and <8 x i1> %m1, %m0
%r = zext <8 x i1> %mand to <8 x i32>
store <8 x i32> %r, <8 x i32>* undef, align 16
ret void
}
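; neg_masks: a single <8 x i1> compare result is negated by xor'ing with an
; all-ones vector; the lowering is likewise expected to stay YMM-sized.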
;CHECK: neg_mask
;CHECK: vmovups
;CHECK-NEXT: vcmpltps
;CHECK-NEXT: vandps
;CHECK-NEXT: vmovups
;CHECK: ret
define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
%v0 = load <8 x float>* %a, align 16
%v1 = load <8 x float>* %b, align 16
%m0 = fcmp olt <8 x float> %v1, %v0
%mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%r = zext <8 x i1> %mand to <8 x i32>
store <8 x i32> %r, <8 x i32>* undef, align 16
ret void
}