llvm-6502/test/CodeGen/X86/v8i1-masks.ll
Nadav Rotem (commit d6fb53adb1):

On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM-sized
register. In most cases we actually compare or select YMM-sized registers,
and mixing the two types creates horrible code. This commit optimizes
some of the transition sequences.

PR14657.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171148 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-27 08:15:45 +00:00
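
The tests below cover the mask-AND and mask-NOT transitions. For the
compare-and-select case the commit message also mentions, a minimal sketch of
the same v8i1 pattern (hypothetical function name, not part of the committed
test) would be:

define <8 x float> @select_mask_sketch(<8 x float> %x, <8 x float> %y) nounwind {
  %m = fcmp olt <8 x float> %x, %y                          ; YMM compare yields an <8 x i1> mask
  %r = select <8 x i1> %m, <8 x float> %x, <8 x float> %y   ; mask consumed by a YMM-wide select
  ret <8 x float> %r
}

Here the <8 x i1> mask feeds a YMM-sized select directly, so ideally it is
never shuffled through an XMM-sized v8i16 value.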

; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
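
; Both compares produce <8 x i1> masks from YMM-sized inputs; the test expects
; the mask AND to stay in the YMM domain as a single vandps instead of being
; legalized through an XMM-sized v8i16 value.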
;CHECK: and_masks
;CHECK: vmovups
;CHECK-NEXT: vcmpltps
;CHECK-NEXT: vandps
;CHECK-NEXT: vmovups
;CHECK: ret
define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
  %v0 = load <8 x float>* %a, align 16
  %v1 = load <8 x float>* %b, align 16
  %m0 = fcmp olt <8 x float> %v1, %v0
  %v2 = load <8 x float>* %c, align 16
  %m1 = fcmp olt <8 x float> %v2, %v0
  %mand = and <8 x i1> %m1, %m0
  %r = zext <8 x i1> %mand to <8 x i32>
  store <8 x i32> %r, <8 x i32>* undef, align 16
  ret void
}
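
; Negating a mask (xor with an all-ones <8 x i1> vector) is a logical NOT; it
; should likewise be performed on the YMM-sized compare result rather than on
; a legalized v8i16 value.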
;CHECK: neg_masks
;CHECK: vmovups
;CHECK-NEXT: vcmpltps
;CHECK-NEXT: vandps
;CHECK-NEXT: vmovups
;CHECK: ret
define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
  %v0 = load <8 x float>* %a, align 16
  %v1 = load <8 x float>* %b, align 16
  %m0 = fcmp olt <8 x float> %v1, %v0
  %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  %r = zext <8 x i1> %mand to <8 x i32>
  store <8 x i32> %r, <8 x i32>* undef, align 16
  ret void
}
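
In an LLVM checkout this file runs under the lit test driver, e.g.
llvm-lit test/CodeGen/X86/v8i1-masks.ll from the build directory, which
executes the RUN line above and pipes the llc output through FileCheck.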