mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-07 14:33:15 +00:00
Since the result of a SETCC for X86 is 0 or -1 in each lane, we can move unary operations, in this case [su]int_to_fp through the mask operation and constant fold the operation away. Generally speaking: UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> AND(VECTOR_CMP(x,y), constant2) where constant2 is UNARYOP(constant). This implements the transform where UNARYOP is [su]int_to_fp. For example, consider the simple function: define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind { %cmp = fcmp oeq <4 x float> %val, %test %ext = zext <4 x i1> %cmp to <4 x i32> %result = sitofp <4 x i32> %ext to <4 x float> ret <4 x float> %result } Before this change, the SSE code is generated as: LCPI0_0: .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .section __TEXT,__text,regular,pure_instructions .globl _foo .align 4, 0x90 _foo: ## @foo cmpeqps %xmm1, %xmm0 andps LCPI0_0(%rip), %xmm0 cvtdq2ps %xmm0, %xmm0 retq After, the code is improved to: LCPI0_0: .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .section __TEXT,__text,regular,pure_instructions .globl _foo .align 4, 0x90 _foo: ## @foo cmpeqps %xmm1, %xmm0 andps LCPI0_0(%rip), %xmm0 retq The cvtdq2ps has been constant folded away and the floating point 1.0f vector lanes are materialized directly via the ModRM operand of andps. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213342 91177308-0d34-0410-b5e6-96231b3b80d8
19 lines
691 B
LLVM
19 lines
691 B
LLVM
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s

; Verify that an int-to-fp conversion applied to a masked vector compare is
; constant folded into the mask itself:
;
;   sitofp(and(vector_cmp(x, y), <1, 1, 1, 1>))
;     --> and(vector_cmp(x, y), <1.0f, 1.0f, 1.0f, 1.0f>)
;
; The checks below require that the constant-pool entry referenced by the
; andps holds four lanes of 1065353216 (the bit pattern of float 1.0) and that
; retq immediately follows the andps, i.e. no conversion instruction is left
; between the mask and the return.

define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
; The constant pool is emitted ahead of the function body, so it is matched
; first; every lane must already be the floating-point constant.
; CHECK-LABEL: LCPI0_0
; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
; Expected body: compare, mask with the folded constant, return.
; CHECK-LABEL: foo:
; CHECK: cmpeqps %xmm1, %xmm0
; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0
; CHECK-NEXT: retq

  ; Lane-wise ordered-equal compare producing <4 x i1>.
  %cmp = fcmp oeq <4 x float> %val, %test
  ; Zero-extend each i1 lane to i32, giving 0 or 1 per lane.
  %ext = zext <4 x i1> %cmp to <4 x i32>
  ; The conversion that the test expects to be folded into the mask constant.
  %result = sitofp <4 x i32> %ext to <4 x float>
  ret <4 x float> %result
}