llvm-6502/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll

; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s

; @foo: lane-wise ordered-equal compare of %val and %test, with the boolean
; result widened to i32 (0 or 1 per lane) and then converted to float.
; The directives below expect the int-to-float conversion to be folded into
; the mask constant: the constant pool entry holds four 1.0f lanes, and the
; function body is only the compare, the mask against that constant, and the
; return, with no conversion instruction in between.
define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
; Expected constant pool entry: the mask constant is already in float form.
; CHECK-LABEL: LCPI0_0:
; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
; Expected body: the -NEXT directives require andps to directly follow the
; compare and retq to directly follow andps.
; CHECK-LABEL: foo:
; CHECK: cmpeqps %xmm1, %xmm0
; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0
; CHECK-NEXT: retq

  ; Per-lane i1 result of the ordered-equal comparison.
  %cmp = fcmp oeq <4 x float> %val, %test
  ; Zero-extend so every lane is the integer 0 or 1.
  %ext = zext <4 x i1> %cmp to <4 x i32>
  ; Integer-to-float conversion that the directives above expect to be folded.
  %result = sitofp <4 x i32> %ext to <4 x float>
  ret <4 x float> %result
}

; @bar: converts the constant integer vector <4, 5, 6, 7> to float and stores
; it through %result.
; The lanes are deliberately not all the same value; the directives below
; expect each lane to be folded to its own float constant (4.0, 5.0, 6.0,
; 7.0) and the folded vector to be loaded from the constant pool.
; Note: only the constant pool contents and the load are verified; the store
; and the return are not matched by any directive.
define void @bar(<4 x float>* noalias %result) nounwind {
; Expected constant pool entry: one distinct float per lane.
; CHECK-LABEL: LCPI1_0:
; CHECK-NEXT: .long 1082130432              ## float 4.000000e+00
; CHECK-NEXT: .long 1084227584              ## float 5.000000e+00
; CHECK-NEXT: .long 1086324736              ## float 6.000000e+00
; CHECK-NEXT: .long 1088421888              ## float 7.000000e+00
; Expected body: the folded constant is loaded directly from the pool.
; CHECK-LABEL: bar:
; CHECK:  movaps LCPI1_0(%rip), %xmm0

  ; Unsigned int-to-float conversion of a non-splat constant vector.
  %val = uitofp <4 x i32> <i32 4, i32 5, i32 6, i32 7> to <4 x float>
  store <4 x float> %val, <4 x float>* %result
  ret void
}
X86: Constant fold converting vector setcc results to float. Since the result of a SETCC for X86 is 0 or -1 in each lane, we can move unary operations, in this case [su]int_to_fp through the mask operation and constant fold the operation away. Generally speaking: UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> AND(VECTOR_CMP(x,y), constant2) where constant2 is UNARYOP(constant). This implements the transform where UNARYOP is [su]int_to_fp. For example, consider the simple function: define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind { %cmp = fcmp oeq <4 x float> %val, %test %ext = zext <4 x i1> %cmp to <4 x i32> %result = sitofp <4 x i32> %ext to <4 x float> ret <4 x float> %result } Before this change, the SSE code is generated as: LCPI0_0: .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .section __TEXT,__text,regular,pure_instructions .globl _foo .align 4, 0x90 _foo: ## @foo cmpeqps %xmm1, %xmm0 andps LCPI0_0(%rip), %xmm0 cvtdq2ps %xmm0, %xmm0 retq After, the code is improved to: LCPI0_0: .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .section __TEXT,__text,regular,pure_instructions .globl _foo .align 4, 0x90 _foo: ## @foo cmpeqps %xmm1, %xmm0 andps LCPI0_0(%rip), %xmm0 retq The cvtdq2ps has been constant folded away and the floating point 1.0f vector lanes are materialized directly via the ModRM operand of andps. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213342 91177308-0d34-0410-b5e6-96231b3b80d8 2014-07-18 00:40:56 +00:00			`; RUN: llc < %s -mtriple=x86_64-apple-darwin \| FileCheck %s`

			`define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {`
DAG: fp->int conversion for non-splat constants. Constant fold the lanes of the input constant build_vector individually so we correctly handle when the vector elements are not all the same constant value. PR20394 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213798 91177308-0d34-0410-b5e6-96231b3b80d8 2014-07-23 20:41:31 +00:00			`; CHECK-LABEL: LCPI0_0:`
X86: Constant fold converting vector setcc results to float. Since the result of a SETCC for X86 is 0 or -1 in each lane, we can move unary operations, in this case [su]int_to_fp through the mask operation and constant fold the operation away. Generally speaking: UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> AND(VECTOR_CMP(x,y), constant2) where constant2 is UNARYOP(constant). This implements the transform where UNARYOP is [su]int_to_fp. For example, consider the simple function: define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind { %cmp = fcmp oeq <4 x float> %val, %test %ext = zext <4 x i1> %cmp to <4 x i32> %result = sitofp <4 x i32> %ext to <4 x float> ret <4 x float> %result } Before this change, the SSE code is generated as: LCPI0_0: .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .section __TEXT,__text,regular,pure_instructions .globl _foo .align 4, 0x90 _foo: ## @foo cmpeqps %xmm1, %xmm0 andps LCPI0_0(%rip), %xmm0 cvtdq2ps %xmm0, %xmm0 retq After, the code is improved to: LCPI0_0: .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .section __TEXT,__text,regular,pure_instructions .globl _foo .align 4, 0x90 _foo: ## @foo cmpeqps %xmm1, %xmm0 andps LCPI0_0(%rip), %xmm0 retq The cvtdq2ps has been constant folded away and the floating point 1.0f vector lanes are materialized directly via the ModRM operand of andps. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213342 91177308-0d34-0410-b5e6-96231b3b80d8 2014-07-18 00:40:56 +00:00			`; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00`
			`; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00`
			`; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00`
			`; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00`
			`; CHECK-LABEL: foo:`
			`; CHECK: cmpeqps %xmm1, %xmm0`
			`; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0`
			`; CHECK-NEXT: retq`

			`%cmp = fcmp oeq <4 x float> %val, %test`
			`%ext = zext <4 x i1> %cmp to <4 x i32>`
			`%result = sitofp <4 x i32> %ext to <4 x float>`
			`ret <4 x float> %result`
			`}`
DAG: fp->int conversion for non-splat constants. Constant fold the lanes of the input constant build_vector individually so we correctly handle when the vector elements are not all the same constant value. PR20394 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213798 91177308-0d34-0410-b5e6-96231b3b80d8 2014-07-23 20:41:31 +00:00
			`define void @bar(<4 x float>* noalias %result) nounwind {`
			`; CHECK-LABEL: LCPI1_0:`
			`; CHECK-NEXT: .long 1082130432 ## float 4.000000e+00`
			`; CHECK-NEXT: .long 1084227584 ## float 5.000000e+00`
			`; CHECK-NEXT: .long 1086324736 ## float 6.000000e+00`
			`; CHECK-NEXT: .long 1088421888 ## float 7.000000e+00`
			`; CHECK-LABEL: bar:`
			`; CHECK: movaps LCPI1_0(%rip), %xmm0`

			`%val = uitofp <4 x i32> <i32 4, i32 5, i32 6, i32 7> to <4 x float>`
			`store <4 x float> %val, <4 x float>* %result`
			`ret void`
			`}`