llvm-6502/test/CodeGen/SystemZ/fp-move-09.ll
Ulrich Weigand 878c6281d3 [SystemZ] Add CodeGen support for v4f32
The architecture doesn't really have any native v4f32 operations except
v4f32->v2f64 and v2f64->v4f32 conversions, with only half of the v4f32
elements being used.  Even so, using vector registers for <4 x float>
and scalarising individual operations is much better than generating
completely scalar code, since there's much less register pressure.
It's also more efficient to do v4f32 comparisons by extending to 2
v2f64s, comparing those, then packing the result.

This particularly helps with llvmpipe.

Based on a patch by Richard Sandiford.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236523 91177308-0d34-0410-b5e6-96231b3b80d8
2015-05-05 19:27:45 +00:00

63 lines
1.7 KiB
LLVM

; Test moves between FPRs and GPRs for z196 and zEC12.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
; Check that moves from i32s to floats can use high registers.
define float @f1(i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: llhh [[REG:%r[0-5]]], 0(%r2)
; CHECK: oihh [[REG]], 16256
; CHECK: ldgr %f0, [[REG]]
; CHECK: br %r14
%base = load i16 , i16 *%ptr
%ext = zext i16 %base to i32
%full = or i32 %ext, 1065353216
%res = bitcast i32 %full to float
ret float %res
}
; Check that moves from floats to i32s can use high registers.
; This "store the low byte" technique is used by llvmpipe, for example.
define void @f2(float %val, i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: lgdr [[REG:%r[0-5]]], %f0
; CHECK: stch [[REG]], 0(%r2)
; CHECK: br %r14
%res = bitcast float %val to i32
%trunc = trunc i32 %res to i8
store i8 %trunc, i8 *%ptr
ret void
}
; Like f2, but with a conditional store.
define void @f3(float %val, i8 *%ptr, i32 %which) {
; CHECK-LABEL: f3:
; CHECK: cijlh %r3, 0,
; CHECK: lgdr [[REG:%r[0-5]]], %f0
; CHECK: stch [[REG]], 0(%r2)
; CHECK: br %r14
%int = bitcast float %val to i32
%trunc = trunc i32 %int to i8
%old = load i8 , i8 *%ptr
%cmp = icmp eq i32 %which, 0
%res = select i1 %cmp, i8 %trunc, i8 %old
store i8 %res, i8 *%ptr
ret void
}
; ...and again with 16-bit memory.
define void @f4(float %val, i16 *%ptr, i32 %which) {
; CHECK-LABEL: f4:
; CHECK: cijlh %r3, 0,
; CHECK: lgdr [[REG:%r[0-5]]], %f0
; CHECK: sthh [[REG]], 0(%r2)
; CHECK: br %r14
%int = bitcast float %val to i32
%trunc = trunc i32 %int to i16
%old = load i16 , i16 *%ptr
%cmp = icmp eq i32 %which, 0
%res = select i1 %cmp, i16 %trunc, i16 %old
store i16 %res, i16 *%ptr
ret void
}