llvm-6502/test/CodeGen/AArch64/f16-convert.ll
Tim Northover f8d927f22b CodeGen: emit IR-level f16 conversion intrinsics as fptrunc/fpext
This makes the first stage DAG for @llvm.convert.to.fp16 an fptrunc,
and correspondingly @llvm.convert.from.fp16 an fpext. The legalisation
path is now uniform, regardless of the input IR:

  fptrunc -> FP_TO_FP16 (if f16 illegal) -> libcall
  fpext -> FP16_TO_FP (if f16 illegal) -> libcall

Each target should be able to select the version that best matches its
operations and not be required to duplicate patterns for both fptrunc
and FP_TO_FP16 (for example).

As a result we can remove some redundant AArch64 patterns.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213507 91177308-0d34-0410-b5e6-96231b3b80d8
2014-07-21 09:13:56 +00:00

252 lines
7.3 KiB
LLVM

; RUN: llc < %s -mtriple=arm64-apple-ios -asm-verbose=false | FileCheck %s
define float @load0(i16* nocapture readonly %a) nounwind {
; CHECK-LABEL: load0:
; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0]
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
%tmp = load i16* %a, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
define double @load1(i16* nocapture readonly %a) nounwind {
; CHECK-LABEL: load1:
; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0]
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
%tmp = load i16* %a, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
define float @load2(i16* nocapture readonly %a, i32 %i) nounwind {
; CHECK-LABEL: load2:
; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1]
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
%tmp = load i16* %arrayidx, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
define double @load3(i16* nocapture readonly %a, i32 %i) nounwind {
; CHECK-LABEL: load3:
; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1]
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
%tmp = load i16* %arrayidx, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
define float @load4(i16* nocapture readonly %a, i64 %i) nounwind {
; CHECK-LABEL: load4:
; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16* %a, i64 %i
%tmp = load i16* %arrayidx, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
define double @load5(i16* nocapture readonly %a, i64 %i) nounwind {
; CHECK-LABEL: load5:
; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16* %a, i64 %i
%tmp = load i16* %arrayidx, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
define float @load6(i16* nocapture readonly %a) nounwind {
; CHECK-LABEL: load6:
; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20]
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16* %a, i64 10
%tmp = load i16* %arrayidx, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
define double @load7(i16* nocapture readonly %a) nounwind {
; CHECK-LABEL: load7:
; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20]
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16* %a, i64 10
%tmp = load i16* %arrayidx, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
define float @load8(i16* nocapture readonly %a) nounwind {
; CHECK-LABEL: load8:
; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20]
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16* %a, i64 -10
%tmp = load i16* %arrayidx, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
define double @load9(i16* nocapture readonly %a) nounwind {
; CHECK-LABEL: load9:
; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20]
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16* %a, i64 -10
%tmp = load i16* %arrayidx, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
define void @store0(i16* nocapture %a, float %val) nounwind {
; CHECK-LABEL: store0:
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
store i16 %tmp, i16* %a, align 2
ret void
}
define void @store1(i16* nocapture %a, double %val) nounwind {
; CHECK-LABEL: store1:
; CHECK-NEXT: fcvt h0, d0
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
store i16 %tmp, i16* %a, align 2
ret void
}
define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind {
; CHECK-LABEL: store2:
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind {
; CHECK-LABEL: store3:
; CHECK-NEXT: fcvt h0, d0
; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind {
; CHECK-LABEL: store4:
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: str h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%arrayidx = getelementptr inbounds i16* %a, i64 %i
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind {
; CHECK-LABEL: store5:
; CHECK-NEXT: fcvt h0, d0
; CHECK-NEXT: str h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%arrayidx = getelementptr inbounds i16* %a, i64 %i
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store6(i16* nocapture %a, float %val) nounwind {
; CHECK-LABEL: store6:
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: str h0, [x0, #20]
; CHECK-NEXT: ret
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%arrayidx = getelementptr inbounds i16* %a, i64 10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store7(i16* nocapture %a, double %val) nounwind {
; CHECK-LABEL: store7:
; CHECK-NEXT: fcvt h0, d0
; CHECK-NEXT: str h0, [x0, #20]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%arrayidx = getelementptr inbounds i16* %a, i64 10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store8(i16* nocapture %a, float %val) nounwind {
; CHECK-LABEL: store8:
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: stur h0, [x0, #-20]
; CHECK-NEXT: ret
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%arrayidx = getelementptr inbounds i16* %a, i64 -10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store9(i16* nocapture %a, double %val) nounwind {
; CHECK-LABEL: store9:
; CHECK-NEXT: fcvt h0, d0
; CHECK-NEXT: stur h0, [x0, #-20]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%arrayidx = getelementptr inbounds i16* %a, i64 -10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone