diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 09052eddc93..67fa64b29fd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -515,6 +515,14 @@ void X86TargetLowering::resetOperationActions() { } } + // Special handling for half-precision floating point conversions. + // If we don't have F16C support, then lower half float conversions + // into library calls. + if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) { + setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); + setOperationAction(ISD::FP32_TO_FP16, MVT::i16, Expand); + } + if (Subtarget->hasPOPCNT()) { setOperationAction(ISD::CTPOP , MVT::i8 , Promote); } else { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e6ca519b7d5..f9a5ae1a3d5 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8538,6 +8538,21 @@ let Predicates = [HasF16C] in { (VCVTPH2PSrm addr:$src)>; } +// Patterns for matching conversions from float to half-float and vice versa. 
+let Predicates = [HasF16C] in { + def : Pat<(f32_to_f16 FR32:$src), + (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr + (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>; + + def : Pat<(f16_to_f32 GR16:$src), + (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr + (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >; + + def : Pat<(f16_to_f32 (i16 (f32_to_f16 FR32:$src))), + (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr + (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >; +} + //===----------------------------------------------------------------------===// // AVX2 Instructions //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/cvt16.ll b/test/CodeGen/X86/cvt16.ll new file mode 100644 index 00000000000..951b5c3ed38 --- /dev/null +++ b/test/CodeGen/X86/cvt16.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=LIBCALL +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=F16C +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -soft-float=1 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -soft-float=1 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT + +; This is a test for float to half float conversions on x86-64. +; +; If flag -soft-float is set, or if there is no F16C support, then: +; 1) half float to float conversions are +; translated into calls to __gnu_h2f_ieee defined +; by the compiler runtime library; +; 2) float to half float conversions are translated into calls +; to __gnu_f2h_ieee which is expected to be defined by the +; compiler runtime library. 
+; +; Otherwise (we have F16C support): +; 1) half float to float conversions are translated using +; vcvtph2ps instructions; +; 2) float to half float conversions are translated using +; vcvtps2ph instructions. + + +define void @test1(float %src, i16* %dest) { + %1 = tail call i16 @llvm.convert.to.fp16(float %src) + store i16 %1, i16* %dest, align 2 + ret void +} +; CHECK-LABEL: test1: +; LIBCALL: callq __gnu_f2h_ieee +; SOFTFLOAT: callq __gnu_f2h_ieee +; F16C: vcvtps2ph +; CHECK: ret + + +define float @test2(i16* nocapture %src) { + %1 = load i16* %src, align 2 + %2 = tail call float @llvm.convert.from.fp16(i16 %1) + ret float %2 +} +; CHECK-LABEL: test2: +; LIBCALL: jmp __gnu_h2f_ieee +; SOFTFLOAT: callq __gnu_h2f_ieee +; F16C: vcvtph2ps +; F16C: ret + + +define float @test3(float %src) nounwind uwtable readnone { + %1 = tail call i16 @llvm.convert.to.fp16(float %src) + %2 = tail call float @llvm.convert.from.fp16(i16 %1) + ret float %2 +} + +; CHECK-LABEL: test3: +; LIBCALL: callq __gnu_f2h_ieee +; LIBCALL: jmp __gnu_h2f_ieee +; SOFTFLOAT: callq __gnu_f2h_ieee +; SOFTFLOAT: callq __gnu_h2f_ieee +; F16C: vcvtps2ph +; F16C-NEXT: vcvtph2ps +; F16C: ret + +declare float @llvm.convert.from.fp16(i16) nounwind readnone +declare i16 @llvm.convert.to.fp16(float) nounwind readnone +