mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	[X86][FastIsel] Teach how to select float-half conversion intrinsics.
This patch teaches X86FastISel how to select intrinsic 'convert_from_fp16' and intrinsic 'convert_to_fp16'. If the target has F16C, we can select VCVTPS2PHrr for a float-half conversion, and VCVTPH2PSrr for a half-float conversion. Differential Revision: http://reviews.llvm.org/D7673 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230043 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -2182,6 +2182,68 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { | ||||
|   // FIXME: Handle more intrinsics. | ||||
|   switch (II->getIntrinsicID()) { | ||||
|   default: return false; | ||||
|   case Intrinsic::convert_from_fp16: | ||||
|   case Intrinsic::convert_to_fp16: { | ||||
|     if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) | ||||
|       return false; | ||||
|  | ||||
|     const Value *Op = II->getArgOperand(0); | ||||
|     unsigned InputReg = getRegForValue(Op); | ||||
|     if (InputReg == 0) | ||||
|       return false; | ||||
|  | ||||
|     // F16C only allows converting from float to half and from half to float. | ||||
|     bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16; | ||||
|     if (IsFloatToHalf) { | ||||
|       if (!Op->getType()->isFloatTy()) | ||||
|         return false; | ||||
|     } else { | ||||
|       if (!II->getType()->isFloatTy()) | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     unsigned ResultReg = 0; | ||||
|     const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16); | ||||
|     if (IsFloatToHalf) { | ||||
|       // 'InputReg' is implicitly promoted from register class FR32 to | ||||
|       // register class VR128 by method 'constrainOperandRegClass' which is | ||||
|       // directly called by 'fastEmitInst_ri'. | ||||
|       // Instruction VCVTPS2PHrr takes an extra immediate operand which is | ||||
|       // used to provide rounding control. | ||||
|       InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0); | ||||
|  | ||||
|       // Move the lower 32-bits of InputReg to another register of class GR32. | ||||
|       ResultReg = createResultReg(&X86::GR32RegClass); | ||||
|       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | ||||
|               TII.get(X86::VMOVPDI2DIrr), ResultReg) | ||||
|           .addReg(InputReg, RegState::Kill); | ||||
|        | ||||
|       // The result value is in the lower 16-bits of ResultReg. | ||||
|       unsigned RegIdx = X86::sub_16bit; | ||||
|       ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx); | ||||
|     } else { | ||||
|       assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!"); | ||||
|       // Explicitly sign-extend the input to 32-bit. | ||||
|       InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg, | ||||
|                             /*Kill=*/false); | ||||
|  | ||||
|       // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr. | ||||
|       InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR, | ||||
|                             InputReg, /*Kill=*/true); | ||||
|  | ||||
|       InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true); | ||||
|  | ||||
|       // The result value is in the lower 32-bits of InputReg. | ||||
|       // Emit an explicit copy from register class VR128 to register class FR32. | ||||
|       ResultReg = createResultReg(&X86::FR32RegClass); | ||||
|       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | ||||
|               TII.get(TargetOpcode::COPY), ResultReg) | ||||
|           .addReg(InputReg, RegState::Kill); | ||||
|     } | ||||
|  | ||||
|     updateValueMap(II, ResultReg); | ||||
|     return true; | ||||
|   } | ||||
|   case Intrinsic::frameaddress: { | ||||
|     MachineFunction *MF = FuncInfo.MF; | ||||
|     if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI()) | ||||
|   | ||||
							
								
								
									
										23
									
								
								test/CodeGen/X86/fast-isel-double-half-convertion.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								test/CodeGen/X86/fast-isel-double-half-convertion.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| ; RUN: llc -fast-isel -fast-isel-abort -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | ||||
|  | ||||
| ; XFAIL: * | ||||
|  | ||||
| ; In the future, we might want to teach fast-isel how to expand a double-to-half | ||||
| ; conversion into a double-to-float conversion immediately followed by a | ||||
| ; float-to-half conversion. For now, fast-isel is expected to fail. | ||||
|  | ||||
| ; Half-to-double conversion. The i16 half value is obtained by truncating the | ||||
| ; i32 argument; the converted double (%1) is returned so that the module is | ||||
| ; well-formed and the expected failure comes from fast-isel itself. | ||||
| define double @test_fp16_to_fp64(i32 %a) { | ||||
| entry: | ||||
|   %0 = trunc i32 %a to i16 | ||||
|   %1 = call double @llvm.convert.from.fp16.f64(i16 %0) | ||||
|   ret double %1 | ||||
| } | ||||
|  | ||||
| ; Double-to-half conversion. The source operand is a double rather than a | ||||
| ; float, so fast-isel is expected to fail on this call for now. | ||||
| define i16 @test_fp64_to_fp16(double %a) { | ||||
| entry: | ||||
|   %0 = call i16 @llvm.convert.to.fp16.f64(double %a) | ||||
|   ret i16 %0 | ||||
| } | ||||
|  | ||||
| declare i16 @llvm.convert.to.fp16.f64(double) | ||||
| declare double @llvm.convert.from.fp16.f64(i16) | ||||
							
								
								
									
										28
									
								
								test/CodeGen/X86/fast-isel-float-half-convertion.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								test/CodeGen/X86/fast-isel-float-half-convertion.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| ; RUN: llc -fast-isel -fast-isel-abort -asm-verbose=false -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s | ||||
|  | ||||
| ; Verify that fast-isel correctly expands float-half conversions. | ||||
|  | ||||
| ; Float-to-half conversion. The expected lowering is a vcvtps2ph with an | ||||
| ; immediate of 0, followed by a vmovd of the result into %eax. | ||||
| define i16 @test_fp32_to_fp16(float %a) { | ||||
| ; CHECK-LABEL: test_fp32_to_fp16: | ||||
| ; CHECK: vcvtps2ph $0, %xmm0, %xmm0 | ||||
| ; CHECK-NEXT: vmovd %xmm0, %eax | ||||
| ; CHECK-NEXT: retq | ||||
| entry: | ||||
|   %0 = call i16 @llvm.convert.to.fp16.f32(float %a) | ||||
|   ret i16 %0 | ||||
| } | ||||
|  | ||||
| ; Half-to-float conversion. The i16 half value is obtained by truncating the | ||||
| ; i32 argument. The expected lowering sign-extends it to 32 bits (movswl), | ||||
| ; moves it into an XMM register (vmovd) and converts it with vcvtph2ps. | ||||
| define float @test_fp16_to_fp32(i32 %a) { | ||||
| ; CHECK-LABEL: test_fp16_to_fp32: | ||||
| ; CHECK: movswl %di, %eax | ||||
| ; CHECK-NEXT: vmovd %eax, %xmm0 | ||||
| ; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 | ||||
| ; CHECK-NEXT: retq | ||||
| entry: | ||||
|   %0 = trunc i32 %a to i16 | ||||
|   %1 = call float @llvm.convert.from.fp16.f32(i16 %0) | ||||
|   ret float %1 | ||||
| } | ||||
|  | ||||
| declare i16 @llvm.convert.to.fp16.f32(float) | ||||
| declare float @llvm.convert.from.fp16.f32(i16) | ||||
		Reference in New Issue
	
	Block a user