mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-30 16:17:05 +00:00 
			
		
		
		
	X86: Pattern match scalar loads + vcvtph2ps into just vcvtph2ps.
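The 128-bit form of vcvtph2ps only reads the lower 64 bits of its source operand, so a 64-bit scalar load feeding the intrinsic can be folded into the instruction's memory operand.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206579 91177308-0d34-0410-b5e6-96231b3b80d8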
This commit is contained in:
		| @@ -8304,6 +8304,12 @@ let Predicates = [HasF16C] in { | ||||
|   defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L; | ||||
|   defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>; | ||||
|   defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L; | ||||
|  | ||||
|   // Pattern match vcvtph2ps of a scalar i64 load. | ||||
|   def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)), | ||||
|             (VCVTPH2PSrm addr:$src)>; | ||||
|   def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)), | ||||
|             (VCVTPH2PSrm addr:$src)>; | ||||
| } | ||||
|  | ||||
| //===----------------------------------------------------------------------===// | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| ; RUN: llc < %s -march=x86 -mattr=+avx,+f16c | FileCheck %s | ||||
| ; RUN: llc < %s -march=x86-64 -mattr=+avx,+f16c | FileCheck %s | ||||
|  | ||||
| define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) { | ||||
|   ; CHECK: vcvtph2ps | ||||
| @@ -30,3 +31,16 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) { | ||||
|   ret <8 x i16> %res | ||||
| } | ||||
| declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly | ||||
|  | ||||
| define <4 x float> @test_x86_vcvtps2ph_128_scalar(i64* %ptr) { | ||||
| ; CHECK-LABEL: test_x86_vcvtps2ph_128_scalar | ||||
| ; CHECK-NOT: vmov | ||||
| ; CHECK: vcvtph2ps (% | ||||
|  | ||||
|   %load = load i64* %ptr | ||||
|   %ins1 = insertelement <2 x i64> undef, i64 %load, i32 0 | ||||
|   %ins2 = insertelement <2 x i64> %ins1, i64 0, i32 1 | ||||
|   %bc = bitcast <2 x i64> %ins2 to <8 x i16> | ||||
|   %res = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %bc) #2 | ||||
|   ret <4 x float> %res | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user