mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	R600: Implement isZExtFree.
This allows 64-bit operations that are truncated to be reduced to 32-bit ones. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204946 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -275,6 +275,22 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { | |||||||
|          (Dest->getPrimitiveSizeInBits() % 32 == 0); |          (Dest->getPrimitiveSizeInBits() % 32 == 0); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { | ||||||
|  |   const DataLayout *DL = getDataLayout(); | ||||||
|  |   unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType()); | ||||||
|  |   unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType()); | ||||||
|  |  | ||||||
|  |   return SrcSize == 32 && DestSize == 64; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const { | ||||||
|  |   // Any register load of a 64-bit value really requires 2 32-bit moves. For all | ||||||
|  |   // practical purposes, the extra mov 0 to load a 64-bit value is free.  As used, | ||||||
|  |   // this will enable reducing 64-bit operations to 32-bit, which is always | ||||||
|  |   // good. | ||||||
|  |   return Src == MVT::i32 && Dest == MVT::i64; | ||||||
|  | } | ||||||
|  |  | ||||||
| bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { | bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { | ||||||
|   // There aren't really 64-bit registers, but pairs of 32-bit ones and only a |   // There aren't really 64-bit registers, but pairs of 32-bit ones and only a | ||||||
|   // limited number of native 64-bit operations. Shrinking an operation to fit |   // limited number of native 64-bit operations. Shrinking an operation to fit | ||||||
|   | |||||||
| @@ -87,6 +87,10 @@ public: | |||||||
|   virtual bool isFNegFree(EVT VT) const override; |   virtual bool isFNegFree(EVT VT) const override; | ||||||
|   virtual bool isTruncateFree(EVT Src, EVT Dest) const override; |   virtual bool isTruncateFree(EVT Src, EVT Dest) const override; | ||||||
|   virtual bool isTruncateFree(Type *Src, Type *Dest) const override; |   virtual bool isTruncateFree(Type *Src, Type *Dest) const override; | ||||||
|  |  | ||||||
|  |   virtual bool isZExtFree(Type *Src, Type *Dest) const override; | ||||||
|  |   virtual bool isZExtFree(EVT Src, EVT Dest) const override; | ||||||
|  |  | ||||||
|   virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; |   virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; | ||||||
|  |  | ||||||
|   virtual MVT getVectorIdxTy() const override; |   virtual MVT getVectorIdxTy() const override; | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s | ; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s | ||||||
|  |  | ||||||
|  |  | ||||||
| declare i32 @llvm.r600.read.tidig.x() readnone | declare i32 @llvm.r600.read.tidig.x() readnone | ||||||
| @@ -68,3 +68,17 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add | |||||||
|   store <2 x i64> %result, <2 x i64> addrspace(1)* %out |   store <2 x i64> %result, <2 x i64> addrspace(1)* %out | ||||||
|   ret void |   ret void | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ; SI-LABEL: @trunc_i64_add_to_i32 | ||||||
|  | ; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]], | ||||||
|  | ; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]], | ||||||
|  | ; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]] | ||||||
|  | ; SI-NOT: ADDC | ||||||
|  | ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] | ||||||
|  | ; SI: BUFFER_STORE_DWORD [[VRESULT]], | ||||||
|  | define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { | ||||||
|  |   %add = add i64 %b, %a | ||||||
|  |   %trunc = trunc i64 %add to i32 | ||||||
|  |   store i32 %trunc, i32 addrspace(1)* %out, align 8 | ||||||
|  |   ret void | ||||||
|  | } | ||||||
|   | |||||||
| @@ -40,3 +40,15 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { | |||||||
|   store <4 x i32> %result, <4 x i32> addrspace(1)* %out |   store <4 x i32> %result, <4 x i32> addrspace(1)* %out | ||||||
|   ret void |   ret void | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ; SI-CHECK-LABEL: @trunc_i64_mul_to_i32 | ||||||
|  | ; SI-CHECK: S_LOAD_DWORD | ||||||
|  | ; SI-CHECK: S_LOAD_DWORD | ||||||
|  | ; SI-CHECK: V_MUL_LO_I32 | ||||||
|  | ; SI-CHECK: BUFFER_STORE_DWORD | ||||||
|  | define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { | ||||||
|  |   %mul = mul i64 %b, %a | ||||||
|  |   %trunc = trunc i64 %mul to i32 | ||||||
|  |   store i32 %trunc, i32 addrspace(1)* %out, align 8 | ||||||
|  |   ret void | ||||||
|  | } | ||||||
|   | |||||||
| @@ -114,3 +114,16 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 | |||||||
|   store i64 %or, i64 addrspace(1)* %out |   store i64 %or, i64 addrspace(1)* %out | ||||||
|   ret void |   ret void | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ; SI-LABEL: @trunc_i64_or_to_i32 | ||||||
|  | ; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]], | ||||||
|  | ; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]], | ||||||
|  | ; SI: S_OR_B32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]] | ||||||
|  | ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] | ||||||
|  | ; SI: BUFFER_STORE_DWORD [[VRESULT]], | ||||||
|  | define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { | ||||||
|  |   %add = or i64 %b, %a | ||||||
|  |   %trunc = trunc i64 %add to i32 | ||||||
|  |   store i32 %trunc, i32 addrspace(1)* %out, align 8 | ||||||
|  |   ret void | ||||||
|  | } | ||||||
|   | |||||||
| @@ -90,10 +90,10 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun | |||||||
| } | } | ||||||
|  |  | ||||||
| ; FUNC-LABEL: @sext_in_reg_i32_to_i64 | ; FUNC-LABEL: @sext_in_reg_i32_to_i64 | ||||||
| ; SI: S_LOAD_DWORDX2 | ; SI: S_LOAD_DWORD | ||||||
| ; SI: S_ADD_I32 | ; SI: S_LOAD_DWORD | ||||||
| ; SI-NEXT: S_ADDC_U32 | ; SI: S_ADD_I32 [[ADD:s[0-9]+]], | ||||||
| ; SI-NEXT: S_ASHR_I32 s{{[0-9]+}}, s{{[0-9]+}}, 31 | ; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31 | ||||||
| ; SI: BUFFER_STORE_DWORDX2 | ; SI: BUFFER_STORE_DWORDX2 | ||||||
| define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { | define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { | ||||||
|   %c = add i64 %a, %b |   %c = add i64 %a, %b | ||||||
|   | |||||||
| @@ -34,11 +34,12 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) { | |||||||
| ; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]], | ; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]], | ||||||
| ; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2 | ; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2 | ||||||
| ; SI: BUFFER_STORE_DWORD v[[LO_VREG]], | ; SI: BUFFER_STORE_DWORD v[[LO_VREG]], | ||||||
| define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) { | define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) { | ||||||
|   %aa = add i64 %a, 234 ; Prevent shrinking store. |   %aa = add i64 %a, 234 ; Prevent shrinking store. | ||||||
|   %b = shl i64 %aa, 2 |   %b = shl i64 %aa, 2 | ||||||
|   %result = trunc i64 %b to i32 |   %result = trunc i64 %b to i32 | ||||||
|   store i32 %result, i32 addrspace(1)* %out, align 4 |   store i32 %result, i32 addrspace(1)* %out, align 4 | ||||||
|  |   store i64 %b, i64 addrspace(1)* %out2, align 8 ; Prevent reducing ops to 32-bits | ||||||
|   ret void |   ret void | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user