mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-30 00:16:48 +00:00 
			
		
		
		
	[NVPTX] Add support for cttz/ctlz/ctpop
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185176 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -216,6 +216,22 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) | |||||||
|   // Custom handling for i8 intrinsics |   // Custom handling for i8 intrinsics | ||||||
|   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); |   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); | ||||||
|  |  | ||||||
|  |   setOperationAction(ISD::CTLZ, MVT::i16, Legal); | ||||||
|  |   setOperationAction(ISD::CTLZ, MVT::i32, Legal); | ||||||
|  |   setOperationAction(ISD::CTLZ, MVT::i64, Legal); | ||||||
|  |   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal); | ||||||
|  |   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal); | ||||||
|  |   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal); | ||||||
|  |   setOperationAction(ISD::CTTZ, MVT::i16, Expand); | ||||||
|  |   setOperationAction(ISD::CTTZ, MVT::i32, Expand); | ||||||
|  |   setOperationAction(ISD::CTTZ, MVT::i64, Expand); | ||||||
|  |   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); | ||||||
|  |   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); | ||||||
|  |   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); | ||||||
|  |   setOperationAction(ISD::CTPOP, MVT::i16, Legal); | ||||||
|  |   setOperationAction(ISD::CTPOP, MVT::i32, Legal); | ||||||
|  |   setOperationAction(ISD::CTPOP, MVT::i64, Legal); | ||||||
|  |  | ||||||
|   // Now deduce the information based on the above mentioned |   // Now deduce the information based on the above mentioned | ||||||
|   // actions |   // actions | ||||||
|   computeRegisterProperties(); |   computeRegisterProperties(); | ||||||
|   | |||||||
| @@ -2406,6 +2406,64 @@ def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), | |||||||
|                            "mov.b64\t{{$d1, $d2}}, $s;", |                            "mov.b64\t{{$d1, $d2}}, $s;", | ||||||
|                           []>; |                           []>; | ||||||
|  |  | ||||||
|  | // Count leading zeros. | ||||||
|  | // Both forms write their result to a 32-bit register (Int32Regs:$d), | ||||||
|  | // including the form that takes a 64-bit input. The instructions carry | ||||||
|  | // an empty pattern list; selection is done by the Pat<> records below. | ||||||
|  | def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), | ||||||
|  |                        "clz.b32\t$d, $a;", | ||||||
|  |                        []>; | ||||||
|  | def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), | ||||||
|  |                        "clz.b64\t$d, $a;", | ||||||
|  |                        []>; | ||||||
|  |  | ||||||
|  | // 32-bit maps directly onto clz.b32. ctlz and ctlz_zero_undef select | ||||||
|  | // the same instruction. | ||||||
|  | def : Pat<(ctlz Int32Regs:$a), | ||||||
|  |           (CLZr32 Int32Regs:$a)>; | ||||||
|  | def : Pat<(ctlz_zero_undef Int32Regs:$a), | ||||||
|  |           (CLZr32 Int32Regs:$a)>; | ||||||
|  |  | ||||||
|  | // For 64-bit, clz.b64 produces a 32-bit result, so the result is | ||||||
|  | // zero-extended to 64 bits to match the i64 result type LLVM expects. | ||||||
|  | def : Pat<(ctlz Int64Regs:$a), | ||||||
|  |           (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; | ||||||
|  | def : Pat<(ctlz_zero_undef Int64Regs:$a), | ||||||
|  |           (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; | ||||||
|  |  | ||||||
|  | // For 16-bit, the input is zero-extended to 32 bits and counted with | ||||||
|  | // clz.b32. The 16 zero bits introduced by the extension are counted | ||||||
|  | // too, so the 32-bit count is always 16 more than the i16 answer. | ||||||
|  | // The count is truncated back to 16 bits (it is at most 32, so nothing | ||||||
|  | // is lost) and 16 is then subtracted to remove the extension's zeros. | ||||||
|  | def : Pat<(ctlz Int16Regs:$a), | ||||||
|  |           (SUBi16ri (CVT_u16_u32 (CLZr32 | ||||||
|  |             (CVT_u32_u16 Int16Regs:$a, CvtNONE)), | ||||||
|  |            CvtNONE), 16)>; | ||||||
|  | def : Pat<(ctlz_zero_undef Int16Regs:$a), | ||||||
|  |           (SUBi16ri (CVT_u16_u32 (CLZr32 | ||||||
|  |             (CVT_u32_u16 Int16Regs:$a, CvtNONE)), | ||||||
|  |            CvtNONE), 16)>; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | // Population count. | ||||||
|  | // Both forms write their result to a 32-bit register (Int32Regs:$d), | ||||||
|  | // including the form that takes a 64-bit input. The instructions carry | ||||||
|  | // an empty pattern list; selection is done by the Pat<> records below. | ||||||
|  | def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), | ||||||
|  |                         "popc.b32\t$d, $a;", | ||||||
|  |                         []>; | ||||||
|  | def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), | ||||||
|  |                         "popc.b64\t$d, $a;", | ||||||
|  |                         []>; | ||||||
|  |  | ||||||
|  | // 32-bit maps directly onto popc.b32. | ||||||
|  | def : Pat<(ctpop Int32Regs:$a), | ||||||
|  |           (POPCr32 Int32Regs:$a)>; | ||||||
|  |  | ||||||
|  | // For 64-bit, popc.b64 produces a 32-bit result, so the result is | ||||||
|  | // zero-extended to 64 bits to match the i64 result type LLVM expects. | ||||||
|  | def : Pat<(ctpop Int64Regs:$a), | ||||||
|  |           (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; | ||||||
|  |  | ||||||
|  | // For 16-bit, the input is zero-extended to 32 bits, counted with | ||||||
|  | // popc.b32, and the count is truncated back to 16 bits. The extension | ||||||
|  | // adds only zero bits, so the count is unchanged by it and is at most | ||||||
|  | // 16, which the truncation preserves. | ||||||
|  | def : Pat<(ctpop Int16Regs:$a), | ||||||
|  |           (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), | ||||||
|  |            CvtNONE)>; | ||||||
|  |  | ||||||
| // fround f64 -> f32 | // fround f64 -> f32 | ||||||
| def : Pat<(f32 (fround Float64Regs:$a)), | def : Pat<(f32 (fround Float64Regs:$a)), | ||||||
|           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; |           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; | ||||||
|   | |||||||
							
								
								
									
										44
									
								
								test/CodeGen/NVPTX/ctlz.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								test/CodeGen/NVPTX/ctlz.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | |||||||
|  | ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s | ||||||
|  |  | ||||||
|  | ; Checks NVPTX code generation for the llvm.ctlz intrinsic at i16, i32 | ||||||
|  | ; and i64. The i32 and i16 cases expect clz.b32 and the i64 cases expect | ||||||
|  | ; clz.b64. The first three functions pass "i1 false" as the second | ||||||
|  | ; intrinsic argument; the "_2" functions repeat them with "i1 true". | ||||||
|  | ; NOTE(review): the functions are named myctpop* although they call | ||||||
|  | ; llvm.ctlz; this looks like a copy-paste from ctpop.ll. Confirm before | ||||||
|  | ; renaming. | ||||||
|  | ; NOTE(review): the CHECK lines are not tied to function labels, so they | ||||||
|  | ; only constrain the order of matches in the whole output. | ||||||
|  |  | ||||||
|  | target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" | ||||||
|  |  | ||||||
|  | declare i16 @llvm.ctlz.i16(i16, i1) readnone | ||||||
|  | declare i32 @llvm.ctlz.i32(i32, i1) readnone | ||||||
|  | declare i64 @llvm.ctlz.i64(i64, i1) readnone | ||||||
|  |  | ||||||
|  | ; i32 ctlz, second argument false. | ||||||
|  | define i32 @myctpop(i32 %a) { | ||||||
|  | ; CHECK: clz.b32 | ||||||
|  |   %val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone | ||||||
|  |   ret i32 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ; i16 ctlz, second argument false. The 32-bit instruction is expected | ||||||
|  | ; here as well. | ||||||
|  | define i16 @myctpop16(i16 %a) { | ||||||
|  | ; CHECK: clz.b32 | ||||||
|  |   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone | ||||||
|  |   ret i16 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ; i64 ctlz, second argument false. | ||||||
|  | define i64 @myctpop64(i64 %a) { | ||||||
|  | ; CHECK: clz.b64 | ||||||
|  |   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone | ||||||
|  |   ret i64 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ; Same three widths again with the second argument set to true; the | ||||||
|  | ; expected instructions are unchanged. | ||||||
|  | define i32 @myctpop_2(i32 %a) { | ||||||
|  | ; CHECK: clz.b32 | ||||||
|  |   %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone | ||||||
|  |   ret i32 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i16 @myctpop16_2(i16 %a) { | ||||||
|  | ; CHECK: clz.b32 | ||||||
|  |   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone | ||||||
|  |   ret i16 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @myctpop64_2(i64 %a) { | ||||||
|  | ; CHECK: clz.b64 | ||||||
|  |   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone | ||||||
|  |   ret i64 %val | ||||||
|  | } | ||||||
							
								
								
									
										25
									
								
								test/CodeGen/NVPTX/ctpop.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								test/CodeGen/NVPTX/ctpop.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | |||||||
|  | ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s | ||||||
|  |  | ||||||
|  | ; Checks NVPTX code generation for the llvm.ctpop intrinsic at i16, i32 | ||||||
|  | ; and i64. The i32 and i16 cases expect popc.b32 and the i64 case | ||||||
|  | ; expects popc.b64. | ||||||
|  | ; NOTE(review): the CHECK lines are not tied to function labels, so they | ||||||
|  | ; only constrain the order of matches in the whole output. | ||||||
|  |  | ||||||
|  | target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" | ||||||
|  |  | ||||||
|  | ; i32 population count. | ||||||
|  | define i32 @myctpop(i32 %a) { | ||||||
|  | ; CHECK: popc.b32 | ||||||
|  |   %val = tail call i32 @llvm.ctpop.i32(i32 %a) | ||||||
|  |   ret i32 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ; i16 population count. The 32-bit instruction is expected here as well. | ||||||
|  | define i16 @myctpop16(i16 %a) { | ||||||
|  | ; CHECK: popc.b32 | ||||||
|  |   %val = tail call i16 @llvm.ctpop.i16(i16 %a) | ||||||
|  |   ret i16 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ; i64 population count. | ||||||
|  | define i64 @myctpop64(i64 %a) { | ||||||
|  | ; CHECK: popc.b64 | ||||||
|  |   %val = tail call i64 @llvm.ctpop.i64(i64 %a) | ||||||
|  |   ret i64 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | declare i16 @llvm.ctpop.i16(i16) | ||||||
|  | declare i32 @llvm.ctpop.i32(i32) | ||||||
|  | declare i64 @llvm.ctpop.i64(i64) | ||||||
							
								
								
									
										45
									
								
								test/CodeGen/NVPTX/cttz.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								test/CodeGen/NVPTX/cttz.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | |||||||
|  | ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s | ||||||
|  |  | ||||||
|  | ; Checks NVPTX code generation for the llvm.cttz intrinsic at i16, i32 | ||||||
|  | ; and i64. The expected instructions are popc.b32 (i32 and i16) and | ||||||
|  | ; popc.b64 (i64), not a dedicated count-trailing-zeros instruction. | ||||||
|  | ; NOTE(review): presumably this is because the target marks CTTZ as | ||||||
|  | ; Expand and the generic expansion is built on population count; | ||||||
|  | ; confirm against the legalizer. | ||||||
|  | ; NOTE(review): the functions are named myctpop* although they call | ||||||
|  | ; llvm.cttz; this looks like a copy-paste from ctpop.ll. Confirm before | ||||||
|  | ; renaming. | ||||||
|  | ; The first three functions pass "i1 false" as the second intrinsic | ||||||
|  | ; argument; the "_2" functions repeat them with "i1 true". | ||||||
|  |  | ||||||
|  |  | ||||||
|  | target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" | ||||||
|  |  | ||||||
|  | declare i16 @llvm.cttz.i16(i16, i1) readnone | ||||||
|  | declare i32 @llvm.cttz.i32(i32, i1) readnone | ||||||
|  | declare i64 @llvm.cttz.i64(i64, i1) readnone | ||||||
|  |  | ||||||
|  | ; i32 cttz, second argument false. | ||||||
|  | define i32 @myctpop(i32 %a) { | ||||||
|  | ; CHECK: popc.b32 | ||||||
|  |   %val = call i32 @llvm.cttz.i32(i32 %a, i1 false) readnone | ||||||
|  |   ret i32 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ; i16 cttz, second argument false. The 32-bit instruction is expected | ||||||
|  | ; here as well. | ||||||
|  | define i16 @myctpop16(i16 %a) { | ||||||
|  | ; CHECK: popc.b32 | ||||||
|  |   %val = call i16 @llvm.cttz.i16(i16 %a, i1 false) readnone | ||||||
|  |   ret i16 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ; i64 cttz, second argument false. | ||||||
|  | define i64 @myctpop64(i64 %a) { | ||||||
|  | ; CHECK: popc.b64 | ||||||
|  |   %val = call i64 @llvm.cttz.i64(i64 %a, i1 false) readnone | ||||||
|  |   ret i64 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ; Same three widths again with the second argument set to true; the | ||||||
|  | ; expected instructions are unchanged. | ||||||
|  | define i32 @myctpop_2(i32 %a) { | ||||||
|  | ; CHECK: popc.b32 | ||||||
|  |   %val = call i32 @llvm.cttz.i32(i32 %a, i1 true) readnone | ||||||
|  |   ret i32 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i16 @myctpop16_2(i16 %a) { | ||||||
|  | ; CHECK: popc.b32 | ||||||
|  |   %val = call i16 @llvm.cttz.i16(i16 %a, i1 true) readnone | ||||||
|  |   ret i16 %val | ||||||
|  | } | ||||||
|  |  | ||||||
|  | define i64 @myctpop64_2(i64 %a) { | ||||||
|  | ; CHECK: popc.b64 | ||||||
|  |   %val = call i64 @llvm.cttz.i64(i64 %a, i1 true) readnone | ||||||
|  |   ret i64 %val | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user