mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-31 08:16:47 +00:00 
			
		
		
		
	We now consider the FPOpFusion flag when determining whether to fuse ops. We also explicitly emit add.rn when fusion is disabled to prevent ptxas from fusing the operations on its own. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213287 91177308-0d34-0410-b5e6-96231b3b80d8
		
			
				
	
	
		
			34 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
			
		
		
	
	
			34 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
 | |
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT
 | |
| 
 | |
| target triple = "nvptx64-unknown-cuda"
 | |
| 
 | |
| ;; Make sure we are generating proper instruction sequences for fused ops
 | |
| ;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
 | |
| ;; add.f32 otherwise.  Without an explicit rounding mode on add.f32, ptxas
 | |
| ;; is free to fuse with a multiply if it is able.  If fusion is not allowed,
 | |
| ;; we do not form fma.rn at the PTX level and explicitly generate add.rn
 | |
| ;; for all adds to prevent ptxas from fusing the ops.
 | |
| 
 | |
| ;; FAST-LABEL: @t0
 | |
| ;; DEFAULT-LABEL: @t0
 | |
| define float @t0(float %a, float %b, float %c) {
 | |
| ;; Computes (%a * %b) + %c as a separate fmul followed by an fadd.
 | |
| ;; The FAST run (-fp-contract=fast) expects the pair to become one fma.rn.f32.
 | |
| ;; The DEFAULT run expects the ops to stay separate, each carrying an
 | |
| ;; explicit .rn rounding mode (mul.rn.f32, then add.rn.f32).
 | |
| ;; FAST: fma.rn.f32
 | |
| ;; DEFAULT: mul.rn.f32
 | |
| ;; DEFAULT: add.rn.f32
 | |
|   %v0 = fmul float %a, %b
 | |
|   %v1 = fadd float %v0, %c
 | |
|   ret float %v1
 | |
| }
 | |
| 
 | |
| ;; FAST-LABEL: @t1
 | |
| ;; DEFAULT-LABEL: @t1
 | |
| define float @t1(float %a, float %b) {
 | |
| ;; A lone fadd: there is no multiply to combine with, so we cannot form an
 | |
| ;; fma here. The DEFAULT run must still see an explicit add.rn.f32, which
 | |
| ;; prevents ptxas from fusing this add with anything else. The FAST run
 | |
| ;; (-fp-contract=fast) expects a plain add.f32 with no rounding modifier.
 | |
| ;; FAST: add.f32
 | |
| ;; DEFAULT: add.rn.f32
 | |
|   %v1 = fadd float %a, %b
 | |
|   ret float %v1
 | |
| }
 |