Retro68/gcc/libgcc/config/rl78/fpmath-sf.S

; SF format is:
;
; [sign] 1.[23bits] E[8bits(n-127)]
;
; SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm
;
; [A+0] mmmmmmmm
; [A+1] mmmmmmmm
; [A+2] Emmmmmmm
; [A+3] SEEEEEEE
;
; Special values (xxx != 0):
;
;  r11      r10      r9       r8
;  [HL+3]   [HL+2]   [HL+1]   [HL+0]   
;  s1111111 10000000 00000000 00000000	infinity
;  s1111111 1xxxxxxx xxxxxxxx xxxxxxxx	NaN
;  s0000000 00000000 00000000 00000000	zero
;  s0000000 0xxxxxxx xxxxxxxx xxxxxxxx	denormals
;
; Note that CMPtype is "signed char" for rl78
;
	
#include "vregs.h"

#define Z	PSW.6

; External Functions:
;
;  __int_isnan  [HL] -> Z if NaN
;  __int_iszero  [HL] -> Z if zero

START_FUNC	__int_isinf
	;; [HL] points to a packed SF value.  Returns with Z set if the
	;; value is an infinity (either sign), NZ otherwise.
	;; Writes A and X.

	;; Build AX = (byte3 & 0x7f):(byte2 & 0x80), i.e. the eight exponent
	;; bits with the sign and fraction bits masked away.
	mov	a, [hl+2]
	and	a, #0x80
	mov	x, a
	mov	a, [hl+3]
	and	a, #0x7f
	cmpw	ax, #0x7f80	; exponent field all ones?
	skz			; yes: skip the early return
	ret			; return NZ, exponent not all ones so not Inf
	;; Exponent is all ones.  OR the 23 fraction bits together; the
	;; result is zero (Z set) only for an infinity, non-zero for a NaN.
	mov	a, [hl+2]
	and	a, #0x7f
	or	a, [hl+1]
	or	a, [hl]
	ret

END_FUNC	__int_isinf

;; Offsets into the unpacked operand records used by the routines below.
;; HL points at record "A"; record "B" follows it at HL+8.  As written
;; by _int_unpack_sf each record holds:
;;   +0  sign byte (0x00 or 0xff)
;;   +2  exponent word with the bias of 127 removed
;;   +4  fraction, four bytes little-endian, shifted left by one so
;;       that bit 0 serves as a rounding bit
#define A_SIGN		[hl+0]	/* byte */
#define A_EXP		[hl+2]	/* word */
#define A_FRAC_L	[hl+4]	/* word */
#define A_FRAC_LH	[hl+5]	/* byte */
#define A_FRAC_H	[hl+6]	/* word or byte */
#define A_FRAC_HH	[hl+7]	/* byte */

;; Same fields for record "B", eight bytes further on.
#define B_SIGN		[hl+8]
#define B_EXP		[hl+10]
#define B_FRAC_L	[hl+12]
#define B_FRAC_LH	[hl+13]
#define B_FRAC_H	[hl+14]
#define B_FRAC_HH	[hl+15]

START_FUNC	_int_unpack_sf
	;; convert 32-bit SFmode [DE] to 8-byte struct [HL] ("A")
	;;
	;; In:  DE -> packed SF value, HL -> destination record.
	;; Out: A_SIGN, A_EXP and the four A_FRAC bytes are written.
	;; Writes AX and BC.

	;; Sign: arithmetic shift replicates bit 7, giving 0x00 or 0xff.
	mov	a, [de+3]
	sar	a, 7
	mov	A_SIGN, a

	;; Exponent: drop the sign bit from the top word, then shift the
	;; eight exponent bits down to bits 7..0 of AX.
	movw	ax, [de+2]
	and	a, #0x7f
	shrw	ax, 7
	movw	bc, ax		; remember if the exponent is all zeros
	subw	ax, #127	; exponent is now non-biased
	movw	A_EXP, ax
	;; NOTE(review): a zero exponent field is stored as -127 here with
	;; no hidden bit, whereas IEEE denormals scale by 2^-126.  Confirm
	;; that the callers and the pack routine account for this.

	;; Low 16 fraction bits are copied as-is.
	movw	ax, [de]
	movw	A_FRAC_L, ax

	;; High 7 fraction bits, plus the implicit leading 1 unless the
	;; biased exponent (low byte kept in C) was zero.
	mov	a, [de+2]
	and	a, #0x7f
	cmp0	c		; if the exp is all zeros, it's denormal
	skz			; exponent zero: skip setting the hidden bit
	or	a, #0x80
	mov	A_FRAC_H, a

	mov	a, #0
	mov	A_FRAC_HH, a

	;; rounding-bit-shift
	;; Shift the whole 32-bit fraction left by one; the carry is
	;; passed from the low word through A_FRAC_H into A_FRAC_HH.
	movw	ax, A_FRAC_L
	shlw	ax, 1
	movw	A_FRAC_L, ax
	mov	a, A_FRAC_H
	rolc	a, 1
	mov	A_FRAC_H, a
	mov	a, A_FRAC_HH
	rolc	a, 1
	mov	A_FRAC_HH, a

	ret

END_FUNC	_int_unpack_sf

;	func(SF a,SF b)
;	[SP+4..7]	a
;	[SP+8..11]	b

START_FUNC		___subsf3

	;; SF ___subsf3 (SF a, SF b): computed as a + (-b).
	;; On entry a is at [SP+4..7] and b at [SP+8..11]; the result is
	;; whatever ___addsf3 leaves in r8..r11.

	;; a - b => a + (-b)

	;; Note - we cannot just change the sign of B on the stack and
	;; then fall through into __addsf3.  The stack'ed value may be
	;; used again (it was created by our caller after all).  Instead
	;; we have to allocate some stack space of our own, copy A and B,
	;; change the sign of B, call __addsf3, release the allocated stack
	;; and then return.

	subw	sp, #8		; room for private copies of a and b
	;; The extra +8 in each source offset accounts for the frame
	;; just allocated.  Copy a to [SP+0..3].
	movw	ax, [sp+4+8]
	movw	[sp], ax
	movw	ax, [sp+4+2+8]
	movw	[sp+2], ax
	;; Copy b to [SP+4..7]; the top byte is handled separately so
	;; that its sign bit can be flipped.
	movw	ax, [sp+4+4+8]
	movw	[sp+4], ax
	mov 	a, [sp+4+6+8]
	mov	[sp+6], a
	mov	a, [sp+4+7+8]
	xor	a, #0x80	; negate b by toggling its sign bit
	mov	[sp+7], a
	call	$!___addsf3
	addw	sp, #8		; release the copies
	ret
END_FUNC	___subsf3

START_FUNC	___addsf3

	;; SF ___addsf3 (SF a, SF b): returns a + b in r8..r11.
	;; On entry a is at [SP+4..7] and b at [SP+8..11].
	;;
	;; NaN, infinity and zero operands are dealt with first, working
	;; on the packed values.  Otherwise both operands are unpacked into
	;; a 16-byte stack frame, brought to a common exponent, added or
	;; subtracted according to their signs, and the result is packed
	;; by __rl78_int_pack_a_r8.

	;; if (isnan(a)) return a
	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	!!__int_isnan
	bnz	$1f
ret_a:
	;; Return operand a unchanged.  Only reached while SP still has
	;; its entry value (before the unpack frame is allocated).
	movw	ax, [sp+4]
	movw	r8, ax
	movw	ax, [sp+6]
	movw	r10, ax
	ret

1:	;; if (isnan (b)) return b;
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	!!__int_isnan
	bnz	$2f
ret_b:
	;; Return operand b unchanged (same SP assumption as ret_a).
	movw	ax, [sp+8]
	movw	r8, ax
	movw	ax, [sp+10]
	movw	r10, ax
	ret

2:	;; if (isinf (a))
	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	$!__int_isinf
	bnz	$3f

	;;   if (isinf (b) && a->sign != b->sign) return NaN
	
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	$!__int_isinf
	bnz	$ret_a		; b is finite: Inf + finite = a

	;; Both are infinite: XOR the top bytes and test bit 7 to see
	;; whether the signs differ.
	mov	a, [sp+7]
	mov	h, a
	mov	a, [sp+11]
	xor	a, h
	bf	a.7, $ret_a	; same sign: return a

	;; Opposite infinities: return the NaN pattern 0x7f800001.
	movw	r8,  #0x0001
	movw	r10, #0x7f80
	ret

3:	;; if (isinf (b)) return b;
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	$!__int_isinf
	bz	$ret_b

	;; if (iszero (b))
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	!!__int_iszero
	bnz	$4f

	;;   if (iszero (a))
	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	!!__int_iszero
	bnz	$ret_a		; b is zero, a is not: return a

	;; Both operands are zero.  Return the low word of a and the high
	;; word of b with its top byte ANDed with the top byte of a, so the
	;; sign bit of the result is set only if both sign bits are set.
	movw	ax, [sp+4]
	movw	r8, ax
	mov	a, [sp+7]
	mov	h, a
	movw	ax, [sp+10]
	and	a, h
	movw	r10, ax
	ret

4:	;; if (iszero (a)) return b;
	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	!!__int_iszero
	bz	$ret_b

; Normalize the two numbers relative to each other.  At this point,
; we need the numbers converted to their "unpacked" format.

	subw	sp, #16		; Save room for two unpacked values.

	;; Unpack a: HL = SP (record A), DE = SP+16+4 (packed a, whose
	;; offset grew by the 16 bytes just allocated).
	movw	ax, sp
	movw	hl, ax
	addw	ax, #16+4
	movw	de, ax
	call	$!_int_unpack_sf

	;; Unpack b: HL = SP+8 (record B), DE = HL+16 = SP+16+8 (packed b).
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	addw	ax, #16+8-8
	movw	de, ax
	call	$!_int_unpack_sf

	;; From here on HL = SP so the A_ and B_ macros address the
	;; two records.
	movw	ax, sp
	movw	hl, ax

	;; diff = a.exponent - b.exponent
	movw	ax, B_EXP	; sign/exponent word
	movw	bc, ax
	movw	ax, A_EXP	; sign/exponent word
	
	subw	ax, bc		; a = a.exp - b.exp
	movw	de, ax		; d = sdiff

	;;  if (diff < 0) diff = -diff
	;; Two's complement negate of AX: complement both halves, add one.
	bf	a.7, $1f
	xor	a, #0xff
	xor	r_0, #0xff	; x
	incw	ax		; a = diff
1:	
	;; if (diff >= 24) zero the smaller one
	cmpw	ax, #24
	bc	$.L661		; if diff < 24 goto .L661

	;; zero out the smaller one
	;; The exponents are too far apart for the smaller operand to
	;; contribute: give it the larger exponent and a zero fraction.
	;; Its sign byte is left alone.

	movw	ax, de
	bt	a.7, $1f	; if sdiff < 0 (a_exp < b_exp) goto 1f
	;; "zero out" b
	movw	ax, A_EXP
	movw	B_EXP, ax
	movw	ax, #0
	movw	B_FRAC_L, ax
	movw	B_FRAC_H, ax
	br	$5f
1:	
	;; "zero out" a
	movw	ax, B_EXP
	movw	A_EXP, ax
	movw	ax, #0
	movw	A_FRAC_L, ax
	movw	A_FRAC_H, ax

	br	$5f
.L661:
	;; shift the smaller one so they have the same exponents
	;; DE holds the signed exponent difference and is stepped
	;; towards zero, one fraction bit per pass.
1:	
	;; While sdiff > 0: b is the smaller operand, shift it right.
	movw	ax, de
	bt	a.7, $1f
	cmpw	ax, #0		; sdiff > 0
	bnh	$1f		; if (sdiff <= 0) goto 1f

	decw	de
	incw	B_EXP		; in-memory increment, operand is [HL+byte]

	;; 32-bit right shift of B_FRAC: high word first, then the
	;; carry is rotated down through the two low bytes.
	movw	ax, B_FRAC_H
	shrw	ax, 1
	movw	B_FRAC_H, ax
	mov	a, B_FRAC_LH
	rorc	a, 1
	mov	B_FRAC_LH, a
	mov	a, B_FRAC_L
	rorc	a, 1
	mov	B_FRAC_L, a
	
	br	$1b
1:	
	;; While sdiff < 0: a is the smaller operand, shift it right.
	movw	ax, de
	bf	a.7, $1f

	incw	de
	incw	A_EXP		; in-memory increment, operand is [HL+byte]

	movw	ax, A_FRAC_H
	shrw	ax, 1
	movw	A_FRAC_H, ax
	mov	a, A_FRAC_LH
	rorc	a, 1
	mov	A_FRAC_LH, a
	mov	a, A_FRAC_L
	rorc	a, 1
	mov	A_FRAC_L, a
	
	br	$1b
1:	

5:	;; At this point, A and B have the same exponent.

	mov	a, A_SIGN
	cmp	a, B_SIGN
	bnz	$1f

	;; Same sign, just add.
	;; 32-bit add: low word, then carry into the two high bytes.
	;; The sign of A is already the sign of the result.
	movw	ax, A_FRAC_L
	addw	ax, B_FRAC_L
	movw	A_FRAC_L, ax
	mov	a, A_FRAC_H
	addc	a, B_FRAC_H
	mov	A_FRAC_H, a
	mov	a, A_FRAC_HH
	addc	a, B_FRAC_HH
	mov	A_FRAC_HH, a

	br	$.L728

1:	;; Signs differ - A has A_SIGN still.
	bf	a.7, $.L696

	;; A is negative, do B-A
	;; 32-bit subtract with borrow; the result goes into A_FRAC.
	movw	ax, B_FRAC_L
	subw	ax, A_FRAC_L
	movw	A_FRAC_L, ax
	mov	a, B_FRAC_H
	subc	a, A_FRAC_H
	mov	A_FRAC_H, a
	mov	a, B_FRAC_HH
	subc	a, A_FRAC_HH
	mov	A_FRAC_HH, a

	br	$.L698
.L696:
	;; B is negative, do A-B
	movw	ax, A_FRAC_L
	subw	ax, B_FRAC_L
	movw	A_FRAC_L, ax
	mov	a, A_FRAC_H
	subc	a, B_FRAC_H
	mov	A_FRAC_H, a
	mov	a, A_FRAC_HH
	subc	a, B_FRAC_HH
	mov	A_FRAC_HH, a

.L698:	
	;; A is still A_FRAC_HH
	;; Bit 7 of the top byte tells whether the difference went negative.
	bt	a.7, $.L706
	
	;; subtraction was positive
	mov	a, #0
	mov	A_SIGN, a
	br	$.L712

.L706:
	;; subtraction was negative
	mov	a, #0xff
	mov	A_SIGN, a

	;; This negates A_FRAC
	;; Byte-wise two's complement: complement each byte, add 1 to
	;; the lowest and propagate the carry upwards with ADDC.
	mov	a, A_FRAC_L
	xor	a, #0xff		; XOR doesn't mess with carry
	add	a, #1			; INC doesn't set the carry
	mov	A_FRAC_L, a
	mov	a, A_FRAC_LH
	xor	a, #0xff
	addc	a, #0
	mov	A_FRAC_LH, a
	mov	a, A_FRAC_H
	xor	a, #0xff
	addc	a, #0
	mov	A_FRAC_H, a
	mov	a, A_FRAC_HH
	xor	a, #0xff
	addc	a, #0
	mov	A_FRAC_HH, a

.L712:
	;; Renormalize the subtraction

	;; A zero fraction cannot be normalized; go straight to packing.
	mov	a, A_FRAC_L
	or	a, A_FRAC_LH
	or	a, A_FRAC_H
	or	a, A_FRAC_HH
	bz	$.L728

	;; Mantissa is not zero, left shift until the MSB is in the
	;; right place
	;; The loop runs until the high word reaches 0x0200 or more; the
	;; overflow adjustment at .L728 then shifts it back by one place.
1:
	movw	ax, A_FRAC_H
	cmpw	ax, #0x0200
	bnc	$.L728

	decw	A_EXP

	movw	ax, A_FRAC_L
	shlw	ax, 1
	movw	A_FRAC_L, ax
	movw	ax, A_FRAC_H
	rolwc	ax, 1
	movw	A_FRAC_H, ax
	br	$1b

.L728:
	;; normalize A and pack it

	;; If the high fraction word exceeds 0x01ff the leading 1 sits
	;; one place too high: shift right once and bump the exponent.
	movw	ax, A_FRAC_H
	cmpw	ax, #0x01ff
	bnh	$1f
	;; overflow in the mantissa; adjust
	movw	ax, A_FRAC_H
	shrw	ax, 1
	movw	A_FRAC_H, ax
	mov	a, A_FRAC_LH
	rorc	a, 1
	mov	A_FRAC_LH, a
	mov	a, A_FRAC_L
	rorc	a, 1
	mov	A_FRAC_L, a
	incw	A_EXP
1:	

	;; HL still points at record A; the packed result lands in r8..r11.
	call	$!__rl78_int_pack_a_r8
	addw	sp, #16		; drop the two unpacked records
	ret

END_FUNC	___addsf3

START_FUNC	__rl78_int_pack_a_r8
	;; pack A to R8
	;;
	;; In:  HL -> unpacked record A (sign byte, unbiased exponent
	;;      word, fraction carrying a rounding bit in bit 0).
	;; Out: r8..r11 = packed SF value.  The record is modified in
	;;      place.  Writes AX and BC.
	movw	ax, A_EXP
	addw	ax, #126	; not 127, we want the "bt/bf" test to check for denormals

	bf	a.7, $1f
	;; make a denormal
	;; AX is negative: shift the fraction right once per pass while
	;; counting AX (parked in BC during the shift) up towards zero.
2:
	movw	bc, ax
	movw	ax, A_FRAC_H
	shrw	ax, 1
	movw	A_FRAC_H, ax
	mov	a, A_FRAC_LH
	rorc	a, 1
	mov	A_FRAC_LH, a
	mov	a, A_FRAC_L
	rorc	a, 1
	mov	A_FRAC_L, a
	movw	ax, bc
	incw	ax
	bt	a.7, $2b
	decw	ax		; cancels the incw below, leaving exponent 0
1:	
	incw	ax		; now it's as if we added 127
	movw	A_EXP, ax

	cmpw	ax, #0xfe
	bnh	$1f		; biased exponent <= 0xfe: representable
	;; store #Inf instead
	mov	a, A_SIGN
	or	a, #0x7f	; A = s1111111
	mov	x, #0x80	; X = 10000000, so AX is the Inf high word
	movw	r10, ax
	movw	r8, #0
	ret

1:
	bf	a.7, $1f	; note AX has EXP at top of loop
	;; underflow, denormal?
	;; On arrival from the compare above AX is at most 0x00fe, so
	;; bit 7 of A is clear and the branch above is taken; this loop
	;; is retained as a guard.  Each pass shifts the 32-bit fraction
	;; right by one and increments the exponent.
	movw	ax, A_FRAC_H
	shrw	ax, 1
	movw	A_FRAC_H, ax
	mov	a, A_FRAC_LH
	rorc	a, 1
	mov	A_FRAC_LH, a	; byte store: only A holds the rotated value
	mov	a, A_FRAC_L
	rorc	a, 1
	mov	A_FRAC_L, a	; byte store: only A holds the rotated value
	incw	A_EXP
	movw	ax, A_EXP
	br	$1b

1:
	;; undo the rounding-bit-shift
	;; If the rounding bit (bit 0) is set, add one at that position
	;; before the final right shift discards it.
	mov	a, A_FRAC_L
	bf	a.0, $1f
	;; round up
	movw	ax, A_FRAC_L
	addw	ax, #1
	movw	A_FRAC_L, ax	; MOVW leaves the carry from ADDW intact
	bnc	$1f
	incw	A_FRAC_H

	;; If the rounding set the bit beyond the end of the fraction, increment the exponent.
	mov	a, A_FRAC_HH
	bf	a.1, $1f
	incw	A_EXP
	
1:	
	;; Shift the fraction right by one, removing the rounding bit.
	movw	ax, A_FRAC_H
	shrw	ax, 1
	movw	A_FRAC_H, ax
	mov	a, A_FRAC_LH
	rorc	a, 1
	mov	A_FRAC_LH, a
	mov	a, A_FRAC_L
	rorc	a, 1
	mov	A_FRAC_L, a

	;; Low 16 fraction bits go straight into r8/r9.
	movw	ax, A_FRAC_L
	movw	r8, ax

	;; If every fraction byte is zero, force the exponent to zero as
	;; well so that the packed value is a zero.
	or	a, x
	or	a, A_FRAC_H
	or	a, A_FRAC_HH
	bnz	$1f
	movw	ax, #0
	movw	A_EXP, ax
1:	
	;; r10 = Emmmmmmm: low exponent bit above fraction bits 22..16
	;; (the hidden bit is masked off).
	mov	a, A_FRAC_H
	and	a, #0x7f
	mov	b, a
	mov	a, A_EXP
	shl	a, 7
	or	a, b
	mov	r10, a

	;; r11 = SEEEEEEE: sign bit above the upper seven exponent bits.
	mov	a, A_SIGN
	and	a, #0x80
	mov	b, a
	mov	a, A_EXP
	shr	a, 1
	or	a, b
	mov	r11, a

	ret
END_FUNC	__rl78_int_pack_a_r8

START_FUNC	___mulsf3

	;; SF ___mulsf3 (SF a, SF b): returns a * b in r8..r11.
	;; On entry a is at [SP+4..7] and b at [SP+8..11].
	;;
	;; NaN, infinity and zero operands are handled on the packed
	;; values.  Otherwise both are unpacked, the fractions multiplied
	;; with ___muldi3, the exponents added, and the renormalized
	;; product is packed by __rl78_int_pack_a_r8.

	;; if (isnan(a)) return a
	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	!!__int_isnan
	bnz	$1f
mret_a:
	;; Return a with its sign bit XORed with the sign bit of b.
	;; Only reached while SP still has its entry value.
	movw	ax, [sp+4]
	movw	r8, ax
	mov	a, [sp+11]
	and	a, #0x80
	mov	b, a
	movw	ax, [sp+6]
	xor	a, b		; sign is always a ^ b
	movw	r10, ax
	ret
1:	
	;; if (isnan (b)) return b;
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	!!__int_isnan
	bnz	$1f
mret_b:
	;; Return b with its sign bit XORed with the sign bit of a.
	movw	ax, [sp+8]
	movw	r8, ax
	mov	a, [sp+7]
	and	a, #0x80
	mov	b, a
	movw	ax, [sp+10]
	xor	a, b		; sign is always a ^ b
	movw	r10, ax
	ret
1:	
	;; if (isinf (a)) return (b==0) ? nan : a
	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	$!__int_isinf
	bnz	$.L805

	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	!!__int_iszero
	bnz	$mret_a

	movw	r8,  #0x0001	; return NaN
	movw	r10, #0x7f80
	ret

.L805:	
	;; if (isinf (b)) return (a==0) ? nan : b
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	$!__int_isinf
	bnz	$.L814

	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	!!__int_iszero
	bnz	$mret_b

	movw	r8,  #0x0001	; return NaN
	movw	r10, #0x7f80
	ret

.L814:
	;; if (iszero (a)) return a (with the combined sign)
	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	!!__int_iszero
	bz	$mret_a
	
	;; if (iszero (b)) return b (with the combined sign)
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	!!__int_iszero
	bz	$mret_b

	;; at this point, we're doing the multiplication.

	subw	sp, #16	; save room for two unpacked values

	;; Unpack a: HL = SP (record A), DE = SP+16+4 (packed a).
	movw	ax, sp
	movw	hl, ax
	addw	ax, #16+4
	movw	de, ax
	call	$!_int_unpack_sf

	;; Unpack b: HL = SP+8 (record B), DE = HL+16 (packed b).
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	addw	ax, #16+8-8
	movw	de, ax
	call	$!_int_unpack_sf

	movw	ax, sp
	movw	hl, ax

	;; multiply SI a.FRAC * SI b.FRAC to DI r8

	;; Build two 64-bit arguments below the records: a.FRAC in
	;; [SP+0..3] and b.FRAC in [SP+8..11], upper halves zeroed.  HL
	;; keeps pointing at the records while SP moves.
	subw	sp, #16
	movw	ax, A_FRAC_L
	movw	[sp+0], ax
	movw	ax, A_FRAC_H
	movw	[sp+2], ax

	movw	ax, B_FRAC_L
	movw	[sp+8], ax
	movw	ax, B_FRAC_H
	movw	[sp+10], ax

	movw	ax, #0
	movw	[sp+4], ax
	movw	[sp+6], ax
	movw	[sp+12], ax
	movw	[sp+14], ax

	call	!!___muldi3	; MTMPa * MTMPb -> R8..R15
	addw	sp, #16

	;; Point HL at the records again after the call.
	movw	ax, sp
	movw	hl, ax

	;;  add the exponents together
	movw	ax, A_EXP
	addw	ax, B_EXP
	movw	bc, ax		; exponent in BC

	;; now, re-normalize the DI value in R8..R15 to have the
	;; MSB in the "right" place, adjusting BC as we shift it.

	;; The value will normally be in this range:
	;; R15              R8
	;; 0001_0000_0000_0000
	;; 0003_ffff_fc00_0001

	;; so to speed it up, we normalize to:
	;; 0001_xxxx_xxxx_xxxx
	;; then extract the bytes we want (r11-r14)

1:
	;; Too large if r15 is non-zero or r14 has any bit above bit 0.
	mov	a, r15
	cmp0	a
	bnz	$2f
	mov	a, r14
	and	a, #0xfe
	bz	$1f
2:	
	;; shift right, inc exponent
	movw	ax, r14
	shrw	ax, 1
	movw	r14, ax
	mov	a, r13
	rorc	a, 1
	mov	r13, a
	mov	a, r12
	rorc	a, 1
	mov	r12, a
	mov	a, r11
	rorc	a, 1
	mov	r11, a
	;; we don't care about r8/r9/r10 if we're shifting this way
	incw	bc
	br	$1b
1:	
	;; Too small if both r15 and r14 are zero.
	mov	a, r15
	or	a, r14
	bnz	$1f
	;; shift left, dec exponent
	;; 64-bit left shift, carry rippling up through the four words.
	movw	ax, r8
	shlw	ax, 1
	movw	r8, ax
	movw	ax, r10
	rolwc	ax, 1
	movw	r10, ax
	movw	ax, r12
	rolwc	ax, 1
	movw	r12, ax
	movw	ax, r14
	rolwc	ax, 1
	movw	r14, ax
	decw	bc
	br	$1b
1:
	;; at this point, FRAC is in R11..R14 and EXP is in BC
	movw	ax, bc
	movw	A_EXP, ax

	mov	a, r11
	mov	A_FRAC_L, a
	mov	a, r12
	mov	A_FRAC_LH, a
	mov	a, r13
	mov	A_FRAC_H, a
	mov	a, r14
	mov	A_FRAC_HH, a

	;; Result sign is the XOR of the two unpacked sign bytes.
	mov	a, A_SIGN
	xor	a, B_SIGN
	mov	A_SIGN, a

	call	$!__rl78_int_pack_a_r8

	addw	sp, #16		; drop the two unpacked records
	ret

END_FUNC		___mulsf3
	
START_FUNC		___divsf3

	;; SF ___divsf3 (SF a, SF b): returns a / b in r8..r11.
	;; On entry a is at [SP+4..7] and b at [SP+8..11].
	;;
	;; NaN, infinity and zero operands are handled on the packed
	;; values.  Otherwise both are unpacked, the fractions divided
	;; with ___divdi3, the exponents subtracted, and the renormalized
	;; quotient is packed by __rl78_int_pack_a_r8.

	;; if (isnan(a)) return a
	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	!!__int_isnan
	bnz	$1f
dret_a:
	;; Return a with its sign bit XORed with the sign bit of b.
	;; Only reached while SP still has its entry value.
	movw	ax, [sp+4]
	movw	r8, ax
	mov	a, [sp+11]
	and	a, #0x80
	mov	b, a
	movw	ax, [sp+6]
	xor	a, b		; sign is always a ^ b
	movw	r10, ax
	ret
1:	
	;; if (isnan (b)) return b;
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	!!__int_isnan
	bnz	$1f
dret_b:
	;; Return b with its sign bit XORed with the sign bit of a.
	movw	ax, [sp+8]
	movw	r8, ax
	mov	a, [sp+7]
	and	a, #0x80
	mov	b, a
	movw	ax, [sp+10]
	xor	a, b		; sign is always a ^ b
	movw	r10, ax
	ret
1:	

	;; if (isinf (a)) return isinf(b) ? nan : a

	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	$!__int_isinf
	bnz	$1f
	
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	$!__int_isinf
	bnz	$dret_a
dret_nan:	
	movw	r8,  #0x0001	; return NaN
	movw	r10, #0x7f80
	ret
	
1:	

	;; if (iszero (a)) return iszero(b) ? nan : a

	movw	ax, sp
	addw	ax, #4
	movw	hl, ax
	call	!!__int_iszero
	bnz	$1f
	
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	!!__int_iszero
	bnz	$dret_a
	br	$dret_nan
	
1:	
	;; if (isinf (b)) return 0

	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	$!__int_isinf
	bnz	$1f
	
	;; Zero result carrying the XOR of the operand signs.
	mov	a, [sp+7]
	mov	b, a
	mov	a, [sp+11]
	xor	a, b
	and	a, #0x80
	mov	r11, a
	movw	r8, #0
	mov	r10, #0
	ret
	
1:	
	;; if (iszero (b)) return Inf

	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	call	!!__int_iszero
	bnz	$1f

	;; Infinity carrying the XOR of the operand signs: top byte
	;; s1111111, next byte 10000000, low word zero.
	mov	a, [sp+7]
	mov	b, a
	mov	a, [sp+11]
	xor	a, b
	or	a, #0x7f
	mov	r11, a
	movw	r8, #0
	mov	r10, #0x80
	ret
1:	

	;; at this point, we're doing the division.  Normalized
	;; mantissas look like:
	;; 01.xx.xx.xx
	;; so we divide:
	;; 01.xx.xx.xx.00.00.00.00
	;; by          01.xx.xx.xx
	;; to get approx 00.80.00.00.00 to 01.ff.ff.ff.00


	subw	sp, #16	; save room for two unpacked values

	;; Unpack a: HL = SP (record A), DE = SP+16+4 (packed a).
	movw	ax, sp
	movw	hl, ax
	addw	ax, #16+4
	movw	de, ax
	call	$!_int_unpack_sf

	;; Unpack b: HL = SP+8 (record B), DE = HL+16 (packed b).
	movw	ax, sp
	addw	ax, #8
	movw	hl, ax
	addw	ax, #16+8-8
	movw	de, ax
	call	$!_int_unpack_sf

	movw	ax, sp
	movw	hl, ax

	;; divide DI a.FRAC / SI b.FRAC to DI r8

	;; Build two 64-bit arguments below the records.  a.FRAC goes
	;; into the upper half of the dividend ([SP+4..7], lower half
	;; zero); b.FRAC goes into the lower half of the divisor
	;; ([SP+8..11], upper half zero).  HL keeps pointing at the
	;; records while SP moves.
	subw	sp, #16
	movw	ax, A_FRAC_L
	movw	[sp+4], ax
	movw	ax, A_FRAC_H
	movw	[sp+6], ax

	movw	ax, B_FRAC_L
	movw	[sp+8], ax
	movw	ax, B_FRAC_H
	movw	[sp+10], ax

	movw	ax, #0
	movw	[sp+0], ax
	movw	[sp+2], ax
	movw	[sp+12], ax
	movw	[sp+14], ax

	call	!!___divdi3	; MTMPa / MTMPb -> R8..R15
	addw	sp, #16

	;; Point HL at the records again after the call.
	movw	ax, sp
	movw	hl, ax

	;;  subtract the exponents A - B
	movw	ax, A_EXP
	subw	ax, B_EXP
	movw	bc, ax		; exponent in BC

	;; now, re-normalize the DI value in R8..R15 to have the
	;; MSB in the "right" place, adjusting BC as we shift it.

	;; The value will normally be in this range:
	;; R15              R8
	;; 0000_0000_8000_0000
	;; 0000_0001_ffff_ff00

	;; so to speed it up, we normalize to:
	;; 0000_0001_xxxx_xxxx
	;; then extract the bytes we want (r9-r12)

1:
	;; Too large if the r14 word is non-zero or the r12 word exceeds 1.
	movw	ax, r14
	cmpw	ax, #0
	bnz	$2f
	movw	ax, r12
	cmpw	ax, #1
	bnh	$1f
2:	
	;; shift right, inc exponent
	;; 64-bit right shift, carry rotating down through every byte.
	movw	ax, r14
	shrw	ax, 1
	movw	r14, ax
	mov	a, r13
	rorc	a, 1
	mov	r13, a
	mov	a, r12
	rorc	a, 1
	mov	r12, a
	mov	a, r11
	rorc	a, 1
	mov	r11, a
	mov	a, r10
	rorc	a, 1
	mov	r10, a
	mov	a, r9
	rorc	a, 1
	mov	r9, a
	mov	a, r8
	rorc	a, 1
	mov	r8, a

	incw	bc
	br	$1b
1:	
	;; the previous loop leaves r15.r13 zero
	;; Too small while r12 is still zero.
	mov	a, r12
	cmp0	a
	bnz	$1f
	;; shift left, dec exponent
	movw	ax, r8
	shlw	ax, 1
	movw	r8, ax
	movw	ax, r10
	rolwc	ax, 1
	movw	r10, ax
	movw	ax, r12
	rolwc	ax, 1
	movw	r12, ax
	;; don't need to do r14
	decw	bc
	br	$1b
1:
	;; at this point, FRAC is in R9..R12 and EXP is in BC
	movw	ax, bc
	movw	A_EXP, ax

	mov	a, r9
	mov	A_FRAC_L, a
	mov	a, r10
	mov	A_FRAC_LH, a
	mov	a, r11
	mov	A_FRAC_H, a
	mov	a, r12
	mov	A_FRAC_HH, a

	;; Result sign is the XOR of the two unpacked sign bytes.
	mov	a, A_SIGN
	xor	a, B_SIGN
	mov	A_SIGN, a

	call	$!__rl78_int_pack_a_r8

	addw	sp, #16		; drop the two unpacked records
	ret
	
END_FUNC	___divsf3
Update gcc to 5.2.0 2015-08-28 15:33:40 +00:00			`; SF format is:`
			`;`
			`; [sign] 1.[23bits] E[8bits(n-127)]`
			`;`
			`; SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm`
			`;`
			`; [A+0] mmmmmmmm`
			`; [A+1] mmmmmmmm`
			`; [A+2] Emmmmmmm`
			`; [A+3] SEEEEEEE`
			`;`
			`; Special values (xxx != 0):`
			`;`
			`; r11 r10 r9 r8`
			`; [HL+3] [HL+2] [HL+1] [HL+0]`
			`; s1111111 10000000 00000000 00000000 infinity`
			`; s1111111 1xxxxxxx xxxxxxxx xxxxxxxx NaN`
			`; s0000000 00000000 00000000 00000000 zero`
			`; s0000000 0xxxxxxx xxxxxxxx xxxxxxxx denormals`
			`;`
			`; Note that CMPtype is "signed char" for rl78`
			`;`

			`#include "vregs.h"`

			`#define Z PSW.6`

			`; External Functions:`
			`;`
			`; __int_isnan [HL] -> Z if NaN`
			`; __int_iszero [HL] -> Z if zero`

			`START_FUNC __int_isinf`
			`;; [HL] points to value, returns Z if it's #Inf`

			`mov a, [hl+2]`
			`and a, #0x80`
			`mov x, a`
			`mov a, [hl+3]`
			`and a, #0x7f`
			`cmpw ax, #0x7f80`
			`skz`
			`ret ; return NZ if not NaN`
			`mov a, [hl+2]`
			`and a, #0x7f`
			`or a, [hl+1]`
			`or a, [hl]`
			`ret`

			`END_FUNC __int_isinf`

			`#define A_SIGN [hl+0] /* byte */`
			`#define A_EXP [hl+2] /* word */`
			`#define A_FRAC_L [hl+4] /* word */`
			`#define A_FRAC_LH [hl+5] /* byte */`
			`#define A_FRAC_H [hl+6] /* word or byte */`
			`#define A_FRAC_HH [hl+7] /* byte */`

			`#define B_SIGN [hl+8]`
			`#define B_EXP [hl+10]`
			`#define B_FRAC_L [hl+12]`
			`#define B_FRAC_LH [hl+13]`
			`#define B_FRAC_H [hl+14]`
			`#define B_FRAC_HH [hl+15]`

			`START_FUNC _int_unpack_sf`
			`;; convert 32-bit SFmode [DE] to 6-byte struct [HL] ("A")`

			`mov a, [de+3]`
			`sar a, 7`
			`mov A_SIGN, a`

			`movw ax, [de+2]`
			`and a, #0x7f`
			`shrw ax, 7`
			`movw bc, ax ; remember if the exponent is all zeros`
			`subw ax, #127 ; exponent is now non-biased`
			`movw A_EXP, ax`

			`movw ax, [de]`
			`movw A_FRAC_L, ax`

			`mov a, [de+2]`
			`and a, #0x7f`
			`cmp0 c ; if the exp is all zeros, it's denormal`
			`skz`
			`or a, #0x80`
			`mov A_FRAC_H, a`

			`mov a, #0`
			`mov A_FRAC_HH, a`

			`;; rounding-bit-shift`
			`movw ax, A_FRAC_L`
			`shlw ax, 1`
			`movw A_FRAC_L, ax`
			`mov a, A_FRAC_H`
			`rolc a, 1`
			`mov A_FRAC_H, a`
			`mov a, A_FRAC_HH`
			`rolc a, 1`
			`mov A_FRAC_HH, a`

			`ret`

			`END_FUNC _int_unpack_sf`

			`; func(SF a,SF b)`
			`; [SP+4..7] a`
			`; [SP+8..11] b`

			`START_FUNC ___subsf3`

			`;; a - b => a + (-b)`

			`;; Note - we cannot just change the sign of B on the stack and`
			`;; then fall through into __addsf3. The stack'ed value may be`
			`;; used again (it was created by our caller after all). Instead`
			`;; we have to allocate some stack space of our own, copy A and B,`
			`;; change the sign of B, call __addsf3, release the allocated stack`
			`;; and then return.`

			`subw sp, #8`
			`movw ax, [sp+4+8]`
			`movw [sp], ax`
			`movw ax, [sp+4+2+8]`
			`movw [sp+2], ax`
			`movw ax, [sp+4+4+8]`
			`movw [sp+4], ax`
			`mov a, [sp+4+6+8]`
			`mov [sp+6], a`
			`mov a, [sp+4+7+8]`
			`xor a, #0x80`
			`mov [sp+7], a`
			`call $!___addsf3`
			`addw sp, #8`
			`ret`
			`END_FUNC ___subsf3`

			`START_FUNC ___addsf3`

			`;; if (isnan(a)) return a`
			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call !!__int_isnan`
			`bnz $1f`
			`ret_a:`
			`movw ax, [sp+4]`
			`movw r8, ax`
			`movw ax, [sp+6]`
			`movw r10, ax`
			`ret`

			`1: ;; if (isnan (b)) return b;`
			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call !!__int_isnan`
			`bnz $2f`
			`ret_b:`
			`movw ax, [sp+8]`
			`movw r8, ax`
			`movw ax, [sp+10]`
			`movw r10, ax`
			`ret`

			`2: ;; if (isinf (a))`
			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call $!__int_isinf`
			`bnz $3f`

			`;; if (isinf (b) && a->sign != b->sign) return NaN`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call $!__int_isinf`
			`bnz $ret_a`

			`mov a, [sp+7]`
			`mov h, a`
			`mov a, [sp+11]`
			`xor a, h`
			`bf a.7, $ret_a`

			`movw r8, #0x0001`
			`movw r10, #0x7f80`
			`ret`

			`3: ;; if (isinf (b)) return b;`
			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call $!__int_isinf`
			`bz $ret_b`

			`;; if (iszero (b))`
			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call !!__int_iszero`
			`bnz $4f`

			`;; if (iszero (a))`
			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call !!__int_iszero`
			`bnz $ret_a`

			`movw ax, [sp+4]`
			`movw r8, ax`
			`mov a, [sp+7]`
			`mov h, a`
			`movw ax, [sp+10]`
			`and a, h`
			`movw r10, ax`
			`ret`

			`4: ;; if (iszero (a)) return b;`
			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call !!__int_iszero`
			`bz $ret_b`

			`; Normalize the two numbers relative to each other. At this point,`
			`; we need the numbers converted to their "unpacked" format.`

			`subw sp, #16 ; Save room for two unpacked values.`

			`movw ax, sp`
			`movw hl, ax`
			`addw ax, #16+4`
			`movw de, ax`
			`call $!_int_unpack_sf`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`addw ax, #16+8-8`
			`movw de, ax`
			`call $!_int_unpack_sf`

			`movw ax, sp`
			`movw hl, ax`

			`;; diff = a.exponent - b.exponent`
			`movw ax, B_EXP ; sign/exponent word`
			`movw bc, ax`
			`movw ax, A_EXP ; sign/exponent word`

			`subw ax, bc ; a = a.exp - b.exp`
			`movw de, ax ; d = sdiff`

			`;; if (diff < 0) diff = -diff`
			`bf a.7, $1f`
			`xor a, #0xff`
			`xor r_0, #0xff ; x`
			`incw ax ; a = diff`
			`1:`
			`;; if (diff >= 23) zero the smaller one`
			`cmpw ax, #24`
			`bc $.L661 ; if a < 23 goto 661`

			`;; zero out the smaller one`

			`movw ax, de`
			`bt a.7, $1f ; if sdiff < 0 (a_exp < b_exp) goto 1f`
			`;; "zero out" b`
			`movw ax, A_EXP`
			`movw B_EXP, ax`
			`movw ax, #0`
			`movw B_FRAC_L, ax`
			`movw B_FRAC_H, ax`
			`br $5f`
			`1:`
			`;; "zero out" a`
			`movw ax, B_EXP`
			`movw A_EXP, ax`
			`movw ax, #0`
			`movw A_FRAC_L, ax`
			`movw A_FRAC_H, ax`

			`br $5f`
			`.L661:`
			`;; shift the smaller one so they have the same exponents`
			`1:`
			`movw ax, de`
			`bt a.7, $1f`
			`cmpw ax, #0 ; sdiff > 0`
			`bnh $1f ; if (sdiff <= 0) goto 1f`

			`decw de`
			`incw B_EXP ; because it's [HL+byte]`

			`movw ax, B_FRAC_H`
			`shrw ax, 1`
			`movw B_FRAC_H, ax`
			`mov a, B_FRAC_LH`
			`rorc a, 1`
			`mov B_FRAC_LH, a`
			`mov a, B_FRAC_L`
			`rorc a, 1`
			`mov B_FRAC_L, a`

			`br $1b`
			`1:`
			`movw ax, de`
			`bf a.7, $1f`

			`incw de`
			`incw A_EXP ; because it's [HL+byte]`

			`movw ax, A_FRAC_H`
			`shrw ax, 1`
			`movw A_FRAC_H, ax`
			`mov a, A_FRAC_LH`
			`rorc a, 1`
			`mov A_FRAC_LH, a`
			`mov a, A_FRAC_L`
			`rorc a, 1`
			`mov A_FRAC_L, a`

			`br $1b`
			`1:`

			`5: ;; At this point, A and B have the same exponent.`

			`mov a, A_SIGN`
			`cmp a, B_SIGN`
			`bnz $1f`

			`;; Same sign, just add.`
			`movw ax, A_FRAC_L`
			`addw ax, B_FRAC_L`
			`movw A_FRAC_L, ax`
			`mov a, A_FRAC_H`
			`addc a, B_FRAC_H`
			`mov A_FRAC_H, a`
			`mov a, A_FRAC_HH`
			`addc a, B_FRAC_HH`
			`mov A_FRAC_HH, a`

			`br $.L728`

			`1: ;; Signs differ - A has A_SIGN still.`
			`bf a.7, $.L696`

			`;; A is negative, do B-A`
			`movw ax, B_FRAC_L`
			`subw ax, A_FRAC_L`
			`movw A_FRAC_L, ax`
			`mov a, B_FRAC_H`
			`subc a, A_FRAC_H`
			`mov A_FRAC_H, a`
			`mov a, B_FRAC_HH`
			`subc a, A_FRAC_HH`
			`mov A_FRAC_HH, a`

			`br $.L698`
			`.L696:`
			`;; B is negative, do A-B`
			`movw ax, A_FRAC_L`
			`subw ax, B_FRAC_L`
			`movw A_FRAC_L, ax`
			`mov a, A_FRAC_H`
			`subc a, B_FRAC_H`
			`mov A_FRAC_H, a`
			`mov a, A_FRAC_HH`
			`subc a, B_FRAC_HH`
			`mov A_FRAC_HH, a`

			`.L698:`
			`;; A is still A_FRAC_HH`
			`bt a.7, $.L706`

			`;; subtraction was positive`
			`mov a, #0`
			`mov A_SIGN, a`
			`br $.L712`

			`.L706:`
			`;; subtraction was negative`
			`mov a, #0xff`
			`mov A_SIGN, a`

			`;; This negates A_FRAC`
			`mov a, A_FRAC_L`
			`xor a, #0xff ; XOR doesn't mess with carry`
			`add a, #1 ; INC doesn't set the carry`
			`mov A_FRAC_L, a`
			`mov a, A_FRAC_LH`
			`xor a, #0xff`
			`addc a, #0`
			`mov A_FRAC_LH, a`
			`mov a, A_FRAC_H`
			`xor a, #0xff`
			`addc a, #0`
			`mov A_FRAC_H, a`
			`mov a, A_FRAC_HH`
			`xor a, #0xff`
			`addc a, #0`
			`mov A_FRAC_HH, a`

			`.L712:`
			`;; Renormalize the subtraction`

			`mov a, A_FRAC_L`
			`or a, A_FRAC_LH`
			`or a, A_FRAC_H`
			`or a, A_FRAC_HH`
			`bz $.L728`

			`;; Mantissa is not zero, left shift until the MSB is in the`
			`;; right place`
			`1:`
			`movw ax, A_FRAC_H`
			`cmpw ax, #0x0200`
			`bnc $.L728`

			`decw A_EXP`

			`movw ax, A_FRAC_L`
			`shlw ax, 1`
			`movw A_FRAC_L, ax`
			`movw ax, A_FRAC_H`
			`rolwc ax, 1`
			`movw A_FRAC_H, ax`
			`br $1b`

			`.L728:`
			`;; normalize A and pack it`

			`movw ax, A_FRAC_H`
			`cmpw ax, #0x01ff`
			`bnh $1f`
			`;; overflow in the mantissa; adjust`
			`movw ax, A_FRAC_H`
			`shrw ax, 1`
			`movw A_FRAC_H, ax`
			`mov a, A_FRAC_LH`
			`rorc a, 1`
			`mov A_FRAC_LH, a`
			`mov a, A_FRAC_L`
			`rorc a, 1`
			`mov A_FRAC_L, a`
			`incw A_EXP`
			`1:`

			`call $!__rl78_int_pack_a_r8`
			`addw sp, #16`
			`ret`

			`END_FUNC ___addsf3`

			`START_FUNC __rl78_int_pack_a_r8`
			`;; pack A to R8`
			`movw ax, A_EXP`
			`addw ax, #126 ; not 127, we want the "bt/bf" test to check for denormals`

			`bf a.7, $1f`
			`;; make a denormal`
			`2:`
			`movw bc, ax`
			`movw ax, A_FRAC_H`
			`shrw ax, 1`
			`movw A_FRAC_H, ax`
			`mov a, A_FRAC_LH`
			`rorc a, 1`
			`mov A_FRAC_LH, a`
			`mov a, A_FRAC_L`
			`rorc a, 1`
			`mov A_FRAC_L, a`
			`movw ax, bc`
			`incw ax`
			`bt a.7, $2b`
			`decw ax`
			`1:`
			`incw ax ; now it's as if we added 127`
			`movw A_EXP, ax`

			`cmpw ax, #0xfe`
			`bnh $1f`
			`;; store #Inf instead`
			`mov a, A_SIGN`
			`or a, #0x7f`
			`mov x, #0x80`
			`movw r10, ax`
			`movw r8, #0`
			`ret`

			`1:`
			`bf a.7, $1f ; note AX has EXP at top of loop`
			`;; underflow, denormal?`
			`movw ax, A_FRAC_H`
			`shrw ax, 1`
			`movw A_FRAC_H, ax`
			`mov a, A_FRAC_LH`
			`rorc a, 1`
			`movw A_FRAC_LH, ax`
			`mov a, A_FRAC_L`
			`rorc a, 1`
			`movw A_FRAC_L, ax`
			`incw A_EXP`
			`movw ax, A_EXP`
			`br $1b`

			`1:`
			`;; undo the rounding-bit-shift`
			`mov a, A_FRAC_L`
			`bf a.0, $1f`
			`;; round up`
			`movw ax, A_FRAC_L`
			`addw ax, #1`
			`movw A_FRAC_L, ax`
			`bnc $1f`
			`incw A_FRAC_H`

			`;; If the rounding set the bit beyond the end of the fraction, increment the exponent.`
			`mov a, A_FRAC_HH`
			`bf a.1, $1f`
			`incw A_EXP`

			`1:`
			`movw ax, A_FRAC_H`
			`shrw ax, 1`
			`movw A_FRAC_H, ax`
			`mov a, A_FRAC_LH`
			`rorc a, 1`
			`mov A_FRAC_LH, a`
			`mov a, A_FRAC_L`
			`rorc a, 1`
			`mov A_FRAC_L, a`

			`movw ax, A_FRAC_L`
			`movw r8, ax`

			`or a, x`
			`or a, A_FRAC_H`
			`or a, A_FRAC_HH`
			`bnz $1f`
			`movw ax, #0`
			`movw A_EXP, ax`
			`1:`
			`mov a, A_FRAC_H`
			`and a, #0x7f`
			`mov b, a`
			`mov a, A_EXP`
			`shl a, 7`
			`or a, b`
			`mov r10, a`

			`mov a, A_SIGN`
			`and a, #0x80`
			`mov b, a`
			`mov a, A_EXP`
			`shr a, 1`
			`or a, b`
			`mov r11, a`

			`ret`
			`END_FUNC __rl78_int_pack_a_r8`

			`START_FUNC ___mulsf3`

			`;; if (isnan(a)) return a`
			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call !!__int_isnan`
			`bnz $1f`
			`mret_a:`
			`movw ax, [sp+4]`
			`movw r8, ax`
			`mov a, [sp+11]`
			`and a, #0x80`
			`mov b, a`
			`movw ax, [sp+6]`
			`xor a, b ; sign is always a ^ b`
			`movw r10, ax`
			`ret`
			`1:`
			`;; if (isnan (b)) return b;`
			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call !!__int_isnan`
			`bnz $1f`
			`mret_b:`
			`movw ax, [sp+8]`
			`movw r8, ax`
			`mov a, [sp+7]`
			`and a, #0x80`
			`mov b, a`
			`movw ax, [sp+10]`
			`xor a, b ; sign is always a ^ b`
			`movw r10, ax`
			`ret`
			`1:`
			`;; if (isinf (a)) return (b==0) ? nan : a`
			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call $!__int_isinf`
			`bnz $.L805`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call !!__int_iszero`
			`bnz $mret_a`

			`movw r8, #0x0001 ; return NaN`
			`movw r10, #0x7f80`
			`ret`

			`.L805:`
			`;; if (isinf (b)) return (a==0) ? nan : b`
			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call $!__int_isinf`
			`bnz $.L814`

			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call !!__int_iszero`
			`bnz $mret_b`

			`movw r8, #0x0001 ; return NaN`
			`movw r10, #0x7f80`
			`ret`

			`.L814:`
			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call !!__int_iszero`
			`bz $mret_a`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call !!__int_iszero`
			`bz $mret_b`

			`;; at this point, we're doing the multiplication.`

			`subw sp, #16 ; save room for two unpacked values`

			`movw ax, sp`
			`movw hl, ax`
			`addw ax, #16+4`
			`movw de, ax`
			`call $!_int_unpack_sf`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`addw ax, #16+8-8`
			`movw de, ax`
			`call $!_int_unpack_sf`

			`movw ax, sp`
			`movw hl, ax`

			`;; multiply SI a.FRAC * SI b.FRAC to DI r8`

			`subw sp, #16`
			`movw ax, A_FRAC_L`
			`movw [sp+0], ax`
			`movw ax, A_FRAC_H`
			`movw [sp+2], ax`

			`movw ax, B_FRAC_L`
			`movw [sp+8], ax`
			`movw ax, B_FRAC_H`
			`movw [sp+10], ax`

			`movw ax, #0`
			`movw [sp+4], ax`
			`movw [sp+6], ax`
			`movw [sp+12], ax`
			`movw [sp+14], ax`

			`call !!___muldi3 ; MTMPa * MTMPb -> R8..R15`
			`addw sp, #16`

			`movw ax, sp`
			`movw hl, ax`

			`;; add the exponents together`
			`movw ax, A_EXP`
			`addw ax, B_EXP`
			`movw bc, ax ; exponent in BC`

			`;; now, re-normalize the DI value in R8..R15 to have the`
			`;; MSB in the "right" place, adjusting BC as we shift it.`

			`;; The value will normally be in this range:`
			`;; R15 R8`
			`;; 0001_0000_0000_0000`
			`;; 0003_ffff_fc00_0001`

			`;; so to speed it up, we normalize to:`
			`;; 0001_xxxx_xxxx_xxxx`
			`;; then extract the bytes we want (r11-r14)`

			`1:`
			`mov a, r15`
			`cmp0 a`
			`bnz $2f`
			`mov a, r14`
			`and a, #0xfe`
			`bz $1f`
			`2:`
			`;; shift right, inc exponent`
			`movw ax, r14`
			`shrw ax, 1`
			`movw r14, ax`
			`mov a, r13`
			`rorc a, 1`
			`mov r13, a`
			`mov a, r12`
			`rorc a, 1`
			`mov r12, a`
			`mov a, r11`
			`rorc a, 1`
			`mov r11, a`
			`;; we don't care about r8/r9/r10 if we're shifting this way`
			`incw bc`
			`br $1b`
			`1:`
			`mov a, r15`
			`or a, r14`
			`bnz $1f`
			`;; shift left, dec exponent`
			`movw ax, r8`
			`shlw ax, 1`
			`movw r8, ax`
			`movw ax, r10`
			`rolwc ax, 1`
			`movw r10, ax`
			`movw ax, r12`
			`rolwc ax, 1`
			`movw r12, ax`
			`movw ax, r14`
			`rolwc ax, 1`
			`movw r14, ax`
			`decw bc`
			`br $1b`
			`1:`
			`;; at this point, FRAC is in R11..R14 and EXP is in BC`
			`movw ax, bc`
			`movw A_EXP, ax`

			`mov a, r11`
			`mov A_FRAC_L, a`
			`mov a, r12`
			`mov A_FRAC_LH, a`
			`mov a, r13`
			`mov A_FRAC_H, a`
			`mov a, r14`
			`mov A_FRAC_HH, a`

			`mov a, A_SIGN`
			`xor a, B_SIGN`
			`mov A_SIGN, a`

			`call $!__rl78_int_pack_a_r8`

			`addw sp, #16`
			`ret`

			`END_FUNC ___mulsf3`

			`START_FUNC ___divsf3`

			`;; if (isnan(a)) return a`
			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call !!__int_isnan`
			`bnz $1f`
			`dret_a:`
			`movw ax, [sp+4]`
			`movw r8, ax`
			`mov a, [sp+11]`
			`and a, #0x80`
			`mov b, a`
			`movw ax, [sp+6]`
			`xor a, b ; sign is always a ^ b`
			`movw r10, ax`
			`ret`
			`1:`
			`;; if (isnan (b)) return b;`
			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call !!__int_isnan`
			`bnz $1f`
			`dret_b:`
			`movw ax, [sp+8]`
			`movw r8, ax`
			`mov a, [sp+7]`
			`and a, #0x80`
			`mov b, a`
			`movw ax, [sp+10]`
			`xor a, b ; sign is always a ^ b`
			`movw r10, ax`
			`ret`
			`1:`

			`;; if (isinf (a)) return isinf(b) ? nan : a`

			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call $!__int_isinf`
			`bnz $1f`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call $!__int_isinf`
			`bnz $dret_a`
			`dret_nan:`
			`movw r8, #0x0001 ; return NaN`
			`movw r10, #0x7f80`
			`ret`

			`1:`

			`;; if (iszero (a)) return iszero(b) ? nan : a`

			`movw ax, sp`
			`addw ax, #4`
			`movw hl, ax`
			`call !!__int_iszero`
			`bnz $1f`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call !!__int_iszero`
			`bnz $dret_a`
			`br $dret_nan`

			`1:`
			`;; if (isinf (b)) return 0`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call $!__int_isinf`
			`bnz $1f`

			`mov a, [sp+7]`
			`mov b, a`
			`mov a, [sp+11]`
			`xor a, b`
			`and a, #0x80`
			`mov r11, a`
			`movw r8, #0`
			`mov r10, #0`
			`ret`

			`1:`
			`;; if (iszero (b)) return Inf`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`call !!__int_iszero`
			`bnz $1f`

			`mov a, [sp+7]`
			`mov b, a`
			`mov a, [sp+11]`
			`xor a, b`
			`or a, #0x7f`
			`mov r11, a`
			`movw r8, #0`
			`mov r10, #0x80`
			`ret`
			`1:`

			`;; at this point, we're doing the division. Normalized`
			`;; mantissas look like:`
			`;; 01.xx.xx.xx`
			`;; so we divide:`
			`;; 01.xx.xx.xx.00.00.00.00`
			`;; by 01.xx.xx.xx`
			`;; to get approx 00.80.00.00.00 to 01.ff.ff.ff.00`


			`subw sp, #16 ; save room for two unpacked values`

			`movw ax, sp`
			`movw hl, ax`
			`addw ax, #16+4`
			`movw de, ax`
			`call $!_int_unpack_sf`

			`movw ax, sp`
			`addw ax, #8`
			`movw hl, ax`
			`addw ax, #16+8-8`
			`movw de, ax`
			`call $!_int_unpack_sf`

			`movw ax, sp`
			`movw hl, ax`

			`;; divide DI a.FRAC / SI b.FRAC to DI r8`

			`subw sp, #16`
			`movw ax, A_FRAC_L`
			`movw [sp+4], ax`
			`movw ax, A_FRAC_H`
			`movw [sp+6], ax`

			`movw ax, B_FRAC_L`
			`movw [sp+8], ax`
			`movw ax, B_FRAC_H`
			`movw [sp+10], ax`

			`movw ax, #0`
			`movw [sp+0], ax`
			`movw [sp+2], ax`
			`movw [sp+12], ax`
			`movw [sp+14], ax`

			`call !!___divdi3 ; MTMPa / MTMPb -> R8..R15`
			`addw sp, #16`

			`movw ax, sp`
			`movw hl, ax`

			`;; subtract the exponents A - B`
			`movw ax, A_EXP`
			`subw ax, B_EXP`
			`movw bc, ax ; exponent in BC`

			`;; now, re-normalize the DI value in R8..R15 to have the`
			`;; MSB in the "right" place, adjusting BC as we shift it.`

			`;; The value will normally be in this range:`
			`;; R15 R8`
			`;; 0000_0000_8000_0000`
			`;; 0000_0001_ffff_ff00`

			`;; so to speed it up, we normalize to:`
			`;; 0000_0001_xxxx_xxxx`
			`;; then extract the bytes we want (r9-r12)`

			`1:`
			`movw ax, r14`
			`cmpw ax, #0`
			`bnz $2f`
			`movw ax, r12`
			`cmpw ax, #1`
			`bnh $1f`
			`2:`
			`;; shift right, inc exponent`
			`movw ax, r14`
			`shrw ax, 1`
			`movw r14, ax`
			`mov a, r13`
			`rorc a, 1`
			`mov r13, a`
			`mov a, r12`
			`rorc a, 1`
			`mov r12, a`
			`mov a, r11`
			`rorc a, 1`
			`mov r11, a`
			`mov a, r10`
			`rorc a, 1`
			`mov r10, a`
			`mov a, r9`
			`rorc a, 1`
			`mov r9, a`
			`mov a, r8`
			`rorc a, 1`
			`mov r8, a`

			`incw bc`
			`br $1b`
			`1:`
			`;; the previous loop leaves r15.r13 zero`
			`mov a, r12`
			`cmp0 a`
			`bnz $1f`
			`;; shift left, dec exponent`
			`movw ax, r8`
			`shlw ax, 1`
			`movw r8, ax`
			`movw ax, r10`
			`rolwc ax, 1`
			`movw r10, ax`
			`movw ax, r12`
			`rolwc ax, 1`
			`movw r12, ax`
			`;; don't need to do r14`
			`decw bc`
			`br $1b`
			`1:`
			`;; at this point, FRAC is in R8..R11 and EXP is in BC`
			`movw ax, bc`
			`movw A_EXP, ax`

			`mov a, r9`
			`mov A_FRAC_L, a`
			`mov a, r10`
			`mov A_FRAC_LH, a`
			`mov a, r11`
			`mov A_FRAC_H, a`
			`mov a, r12`
			`mov A_FRAC_HH, a`

			`mov a, A_SIGN`
			`xor a, B_SIGN`
			`mov A_SIGN, a`

			`call $!__rl78_int_pack_a_r8`

			`addw sp, #16`
			`ret`

			`END_FUNC ___divsf3`