mirror of
https://github.com/autc04/Retro68.git
synced 2024-12-02 03:50:17 +00:00
990 lines
25 KiB
ArmAsm
990 lines
25 KiB
ArmAsm
# ieee754 sf routines for FT32
|
||
|
||
/* Copyright (C) 1995-2019 Free Software Foundation, Inc.
|
||
|
||
This file is free software; you can redistribute it and/or modify it
|
||
under the terms of the GNU General Public License as published by the
|
||
Free Software Foundation; either version 3, or (at your option) any
|
||
later version.
|
||
|
||
This file is distributed in the hope that it will be useful, but
|
||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
General Public License for more details.
|
||
|
||
Under Section 7 of GPL version 3, you are granted additional
|
||
permissions described in the GCC Runtime Library Exception, version
|
||
3.1, as published by the Free Software Foundation.
|
||
|
||
You should have received a copy of the GNU General Public License and
|
||
a copy of the GCC Runtime Library Exception along with this program;
|
||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||
<http://www.gnu.org/licenses/>. */
|
||
|
||
# See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
|
||
# for implementation details of all except division which is detailed below
|
||
#
|
||
|
||
#ifdef L_fp_tools
# Constant pool shared by the single-precision helpers.  The routines
# fetch these words from program memory with lpm.
// .global __cmpsf2_
nan:		.long	0x7FFFFFFF	# quiet NaN pattern, doubles as the abs() mask
inf:		.long	0x7F800000	# +infinity
sign_mask:	.long	0x80000000	# bit 31 only
m_mask:		.long	0x007FFFFF	# 23 fraction bits
exp_bias:	.long	127
edge_case:	.long	0x00FFFFFF
smallest_norm:	.long	0x00800000	# implicit bit
high_FF:	.long	0xFF000000
high_uint:	.long	0xFFFFFFFF

# Byte lookup table indexed by the 6-bit hash that the ntz macro computes.
# Entry 0 is the answer for a zero input (32).
ntz_table:
	.byte	32,0,1,12,2,6,0,13
	.byte	3,0,7,0,0,0,0,14
	.byte	10,4,0,0,8,0,0,25
	.byte	0,0,0,0,0,21,27,15
	.byte	31,11,5,0,0,0,0,0
	.byte	9,0,0,24,0,0,20,26
	.byte	30,0,0,0,0,23,0,19
	.byte	29,0,22,18,28,17,16,0

#endif
|
||
|
||
# Supply a few 'missing' instructions
|
||
|
||
# not dst,src -- bitwise complement, dst = ~src (xor with all ones)
.macro	not dst,src
	xor	\dst,\src,-1
.endm
|
||
|
||
# neg x -- two's complement negate in place: x = ~x + 1
.macro	neg x
	xor	\x,\x,-1		# complement
	add	\x,\x,1			# plus one
.endm
|
||
|
||
# ashlcc regno,count -- set $cc from the result of "ashl reg,count".
# The instruction word is emitted directly with .long, so regno must be a
# bare register number (call sites pass 2 for $r2) and count a constant.
.macro	ashlcc regno,count
	.long	(\regno << 15) | (\count << 4) | 0x5de04008
.endm
|
||
|
||
|
||
# to_signed val,mask -- turn the unsigned magnitude in val into a signed
# value.  mask must be 0x00000000 (keep val) or 0xffffffff (negate val):
# with mask = -1 the two steps compute ~(val - 1), which is -val.
.macro	to_signed val, mask
	add	\val,\val,\mask		# val - 1 when mask is all ones
	xor	\val,\val,\mask		# complement when mask is all ones
.endm
|
||
|
||
|
||
# ld32 dst,imm -- build a constant in two steps: ldk loads the bits above
# the low ten, then ldl shifts them up and merges the low ten bits.
# NOTE(review): the usable range depends on the width of the ldk
# immediate -- confirm before using with constants that need all 32 bits.
.macro	ld32 dst,imm
	ldk	\dst,(\imm>>10)
	ldl	\dst,\dst,(\imm & 1023)
.endm
|
||
|
||
# ntz v,tmp -- number of trailing zero bits of v, returned in v.
# tmp is clobbered.  Uses Seal's algorithm: isolate the lowest set bit,
# multiply it by a constant through shift/add steps, and use the top six
# bits of the product as an index into ntz_table.
.macro	ntz v, tmp
	xor	\tmp,\v,-1
	add	\tmp,\tmp,1		# tmp = -v
	and	\v,\v,\tmp		# keep only the lowest set bit
	ashl	\tmp,\v,4
	add	\v,\tmp,\v		# v = (v << 4) + v
	ashl	\tmp,\v,6
	add	\v,\tmp,\v		# v = (v << 6) + v
	ashl	\tmp,\v,16
	sub	\v,\tmp,\v		# v = (v << 16) - v
	lshr	\v,\v,26		# six-bit table index
	ldk	\tmp,ntz_table
	add	\v,\v,\tmp
	lpmi.b	\v,\v,0			# fetch the table byte
.endm
|
||
|
||
# nlz v,tmp -- number of leading zero bits of v, returned in v.
# The flip turns the leading-zero count into a trailing-zero count
# (presumably a full 32-bit bit reversal -- confirm against the ISA
# manual), which the ntz macro then evaluates.  tmp is clobbered.
.macro	nlz v, tmp
	flip	\v,\v,31
	ntz	\v,\tmp
.endm
|
||
|
||
|
||
# round m,inc,idx -- round a mantissa that carries two extra low bits to
# nearest, ties to even.  m is shifted right by two and incremented when
# needed; inc and idx are clobbered.
# The low three bits of m (result LSB, round bit, sticky bit) select one
# bit of the constant 0xc8 = 0b11001000: the increment is 1 for the
# patterns 011, 110 and 111 and 0 otherwise.
.macro	round m, inc, idx
	and	\idx,\m,7		# three-bit rounding pattern
	ldk	\inc,0xc8
	lshr	\inc,\inc,\idx
	and	\inc,\inc,1		# selected increment
	lshr	\m,\m,2			# drop the two extra bits
	add	\m,\m,\inc
.endm
|
||
|
||
# sticky r -- set bit 0 of r unless the Z flag is currently set.
# The caller sets the flags from the bits being discarded (tst, btst or
# ashlcc) before expanding this macro.  Uses the local label 1.
.macro	sticky r
	jmpc	z,1f			# nothing was discarded
	or	\r,\r,1			# record the lost bits
1:
.endm
|
||
|
||
##########################################################################
|
||
##########################################################################
|
||
## addition & subtraction
|
||
|
||
#if defined(L_subsf3) || defined(L_addsub_sf)
	.global	__subsf3
# __subsf3 -- single-precision subtract: x in $r0, y in $r1.
# Flips the sign bit of y and continues in __addsf3, so the result comes
# back in $r0 exactly as for an addition.  $r2 is used as scratch.
__subsf3:
	lpm	$r2,sign_mask
	xor	$r1,$r1,$r2		# y = -y
	jmp	__addsf3		# x + (-y)
#endif
|
||
|
||
#if defined(L_addsf3) || defined(L_addsub_sf)
	.global	__addsf3
# __addsf3 -- IEEE-754 single-precision addition.
#   in:   $r0 = x, $r1 = y (raw binary32 bit patterns)
#   out:  $r0 = x + y, rounded to nearest
#   uses: $r2-$r8 as scratch
# An operand whose exponent field is zero is handled as a zero by the
# early-exit code: the other operand (or a signed zero) is returned
# without looking at its fraction.  Results whose exponent drops to zero
# or below are returned as a signed zero.
__addsf3:
	# unpack e, calc d
	bextu	$r2,$r0,(8<<5)|23	# ex in r2
	bextu	$r3,$r1,(8<<5)|23	# ey in r3
	sub	$r5,$r2,$r3		# d = ex - ey

	# Special values are 0x00 and 0xff in ex and ey.
	# When (ex&ey) == 0 or (ex|ey) == 255 there may be a special
	# value, so the slow checks below are needed.
	tst	$r2,$r3
	jmpc	nz,1f
	jmp	slow
1:	or	$r4,$r2,$r3
	cmp	$r4,255
	jmpc	nz,no_special_vals
slow:
	# Check for early exit: an exponent is zero and neither is 255
	cmp	$r2,0
	jmpc	z,test_if_not_255
	cmp	$r3,0
	jmpc	nz,no_early_exit
test_if_not_255:
	cmp	$r2,255
	jmpc	z,no_early_exit
	cmp	$r3,255
	jmpc	z,no_early_exit
	or	$r6,$r2,$r3
	cmp	$r6,0
	jmpc	nz,was_not_zero
	# both exponents are zero: return a zero that is negative only
	# when both operands are negative
	and	$r0,$r0,$r1
	lpm	$r1,sign_mask
	and	$r0,$r0,$r1
	return
was_not_zero:
	# exactly one exponent is zero: the other operand is the sum
	cmp	$r2,0
	jmpc	nz,ret_x
	move	$r0,$r1
	return
ret_x:
	return
no_early_exit:
	# setup to test for special values: (e-1)&0xFE is 0xFE exactly
	# when e is 0 or 255
	sub	$r6,$r2,1
	and	$r6,$r6,0xFE
	sub	$r7,$r3,1
	and	$r7,$r7,0xFE
	# test for special values
	cmp	$r6,$r7
	jmpc	gte,ex_spec_is_gte
	move	$r6,$r7
ex_spec_is_gte:
	cmp	$r6,0xFE
	jmpc	nz,no_special_vals
	cmp	$r5,0
	jmpc	ns,d_gte_0
	# d < 0 with a special operand means ey is 255 or ex is 0; in
	# both cases y is the result.  (The original tested the two cases
	# separately, but every outcome led to ret_y.)
ret_y:
	move	$r0,$r1
	return
d_gte_0:
	cmp	$r5,0
	jmpc	z,d_is_0
	# d > 0: x is the result when ex is 255 or ey is 0
	cmp	$r2,0xFF
	jmpc	z,ret_x
	cmp	$r3,0
	jmpc	z,ret_x
d_is_0:
	cmp	$r2,0xFF
	jmpc	nz,no_special_vals
	# both exponents are 255: NaN when either fraction is non-zero
	ashl	$r6,$r0,9		# clear all except x frac
	ashl	$r7,$r1,9		# clear all except y frac
	or	$r6,$r6,$r7
	cmp	$r6,0
	jmpc	nz,ret_nan
	# two infinities: NaN when the signs differ, otherwise return x
	lshr	$r4,$r0,31		# sx in r4
	lshr	$r5,$r1,31		# sy in r5
	cmp	$r4,$r5
	jmpc	nz,ret_nan
	return
ret_nan:
	lpm	$r0,nan
	return
no_special_vals:
	ldk	$r8,(1<<10)|(9<<5)|26	# setup implicit bit and mask for e
	#----------------------
	ashr	$r4,$r0,31		# sx in r4
	ashl	$r0,$r0,3		# shift mx 3 for GRS bits
	bins	$r0,$r0,$r8		# clear sx, ex and add implicit bit mx
	# change mx to signed mantissa
	to_signed $r0,$r4
	#----------------------
	ashr	$r4,$r1,31		# sy in r4
	ashl	$r1,$r1,3		# shift my 3 for GRS bits
	bins	$r1,$r1,$r8		# clear sy, ey and add implicit bit my
	# change my to signed mantissa
	to_signed $r1,$r4
	#----------------------
	# test whether the mantissas must be swapped, based on the sign of d
	cmp	$r5,0
	jmpc	gte,noswap
	# swap mx & my
	xor	$r0,$r0,$r1
	xor	$r1,$r0,$r1
	xor	$r0,$r0,$r1
	# d positive means that ex>=ey, so ez = ex
	# d negative means that ey>ex, so ez = ey
	move	$r2,$r3
	# |d|
	neg	$r5
noswap:
	# now $r2 = ez = max(ex,ey)
	cmp	$r5,26			# max necessary alignment shift is 26
	jmpc	lt,under_26
	ldk	$r5,26
under_26:
	ldk	$r7,-1
	ashl	$r7,$r7,$r5		# inverse of the mask of discarded my bits
	not	$r7,$r7
	tst	$r1,$r7			# determine value of sticky bit
	# shift my >> |d|; the flags from tst are still consumed by sticky
	ashr	$r1,$r1,$r5
	sticky	$r1

	# add ms
	add	$r0,$r0,$r1

	# $r4 = sign(mx), mx = |mx|
	ashr	$r4,$r0,31
	xor	$r0,$r0,$r4
	sub	$r0,$r0,$r4

	# realign mantissa using leading zero count
	flip	$r7,$r0,31
	ntz	$r7,$r8
	ashl	$r0,$r0,$r7
	btst	$r0,(6<<5)|0		# test low bits for sticky again
	lshr	$r0,$r0,6
	sticky	$r0

	# update exponent
	add	$r2,$r2,5
	sub	$r2,$r2,$r7

	# Round to nearest
	round	$r0,$r7,$r6

	# detect_exp_update: a carry out of the rounding bumps ez
	lshr	$r6,$r0,24
	add	$r2,$r2,$r6

	# final tests
	# mz == 0? then the result is +0
	cmp	$r0,0
	jmpc	nz,msum_not_zero
	ldk	$r0,0
	return
msum_not_zero:
	# Combined check that (1 <= ez <= 254)
	sub	$r3,$r2,1
	cmp	$r3,254
	jmpc	b,no_special_ret
	# underflow? result becomes a signed zero
	cmp	$r2,0
	jmpc	gt,no_under
	ldk	$r0,0
	jmp	pack_sz
no_under:
	# overflow? result becomes a signed infinity
	cmp	$r2,255
	jmpc	lt,no_special_ret
	ldk	$r0,0x7F8
	ashl	$r0,$r0,20
	jmp	pack_sz
no_special_ret:
	# Pack ez
	ldl	$r2,$r2,(8<<5)|23
	bins	$r0,$r0,$r2		# width = 8, pos = 23 pack ez
	# Pack sz
pack_sz:
	ldl	$r4,$r4,(1<<5)|31
	bins	$r0,$r0,$r4		# width = 1, pos = 31 pack the sign of the sum
	return
#endif
|
||
|
||
##########################################################################
|
||
##########################################################################
|
||
## multiplication
|
||
|
||
#ifdef L_mulsf3
	.global	__mulsf3
# __mulsf3 -- IEEE-754 single-precision multiplication.
#   in:   $r0 = x, $r1 = y (raw binary32 bit patterns)
#   out:  $r0 = x * y, rounded to nearest
#   uses: $r2-$r7 as scratch
# An operand whose exponent field is zero is handled as a zero.  A
# result whose exponent is <= 0 is flushed to a signed zero, except for
# the border case described at special_ret_mul.
__mulsf3:
	# unpack e
	bextu	$r2,$r0,(8<<5)|23	# ex in r2
	bextu	$r3,$r1,(8<<5)|23	# ey in r3
	# calc result sign
	xor	$r4,$r0,$r1
	lpm	$r5,sign_mask
	and	$r4,$r4,$r5		# sz in r4

	# unpack m add implicit bit
	ldk	$r5,(1<<10)|(9<<5)|23	# setup implicit bit and mask for e
	#----------------------
	bins	$r0,$r0,$r5		# clear sx, ex and add implicit bit mx

	# fast path when both exponents are in 1..254
	sub	$r6,$r2,1
	cmp	$r6,254
	jmpc	b,1f
	jmp	slow_mul
1:	sub	$r6,$r3,1
	cmp	$r6,254
	jmpc	b,no_special_vals_mul

slow_mul:
	# Check for early exit: an exponent is zero and neither is 255,
	# so the product is a signed zero
	cmp	$r2,0
	jmpc	z,op_is_zero
	cmp	$r3,0
	jmpc	nz,no_early_exit_mul
op_is_zero:
	cmp	$r2,255
	jmpc	z,no_early_exit_mul
	cmp	$r3,255
	jmpc	z,no_early_exit_mul
	move	$r0,$r4
	return
no_early_exit_mul:
	# setup to test for special values: (e-1)&0xFE is 0xFE exactly
	# when e is 0 or 255
	sub	$r6,$r2,1
	and	$r6,$r6,0xFE
	sub	$r7,$r3,1
	and	$r7,$r7,0xFE
	# test for special values
	cmp	$r6,$r7
	jmpc	gte,ex_spec_is_gte_ey_mul
	move	$r6,$r7
ex_spec_is_gte_ey_mul:
	cmp	$r6,0xFE
	jmpc	nz,no_special_vals_mul
	cmp	$r2,0xFF
	jmpc	nz,ex_not_FF_mul
	# x is infinity or NaN
	ashl	$r6,$r0,9		# x fraction
	cmp	$r6,0
	jmpc	nz,ret_nan		# x is NaN
	cmp	$r3,0
	jmpc	z,ret_nan		# infinity times a zero exponent
	ashl	$r6,$r1,1		# y without its sign bit
	lpm	$r7,high_FF
	cmp	$r6,$r7
	jmpc	a,ret_nan		# y is NaN
	cmp	$r6,0
	jmpc	z,ret_nan
	# infinity
	lpm	$r0,inf
	or	$r0,$r0,$r4
	return
ex_not_FF_mul:
	cmp	$r2,0
	jmpc	nz,no_nan_mul
	cmp	$r3,0xFF
	jmpc	nz,no_nan_mul
	jmp	ret_nan			# zero exponent times infinity or NaN
no_nan_mul:
	# return |y| with the product sign
	lpm	$r0,nan
	and	$r0,$r0,$r1
	or	$r0,$r0,$r4
	return

ret_nan:
	lpm	$r0,nan
	return

no_special_vals_mul:
	bins	$r1,$r1,$r5		# clear sy, ey and add implicit bit my
	# calc ez
	add	$r3,$r2,$r3
	sub	$r3,$r3,127		# ez in r3

	# (r1,r2) = r0 * r1
	mul	$r2,$r0,$r1
	muluh	$r1,$r0,$r1

	btst	$r1,(1<<5)|15		# XXX use jmpx
	jmpc	z,mul_z0

	# mz is 1X.XX...X
	# 48-bit product is in (r1,r2). The low 22 bits of r2
	# are discarded.
	lshr	$r0,$r2,22
	ashl	$r1,$r1,10
	or	$r0,$r0,$r1		# r0 = (r1,r2) >> 22
	ashlcc	2,10			# flags from the discarded bits of r2
	sticky	$r0
	add	$r3,$r3,1		# bump exponent

	# Round to nearest
	round	$r0, $r1, $r2
	lshr	$r6,$r0,24		# carry out of the rounding
	add	$r3,$r3,$r6

	sub	$r6,$r3,1
	cmp	$r6,254
	jmpc	b,no_special_ret_mul

special_ret_mul:
	# When the final exponent <= 0, result is flushed to 0 except
	# for the border case of ez == 0 with an all-ones mantissa
	# (0x00FFFFFF including the implicit bit), which is promoted to
	# the next higher FP no., that is, the smallest "normalized"
	# number.
	cmp	$r3,0
	jmpc	gt,exp_normal
	# Pack ez.  This overwrites bits 23..30, so the implicit bit is
	# gone afterwards and the border case reads 0x007FFFFF (m_mask).
	# The original compared against edge_case (0x00FFFFFF), which can
	# never match on this path.
	ldl	$r3,$r3,(8<<5)|23
	bins	$r0,$r0,$r3		# width = 8, pos = 23 pack ez
	lpm	$r2,m_mask
	cmp	$r0,$r2
	jmpc	nz,no_edge_case
	lpm	$r0,smallest_norm
	jmp	pack_sz_mul
no_edge_case:
	ldk	$r0,0
	jmp	pack_sz_mul
exp_normal:
	# overflow? result becomes a signed infinity
	cmp	$r3,255
	jmpc	lt,no_special_ret_mul
	ldk	$r0,0x7F8
	ashl	$r0,$r0,20
	jmp	pack_sz_mul
no_special_ret_mul:
	# Pack ez
	ldl	$r3,$r3,(8<<5)|23
	bins	$r0,$r0,$r3		# width = 8, pos = 23 pack ez
	# Pack sz
pack_sz_mul:
	or	$r0,$r0,$r4
	return

mul_z0:
	# mz is 0X.XX...X
	# 48-bit product is in (r1,r2). The low 21 bits of r2
	# are discarded.
	lshr	$r0,$r2,21
	ashl	$r1,$r1,11
	or	$r0,$r0,$r1		# r0 = (r1,r2) >> 21
	ashlcc	2,11			# flags from the discarded bits of r2
	sticky	$r0
	# Round to nearest
	round	$r0, $r1, $r2
	lshr	$r6,$r0,24		# carry out of the rounding
	add	$r3,$r3,$r6

	sub	$r6,$r3,1
	cmp	$r6,254
	jmpc	b,no_special_ret_mul
	jmp	special_ret_mul
#endif
|
||
|
||
##########################################################################
|
||
##########################################################################
|
||
## division
|
||
|
||
## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
|
||
## for implementation details
|
||
|
||
|
||
|
||
|
||
#ifdef L_divsf3
# Coefficients of the polynomial evaluated in __divsf3 (see the i0..i25
# steps below) and the mask applied to its result.
dc_1:		.long	0xffffe7d7
dc_2:		.long	0xffffffe8
dc_3:		.long	0xffbad86f
dc_4:		.long	0xfffbece7
dc_5:		.long	0xf3672b51
dc_6:		.long	0xfd9d3a3e
dc_7:		.long	0x9a3c4390
dc_8:		.long	0xd4d2ce9b
dc_9:		.long	0x1bba92b3
dc_10:		.long	0x525a1a8b
dc_11:		.long	0x0452b1bf
dc_12:		.long	0xFFFFFFC0
spec_val_test:	.long	0x7F7FFFFF

	.global	__divsf3
# __divsf3 -- IEEE-754 single-precision division.
#   in:   $r0 = x, $r1 = y (raw binary32 bit patterns)
#   out:  $r0 = x / y
#   uses: $r2-$r12 as scratch; $r13 is saved on entry and restored on
#         every return path
# In the step comments mul(a,b) is the high word of the 64-bit product
# (muluh).  A zero exponent field is handled as zero, and quotients
# below the normal range are flushed to a signed zero (see the border
# case at no_overflow_div).
__divsf3:
	push	$r13
	bextu	$r10,$r0,(8<<5)|23	# ex in r10
	bextu	$r11,$r1,(8<<5)|23	# ey in r11
	lpm	$r6,m_mask
	and	$r2,$r0,$r6		# mx
	and	$r3,$r1,$r6		# my
	cmp	$r2,$r3
	# c = (mx >= my), taken from bit 4 of $r30 after the compare
	# NOTE(review): $r30 is presumably the condition-code register --
	# confirm the flag assignment against the ISA manual.
	bextu	$r2,$r30,(1<<5)|4
	ashl	$r3,$r3,9		# T = my << 9 (y fraction, left aligned)
	lpm	$r13,sign_mask
	ashl	$r4,$r0,8		# X8 = X << 8;
	or	$r4,$r4,$r13		# Mx = X8 | 0x80000000;
	lshr	$r5,$r4,$r2		# S = Mx >> c;
	# calc D
	sub	$r2,$r11,$r2
	add	$r12,$r10,125
	sub	$r2,$r12,$r2		# int D = (Ex + 125) - (Ey - c);
	# calc result sign
	xor	$r12,$r0,$r1
	and	$r12,$r12,$r13		# Sr = ( X ^ Y ) & 0x80000000;
	# check early exit: ex == 0 with ey neither 0 nor 255 gives a
	# signed zero
	cmp	$r10,0
	jmpc	nz,no_early_ret_dev
	cmp	$r11,0
	jmpc	z,no_early_ret_dev
	cmp	$r11,255
	jmpc	z,no_early_ret_dev
	move	$r0,$r12
	pop	$r13
	return
no_early_ret_dev:
	# setup to test for special values: (e-1)&0xFE is 0xFE exactly
	# when e is 0 or 255
	sub	$r8,$r10,1
	and	$r8,$r8,0xFE
	sub	$r9,$r11,1
	and	$r9,$r9,0xFE
	# test for special values
	cmp	$r8,$r9
	jmpc	gte,absXm1_gte_absYm1
	move	$r8,$r9
absXm1_gte_absYm1:
	cmp	$r8,0xFE
	jmpc	nz,no_spec_ret_div
	cmp	$r10,0xFF
	jmpc	nz,ex_not_FF_div
	# x is infinity or NaN
	lpm	$r6,m_mask
	and	$r2,$r0,$r6		# mx
	cmp	$r2,0
	jmpc	nz,ret_nan_div		# x is NaN
	cmp	$r11,0xFF
	jmpc	z,ret_nan_div		# infinity over infinity or NaN
	jmp	ret_inf_div
ex_not_FF_div:
	cmp	$r11,0xFF
	jmpc	nz,ey_not_FF_div
	# y is infinity or NaN
	ashl	$r13,$r1,9		# y fraction
	cmp	$r13,0
	jmpc	nz,ret_nan_div		# y is NaN
	move	$r0,$r12		# finite over infinity: signed zero
	pop	$r13
	return
ey_not_FF_div:
	# remaining special case: ey is zero
	or	$r10,$r10,$r11
	cmp	$r10,0
	jmpc	z,ret_nan_div		# zero over zero
ret_inf_div:
	lpm	$r6,inf
	move	$r0,$r6
	or	$r0,$r0,$r12
	pop	$r13
	return
ret_nan_div:
	lpm	$r0,nan
	pop	$r13
	return

no_spec_ret_div:
	# check for overflow
	ldk	$r6,0xFE
	cmp	$r2,$r6
	jmpc	lt,no_overflow_div
	lpm	$r6,inf
	or	$r0,$r12,$r6
	pop	$r13
	return
no_overflow_div:
	# check for underflow
	cmp	$r2,0
	jmpc	ns,no_underflow_div
	# D < 0: the quotient is below the normal range and is flushed to
	# a signed zero, except for the border case D == -1 with
	# Mx == 0xFFFFFF00 and My == 0x80000000 (largest significand
	# divided by 1.0), which rounds up to the smallest normal number.
	# The original compared Mx against both constants, which can never
	# hold; the second compare now uses My.
	xnor	$r6,$r6,$r6		# -1
	cmp	$r2,$r6
	jmpc	nz,ret_sr_div
	ldk	$r7,0xFF
	xor	$r6,$r6,$r7		# 0xFF ^ -1 = 0xFFFFFF00
	cmp	$r4,$r6
	jmpc	nz,ret_sr_div
	lpm	$r6,sign_mask
	ashl	$r7,$r1,8
	or	$r7,$r7,$r6		# My = (Y << 8) | 0x80000000
	cmp	$r7,$r6
	jmpc	nz,ret_sr_div
	lshr	$r0,$r6,8		# 0x00800000, smallest normal
	or	$r0,$r0,$r12
	pop	$r13
	return
ret_sr_div:
	move	$r0,$r12
	pop	$r13
	return
no_underflow_div:
	lpm	$r6,dc_1
	muluh	$r7,$r3,$r6		# i0 = mul( T , 0xffffe7d7 );
	lpm	$r6,dc_2
	sub	$r7,$r6,$r7		# i1 = 0xffffffe8 - i0;
	muluh	$r7,$r5,$r7		# i2 = mul( S , i1 );
	add	$r7,$r7,0x20		# i3 = 0x00000020 + i2;
	muluh	$r8,$r3,$r3		# i4 = mul( T , T );
	muluh	$r9,$r5,$r8		# i5 = mul( S , i4 );
	lpm	$r6,dc_3
	muluh	$r10,$r3,$r6		# i6 = mul( T , 0xffbad86f );
	lpm	$r6,dc_4
	sub	$r10,$r6,$r10		# i7 = 0xfffbece7 - i6;
	muluh	$r10,$r9,$r10		# i8 = mul( i5 , i7 );
	add	$r7,$r7,$r10		# i9 = i3 + i8;
	muluh	$r9,$r8,$r9		# i10 = mul( i4 , i5 );
	lpm	$r6,dc_5
	muluh	$r10,$r3,$r6		# i11 = mul( T , 0xf3672b51 );
	lpm	$r6,dc_6
	sub	$r10,$r6,$r10		# i12 = 0xfd9d3a3e - i11;
	lpm	$r6,dc_7
	muluh	$r11,$r3,$r6		# i13 = mul( T , 0x9a3c4390 );
	lpm	$r6,dc_8
	sub	$r11,$r6,$r11		# i14 = 0xd4d2ce9b - i13
	muluh	$r11,$r8,$r11		# i15 = mul( i4 , i14 );
	add	$r10,$r10,$r11		# i16 = i12 + i15;
	muluh	$r10,$r9,$r10		# i17 = mul( i10 , i16 )
	add	$r7,$r7,$r10		# i18 = i9 + i17;
	muluh	$r10,$r8,$r8		# i19 = mul( i4 , i4 );
	lpm	$r6,dc_9
	muluh	$r11,$r3,$r6		# i20 = mul( T , 0x1bba92b3 );
	lpm	$r6,dc_10
	sub	$r11,$r6,$r11		# i21 = 0x525a1a8b - i20;
	lpm	$r6,dc_11
	muluh	$r8,$r8,$r6		# i22 = mul( i4 , 0x0452b1bf );
	add	$r8,$r11,$r8		# i23 = i21 + i22;
	muluh	$r8,$r10,$r8		# i24 = mul( i19 , i23 );
	muluh	$r8,$r9,$r8		# i25 = mul( i10 , i24 );
	add	$r3,$r7,$r8		# V = i18 + i25;
	# W = V & 0xFFFFFFC0;
	lpm	$r6,dc_12
	and	$r3,$r3,$r6		# W
	# round and pack final values
	ashl	$r0,$r2,23		# pack D
	or	$r0,$r0,$r12		# pack Sr
	ashl	$r12,$r1,8
	or	$r12,$r12,$r13		# My
	muluh	$r10,$r3,$r12		# mul( W , My )
	lshr	$r11,$r5,1		# S >> 1
	cmp	$r10,$r11
	jmpc	gte,div_ret_1
	add	$r3,$r3,0x40		# W was too small: step up by one unit
div_ret_1:
	lshr	$r3,$r3,7
	add	$r0,$r0,$r3		# add the mantissa into the packed word
	pop	$r13
	return
#endif
|
||
|
||
##########################################################################
|
||
##########################################################################
|
||
## Negate
|
||
|
||
#ifdef L_negsf
	.global	__negsf
# __negsf -- negate the float in $r0 by toggling its sign bit.
# $r1 is used to hold the mask.
__negsf:
	lpm	$r1,sign_mask
	xor	$r0,$r0,$r1		# flip bit 31
	return
#endif
|
||
|
||
##########################################################################
|
||
##########################################################################
|
||
## float to int & unsigned int
|
||
|
||
#ifdef L_fixsfsi
	.global	__fixsfsi
# __fixsfsi -- convert the float in $r0 to a signed 32-bit integer,
# truncating toward zero.  Result in $r0; $r1-$r3 are scratch.
#   NaN                      -> 0
#   exponent field below 127 -> 0
#   exponent field >= 158    -> 0x7FFFFFFF, or 0x80000000 when negative
__fixsfsi:
	lshr	$r2,$r0,31		# s in r2
	bextu	$r1,$r0,(8<<5)|23	# e in r1
	lpm	$r3,m_mask
	and	$r0,$r0,$r3		# m in r0
	# NaN has e == 0xFF and a non-zero fraction
	cmp	$r1,0xFF
	jmpc	nz,fix_not_nan
	cmp	$r0,0
	jmpc	z,fix_not_nan
	ldk	$r0,0
	return
fix_not_nan:
	# lower limit: magnitude below 1.0
	cmp	$r1,127
	jmpc	gte,fix_at_least_one
	ldk	$r0,0
	return
fix_at_least_one:
	# upper limit: saturate
	cmp	$r1,158
	jmpc	lt,fix_in_range
	lpm	$r0,nan			# 0x7FFFFFFF
	cmp	$r2,0
	jmpc	z,fix_done
	xnor	$r0,$r0,0		# complement -> 0x80000000
	return
fix_in_range:
	lpm	$r3,smallest_norm
	or	$r0,$r0,$r3		# set implicit bit
	sub	$r1,$r1,150		# shift distance, negative means right
	cmp	$r1,0
	jmpc	s,fix_shift_right
	ashl	$r0,$r0,$r1
	jmp	fix_apply_sign
fix_shift_right:
	xnor	$r1,$r1,0
	add	$r1,$r1,1		# r1 = -r1
	lshr	$r0,$r0,$r1
fix_apply_sign:
	cmp	$r2,0
	jmpc	z,fix_done
	xnor	$r0,$r0,0
	add	$r0,$r0,1		# r0 = -r0
fix_done:
	return
#endif
|
||
|
||
#ifdef L_fixunssfsi
	.global	__fixunssfsi
# __fixunssfsi -- convert the float in $r0 to an unsigned 32-bit
# integer, truncating toward zero.  Result in $r0; $r1-$r3 are scratch.
#   sign bit set         -> 0
#   NaN                  -> 0
#   magnitude below 1.0  -> 0
#   magnitude >= 2^32    -> 0xFFFFFFFF (this includes +infinity)
__fixunssfsi:
	lshr	$r2,$r0,31		# s in r2
	cmp	$r2,0
	jmpc	z,uint_not_neg
	ldk	$r0,0
	return
uint_not_neg:
	bextu	$r1,$r0,(8<<5)|23	# e in r1
	sub	$r1,$r1,127		# remove the bias
	lpm	$r3,m_mask
	and	$r0,$r0,$r3		# m in r0
	# test nan: the exponent field 0xFF reads 128 once the bias has
	# been removed (the original compared with 0xFF here, which can
	# never match)
	cmp	$r1,128
	jmpc	nz,uint_not_nan
	cmp	$r0,0
	jmpc	z,uint_not_nan
	ldk	$r0,0
	return
uint_not_nan:
	# test edges
	cmp	$r1,0
	jmpc	ns,uint_not_zero	# lower limit
	ldk	$r0,0
	return
uint_not_zero:
	# upper limit: the value does not fit in 32 bits, saturate rather
	# than shifting by an out-of-range distance
	cmp	$r1,32
	jmpc	lt,uint_in_range
	ldk	$r0,-1
	return
uint_in_range:
	lpm	$r3,smallest_norm
	or	$r0,$r0,$r3		# set implicit bit
	cmp	$r1,23
	jmpc	lt,shift_uint_right
	sub	$r1,$r1,23
	ashl	$r0,$r0,$r1
	return
shift_uint_right:
	ldk	$r3,23
	sub	$r1,$r3,$r1
	lshr	$r0,$r0,$r1
	return
#endif
|
||
|
||
##########################################################################
|
||
##########################################################################
|
||
## int & unsigned int to float
|
||
|
||
|
||
# i2f x,s1,s2,s3,lbl -- convert the non-zero unsigned integer in x to a
# float with a clear sign bit, leaving the result in x.
#   s1, s2, s3  scratch registers (s1 holds the leading zero count)
#   lbl         suffix appended to the labels so that the macro can be
#               expanded more than once
# Values with at least 8 leading zeros fit the 24-bit mantissa and are
# only shifted left; larger values are reduced to a mantissa with two
# extra low bits and rounded to nearest even.
.macro	i2f x, s1, s2, s3, lbl
	move	\s1, \x
	nlz	\s1, \s2
	cmp	\s1, 8
	jmpc	s, float_round\lbl
	# exact case: move the leading one to bit 23
	sub	\s2, \s1, 8
	ashl	\x, \x, \s2
	jmp	float_no_round\lbl
float_round\lbl:
	cmp	\s1, 6
	jmpc	s, float_shift_right\lbl
	# six or seven leading zeros: move the leading one to bit 25
	sub	\s2, \s1, 6
	ashl	\x, \x, \s2
	jmp	float_round_and_pack\lbl
float_shift_right\lbl:
	ldk	\s2, 6
	sub	\s2, \s2, \s1		# right shift distance
	xnor	\s3, \s3, \s3		# 0xFFFFFFFF
	ashl	\s3, \s3, \s2
	xnor	\s3, \s3, 0		# NOT: mask of the bits shifted out
	tst	\x, \s3			# determine value of sticky bit
	lshr	\x, \x, \s2
	jmpc	z, float_round_and_pack\lbl
	or	\x, \x, 1		# set the sticky bit to 1
float_round_and_pack\lbl:
	bextu	\s2, \x, (1<<5)|2	# bit 2, the future mantissa LSB
	or	\x, \x, \s2		# fold it into bit 0 so a tie rounds to even
	add	\x, \x, 1
	lshr	\x, \x, 2
	btst	\x, (1<<5)|24		# test for carry from round
	jmpc	z, float_no_round\lbl
	sub	\s1, \s1, 1		# inc e for carry (actually dec nlz)
	lshr	\x, \x, 1
float_no_round\lbl:
	ldk	\s2, 158
	sub	\s1, \s2, \s1		# e = 158 - nlz
	# Pack e
	ldl	\s1, \s1, (8<<5)|23
	bins	\x, \x, \s1
.endm
|
||
|
||
|
||
#ifdef L_floatsisf
	.global	__floatsisf
# __floatsisf -- convert the signed integer in $r0 to float, result in
# $r0.  Zero is returned unchanged.  $r1-$r4 are scratch.
__floatsisf:
	cmp	$r0,0
	jmpc	nz,float_not_zero
	return
float_not_zero:
	ashr	$r1,$r0,31		# s in r1: 0 or -1
	xor	$r0,$r0,$r1
	sub	$r0,$r0,$r1		# r0 = |r0|
	i2f	$r0, $r2, $r3, $r4, 1
	# insert the sign at bit 31
	ldl	$r1,$r1,(1<<5)|31
	bins	$r0,$r0,$r1
	return
#endif
|
||
|
||
#ifdef L_floatunsisf
	.global	__floatunsisf
# __floatunsisf -- convert the unsigned integer in $r0 to float, result
# in $r0.  Zero is returned unchanged.  $r1-$r3 are scratch.
__floatunsisf:
	cmp	$r0,0
	jmpc	nz,float_not_zero2
	return
float_not_zero2:
	i2f	$r0, $r1, $r2, $r3, 2
	return
#endif
|
||
|
||
#if 0
##########################################################################
##########################################################################
## float compare (disabled)
#
# __cmpsf2_ -- three-way compare of the floats in $r0 and $r1.
# Returns in $r0: 1 when x > y or either operand is NaN, 0 when equal
# (both zeros compare equal), 0xFFFFFFFF when x < y.

__cmpsf2_:
	# calc abs vals
	lpm	$r3,nan			# also abs mask
	and	$r2,$r0,$r3
	and	$r3,$r1,$r3
	# either abs value above the infinity pattern means NaN
	lpm	$r4,inf
	cmp	$r2,$r4
	jmpc	gt,cmp_is_gt
	cmp	$r3,$r4
	jmpc	gt,cmp_is_gt
	# test for both operands being 0
	or	$r2,$r2,$r3
	cmp	$r2,0
	jmpc	z,cmp_is_eq
	# test for equal bit patterns
	cmp	$r0,$r1
	jmpc	z,cmp_is_eq
	# at least one operand positive: plain signed compare
	and	$r2,$r0,$r1
	cmp	$r2,0
	jmpc	s,cmp_both_neg
	cmp	$r0,$r1
	jmpc	gt,cmp_is_gt
	# r0 < r1
	lpm	$r0,high_uint
	return
cmp_both_neg:
	# both negative: the ordering of the bit patterns is reversed
	cmp	$r0,$r1
	jmpc	lt,cmp_is_gt
	# r0 < r1
	lpm	$r0,high_uint
	return
cmp_is_gt:
	ldk	$r0,1
	return
cmp_is_eq:
	ldk	$r0,0
	return
#endif
|
||
|
||
#ifdef L_udivsi3
	.global	__udivsi3
# __udivsi3 -- unsigned 32-bit division by shift and subtract.
#   in:   $r0 = dividend, $r1 = divisor
#   out:  $r0 = quotient, $r2 = remainder
#   uses: $r3 as scratch; $r28 is saved and restored
__udivsi3:
	push	$r28
	ldk	$r28,-32		# counts up to zero, one step per bit
	ldk	$r2,0			# remainder accumulator
udiv_step:
	# shift the pair $r2:$r0 left by one bit
	lshr	$r3,$r0,31
	ashl	$r0,$r0,1
	ashl	$r2,$r2,1
	or	$r2,$r2,$r3
	# subtract the divisor when it fits and record a quotient bit
	cmp	$r2,$r1
	jmpc	b,udiv_next
	sub	$r2,$r2,$r1
	add	$r0,$r0,1
udiv_next:
	add	$r28,$r28,1
	# presumably loops while bit 31 of $r28 is 1, that is while the
	# counter is still negative -- confirm jmpx operand order
	jmpx	31,$r28,1,udiv_step
	pop	$r28
	return
#endif
|
||
|
||
#ifdef L_umodsi3
	.global	__umodsi3
# __umodsi3 -- unsigned remainder of $r0 by $r1, result in $r0.
# Runs __udivsi3 and returns the remainder it leaves in $r2.
__umodsi3:
	call	__udivsi3
	move	$r0,$r2			# remainder
	return
#endif
|
||
|
||
#ifdef L_divsi3
	.global	__divsi3
# __divsi3 -- signed 32-bit division, $r0 / $r1, quotient in $r0.
# Divides the absolute values with __udivsi3 and negates the quotient
# when the operand signs differ.  $r5 holds the sign across the call.
__divsi3:
	xor	$r5,$r0,$r1		# bit 31 of $r5 is the sign of the result
	ashr	$r2,$r1,31		# $r1 = abs($r1)
	xor	$r1,$r1,$r2
	sub	$r1,$r1,$r2
	ashr	$r2,$r0,31		# $r0 = abs($r0)
	xor	$r0,$r0,$r2
	sub	$r0,$r0,$r2
	call	__udivsi3
	ashr	$r5,$r5,31		# 0 or -1
	xor	$r0,$r0,$r5		# conditional negate
	sub	$r0,$r0,$r5
	return

#endif
|
||
|
||
#ifdef L_modsi3
	.global	__modsi3
# __modsi3 -- signed 32-bit remainder, $r0 % $r1, result in $r0.
# Takes the remainder of the absolute values with __umodsi3 and gives
# it the sign of the dividend.  $r5 holds that sign across the call.
__modsi3:
	move	$r5,$r0			# bit 31 of $r5 is the sign of the result
	ashr	$r2,$r1,31		# $r1 = abs($r1)
	xor	$r1,$r1,$r2
	sub	$r1,$r1,$r2
	ashr	$r2,$r0,31		# $r0 = abs($r0)
	xor	$r0,$r0,$r2
	sub	$r0,$r0,$r2
	call	__umodsi3
	ashr	$r5,$r5,31		# 0 or -1
	xor	$r0,$r0,$r5		# conditional negate
	sub	$r0,$r0,$r5
	return
#endif
|