From 6e8cf7e1d391fc3d4afa883eec12d49f8916ea56 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Sun, 30 Dec 2018 00:02:44 +0100
Subject: [PATCH] implemented sum() asm

---
 compiler/examples/cube3d-c64.p8               | 177 ++++++------
 compiler/examples/test.p8                     | 264 ++++++++++++++----
 .../src/prog8/compiler/target/c64/AsmGen.kt   |   2 +-
 prog8lib/prog8lib.p8                          | 125 ++++++++-
 4 files changed, 408 insertions(+), 160 deletions(-)

diff --git a/compiler/examples/cube3d-c64.p8 b/compiler/examples/cube3d-c64.p8
index 6cd94e820..6a33ec504 100644
--- a/compiler/examples/cube3d-c64.p8
+++ b/compiler/examples/cube3d-c64.p8
@@ -1,26 +1,15 @@
 %import c64utils
 %option enable_floats
 
-~ irq {
-    uword global_time
-    ubyte time_changed
-
-    sub irq() {
-        global_time++
-        time_changed = 1
-    }
-}
-
-
 ~ main {
 
-    const uword width = 320
-    const uword height = 200
+    const uword width = 40
+    const uword height = 25
 
     ; vertices
-    float[8] xcoor = [ -1.0, -1.0, -1.0, -1.0,  1.0,  1.0,  1.0, 1.0 ]
-    float[8] ycoor = [ -1.0, -1.0,  1.0,  1.0, -1.0, -1.0,  1.0, 1.0 ]
-    float[8] zcoor = [ -1.0,  1.0, -1.0,  1.0, -1.0,  1.0, -1.0, 1.0 ]
+    float[8] xcoor = [ -1.0, -1.0, -1.0, -1.0,  1.0,  1.0,  1.0, 2.0 ]
+    float[8] ycoor = [ -1.0, -1.0,  1.0,  1.0, -1.0, -1.0,  1.0, 3.0 ]
+    float[8] zcoor = [ -1.0,  1.0, -1.0,  1.0, -1.0,  1.0, -1.0, 4.0 ]
 
     ; edges (msb=from vertex, lsb=to vertex)
     uword[12] edges = [$0001, $0103, $0302, $0200, $0405, $0507, $0706, $0604, $0004, $0105, $0206, $0307]
@@ -31,25 +20,20 @@
     float[len(zcoor)] rotatedz
 
     sub start()  {
+        float time=0.0
         while true {
-            if irq.time_changed {
-                irq.time_changed = 0
-                ;vm_gfx_clearscr(0)
-                ;vm_gfx_text(8, 6, 1, "Spin")
-                ;vm_gfx_text(29, 11, 1, "to Win !")
-
-                for uword i in 0 to width//10 {
-                    uword x=i*2+width//2-width//10
-                    ;vm_gfx_line(x, 130, i*10.w, 199, 6)
-                }
-
-                rotate_vertices(irq.global_time as float / 30.0)
-                c64scr.print_ub(X)
-                c64.CHROUT('\n')
-                draw_edges()
-                c64scr.print_ub(X)
-                c64.CHROUT('\n')
-            }
+            c64scr.print("stack1 ")
+            c64scr.print_ub(X)
+            c64.CHROUT('\n')
+            rotate_vertices(time)
+;            c64scr.print("stack2 ")
+;            c64scr.print_ub(X)
+;            c64.CHROUT('\n')
+;            draw_edges()
+            c64scr.print("stack3 ")
+            c64scr.print_ub(X)
+            c64.CHROUT('\n')
+            time += 0.1
         }
     }
 
@@ -57,73 +41,90 @@
         ; rotate around origin (0,0,0)
 
         ; set up the 3d rotation matrix values
-        float cosa = cos(t)
-        float sina = sin(t)
-        float cosb = cos(t*0.33)
-        float sinb = sin(t*0.33)
-        float cosc = cos(t*0.78)
-        float sinc = sin(t*0.78)
-
-        float Axx = cosa*cosb
-        float Axy = cosa*sinb*sinc - sina*cosc
-        float Axz = cosa*sinb*cosc + sina*sinc
-        float Ayx = sina*cosb
-        float Ayy = sina*sinb*sinc + cosa*cosc
-        float Ayz = sina*sinb*cosc - cosa*sinc
-        float Azx = -sinb
-        float Azy = cosb*sinc
-        float Azz = cosb*cosc
+;        float cosa = cos(t)
+;        float sina = sin(t)
+;        float cosb = cos(t*0.33)
+;        float sinb = sin(t*0.33)
+;        float cosc = cos(t*0.78)
+;        float sinc = sin(t*0.78)
+;
+;        float Axx = cosa*cosb
+;        float Axy = cosa*sinb*sinc - sina*cosc
+;        float Axz = cosa*sinb*cosc + sina*sinc
+;        float Ayx = sina*cosb
+;        float Ayy = sina*sinb*sinc + cosa*cosc
+;        float Ayz = sina*sinb*cosc - cosa*sinc
+;        float Azx = -sinb
+;        float Azy = cosb*sinc
+;        float Azz = cosb*cosc
 
         for ubyte i in 0 to len(xcoor)-1 {
-            rotatedx[i] = Axx*xcoor[i] + Axy*ycoor[i] + Axz*zcoor[i]
-            rotatedy[i] = Ayx*xcoor[i] + Ayy*ycoor[i] + Ayz*zcoor[i]
-            rotatedz[i] = Azx*xcoor[i] + Azy*ycoor[i] + Azz*zcoor[i]
+            float xc = xcoor[i]
+            c64scr.print("i=")
+            c64scr.print_ub(i)
+            c64scr.print("  xc=")
+            c64flt.print_f(xc)
+            c64.CHROUT('\n')
+            float yc = ycoor[i]
+            c64scr.print("i=")
+            c64scr.print_ub(i)
+            c64scr.print("  yc=")
+            c64flt.print_f(yc)
+            c64.CHROUT('\n')
+            float zc = zcoor[i]
+            c64scr.print("i=")
+            c64scr.print_ub(i)
+            c64scr.print("  zc=")
+            c64flt.print_f(zc)
+            c64.CHROUT('\n')
+            %breakpoint
+
+; @todo the calculations below destroy the contents of the coor[] arrays???
+;            float rx=Axx*xcoor[i] + Axy*ycoor[i] + Axz*zcoor[i]
+;            float ry=Ayx*xcoor[i] + Ayy*ycoor[i] + Ayz*zcoor[i]
+;            float rz=Azx*xcoor[i] + Azy*ycoor[i] + Azz*zcoor[i]
+;            c64scr.print(" rx=")
+;            c64flt.print_f(rx)
+;            c64.CHROUT('\n')
+;            c64scr.print(" ry=")
+;            c64flt.print_f(ry)
+;            c64.CHROUT('\n')
+;            c64scr.print(" rz=")
+;            c64flt.print_f(rz)
+;            c64.CHROUT('\n')
+
+
+            ;rotatedx[i] = Axx*xcoor[i] + Axy*ycoor[i] + Axz*zcoor[i]
+            ;rotatedy[i] = Ayx*xcoor[i] + Ayy*ycoor[i] + Ayz*zcoor[i]
+            ;rotatedz[i] = Azx*xcoor[i] + Azy*ycoor[i] + Azz*zcoor[i]
         }
     }
 
 
     sub draw_edges() {
 
-        sub toscreenx(float x, float z) -> word {
-            return x/(4.2+z) * (height as float) as word + width // 2
+        sub toscreenx(float x, float z) -> byte {
+            ;return x/(4.2+z) * (height as float) as byte + width // 2
+            c64flt.print_f(x)
+            c64.CHROUT('\n')
+            float fx = (x*8.0 + 20.0)
+            return fx as byte
+
         }
 
-        sub toscreeny(float y, float z) -> word {
-            return y/(4.2+z) * (height as float) as word + height // 2
+        sub toscreeny(float y, float z) -> byte {
+            ;return y/(4.2+z) * (height as float) as byte + height // 2
+            c64flt.print_f(y)
+            c64.CHROUT('\n')
+            float fy = (y*8.0 + 12.0)
+            return fy as byte
         }
 
-        ; draw all edges of the object
-        for uword edge in edges {
-
-            ubyte e_from = msb(edge)
-            ubyte e_to = lsb(edge)
-
-            word x1 = toscreenx(rotatedx[e_from], rotatedz[e_from])
-            word y1 = toscreeny(rotatedy[e_from], rotatedz[e_from])
-            word x2 = toscreenx(rotatedx[e_to], rotatedz[e_to])
-            word y2 = toscreeny(rotatedy[e_to], rotatedz[e_to])
-            ubyte color = e_from+e_to
-            ;vm_gfx_line(x1, y1, x2, y2)
-        }
-
-        ; accentuate the vertices a bit with small boxes
+        ; plot the points of the 3d cube
         for ubyte i in 0 to len(xcoor)-1 {
-            word sx = toscreenx(rotatedx[i], rotatedz[i])
-            word sy = toscreeny(rotatedy[i], rotatedz[i])
-            ubyte color=i+2
-;            vm_gfx_pixel(sx-1, sy-1, color)
-;            vm_gfx_pixel(sx, sy-1, color)
-;            vm_gfx_pixel(sx+1, sy-1, color)
-;            vm_gfx_pixel(sx-1, sy, color)
-;            vm_gfx_pixel(sx, sy, color)
-;            vm_gfx_pixel(sx+1, sy, color)
-;            vm_gfx_pixel(sx-1, sy+1, color)
-;            vm_gfx_pixel(sx, sy+1, color)
-;            vm_gfx_pixel(sx+1, sy+1, color)
-;            vm_gfx_pixel(sx, sy-2, color)
-;            vm_gfx_pixel(sx+2, sy, color)
-;            vm_gfx_pixel(sx, sy+2, color)
-;            vm_gfx_pixel(sx-2, sy, color)
+            ubyte sx = toscreenx(rotatedx[i], rotatedz[i]) as ubyte
+            ubyte sy = toscreeny(rotatedy[i], rotatedz[i]) as ubyte
+            c64scr.setchrclr(sx, sy, 81, i+2)
         }
     }
 }
diff --git a/compiler/examples/test.p8 b/compiler/examples/test.p8
index eb79cdc81..d3b9e5a9c 100644
--- a/compiler/examples/test.p8
+++ b/compiler/examples/test.p8
@@ -3,68 +3,216 @@
 
 ~ main {
 
-    ubyte[3] ubarray = [11,55,222]
-    byte[3] barray = [-11,-22,-33]
-    uword[3] uwarray = [111,2222,55555]
-    word[3] warray = [-111,-222,-555]
-    float[3] farray = [1.11, 2.22, -3.33]
-    str text = "hello\n"
+    ; @todo fix floating point number corruption
+    ; @todo add avg implementations
+
+    word[10] xword = [1,2,3,4,5,6,7,8,9,-1111]
+    word[10] yword = [11,22,33,44,55,66,77,88,99,-1111 ]
+    word[10] zword = [1,2,3,4,5,6,7,8,9,-9999]
+
+    uword[10] xuword = [1,2,3,4,5,6,7,88,99,1111]
+    uword[10] yuword = [11,22,33,44,55,66,77,88,99,1111 ]
+    uword[10] zuword = [1,2,3,4,5,6,77,88,99,9999]
+
+    byte[10] xbyte = [1,2,3,4,5,6,7,8,9,-110]
+    byte[10] ybyte = [ 11,22,33,44,55,-66,-77,-88,-99,-110 ]
+    byte[10] zbyte = [1,2,3,4,5,6,7,8,9,-99]
+
+    ubyte[10] xubyte = [1,2,3,4,5,6,77,88,99,111]
+    ubyte[10] yubyte = [11,22,33,44,55,66,77,88,99,111]
+    ubyte[10] zubyte = [1,2,3,4,5,66,7,88,99,111]
+
+    float[10] xcoor = [ 1,2,3,4,5,6,7,8,9.9,11.11 ]
+    float[10] ycoor = [ 11,22,33,44,55,66,77,88,99.9,111.11 ]
+    float[10] zcoor = [ 111,222,333,444,555,666,777,888,999.9,1001.11 ]
 
     sub start()  {
+;        c64scr.print("\nxword:\n")
+;        for word w1 in xword {
+;            c64scr.print_w(w1)
+;            c64.CHROUT(',')
+;        }
+
+;        c64scr.print("\nxcoor:\n")
+;        for float f1 in xcoor {
+;            c64flt.print_f(f1)
+;            c64.CHROUT(',')
+;        }
+
+;        c64.CHROUT('\n')
+;        c64scr.print("ycoor:\n")
+;        for float f2 in ycoor {
+;            c64flt.print_f(f2)
+;            c64.CHROUT(',')
+;        }
+;        c64.CHROUT('\n')
+;        c64scr.print("zcoor:\n")
+;        for float f3 in zcoor {
+;            c64flt.print_f(f3)
+;            c64.CHROUT(',')
+;        }
+;        c64.CHROUT('\n')
+
+        c64.CHROUT('X')
+        c64scr.print_ub(X)
+        c64.CHROUT('\n')
+        word sumbx = sum(xbyte)     ; expected sum: -65
+        word sumby = sum(ybyte)     ; expected sum: -275
+        word sumbz = sum(zbyte)     ; expected sum: -54
+        uword sumubx = sum(xubyte)  ; expected sum: 396
+        uword sumuby = sum(yubyte)  ; expected sum: 606
+        uword sumubz = sum(zubyte)  ; expected sum: 386
+        word sumwx = sum(xword)     ; expected sum: -1066
+        word sumwy = sum(yword)     ; expected sum: -616
+        word sumwz = sum(zword)     ; expected sum: -9954
+        uword sumuwx = sum(xuword)  ; expected sum: 1326
+        uword sumuwy = sum(yuword)  ; expected sum: 1606
+        uword sumuwz = sum(zuword)  ; expected sum: 10284
+        float sumfx = sum(xcoor)    ; expected sum: 57.01
+        float sumfy = sum(ycoor)    ; expected sum: 607.01
+        float sumfz = sum(zcoor)    ; expected sum: 5997.01
+
+        c64.CHROUT('X')
+        c64scr.print_ub(X)
+        c64.CHROUT('\n')
+;        float avgbx = avg(xbyte)
+;        float avgby = avg(ybyte)
+;        float avgbz = avg(zbyte)
+;        float avgubx = avg(xubyte)
+;        float avguby = avg(yubyte)
+;        float avgubz = avg(zubyte)
+;        float avgwx = avg(xword)
+;        float avgwy = avg(yword)
+;        float avgwz = avg(zword)
+;        float avguwx = avg(xuword)
+;        float avguwy = avg(yuword)
+;        float avguwz = avg(zuword)
+;        float avgfx = avg(xcoor)
+;        float avgfy = avg(ycoor)
+;        float avgfz = avg(zcoor)
+;        c64.CHROUT('X')
+;        c64scr.print_ub(X)
+;        c64.CHROUT('\n')
+
+        c64scr.print("sumbx=")
+        c64scr.print_w(sumbx)
+        c64.CHROUT('\n')
+        c64scr.print("sumby=")
+        c64scr.print_w(sumby)
+        c64.CHROUT('\n')
+        c64scr.print("sumbz=")
+        c64scr.print_w(sumbz)
+        c64.CHROUT('\n')
+
+        c64scr.print("sumubx=")
+        c64scr.print_uw(sumubx)
+        c64.CHROUT('\n')
+        c64scr.print("sumuby=")
+        c64scr.print_uw(sumuby)
+        c64.CHROUT('\n')
+        c64scr.print("sumubz=")
+        c64scr.print_uw(sumubz)
+        c64.CHROUT('\n')
+
+        c64scr.print("sumwx=")
+        c64scr.print_w(sumwx)
+        c64.CHROUT('\n')
+        c64scr.print("sumwy=")
+        c64scr.print_w(sumwy)
+        c64.CHROUT('\n')
+        c64scr.print("sumwz=")
+        c64scr.print_w(sumwz)
+        c64.CHROUT('\n')
+
+        c64scr.print("sumuwx=")
+        c64scr.print_uw(sumuwx)
+        c64.CHROUT('\n')
+        c64scr.print("sumuwy=")
+        c64scr.print_uw(sumuwy)
+        c64.CHROUT('\n')
+        c64scr.print("sumuwz=")
+        c64scr.print_uw(sumuwz)
+        c64.CHROUT('\n')
+
+        c64scr.print("sumfx=")
+        c64flt.print_f(sumfx)
+        c64.CHROUT('\n')
+        c64scr.print("sumfy=")
+        c64flt.print_f(sumfy)
+        c64.CHROUT('\n')
+        c64scr.print("sumfz=")
+        c64flt.print_f(sumfz)
+        c64.CHROUT('\n')
+
+        return
+
+;        c64scr.print("avgbx=")
+;        c64flt.print_f(avgbx)
+;        c64.CHROUT('\n')
+;        c64scr.print("avgby=")
+;        c64flt.print_f(avgby)
+;        c64.CHROUT('\n')
+;        c64scr.print("avgbz=")
+;        c64flt.print_f(avgbz)
+;        c64.CHROUT('\n')
+;
+;        c64scr.print("avgubx=")
+;        c64flt.print_f(avgubx)
+;        c64.CHROUT('\n')
+;        c64scr.print("avguby=")
+;        c64flt.print_f(avguby)
+;        c64.CHROUT('\n')
+;        c64scr.print("avgubz=")
+;        c64flt.print_f(avgubz)
+;        c64.CHROUT('\n')
+;
+;        c64scr.print("avgwx=")
+;        c64flt.print_f(avgwx)
+;        c64.CHROUT('\n')
+;        c64scr.print("avgwy=")
+;        c64flt.print_f(avgwy)
+;        c64.CHROUT('\n')
+;        c64scr.print("avgwz=")
+;        c64flt.print_f(avgwz)
+;        c64.CHROUT('\n')
+;
+;        c64scr.print("avguwx=")
+;        c64flt.print_f(avguwx)
+;        c64.CHROUT('\n')
+;        c64scr.print("avguwy=")
+;        c64flt.print_f(avguwy)
+;        c64.CHROUT('\n')
+;        c64scr.print("avguwz=")
+;        c64flt.print_f(avguwz)
+;        c64.CHROUT('\n')
+;
+;        c64scr.print("avgfx=")
+;        c64flt.print_f(avgfx)
+;        c64.CHROUT('\n')
+;        c64scr.print("avgfy=")
+;        c64flt.print_f(avgfy)
+;        c64.CHROUT('\n')
+;        c64scr.print("avgfz=")
+;        c64flt.print_f(avgfz)
+;        c64.CHROUT('\n')
+;
+;        return
 
 
-rpt:
-        vm_write_str("\nregular for loop byte\n")
-        for ubyte x in 10 to 15 {
-            vm_write_num(x)
-            vm_write_char(',')
-        }
-        vm_write_str("\nregular for loop word\n")
-        for uword y in 500 to 505 {
-            vm_write_num(y)
-            vm_write_char(',')
-        }
+        c64scr.print("\nseparated i=2\n")
+        c64scr.print(" x[2]=")
+        c64flt.print_f(xcoor[2])        ; @todo wrong value printed
 
-        vm_write_str("\nloop str\n")
-        for ubyte c in text {
-            vm_write_num(c)
-            vm_write_char(',')
-        }
-
-        vm_write_str("\nloop ub\n")
-        for ubyte ub in ubarray{
-            vm_write_num(ub)
-            vm_write_char(',')
-        }
-
-        vm_write_str("\nloop b\n")
-        for byte b in barray {
-            vm_write_num(b)
-            vm_write_char(',')
-        }
-
-        vm_write_str("\nloop uw\n")
-        for uword uw in uwarray {
-            vm_write_num(uw)
-            vm_write_char(',')
-        }
-
-        vm_write_str("\nloop w\n")
-        for word w in warray {
-            vm_write_num(w)
-            vm_write_char(',')
-        }
-
-        vm_write_str("\nloop f\n")
-        for float f in farray {
-            vm_write_num(f)
-            vm_write_char(',')
-        }
-
-        goto rpt
-
-
-ending:
-        vm_write_str("\nending\n")
+        c64scr.print(" y[2]=")
+        c64flt.print_f(ycoor[2])
+        c64scr.print(" z[2]=")
+        c64flt.print_f(zcoor[2])
+        c64scr.print("\nseparated i=3\n")
+        c64scr.print(" x[3]=")
+        c64flt.print_f(xcoor[3])
+        c64scr.print(" y[3]=")
+       c64flt.print_f(ycoor[3])
+        c64scr.print(" z[3]=")
+        c64flt.print_f(zcoor[3])
     }
 }
diff --git a/compiler/src/prog8/compiler/target/c64/AsmGen.kt b/compiler/src/prog8/compiler/target/c64/AsmGen.kt
index 9cc38e05c..fe394bff7 100644
--- a/compiler/src/prog8/compiler/target/c64/AsmGen.kt
+++ b/compiler/src/prog8/compiler/target/c64/AsmGen.kt
@@ -248,7 +248,7 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
                 DataType.BYTE -> out("${v.first}\t.char  0")
                 DataType.UWORD -> out("${v.first}\t.word  0")
                 DataType.WORD -> out("${v.first}\t.sint  0")
-                DataType.FLOAT -> out("${v.first}\t.fill  5  ; float")
+                DataType.FLOAT -> out("${v.first}\t.byte  0,0,0,0,0  ; float")
                 DataType.STR,
                 DataType.STR_P,
                 DataType.STR_S,
diff --git a/prog8lib/prog8lib.p8 b/prog8lib/prog8lib.p8
index 45b2aa03d..fe3c9a827 100644
--- a/prog8lib/prog8lib.p8
+++ b/prog8lib/prog8lib.p8
@@ -1218,7 +1218,7 @@ _greater	lda  (SCRATCH_ZPWORD1),y
 		dey
 _lesseq		dey
 		dey
-		bpl  _loop
+		bpl  _loop			; @todo doesn't work for arrays where y will be >127. FIX OTHER LOOPS TOO!
 		lda  _result_maxuw
 		sta  ESTACK_LO,x
 		lda  _result_maxuw+1
@@ -1273,8 +1273,8 @@ func_max_f	.proc
 		sta  _cmp_mod+1			; compare using 255 so we keep larger values
 _minmax_entry	jsr  pop_array_and_lengthmin1Y
 		stx  SCRATCH_ZPREGX
-		sty  SCRATCH_ZPREG
--		lda  SCRATCH_ZPWORD1
+-		sty  SCRATCH_ZPREG
+		lda  SCRATCH_ZPWORD1
 		ldy  SCRATCH_ZPWORD1+1
 		jsr  c64.FCOMP
 _cmp_mod	cmp  #255			; will be modified
@@ -1283,20 +1283,119 @@ _cmp_mod	cmp  #255			; will be modified
 		lda  SCRATCH_ZPWORD1
 		ldy  SCRATCH_ZPWORD1+1
 		jsr  c64.MOVFM
-+		lda  #5
-		clc
-		adc  SCRATCH_ZPWORD1
-		sta  SCRATCH_ZPWORD1
-		bcc  +
-		inc  SCRATCH_ZPWORD1+1
-+		ldy  SCRATCH_ZPREG
++		ldy  SCRATCH_ZPREG		; '+' label: landing point for a forward '+' branch from the compare above (branch itself not visible in this hunk - confirm); Y = remaining index
 		dey
-		sty  SCRATCH_ZPREG
-		bpl  -
-		jmp  push_fac1_as_result
+		cpy  #255			; test the loop counter in Y (A holds no loop state here); 255 = wrapped below index 0
+		beq  +				; every element handled: finish
+		lda  SCRATCH_ZPWORD1
+		clc
+		adc  #5				; advance the element pointer by 5 bytes (one float)
+		sta  SCRATCH_ZPWORD1
+		bcc  -				; no page crossing: next element (the '-' line stores Y back)
+		inc  SCRATCH_ZPWORD1+1		; page crossing: bump pointer high byte
+		bne  -				; taken unless the high byte wrapped to 0
++		jmp  push_fac1_as_result	; hand the value left in FAC1 to the result pusher
 _min_float	.byte  255,255,255,255,255	; -1.7014118345e+38
 		.pend
+		
 
+func_sum_b	.proc				; sum of a signed byte array, accumulated as a sign-extended 16-bit value in ESTACK_LO/HI,x
+		jsr  pop_array_and_lengthmin1Y	; the loop relies on this leaving the array address in SCRATCH_ZPWORD1 and the last index in Y
+		lda  #0
+		sta  ESTACK_LO,x		; running sum = 0 (low byte)
+		sta  ESTACK_HI,x		; running sum = 0 (high byte)
+_loop		lda  (SCRATCH_ZPWORD1),y	; fetch element Y
+		pha				; keep a copy: its sign bit is needed after the low-byte add
+		clc
+		adc  ESTACK_LO,x
+		sta  ESTACK_LO,x		; the carry from this add must survive until the adc below
+		; sign extend the high byte
+		pla
+		and  #$80			; isolate the sign bit (pla/and/lda leave the carry flag untouched)
+		beq  +				; non-negative element: its high byte is 0 (A is already 0)
+		lda  #$ff			; negative element: its high byte is $ff
++		adc  ESTACK_HI,x		; add the extended high byte plus the carry from the low-byte add
+		sta  ESTACK_HI,x
+		dey
+		cpy  #255			; Y wrapped below 0: index 0 has been processed
+		bne  _loop
+		dex				; result was written at slot X; step X past it (presumably making it the new stack top)
+		rts
+		.pend
+		
+func_sum_ub	.proc				; sum of an unsigned byte array, accumulated as a 16-bit value in ESTACK_LO/HI,x
+		jsr  pop_array_and_lengthmin1Y	; the loop relies on this leaving the array address in SCRATCH_ZPWORD1 and the last index in Y
+		lda  #0
+		sta  ESTACK_LO,x		; running sum = 0 (low byte)
+		sta  ESTACK_HI,x		; running sum = 0 (high byte)
+-		lda  (SCRATCH_ZPWORD1),y	; fetch element Y
+		clc
+		adc  ESTACK_LO,x
+		sta  ESTACK_LO,x
+		bcc  +				; no overflow out of the low byte
+		inc  ESTACK_HI,x		; propagate the carry into the high byte
++		dey
+		cpy  #255			; Y wrapped below 0: index 0 has been processed
+		bne  -
+		dex				; result was written at slot X; step X past it (presumably making it the new stack top)
+		rts
+		.pend
+
+func_sum_uw	.proc				; sum of a word array, 16-bit result in ESTACK_LO/HI,x (no overflow handling beyond 16 bits)
+		jsr  pop_array_and_lengthmin1Y	; the loop relies on this leaving the array address in SCRATCH_ZPWORD1 and the last index in Y
+		tya
+		asl  a				; last index * 2 = byte offset of the last word's low byte (asl drops bit 7, so the index must be <= 127)
+		tay
+		lda  #0
+		sta  ESTACK_LO,x		; running sum = 0 (low byte)
+		sta  ESTACK_HI,x		; running sum = 0 (high byte)
+-		lda  (SCRATCH_ZPWORD1),y	; low byte of the current word
+		iny
+		clc
+		adc  ESTACK_LO,x
+		sta  ESTACK_LO,x
+		lda  (SCRATCH_ZPWORD1),y	; high byte of the current word
+		adc  ESTACK_HI,x		; includes the carry from the low-byte add
+		sta  ESTACK_HI,x
+		dey				; three deys: undo the iny, then step back one whole word
+		dey
+		dey
+		cpy  #254			; offset 0 minus 2 wraps to 254: the first word has been processed
+		bne  -
+		dex				; result was written at slot X; step X past it (presumably making it the new stack top)
+		rts
+		.pend
+
+func_sum_w	.proc				; sum of a signed word array
+		jmp  func_sum_uw		; two's-complement 16-bit addition is identical for signed and unsigned words
+		.pend
+
+func_sum_f	.proc				; sum of a float array, accumulated in FAC1 and handed to push_fac1_as_result
+		lda  #<c64.FL_NEGHLF		; start value is the FL_NEGHLF constant (presumably -0.5), compensated by FADDH at the end
+		ldy  #>c64.FL_NEGHLF
+		jsr  c64.MOVFM			; presumably loads FAC1 from the float at address A/Y - confirm against the ROM docs
+		jsr  pop_array_and_lengthmin1Y	; the loop relies on this leaving the array address in SCRATCH_ZPWORD1 and the last index in Y
+		stx  SCRATCH_ZPREGX		; save X (assumed: the c64.* calls may clobber it and the result pusher restores it - confirm)
+-		sty  SCRATCH_ZPREG		; keep the remaining-element counter in memory across the FADD call
+		lda  SCRATCH_ZPWORD1
+		ldy  SCRATCH_ZPWORD1+1
+		jsr  c64.FADD			; presumably FAC1 += float at address A/Y
+		ldy  SCRATCH_ZPREG
+		dey
+		cpy  #255			; counter wrapped below 0: all elements added
+		beq  +
+		lda  SCRATCH_ZPWORD1
+		clc
+		adc  #5				; advance the element pointer by 5 bytes (one float)
+		sta  SCRATCH_ZPWORD1
+		bcc  -				; no page crossing: next element
+		inc  SCRATCH_ZPWORD1+1		; page crossing: bump pointer high byte
+		bne  -				; taken unless the high byte wrapped to 0
++		jsr  c64.FADDH			; presumably adds 0.5 to FAC1, cancelling the FL_NEGHLF start value
+		jmp  push_fac1_as_result
+		.pend
+		
+		
 pop_array_and_lengthmin1Y	.proc
 		inx
 		ldy  ESTACK_LO,x