From d65cfbf093c549f7fe3f79c00116263eac682ed6 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Wed, 23 Dec 2020 00:54:11 +0100
Subject: [PATCH] fixed math.mul_word_40 that was actually doing *80...

---
 compiler/res/prog8lib/cx16/syslib.p8 |  32 ++--
 compiler/res/prog8lib/math.asm       |   2 -
 docs/source/todo.rst                 |   1 +
 examples/test.p8                     | 222 ++++++++++++++++++++++++++-
 4 files changed, 238 insertions(+), 19 deletions(-)

diff --git a/compiler/res/prog8lib/cx16/syslib.p8 b/compiler/res/prog8lib/cx16/syslib.p8
index 137c13a0b..f665f507f 100644
--- a/compiler/res/prog8lib/cx16/syslib.p8
+++ b/compiler/res/prog8lib/cx16/syslib.p8
@@ -165,22 +165,22 @@ cx16 {
     &ubyte  d1ora	= via1+15
 
     const uword  via2  = $9f70                  ;VIA 6522 #2
-    &ubyte  d2prb	=via2+0
-    &ubyte  d2pra	=via2+1
-    &ubyte  d2ddrb	=via2+2
-    &ubyte  d2ddra	=via2+3
-    &ubyte  d2t1l	=via2+4
-    &ubyte  d2t1h	=via2+5
-    &ubyte  d2t1ll	=via2+6
-    &ubyte  d2t1lh	=via2+7
-    &ubyte  d2t2l	=via2+8
-    &ubyte  d2t2h	=via2+9
-    &ubyte  d2sr	=via2+10
-    &ubyte  d2acr	=via2+11
-    &ubyte  d2pcr	=via2+12
-    &ubyte  d2ifr	=via2+13
-    &ubyte  d2ier	=via2+14
-    &ubyte  d2ora	=via2+15
+    &ubyte  d2prb	= via2+0
+    &ubyte  d2pra	= via2+1
+    &ubyte  d2ddrb	= via2+2
+    &ubyte  d2ddra	= via2+3
+    &ubyte  d2t1l	= via2+4
+    &ubyte  d2t1h	= via2+5
+    &ubyte  d2t1ll	= via2+6
+    &ubyte  d2t1lh	= via2+7
+    &ubyte  d2t2l	= via2+8
+    &ubyte  d2t2h	= via2+9
+    &ubyte  d2sr	= via2+10
+    &ubyte  d2acr	= via2+11
+    &ubyte  d2pcr	= via2+12
+    &ubyte  d2ifr	= via2+13
+    &ubyte  d2ier	= via2+14
+    &ubyte  d2ora	= via2+15
 
 
 ; ---- Commander X-16 additions on top of C64 kernal routines ----
diff --git a/compiler/res/prog8lib/math.asm b/compiler/res/prog8lib/math.asm
index 714695501..963d1cc04 100644
--- a/compiler/res/prog8lib/math.asm
+++ b/compiler/res/prog8lib/math.asm
@@ -1203,8 +1203,6 @@ mul_word_40	.proc
 		rol  a
 		asl  P8ZP_SCRATCH_W1
 		rol  a
-		asl  P8ZP_SCRATCH_W1
-		rol  a
 		tay
 		lda  P8ZP_SCRATCH_W1
 		rts
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index 9ef9096b6..a859b1140 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -2,6 +2,7 @@
 TODO
 ====
 
+- optimize (byte) bitshifting 1<<x (and 2**x) via lookup table 1,2,4,8,...
 - Cx16 target: support full-screen 640x480 and 320x240 graphics? That requires our own custom graphics routines though to draw lines, and plot pixels.
 - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)
 - make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_'
diff --git a/examples/test.p8 b/examples/test.p8
index 376bef56e..e66f718a3 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -2,13 +2,233 @@
 %import diskio
 %import floats
 %import graphics
-%zeropage basicsafe
 %import test_stack
+%zeropage basicsafe
 %option no_sysinit
 
 main {
 
+;    uword adres
+;    ubyte adreshi
+
+
     sub start () {
         txt.print("hello\n")
+
+        uword xx                        ; scratch word fed to each math.mul_word_* routine below
+        xx = 777
+        %asm {{
+            lda  xx                     ; word argument: A = byte at xx, Y = byte at xx+1
+            ldy  xx+1
+            jsr  math.mul_word_20
+            sta  xx                     ; store the A/Y result back into xx
+            sty  xx+1
+        }}
+        txt.print_uw(xx)                ; expected output: 15540 (777*20)
+        txt.chrout('\n')
+        xx = 777
+        %asm {{
+            lda  xx                     ; same calling sequence as above
+            ldy  xx+1
+            jsr  math.mul_word_40
+            sta  xx
+            sty  xx+1
+        }}
+        txt.print_uw(xx)                ; expected output: 31080 (777*40); 62160 would mean it still multiplies by 80
+        txt.chrout('\n')
+        xx = 777
+        %asm {{
+            lda  xx                     ; same calling sequence as above
+            ldy  xx+1
+            jsr  math.mul_word_80
+            sta  xx
+            sty  xx+1
+        }}
+        txt.print_uw(xx)                ; expected output: 62160 (777*80)
+        txt.chrout('\n')
+
+        return                          ; NOTE: unconditional return, the graphics demo below is never executed
+
+        gfx2.set_mode(128)              ; selects the 640x480 monochrome mode, so only the "128" case below would run
+        gfx2.clear_screen()
+
+        uword offset
+        ubyte angle
+        uword x
+        uword y
+        when gfx2.active_mode {
+            0 -> {                      ; 320x240 mode: half-size figure, color taken from lsb(x+y)
+                for offset in 0 to 90 step 3 {
+                    for angle in 0 to 255 {
+                        x = $0008+sin8u(angle)/2
+                        y = $0008+cos8u(angle)/2
+                        gfx2.plot(x+offset*2,y+offset, lsb(x+y))
+                    }
+                }
+            }
+            128 -> {                    ; 640x480 mode: full-size figure, color argument fixed at 1
+                for offset in 0 to 190 step 6 {
+                    for angle in 0 to 255 {
+                        x = $0008+sin8u(angle)
+                        y = $0008+cos8u(angle)
+                        gfx2.plot(x+offset*2,y+offset, 1)
+                    }
+                }
+            }
+        }
+    }
+}
+
+gfx2 {
+
+    ubyte active_mode = 255
+    uword width = 0
+    uword height = 0
+    ubyte bpp = 0
+
+    sub set_mode(ubyte mode) {         ; programs the VERA registers for the given mode and records width/height/bpp
+        ; mode 0 = bitmap 320 x 240 x 256c
+        ; mode 128 = bitmap 640 x 480 x 1c monochrome
+        ; ...
+
+        when mode {                     ; any other mode value changes no registers
+            0 -> {
+                ; 320 x 240 x 256c
+                cx16.VERA_DC_VIDEO = (cx16.VERA_DC_VIDEO & %11001111) | %00100000      ; enable only layer 1
+                cx16.VERA_DC_HSCALE = 64        ; half the value used for the 640x480 mode below
+                cx16.VERA_DC_VSCALE = 64
+                cx16.VERA_L1_CONFIG = %00000111 ; NOTE(review): presumably bitmap mode at 8 bpp - confirm against the VERA reference
+                cx16.VERA_L1_MAPBASE = 0
+                cx16.VERA_L1_TILEBASE = 0
+                width = 320
+                height = 240
+                bpp = 8
+            }
+            128 -> {
+                ; 640 x 480 x 1c
+                cx16.VERA_DC_VIDEO = (cx16.VERA_DC_VIDEO & %11001111) | %00100000      ; enable only layer 1
+                cx16.VERA_DC_HSCALE = 128
+                cx16.VERA_DC_VSCALE = 128
+                cx16.VERA_L1_CONFIG = %00000100 ; NOTE(review): presumably bitmap mode at 1 bpp - confirm against the VERA reference
+                cx16.VERA_L1_MAPBASE = 0
+                cx16.VERA_L1_TILEBASE = %00000001       ; NOTE(review): presumably the 640-pixel bitmap width bit - confirm
+                width = 640
+                height = 480
+                bpp = 1
+            }
+        }
+        active_mode = mode              ; stored unconditionally, also when no case above matched
+    }
+
+    sub clear_screen() {               ; zero-fills the bitmap of the active mode through cx16.VERA_DATA0
+        when active_mode {             ; does nothing for any other active_mode value (such as the initial 255)
+            0 -> {
+                ; 320 x 240 x 256c
+                cx16.VERA_CTRL = 0
+                cx16.VERA_ADDR_H = %00010000    ; NOTE(review): presumably auto-increment of 1 on each data access - confirm
+                cx16.VERA_ADDR_M = 0
+                cx16.VERA_ADDR_L = 0
+                repeat 240/4                    ; 60 x 1280 bytes = 76800 = 320*240 at one byte per pixel
+                    cs_innerloop1280()
+            }
+            128 -> {
+                ; 640 x 480 x 1c
+                cx16.VERA_CTRL = 0
+                cx16.VERA_ADDR_H = %00010000    ; NOTE(review): presumably auto-increment of 1 on each data access - confirm
+                cx16.VERA_ADDR_M = 0
+                cx16.VERA_ADDR_L = 0
+                repeat 480/16                   ; 30 x 1280 bytes = 38400 = 640*480/8 at one bit per pixel
+                    cs_innerloop1280()
+            }
+        }
+    }
+
+    asmsub cs_innerloop1280() {        ; stores 1280 zero bytes to cx16.VERA_DATA0. NOTE(review): changes Y but declares no clobbers - confirm that is safe for callers
+        %asm {{
+            ldy  #160                   ; 160 iterations x 8 stores = 1280 bytes
+-           stz  cx16.VERA_DATA0        ; loop unrolled 8 times
+            stz  cx16.VERA_DATA0
+            stz  cx16.VERA_DATA0
+            stz  cx16.VERA_DATA0
+            stz  cx16.VERA_DATA0
+            stz  cx16.VERA_DATA0
+            stz  cx16.VERA_DATA0
+            stz  cx16.VERA_DATA0
+            dey
+            bne  -                      ; repeat until Y reaches zero
+            rts
+        }}
+    }
+
+
+    sub plot(uword x, uword y, ubyte color) {  ; draws one pixel at (x,y) in the active mode; no bounds checking is done
+        uword addr                     ; low 16 bits of the video memory address
+        ubyte addrhi                   ; bits 16-23 of the address, only used by the mode 0 path
+
+        when active_mode {
+            0 -> {
+                ; TODO problem when y*320+x >= 65536 (first reached at y=204, x=256): the address gets > 64K; so we really need 24 bit address calculations...
+                addr = y
+                addr_mul_24_320()               ; addrhi:addr = y * 320
+                addr_add_word_24(x)             ; addrhi:addr += x  (one byte per pixel)
+                cx16.vpoke(addrhi, addr, color)
+            }
+            128 -> {                            ; monochrome: the color argument is ignored, the pixel bit is always set
+                ubyte[8] bits = [128, 64, 32, 16, 8, 4, 2, 1]      ; bit mask per x&7, position 0 maps to bit 7
+                addr = 0
+                addr += y*(640/8)               ; 80 bytes per row of 640 one-bit pixels
+                addr += x/8                     ; 8 pixels per byte
+                ubyte pix = cx16.vpeek(0, addr) | bits[lsb(x)&7]   ; read-modify-write so neighbouring pixels are kept
+                cx16.vpoke(0, addr, pix)
+            }
+        }
+
+
+        ; TODO when subs are placed in front of real code, they are generated in place and break that code. Move them to the bottom?
+        asmsub addr_mul_24_320() {
+            ; addr = addr * 256 + addr * 64,  bits 16-23 into addrhi
+            %asm {{
+            lda  addr
+            sta  P8ZP_SCRATCH_B1        ; keep the old low byte: it is the middle byte of addr*256
+            lda  addr+1
+            sta  addrhi                 ; old high byte is the top byte of addr*256
+            sta  P8ZP_SCRATCH_REG       ; and also the high byte of the 16-bit value shifted below
+            lda  addr
+            asl  a                      ; six 16-bit left shifts of SCRATCH_REG:A = addr*64
+            rol  P8ZP_SCRATCH_REG       ; bits shifted out of the top are dropped, so this is only exact for addr < 1024
+            asl  a
+            rol  P8ZP_SCRATCH_REG
+            asl  a
+            rol  P8ZP_SCRATCH_REG
+            asl  a
+            rol  P8ZP_SCRATCH_REG
+            asl  a
+            rol  P8ZP_SCRATCH_REG
+            asl  a
+            rol  P8ZP_SCRATCH_REG
+            sta  addr                   ; low byte comes from addr*64 only (addr*256 contributes zero here)
+            lda  P8ZP_SCRATCH_B1
+            clc
+            adc  P8ZP_SCRATCH_REG       ; middle byte = old low byte + high byte of addr*64
+            sta  addr+1
+            bcc  +
+            inc  addrhi                 ; carry out of the middle byte goes into bits 16-23
+    +		rts
+            }}
+        }
+
+        asmsub addr_add_word_24(uword w @ AY) {        ; addrhi:addr += w, with w passed in A (first byte added) and Y (second byte)
+            %asm {{
+                clc
+                adc  addr               ; add the A part of w to the low byte
+                sta  addr
+                tya
+                adc  addr+1             ; add the Y part of w plus carry to the middle byte
+                sta  addr+1
+                bcc  +
+                inc  addrhi             ; carry out of the middle byte goes into bits 16-23
++               rts
+            }}
+        }
     }
 }