diff --git a/compiler/res/prog8lib/cx16/syslib.p8 b/compiler/res/prog8lib/cx16/syslib.p8
index f001a49de..77d55dbcc 100644
--- a/compiler/res/prog8lib/cx16/syslib.p8
+++ b/compiler/res/prog8lib/cx16/syslib.p8
@@ -279,19 +279,18 @@ asmsub vpeek(ubyte bank @A, uword address @XY) -> ubyte @A {
 }
 
 
-sub vaddr(ubyte bank, uword address, ubyte addrsel, byte incrdecr) {
+asmsub vaddr(uword address @R0, ubyte bank @R1, ubyte addrsel @A, byte incrdecr @Y) {
         ; -- setup the VERA's address register 0 or 1
         %asm {{
-            lda  addrsel
             and  #1
             sta  cx16.VERA_CTRL
-            lda  address
+            lda  cx16.r0
             sta  cx16.VERA_ADDR_L
-            lda  address+1
+            lda  cx16.r0+1
             sta  cx16.VERA_ADDR_M
-            lda  bank
+            lda  cx16.r1
             and  #1
-            ldy  incrdecr
+            cpy  #0
             bmi  _decr
             beq  _seth
             ora  #%00010000
@@ -303,76 +302,70 @@ _decr       ora  #%00011000
 }
 
 
-; TODO make asmsub versions once that no longer generates larger code...
-sub vpoke(ubyte bank, uword address, ubyte value) {
+asmsub vpoke(uword address @R0, ubyte bank @A,  ubyte value @Y) {     ; address is read from cx16.r0, bank arrives in A, value in Y
         ; -- write a single byte to VERA's video memory
         ;    note: inefficient when writing multiple sequential bytes!
         %asm {{
             stz  cx16.VERA_CTRL
-            lda  bank
             and  #1
             sta  cx16.VERA_ADDR_H
-            lda  address
+            lda  cx16.r0                ; address low byte
             sta  cx16.VERA_ADDR_L
-            lda  address+1
+            lda  cx16.r0+1              ; address high byte
             sta  cx16.VERA_ADDR_M
-            lda  value
-            sta  cx16.VERA_DATA0
+            sty  cx16.VERA_DATA0        ; value is still in Y (only A was used above), so it is stored directly
             rts
         }}
 }
 
-sub vpoke_or(ubyte bank, uword address, ubyte value) {
+asmsub vpoke_or(uword address @R0, ubyte bank @A,  ubyte value @Y) {     ; address is read from cx16.r0, bank arrives in A, value in Y
         ; -- or a single byte to the value already in the VERA's video memory at that location
         ;    note: inefficient when writing multiple sequential bytes!
         %asm {{
             stz  cx16.VERA_CTRL
-            lda  bank
             and  #1
             sta  cx16.VERA_ADDR_H
-            lda  address
+            lda  cx16.r0                ; address low byte
             sta  cx16.VERA_ADDR_L
-            lda  address+1
+            lda  cx16.r0+1              ; address high byte
             sta  cx16.VERA_ADDR_M
-            lda  value
+            tya                         ; bring value from Y into A so it can be OR-ed with the byte read from VERA_DATA0
             ora  cx16.VERA_DATA0
             sta  cx16.VERA_DATA0
             rts
         }}
 }
 
-sub vpoke_and(ubyte bank, uword address, ubyte value) {
+asmsub vpoke_and(uword address @R0, ubyte bank @A,  ubyte value @Y) {     ; address is read from cx16.r0, bank arrives in A, value in Y
         ; -- and a single byte to the value already in the VERA's video memory at that location
         ;    note: inefficient when writing multiple sequential bytes!
         %asm {{
             stz  cx16.VERA_CTRL
-            lda  bank
             and  #1
             sta  cx16.VERA_ADDR_H
-            lda  address
+            lda  cx16.r0                ; address low byte
             sta  cx16.VERA_ADDR_L
-            lda  address+1
+            lda  cx16.r0+1              ; address high byte
             sta  cx16.VERA_ADDR_M
-            lda  value
+            tya                         ; bring value from Y into A so it can be AND-ed with the byte read from VERA_DATA0
             and  cx16.VERA_DATA0
             sta  cx16.VERA_DATA0
             rts
         }}
 }
 
-sub vpoke_xor(ubyte bank, uword address, ubyte value) {
+asmsub vpoke_xor(uword address @R0, ubyte bank @A,  ubyte value @Y) {
         ; -- xor a single byte to the value already in the VERA's video memory at that location
         ;    note: inefficient when writing multiple sequential bytes!
         %asm {{
             stz  cx16.VERA_CTRL
-            lda  bank
             and  #1
             sta  cx16.VERA_ADDR_H
-            lda  address
+            lda  cx16.r0
             sta  cx16.VERA_ADDR_L
-            lda  address+1
+            lda  cx16.r0+1
             sta  cx16.VERA_ADDR_M
-            lda  value
+            tya
             eor  cx16.VERA_DATA0
             sta  cx16.VERA_DATA0
             rts
diff --git a/examples/cx16/gfx2.p8 b/examples/cx16/gfx2.p8
index d601ca98f..952ed68d7 100644
--- a/examples/cx16/gfx2.p8
+++ b/examples/cx16/gfx2.p8
@@ -14,6 +14,12 @@ main {
         ubyte mode
         for mode in modes {
             gfx2.set_mode(mode)
+
+;            gfx2.location(20, 50)
+;            repeat 200 {
+;                gfx2.next_pixel(255)
+;            }
+
             draw()
             cx16.wait(120)
         }
@@ -129,12 +135,23 @@ gfx2 {
 
     sub plot(uword x, uword y, ubyte color) {
         ubyte[8] bits = [128, 64, 32, 16, 8, 4, 2, 1]
+        uword addr
+        ubyte value
         when active_mode {
-            0 -> cx16.vpoke_or(0, y*(320/8) + x/8, bits[lsb(x)&7])
-            128 -> cx16.vpoke_or(0, y*(640/8) + x/8, bits[lsb(x)&7])
+            0 -> {
+                addr = x/8 + y*(320/8)
+                value = bits[lsb(x)&7]
+                cx16.vpoke_or(addr, 0, value)
+            }
+            128 -> {
+                addr = x/8 + y*(640/8)
+                value = bits[lsb(x)&7]
+                cx16.vpoke_or(addr, 0, value)
+            }
             1 -> {
                 void addr_mul_320_add_24(y, x)      ; 24 bits result is in r0 and r1L
-                cx16.vpoke(lsb(cx16.r1), cx16.r0, color)
+                ubyte bank = lsb(cx16.r1)
+                cx16.vpoke(cx16.r0, bank, color)
             }
         }
         ; activate vera auto-increment mode so next_pixel() can be used after this
@@ -143,12 +160,20 @@ gfx2 {
     }
 
     sub location(uword x, uword y) {
+        uword address      ; holds the computed offset so vaddr receives a plain variable (presumably to avoid an expression argument - see the TODO in examples/test.p8)
         when active_mode {
-            0 -> cx16.vaddr(0, y*(320/8) + x/8, 0, 1)
-            128 -> cx16.vaddr(0, y*(640/8) + x/8, 0, 1)
+            0 -> {
+                address = y*(320/8) + x/8       ; 320/8 bytes per row plus x/8 bytes into the row
+                cx16.vaddr(address, 0, 0, 1)    ; argument order: address, bank=0, addrsel=0, incrdecr=1
+            }
+            128 -> {
+                address = y*(640/8) + x/8       ; 640/8 bytes per row plus x/8 bytes into the row
+                cx16.vaddr(address, 0, 0, 1)    ; argument order: address, bank=0, addrsel=0, incrdecr=1
+            }
             1 -> {
                 void addr_mul_320_add_24(y, x)      ; 24 bits result is in r0 and r1L
-                cx16.vaddr(lsb(cx16.r1), cx16.r0, 0, 1)
+                ubyte bank = lsb(cx16.r1)           ; copy the low byte of r1 into a variable before the call
+                cx16.vaddr(cx16.r0, bank, 0, 1)     ; argument order: address (r0), bank, addrsel=0, incrdecr=1
             }
         }
     }
diff --git a/examples/test.p8 b/examples/test.p8
index c73449ec5..8aa7d92e7 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -9,7 +9,6 @@
 main {
 
 
-    ; TODO asmsub version generates LARGER CODE , why is this?
     sub vpoke(ubyte bank, uword address, ubyte value) {
         %asm {{
             rts
@@ -29,6 +28,7 @@ main {
         ubyte value = 123
         bank++
         vpoke(bank, address, value)
-        vpokeasm(address, bank, value)
+        vpokeasm(address, bank, value)      ; TODO params are passed via the stack if an argument is an expression such as lsb(bank).  CHECK STACK UNWINDING!!!
+        ; TODO also see if this can be done via the R0-R15 temp registers rather than the estack
     }
 }