; optimized graphics routines for just the single screen mode: lores 320*240, 256c  (8bpp)
; bitmap image needs to start at VRAM address $00000.
; This is compatible with the CX16's screen mode 128.  (void cx16.set_screen_mode(128))


gfx_lores {

    %option ignore_unused

    sub set_screen_mode() {
        ; Switch the display to the lores 320*240, 256 color (8bpp) bitmap mode on layer 1,
        ; with the bitmap located at VRAM address 0, and clear the bitmap to color 0.
        ; Takes no arguments and returns nothing.
        ; The register meanings noted below follow the VERA reference; the values themselves are as written here.
        cx16.VERA_CTRL=0            ; must come first: the DC_* registers written below are reached through the selection made here
        cx16.VERA_DC_VIDEO = (cx16.VERA_DC_VIDEO & %11001111) | %00100000      ; enable only layer 1 (bits 4 and 5 are replaced, all other bits are kept)
        cx16.VERA_DC_HSCALE = 64    ; presumably 64 = 2x horizontal scaling (320 pixels wide) - see VERA reference
        cx16.VERA_DC_VSCALE = 64    ; presumably 64 = 2x vertical scaling (240 pixels high) - see VERA reference
        cx16.VERA_L1_CONFIG = %00000111     ; layer 1 configured as 8bpp bitmap (per the mode described in the file header)
        cx16.VERA_L1_MAPBASE = 0
        cx16.VERA_L1_TILEBASE = 0   ; bitmap data starts at VRAM address 0
        clear_screen(0)             ; start with a screen filled with color 0
    }

    sub clear_screen(ubyte color) {
        ; Fill the entire bitmap (240 rows of 320 pixels, starting at VRAM address 0)
        ; with the given color byte.
        ; Afterwards the VERA address is back at 0 with auto-increment switched off.
        cx16.VERA_CTRL = 0
        cx16.VERA_ADDR_L = 0
        cx16.VERA_ADDR_M = 0
        cx16.VERA_ADDR_H = %00010000        ; increment setting 1: advance one byte (=pixel) per write
        repeat 240 {
            ; one full row of 320 pixels per call
            cs_innerloop320(color)
        }
        ; leave the address registers in a neutral state
        cx16.VERA_ADDR_L = 0
        cx16.VERA_ADDR_M = 0
        cx16.VERA_ADDR_H = 0
    }

    sub line(uword x1, ubyte y1, uword x2, ubyte y2, ubyte color) {
        ; Draw a line from (x1,y1) to (x2,y2), both end points included, in the given color.
        ; Bresenham algorithm.
        ; This code special-cases various quadrant loops to allow simple ++ and -- operations.
        ; NOTE:  this is about twice as fast as the kernal routine GRAPH_draw_line, and ~3-4 times as fast as gfx2.line()
        ;        it trades memory for speed (uses inline plot routine and multiplication lookup tables)
        ;
        ; NOTE:  is currently still a regular 6502 routine, could likely be made much faster with the VeraFX line helper.
        ;
        ; Purely vertical and purely horizontal lines are handed off to vertical_line() / horizontal_line().
        ; Working storage: cx16.r0 = current x, cx16.r1L = current y, cx16.r2 = end x, cx16.r3L = end y
        ; (cx16.r0 is also used as scratch while swapping the end points).
        ; Coordinates are not range-checked here; the times320 lookup tables only cover y = 0..239.

        cx16.r3L = y2    ; ensure zeropage
        cx16.r1L = y1    ; ensure zeropage

        if cx16.r1L > cx16.r3L {
            ; make sure dy is always positive to have only 4 instead of 8 special cases
            ; (swap both end points so the line is always drawn with increasing y)
            cx16.r0 = x1
            x1 = x2
            x2 = cx16.r0
            cx16.r0L = cx16.r1L
            cx16.r1L = cx16.r3L
            cx16.r3L = cx16.r0L
        }
        ; signed deltas: dx can be negative (line runs to the left), dy is >= 0 after the swap above
        word @zp dx = x2 as word
        word @zp dy = cx16.r3L
        dx -= x1
        dy -= cx16.r1L

        if dx==0 {
            ; vertical line: dy+1 pixels starting at the topmost point
            vertical_line(x1, cx16.r1L, lsb(dy)+1, color)
            return
        }
        if dy==0 {
            ; horizontal line: start at the leftmost x, |dx|+1 pixels
            if x1>x2
                x1=x2
            horizontal_line(x1, cx16.r1L, abs(dx) as uword +1, color)
            return
        }

        word @zp d = 0                  ; Bresenham error accumulator
        bool positive_ix = true         ; true = x increases while drawing, false = x decreases
        if dx < 0 {
            dx = -dx
            positive_ix = false
        }
        word @zp dx2 = dx*2
        word @zp dy2 = dy*2

        cx16.r0  = x1    ; ensure zeropage
        cx16.r2  = x2    ; ensure zeropage

        ; NOTE(review): VERA_CTRL is only reset on this path, not before the
        ;               vertical_line/horizontal_line shortcuts above - confirm that is intended.
        cx16.VERA_CTRL = 0
        if dx >= dy {
            ; x-major line: step x every iteration, step y when the error term exceeds dx
            if positive_ix {
                repeat {
                    plot()
                    if cx16.r0==cx16.r2
                        return
                    cx16.r0++
                    d += dy2
                    if d > dx {
                        cx16.r1L++
                        d -= dx2
                    }
                }
            } else {
                repeat {
                    plot()
                    if cx16.r0==cx16.r2
                        return
                    cx16.r0--
                    d += dy2
                    if d > dx {
                        cx16.r1L++
                        d -= dx2
                    }
                }
            }
        }
        else {
            ; y-major line: step y every iteration, step x when the error term exceeds dy
            if positive_ix {
                repeat {
                    plot()
                    if cx16.r1L == cx16.r3L
                        return
                    cx16.r1L++
                    d += dx2
                    if d > dy {
                        cx16.r0++
                        d -= dy2
                    }
                }
            } else {
                repeat {
                    plot()
                    if cx16.r1L == cx16.r3L
                        return
                    cx16.r1L++
                    d += dx2
                    if d > dy {
                        cx16.r0--
                        d -= dy2
                    }
                }
            }
        }

        asmsub plot() {
            ; x in r0,  y in r1,   color.
            ; Sets the VERA address to y*320 + x (via the times320 lookup tables below)
            ; and stores the byte at p8v_color into VERA_DATA0.
            ; The value written to VERA_ADDR_H is only the top address byte plus carry,
            ; so its upper (increment) bits end up zero.
            ; Uses A and Y; r0 and r1 are left unchanged.
            ; This routine is also called from vera_setaddr() outside of line().
            %asm {{
                ldy  cx16.r1L               ; Y = row, index into the times320 tables
                clc
                lda  times320_lo,y
                adc  cx16.r0L
                sta  cx16.VERA_ADDR_L
                lda  times320_mid,y
                adc  cx16.r0H
                sta  cx16.VERA_ADDR_M
                lda  #0
                adc  times320_hi,y          ; top address byte plus the carry of the additions above
                sta  cx16.VERA_ADDR_H
                lda  p8v_color
                sta  cx16.VERA_DATA0
                rts
            }}
        }

        %asm {{

; multiplication by 320 lookup table
; (built at assembly time: 240 entries holding 320*y for y = 0..239,
;  stored as three separate byte tables: low byte, middle byte and top byte)
times320 := 320*range(240)

times320_lo     .byte <times320
times320_mid    .byte >times320
times320_hi     .byte `times320

            }}
    }

    sub horizontal_line(uword xx, ubyte yy, uword length, ubyte color) {
        ; Draw a horizontal run of `length` pixels in the given color,
        ; starting at (xx, yy) and extending to the right.
        ; A length of 0 draws nothing at all.
        if length==0
            return

        ; position the VERA address on the first pixel, then make every write advance by 1 byte (=pixel)
        vera_setaddr(xx, yy)
        cx16.VERA_ADDR_H = (cx16.VERA_ADDR_H & $07) | $10

        %asm {{
            lda  p8v_color
            ldx  p8v_length+1           ; number of full 256-pixel chunks
            beq  _remainder
            ldy  #0
_chunk      sta  cx16.VERA_DATA0
            iny
            bne  _chunk
            dex
            bne  _chunk
_remainder  ldy  p8v_length             ; pixels left after the full chunks
            beq  _done
_tail       sta  cx16.VERA_DATA0
            dey
            bne  _tail
_done
        }}
    }

    sub vertical_line(uword xx, ubyte yy, ubyte lheight, ubyte color) {
        ; Draw a vertical run of `lheight` pixels in the given color,
        ; starting at (xx, yy) and extending downwards.
        ; A height of 0 draws nothing at all (same convention as horizontal_line).
        ;
        ; The zero check has to come before vera_setaddr(): that helper runs line()'s plot routine,
        ; which writes one pixel itself. Without the early return, a zero-height call
        ; would leave a stray pixel on the screen.
        if lheight==0
            return
        vera_setaddr(xx,yy)
        ; set vera auto-increment to 320 pixel increment (=next line)
        cx16.VERA_ADDR_H = cx16.VERA_ADDR_H & %00000111 | (14<<4)
        %asm {{
            ldy  p8v_lheight            ; known to be non-zero here
            lda  p8v_color
-           sta  cx16.VERA_DATA0        ; each write moves the address one row down
            dey
            bne  -
        }}
    }


    asmsub cs_innerloop320(ubyte color @A) clobbers(Y) {
        ; Write the byte in A to VERA_DATA0 320 times (one screen row when the caller
        ; has set up a 1-byte auto-increment). The loop is unrolled 8 times for speed.
        ; VeraFX 32 bits writes would be faster still, but it's safer to only use
        ; VeraFX explicitly when you know what you're doing.
        %asm {{
            ldy  #320/8                 ; 40 rounds of 8 writes each
_next8      sta  cx16.VERA_DATA0
            sta  cx16.VERA_DATA0
            sta  cx16.VERA_DATA0
            sta  cx16.VERA_DATA0
            sta  cx16.VERA_DATA0
            sta  cx16.VERA_DATA0
            sta  cx16.VERA_DATA0
            sta  cx16.VERA_DATA0
            dey
            bne  _next8
            rts
        }}
    }

    inline asmsub vera_setaddr(uword xx @R0, ubyte yy @R1) {
        ; set the correct vera start address (no auto increment yet!)
        ; xx is passed in R0 and yy in R1, which is where line()'s internal plot routine
        ; expects its x and y; the call below reuses that routine for the address calculation.
        ; Callers should treat cx16.r0 and cx16.r1 as overwritten by the arguments.
        ; NOTE(review): plot does more than set the address - it also stores one byte to VERA_DATA0,
        ;               taken from the p8v_color symbol as seen from plot (presumably line()'s color
        ;               variable, so possibly a stale value). Callers here overwrite that pixel
        ;               right afterwards as long as they draw at least one pixel.
        %asm {{
            jsr  p8s_line.p8s_plot
        }}
    }
}