mirror of https://github.com/fadden/fdraw.git
parent
2ca9d4084f
commit
418e7b7191
|
@ -0,0 +1,549 @@
|
||||||
|
********************************
|
||||||
|
* *
|
||||||
|
* Amper-fdraw *
|
||||||
|
* By Andy McFadden *
|
||||||
|
* For fdraw version 0.3 *
|
||||||
|
* *
|
||||||
|
* Applesoft ampersand *
|
||||||
|
* interface for fdraw. *
|
||||||
|
* *
|
||||||
|
* Developed with Merlin-16 *
|
||||||
|
* *
|
||||||
|
********************************
|
||||||
|
|
||||||
|
lst off
|
||||||
|
org $1d60
|
||||||
|
|
||||||
|
* All of the handler entry points can fit on a single
|
||||||
|
* page, so it's possible to save a few bytes by
|
||||||
|
* dropping the high jump table and just hardcoding
|
||||||
|
* the first page into the jump. This requires that
|
||||||
|
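* the ORG be at $xx00. This build keeps both tables
|
||||||
|
* (:jmptabl and :jmptabh), so the ORG above does not
|
||||||
|
* have to be page-aligned.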
|
||||||
|
|
||||||
|
PUT FDRAW.DEFS
|
||||||
|
|
||||||
|
* Applesoft BASIC tokens.
|
||||||
|
tok_plot equ $8d
|
||||||
|
tok_hgr2 equ $90
|
||||||
|
tok_hgr equ $91
|
||||||
|
tok_hcolor equ $92
|
||||||
|
tok_hplot equ $93
|
||||||
|
tok_draw equ $94
|
||||||
|
tok_xdraw equ $95
|
||||||
|
tok_inverse equ $9e
|
||||||
|
tok_clear equ $bd
|
||||||
|
tok_new equ $bf
|
||||||
|
tok_to equ $c1
|
||||||
|
tok_at equ $c5
|
||||||
|
*tok_sgn equ $d2
|
||||||
|
tok_scrn equ $d7
|
||||||
|
tok_exp equ $dd
|
||||||
|
tok_cos equ $de
|
||||||
|
tok_sin equ $df
|
||||||
|
|
||||||
|
* System locations.
|
||||||
|
PCL equ $3a ;used by monitor
|
||||||
|
PCH equ $3b ;used by monitor
|
||||||
|
A1L equ $3c ;used by monitor
|
||||||
|
A1H equ $3d ;used by monitor
|
||||||
|
LINNUM equ $50 ;50-51
|
||||||
|
FACLO equ $a1
|
||||||
|
CHRGET equ $b1 ;advance ptr, get next tok
|
||||||
|
CHRGOT equ $b7 ;get next tok (no advance)
|
||||||
|
TXTPTR equ $b8
|
||||||
|
HPAG equ $e6 ;$20 or $40
|
||||||
|
|
||||||
|
AMPERV equ $3f5
|
||||||
|
|
||||||
|
TXTCLR equ $c050
|
||||||
|
TXTSET equ $c051
|
||||||
|
MIXCLR equ $c052
|
||||||
|
MIXSET equ $c053
|
||||||
|
LOWSCR equ $c054
|
||||||
|
HISCR equ $c055
|
||||||
|
LORES equ $c056
|
||||||
|
HIRES equ $c057
|
||||||
|
|
||||||
|
ERROR equ $d412 ;error based on X reg
|
||||||
|
FRMNUM equ $dd67
|
||||||
|
SynError equ $dec9 ;throw SYNTAX ERROR
|
||||||
|
CHKCOM equ $debe
|
||||||
|
IllQError equ $e199 ;throw ILLEGAL QUANTITY ERROR
|
||||||
|
GETADR equ $e752
|
||||||
|
GETBYT equ $e6f8 ;gets byte, in X/FACLO
|
||||||
|
HFNS equ $f6b9 ;get hi-res x/y for hplot
|
||||||
|
|
||||||
|
* Prepare the ampersand vector.
|
||||||
|
*
|
||||||
|
* Ideally we'd check to see if the existing vector is
|
||||||
|
* different from ours, and if so, jump to it when we
|
||||||
|
* get a token we don't recognize. Not convinced
|
||||||
|
* there's an actual use case for this.
|
||||||
|
init ;write "JMP dispatch" into AMPERV
|
||||||
|
lda #$4c ;JMP, in case it got
|
||||||
|
sta AMPERV ; trashed
|
||||||
|
lda #<dispatch ;handler address, low byte
|
||||||
|
sta AMPERV+1
|
||||||
|
lda #>dispatch ;handler address, high byte
|
||||||
|
sta AMPERV+2
|
||||||
|
rts
|
||||||
|
|
||||||
|
* Entry point from BASIC. The token is in A.
|
||||||
|
dispatch ;look up token in A, go to its handler
|
||||||
|
ldx #:cmdend-:cmdtab-1 ;index of last table entry
|
||||||
|
]loop cmp :cmdtab,x ;scan table from the end
|
||||||
|
beq :match
|
||||||
|
dex
|
||||||
|
bpl ]loop ;until X drops below zero
|
||||||
|
jmp SynError ;token not in :cmdtab
|
||||||
|
|
||||||
|
|
||||||
|
:match ;X = index of matched token
|
||||||
|
lda :jmptabh,x ;push (handler address - 1),
|
||||||
|
* lda #>h_new ;all on first page
|
||||||
|
pha ; high byte first
|
||||||
|
lda :jmptabl,x
|
||||||
|
pha ;RTS from CHRGET lands in handler
|
||||||
|
jmp CHRGET ;eat token, jump
|
||||||
|
|
||||||
|
|
||||||
|
:cmdtab dfb tok_new
|
||||||
|
dfb tok_hgr
|
||||||
|
dfb tok_hgr2
|
||||||
|
dfb tok_scrn
|
||||||
|
dfb tok_hcolor
|
||||||
|
dfb tok_inverse
|
||||||
|
dfb tok_clear
|
||||||
|
dfb tok_hplot
|
||||||
|
dfb tok_xdraw
|
||||||
|
dfb tok_draw
|
||||||
|
dfb tok_exp
|
||||||
|
dfb tok_cos
|
||||||
|
dfb tok_sin
|
||||||
|
dfb tok_at
|
||||||
|
dfb tok_plot
|
||||||
|
:cmdend
|
||||||
|
|
||||||
|
:jmptabl dfb <h_new-1
|
||||||
|
dfb <h_hgr-1
|
||||||
|
dfb <h_hgr2-1
|
||||||
|
dfb <h_scrn-1
|
||||||
|
dfb <h_hcolor-1
|
||||||
|
dfb <h_inverse-1
|
||||||
|
dfb <h_clear-1
|
||||||
|
dfb <h_hplot-1
|
||||||
|
dfb <h_xdraw-1
|
||||||
|
dfb <h_draw-1
|
||||||
|
dfb <h_exp-1
|
||||||
|
dfb <h_cos-1
|
||||||
|
dfb <h_sin-1
|
||||||
|
dfb <h_at-1
|
||||||
|
dfb <h_plot-1
|
||||||
|
:jmptabh dfb >h_new-1
|
||||||
|
dfb >h_hgr-1
|
||||||
|
dfb >h_hgr2-1
|
||||||
|
dfb >h_scrn-1
|
||||||
|
dfb >h_hcolor-1
|
||||||
|
dfb >h_inverse-1
|
||||||
|
dfb >h_clear-1
|
||||||
|
dfb >h_hplot-1
|
||||||
|
dfb >h_xdraw-1
|
||||||
|
dfb >h_draw-1
|
||||||
|
dfb >h_exp-1
|
||||||
|
dfb >h_cos-1
|
||||||
|
dfb >h_sin-1
|
||||||
|
dfb >h_at-1
|
||||||
|
dfb >h_plot-1
|
||||||
|
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &NEW - initialize
|
||||||
|
h_new ;reset our globals, then run f_Init
|
||||||
|
lda #$20 ;match Init result
|
||||||
|
sta g_cur_page
|
||||||
|
lda #$00
|
||||||
|
sta g_hcolor ;color 0
|
||||||
|
tax ;init "previous hplot"
|
||||||
|
tay ; coord to zero
|
||||||
|
jsr storeprv ;X/Y/A = xlo/xhi/y, all zero
|
||||||
|
ldx #139 ;279/2
|
||||||
|
ldy #0 ;x hi byte
|
||||||
|
lda #95 ;191/2
|
||||||
|
jsr storeac ;array center = 139,95
|
||||||
|
jmp f_Init
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &HGR - show page 1 with mixed text, and clear screen.
|
||||||
|
* Clears to color zero, then restores the &HCOLOR value.
|
||||||
|
h_hgr
|
||||||
|
ldx #$20 ;page 1
|
||||||
|
lda #$00 ;$c054
|
||||||
|
beq hgr_com
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &HGR2 - show page 2 with no text, and clear screen.
|
||||||
|
* Clears to color zero, then restores the &HCOLOR value.
|
||||||
|
h_hgr2
|
||||||
|
ldx #$40 ;page 2
|
||||||
|
lda #$01 ;$c055
|
||||||
|
;fall through to hgr_com
|
||||||
|
|
||||||
|
* We go slightly out of our way to clear the screen
|
||||||
|
* before tripping the softswitches. This avoids
|
||||||
|
* flashing the previous hi-res page contents when
|
||||||
|
* entering from text mode.
|
||||||
|
*
|
||||||
|
* We also want to go nomix-page2 but page1-mix
|
||||||
|
* (note reverse order) to avoid flashing text pg 2.
|
||||||
|
hgr_com stx f_in_arg
|
||||||
|
stx g_cur_page
|
||||||
|
stx HPAG ;probably useful
|
||||||
|
pha
|
||||||
|
jsr f_SetPage
|
||||||
|
lda #$00
|
||||||
|
sta f_in_arg
|
||||||
|
jsr f_SetColor
|
||||||
|
jsr f_Clear
|
||||||
|
lda g_hcolor ;restore color
|
||||||
|
sta f_in_arg
|
||||||
|
jsr f_SetColor
|
||||||
|
bit TXTCLR ;$c050
|
||||||
|
bit HIRES ;$c057
|
||||||
|
pla
|
||||||
|
beq :pg1
|
||||||
|
bit MIXCLR ;$c052
|
||||||
|
bit HISCR ;$c055
|
||||||
|
rts
|
||||||
|
:pg1 bit LOWSCR ;$c054
|
||||||
|
bit MIXSET ;$c053
|
||||||
|
rts
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &SCRN({1,2}) - set the current hi-res page
|
||||||
|
h_scrn ;select hi-res draw page 1 or 2
|
||||||
|
jsr GETBYT ;page number in X
|
||||||
|
cpx #1
|
||||||
|
beq :okay
|
||||||
|
cpx #2
|
||||||
|
beq :okay
|
||||||
|
jmp IllQError ;anything but 1 or 2
|
||||||
|
:okay jsr CHRGET ;eat ')' (we assume)
|
||||||
|
txa ;X/Y unaltered
|
||||||
|
asl
|
||||||
|
asl
|
||||||
|
asl
|
||||||
|
asl
|
||||||
|
asl ;multiply x32
|
||||||
|
sta g_cur_page ;1 -> $20, 2 -> $40
|
||||||
|
sta f_in_arg
|
||||||
|
jmp f_SetPage
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &HCOLOR={0-7} - set the current color
|
||||||
|
h_hcolor
|
||||||
|
* Parse the color, reject anything above 7, then record
|
||||||
|
* it in g_hcolor and pass it to f_SetColor.
|
||||||
|
jsr GETBYT ;color value arrives in X
|
||||||
|
cpx #8
|
||||||
|
bge :bad ;8-255 not allowed
|
||||||
|
stx g_hcolor ;keep our own copy
|
||||||
|
stx f_in_arg ;argument for SetColor
|
||||||
|
jmp f_SetColor
|
||||||
|
:bad jmp IllQError
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &INVERSE - flip pages
|
||||||
|
*
|
||||||
|
* If we're currently drawing on $20, we set the page
|
||||||
|
* to $40 and hit $c054 to show $20. And vice-versa.
|
||||||
|
* The goal is to make double-buffered animation easy.
|
||||||
|
h_inverse ;swap draw page, show the other one
|
||||||
|
lda g_cur_page
|
||||||
|
eor #$60 ;$20 <-> $40
|
||||||
|
sta g_cur_page
|
||||||
|
ldx #$00 ;offset from LOWSCR
|
||||||
|
cmp #$40 ;about to start drawing on 2?
|
||||||
|
beq :showpg1 ;yes, show page 1
|
||||||
|
inx ;no, show page 2
|
||||||
|
:showpg1 ldy LOWSCR,x ;touch $c054 or $c055, value unused
|
||||||
|
sta f_in_arg ;A still holds the new draw page
|
||||||
|
jmp f_SetPage
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &CLEAR - clear current page to current color.
|
||||||
|
h_clear
|
||||||
|
jmp f_Clear ;well, that was easy
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &XDRAW left,top,right,bottom - draw rectangle outline
|
||||||
|
h_xdraw
|
||||||
|
jsr getltrb
|
||||||
|
jmp f_DrawRect
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &DRAW left,top,right,bottom - draw filled rectangle
|
||||||
|
h_draw
|
||||||
|
jsr getltrb
|
||||||
|
jmp f_FillRect
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &EXP {0,1} - set line draw mode
|
||||||
|
h_exp
|
||||||
|
* Parse the mode; only 0 and 1 pass the range check.
|
||||||
|
jsr GETBYT ;mode value arrives in X
|
||||||
|
cpx #2
|
||||||
|
bge :bad ;2-255 not allowed
|
||||||
|
stx f_in_arg ;argument for SetLineMode
|
||||||
|
jmp f_SetLineMode
|
||||||
|
:bad jmp IllQError
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &COS cx,cy,rad - draw outline circle
|
||||||
|
h_cos
|
||||||
|
jsr getcxcyr
|
||||||
|
jmp f_DrawCircle
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &SIN cx,cy,rad - draw filled circle
|
||||||
|
h_sin
|
||||||
|
jsr getcxcyr
|
||||||
|
jmp f_FillCircle
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &AT x,y - select center for array draw
|
||||||
|
h_at
|
||||||
|
jsr HFNS
|
||||||
|
jmp storeac
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &PLOT vertexAddr, indexAddr, indexCount [AT cx,cy]
|
||||||
|
* draw lines from arrays of vertices and indices
|
||||||
|
h_plot jmp array_draw
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* &HPLOT x,y - draw a point
|
||||||
|
* &HPLOT TO x,y - draw a line from last point to x,y
|
||||||
|
* &HPLOT x0,y0 to x1,y1 - draw a line
|
||||||
|
lst on ;last token handler --
|
||||||
|
h_hplot equ * ; must be on first page
|
||||||
|
lst off ; to omit high byte table
|
||||||
|
|
||||||
|
jsr CHRGOT ;check next token
|
||||||
|
lst off
|
||||||
|
cmp #tok_to ;is this an "HPLOT TO"?
|
||||||
|
beq :leadingto
|
||||||
|
jsr getx1y1 ;get the first coord
|
||||||
|
jsr copy1to0
|
||||||
|
jsr CHRGOT ;see if single point
|
||||||
|
cmp #tok_to
|
||||||
|
beq :hplot_to ;nope, draw line
|
||||||
|
jsr copy0toprev ;draw point, and save x/y
|
||||||
|
jmp f_DrawPoint ; for subsequent HPLOT TO
|
||||||
|
|
||||||
|
:leadingto ;"HPLOT TO", restore the
|
||||||
|
lda g_prevxl ; previous coord to x0/y0
|
||||||
|
sta f_in_x0l ;(can't rely on f_in_zzz
|
||||||
|
lda g_prevxh ; being there -- we might
|
||||||
|
sta f_in_x0h ; have drawn a rect)
|
||||||
|
lda g_prevy
|
||||||
|
sta f_in_y0
|
||||||
|
:hplot_to
|
||||||
|
jsr CHRGET ;eat the TO
|
||||||
|
jsr getx1y1 ;get the coords
|
||||||
|
jsr f_DrawLine ;draw it
|
||||||
|
jsr copy1to0 ;shift 1->0 for next round
|
||||||
|
jsr CHRGOT
|
||||||
|
cmp #tok_to ;another TO?
|
||||||
|
beq :hplot_to ;yes, branch
|
||||||
|
jmp copy0toprev ;no, save prev and bail
|
||||||
|
|
||||||
|
* Get coordinates and store in X1/Y1.
|
||||||
|
getx1y1
|
||||||
|
jsr HFNS
|
||||||
|
store1 stx f_in_x1l ;store X/Y/A in coord1
|
||||||
|
sty f_in_x1h
|
||||||
|
sta f_in_y1
|
||||||
|
rts
|
||||||
|
|
||||||
|
* Save x0/y0 as our "previous" coordinate.
|
||||||
|
copy0toprev
|
||||||
|
ldx f_in_x0l
|
||||||
|
ldy f_in_x0h
|
||||||
|
lda f_in_y0
|
||||||
|
storeprv stx g_prevxl ;store X/Y/A in g_prev
|
||||||
|
sty g_prevxh
|
||||||
|
sta g_prevy
|
||||||
|
rts
|
||||||
|
|
||||||
|
* Copy X1/Y1 into X0/Y0.
|
||||||
|
copy1to0
|
||||||
|
ldx f_in_x1l
|
||||||
|
ldy f_in_x1h
|
||||||
|
lda f_in_y1
|
||||||
|
store0 stx f_in_x0l ;store X/Y/A in coord 0
|
||||||
|
sty f_in_x0h
|
||||||
|
sta f_in_y0
|
||||||
|
rts
|
||||||
|
|
||||||
|
* Store X/Y/A into array-center.
|
||||||
|
storeac stx g_ac_xl
|
||||||
|
sty g_ac_xh
|
||||||
|
sta g_ac_y
|
||||||
|
rts
|
||||||
|
|
||||||
|
* Get left/top/right/bottom coordinates.
|
||||||
|
*
|
||||||
|
* Parses two coordinate pairs separated by a comma. The
|
||||||
|
* first pair is stored with store0 (f_in_x0l/x0h/y0).
|
||||||
|
* The second pair is read and stored by getx1y1, whose
|
||||||
|
* RTS returns straight to our caller.
|
||||||
|
getltrb
|
||||||
|
jsr HFNS
|
||||||
|
jsr store0 ;save as X0/Y0
|
||||||
|
jsr CHKCOM ;eat a comma
|
||||||
|
jmp getx1y1 ;read and save X1/Y1
|
||||||
|
|
||||||
|
* Get center coordinates and radius.
|
||||||
|
getcxcyr
|
||||||
|
jsr HFNS ;get CX and CY
|
||||||
|
jsr store0 ;save as X0/Y0
|
||||||
|
jsr CHKCOM ;eat a comma
|
||||||
|
jsr GETBYT ;convert to 0-255
|
||||||
|
stx f_in_rad
|
||||||
|
rts
|
||||||
|
|
||||||
|
* Array-draw handler.
|
||||||
|
*
|
||||||
|
* Parses: vertexAddr, indexAddr, indexCount [AT cx,cy]
|
||||||
|
*
|
||||||
|
* indexCount (0-127) is the number of index pairs. Each
|
||||||
|
* pair names the two endpoints of one line; a pair with
|
||||||
|
* an invalid vertex is skipped. The optional AT clause
|
||||||
|
* is parsed before the zero-count check, so a count of
|
||||||
|
* zero still consumes the whole statement.
|
||||||
|
*
|
||||||
|
* We know that fdraw doesn't use LINNUM or A1L/A1H,
|
||||||
|
* so it's safe to use them here. PCL/PCH are borrowed
|
||||||
|
* on the same assumption.
|
||||||
|
array_draw
|
||||||
|
]vertices equ A1L ;2b
|
||||||
|
]indices equ LINNUM ;2b
|
||||||
|
]count equ PCL
|
||||||
|
]cur equ PCH
|
||||||
|
|
||||||
|
|
||||||
|
jsr FRMNUM ;get vertex buffer address
|
||||||
|
jsr GETADR
|
||||||
|
lda LINNUM ;copy to A1L
|
||||||
|
sta ]vertices
|
||||||
|
lda LINNUM+1
|
||||||
|
sta ]vertices+1
|
||||||
|
jsr CHKCOM ;eat the comma
|
||||||
|
jsr FRMNUM ;get index buffer address
|
||||||
|
jsr GETADR ;leave it in LINNUM
|
||||||
|
jsr CHKCOM
|
||||||
|
jsr GETBYT ;get the count
|
||||||
|
cpx #128 ;range check (0-127)
|
||||||
|
blt :countok
|
||||||
|
jmp IllQError
|
||||||
|
:countok txa
|
||||||
|
asl ;double it (0 stays 0)
|
||||||
|
sta ]count ;stash it
|
||||||
|
lda #$00
|
||||||
|
sta ]cur
|
||||||
|
|
||||||
|
|
||||||
|
* Check for optional AT cx,cy.
|
||||||
|
jsr CHRGOT
|
||||||
|
cmp #tok_at
|
||||||
|
bne :noat
|
||||||
|
JSR CHRGET ;eat the AT
|
||||||
|
lda LINNUM ;the code that reads the
|
||||||
|
pha ; hi-res coordinates will
|
||||||
|
lda LINNUM+1 ; overwrite LINNUM, so
|
||||||
|
pha ; we have to save & restore
|
||||||
|
jsr h_at
|
||||||
|
pla
|
||||||
|
sta LINNUM+1
|
||||||
|
pla
|
||||||
|
sta LINNUM
|
||||||
|
:noat
|
||||||
|
lda ]count ;zero count is tested here,
|
||||||
|
beq :done ; after AT has been parsed
|
||||||
|
|
||||||
|
|
||||||
|
]loop jsr getvertex
|
||||||
|
bcs :skip2
|
||||||
|
jsr store0
|
||||||
|
jsr getvertex
|
||||||
|
bcs :skip
|
||||||
|
jsr store1
|
||||||
|
jsr f_DrawLine
|
||||||
|
dfb $2c ;BIT addr
|
||||||
|
:skip2 inc ]cur
|
||||||
|
:skip lda ]cur
|
||||||
|
cmp ]count
|
||||||
|
blt ]loop
|
||||||
|
:done rts
|
||||||
|
|
||||||
|
* Get the Nth vertex, specified by ]cur, and load it
|
||||||
|
* into X/Y/A (xlo/xhi/y). Returns with carry set if
|
||||||
|
* the vertex is invalid.
|
||||||
|
*
|
||||||
|
* Increments ]cur by 1.
|
||||||
|
getvertex ;out: X/Y/A = xlo/xhi/y, C=1 if bad
|
||||||
|
ldy ]cur ;offset of next index byte
|
||||||
|
inc ]cur
|
||||||
|
lda (]indices),y ;vertex number
|
||||||
|
bmi :badv ;must be 0-127
|
||||||
|
jsr :calcvertex ;fills g_out_x and g_out_y
|
||||||
|
|
||||||
|
|
||||||
|
ldx g_out_x
|
||||||
|
ldy g_out_x+1
|
||||||
|
beq :xok ;0-255, ok
|
||||||
|
cpy #1
|
||||||
|
bne :badv ;512+ or negative
|
||||||
|
cpx #280-256
|
||||||
|
bge :badv ;280-511
|
||||||
|
:xok
|
||||||
|
lda g_out_y+1
|
||||||
|
bne :badv ;Y is neg or > 255
|
||||||
|
lda g_out_y
|
||||||
|
cmp #192 ;carry clear when 0-191
|
||||||
|
bcc :goodv
|
||||||
|
:badv
|
||||||
|
sec
|
||||||
|
:goodv rts
|
||||||
|
|
||||||
|
|
||||||
|
* Get VX and VY, merging with AC, and store in
|
||||||
|
* 16-bit g_out_x and g_out_y. Range not checked
|
||||||
|
* here. On entry, A has vertex index.
|
||||||
|
:calcvertex
|
||||||
|
asl ;two bytes per vertex
|
||||||
|
tay
|
||||||
|
ldx #$00 ;hi byte of vertex
|
||||||
|
lda (]vertices),y ;x-coord
|
||||||
|
bpl :xpos
|
||||||
|
dex ;sign-extend hi byte
|
||||||
|
:xpos clc
|
||||||
|
adc g_ac_xl ;add center, low byte
|
||||||
|
sta g_out_x
|
||||||
|
txa
|
||||||
|
adc g_ac_xh ;high byte plus carry
|
||||||
|
sta g_out_x+1
|
||||||
|
|
||||||
|
|
||||||
|
iny ;second byte of the vertex
|
||||||
|
ldx #$00
|
||||||
|
lda (]vertices),y ;y-coord
|
||||||
|
bpl :ypos
|
||||||
|
dex ;sign-extend hi byte
|
||||||
|
:ypos clc
|
||||||
|
adc g_ac_y ;center y is a single byte
|
||||||
|
sta g_out_y
|
||||||
|
bcc :nocarry
|
||||||
|
inx ;carry into hi byte
|
||||||
|
:nocarry stx g_out_y+1
|
||||||
|
rts
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
********************************
|
||||||
|
* Global variables
|
||||||
|
|
||||||
|
g_cur_page ds 1 ;$20 or $40
|
||||||
|
g_hcolor ds 1
|
||||||
|
g_prevxl ds 1
|
||||||
|
g_prevxh ds 1
|
||||||
|
g_prevy ds 1
|
||||||
|
g_ac_xl ds 1 ;Center-point coordinates
|
||||||
|
g_ac_xh ds 1 ; for array-based line
|
||||||
|
g_ac_y ds 1 ; draw (&AT, &PLOT).
|
||||||
|
g_out_x ds 2 ;16-bit coordinates for
|
||||||
|
g_out_y ds 2 ; array-based line draw
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
lst on
|
||||||
|
end equ *
|
||||||
|
sav amperfdraw
|
||||||
|
lst off
|
|
@ -0,0 +1,752 @@
|
||||||
|
********************************
|
||||||
|
* *
|
||||||
|
* Fast Apple II Graphics *
|
||||||
|
* By Andy McFadden *
|
||||||
|
* Version 0.3, Aug 2015 *
|
||||||
|
* *
|
||||||
|
* Circle rendering *
|
||||||
|
* (Included by FDRAW.S) *
|
||||||
|
* *
|
||||||
|
* Developed with Merlin-16 *
|
||||||
|
* *
|
||||||
|
********************************
|
||||||
|
|
||||||
|
* TODO: if USE_FAST is 0, replace the outline circle
|
||||||
|
* plot code with calls to DrawPoint (or maybe a
|
||||||
|
* common sub-function so we don't trash the input
|
||||||
|
* parameters). Saves a little space.
|
||||||
|
|
||||||
|
|
||||||
|
********************************
|
||||||
|
*
|
||||||
|
* Draw a circle. The radius is in in_rad, and
|
||||||
|
* the center is at in_x0l+in_x0h,in_y0.
|
||||||
|
*
|
||||||
|
********************************
|
||||||
|
DrawCircle
|
||||||
|
* calc_circle returns early for radius 0, before any
|
||||||
|
* outline point is plotted. That case is completed
|
||||||
|
* here by plotting the center with DrawPoint, so a
|
||||||
|
* zero-radius outline is one pixel, like the filled one.
|
||||||
|
lda #$20 ;JSR
|
||||||
|
cmp _cp08 ;configured for outline?
|
||||||
|
beq :okay
|
||||||
|
jsr fixcplot ;no, patch the plot calls
|
||||||
|
:okay
|
||||||
|
lda in_rad
|
||||||
|
beq :point ;radius 0 needs the extra step
|
||||||
|
jmp calc_circle
|
||||||
|
:point jsr calc_circle ;still sets up raster tables
|
||||||
|
jmp DrawPoint ;plot the center, x0/y0
|
||||||
|
|
||||||
|
|
||||||
|
********************************
|
||||||
|
*
|
||||||
|
* Draw filled circle.
|
||||||
|
*
|
||||||
|
********************************
|
||||||
|
FillCircle ;in: in_rad, in_x0l/in_x0h, in_y0
|
||||||
|
lda #$2c ;BIT
|
||||||
|
cmp _cp08 ;configured for fill?
|
||||||
|
beq :okay
|
||||||
|
jsr fixcplot ;no, turn plot calls into BIT
|
||||||
|
:okay
|
||||||
|
jsr calc_circle ;fill the rasterization tables
|
||||||
|
jmp FillRaster ;hand the tables to FillRaster
|
||||||
|
|
||||||
|
|
||||||
|
* Calculate a circle, using Bresenham's algorithm. The
|
||||||
|
* results are placed into the rasterization buffers.
|
||||||
|
*
|
||||||
|
* in_rad must be from 0 to 255. The x/y center
|
||||||
|
* coordinates must be on the screen, but the circle
|
||||||
|
* can extend off the edge.
|
||||||
|
*
|
||||||
|
* The computed values are stored in the rasterization
|
||||||
|
* tables. For an outline circle, we also plot the
|
||||||
|
* points immediately.
|
||||||
|
|
||||||
|
do USE_FAST ;*****
|
||||||
|
* local storage -- not used often enough to merit DP
|
||||||
|
circ_8bit ds 1
|
||||||
|
circ_clip ds 1
|
||||||
|
fin ;*****
|
||||||
|
|
||||||
|
calc_circle
|
||||||
|
max_fast_rad equ 41
|
||||||
|
]cxl equ zloc0
|
||||||
|
]cxh equ zloc1
|
||||||
|
]cy equ zloc2
|
||||||
|
]dlo equ zloc3
|
||||||
|
]dhi equ zloc4
|
||||||
|
]xsav equ zloc5
|
||||||
|
]ysav equ zloc6
|
||||||
|
]min_x equ zloc7 ;min/max offsets from center
|
||||||
|
]max_x equ zloc8 ;(min is above center, max
|
||||||
|
]min_y equ zloc9 ; is below)
|
||||||
|
]max_y equ zloc10
|
||||||
|
]hitmp equ zloc11
|
||||||
|
* only used by hplot for outline circles
|
||||||
|
]hbasl equ zptr0
|
||||||
|
]andmask equ zloc11 ;overlaps with ]hitmp
|
||||||
|
]savxreg equ zloc12
|
||||||
|
]savyreg equ zloc13
|
||||||
|
|
||||||
|
* Special-case radius=0. It removes an annoying
|
||||||
|
* edge case (first y-- becomes 0xff, but 6502 cmp
|
||||||
|
* is unsigned).
|
||||||
|
lda in_rad
|
||||||
|
bne :notzero
|
||||||
|
ldy in_y0
|
||||||
|
sty rast_top
|
||||||
|
sty rast_bottom
|
||||||
|
lda in_x0l
|
||||||
|
sta rastx0l,y
|
||||||
|
sta rastx1l,y
|
||||||
|
lda in_x0h
|
||||||
|
sta rastx0h,y
|
||||||
|
sta rastx1h,y
|
||||||
|
rts
|
||||||
|
|
||||||
|
* Use different version of function for small
|
||||||
|
* circles, because we can do it all in 8 bits.
|
||||||
|
:notzero
|
||||||
|
do USE_FAST ;*****
|
||||||
|
ldy #$01
|
||||||
|
cmp #max_fast_rad ;in_rad in Acc
|
||||||
|
blt :use_fast
|
||||||
|
dey
|
||||||
|
:use_fast sty circ_8bit
|
||||||
|
fin ;*****
|
||||||
|
|
||||||
|
lda in_x0l ;copy center to DP for speed
|
||||||
|
sta ]cxl
|
||||||
|
lda in_x0h
|
||||||
|
sta ]cxh
|
||||||
|
lda in_y0
|
||||||
|
sta ]cy
|
||||||
|
|
||||||
|
* Compute min/max values, based on offset from center.
|
||||||
|
* These are compared against offset-from-center x/y.
|
||||||
|
* We need tight bounds on Y because we use it to
|
||||||
|
* compute the rast_render top/bottom. Getting tight
|
||||||
|
* bounds on X is not so important, but we still need
|
||||||
|
* it for the no-clip optimization.
|
||||||
|
ldx #$04 ;count edges needing clip
|
||||||
|
|
||||||
|
lda #NUM_ROWS-1 ;191
|
||||||
|
sec
|
||||||
|
sbc ]cy ;maxY = 191-cy
|
||||||
|
cmp in_rad
|
||||||
|
blt :ylimok
|
||||||
|
lda in_rad ;clamp to radius
|
||||||
|
dex
|
||||||
|
:ylimok sta ]max_y ;maxY = 191-cy
|
||||||
|
|
||||||
|
lda ]cy ;minY = cy
|
||||||
|
cmp in_rad
|
||||||
|
blt :ylimok2
|
||||||
|
lda in_rad ;clamp to radius
|
||||||
|
dex
|
||||||
|
:ylimok2 sta ]min_y
|
||||||
|
|
||||||
|
lda ]cxh
|
||||||
|
beq :xlimlo
|
||||||
|
* Examples (note #<NUM_COLS-1 is 279-256 = 23):
|
||||||
|
* cx=265 (cxh=1 cxl=9), 23-9=14, chk rad
|
||||||
|
lda #<NUM_COLS-1 ;maxX = 279-cx
|
||||||
|
sec
|
||||||
|
sbc ]cxl
|
||||||
|
cmp in_rad
|
||||||
|
blt :xlimhok
|
||||||
|
lda in_rad ;clamp to radius
|
||||||
|
dex
|
||||||
|
:xlimhok sta ]max_x
|
||||||
|
|
||||||
|
lda in_rad ;min X always out of range
|
||||||
|
dex ; so just clamp to radius
|
||||||
|
sta ]min_x
|
||||||
|
jmp :xlimdone
|
||||||
|
|
||||||
|
* Examples:
|
||||||
|
* For cx=0 to 24, we can never pass right edge (our
|
||||||
|
* maximum radius is 255).
|
||||||
|
* cx=3, 23-3=20 + carry set --> bad, must use rad
|
||||||
|
* cx=24, 23-24=255 + carry clear --> ok, chk rad
|
||||||
|
* cx=255, 23-255=24 + carry clear --> ok, chk rad
|
||||||
|
:xlimlo
|
||||||
|
lda #<NUM_COLS-1 ;maxX = 279-cx
|
||||||
|
sec
|
||||||
|
sbc ]cxl
|
||||||
|
bcs :xuserad
|
||||||
|
cmp in_rad
|
||||||
|
blt :xlimok
|
||||||
|
:xuserad lda in_rad ;clamp to radius
|
||||||
|
dex
|
||||||
|
:xlimok sta ]max_x
|
||||||
|
|
||||||
|
lda ]cxl ;minX = (cx > 255) ?
|
||||||
|
cmp in_rad
|
||||||
|
blt :xlimok2
|
||||||
|
lda in_rad ;clamp to radius
|
||||||
|
dex
|
||||||
|
:xlimok2 sta ]min_x
|
||||||
|
|
||||||
|
:xlimdone
|
||||||
|
|
||||||
|
do USE_FAST ;*****
|
||||||
|
stx circ_clip
|
||||||
|
fin ;*****
|
||||||
|
|
||||||
|
* set top/bottom rows for rasterizer
|
||||||
|
lda ]cy
|
||||||
|
clc
|
||||||
|
adc ]max_y
|
||||||
|
sta rast_bottom
|
||||||
|
lda ]cy
|
||||||
|
sec
|
||||||
|
sbc ]min_y
|
||||||
|
sta rast_top
|
||||||
|
|
||||||
|
DO 0 ;debug debug debug
|
||||||
|
LDA ]min_x ;save a copy where the
|
||||||
|
STA $0380 ; monitor won't trash it
|
||||||
|
LDA ]max_x
|
||||||
|
STA $0381
|
||||||
|
LDA ]min_y
|
||||||
|
STA $0382
|
||||||
|
LDA ]max_y
|
||||||
|
STA $0383
|
||||||
|
FIN
|
||||||
|
|
||||||
|
* Set initial conditions for Bresenham.
|
||||||
|
ldx #0 ;:x = 0
|
||||||
|
stx ]xsav
|
||||||
|
ldy in_rad ;:y = rad
|
||||||
|
sty ]ysav
|
||||||
|
lda #1 ;:d = 1 - rad
|
||||||
|
sec
|
||||||
|
sbc ]ysav ;in_rad
|
||||||
|
sta ]dlo
|
||||||
|
bcs :hizero ;C==1 if in_rad<=1
|
||||||
|
ldx #$ff ;C was 0, make neg
|
||||||
|
:hizero stx ]dhi
|
||||||
|
|
||||||
|
*
|
||||||
|
* Outer loop -- plot 8 points, then update values.
|
||||||
|
*
|
||||||
|
circ_loop
|
||||||
|
|
||||||
|
do USE_FAST ;*****
|
||||||
|
lda circ_clip
|
||||||
|
beq ncypy
|
||||||
|
jmp with_clip
|
||||||
|
|
||||||
|
* Quick version, no clipping required
|
||||||
|
* row cy+y: cx-x and cx+x
|
||||||
|
ncypy
|
||||||
|
lda ]ysav
|
||||||
|
clc
|
||||||
|
adc ]cy
|
||||||
|
tay ;y-coord in Y-reg
|
||||||
|
|
||||||
|
lda ]cxl
|
||||||
|
sec
|
||||||
|
sbc ]xsav
|
||||||
|
sta rastx0l,y
|
||||||
|
lda ]cxh
|
||||||
|
sbc #$00
|
||||||
|
sta rastx0h,y
|
||||||
|
_cp00 jsr cplotl
|
||||||
|
|
||||||
|
lda ]cxl
|
||||||
|
clc
|
||||||
|
adc ]xsav
|
||||||
|
sta rastx1l,y
|
||||||
|
lda ]cxh
|
||||||
|
adc #$00
|
||||||
|
sta rastx1h,y
|
||||||
|
_cp01 jsr cplotrn
|
||||||
|
|
||||||
|
* row cy-y: cx-x and cx+x
|
||||||
|
ncymy
|
||||||
|
lda ]cy
|
||||||
|
sec
|
||||||
|
sbc ]ysav
|
||||||
|
tay ;y-coord in Y-reg
|
||||||
|
|
||||||
|
lda ]cxl
|
||||||
|
sec
|
||||||
|
sbc ]xsav
|
||||||
|
sta rastx0l,y
|
||||||
|
lda ]cxh
|
||||||
|
sbc #$00
|
||||||
|
sta rastx0h,y
|
||||||
|
_cp02 jsr cplotl
|
||||||
|
|
||||||
|
lda ]cxl
|
||||||
|
clc
|
||||||
|
adc ]xsav
|
||||||
|
sta rastx1l,y
|
||||||
|
lda ]cxh
|
||||||
|
adc #$00
|
||||||
|
sta rastx1h,y
|
||||||
|
_cp03 jsr cplotrn
|
||||||
|
|
||||||
|
* row cy+x: cx-y and cx+y
|
||||||
|
ncypx
|
||||||
|
lda ]xsav ;off bottom?
|
||||||
|
clc
|
||||||
|
adc ]cy
|
||||||
|
tay ;y-coord in Y-reg
|
||||||
|
|
||||||
|
lda ]cxl
|
||||||
|
sec
|
||||||
|
sbc ]ysav
|
||||||
|
sta rastx0l,y
|
||||||
|
lda ]cxh
|
||||||
|
sbc #$00
|
||||||
|
sta rastx0h,y
|
||||||
|
_cp04 jsr cplotl
|
||||||
|
|
||||||
|
lda ]cxl
|
||||||
|
clc
|
||||||
|
adc ]ysav
|
||||||
|
sta rastx1l,y
|
||||||
|
lda ]cxh
|
||||||
|
adc #$00
|
||||||
|
sta rastx1h,y
|
||||||
|
_cp05 jsr cplotrn
|
||||||
|
|
||||||
|
* row cy-x: cx-y and cx+y
|
||||||
|
ncymx
|
||||||
|
lda ]cy
|
||||||
|
sec
|
||||||
|
sbc ]xsav
|
||||||
|
tay ;y-coord in Y-reg
|
||||||
|
|
||||||
|
lda ]cxl
|
||||||
|
sec
|
||||||
|
sbc ]ysav
|
||||||
|
sta rastx0l,y
|
||||||
|
lda ]cxh
|
||||||
|
sbc #$00
|
||||||
|
sta rastx0h,y
|
||||||
|
_cp06 jsr cplotl
|
||||||
|
|
||||||
|
lda ]cxl
|
||||||
|
clc
|
||||||
|
adc ]ysav
|
||||||
|
sta rastx1l,y
|
||||||
|
lda ]cxh
|
||||||
|
adc #$00
|
||||||
|
sta rastx1h,y
|
||||||
|
_cp07 jsr cplotrn
|
||||||
|
|
||||||
|
* CLICK
|
||||||
|
jmp circ_plot_done
|
||||||
|
|
||||||
|
fin ;***** (USE_FAST)
|
||||||
|
|
||||||
|
*
|
||||||
|
* Same thing, but this time clipping edges.
|
||||||
|
*
|
||||||
|
with_clip
|
||||||
|
|
||||||
|
* row cy+y: cx-x and cx+x
|
||||||
|
ccypy
|
||||||
|
lda ]ysav ;off bottom?
|
||||||
|
cmp ]max_y
|
||||||
|
beq :cypy_ok
|
||||||
|
bge cypy_skip ;completely off screen
|
||||||
|
:cypy_ok clc
|
||||||
|
adc ]cy
|
||||||
|
tay ;y-coord in Y-reg
|
||||||
|
|
||||||
|
ldx ]xsav ;handle cx-x
|
||||||
|
cpx ]min_x
|
||||||
|
blt :cxmx_ok
|
||||||
|
beq :cxmx_ok
|
||||||
|
lda #0 ;clip at 0
|
||||||
|
sta rastx0l,y
|
||||||
|
sta rastx0h,y
|
||||||
|
beq cxmx_done0 ;always
|
||||||
|
BREAK
|
||||||
|
:cxmx_ok lda ]cxl
|
||||||
|
sec
|
||||||
|
sbc ]xsav
|
||||||
|
sta rastx0l,y
|
||||||
|
lda ]cxh
|
||||||
|
sbc #$00
|
||||||
|
sta rastx0h,y
|
||||||
|
_cp08 jsr cplotl
|
||||||
|
cxmx_done0
|
||||||
|
|
||||||
|
cpx ]max_x ;handle cx+x
|
||||||
|
blt :cxpx_ok
|
||||||
|
beq :cxpx_ok
|
||||||
|
lda #<NUM_COLS-1
|
||||||
|
sta rastx1l,y
|
||||||
|
lda #>NUM_COLS-1
|
||||||
|
sta rastx1h,y
|
||||||
|
bne cxpx_done0 ;always
|
||||||
|
BREAK
|
||||||
|
:cxpx_ok lda ]cxl
|
||||||
|
clc
|
||||||
|
adc ]xsav
|
||||||
|
sta rastx1l,y
|
||||||
|
lda ]cxh
|
||||||
|
adc #$00
|
||||||
|
sta rastx1h,y
|
||||||
|
_cp09 jsr cplotr
|
||||||
|
cxpx_done0
|
||||||
|
cypy_skip
|
||||||
|
|
||||||
|
* row cy-y: cx-x and cx+x
|
||||||
|
ccymy
|
||||||
|
lda ]ysav ;off top?
|
||||||
|
cmp ]min_y
|
||||||
|
beq :cymy_ok
|
||||||
|
bge cymy_skip
|
||||||
|
:cymy_ok lda ]cy
|
||||||
|
sec
|
||||||
|
sbc ]ysav
|
||||||
|
tay ;y-coord in Y-reg
|
||||||
|
|
||||||
|
ldx ]xsav ;handle cx-x
|
||||||
|
cpx ]min_x
|
||||||
|
blt :cxmx_ok
|
||||||
|
beq :cxmx_ok
|
||||||
|
lda #0 ;clip at 0
|
||||||
|
sta rastx0l,y
|
||||||
|
sta rastx0h,y
|
||||||
|
beq cxmx_done1 ;always
|
||||||
|
BREAK
|
||||||
|
:cxmx_ok lda ]cxl
|
||||||
|
sec
|
||||||
|
sbc ]xsav
|
||||||
|
sta rastx0l,y
|
||||||
|
lda ]cxh
|
||||||
|
sbc #$00
|
||||||
|
sta rastx0h,y
|
||||||
|
_cp10 jsr cplotl
|
||||||
|
cxmx_done1
|
||||||
|
|
||||||
|
cpx ]max_x ;handle cx+x
|
||||||
|
blt :cxpx_ok
|
||||||
|
beq :cxpx_ok
|
||||||
|
lda #<NUM_COLS-1
|
||||||
|
sta rastx1l,y
|
||||||
|
lda #>NUM_COLS-1
|
||||||
|
sta rastx1h,y
|
||||||
|
bne cxpx_done1 ;always
|
||||||
|
BREAK
|
||||||
|
:cxpx_ok lda ]cxl
|
||||||
|
clc
|
||||||
|
adc ]xsav
|
||||||
|
sta rastx1l,y
|
||||||
|
lda ]cxh
|
||||||
|
adc #$00
|
||||||
|
sta rastx1h,y
|
||||||
|
_cp11 jsr cplotr
|
||||||
|
cxpx_done1
|
||||||
|
cymy_skip
|
||||||
|
|
||||||
|
* row cy+x: cx-y and cx+y
|
||||||
|
ccypx
|
||||||
|
lda ]xsav ;off bottom?
|
||||||
|
cmp ]max_y
|
||||||
|
beq :cypx_ok
|
||||||
|
bge cypx_skip
|
||||||
|
:cypx_ok clc
|
||||||
|
adc ]cy
|
||||||
|
tay ;y-coord in Y-reg
|
||||||
|
|
||||||
|
ldx ]ysav ;handle cx-y
|
||||||
|
cpx ]min_x
|
||||||
|
blt :cxmy_ok
|
||||||
|
beq :cxmy_ok
|
||||||
|
lda #0 ;clip at 0
|
||||||
|
sta rastx0l,y
|
||||||
|
sta rastx0h,y
|
||||||
|
beq cxmy_done2 ;always
|
||||||
|
BREAK
|
||||||
|
:cxmy_ok lda ]cxl
|
||||||
|
sec
|
||||||
|
sbc ]ysav
|
||||||
|
sta rastx0l,y
|
||||||
|
lda ]cxh
|
||||||
|
sbc #$00
|
||||||
|
sta rastx0h,y
|
||||||
|
_cp12 jsr cplotl
|
||||||
|
cxmy_done2
|
||||||
|
|
||||||
|
cpx ]max_x ;handle cx+y
|
||||||
|
blt :cxpy_ok
|
||||||
|
beq :cxpy_ok
|
||||||
|
lda #<NUM_COLS-1
|
||||||
|
sta rastx1l,y
|
||||||
|
lda #>NUM_COLS-1
|
||||||
|
sta rastx1h,y
|
||||||
|
bne cxpy_done2 ;always
|
||||||
|
BREAK
|
||||||
|
:cxpy_ok lda ]cxl
|
||||||
|
clc
|
||||||
|
adc ]ysav
|
||||||
|
sta rastx1l,y
|
||||||
|
lda ]cxh
|
||||||
|
adc #$00
|
||||||
|
sta rastx1h,y
|
||||||
|
_cp13 jsr cplotr
|
||||||
|
cxpy_done2
|
||||||
|
cypx_skip
|
||||||
|
|
||||||
|
* row cy-x: cx-y and cx+y
|
||||||
|
ccymx
|
||||||
|
lda ]xsav ;off top?
|
||||||
|
cmp ]min_y
|
||||||
|
beq :cymx_ok
|
||||||
|
bge cymx_skip
|
||||||
|
:cymx_ok lda ]cy
|
||||||
|
sec
|
||||||
|
sbc ]xsav
|
||||||
|
tay ;y-coord in Y-reg
|
||||||
|
|
||||||
|
ldx ]ysav ;handle cx-y
|
||||||
|
cpx ]min_x
|
||||||
|
blt :cxmy_ok
|
||||||
|
beq :cxmy_ok
|
||||||
|
lda #0 ;clip at 0
|
||||||
|
sta rastx0l,y
|
||||||
|
sta rastx0h,y
|
||||||
|
beq cxmy_done3 ;always
|
||||||
|
BREAK
|
||||||
|
:cxmy_ok lda ]cxl
|
||||||
|
sec
|
||||||
|
sbc ]ysav
|
||||||
|
sta rastx0l,y
|
||||||
|
lda ]cxh
|
||||||
|
sbc #$00
|
||||||
|
sta rastx0h,y
|
||||||
|
_cp14 jsr cplotl
|
||||||
|
cxmy_done3
|
||||||
|
|
||||||
|
cpx ]max_x ;handle cx+y
|
||||||
|
blt :cxpy_ok
|
||||||
|
beq :cxpy_ok
|
||||||
|
lda #<NUM_COLS-1
|
||||||
|
sta rastx1l,y
|
||||||
|
lda #>NUM_COLS-1
|
||||||
|
sta rastx1h,y
|
||||||
|
bne cxpy_done3 ;always
|
||||||
|
BREAK
|
||||||
|
:cxpy_ok lda ]cxl
|
||||||
|
clc
|
||||||
|
adc ]ysav
|
||||||
|
sta rastx1l,y
|
||||||
|
lda ]cxh
|
||||||
|
adc #$00
|
||||||
|
sta rastx1h,y
|
||||||
|
_cp15 jsr cplotr
|
||||||
|
cxpy_done3
|
||||||
|
cymx_skip
|
||||||
|
|
||||||
|
circ_plot_done
|
||||||
|
* Update X/Y/D. Up to about radius=41 we can maintain
|
||||||
|
* 'd' in an 8-bit register.
|
||||||
|
do USE_FAST ;*****
|
||||||
|
lda circ_8bit
|
||||||
|
beq circ_slow
|
||||||
|
|
||||||
|
*
|
||||||
|
* Bresenham update, with 8-bit 'd'.
|
||||||
|
*
|
||||||
|
ldx ]xsav
|
||||||
|
lda ]dlo
|
||||||
|
bmi :dneg
|
||||||
|
txa ;:d = d + ((x-y)*4) +5
|
||||||
|
sec
|
||||||
|
sbc ]ysav ;x <= y, may be neg or 0
|
||||||
|
asl
|
||||||
|
asl
|
||||||
|
clc ;can't know carry
|
||||||
|
adc #5
|
||||||
|
clc ;still don't want carry
|
||||||
|
adc ]dlo
|
||||||
|
sta ]dlo
|
||||||
|
dec ]ysav ;:y--
|
||||||
|
jmp :loopbot
|
||||||
|
:dneg txa ;:d = d + (x*4) +3
|
||||||
|
asl
|
||||||
|
asl ;x always pos, C=0
|
||||||
|
DO 0
|
||||||
|
BCC :TEST ;debug
|
||||||
|
BREAK ;debug
|
||||||
|
:TEST ;debug
|
||||||
|
FIN
|
||||||
|
adc #3
|
||||||
|
adc ]dlo
|
||||||
|
sta ]dlo
|
||||||
|
:loopbot
|
||||||
|
inx ;:x++
|
||||||
|
stx ]xsav
|
||||||
|
cpx ]ysav
|
||||||
|
beq :again
|
||||||
|
bge circ_done
|
||||||
|
:again jmp circ_loop
|
||||||
|
|
||||||
|
fin ;*****
|
||||||
|
|
||||||
|
*
|
||||||
|
* Bresenham update, with 16-bit 'd'
|
||||||
|
*
|
||||||
|
circ_slow
|
||||||
|
CLICK
|
||||||
|
ldx ]xsav
|
||||||
|
lda ]dhi
|
||||||
|
bmi :dneg
|
||||||
|
lda ]dlo
|
||||||
|
clc
|
||||||
|
adc #5
|
||||||
|
sta ]dlo
|
||||||
|
bcc :noinc
|
||||||
|
inc ]dhi
|
||||||
|
:noinc
|
||||||
|
txa ;:d = d + ((x-y)*4) +5
|
||||||
|
ldy #$00
|
||||||
|
sty ]hitmp
|
||||||
|
sec
|
||||||
|
sbc ]ysav ;x <= y, may be neg or 0
|
||||||
|
beq :xeqy ;if x==y, nothing to add
|
||||||
|
ldy #$ff
|
||||||
|
sty ]hitmp
|
||||||
|
asl
|
||||||
|
rol ]hitmp
|
||||||
|
asl
|
||||||
|
rol ]hitmp
|
||||||
|
clc
|
||||||
|
adc ]dlo
|
||||||
|
sta ]dlo
|
||||||
|
lda ]dhi
|
||||||
|
adc ]hitmp
|
||||||
|
sta ]dhi
|
||||||
|
:xeqy
|
||||||
|
dec ]ysav ;:y--
|
||||||
|
jmp :loopbot
|
||||||
|
|
||||||
|
:dneg lda ]dlo ;:d = d + (x*4) + 3
|
||||||
|
clc
|
||||||
|
adc #3
|
||||||
|
sta ]dlo
|
||||||
|
bcc :noinc2
|
||||||
|
inc ]dhi
|
||||||
|
:noinc2 txa
|
||||||
|
ldy #0 ;x always positive
|
||||||
|
sty ]hitmp
|
||||||
|
asl
|
||||||
|
rol ]hitmp
|
||||||
|
asl
|
||||||
|
rol ]hitmp
|
||||||
|
clc ;not needed?
|
||||||
|
adc ]dlo
|
||||||
|
sta ]dlo
|
||||||
|
lda ]dhi
|
||||||
|
adc ]hitmp
|
||||||
|
sta ]dhi
|
||||||
|
:loopbot
|
||||||
|
inx ;:x++
|
||||||
|
stx ]xsav
|
||||||
|
cpx ]ysav
|
||||||
|
beq :again
|
||||||
|
bge circ_done
|
||||||
|
:again jmp circ_loop
|
||||||
|
|
||||||
|
|
||||||
|
circ_done rts
|
||||||
|
|
||||||
|
|
||||||
|
* Plot a point for outline circle rendering.
|
||||||
|
*
|
||||||
|
* X and Y must be preserved. Y holds the current line
|
||||||
|
* number.
|
||||||
|
*
|
||||||
|
* Most DP locations are in use -- see the variable
|
||||||
|
* declarations at the start of the circle function.
|
||||||
|
|
||||||
|
* cplotl is the entry point for the leftmost point.
|
||||||
|
cplotl
|
||||||
|
stx ]savxreg
|
||||||
|
sty ]savyreg
|
||||||
|
|
||||||
|
lda ylooklo,y
|
||||||
|
sta ]hbasl
|
||||||
|
lda ylookhi,y
|
||||||
|
_pg_or2 ora #$20
|
||||||
|
sta ]hbasl+1
|
||||||
|
|
||||||
|
* Convert the X coordinate into byte/bit.
|
||||||
|
ldx rastx0l,y ;x coord, lo
|
||||||
|
lda rastx0h,y ;>= 256?
|
||||||
|
beq :lotabl ;no, use the low table
|
||||||
|
ldy div7hi,x
|
||||||
|
lda mod7hi,x
|
||||||
|
bpl cplotcom ;always
|
||||||
|
BREAK ;debug
|
||||||
|
:lotabl ldy div7lo,x
|
||||||
|
lda mod7lo,x
|
||||||
|
jmp cplotcom
|
||||||
|
|
||||||
|
* cplotr is the entry point for the rightmost point.
|
||||||
|
* We use rastx1 instead of rastx0.
|
||||||
|
cplotr
|
||||||
|
lda ylooklo,y
|
||||||
|
sta ]hbasl
|
||||||
|
lda ylookhi,y
|
||||||
|
_pg_or3 ora #$20
|
||||||
|
sta ]hbasl+1
|
||||||
|
|
||||||
|
* If we just plotted the left point on the same line,
|
||||||
|
* we can skip the Y-lookup by jumping here.
|
||||||
|
cplotrn
|
||||||
|
stx ]savxreg
|
||||||
|
sty ]savyreg
|
||||||
|
|
||||||
|
ldx rastx1l,y ;x coord, lo
|
||||||
|
lda rastx1h,y ;>= 256?
|
||||||
|
beq :lotabl ;no, use the low table
|
||||||
|
ldy div7hi,x
|
||||||
|
lda mod7hi,x
|
||||||
|
bpl cplotcom ;always
|
||||||
|
BREAK ;debug
|
||||||
|
:lotabl ldy div7lo,x
|
||||||
|
lda mod7lo,x
|
||||||
|
|
||||||
|
* Plot the point. The byte offset (0-39) is in Y,
|
||||||
|
* the bit offset (0-6) is in A.
|
||||||
|
cplotcom
|
||||||
|
tax
|
||||||
|
lda colorline,y ;start with color pattern
|
||||||
|
eor (]hbasl),y ;flip all bits
|
||||||
|
and andmask,x ;clear other bits
|
||||||
|
eor (]hbasl),y ;restore ours, set theirs
|
||||||
|
sta (]hbasl),y
|
||||||
|
|
||||||
|
ldx ]savxreg
|
||||||
|
ldy ]savyreg
|
||||||
|
rts
|
||||||
|
|
||||||
|
* Reconfigure calc_circle to either JSR to cplotl/r,
|
||||||
|
* or just BIT the address (a 4-cycle no-op). The
|
||||||
|
* desired instruction is in A.
|
||||||
|
fixcplot ;A = opcode to store, unchanged on exit
|
||||||
|
do USE_FAST ;*****
|
||||||
|
sta _cp00 ;no-clip call sites, _cp00-_cp07
|
||||||
|
sta _cp01
|
||||||
|
sta _cp02
|
||||||
|
sta _cp03
|
||||||
|
sta _cp04
|
||||||
|
sta _cp05
|
||||||
|
sta _cp06
|
||||||
|
sta _cp07
|
||||||
|
fin ;*****
|
||||||
|
sta _cp08 ;clipping call sites, _cp08-_cp15
|
||||||
|
sta _cp09
|
||||||
|
sta _cp10
|
||||||
|
sta _cp11
|
||||||
|
sta _cp12
|
||||||
|
sta _cp13
|
||||||
|
sta _cp14
|
||||||
|
sta _cp15
|
||||||
|
rts
|
|
@ -0,0 +1,588 @@
|
||||||
|
********************************
|
||||||
|
* *
|
||||||
|
* Fast Apple II Graphics *
|
||||||
|
* By Andy McFadden *
|
||||||
|
* Version 0.3, Aug 2015 *
|
||||||
|
* *
|
||||||
|
* Point and line functions *
|
||||||
|
* (Included by FDRAW.S) *
|
||||||
|
* *
|
||||||
|
* Developed with Merlin-16 *
|
||||||
|
* *
|
||||||
|
********************************
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* DrawPoint: plot one pixel in the current color.
*
* Inputs: in_x0l/in_x0h (X coordinate), in_y0 (line).
* Clobbers A, X, Y.
*
********************************
DrawPoint
]hbasl   equ   zptr0

* Build the address of the start of line in_y0.
         ldy   in_y0
         lda   ylooklo,y
         sta   ]hbasl
         lda   ylookhi,y
         ora   g_page     ;select the current hi-res page
         sta   ]hbasl+1

* Convert the X coordinate into a byte offset (Y) and a
* bit offset (A) using the divide/modulo-by-7 tables.
         ldx   in_x0l     ;x coord, lo
         lda   in_x0h     ;x coord, hi (nonzero if >= 256)
         beq   :lowtab    ;zero: use the low tables
         ldy   div7hi,x
         lda   mod7hi,x
         bpl   :plot      ;always
         BREAK            ;debug
:lowtab  ldy   div7lo,x
         lda   mod7lo,x

* Byte offset (0-39) is in Y, bit offset (0-6) is in A.
:plot
         tax              ;bit offset indexes andmask
         lda   colorline,y ;begin with color pattern
         eor   (]hbasl),y ;flip all bits
         and   andmask,x  ;clear other bits
         eor   (]hbasl),y ;restore ours, set theirs
         sta   (]hbasl),y
         rts
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* DrawLine: draw a line between (in_x0,in_y0) and
* (in_x1,in_y1).
*
* This section sorts the endpoints, computes the deltas,
* patches the Y-step opcodes, and dispatches to the
* pure-vertical, pure-horizontal, horizontal-dominant
* or vertical-dominant code.
*
********************************
DrawLine

]hbasl   equ   zptr0
]xposl   equ   zloc0      ;always left edge
]xposh   equ   zloc1
]ypos    equ   zloc2      ;top or bottom
]deltaxl equ   zloc3
]deltaxh equ   zloc4
]deltay  equ   zloc5
]count   equ   zloc6
]counth  equ   zloc7
]diff    equ   zloc8
]diffh   equ   zloc9
]andmask equ   zloc10
]wideflag equ  zloc11     ;doesn't really need DP

* We use a traditional Bresenham run-length approach.
* Run-slicing is possible, but the code is larger
* and the increased cost means it's only valuable
* for longer lines.  An optimal solution would switch
* approaches based on line length.
*
* Start by identifying whether x0 or x1 is on the
* left.  To make life simpler we always work from
* left to right, flipping the coordinates if
* needed.
*
* We also need to figure out if the line is more
* than 255 pixels long -- which, because of
* inclusive coordinates, means abs(x0-x1) > 254.
         lda   in_x1l     ;compute x1 - x0, assuming
         sec              ; x0 is on the left
         sbc   in_x0l
         tax              ;X = lo byte of difference
         beq   checkvert  ;lo bytes equal, check hi
         lda   in_x1h
         sbc   in_x0h     ;A = hi byte of difference
         bcs   lx0left    ;no borrow: x0 is on the left

* x1 is on the left, so the difference is negative
* (hi byte in A, lo byte in X).  Negate it.
lx0right eor   #$ff       ;invert hi
         sta   ]deltaxh
         txa
         eor   #$ff       ;invert lo
         sta   ]deltaxl
         inc   ]deltaxl   ;add one for 2s complement
         bne   :hidone    ;did the +1 carry into hi?
         inc   ]deltaxh   ;yes
:hidone  lda   in_x1l     ;start at (x1,y1)
         sta   ]xposl
         lda   in_x1h
         sta   ]xposh
         lda   in_y1
         sta   ]ypos
         sec
         sbc   in_y0      ;A = starty - endy
         jmp   lncommon   ;(JMP leaves the flags alone)

* Low bytes of x0 and x1 match; look at the high bytes.
checkvert
         lda   in_x1h     ;diff high bytes
         sbc   in_x0h     ;(carry still set)
         blt   lx0right   ;width=256, x0 right
         bne   lx0left    ;width=256, x0 left
         jmp   vertline   ;x0 == x1, go vert

* (branch back from below)
* This is a purely horizontal line.  We farm the job
* out to the raster fill code for speed.  (There's
* no problem with the line code handling it; it's just
* more efficient to let the raster code do it.)
phorizontal
         ldy   ]ypos
         sty   rast_top   ;one-line raster
         sty   rast_bottom
         lda   ]xposl
         sta   rastx0l,y  ;left edge, lo
         clc
         adc   ]deltaxl   ;easier to add delta back
         sta   rastx1l,y  ; in than sort out which
         lda   ]xposh     ; arg is left vs. right
         sta   rastx0h,y  ;left edge, hi
         adc   ]deltaxh
         sta   rastx1h,y  ;right edge, hi
         jmp   FillRaster

* x0 is on the left, so the difference is positive
* (hi byte in A, lo byte in X).
lx0left  stx   ]deltaxl
         sta   ]deltaxh
         lda   in_x0l     ;start at (x0,y0)
         sta   ]xposl
         lda   in_x0h
         sta   ]xposh
         lda   in_y0
         sta   ]ypos
         sec
         sbc   in_y1      ;A = starty - endy

* Value of (starty - endy) is in A, flags still set.
lncommon
         bcs   :posy      ;no borrow: starty >= endy
         eor   #$ff       ;negative, negate it
         adc   #$01       ;(carry is clear here)
         sta   ]deltay
         lda   #$e8       ;INX
         bne   gotdy      ;always
:posy
_lmb     beq   phorizontal ;opcode patched by SetLineMode
         sta   ]deltay
         lda   #$ca       ;DEX
gotdy    sta   _hmody     ;patch the Y-step opcode in
         sta   _vmody     ; all three loops
         sta   _wmody

         do    0          ;***** for regression test
         ldx   #$01
         lda   ]deltaxh
         bne   :iswide
         lda   ]deltaxl
         cmp   #$ff       ;== 255?
         beq   :iswide
         ldx   #$00       ;notwide
:iswide  stx   $300
         lda   ]xposl
         sta   $301
         lda   ]xposh
         sta   $302
         lda   ]ypos
         sta   $303
         ldx   ]deltaxl
         stx   $304
         ldx   ]deltaxh
         stx   $305
         ldx   ]deltay
         stx   $306
         lda   _hmody
         and   #$20       ;nonzero means inc,
         sta   $307       ; zero means dec
         fin              ;*****

* At this point we have the initial X position in
* ]xposl/h, the initial Y position in ]ypos,
* deltax in ]deltaxl/h, deltay in ]deltay, and we've
* patched the Y-update instructions to either INX or
* DEX depending on the direction of movement.
*
* The next step is to decide whether the line is
* horizontal-dominant or vertical-dominant, and
* branch to the appropriate handler.
*
* The core loops for horiz and vert take about
* 80 cycles when moving diagonally, and about
* 20 fewer when moving in the primary direction.
* The wide-horiz is a bit slower.
         ldy   #$01       ;set "wide" flag to 1
         lda   ]deltaxl
         ldx   ]deltaxh
         bne   horzdom    ;width >= 256
         cmp   #$ff       ;width == 255
         beq   horzdom
         dey              ;not wide
         cmp   ]deltay
         bge   horzdom    ;deltax >= deltay (incl. diagonal)
         jmp   vertdom

* We could special-case pure-diagonal lines here
* (just BEQ a couple lines up).  It does
* represent our worst case.  I'm not convinced
* we'll see them often enough to make it worthwhile.
|
||||||
|
|
||||||
|
|
||||||
|
* Horizontal-dominant line.
* On entry: A = ]deltaxl, Y = wide flag (1 if the line
* needs the 16-bit "wide" loop, else 0).
horzdom
         sty   ]wideflag
         sta   ]count     ;:count = deltax + 1
         inc   ]count
         lsr              ;:diff = deltax / 2
         sta   ]diff

* Set Y to the byte offset in the line, and load the
* pixel's AND mask into ]andmask.
         ldx   ]xposl
         lda   ]xposh     ;nonzero if x >= 256
         beq   :lowtab    ;zero: use the low tables
         ldy   div7hi,x
         lda   mod7hi,x
         bpl   :havetab   ;always
* BREAK ;debug
:lowtab  ldy   div7lo,x
         lda   mod7lo,x
:havetab
         tax
         lda   andmask,x
         sta   ]andmask

* Set initial value for line address.
         ldx   ]ypos
         lda   ylooklo,x
         sta   ]hbasl
         lda   ylookhi,x
         ora   g_page
         sta   ]hbasl+1

         lda   ]wideflag  ;is this a "wide" line?
         beq   :narrow    ;nope, stay local
         jmp   widedom

:narrow  lda   colorline,y ;set initial color mask
         sta   _hlcolor+1
         jmp   horzloop

hrts     rts

* Bottom of the loop, essentially.
hnoroll  sta   ]diff      ;3
hdecc    dec   ]count     ;5 :count--
         beq   hrts       ;2 :while (count != 0)
                          ;= 7 or 10

* We keep the byte offset in the line in Y, and the
* line index in X, for the entire loop.
horzloop
_hlcolor lda   #$00       ;2 color pattern (operand patched)
_lmdh    eor   (]hbasl),y ;5 flip all bits
         and   ]andmask   ;3 clear other bits
         eor   (]hbasl),y ;5 restore ours, set theirs
         sta   (]hbasl),y ;6 = 21

* Move right.  We shift the bit mask that determines
* the pixel.  When we shift into bit 7, we know it's
* time to advance another byte.
*
* If this is a shallow line we would benefit from
* keeping the index in X and just doing a 4-cycle
* indexed load to get the mask.  Not having the
* line number in X makes the line calc more
* expensive for steeper lines though.
         lda   ]andmask   ;3
         asl              ;2 shift, losing hi bit
         eor   #$80       ;2 set the hi bit
         bne   :samebyte  ;3 zero means we left the byte
* We could BEQ away and branch back in, but this
* happens every 7 iterations, so on average it's
* a very small improvement.  If we happen to branch
* across a page boundary the double-branch adds
* two more cycles and we lose.
         iny              ;2 advance to next byte
         lda   colorline,y ;4 update color mask
         sta   _hlcolor+1 ;4
         lda   #$81       ;2 reset
:samebyte sta  ]andmask   ;3 = 13 + ((12-1)/7) = 14

* Update error diff.
         lda   ]diff      ;3
         sec              ;2
         sbc   ]deltay    ;3 :diff -= deltay
         bcs   hnoroll    ;2+ :if (diff < 0) ...
                          ;= 11 level, 10 up/down
         adc   ]deltaxl   ;3 : diff += deltax
         sta   ]diff      ;3
_hmody   inx              ;2 : ypos++ (or --)
         lda   ylooklo,x  ;4 update hbasl after line
         sta   ]hbasl     ;3  change
         lda   ylookhi,x  ;4
_pg_or4  ora   #$20       ;2
         sta   ]hbasl+1   ;3
         bne   hdecc      ;3 = +27 this path -> 37
         BREAK
* horizontal: 10+21+14+11=56 cycles/pixel
* diagonal: 7+21+14+37=79 cycles/pixel
|
||||||
|
|
||||||
|
|
||||||
|
* Vertical-dominant line.  Could go up or down.
vertdom
* Pick the terminating line: whichever of y0/y1 is not
* the starting line held in ]ypos.
         ldx   in_y0
         cpx   ]ypos      ;starting at y0?
         bne   :endset    ;no, started at y1: end at y0
         ldx   in_y1      ;yes: end at y1
:endset  stx   _vchk+1    ;end condition

         lda   ]deltay
         lsr
         sta   ]diff      ;:diff = deltay / 2

* Set Y to the byte offset in the line, and load the
* pixel's AND mask into ]andmask.
         ldx   ]xposl
         lda   ]xposh     ;nonzero if x >= 256
         beq   :lowtab    ;zero: use the low tables
         ldy   div7hi,x
         lda   mod7hi,x
         bpl   :havetab   ;always
         BREAK            ;debug
:lowtab  ldy   div7lo,x
         lda   mod7lo,x
:havetab
         tax
         lda   andmask,x  ;initial pixel mask
         sta   ]andmask

         lda   colorline,y ;initial color mask
         sta   _vlcolor+1

         ldx   ]ypos
         jmp   vertloop

* We keep the byte offset in the line in Y, and the
* line index in X, for the entire loop.

* Bottom of the loop, essentially.
vnoroll  sta   ]diff      ;3

vertloop
         lda   ylooklo,x  ;4
         sta   ]hbasl     ;3
         lda   ylookhi,x  ;4
_pg_or5  ora   #$20       ;2
         sta   ]hbasl+1   ;3 = 16

_vlcolor lda   #$00       ;2 color pattern (operand patched)
_lmdv    eor   (]hbasl),y ;5 flip all bits
         and   ]andmask   ;3 clear other bits
         eor   (]hbasl),y ;5 restore ours, set theirs
         sta   (]hbasl),y ;6 = 21

_vchk    cpx   #$00       ;2 was this last line?
         beq   vrts       ;2 yes, done
_vmody   inx              ;2 :ypos++ (or --)

* Update error diff.
         lda   ]diff      ;3
         sec              ;2
         sbc   ]deltaxl   ;3 :diff -= deltax
         bcs   vnoroll    ;2 :if (diff < 0) ...
                          ;= 10 vert, 9 move right

         adc   ]deltay    ;3 : diff += deltay
         sta   ]diff      ;3
* Move right.  We shift the bit mask that determines
* the pixel.  When we shift into bit 7, we know it's
* time to advance another byte.
         lda   ]andmask   ;3
         asl              ;2 shift, losing hi bit
         eor   #$80       ;2 set the hi bit
         beq   :nextbyte  ;2+ goes to zero on 8th bit
         sta   ]andmask   ;3
         bne   vertloop   ;3 = 21 + (18/7) = 24
         BREAK

:nextbyte iny             ;2 advance to next byte
         lda   colorline,y ;4 update color
         sta   _vlcolor+1 ;4
         lda   #$81       ;2 reset
         sta   ]andmask   ;3
         bne   vertloop   ;3 = 18
         BREAK
vrts     rts
* vertical: 3 + 16 + 21 + 6 + 10 = 56 cycles
* diagonal: 16 + 21 + 6 + 9 + 24 = 76 cycles
|
||||||
|
|
||||||
|
|
||||||
|
* "Wide" horizontally-dominant loop. We have to
|
||||||
|
* maintain error-diff and deltax as 16-bit values.
|
||||||
|
* Most of the setup from the "narrow" version carried
|
||||||
|
* over, but we have to re-do the count and diff.
|
||||||
|
*
|
||||||
|
* Normally we set count to (deltax + 1) and decrement
|
||||||
|
* to zero, but it's actually easier to set it equal
|
||||||
|
* to deltax and check for -1.
|
||||||
|
widedom
* Entered from horzdom with the byte offset in Y and
* ]andmask / ]hbasl already set up.  Deltax does not fit
* in 8 bits here, so the count and the error diff are
* recomputed as 16-bit values.  ]count/]counth start at
* deltax and the loop ends when the count reaches -1.
         lda   ]deltaxh   ;:count = deltax
         sta   ]counth
         ldx   ]deltaxl
         stx   ]count
         stx   ]diff
         lsr              ;:diff = deltax / 2
         ror   ]diff      ; (16-bit shift right)
         sta   ]diffh
         ldx   ]ypos

         lda   colorline,y ;set initial color mask
         sta   _wlcolor+1

* We keep the byte offset in the line in Y, and the
* line index in X, for the entire loop.
wideloop
_wlcolor lda   #$00       ;2 color pattern (operand patched)
_lmdw    eor   (]hbasl),y ;5 flip all bits
         and   ]andmask   ;3 clear other bits
         eor   (]hbasl),y ;5 restore ours, set theirs
         sta   (]hbasl),y ;6 = 21

* Move right.  We shift the bit mask that determines
* the pixel.  When we shift into bit 7, we know it's
* time to advance another byte.
         lda   ]andmask   ;3
         asl              ;2 shift, losing hi bit
         eor   #$80       ;2 set the hi bit
         bne   :not7      ;3 goes to zero on 8th bit
         iny              ;  2 advance to next byte
         lda   colorline,y ; 4 update color mask
* The new mask must go into this loop's own LDA operand.
* Storing to _hlcolor+1 (the narrow loop's operand) left
* wide lines using the first byte's color pattern for
* every byte they crossed.
         sta   _wlcolor+1 ;  4
         lda   #$81       ;  2 reset
:not7    sta   ]andmask   ;3 = 13 usually, 25 every 7

* Update error diff, which is a positive number.  If
* it goes negative ("if (diff < 0)") we act.
         lda   ]diff
         sec
         sbc   ]deltay    ;:diff -= deltay
         bcs   wnoroll    ;didn't even roll low byte
         dec   ]diffh     ;borrow from hi byte
         bpl   wnoroll    ;went 1->0, keep going

         adc   ]deltaxl   ;: diff += deltax (carry clear)
         sta   ]diff
         lda   ]diffh
         adc   ]deltaxh
         sta   ]diffh
_wmody   inx              ;: ypos++ (or --)
         lda   ylooklo,x  ;update hbasl after line
         sta   ]hbasl     ; change
         lda   ylookhi,x
_pg_or6  ora   #$20
         sta   ]hbasl+1
         bne   wdecc      ;always
         BREAK

wnoroll  sta   ]diff

wdecc    dec   ]count     ;5 :count--
         lda   ]count     ;3
         cmp   #$ff       ;2
         bne   wideloop   ;3 :while (count > -1)
         dec   ]counth    ;low rolled, decr high
         beq   wideloop   ;went 1->0, keep going
         rts
|
||||||
|
|
||||||
|
|
||||||
|
* Pure-vertical line.  These are common in certain
* applications, and checking for it only adds two
* cycles to the general case.
vertline
* Arrange for X = smaller of y0/y1 (first line drawn)
* and Y = the larger (last line drawn).
         ldx   in_y0
         ldy   in_y1
         cpx   in_y1      ;y0 < y1?
         blt   :sorted    ;yes, go from y0 to y1
         txa              ;no: swap X/Y, by way of A
         tay
         ldx   in_y1
:sorted  stx   ]ypos
         iny              ;loop stops at last line + 1
         sty   _pvytest+1

* The column is the same for both endpoints, so x0 is
* used for the lookup.
         ldx   in_x0l     ;x coord, lo
         lda   in_x0h     ;nonzero if x >= 256
         beq   :lowtab
         ldy   div7hi,x
         lda   mod7hi,x
         bpl   :havetab   ;always
:lowtab  ldy   div7lo,x
         lda   mod7lo,x

* Byte offset is in Y, mod-7 value is in A.
:havetab tax
         lda   andmask,x
         sta   _pvand+1   ;this doesn't change
         lda   colorline,y
         sta   _pvcolor+1 ;nor does this

         ldx   ]ypos      ;top line

* There's a trick where, when (linenum & 0x07) is
* nonzero, you just add 4 to hbasl+1 instead of
* re-doing the lookup.  However, TXA+AND+BEQ
* followed by LDA+CLC+ADC+STA is 16 cycles, the same
* as our self-modified lookup, so it's not a win.
* (And if we used a second ylookhi and self-modded
* the table address, we could shave off another 2.)

* Main pure-vertical loop
pverloop
         lda   ylooklo,x  ;4
         sta   ]hbasl     ;3
         lda   ylookhi,x  ;4
_pg_or7  ora   #$20       ;2
         sta   ]hbasl+1   ;3 (= 16)

_pvcolor lda   #$00       ;2 color pattern (operand patched)
_lmdpv   eor   (]hbasl),y ;5 flip all bits
_pvand   and   #$00       ;2 clear other bits (patched)
         eor   (]hbasl),y ;5
         sta   (]hbasl),y ;6 (= 20)

         inx              ;2
_pvytest cpx   #$00       ;2 done? (operand patched)
         bne   pverloop   ;3 = 7
         rts
* 43 cycles/pixel
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* SetLineMode: select the line drawing mode from in_arg.
*
* in_arg == 0 selects standard drawing; any other value
* selects xdraw.  The mode is applied by rewriting one
* opcode at _lmb and one in each of the four line loops.
*
* A slightly silly feature to get xdraw lines
* without really working for it.
*
********************************
SetLineMode
         lda   in_arg
         beq   :standard

* xdraw: every patched site becomes BIT dp.
         lda   #$24       ;BIT dp
         sta   _lmb
         bne   :loops     ;always (A is nonzero)

* standard: restore the branch at _lmb and the
* EOR (dp),y at the start of each plot sequence.
:standard lda  #$f0       ;BEQ
         sta   _lmb
         lda   #$51       ;EOR (dp),y
:loops   sta   _lmdh
         sta   _lmdv
         sta   _lmdw
         sta   _lmdpv
         rts
|
|
@ -0,0 +1,805 @@
|
||||||
|
********************************
|
||||||
|
* *
|
||||||
|
* Fast Apple II Graphics *
|
||||||
|
* By Andy McFadden *
|
||||||
|
* Version 0.3, Aug 2015 *
|
||||||
|
* *
|
||||||
|
* Main source file *
|
||||||
|
* *
|
||||||
|
* Developed with Merlin-16 *
|
||||||
|
* *
|
||||||
|
********************************
|
||||||
|
|
||||||
|
* Build selector: 1 builds FDRAW.FAST, 0 builds
* FDRAW.SMALL.
USE_FAST equ   1

* Debug selector: 1 turns on the beeps/clicks (and the
* BRK emitted by the BREAK macro), 0 assembles none.
NOISE_ON equ   0


         lst   off
         org   $6000

*
* Macros.
*
spkr     equ   $c030
bell     equ   $ff3a

* CLICK: if enabled, click the speaker (changes flags
* only).
CLICK    mac
         do    NOISE_ON
         bit   spkr
         fin
         <<<

* BEEP: if enabled, beep the speaker (scrambles regs).
BEEP     mac
         do    NOISE_ON
         jsr   bell
         fin
         <<<

* BREAK: if enabled, insert a BRK.
BREAK    mac
         do    NOISE_ON
         brk   $99
         fin
         <<<

* PG_ALIGN: in "fast" mode, tables are aligned on page
* boundaries so indexing never takes the 1-cycle
* page-crossing hit.  In "small" mode the alignment is
* skipped.
PG_ALIGN mac
         do    USE_FAST
         ds    \
         fin
         <<<

*
* Hi-res screen constants.
*
BYTES_PER_ROW = 40
NUM_ROWS = 192
NUM_COLS = 280

*
* Variable storage.  Generic names are given to the
* zero-page scratch locations here; the routines then
* alias their own variable names onto these.
*
* 06-09 are unused (except by SWEET-16)
* 1a-1d are Applesoft hi-res scratch
* cc-cf are only used by INTBASIC
* eb-ef and ff appear totally unused by ROM routines
*
zptr0    equ   $1a        ;2b
zloc0    equ   $06
zloc1    equ   $07
zloc2    equ   $08
zloc3    equ   $09
zloc4    equ   $1c
zloc5    equ   $1d
zloc6    equ   $cc
zloc7    equ   $cd
zloc8    equ   $ce
zloc9    equ   $cf
zloc10   equ   $eb
zloc11   equ   $ec
zloc12   equ   $ed
zloc13   equ   $ee
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* Entry points for external programs.
*
* Layout: a JMP to Init, a two-byte version number, the
* parameter block, padding up to 16 bytes, the table of
* JMPs to the drawing functions, and the raster-fill
* values.
*
********************************
Entry
         jmp   Init       ;initialize data tables
         dfb   0,3        ;version number

*
* Parameters passed in by external programs.
*
in_arg   ds    1          ;generic argument
in_x0l   ds    1          ;X coordinate 0, low part
in_x0h   ds    1          ;X coordinate 0, high part
in_y0    ds    1          ;Y coordinate 0
in_x1l   ds    1          ;X coordinate 1, low part
in_x1h   ds    1          ;X coordinate 1, high part
in_y1    ds    1          ;Y coordinate 1
in_rad   ds    1          ;radius for circles

         ds    3          ;pad to 16 bytes

* Function jump table, three bytes per entry.
         jmp   SetColor
         jmp   SetPage
         jmp   Clear
         jmp   DrawPoint
         jmp   DrawLine
         jmp   DrawRect
         jmp   FillRect
         jmp   DrawCircle
         jmp   FillCircle
         jmp   SetLineMode
         jmp   noimpl     ;reserved2
         jmp   FillRaster

* Raster fill values.  Top, bottom, and pointers to tables
* for the benefit of external callers.
rast_top ds    1
rast_bottom ds 1
         da    rastx0l
         da    rastx0h
         da    rastx1l
         da    rastx1h

* Target for reserved jump table slots: just returns.
noimpl   rts
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* Global variables.
*
********************************

g_inited dfb   0          ;nonzero once Init has run
g_color  dfb   0          ;hi-res color (0-7)
g_page   dfb   $20        ;hi-res page ($20 or $40)
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* Init: put the library into its default state.
*
* Selects color 0, standard line mode and hi-res page 1,
* and marks the library as initialized.  in_arg is
* overwritten.
*
********************************
Init
         lda   #$00
         sta   in_arg
         jsr   SetColor   ;set color to zero
         jsr   SetLineMode ;set normal lines
         lda   #$20
         sta   g_inited   ;any nonzero value will do
         sta   in_arg
         jmp   SetPage    ;set hi-res page 1
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* SetColor: make in_arg the current drawing color.
*
* Returns at once when in_arg equals the current color.
* Otherwise the low three bits of in_arg are stored in
* g_color and the colorline table is rebuilt.
*
********************************
SetColor
         lda   in_arg
         cmp   g_color    ;same as the old color?
         beq   :same

         and   #$07       ;safety first
         sta   g_color

* Update the "colorline" table, which provides a quick color
* lookup for odd/even bytes.  We could also have one table
* per color and self-mod the "LDA addr,y" instructions to
* point to the current one, but that uses a bunch of memory
* and is kind of ugly.  Takes 16 + (12 * 40) = 496 cycles.
         tax              ;2 color indexes the tables
         lda   xormask,x  ;4
         sta   :_xormsk+1 ;4 patch the EOR operand below

         lda   oddcolor,x ;4 pattern for the last byte
         ldy   #BYTES_PER_ROW-1 ;2 fill from the right
]loop    sta   colorline,y ;5
:_xormsk eor   #$00       ;2 switch to other byte's pattern
         dey              ;2
         bpl   ]loop      ;3

:same    rts
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* SetPage: select the hi-res page to draw on.
*
* in_arg must be $20 or $40; anything else jumps to the
* bell routine and changes nothing.  If Init has not
* been called, a message is printed instead.
*
********************************
SetPage
         lda   g_inited   ;let's just check this
         beq   noinit     ; (not called too often)

         lda   in_arg
         cmp   #$20
         beq   :valid
         cmp   #$40
         beq   :valid
         jmp   bell       ;reject anything else
:valid
         sta   g_page

         do    0          ;*****
* Check to see if the values currently in the Y-lookup table
* match our current page setting.  If they don't, we need to
* adjust the code that does lookups.
*
* This approach modifies the table itself, paying a large
* cost now so we don't have to pay it on every lookup.
* However, this costs 2+(16*192)=3074 cycles, while an
* "ORA imm" only adds two to each lookup, so we'd have
* to do a lot of drawing to make this worthwhile.
* (Note: assumes ylookhi is based at $2000 not $0000)
         cmp   ylookhi
         beq   :tabok
         ldy   #NUM_ROWS  ;2
]loop    lda   ylookhi-1,y ;4
         eor   #$60       ;2 $20 <--> $40
         sta   ylookhi-1,y ;5
         dey              ;2
         bne   ]loop      ;3

         else             ;*****

* This approach uses self-modifying code to update the
* relevant instructions.  It's a bit messy to have it
* here, but it saves us from having to do it on
* every call.
*
* We could also have a second y-lookup table and
* use this to update the pointers.  That would let
* us drop the "ORA imm" entirely, without the cost
* of the rewrite above, but eating up another 192 bytes.
         sta   _pg_or1+1  ;rastfill
         sta   _pg_or2+1  ;circle hplot
         sta   _pg_or3+1  ;circle hplot
         sta   _pg_or4+1  ;drawline
         sta   _pg_or5+1  ;drawline
         sta   _pg_or6+1  ;drawline
         sta   _pg_or7+1  ;drawline

         fin              ;*****

:tabok   rts

* Print the zero-terminated message below, one character
* at a time, then return.
noinit   ldy   #$00
]loop    lda   :initmsg,y
         beq   :msgend    ;stop at the terminator
         jsr   $fded      ;cout
         iny
         bne   ]loop
:msgend  rts

:initmsg asc   "FDRAW NOT INITIALIZED",87,87,00
|
||||||
|
|
||||||
|
|
||||||
|
********************************
*
* Clear: fill the current hi-res page with the current
* color.
*
********************************
Clear

         do    USE_FAST   ;*****
* This performs a "visually linear" clear, erasing the screen
* from left to right and top to bottom.  To reduce the amount
* of code required we erase in thirds (top/middle/bottom).
*
* Compare to a "venetian blind" clear, which is what you get
* if you erase memory linearly.
*
* The docs discuss different approaches.  This version
* requires ((2 + 5*64 + 11) * 40 + 14) * 3 = 40002 cycles.
* If we didn't divide it into thirds to keep the top-down
* look, we'd need (5*64 + 9) * 120 = 39480 cycles, so
* we're spending 522 cycles to avoid the venetian look.
         lda   :clrloop+2 ;hi byte of the first STA
         cmp   g_page
         beq   :pageok

* We're on the wrong hi-res page.  Flip to the other one.
* 4 + (20*64) = 1284 cycles to do the flip (+ a few more
* because we're probably crossing a page boundary).
* Y steps down by 3 (one STA abs,y) from 64*3, which is
* the same value as NUM_ROWS.
         BEEP
         ldy   #NUM_ROWS  ;2
]loop    lda   :clrloop-3+2,y ;4
         eor   #$60       ;2 $20 <-> $40
         sta   :clrloop-3+2,y ;5
         dey              ;2
         dey              ;2
         dey              ;2
         bne   ]loop      ;3

:pageok  ldx   g_color    ;grab the current color
         lda   xormask,x
         sta   :_xormsk+1
         lda   evencolor,x

         ldy   #0
         jsr   :clearthird
         ldy   #BYTES_PER_ROW
         jsr   :clearthird
         ldy   #BYTES_PER_ROW*2
* fall through into :clearthird for final pass

* Clear one third of the screen.  Y = starting byte offset,
* A = color pattern for that byte.
:clearthird
         ldx   #BYTES_PER_ROW-1 ;2
* 64 STA abs,y instructions, generated eight at a time.
* Order: $2000,$2400,...,$3c00, then $2080,$2480,... up
* to $3f80.  The page-flip code above depends on these
* being 64 consecutive 3-byte instructions at :clrloop.
:clrloop
]base    =     $2000
         lup   8
         sta   ]base,y    ;5 (* 64)
         sta   ]base+$400,y
         sta   ]base+$800,y
         sta   ]base+$c00,y
         sta   ]base+$1000,y
         sta   ]base+$1400,y
         sta   ]base+$1800,y
         sta   ]base+$1c00,y
]base    =     ]base+$80
         --^
:_xormsk eor   #$00       ;2 flip odd/even bits
         iny              ;2
         dex              ;2
         bmi   :done      ;2
         jmp   :clrloop   ;3
:done    rts

         else             ;***** not USE_FAST

* This version was suggested by Marcus Heuser on
* comp.sys.apple2.programmer.  It does a "venetian blind"
* clear, and takes (5 * 32 + 7) * 248 = 41416 cycles.
* It overwrites half of the screen holes.
         lda   :clrloop+5 ;hi byte of the second STA
         cmp   g_page
         beq   :pageok

* We're on the wrong hi-res page.  Flip to the other one.
* 12 + (20*31) = 632 cycles to do the flip.  We have to
* single out the first entry because it's $1f not $20.
         BEEP
         lda   :clrloop+2 ;4
         eor   #$20       ;2 $1f <-> $3f
         sta   :clrloop+2 ;4
         ldy   #31*3      ;2
]loop    lda   :clrloop+2,y ;4
         eor   #$60       ;2 $20 <-> $40
         sta   :clrloop+2,y ;5
         dey              ;2
         dey              ;2
         dey              ;2
         bne   ]loop      ;3

:pageok  ldx   g_color
         lda   xormask,x
         sta   :_xormsk+1
         lda   oddcolor,x
         ldy   #248       ;120 + 8 + 120
:clrloop
]addr    =     $1fff
         lup   32         ;begin a loop in assembler
         sta   ]addr,y    ;5
]addr    =     ]addr+$100 ;sta 20ff,21ff,...
         --^
:_xormsk eor   #$00       ;2
         dey              ;2
         bne   :clrloop   ;3
         rts

         fin              ;***** not USE_FAST
|
||||||
|
|
||||||
|
|
||||||
|
********************************
|
||||||
|
*
|
||||||
|
* Draw rectangle outline.
|
||||||
|
*
|
||||||
|
********************************
|
||||||
|
DrawRect
* Outline the rectangle (in_x0,in_y0)-(in_x1,in_y1).
*
* Four ordinary line draws would work, but a caller who wants
* that can issue them directly.  To make sure the outline
* always comes out in the correct color, each vertical side
* is rendered as a two-pixel-wide filled column, and the top
* and bottom edges are rendered as one-row raster spans.
*
* Special cases:
*  - One row high: hand the whole thing to DrawLine.
*  - Left == right: also DrawLine, since widening the side
*    by +1/-1 would overdraw.
*  - Two rows high: the top and bottom spans cover everything,
*    so the side columns are skipped.
*  - Taller: the side columns run from in_y0+1 to in_y1-1;
*    the top and bottom rows are left to the spans, which
*    saves a few cycles.

         sec                  ;for the SBC below (LDA/STA/DEC keep C)
         lda   in_y1
         sta   rast_bottom
         dec   rast_bottom    ;side columns end one row above bottom
         sbc   in_y0          ;A = in_y1 - in_y0
         beq   :asline        ;one row high, just a line
         cmp   #1
         beq   :spans         ;two rows high, spans only
         ldy   in_y0
         iny                  ;side columns start one row below top
         sty   rast_top

         lda   in_x0h         ;16-bit test for left == right
         cmp   in_x1h
         bne   :ledge
         lda   in_x0l
         cmp   in_x1l
         bne   :ledge1        ;A already holds in_x0l

:asline  jmp   DrawLine       ;degenerate rect, treat as a line

* Left column spans x0 .. x0+1.  Y still holds rast_top.
:ledge   lda   in_x0l
:ledge1  sta   rastx0l,y
         clc
         adc   #1
         sta   rastx1l,y      ;low byte of x0+1, carry = overflow
         lda   in_x0h
         ora   #$80           ;"repeat" flag for FillRaster
         sta   rastx0h,y
         and   #$7f           ;strip the flag again
         adc   #0             ;fold in carry from the low byte
         sta   rastx1h,y
         jsr   FillRaster

* Right column spans x1-1 .. x1.
         ldy   rast_top
         lda   in_x1l
         sta   rastx1l,y
         sec
         sbc   #1
         sta   rastx0l,y      ;low byte of x1-1, carry = no borrow
         lda   in_x1h
         sta   rastx1h,y
         sbc   #0             ;apply borrow from the low byte
         ora   #$80           ;"repeat" flag
         sta   rastx0h,y
         jsr   FillRaster

* Top row, then fall through for the bottom row.
:spans   ldy   in_y0
         jsr   :hspan
         ldy   in_y1

* Fill the single row in Y from in_x0 to in_x1.  The high
* byte of the left edge is stored without the repeat flag.
:hspan   sty   rast_top
         sty   rast_bottom
         lda   in_x1h
         sta   rastx1h,y
         lda   in_x1l
         sta   rastx1l,y
         lda   in_x0h
         sta   rastx0h,y
         lda   in_x0l
         sta   rastx0l,y
         jmp   FillRaster
|
||||||
|
|
||||||
|
|
||||||
|
********************************
|
||||||
|
*
|
||||||
|
* Draw filled rectangle.
|
||||||
|
*
|
||||||
|
********************************
|
||||||
|
FillRect
* Draw a filled rectangle.  Only the first row of the raster
* table is populated; the "repeat" flag on the left edge
* tells FillRaster to reuse that row's span for every row
* down to rast_bottom.
*
* Requires in_y0 = top, in_y1 = bottom, in_x0 = left,
* in_x1 = right.
         lda   in_y1
         sta   rast_bottom
         ldy   in_y0          ;Y indexes the top row's table entry
         sty   rast_top

         lda   in_x1h         ;right edge
         sta   rastx1h,y
         lda   in_x1l
         sta   rastx1l,y

         lda   in_x0l         ;left edge
         sta   rastx0l,y
         lda   in_x0h
         ora   #$80           ;"repeat" flag
         sta   rastx0h,y

         jmp   FillRaster
|
||||||
|
|
||||||
|
|
||||||
|
********************************
|
||||||
|
*
|
||||||
|
* Fill an area defined by the raster tables.
|
||||||
|
*
|
||||||
|
********************************
|
||||||
|
FillRaster

* Paint the spans held in the raster tables.  For each row
* from rast_top through rast_bottom (inclusive), the left
* edge comes from rastx0l/rastx0h and the right edge from
* rastx1l/rastx1h.
*
* Any convex polygon can be rendered this way once it has
* been rasterized into the tables.
*
* "Repeat" mode: if bit 7 of a row's rastx0h entry is set,
* the flag is copied to the following row and, after the
* first row, the previously computed byte/bit positions are
* reused instead of being looked up again.  Per the original
* notes this saves about 40 cycles of per-row overhead for
* rectangles (plus the cost of filling in the table), at a
* price of 3 cycles per row in the general case.
*
* The flag could also be used to draw vertical lines, but
* that mostly helps an external caller who already knows the
* line is vertical; DrawLine handles vertical lines well,
* and testing for the case on every vertical-dominant line
* was judged a losing proposition.

]hbasl   equ   zptr0
]hbash   equ   zptr0+1
]lftbyte equ   zloc0
]lftbit  equ   zloc1
]rgtbyte equ   zloc2
]rgtbit  equ   zloc3
]line    equ   zloc4
]andmask equ   zloc5
]cur_line equ  zloc6
]repting equ   zloc7

         ldx   g_color        ;fetch XOR byte for this color
         lda   xormask,x
         do    USE_FAST       ;*****
         cmp   rast_unroll+3  ;unrolled loop already patched?
         beq   :xorok
         jsr   fixrastxor     ;no, rewrite its EOR operands
:xorok
         else
         sta   _xorcolor+1    ;patch the EOR in the slow loop
         fin                  ;*****

         lda   #$00           ;not in repeat mode yet
         sta   ]repting

         ldy   rast_top

* Per-row loop.  Y must hold the row number on entry.
rastloop
         sty   ]cur_line      ;3
         ldx   ylooklo,y      ;4
         stx   ]hbasl         ;3
         lda   ylookhi,y      ;4
_pg_or1  ora   #$20           ;2 operand is $20 or $40 (hi-res page)
         sta   ]hbash         ;3 = 19 cycles
         do    USE_FAST-1     ;***** assembled only when USE_FAST=0
         stx   _wrhires+1     ;point the slow loop's STA at this row
         sta   _wrhires+2
         fin                  ;*****

* Left edge: split X0 into byte column and bit position.
         ldx   rastx0l,y      ;4 row number in Y
         lda   rastx0h,y      ;4
         bpl   :chkhi         ;2 no repeat flag
         sta   rastx0h+1,y    ;4 copy flag to the next row
         lda   ]repting       ;3 first flagged row?
         beq   :first         ;2 yes, do the lookups once
         lda   ]rgtbyte       ;3 later code expects this in A
         bpl   :span          ;3 always
:first   lda   rastx0h,y      ;reload high byte
         sta   ]repting       ;any nonzero value will do
         and   #$7f           ;strip repeat flag
:chkhi   beq   :lftlo         ;high byte zero, use the "lo" tables
         lda   mod7hi,x
         sta   ]lftbit
         lda   div7hi,x
         sta   ]lftbyte
         bpl   :lftdone       ;always
         BREAK                ;debug
:lftlo   lda   mod7lo,x
         sta   ]lftbit
         lda   div7lo,x
         sta   ]lftbyte
:lftdone

* Right edge: same split for X1.  Leaves ]rgtbyte in A.
         ldx   rastx1l,y      ;4 row number in Y
         lda   rastx1h,y      ;4
         beq   :rgtlo         ;3
         lda   mod7hi,x
         sta   ]rgtbit
         lda   div7hi,x
         sta   ]rgtbyte
         bpl   :rgtdone       ;always
         BREAK                ;debug
:rgtlo   lda   mod7lo,x       ;4
         sta   ]rgtbit        ;3
         lda   div7lo,x       ;4
         sta   ]rgtbyte       ;3 = 25 for X1 < 256
:rgtdone

:span
         cmp   ]lftbyte       ;3 both edges in one byte?
         bne   :multi         ;3

* Single-byte span.  The mask is the intersection of the
* left and right masks, so it gets its own code path.
         ldy   ]lftbit
         lda   leftmask,y     ;pixels at or right of the left edge
         ldx   ]rgtbit
         and   rightmask,x    ;...and at or left of the right edge
         sta   ]andmask

         ldy   ]lftbyte
         lda   colorline,y    ;color bits for this column
         eor   (]hbasl),y     ;merge with screen
         and   ]andmask       ;keep only our pixels
         eor   (]hbasl),y     ;merge back
         sta   (]hbasl),y
         jmp   rastlinedone

* Multi-byte span.  Partial bytes at either end are merged
* with the screen; whole bytes in between are overwritten.
* ]rgtbyte is in A on entry.
:multi
         sec                  ;2 X = number of bytes touched,
         sbc   ]lftbyte       ;3 partial ones included
         tax                  ;2
         inx                  ;2

         ldy   ]rgtbit        ;3
         cpy   #6             ;2 right edge on last pixel of byte?
         beq   :rgtfull       ;3 yes, no partial byte on the right
         lda   rightmask,y    ;merge the partial right byte
         sta   ]andmask
         ldy   ]rgtbyte
         lda   colorline,y
         eor   (]hbasl),y
         and   ]andmask
         eor   (]hbasl),y
         sta   (]hbasl),y
         dex                  ;one fewer byte for the fill
:rgtfull

         ldy   ]lftbit        ;3 left edge on first pixel of byte?
         beq   :lftfull       ;3 yes, no partial byte on the left
         lda   leftmask,y     ;merge the partial left byte
         sta   ]andmask
         ldy   ]lftbyte
         lda   colorline,y
         eor   (]hbasl),y
         and   ]andmask
         eor   (]hbasl),y
         sta   (]hbasl),y
         dex                  ;one fewer byte for the fill
         beq   rastlinedone   ;nothing left in between
         iny                  ;fill starts one byte further right
         bne   :fill          ;always
         BREAK
:lftfull

         ldy   ]lftbyte       ;3
:fill

         do    USE_FAST       ;***** "fast" fill
* Enter the unrolled store sequence at the point that leaves
* exactly X bytes to write.  Cost is 10 cycles per byte with
* an extra 14 cycles of overhead, so this starts to win at
* 4 bytes.
         lda   rastunidx,x    ;4 low byte of entry point
         sta   :_unjmp+1      ;4 patch the JMP operand
         lda   colorline,y    ;4 odd/even color value to start with
:_unjmp  jmp   rast_unroll    ;3

         else                 ;***** "slow" fill
* Store loop, executed 0-40 times per row at 14 cycles/byte.
         lda   colorline,y    ;odd/even color value to start with
_wrhires sta   $2000,y        ;5 operand replaced with row address
_xorcolor eor  #$00           ;2 operand replaced with $00/$7f
         iny                  ;2
         dex                  ;2
         bne   _wrhires       ;3

         fin                  ;*****

rastlinedone
         ldy   ]cur_line      ;3 reached the last row?
         cpy   rast_bottom    ;4
         bge   :exit          ;2
         iny                  ;2
         jmp   rastloop       ;3 row number must be in Y

:exit    rts
|
||||||
|
|
||||||
|
fixrastxor
* NOTE(review): the label is outside the DO/FIN below, so when
* USE_FAST is zero it still exists but no code is assembled for
* it.  The only caller visible here (FillRaster) is itself
* inside DO USE_FAST.
|
||||||
|
do USE_FAST ;*****
|
||||||
|
* Update the EOR statements in the unrolled rastfill code.
|
||||||
|
* Doing this with a loop takes ~600 cycles, doing it with
|
||||||
|
* unrolled stores takes 160. We only do this when we
|
||||||
|
* need to, so changing the color from green to blue won't
|
||||||
|
* cause this to run.
|
||||||
|
*
|
||||||
|
* Call with the XOR value in A.
* A is stored unchanged into every EOR operand; X and Y are
* not touched by the stores themselves.
|
||||||
|
* Each rast_unroll step is 5 bytes (STA (zp),Y / EOR #imm / INY),
* so the EOR operand of step k is at rast_unroll+3+5*k.
]offset = 0
|
||||||
|
lup BYTES_PER_ROW
|
||||||
|
sta rast_unroll+3+]offset
|
||||||
|
]offset = ]offset+5
|
||||||
|
--^
|
||||||
|
* NOTE(review): BEEP is a macro not defined in this file --
* presumably a debugging aid; confirm it emits nothing in
* release builds.
BEEP
|
||||||
|
rts
|
||||||
|
fin ;*****
|
||||||
|
|
||||||
|
|
||||||
|
* include the line functions
|
||||||
|
put FDRAW.LINE
|
||||||
|
|
||||||
|
* include the circle functions
|
||||||
|
put FDRAW.CIRCLE
|
||||||
|
|
||||||
|
lst on
|
||||||
|
CODE_END equ * ;end of code section
|
||||||
|
lst off
|
||||||
|
|
||||||
|
* include the data tables
|
||||||
|
put FDRAW.TABLES
|
||||||
|
|
||||||
|
lst on
|
||||||
|
DAT_END equ * ;end of data / BSS
|
||||||
|
lst off
|
||||||
|
|
||||||
|
* Save the appropriate object file.
|
||||||
|
do USE_FAST
|
||||||
|
sav FDRAW.FAST
|
||||||
|
else
|
||||||
|
sav FDRAW.SMALL
|
||||||
|
fin
|
|
@ -0,0 +1,339 @@
|
||||||
|
********************************
|
||||||
|
* *
|
||||||
|
* Fast Apple II Graphics *
|
||||||
|
* By Andy McFadden *
|
||||||
|
* Version 0.3, Aug 2015 *
|
||||||
|
* *
|
||||||
|
* Pre-computed data and *
|
||||||
|
* large internal buffers. *
|
||||||
|
* (Included by FDRAW.S) *
|
||||||
|
* *
|
||||||
|
* Developed with Merlin-16 *
|
||||||
|
* *
|
||||||
|
********************************
|
||||||
|
|
||||||
|
* Expected layout with alignment:
|
||||||
|
*
|
||||||
|
* P1 ylooklo, misc tables
|
||||||
|
* P2 ylookhi, colorline
|
||||||
|
* P3 rastx0l
|
||||||
|
* P4 rastx0h
|
||||||
|
* P5 rastx1l
|
||||||
|
* P6 rastx1h, div7hi, mod7hi
|
||||||
|
* P7 div7lo
|
||||||
|
* P8 mod7lo
|
||||||
|
* P9 rast_unroll, rastunidx
|
||||||
|
*
|
||||||
|
* Tables should be just under $900 bytes.
|
||||||
|
|
||||||
|
PG_ALIGN
|
||||||
|
|
||||||
|
* Hi-res Y lookup, low part (192 bytes).
|
||||||
|
ylooklo HEX 0000000000000000
|
||||||
|
HEX 8080808080808080
|
||||||
|
HEX 0000000000000000
|
||||||
|
HEX 8080808080808080
|
||||||
|
HEX 0000000000000000
|
||||||
|
HEX 8080808080808080
|
||||||
|
HEX 0000000000000000
|
||||||
|
HEX 8080808080808080
|
||||||
|
HEX 2828282828282828
|
||||||
|
HEX a8a8a8a8a8a8a8a8
|
||||||
|
HEX 2828282828282828
|
||||||
|
HEX a8a8a8a8a8a8a8a8
|
||||||
|
HEX 2828282828282828
|
||||||
|
HEX a8a8a8a8a8a8a8a8
|
||||||
|
HEX 2828282828282828
|
||||||
|
HEX a8a8a8a8a8a8a8a8
|
||||||
|
HEX 5050505050505050
|
||||||
|
HEX d0d0d0d0d0d0d0d0
|
||||||
|
HEX 5050505050505050
|
||||||
|
HEX d0d0d0d0d0d0d0d0
|
||||||
|
HEX 5050505050505050
|
||||||
|
HEX d0d0d0d0d0d0d0d0
|
||||||
|
HEX 5050505050505050
|
||||||
|
HEX d0d0d0d0d0d0d0d0
|
||||||
|
|
||||||
|
* Color masks for odd/even bytes, colors 0-7.
|
||||||
|
evencolor dfb $00,$2a,$55,$7f,$80,$aa,$d5,$ff
|
||||||
|
oddcolor dfb $00,$55,$2a,$7f,$80,$d5,$aa,$ff
|
||||||
|
|
||||||
|
* XOR mask for colors 0-7 - non-BW flip on odd/even.
|
||||||
|
xormask dfb $00,$7f,$7f,$00,$00,$7f,$7f,$00
|
||||||
|
|
||||||
|
* AND mask for the 7 pixel positions, high bit set
|
||||||
|
* for the color shift.
|
||||||
|
andmask dfb $81,$82,$84,$88,$90,$a0,$c0
|
||||||
|
|
||||||
|
* These are pixel AND masks, used with the modulo 7
|
||||||
|
* result. Entry #2 in leftmask means we're touching
|
||||||
|
* the rightmost 5 pixels, and entry #2 in rightmask
|
||||||
|
* means we're touching the 3 leftmost pixels.
|
||||||
|
*
|
||||||
|
* The high bit is always set, because we want to
|
||||||
|
* keep the color's high bit.
|
||||||
|
leftmask dfb $ff,$fe,$fc,$f8,$f0,$e0,$c0
|
||||||
|
rightmask dfb $81,$83,$87,$8f,$9f,$bf,$ff
|
||||||
|
|
||||||
|
PG_ALIGN
|
||||||
|
|
||||||
|
* Hi-res Y lookup, high part (192 bytes).
|
||||||
|
* OR with $20 or $40.
|
||||||
|
ylookhi HEX 0004080c1014181c
|
||||||
|
HEX 0004080c1014181c
|
||||||
|
HEX 0105090d1115191d
|
||||||
|
HEX 0105090d1115191d
|
||||||
|
HEX 02060a0e12161a1e
|
||||||
|
HEX 02060a0e12161a1e
|
||||||
|
HEX 03070b0f13171b1f
|
||||||
|
HEX 03070b0f13171b1f
|
||||||
|
HEX 0004080c1014181c
|
||||||
|
HEX 0004080c1014181c
|
||||||
|
HEX 0105090d1115191d
|
||||||
|
HEX 0105090d1115191d
|
||||||
|
HEX 02060a0e12161a1e
|
||||||
|
HEX 02060a0e12161a1e
|
||||||
|
HEX 03070b0f13171b1f
|
||||||
|
HEX 03070b0f13171b1f
|
||||||
|
HEX 0004080c1014181c
|
||||||
|
HEX 0004080c1014181c
|
||||||
|
HEX 0105090d1115191d
|
||||||
|
HEX 0105090d1115191d
|
||||||
|
HEX 02060a0e12161a1e
|
||||||
|
HEX 02060a0e12161a1e
|
||||||
|
HEX 03070b0f13171b1f
|
||||||
|
HEX 03070b0f13171b1f
|
||||||
|
|
||||||
|
* Masks for current color (even/odd), e.g. 55 2a 55 2a ...
|
||||||
|
* Updated whenever the color changes.
|
||||||
|
colorline ds 40
|
||||||
|
|
||||||
|
PG_ALIGN
|
||||||
|
rastx0l ds NUM_ROWS
|
||||||
|
PG_ALIGN
|
||||||
|
rastx0h ds NUM_ROWS
|
||||||
|
ds 1 ;repeat mode can overstep
|
||||||
|
PG_ALIGN
|
||||||
|
rastx1l ds NUM_ROWS
|
||||||
|
PG_ALIGN
|
||||||
|
rastx1h ds NUM_ROWS
|
||||||
|
|
||||||
|
* Lookup tables for dividing 0-279 by 7. The "hi"
|
||||||
|
* parts are 24 bytes each, so they fit in the space
|
||||||
|
* left on the page after the previous 192-byte
|
||||||
|
* entry. The "lo" parts each fill a page.
|
||||||
|
div7hi HEX 2424242525252525
|
||||||
|
HEX 2525262626262626
|
||||||
|
HEX 2627272727272727
|
||||||
|
mod7hi HEX 0405060001020304
|
||||||
|
HEX 0506000102030405
|
||||||
|
HEX 0600010203040506
|
||||||
|
|
||||||
|
PG_ALIGN
|
||||||
|
|
||||||
|
div7lo HEX 0000000000000001
|
||||||
|
HEX 0101010101010202
|
||||||
|
HEX 0202020202030303
|
||||||
|
HEX 0303030304040404
|
||||||
|
HEX 0404040505050505
|
||||||
|
HEX 0505060606060606
|
||||||
|
HEX 0607070707070707
|
||||||
|
HEX 0808080808080809
|
||||||
|
HEX 0909090909090a0a
|
||||||
|
HEX 0a0a0a0a0a0b0b0b
|
||||||
|
HEX 0b0b0b0b0c0c0c0c
|
||||||
|
HEX 0c0c0c0d0d0d0d0d
|
||||||
|
HEX 0d0d0e0e0e0e0e0e
|
||||||
|
HEX 0e0f0f0f0f0f0f0f
|
||||||
|
HEX 1010101010101011
|
||||||
|
HEX 1111111111111212
|
||||||
|
HEX 1212121212131313
|
||||||
|
HEX 1313131314141414
|
||||||
|
HEX 1414141515151515
|
||||||
|
HEX 1515161616161616
|
||||||
|
HEX 1617171717171717
|
||||||
|
HEX 1818181818181819
|
||||||
|
HEX 1919191919191a1a
|
||||||
|
HEX 1a1a1a1a1a1b1b1b
|
||||||
|
HEX 1b1b1b1b1c1c1c1c
|
||||||
|
HEX 1c1c1c1d1d1d1d1d
|
||||||
|
HEX 1d1d1e1e1e1e1e1e
|
||||||
|
HEX 1e1f1f1f1f1f1f1f
|
||||||
|
HEX 2020202020202021
|
||||||
|
HEX 2121212121212222
|
||||||
|
HEX 2222222222232323
|
||||||
|
HEX 2323232324242424
|
||||||
|
mod7lo HEX 0001020304050600
|
||||||
|
HEX 0102030405060001
|
||||||
|
HEX 0203040506000102
|
||||||
|
HEX 0304050600010203
|
||||||
|
HEX 0405060001020304
|
||||||
|
HEX 0506000102030405
|
||||||
|
HEX 0600010203040506
|
||||||
|
HEX 0001020304050600
|
||||||
|
HEX 0102030405060001
|
||||||
|
HEX 0203040506000102
|
||||||
|
HEX 0304050600010203
|
||||||
|
HEX 0405060001020304
|
||||||
|
HEX 0506000102030405
|
||||||
|
HEX 0600010203040506
|
||||||
|
HEX 0001020304050600
|
||||||
|
HEX 0102030405060001
|
||||||
|
HEX 0203040506000102
|
||||||
|
HEX 0304050600010203
|
||||||
|
HEX 0405060001020304
|
||||||
|
HEX 0506000102030405
|
||||||
|
HEX 0600010203040506
|
||||||
|
HEX 0001020304050600
|
||||||
|
HEX 0102030405060001
|
||||||
|
HEX 0203040506000102
|
||||||
|
HEX 0304050600010203
|
||||||
|
HEX 0405060001020304
|
||||||
|
HEX 0506000102030405
|
||||||
|
HEX 0600010203040506
|
||||||
|
HEX 0001020304050600
|
||||||
|
HEX 0102030405060001
|
||||||
|
HEX 0203040506000102
|
||||||
|
HEX 0304050600010203
|
||||||
|
|
||||||
|
|
||||||
|
* RastFill unrolled loop. At each step we store the current
|
||||||
|
* color value, XOR it to flip the bits if needed, and advance.
|
||||||
|
* The caller needs to set the appropriate initial value based
|
||||||
|
* on whether the address is odd or even.
|
||||||
|
*
|
||||||
|
* We can use a 3-cycle "EOR dp" or a 2-cycle "EOR imm". The
|
||||||
|
* former is one cycle slower, the latter requires us to
|
||||||
|
* self-mod 40 instructions when the color changes.
|
||||||
|
*
|
||||||
|
* This must be page-aligned so that we can take the value
|
||||||
|
* from the rastunidx table and self-mod a JMP without having
|
||||||
|
* to do a 16-bit add. We have just enough room for the
|
||||||
|
* unrolled loop (40*5+3) and x5 table (41) = 244 bytes, fits
|
||||||
|
* on a single page.
|
||||||
|
|
||||||
|
do USE_FAST ;*****
|
||||||
|
ds \ ;pad to the next page boundary
|
||||||
|
]hbasl equ zptr0 ;must match FillRaster
|
||||||
|
* FillRaster jumps into this sequence with the starting color
* byte in A and the first byte column in Y.
rast_unroll equ *
|
||||||
|
lst off
|
||||||
|
lup BYTES_PER_ROW
|
||||||
|
sta (]hbasl),y ;6
|
||||||
|
eor #$00 ;2 operand is rewritten by fixrastxor
|
||||||
|
iny ;2 10 cycles, 5 bytes
|
||||||
|
--^
|
||||||
|
jmp rastlinedone ;back to the per-row code in FillRaster
|
||||||
|
|
||||||
|
* Index into rast_unroll. If we need to output N bytes,
|
||||||
|
* we want to jump to (rast_unroll + (40 - N) * 5) (where
|
||||||
|
* 5 is the number of bytes per iteration).
* Entry N holds only the low byte of that address; FillRaster
* stores it into the operand of its JMP, which works because
* rast_unroll starts on a page boundary.  Entry 0 is the
* offset of the JMP rastlinedone above.
|
||||||
|
rastunidx
|
||||||
|
]offset = BYTES_PER_ROW*5
|
||||||
|
lup BYTES_PER_ROW+1 ;0-40
|
||||||
|
dfb ]offset
|
||||||
|
]offset = ]offset-5
|
||||||
|
--^
|
||||||
|
|
||||||
|
fin ;*****
|
||||||
|
|
||||||
|
|
||||||
|
********************************
|
||||||
|
*
|
||||||
|
* Code used to generate tables above. If you want to
|
||||||
|
* decrease load size, use these functions to generate
|
||||||
|
* the data into empty memory, then discard the code.
|
||||||
|
* (Maybe use a negative DS and overlap with rastx0l?)
|
||||||
|
*
|
||||||
|
********************************
|
||||||
|
DO 0 ;*****
|
||||||
|
|
||||||
|
init_ylook
]hbasl   equ   zptr1          ;bascalc leaves its result here
]hbash   equ   zptr1+1

* Fill in the ylooklo/ylookhi tables by calling bascalc for
* every row, working from the last row down to row 0.
*
* The high bytes are stored exactly as bascalc returns them
* ($00-$1f), i.e. relative to a $0000 base.  That matches
* the precomputed ylookhi table and FillRaster, which ORs
* the hi-res page ($20 or $40) in at run time.  To build a
* table with the page baked in, insert "ora #$20" (or #$40)
* before the store to ylookhi.
*
* Clobbers A, X, Y.
         ldx   #NUM_ROWS      ;X = rows remaining
         ldy   #NUM_ROWS-1    ;Y = current row / table index
]loop    tya
         jsr   bascalc        ;result in ]hbasl/]hbash, X/Y kept
         lda   ]hbasl
         sta   ylooklo,y
         lda   ]hbash
         sta   ylookhi,y
         dey
         dex
         bne   ]loop
         rts
|
||||||
|
|
||||||
|
* Hi-res base address calculation. This is based on the
|
||||||
|
* HPOSN routine at $F411.
|
||||||
|
*
|
||||||
|
* Call with the line in A. The results are placed into
|
||||||
|
* zptr1. X and Y are not disturbed.
|
||||||
|
*
|
||||||
|
* The value is in the $0000-1fff range, so you must OR
|
||||||
|
* the desired hi-res page in.
|
||||||
|
*
|
||||||
|
bascalc
* In the bit diagrams below, y7..y0 are the bits of the row
* number passed in A.  Results go to ]hbasl/]hbash as they
* are defined at this point in the file (zptr1 in
* init_ylook).  A is clobbered.
|
||||||
|
pha ;save row number
|
||||||
|
and #$c0 ;A = y7 y6 0 0 0 0 0 0
|
||||||
|
sta ]hbasl
|
||||||
|
lsr
|
||||||
|
lsr ;A = 0 0 y7 y6 0 0 0 0
|
||||||
|
ora ]hbasl ;A = y7 y6 y7 y6 0 0 0 0
|
||||||
|
sta ]hbasl
|
||||||
|
pla ;A = row number again
|
||||||
|
sta ]hbash
|
||||||
|
asl
|
||||||
|
asl
|
||||||
|
asl ;carry = y5
|
||||||
|
rol ]hbash ;hbash = y6 y5 y4 y3 y2 y1 y0 y5
|
||||||
|
asl ;carry = y4
|
||||||
|
rol ]hbash ;hbash = y5 y4 y3 y2 y1 y0 y5 y4
|
||||||
|
asl ;carry = y3
|
||||||
|
ror ]hbasl ;hbasl = y3 y7 y6 y7 y6 0 0 0
|
||||||
|
lda ]hbash
|
||||||
|
and #$1f ;keep 0 0 0 y2 y1 y0 y5 y4
|
||||||
|
sta ]hbash
|
||||||
|
rts
|
||||||
|
|
||||||
|
*
|
||||||
|
* Create divide-by-7 tables.
|
||||||
|
*
|
||||||
|
mkdivtab
]val     equ   zloc0          ;current quotient

* Build the divide-by-7 and modulo-7 lookup tables.
* div7lo/mod7lo get 256 entries (values 0-255); div7hi and
* mod7hi get the remaining 24 entries (values 256-279).
* Y is the table index, X the running remainder (0-6).
         ldx   #0
         stx   ]val
         ldy   #0
]loop    lda   ]val
         sta   div7lo,y
         txa
         sta   mod7lo,y
         inx
         iny
         beq   :dohi          ;index wrapped, low tables full
         cpx   #7
         bne   ]loop
         inc   ]val           ;remainder rolled over: next quotient
         ldx   #0
         beq   ]loop          ;always

* Carry on with value 256.  The remainder in X is 4 at this
* point (256 = 36*7 + 4), so it is safe to have skipped the
* ]val update check on the way out of the loop above.
:dohi
]loop    lda   ]val
         sta   div7hi,y
         txa
         sta   mod7hi,y
         iny
         cpy   #280-256
         beq   :exit
         inx
         cpx   #7
         bne   ]loop
         inc   ]val
         ldx   #0
         beq   ]loop          ;always

:exit    rts
|
||||||
|
|
||||||
|
FIN ;*****
|
61
README.md
61
README.md
|
@ -1,2 +1,59 @@
|
||||||
# fdraw
|
fdraw
|
||||||
Fast Apple II graphics
|
=====
|
||||||
|
|
||||||
|
Fast graphics routines for the Apple II
|
||||||
|
By Andy McFadden
|
||||||
|
Version 0.3, August 2015
|
||||||
|
|
||||||
|
## Overview ##
|
||||||
|
|
||||||
|
The fdraw library provides fast rendering of points, lines, rectangles,
|
||||||
|
and circles, as well as high-speed screen clears, for Apple II hi-res
|
||||||
|
graphics. It can be used from Applesoft or 6502 assembly language.
|
||||||
|
|
||||||
|
Two disk images are available in the [fdraw-disks](fdraw-disks.zip) zip
|
||||||
|
archive. `fdrawdemo.do` is a 140K disk image with the demos that will
|
||||||
|
run on an Apple ][+ or later. `fdrawdev.po` is an 800K disk image with
|
||||||
|
the source code, demos, and a few extras.
|
||||||
|
|
||||||
|
A video of the demos running in the AppleWin emulator
|
||||||
|
[is available](https://www.youtube.com/watch?v=z2RFGVoaROE).
|
||||||
|
|
||||||
|
Learn more about how fdraw works in the
|
||||||
|
[library documentation](docs/manual.md).
|
||||||
|
|
||||||
|
Learn about the demos in the [demo documentation](docs/demos.md).
|
||||||
|
|
||||||
|
Learn more about what possessed me to write a graphics library for the
|
||||||
|
Apple II more than 20 years after the platform was discontinued in the
|
||||||
|
[fadden's brain documentation](docs/personal-notes.md).
|
||||||
|
|
||||||
|
The main bits of source code are accessible from git for easy viewing,
|
||||||
|
but the "official" home is on `fdrawdev.po`.
|
||||||
|
|
||||||
|
All code is copyright 2015 by Andy McFadden. All rights reserved. The
|
||||||
|
source code is available under the Apache 2 license (a very friendly
|
||||||
|
open-source license).
|
||||||
|
|
||||||
|
|
||||||
|
### Version History ###
|
||||||
|
|
||||||
|
##### v0.1 March 13, 2006
|
||||||
|
|
||||||
|
No source code, just a demo with fast filled circles and screen clears.
|
||||||
|
|
||||||
|
##### v0.2 March 20, 2006
|
||||||
|
|
||||||
|
Polished up the sources and published. This version implemented Clear,
|
||||||
|
FillRect, FillCircle, and FillRaster.
|
||||||
|
|
||||||
|
##### v0.3 August 21, 2015
|
||||||
|
|
||||||
|
Added DrawPoint, DrawLine, DrawRect, DrawCircle, and SetLineMode. Various
|
||||||
|
size and performance improvements.
|
||||||
|
|
||||||
|
Added Amperfdraw to make Applesoft BASIC programming easier.
|
||||||
|
|
||||||
|
Added several more demos and tests.
|
||||||
|
|
||||||
|
Added documentation.
|
||||||
|
|
|
@ -0,0 +1,167 @@
|
||||||
|
fdraw Demo README
|
||||||
|
=================
|
||||||
|
|
||||||
|
The fdraw distribution comes with a handful of demonstration programs.
|
||||||
|
Most of them are written in Applesoft BASIC, and use the amperfdraw
|
||||||
|
interface. This is a somewhat poor way to demonstrate animation
|
||||||
|
performance, as Applesoft adds a tremendous amount of overhead, but it
|
||||||
|
is the only way to show what you *can* do with Applesoft.
|
||||||
|
|
||||||
|
The easiest way to run them is with the "DEMO" program, which scans the
|
||||||
|
DEMOS directory for BASIC programs and presents a list. You can also
|
||||||
|
just run them directly.
|
||||||
|
|
||||||
|
* INTRO : Sort of a "hello, world" for fdraw. Mix of single- and
|
||||||
|
double-buffered animation.
|
||||||
|
|
||||||
|
* CIRCULAR : Draws lots of circles.
|
||||||
|
|
||||||
|
* RECTSPLAT : Draws lots of rectangles.
|
||||||
|
|
||||||
|
* CUBIC : Draws a spinning wireframe 3D cube. (The 3D coordinates are
|
||||||
|
pre-computed -- fdraw doesn't do matrix transforms.)
|
||||||
|
|
||||||
|
* TUNNEL : Animates circles to simulate driving through a tunnel.
|
||||||
|
|
||||||
|
* LINEAR : Draws lots of lines. The wipes show speed differences for
|
||||||
|
horizontal and vertical special cases, while the circular spinner
|
||||||
|
shows HPLOT is not as fast as &HPLOT which is not as fast as &PLOT for
|
||||||
|
a set of lines at a variety of angles.
|
||||||
|
|
||||||
|
* LINE.DIFF : Draws several lines with the ROM routines and fdraw
|
||||||
|
side-by-side to illustrate the difference in line style.
|
||||||
|
|
||||||
|
* CLEARLY : Clears the screen 32 times, 4 sets in each of the 8 colors.
|
||||||
|
The first round is done with the Applesoft ROM routine ("CALL 62454"),
|
||||||
|
the second round uses the fdraw &CLEAR function.
|
||||||
|
|
||||||
|
* HRFAN : A simple line-art demo, using "xdraw" DrawLine with lines in
|
||||||
|
different colors. Not a great demo, as the Applesoft code driving it
|
||||||
|
is rather slow, but it looks pretty good if you bump up the emulation
|
||||||
|
speed or switch to IIgs "fast" mode. (This deserves a conversion to
|
||||||
|
assembly language.)
|
||||||
|
|
||||||
|
* BRIAN.THEME.ORI : The Brian's Theme demo from the DOS 3.3 System
|
||||||
|
Master. Unmodified except for integration with the demo menu
|
||||||
|
system, and with the bug on line 31112 fixed.
|
||||||
|
|
||||||
|
* BRIAN.THEME.NEW : The Brian's Theme demo with '&' placed in front of
|
||||||
|
the various draw calls. There isn't a huge difference in speed, as
|
||||||
|
there's a lot of overhead from Applesoft, but it's interesting to note
|
||||||
|
the change in the appearance of the lines.
|
||||||
|
|
||||||
|
* WIGGLE : Sample program that shows direct use of rasterization tables.
|
||||||
|
|
||||||
|
When the demos are launched from the menu, they will assume that fdraw
|
||||||
|
is already loaded and won't try to load it again. If you run the demo
|
||||||
|
program directly, it will try to load FDRAW.FAST and AMPERFDRAW from the
|
||||||
|
parent directory before doing any drawing.
|
||||||
|
|
||||||
|
|
||||||
|
## Extras ##
|
||||||
|
|
||||||
|
The EXTRAS directory has some additional software that isn't "officially"
|
||||||
|
part of fdraw, but may be of use.
|
||||||
|
|
||||||
|
NOTE: some of these assume fdraw and amperfdraw are already loaded, and
|
||||||
|
will hang if not. Run DEMO and hit <esc> before running these.
|
||||||
|
|
||||||
|
* ARRAY.EXAMPLE : The &PLOT example from the documentation.
|
||||||
|
|
||||||
|
* XDRAW.ANIM : A demonstration of line animation using "xdraw" mode and
|
||||||
|
a simple shape that is drawn twice by a single &PLOT call. One copy
|
||||||
|
is offset by 2 pixels, so each &PLOT call erases the previous copy and
|
||||||
|
draws a new copy 2 pixels to the right. The animation is shown twice,
|
||||||
|
once with "erase all, draw all", and once with the erase and draw calls
|
||||||
|
interleaved for every line.
|
||||||
|
|
||||||
|
* LINEFONT : Program for creating draw-array tables for text phrases. Used
|
||||||
|
to create data files for the "intro" demo. See the "LINEFONT Details"
|
||||||
|
section for more information.
|
||||||
|
|
||||||
|
* DAVIEWER: Views the contents of .DA files created by LINEFONT.
|
||||||
|
|
||||||
|
* BENCHCLEAR : Calls the "clear" function 256 times from a small
|
||||||
|
assembly-language program. Handy for benchmarks, but slightly silly
|
||||||
|
since it's relatively easy to calculate the exact cycle cost.
|
||||||
|
|
||||||
|
|
||||||
|
## LINEFONT Details ##
|
||||||
|
|
||||||
|
NOTE: this program is an unfinished rough cut ("pre alpha"), used for
|
||||||
|
preparing data for demos.
|
||||||
|
|
||||||
|
The program includes a font definition, routines for displaying
|
||||||
|
characters, and code for generating and exporting pre-rendered strings.
|
||||||
|
|
||||||
|
Character vertices are expressed as floating-point values. The baseline
|
||||||
|
is at zero, the peak ascent is at 1.0, the lowest descent is -1.0. The
|
||||||
|
leftmost pixel is at zero, the maximum value for the rightmost pixel is 1.0.
|
||||||
|
Characters don't have to fill out the entire cell -- proportionally-spaced
|
||||||
|
fonts are supported -- but they are expected to start at the left edge.
|
||||||
|
|
||||||
|
So a capital 'M' might look like this:
|
||||||
|
|
||||||
|
0.0,0.0 -> 0.0,1.0 -> 0.5,0.7 -> 1.0,1.0 -> 1.0,0.0
|
||||||
|
|
||||||
|
There is currently no "user interface", unless the "user" can program in
|
||||||
|
Applesoft BASIC. To generate strings, add a series of statements that set
|
||||||
|
variables and call 20000 to add rendered strings to the set. The relevant
|
||||||
|
variables are:
|
||||||
|
|
||||||
|
S$ - string to add
|
||||||
|
DW - desired width, in pixels, of a cell 1.0 units wide
|
||||||
|
DH - desired height, in pixels, of a cell 2.0 units high (ascent + descent)
|
||||||
|
IS% - inter-character spacing, in pixels
|
||||||
|
SW% - width of the space character (usually same as DW)
|
||||||
|
MO% - monospace flag; if nonzero, all chars are treated as 1.0 units wide
|
||||||
|
|
||||||
|
Remove the REM from the start of line 1010 to enable the character viewer.
|
||||||
|
At present only a couple of lower-case letters are defined.
|
||||||
|
|
||||||
|
|
||||||
|
#### LINEFONT Output ####
|
||||||
|
|
||||||
|
The LINEFONT program outputs a binary blob that can be passed to
|
||||||
|
the &PLOT array-draw function. The file structure is:
|
||||||
|
|
||||||
|
+0 byte - number of array sets in the list.
|
||||||
|
+1 2 bytes * N - table of offsets to individual array sets. One of
|
||||||
|
these per array set. The value is the offset from the start of the
|
||||||
|
file.
|
||||||
|
|
||||||
|
(2N+1) array set #1:
|
||||||
|
+0 byte - number of vertices (0-127)
|
||||||
|
+1 byte - number of index pairs (0-127)
|
||||||
|
+2 2 bytes * V - vertices (values are signed X/Y)
|
||||||
|
+X 2 bytes * I - index pairs (values are 0-127)
|
||||||
|
|
||||||
|
To display phrase #3, you would get the 16-bit value from the offset
|
||||||
|
table with PEEK(start + 1 + 3 * 2) + PEEK(start + 2 + 3 * 2) * 256.
|
||||||
|
You get the number of vertices from PEEK(start + offset), and the number
|
||||||
|
of index pairs from PEEK(start + offset + 1). Finally, call the array-draw
|
||||||
|
function with:
|
||||||
|
|
||||||
|
VA = start + offset + 2
|
||||||
|
IA = VA + num_vertices * 2
|
||||||
|
&PLOT va, ia, num_index_pairs
|
||||||
|
|
||||||
|
The 0,0 point in the blob is in the center of the phrase horizontally
|
||||||
|
(which allows a maximum width of 255 pixels), and at the font baseline
|
||||||
|
vertically (so most of the font will appear above the zero point, but
|
||||||
|
descenders will extend below).
|
||||||
|
|
||||||
|
|
||||||
|
#### Future Enhancements ####
|
||||||
|
|
||||||
|
Right now the font definition is embedded in the program. This takes up
|
||||||
|
a lot of space -- before too long the BASIC program is going to intrude
|
||||||
|
on the hi-res page -- and is unnecessarily restrictive. The font should be
|
||||||
|
defined by a separate program, and BSAVEd into a line-font file that
|
||||||
|
LINEFONT can load.
|
||||||
|
|
||||||
|
Generating strings should be menu-driven and interactive, rather than
|
||||||
|
requiring manual changes to the code to fiddle with sizes and spacing.
|
||||||
|
DAVIEWER should be folded into the generation program (though it's kind
|
||||||
|
of handy as a simple example of how to unpack and access content).
|
||||||
|
|
|
@ -0,0 +1,990 @@
|
||||||
|
fdraw Library Documentation
|
||||||
|
===========================
|
||||||
|
|
||||||
|
Fast graphics primitives for the Apple II
|
||||||
|
By Andy McFadden
|
||||||
|
Version 0.3, August 2015
|
||||||
|
|
||||||
|
## Overview ##
|
||||||
|
|
||||||
|
The fdraw library provides fast rendering of points, lines, rectangles,
|
||||||
|
and circles, as well as high-speed screen clears, for Apple II hi-res
|
||||||
|
graphics. It can be used from Applesoft or assembly language.
|
||||||
|
|
||||||
|
The Applesoft ROM routines were designed to be as compact as possible,
|
||||||
|
and were unable to use self-modifying code techniques, so their speed is
|
||||||
|
less than what the Apple II is capable of. The fdraw routines pick a
|
||||||
|
different point in the speed/space trade-off continuum, providing fast
|
||||||
|
speeds at a reasonable size. Not everyone agrees on what "reasonable"
|
||||||
|
means, so the fdraw code can be built in two modes, one that favors
|
||||||
|
speed, one that reduces size.
|
||||||
|
|
||||||
|
**Contents:**
|
||||||
|
|
||||||
|
- [Applesoft BASIC Ampersand API](#amperapi)
|
||||||
|
- [Raw API](#rawapi)
|
||||||
|
- [Building the Code](#building)
|
||||||
|
- [Apple II Hi-res in a Nutshell](#nutshell)
|
||||||
|
- [Notes on the Drawing Functions](#notes)
|
||||||
|
- [General Notes](#additional-notes)
|
||||||
|
- [Enhancement Ideas](#ideas)
|
||||||
|
- [My Quest for Lines](#history)
|
||||||
|
|
||||||
|
|
||||||
|
<div id='amperapi'/>
|
||||||
|
## Applesoft BASIC Ampersand API (Amperfdraw) ##
|
||||||
|
|
||||||
|
The ampersand API acts as a bridge between Applesoft BASIC and fdraw.
|
||||||
|
It's more convenient and has less overhead than POKE and CALL, though
|
||||||
|
you are not prevented from using that approach if you prefer. It's
|
||||||
|
best to use one or the other though, not mix and match.
|
||||||
|
|
||||||
|
All arguments are checked for validity. An appropriate Applesoft
|
||||||
|
error is thrown if invalid syntax or arguments are discovered.
|
||||||
|
|
||||||
|
This is not intended to be compatible with, nor a replacement for, the
|
||||||
|
ampersand utilities in Beagle Graphics.
|
||||||
|
|
||||||
|
* &NEW - calls the fdraw Init function (which sets the color to 0 and
|
||||||
|
selects hi-res page 1). You must do this once, at the start of
|
||||||
|
your program, after fdraw has been loaded. This also resets internal
|
||||||
|
amperfdraw state, setting the "HPLOT TO" origin to (0,0) and the "AT"
|
||||||
|
point to (139,95).
|
||||||
|
* &HGR - does what HGR does, only faster. Equivalent to executing
|
||||||
|
`&HCOLOR=0:&SCRN(1):&CLEAR:&HCOLOR=[prevcolor]`, and then setting the
|
||||||
|
display softswitches to display hi-res page 1 in mixed mode. Also sets
|
||||||
|
$e6 (HPAG) for convenience in case you want to mix & match with ROM
|
||||||
|
routines.
|
||||||
|
* &HGR2 - like &HGR, but for page 2. Like HGR2, this turns off
|
||||||
|
mixed-text mode.
|
||||||
|
* &SCRN({1,2}) - sets the hi-res page that will be used for drawing. Does
|
||||||
|
not change which page is displayed. (Use the softswitches, or call
|
||||||
|
&INVERSE.)
|
||||||
|
* &INVERSE - flips the render page to the other page, and hits the
|
||||||
|
display softswitches to show the page that was just rendered. Intended
|
||||||
|
for double-buffered animation.
|
||||||
|
* &HCOLOR={0-7} - sets color, using the same numbering scheme as Applesoft.
|
||||||
|
Does not affect the color used by the ROM routines.
|
||||||
|
* &CLEAR - clears screen to current color.
|
||||||
|
* &HPLOT [TO] x,y [TO x,y ...] - draws a point or a line. Works the same as
|
||||||
|
Applesoft, e.g. "&HPLOT TO" starts from the end of the previously
|
||||||
|
drawn line, and you can chain multiple "TO x,y" in a single statement.
|
||||||
|
* &EXP {0,1} - set line mode. 0 is normal, 1 is "xdraw".
|
||||||
|
* &XDRAW left,top,right,bottom - draws outline rectangle.
|
||||||
|
* &DRAW left,top,right,bottom - draws filled rectangle.
|
||||||
|
* &COS cx,cy,r - draws outline circle.
|
||||||
|
* &SIN cx,cy,r - draws filled circle.
|
||||||
|
|
||||||
|
* &AT cx,cy - sets center offset for array-based rendering. Position must
|
||||||
|
be on the hi-res screen (0-279, 0-191).
|
||||||
|
* &PLOT vertexAddr, indexAddr, indexCount [AT cx,cy] - draws from the
|
||||||
|
specified byte-arrays. See the "Drawing Lines with Indexed Byte-Arrays"
|
||||||
|
section for the full explanation.
|
||||||
|
|
||||||
|
|
||||||
|
<div id='rawapi'/>
|
||||||
|
## Raw API ##
|
||||||
|
|
||||||
|
The code is assembled at $6000 by default. The program's length includes
|
||||||
|
all data tables and work areas, and no memory outside of the program,
|
||||||
|
zero page, and the current hi-res page is modified.
|
||||||
|
|
||||||
|
Input parameters and the function jump table are located near the start
|
||||||
|
of the program. The API description below describes the addresses in
|
||||||
|
relative terms.
|
||||||
|
|
||||||
|
Input parameters are not checked for validity. They must be in the range
|
||||||
|
specified by the API, or undefined (but probably bad) behavior will result.
|
||||||
|
The values will not be modified by fdraw functions.
|
||||||
|
|
||||||
|
All drawing operations use the current color.
|
||||||
|
|
||||||
|
* +0 Init - call this when the library is first loaded. It must be
|
||||||
|
called before any other functions are used. It initializes the
|
||||||
|
color to zero and the page to $20.
|
||||||
|
* +3 (major version number, currently 0)
|
||||||
|
* +4 (minor version number, currently 3)
|
||||||
|
* +5 Input parameter area:
|
||||||
|
* +5 arg - used for misc functions, e.g. SetColor and SetPage
|
||||||
|
* +6 x0l - low part of the X0 coordinate (0-279)
|
||||||
|
* +7 x0h - high part of X0
|
||||||
|
* +8 y0 - Y0 coordinate (0-191)
|
||||||
|
* +9 x1l - low part of X1 (0-279)
|
||||||
|
* +10 x1h - high part of X1
|
||||||
|
* +11 y1 - Y1 coordinate (0-191)
|
||||||
|
* +12 rad - circle radius (0-255)
|
||||||
|
* +13 (reserved)
|
||||||
|
* +16 SetColor - set the color used for drawing (0-7) to the value in "arg".
|
||||||
|
The numbering is the same as the Applesoft hi-res colors.
|
||||||
|
* +19 SetPage - set the hi-res page used for drawing to the value in "arg",
|
||||||
|
which must be $20 or $40. Does not change the page that is displayed.
|
||||||
|
(Because a bad value can cause memory corruption, this value *is*
|
||||||
|
checked, and bad values rejected.)
|
||||||
|
* +22 Clear - erase the current hi-res page to the current color.
|
||||||
|
* +25 DrawPoint - plot a single point at x0,y0.
|
||||||
|
* +28 DrawLine - draw a line from x0,y0 to x1,y1 (inclusive).
|
||||||
|
* +31 DrawRect - draw a rectangle with corners at x0,y0 and x1,y1 (inclusive).
|
||||||
|
x0,y0 is the top-left, x1,y1 is the bottom-right. The left and
|
||||||
|
right edges will be drawn two bits wide to ensure that the edges
|
||||||
|
are visible (drawn at x0+1, x1-1).
|
||||||
|
* +34 FillRect - draw a filled rectangle with corners at x0,y0 and x1,y1
|
||||||
|
(inclusive).
|
||||||
|
* +37 DrawCircle - draw a circle with center at x0,y0 and radius=rad.
|
||||||
|
* +40 FillCircle - draw a filled circle with center at x0,y0 and radius=rad.
|
||||||
|
* +43 SetLineMode - set the DrawLine mode to the value in "arg", which can
|
||||||
|
be 0 (normal) or 1 (xdraw).
|
||||||
|
* +46 (reserved)
|
||||||
|
|
||||||
|
* +49 FillRaster - draw an arbitrary shape from the rasterization tables.
|
||||||
|
For each line from top to bottom, the left and right edges will
|
||||||
|
be read from rastx1/rastx2 and a raster drawn in the current color.
|
||||||
|
* +52 (byte) topmost line to rasterize (0-191)
|
||||||
|
* +53 (byte) bottom-most line to rasterize (0-191), inclusive
|
||||||
|
* +54 (2 bytes) address of rastx1l table
|
||||||
|
* +56 (2 bytes) address of rastx1h table
|
||||||
|
* +58 (2 bytes) address of rastx2l table
|
||||||
|
* +60 (2 bytes) address of rastx2h table
|
||||||
|
|
||||||
|
The rasterization table addresses are read-only; changing them will have
|
||||||
|
no effect.
|
||||||
|
|
||||||
|
fdraw uses a fair number of zero page locations. The exact set can be
|
||||||
|
determined by looking at FDRAW.S. The locations were chosen to not
|
||||||
|
interfere with DOS, ProDOS, Applesoft, or the Monitor. They may
|
||||||
|
interfere with Integer BASIC, SWEET16, or your own application code.
|
||||||
|
Remapping them to different locations is straightforward: just change
|
||||||
|
the assignment of zptr/zloc values near the top of FDRAW.S to use
|
||||||
|
different addresses. fdraw does not expect any zero page value to be
|
||||||
|
preserved across calls, so you're welcome to use those locations in your
|
||||||
|
own code, but understand that fdraw functions will overwrite them.
|
||||||
|
|
||||||
|
|
||||||
|
<div id='nutshell'/>
|
||||||
|
## Apple II Hi-res in a Nutshell ##
|
||||||
|
|
||||||
|
This is a quick overview of the Apple II hi-res graphics architecture
|
||||||
|
for anyone not recently acquainted.
|
||||||
|
|
||||||
|
The Apple II hi-res graphics screen is a quirky beast. The typical
|
||||||
|
API treats it as 280x192 with 6 colors (black, white, green, purple,
|
||||||
|
orange, blue), though the reality is more complicated than that.
|
||||||
|
|
||||||
|
There are two hi-res screens, occupying 8K each, at $2000 and $4000.
|
||||||
|
You turn them on and flip between them by accessing softswitches in
|
||||||
|
memory-mapped I/O space.
|
||||||
|
|
||||||
|
Each byte determines the color of seven adjacent pixels, so it takes
|
||||||
|
(280 / 7) = 40 bytes to store each line. The lines are organized into
|
||||||
|
groups of three (120 bytes), which are interleaved across thirds of
|
||||||
|
the screen. To speed the computation used to find the start of a
|
||||||
|
line in memory, the group is padded out to 128 bytes; this means
|
||||||
|
((192 / 3) * 8) = 512 of the 8192 bytes are part of invisible
|
||||||
|
"screen holes". The interleaving is responsible for the characteristic
|
||||||
|
"venetian blind" effect when clearing the screen.
|
||||||
|
|
||||||
|
Now imagine 280 bits in a row. If two consecutive bits are on, you
|
||||||
|
get white. If they're both off, you get black. If they alternate
|
||||||
|
on and off, you get color. The color depends on the position of the bit;
|
||||||
|
for example, if even-numbered bits are on, you get purple, while
|
||||||
|
odd-numbered bits yield green. The high bit in each byte adjusts the
|
||||||
|
position of bits within that byte by half a pixel, changing purple and
|
||||||
|
green to blue and orange.
|
||||||
|
|
||||||
|
This arrangement has some curious consequences. If you have green and
|
||||||
|
purple next to each other, there will be a color glitch where they meet.
|
||||||
|
The reason is obvious if you look at the bit patterns when odd/even meet:
|
||||||
|
`...010101101010...` or `...101010010101...`. The first pattern has two
|
||||||
|
adjacent 1 bits (white), the latter two adjacent 0 bits (black). Things
|
||||||
|
get even weirder if the split occurs at a byte boundary and the high bit is
|
||||||
|
different, as the half-pixel shift can make the "glitch" pixel wider or
|
||||||
|
narrower by half a pixel.
|
||||||
|
|
||||||
|
The Applesoft ROM routines draw lines that are 1 bit wide. If you execute
|
||||||
|
a command like `HGR : HCOLOR=1 : HPLOT 0,0 to 0,10`, you won't see
|
||||||
|
anything happen. That's because HCOLOR=1 sets the color to green,
|
||||||
|
which means it only draws on odd pixels, but the HPLOT command we gave
|
||||||
|
drew a vertical line on even pixels. It set 11 bits to zero, but since
|
||||||
|
the screen was already zeroed out there was no apparent effect.
|
||||||
|
|
||||||
|
If you execute `HGR : HCOLOR=3 : HPLOT 1,0 to 1,10`, you would expect a
|
||||||
|
white line to appear. However, drawing in "white" just means that no
|
||||||
|
bit positions are excluded. So it drew a vertical column of pixels at
|
||||||
|
X=1, which appears as a green line.
|
||||||
|
|
||||||
|
If (without clearing the screen after the previous command) you execute
|
||||||
|
`HCOLOR=4 : HPLOT 5,0 to 5,10`, something curious happens: the green line
|
||||||
|
turns orange. HCOLOR=4 is black with the high-bit set. So we drew a
|
||||||
|
line of black in column 5 (which we won't see, because that part of the
|
||||||
|
screen is already black), and set the high bit in that byte. The same
|
||||||
|
byte holds columns 0 through 6, so drawing in column 5 also affected
|
||||||
|
column 1. We can put it back to green with "HCOLOR=0 : HPLOT 5,0 to 5,10".
|
||||||
|
|
||||||
|
It's important to keep the structure in mind while drawing to avoid
|
||||||
|
surprises.
|
||||||
|
|
||||||
|
Note that the Applesoft ROM routines treat 0,0 as the top-left corner,
|
||||||
|
with positive coordinates moving right and down, and lines are drawn
|
||||||
|
with inclusive end coordinates. This is different from many modern
|
||||||
|
systems. fdraw follows the Applesoft conventions to avoid confusion.
|
||||||
|
|
||||||
|
Handy table of graphics softswitches:
|
||||||
|
|
||||||
|
name | addr | decimal | purpose
|
||||||
|
------ | ----- | ------- | ------------------
|
||||||
|
TXTCLR | $c050 | -16304 | enable graphics
|
||||||
|
TXTSET | $c051 | -16303 | text-only
|
||||||
|
MIXCLR | $c052 | -16302 | disable mixed mode
|
||||||
|
MIXSET | $c053 | -16301 | enable mixed mode (4 lines of text)
|
||||||
|
LOWSCR | $c054 | -16300 | display page 1
|
||||||
|
HISCR | $c055 | -16299 | display page 2
|
||||||
|
LORES | $c056 | -16298 | show lo-res screen
|
||||||
|
HIRES | $c057 | -16297 | show hi-res screen
|
||||||
|
|
||||||
|
|
||||||
|
<div id='building'/>
|
||||||
|
## Building the Code ##
|
||||||
|
|
||||||
|
The main fdraw code is written for the Merlin assembler (specifically
|
||||||
|
Merlin-16 3.40, though other versions should work). It uses plain 6502
|
||||||
|
code, and is expected to run on an Apple ][+.
|
||||||
|
|
||||||
|
For convenience when editing the files on an Apple II, and to allow the
|
||||||
|
code to be compiled by Merlin-16 running under ProDOS 8, the code is
|
||||||
|
broken into four files. The main file, FDRAW.S, includes the other
|
||||||
|
three with PUT directives. FDRAW.S holds the API entry points and some
|
||||||
|
of the drawing code. FDRAW.LINE.S has the code for drawing points and
|
||||||
|
lines, while FDRAW.CIRCLE.S has the code for drawing circles.
|
||||||
|
FDRAW.TABLE.S holds the data tables, as well as empty space for work
|
||||||
|
areas. The empty space is included in the binary so you can determine
|
||||||
|
the full memory footprint by looking at the length of the file.
|
||||||
|
|
||||||
|
Near the top of FDRAW.S is a constant, `USE_FAST`, which may be set
|
||||||
|
to 0 or 1. If set to 0, some code optimizations are disabled,
|
||||||
|
reducing the size of the code and data areas. Further, the page
|
||||||
|
alignment on data tables is disabled, reducing the internal fragmentation
|
||||||
|
of the data area.
|
||||||
|
|
||||||
|
The USE_FAST setting also determines which file receives the assembler
|
||||||
|
output: FDRAW.FAST or FDRAW.SMALL. To generate both, it is necessary to
|
||||||
|
assemble the file, change the constant, and then assemble the file again.
|
||||||
|
|
||||||
|
Tests and demos are written in Applesoft BASIC, with a couple of
|
||||||
|
exceptions.
|
||||||
|
|
||||||
|
|
||||||
|
### Why So Big? ###
|
||||||
|
|
||||||
|
The fdraw code weighs in at a hefty 5KB (or 4KB for the "small" build).
|
||||||
|
That doesn't sound like much in the age of multi-gigabyte mobile phones,
|
||||||
|
but it's a sizeable fraction of the space available on an Apple ][+.
|
||||||
|
|
||||||
|
If you want to modify individual pixels quickly, you need two things:
|
||||||
|
a line base-address table, and a divide-by-7 table. Computing base
|
||||||
|
addresses and dividing by 7 aren't hugely expensive, but we're going
|
||||||
|
to be doing them often, so they need to be as fast as possible.
|
||||||
|
|
||||||
|
The line address table has 192 entries, one for each line, 2 bytes per
|
||||||
|
entry. The divide-by-7 table has 280 entries, one for each horizontal
|
||||||
|
pixel position, with one byte for the quotient and one for the remainder.
|
||||||
|
(The remainder can be expressed as a numeric value from 0 to 6, or as
|
||||||
|
a byte with a specific bit set.)
|
||||||
|
|
||||||
|
That's 944 bytes. For optimum performance, each table must fit on a
|
||||||
|
single page of memory. We can split the division table into two pieces,
|
||||||
|
one for 0-255 and one for 256-279, and put the smaller half on the same
|
||||||
|
page as the Y table, along with 16 bytes of padding. The final size is
|
||||||
|
256 + 256 + (192+24+24+pad) + 192 = 960. So you can write off 1K of
|
||||||
|
memory before you've written any code.
|
||||||
|
|
||||||
|
(There's a clever way to reduce the size of the y-lookup table to 24
|
||||||
|
entries, but it's slightly faster and much easier to use full tables.)
|
||||||
|
|
||||||
|
For the FillRaster function, fdraw needs to record the left and right
|
||||||
|
X coordinates on each line (2 bytes each), so that's 192 * 4 = 768 bytes.
|
||||||
|
Again, for optimum performance, each table needs to be on its own page,
|
||||||
|
so for USE_FAST=1 that expands to 1024 bytes.
|
||||||
|
|
||||||
|
Add to that another full page of unrolled rasterization code, and you've
|
||||||
|
got 2304 bytes of tables.
|
||||||
|
|
||||||
|
The rest is code, most of which was written with a flagrant disregard
|
||||||
|
for size. Many common code fragments are repeated inline, rather than
|
||||||
|
called as a subroutine, because a subroutine call (JSR+RTS) costs 12
|
||||||
|
cycles. Calling a common "plot a point" function from the line-drawing
|
||||||
|
code would increase the per-pixel cost by 15-20%.
|
||||||
|
|
||||||
|
|
||||||
|
<div id='notes'/>
|
||||||
|
## Notes on the Drawing Functions ##
|
||||||
|
|
||||||
|
### Screen Clear ###
|
||||||
|
|
||||||
|
The Clear function erases the current hi-res page to the current color.
|
||||||
|
It's several times faster than the version built into the ROM.
|
||||||
|
|
||||||
|
#### Performance ####
|
||||||
|
|
||||||
|
The fastest possible way to clear the screen to a specific color on a
|
||||||
|
6502 is to write to every visible location with an absolute store
|
||||||
|
instruction. Subtracting the screen holes, that's 7680 addresses *
|
||||||
|
4 cycles = 30720 cycles. The code to do that would be 23,040 bytes long,
|
||||||
|
making it impractical.
|
||||||
|
|
||||||
|
A slower but more memory-efficient approach has one store statement for
|
||||||
|
each line, and iterates through 40 times (280 / 7 = 40). Factoring in the
|
||||||
|
loop overhead, that comes out to 40 * (192 * 5 + 9) = 38760 cycles.
|
||||||
|
192 sets of store instructions fills 576 bytes, which is much better
|
||||||
|
than 23K, but still quite a lot.
|
||||||
|
|
||||||
|
We can reduce the size further by taking the lines 3 at a time, erasing
|
||||||
|
the first 120 bytes in each 128-byte group (the last 8 bytes are the
|
||||||
|
screen hole). We'd need to use 7680/120 = 64 store instructions, for a
|
||||||
|
total of 120 * (64 * 5 + 9) = 39480 cycles, with 192 bytes for the main
|
||||||
|
part of the erase loop. We're not quite 2% slower, but 384 bytes
|
||||||
|
smaller, which seems a fair trade-off. Because we're accessing memory
|
||||||
|
linearly we now have a "venetian blind" clear, which is something of an
|
||||||
|
Apple II trademark, but we can fix that by spending an additional 522
|
||||||
|
cycles to erase the screen in thirds (top/middle/bottom).
|
||||||
|
|
||||||
|
Any further changes that make the code smaller also increase the execution
|
||||||
|
time. When built with USE_FAST=0, the code will use a different loop
|
||||||
|
with 32 stores that write 248 bytes each, and takes 41416 cycles. It's
|
||||||
|
half the size, but nearly 2000 cycles slower, and overwrites half of the
|
||||||
|
screen holes.
|
||||||
|
|
||||||
|
At the extreme end of space over speed is the Applesoft ROM routine -- HGR
|
||||||
|
or "CALL 62454" -- which only needs about 30 bytes for its main loop, but
|
||||||
|
takes (8192*33)+(12*64)+17 = 271121 cycles for black or white, or
|
||||||
|
(8192*40)+(12*64)+17 = 328465 cycles for green/purple/blue/orange --
|
||||||
|
7-8x slower than our preferred implementation.
|
||||||
|
|
||||||
|
The screen clear is wired to a specific hi-res page, so the SetPage
|
||||||
|
function must rewrite the store instructions when the page changes (or
|
||||||
|
we need to keep two full copies of the function around). For an
|
||||||
|
application that is constantly doing flip-erase, the overhead must be
|
||||||
|
factored into the efficiency of the approach -- for example, rewriting
|
||||||
|
stores with indexed LDA/EOR/STA in a loop will take 20 cycles per iteration,
|
||||||
|
1280 cycles for the full set of 64. The "slow" clear has half the
|
||||||
|
number of store instructions, so takes half the time to fix up after
|
||||||
|
a page flip.
|
||||||
|
|
||||||
|
|
||||||
|
### Raster Fill ###
|
||||||
|
|
||||||
|
Drawing an outline of a rectangle or circle can be done efficiently by
|
||||||
|
drawing lines or plotting points. Drawing a filled shape is more
|
||||||
|
expensive if one point is plotted at a time, especially on the Apple II
|
||||||
|
where every byte affects 7 pixels.
|
||||||
|
|
||||||
|
For filled shapes, fdraw populates a rasterization table. The table has
|
||||||
|
192 entries, each of which holds the left and right edges of the shape
|
||||||
|
on that line. The code fills in the pixels one line at a time, using
|
||||||
|
a simple byte store for the middle parts, and bit masks at the edges.
|
||||||
|
|
||||||
|
External applications can use the raster renderer directly by filling
|
||||||
|
out the rasterization table and calling FillRaster.
|
||||||
|
|
||||||
|
While the FillRaster function itself will not modify the contents of the
|
||||||
|
raster tables, other fdraw calls will, sometimes unexpectedly. For
|
||||||
|
example, drawing a horizontal line is performed with a single-line
|
||||||
|
fill call. Filled rectangles might populate the table in the way you'd
|
||||||
|
expect, or might use some internal shortcut that only fills out one line
|
||||||
|
and sets a "repeat" flag. Don't make assumptions about what will be in
|
||||||
|
the table after a call to one of the drawing functions. You *can* count
|
||||||
|
on whatever you wrote there yourself to be unmodified after calls to
|
||||||
|
FillRaster, SetColor, or SetPage, so you can do page-flipping and
|
||||||
|
color-cycling without having to repopulate the tables.
|
||||||
|
|
||||||
|
#### Performance ####
|
||||||
|
|
||||||
|
The fill code needs about 100 cycles to set up each line when drawing
|
||||||
|
a rectangle, more if the line doesn't start and end on byte boundaries.
|
||||||
|
The inner loop costs 10 cycles per byte. To clear the screen with the
|
||||||
|
raster fill code, it would take (192 * (100 + 40 * 10)) = 96000 cycles,
|
||||||
|
or nearly 2.5x the time required for the dedicated clear code. Which is
|
||||||
|
about what you'd expect, as the screen erase needs 4 cycles per byte, and
|
||||||
|
has lower per-line overhead. (This can be improved significantly; see
|
||||||
|
the notes in the "enhancements" section.)
|
||||||
|
|
||||||
|
Non-rectangular shapes take slightly longer to set up, as the edges must
|
||||||
|
be recomputed for each line.
|
||||||
|
|
||||||
|
|
||||||
|
### Lines ###
|
||||||
|
|
||||||
|
The goal is to provide a replacement for Applesoft's HPLOT function
|
||||||
|
that is faster and more consistent in appearance. Lines are drawn using
|
||||||
|
Bresenham's run-length algorithm.
|
||||||
|
|
||||||
|
Internally, there are five separate functions. Horizontal and vertical
|
||||||
|
lines each get a special-case handler. There's another for mostly-vertical
|
||||||
|
lines, one for mostly-horizontal lines, and one for wide mostly-horizontal
|
||||||
|
lines (255 pixels or wider). The latter requires 16-bit math, and is
|
||||||
|
slightly slower.
|
||||||
|
|
||||||
|
The Applesoft routine isn't quite the same as the standard Bresenham
|
||||||
|
algorithm, because it doesn't move diagonally. Consider a line from
|
||||||
|
(0,0) to (50,10) -- gently sloping down and to the right. The standard
|
||||||
|
algorithm would plot exactly 51 pixels, one in each horizontal position.
|
||||||
|
The "pen" always moves one pixel right, but sometimes also moves down.
|
||||||
|
|
||||||
|
In Applesoft, the "pen" can move either right or down, but can't do
|
||||||
|
both at once. This results in lines that feel thin when near horizontal
|
||||||
|
or vertical, but become thicker as they approach 45 degrees. This
|
||||||
|
reduces performance, because Applesoft draws twice as many pixels for a
|
||||||
|
diagonal line as the basic algorithm. It can also be visually jarring
|
||||||
|
when animated, because lines get very thick when near diagonal.
|
||||||
|
|
||||||
|
Different applications have used different styles; for example:
|
||||||
|
|
||||||
|
- Stellar 7 and Elite for the Apple II use Bresenham-style lines. If
|
||||||
|
you look at near-diagonal lines on a color monitor you can see the
|
||||||
|
pixels alternating green and purple.
|
||||||
|
- A2-FS1 Flight Simulator appears to be using Bresenham lines but with
|
||||||
|
doubled bits, effectively treating the screen as having 140 pixels. This
|
||||||
|
gives solid white lines with a fairly consistent feel.
|
||||||
|
- GraFORTH doubles the bits, but treats the screen as 256 pixels wide
|
||||||
|
(not 280... it gives up 24 pixels to improve performance). White
|
||||||
|
lines are thick like Flight Simulator, but feel less jagged because
|
||||||
|
each step can move left or right by one bit rather than two.
|
||||||
|
|
||||||
|
The SetLineMode function lets you choose between "draw" and "xdraw". The
|
||||||
|
former draws color pixels, setting and clearing bits as needed, while
|
||||||
|
the latter inverts whatever is currently on the screen. This can have
|
||||||
|
some unusual effects. Drawing the same line twice erases the line.
|
||||||
|
Drawing a green line over a purple line gives you a white line. Drawing
|
||||||
|
with colors 5 and 6 can produce odd results, because the high bit inverts
|
||||||
|
every time you touch a byte -- which means the ends of a horizontal line
|
||||||
|
will be a different color if the byte holds an even number of affected
|
||||||
|
pixels. It's best to draw with colors 0-3 when in xdraw mode. Clearing
|
||||||
|
the background to color 4, rather than 0, will cause drawing in colors
|
||||||
|
0-3 to actually be 4-7.
|
||||||
|
|
||||||
|
#### Performance ####
|
||||||
|
|
||||||
|
Mostly-horizontal lines step horizontally each iteration, and sometimes
|
||||||
|
step vertically. Mostly-vertical lines step vertically each iteration,
|
||||||
|
and sometimes step horizontally. Each part of the operation has a cost,
|
||||||
|
so the fastest lines are the ones drawn primarily in a single direction.
|
||||||
|
Diagonal lines are the worst case for performance.
|
||||||
|
|
||||||
|
The current code requires just under 80 cycles per pixel for diagonal
|
||||||
|
movement, and about 56 for single-direction movement. There's another
|
||||||
|
150 cycles or so per line for the initial setup.
|
||||||
|
|
||||||
|
Vertical lines cost about 43 cycles per pixel. Horizontal lines are
|
||||||
|
handled as a trivial FillRaster call, which at peak performance can write
|
||||||
|
7 pixels in 10 cycles.
|
||||||
|
|
||||||
|
This is about as fast as you can get with the Bresenham run-length
|
||||||
|
algorithm and Applesoft-style color handling. It's possible to go faster
|
||||||
|
by switching to a different pixel style, or using a run-slice approach.
|
||||||
|
|
||||||
|
|
||||||
|
### Rectangles ###
|
||||||
|
|
||||||
|
Filled rectangles are currently implemented by putting the left and
|
||||||
|
right edges into the rasterization table, and calling FillRaster.
|
||||||
|
|
||||||
|
Outline rectangles could be drawn as four lines, but that doesn't look
|
||||||
|
very good in color unless you get the lines on the right columns. To
|
||||||
|
ensure that the edges are in the correct color, outline rectangles are
|
||||||
|
drawn as four separate items: a two-pixel-wide left edge, a two-pixel-wide
|
||||||
|
right edge, and horizontal lines at the top and bottom. FillRaster does
|
||||||
|
the actual work.
|
||||||
|
|
||||||
|
#### Performance ####
|
||||||
|
|
||||||
|
FillRaster is suboptimal for rectangles, because it works by rows rather
|
||||||
|
than by columns (see "Vertically-Challenged Rasterization" later in this
|
||||||
|
document). Rectangles could be drawn 2.5x faster with dedicated code,
|
||||||
|
but at a cost of hundreds of bytes of memory.
|
||||||
|
|
||||||
|
The advantage of using FillRaster is that we need it for filled circles,
|
||||||
|
so adding support for rectangles was nearly free. And it's still pretty
|
||||||
|
fast.
|
||||||
|
|
||||||
|
|
||||||
|
### Circles ###
|
||||||
|
|
||||||
|
Circles are computed with Bresenham's algorithm. The idea is to compute
|
||||||
|
one octant of the circle with this bit of magic:
|
||||||
|
|
||||||
|
void drawOutline(int cx, int cy, int rad) {
|
||||||
|
int x, y, d;
|
||||||
|
|
||||||
|
d = 1 - rad;
|
||||||
|
x = 0;
|
||||||
|
y = rad;
|
||||||
|
|
||||||
|
while (x <= y) {
|
||||||
|
plot(cx, cy, x, y);
|
||||||
|
|
||||||
|
if (d < 0) {
|
||||||
|
d = d + (x * 4) + 3;
|
||||||
|
} else {
|
||||||
|
d = d + ((x - y) * 4) + 5;
|
||||||
|
y--;
|
||||||
|
}
|
||||||
|
x++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Then each X/Y coordinate is plotted eight times:
|
||||||
|
|
||||||
|
(cx+x, cy+y) (cx-x, cy+y) (cx+x, cy-y) (cx-x, cy-y)
|
||||||
|
(cx+y, cy+x) (cx-y, cy+x) (cx+y, cy-x) (cx-y, cy-x)
|
||||||
|
|
||||||
|
For an outline circle, we plot every point. For a filled circle, we add
|
||||||
|
each point to a rasterization table. Near the top and bottom of the
|
||||||
|
circle there will be multiple updates to the same line, with each update
|
||||||
|
replacing the previous one (which works, as we are moving "outward").
|
||||||
|
|
||||||
|
The center point of the circle must be on screen, but it's not necessary
|
||||||
|
for the entire circle to fit. Coordinates outside screen space are clipped.
|
||||||
|
|
||||||
|
#### Performance ####
|
||||||
|
|
||||||
|
The implementation of Bresenham's algorithm is straightforward, and is
|
||||||
|
about as fast as it's going to get. There are actually two versions of
|
||||||
|
the core computation. If the radius is less than 41, we can keep all of
|
||||||
|
the variables in 8 bits. For circles with radius 41 and larger, we need
|
||||||
|
to use 16 bits, slowing each step slightly.
|
||||||
|
|
||||||
|
There are also two versions of the octant plot. If the circle fits entirely
|
||||||
|
on-screen, we use a simple version. If it doesn't, we use a version that
|
||||||
|
clips values. For rasterization that means clamping X to the left or
|
||||||
|
right edge, and skipping updates that are off the screen in the Y dimension.
|
||||||
|
For an outline circle we simply don't plot any clipped points.
|
||||||
|
|
||||||
|
The rendering of filled circles is very fast, though there is a possibility
|
||||||
|
of optimizing the center-fill of large circles. Outline circles were
|
||||||
|
added by inserting JSR PLOT at key points, and could perhaps be faster.
|
||||||
|
|
||||||
|
|
||||||
|
### Drawing Lines with Indexed Byte-Arrays ###
|
||||||
|
|
||||||
|
The &PLOT command allows a BASIC program to execute a series of line-draw
|
||||||
|
commands with a single statement. Think of it like shape-table animation
|
||||||
|
with lines instead of plotted points.
|
||||||
|
|
||||||
|
Suppose you want to draw a rectangle with an X through the middle. We'll
|
||||||
|
make it 11 units wide and 21 units high. To draw that in the middle of
|
||||||
|
the screen, we'd set CX=139 and CY=95, then draw lines offset from that
|
||||||
|
by +/- 5 in X and +/- 10 in Y:
|
||||||
|
|
||||||
|
HPLOT CX-5,CY-10 TO CX-5,CY+10 : REM LEFT
|
||||||
|
HPLOT CX-5,CY-10 TO CX+5,CY-10 : REM TOP
|
||||||
|
HPLOT CX+5,CY-10 TO CX+5,CY+10 : REM RIGHT
|
||||||
|
HPLOT CX-5,CY+10 TO CX+5,CY+10 : REM BOTTOM
|
||||||
|
HPLOT CX-5,CY-10 TO CX+5,CY+10 : REM SLASH
|
||||||
|
HPLOT CX+5,CY-10 TO CX-5,CY+10 : REM BACKSLASH
|
||||||
|
|
||||||
|
Six lines, each of which needs four coordinates. We'd need 24 bytes
|
||||||
|
to store that in an integer array.
|
||||||
|
|
||||||
|
Suppose instead we identified the four vertices, and numbered them:
|
||||||
|
|
||||||
|
#0 CX-5,CY-10
|
||||||
|
#1 CX+5,CY-10
|
||||||
|
#2 CX-5,CY+10
|
||||||
|
#3 CX+5,CY+10
|
||||||
|
|
||||||
|
and then created a list of line segments using the vertex indices:
|
||||||
|
|
||||||
|
HPLOT #0 TO #2
|
||||||
|
HPLOT #0 TO #1
|
||||||
|
HPLOT #1 TO #3
|
||||||
|
HPLOT #2 TO #3
|
||||||
|
HPLOT #0 TO #3
|
||||||
|
HPLOT #1 TO #2
|
||||||
|
|
||||||
|
This requires (4*2) + (6*2) = 20 bytes, for a small savings. The real
|
||||||
|
value in the approach is that it separates the description of the shape
|
||||||
|
from the placement of the points. For example, if you want to change
|
||||||
|
vertex #0 to (CX-7,CY-12), you don't have to make changes to three
|
||||||
|
separate HPLOT calls. (This is particularly useful when you have code
|
||||||
|
that scales and rotates the vertices.)
|
||||||
|
|
||||||
|
For the current release of fdraw, the only built-in transform is
|
||||||
|
translation. Using "&AT cx,cy", you can place the center point anywhere
|
||||||
|
on the screen. This allows you to animate movement of the shape by
|
||||||
|
simply calling &AT to change the position, and &PLOT to draw.
|
||||||
|
|
||||||
|
The &PLOT command takes three arguments: the address of a vertex array,
|
||||||
|
the address of an index array, and the number of line segments to draw.
|
||||||
|
These are referred to as "byte arrays" because they are arbitrary
|
||||||
|
locations in memory where you have BLOADed or POKEd your shape data, not
|
||||||
|
Applesoft arrays. The count can be from 0 to 127. You can optionally
|
||||||
|
add an AT to the end; if not present, the coordinates of the previous AT
|
||||||
|
are used. The initial value is the center of the screen (x=139 y=95).
|
||||||
|
|
||||||
|
The vertex array uses two signed bytes per vertex (-128 to 127), one for
|
||||||
|
the X coordinate and one for the Y coordinate.
|
||||||
|
|
||||||
|
The index array uses two bytes per line segment. Each byte is an index
|
||||||
|
into the vertex array, from 0 to 127.
|
||||||
|
|
||||||
|
Here's an Applesoft program that implements the above example. (The DATA
|
||||||
|
statements use negative numbers for clarity; if you replace the negative
|
||||||
|
values with 256+value, e.g. -5 becomes 251, then you can avoid the IF
|
||||||
|
statement and just poke the value directly.)
|
||||||
|
|
||||||
|
100 TEXT : NORMAL : HOME
|
||||||
|
200 & NEW : & HGR : VTAB 21
|
||||||
|
210 & HCOLOR= 3
|
||||||
|
500 REM ARRAY TEST
|
||||||
|
510 AD = 768: REM $300
|
||||||
|
520 READ D: IF D = 1000 THEN 560
|
||||||
|
530 IF D < 0 THEN D = 256 + D
|
||||||
|
540 POKE AD,D:AD = AD + 1: GOTO 520
|
||||||
|
560 & PLOT 768,776,6: & AT 50,50: & PLOT 768,776,6
|
||||||
|
570 POKE 768,256 - 10: POKE 769,256 - 20: & PLOT 768,776,6 AT 100,50
|
||||||
|
600 DATA -5,-10, 5,-10, -5,10, 5,10
|
||||||
|
610 DATA 0,2, 0,1, 1,3, 2,3, 0,3, 1,2, 1000
|
||||||
|
|
||||||
|
This draws the shape twice, once at the middle of the screen, once centered
|
||||||
|
at 50,50. It then adjusts the top-left coordinate, and draws the shape
|
||||||
|
centered at 100,50. Looking at the output, you can see that the top-left
|
||||||
|
corner of the third instance has moved, and all three lines from that
|
||||||
|
point have moved with it.
|
||||||
|
|
||||||
|
If a vertex ends up off-screen, lines that use that vertex are omitted
|
||||||
|
(not clipped). If you tried to draw the example shape at (0,0), nothing
|
||||||
|
would happen, because every line has at least one point that would be
|
||||||
|
off-screen -- only point #3 is still visible, and all of the lines that
|
||||||
|
use that point extend off screen.
|
||||||
|
|
||||||
|
You can specify a maximum of 128 vertices and 128 index pairs for a
|
||||||
|
single call. If none of the line segments share vertices, you'll need
|
||||||
|
two vertices per line, which means a cap of 64 lines.
|
||||||
|
|
||||||
|
#### Performance ####
|
||||||
|
|
||||||
|
There isn't a whole lot to it -- it just feeds the lines to DrawLine.
|
||||||
|
The key speed advantage is the removal of the Applesoft overhead.
|
||||||
|
|
||||||
|
|
||||||
|
<div id='ideas'/>
|
||||||
|
## Enhancement Ideas ##
|
||||||
|
|
||||||
|
Some ideas for future versions of fdraw.
|
||||||
|
|
||||||
|
### fdraw ###
|
||||||
|
|
||||||
|
Line clipping would make the array-draw function more useful for
|
||||||
|
animation projects. If we accepted signed 16-bit values as input to
|
||||||
|
the clip function, we could specify an AT point outside the screen bounds.
|
||||||
|
That could be extended to circles, which could have off-screen centers.
|
||||||
|
|
||||||
|
A "game line" function or line mode that restricts coordinates to 0-255
|
||||||
|
and ignores color might be worth an experiment.
|
||||||
|
|
||||||
|
Triangle rasterization is possible, but perhaps a bit silly.
|
||||||
|
|
||||||
|
We could handle ellipses, but they're more complicated than circles, and
|
||||||
|
are slower to compute -- you need a couple of multiplications during
|
||||||
|
setup, and the asymmetry means you have to compute a quadrant rather
|
||||||
|
than an octant. If the goal is fast animation rather than general-purpose
|
||||||
|
picture painting then there's little value in supporting ellipses.
|
||||||
|
|
||||||
|
Some of the inner loops are almost certainly paying an extra cycle to
|
||||||
|
cross a page boundary. That's not easy to fix without adding absurd
|
||||||
|
amounts of padding.
|
||||||
|
|
||||||
|
"USE_FAST" could be applied more aggressively to reduce the size.
|
||||||
|
|
||||||
|
Having "fast" vs. "small" builds was mostly an experiment to see how
|
||||||
|
much of a difference in size and speed we'd get by dropping some of
|
||||||
|
the more expensive operations. Another way to reduce size would be to
|
||||||
|
make the build modular, so you could (say) omit circle drawing or only
|
||||||
|
include line drawing. Some trade-offs would have to be made, e.g. if
|
||||||
|
you only wanted line drawing then it makes sense to disable (or replace)
|
||||||
|
the horizontal-line optimization that calls FillRaster, as that requires
|
||||||
|
some sizeable tables that would otherwise be unused.
|
||||||
|
|
||||||
|
### Amperfdraw ###
|
||||||
|
|
||||||
|
The Amperfdraw API is somewhat minimal and could be improved. Taking a
|
||||||
|
cue from Beagle Graphics, the rect and circle calls should probably look
|
||||||
|
more like:
|
||||||
|
|
||||||
|
&DRAW width,height [AT left,top]
|
||||||
|
&COS radius [AT left,top]
|
||||||
|
|
||||||
|
The "&AT" coordinate, currently only used by &PLOT, should be more
|
||||||
|
widely used. Not only is it more convenient, it's also slightly faster,
|
||||||
|
since we don't have to parse the left/top coordinates each time.
|
||||||
|
|
||||||
|
The existing code is (somewhat lazily) using the Applesoft routines to
|
||||||
|
parse coordinates, which includes the range check. We wouldn't be able
|
||||||
|
to use them for width/height, because we would need to take values in the
|
||||||
|
range (0-280, 0-192), where width/height of zero means "draw nothing".
|
||||||
|
|
||||||
|
I deliberately used Applesoft tokens, rather than arbitrary words, to
|
||||||
|
make commands simpler to parse. Some of them don't fit that well. COS
|
||||||
|
and SIN are circle-related, but it's not obvious which is outline and
|
||||||
|
which is filled. DRAW and XDRAW don't really sound like rectangle-draw
|
||||||
|
calls, and would be much more appropriate if used to set the line draw
|
||||||
|
mode. Spending a few bytes & cycles to get better names might be
|
||||||
|
worthwhile.
|
||||||
|
|
||||||
|
It's possible to store &PLOT arrays in actual BASIC integer arrays,
|
||||||
|
which might make them easier to code for. The fact that arrays are
|
||||||
|
DIM()ed once, cannot be resized, and cannot be discarded makes them
|
||||||
|
difficult to use for dynamic data.
|
||||||
|
|
||||||
|
Currently &PLOT takes a list of vertices and a list of line segments.
|
||||||
|
We could also support "continuous line" mode, where it just plays
|
||||||
|
connect-the-dots (saves space, doesn't really affect speed). Being
|
||||||
|
able to embed color changes could be handy.
|
||||||
|
|
||||||
|
&PLOT handles lines and vertices the way Applesoft does, with inclusive
|
||||||
|
coordinates. This results in overdraw when vertices are shared. This
|
||||||
|
is a (small) performance hit, and causes graphical glitches when connected
|
||||||
|
lines are drawn in "xdraw" mode.
|
||||||
|
|
||||||
|
|
||||||
|
<div id='additional-notes'/>
|
||||||
|
# Additional Notes #
|
||||||
|
|
||||||
|
Getting into the gory details here.
|
||||||
|
|
||||||
|
## Setting a pixel ##
|
||||||
|
|
||||||
|
Hi-res pixels are curious creatures.
|
||||||
|
|
||||||
|
Pixel color values are determined by adjacent bits. The various drawing
|
||||||
|
routines only set one bit at a time, so "drawing" in green (hcolor=1) will
|
||||||
|
cause bits to be set in odd columns, cleared in even columns. We don't
|
||||||
|
touch adjacent bits, so drawing purple (hcolor=2) in column 0 and green
|
||||||
|
in column 1 will produce a white line, while drawing them with the columns
|
||||||
|
reversed will produce a black line.
|
||||||
|
|
||||||
|
Making life more complicated is the use of the high bit in each byte, which
|
||||||
|
affects the color. If you draw a purple line in column 0, and a black1
|
||||||
|
line with hcolor=4 in column 6, the purple line turns blue, because the
|
||||||
|
black1 line sets the high bit.
|
||||||
|
|
||||||
|
To set a bit at an arbitrary X offset, we need to do the following:
|
||||||
|
|
||||||
|
(1) Determine which byte to change (xc / 7) and which bit (xc mod 7).
|
||||||
|
(2) Determine the color mask for that byte. For green, it's 0x2a
|
||||||
|
(00101010) in even columns, 0x55 (01010101) in odd columns.
|
||||||
|
(3) Set or clear the target bit and the high bit, leaving the others
|
||||||
|
intact.
|
||||||
|
|
||||||
|
One way to do this is illustrated below. Assume we're drawing a green
|
||||||
|
line at X=17. There's already a green dot at X=15, which gives us a
|
||||||
|
bit pattern of 00000010. (Bits are "backwards", i.e. the bit on the
|
||||||
|
right is the pixel on the left.)
|
||||||
|
|
||||||
|
LDY byteoffset Y=2
|
||||||
|
LDX bitoffset X=3
|
||||||
|
LDA bitmask,x A=0x88 (10001000)
|
||||||
|
STA <andmask
|
||||||
|
LDA oddevencolor,y 4 cyc A=0x2a (00101010)
|
||||||
|
EOR (hbasl),y 5 cyc A=0x28 (00101010 ^ 00000010 = 00101000)
|
||||||
|
AND <andmask 3 cyc A=0x08 (00101000 & 10001000 = 00001000)
|
||||||
|
EOR (hbasl),y 5 cyc A=0x0a (00001000 ^ 00000010 = 00001010)
|
||||||
|
STA (hbasl),y 6 cyc
|
||||||
|
|
||||||
|
As a second example, here's how we plot a black1 (hcolor=4) point at X=6
|
||||||
|
when there's a purple point (hcolor=2) at X=0 (00000001).
|
||||||
|
|
||||||
|
LDA bitmask,x A=0xc0 (11000000)
|
||||||
|
STA <andmask
|
||||||
|
LDA oddevencolor,y 4 cyc A=0x80 (10000000)
|
||||||
|
EOR (hbasl),y 5 cyc A=0x81 (10000000 ^ 00000001 = 10000001)
|
||||||
|
AND <andmask 3 cyc A=0x80 (10000001 & 11000000 = 10000000)
|
||||||
|
EOR (hbasl),y 5 cyc A=0x81 (10000000 ^ 00000001 = 10000001)
|
||||||
|
STA (hbasl),y 6 cyc
|
||||||
|
|
||||||
|
Note the purple pixel is still set, but now the high bit is as well,
|
||||||
|
changing it to blue.
|
||||||
|
|
||||||
|
The trick is to start with the color pattern, which specifies how we want
|
||||||
|
the bits to be set or cleared. We EOR in the screen, which causes the
|
||||||
|
bits in A to be inverted wherever they were set on the screen. Next we
|
||||||
|
use the AND mask to zero out the bits we don't want to update on-screen.
|
||||||
|
When we do the second EOR from the screen, the bits we just zeroed will
|
||||||
|
take on the values from the screen, while the bits we didn't zero will
|
||||||
|
return to their original values from the color pattern (because EORing
|
||||||
|
twice with the same value restores the original).
|
||||||
|
|
||||||
|
It might look a little nicer if we always set two adjacent bits. That
|
||||||
|
would avoid the phenomenon where drawing from 0,0 to 0,10 in green doesn't
|
||||||
|
appear to do anything. For 6 out of 7 pixels this is easy, a simple
|
||||||
|
adjustment to the bitmask, but for the 7th pixel we'll need to update an
|
||||||
|
adjacent byte... unless it's the rightmost byte, which would cause us to
|
||||||
|
overflow and wrap around (or write into a screen hole). GraFORTH
|
||||||
|
renders lines this way, avoiding the overflow issue by limiting the X
|
||||||
|
coordinate range to (0,255).
|
||||||
|
|
||||||
|
To implement "xdraw" mode, where instead of setting pixels we invert
|
||||||
|
the current value, we can just omit (or NOP out) the first EOR.
|
||||||
|
|
||||||
|
We could draw faster if we simply set the new bits, rather than setting
|
||||||
|
some and clearing others according to the color mask. This could result
|
||||||
|
in some odd behavior, e.g. drawing a horizontal green line over a
|
||||||
|
horizontal purple line would result in a white line. Given how strange
|
||||||
|
things are in general this might not be an issue.
|
||||||
|
|
||||||
|
For 3D games like Stellar 7 or Elite, which essentially draw thin
|
||||||
|
monochromatic lines, we can drop the color mask and just set the bit on
|
||||||
|
the screen. Plotting a pixel is then simply:
|
||||||
|
|
||||||
|
LDA (hbasl),y 5 cyc
|
||||||
|
ORA <bitmask 3 cyc
|
||||||
|
STA (hbasl),y 6 cyc
|
||||||
|
|
||||||
|
This cuts the cycle count from 23 to 14. It's also not necessary to
|
||||||
|
worry about the high bit, which can save a few cycles when shifting
|
||||||
|
the bitmask. Most games are also able to limit the "active" part of
|
||||||
|
the screen to fewer than 255 pixels, which eliminates some 16-bit math
|
||||||
|
during setup.
|
||||||
|
|
||||||
|
For "xdraw" mode, the "ORA <bitmask" becomes "EOR <bitmask".
|
||||||
|
|
||||||
|
|
||||||
|
## Single- or Double-Buffered Animation ##
|
||||||
|
|
||||||
|
Because the Apple II has two hi-res graphics pages, it's possible to
|
||||||
|
double-buffer the animation to reduce or eliminate flicker. The
|
||||||
|
application displays one page while erasing and redrawing the other.
|
||||||
|
|
||||||
|
In most cases it's faster to erase the entire screen with the Clear
|
||||||
|
function than it is to draw over with black. For example, consider four
|
||||||
|
diagonal lines in a diamond shape, 100 pixels on a side. Diagonal
|
||||||
|
lines are the most expensive, as each step requires advancing in
|
||||||
|
both vertical and horizontal directions. The current implementation
|
||||||
|
needs about 80 cycles per diagonal pixel, or 100 * 4 * 80 = 32,000 cycles
|
||||||
|
to draw four medium-length lines (ignoring the setup cost for each line).
|
||||||
|
If you assume that the average cost to draw a pixel is about 70 cycles,
|
||||||
|
you can draw 570 pixels in the time it takes to erase the full screen.
|
||||||
|
|
||||||
|
We can clear the entire screen in about 40,000 cycles. If the drawing
|
||||||
|
area is smaller, a custom clear routine could do it in even less.
|
||||||
|
(Imagine your drawing routines keep track of the highest and lowest
|
||||||
|
line that anything touches, and then just erase the "dirty" lines.) So
|
||||||
|
unless you're doing relatively light rendering, you'll get the best
|
||||||
|
performance by wiping all or part of the screen rather than drawing over the
|
||||||
|
previous contents.
|
||||||
|
|
||||||
|
The &INVERSE command is intended to make double-buffered animation
|
||||||
|
easier from BASIC. Use &HGR2 to switch to full-screen mode, then call
|
||||||
|
`&SCRN(1):&HCOLOR=0:&CLEAR` to select page 1 and clear it. Draw your
|
||||||
|
first frame, then call &INVERSE to display page 1 and select page 2
|
||||||
|
for drawing.
|
||||||
|
|
||||||
|
|
||||||
|
An alternative approach is exemplified by Elite. The game only uses
|
||||||
|
one hi-res page, but doesn't noticeably flicker (though distant objects
|
||||||
|
sort of "sparkle"). Suppose you're writing a similarly line-oriented
|
||||||
|
game, and your rendering cycle looks like this:
|
||||||
|
|
||||||
|
- Step 1: draw over previous content with black
|
||||||
|
- Step 2: draw new content with white
|
||||||
|
|
||||||
|
Your game will flicker badly without double-buffering, because there will
|
||||||
|
be a few display refresh periods where most of the lines have been erased.
|
||||||
|
Suppose instead you did this:
|
||||||
|
|
||||||
|
- For each line in the shape, erase the old line, then draw the line in
|
||||||
|
its new position
|
||||||
|
|
||||||
|
Now you might get some flickering on certain lines if the beam crosses
|
||||||
|
them while they're black, but the shape as a whole will be visible most
|
||||||
|
of the time. The trouble with this approach is that, if your shape is
|
||||||
|
moving across the screen, you'll be drawing black over some recent white
|
||||||
|
lines, causing some distracting artifacts.
|
||||||
|
|
||||||
|
The way to make this work is to use "xdraw" mode, where bits are toggled
|
||||||
|
rather than set or cleared. If you draw a new line across an old line that
|
||||||
|
will soon be erased, the crossing point is cleared. When the old line
|
||||||
|
is erased, the crossing point is set white again, so your new line
|
||||||
|
appears unbroken.
|
||||||
|
|
||||||
|
It should be noted that this works well for Elite because they use backface
|
||||||
|
elimination, so lines within a single shape don't cross. It's also
|
||||||
|
important to avoid re-drawing points at shared vertices, or your corners
|
||||||
|
will disappear unless there are an odd number of lines.
|
||||||
|
|
||||||
|
If there's very little on screen, this could be faster than a full clear.
|
||||||
|
Mostly it's of value if you need the 8KB occupied by the second hi-res
|
||||||
|
page for something other than output.
|
||||||
|
|
||||||
|
|
||||||
|
## Vertically-Challenged Rasterization ##
|
||||||
|
|
||||||
|
As noted earlier, we can clear the screen in about 40,000 cycles with
|
||||||
|
the Clear function, but drawing a screen-sized filled rectangle takes
|
||||||
|
about 96,000. Why the difference?
|
||||||
|
|
||||||
|
The FillRaster function handles one horizontal line at a time. For
|
||||||
|
each line it sets any pixels sticking out on the left and right edges,
|
||||||
|
and then it jumps into an unrolled byte-stomp function that blasts
|
||||||
|
its way through the middle at 10 cycles per byte. Compare this to the
|
||||||
|
Clear function, which only needs 5 cycles per byte.
|
||||||
|
|
||||||
|
The trick to improving the speed at which we draw filled rectangles
|
||||||
|
is to make it more like the Clear function, which operates on columns
|
||||||
|
rather than rows.
|
||||||
|
|
||||||
|
Suppose, for example, we figured out which bits we need to set on the
|
||||||
|
left edge, and then applied them to every row. Then we did the same
|
||||||
|
for the right edge. The set-up cost for each edge went from
|
||||||
|
(N cycles * Y rows) to (N cycles). Can we apply this to the middle
|
||||||
|
byte as well?
|
||||||
|
|
||||||
|
It turns out we can. The fundamental problem with setting bytes
|
||||||
|
horizontally is that we have to index off of a direct page register,
|
||||||
|
e.g. "STA (hbasl),y". The only ways around this either add too much
|
||||||
|
loop overhead, too much setup overhead, or require too much memory.
|
||||||
|
For any given line, we need to find the base address, and issue a
|
||||||
|
6-cycle indirect store, followed immediately by an increment of the Y
|
||||||
|
register. If we're drawing in color it's worse than that, because we
|
||||||
|
also have to exclusive-OR the color because the bit pattern flips for
|
||||||
|
odd/even columns.
|
||||||
|
|
||||||
|
We're much better off unrolling vertically. Suppose you have 192
|
||||||
|
"STA abs,y" instructions, one for each row, one after the other. You
|
||||||
|
no longer need the base address lookup, because it's baked into the
|
||||||
|
code, and since we're only touching one column we don't need to worry
|
||||||
|
about odd/even color values here. To use this to draw rows 50-100, you
|
||||||
|
would replace the STA in row 101 with an RTS, and then JSR to the 50th
|
||||||
|
STA instruction. After the column is painted, you increment Y, exclusive-OR
|
||||||
|
the color value, and jump through again. (You can make this a little
|
||||||
|
faster by JMPing in and out instead, but you pay a bit more for setup
|
||||||
|
and cleanup, especially when you have to restore the base address that
|
||||||
|
got overwritten by the JMP.)
|
||||||
|
|
||||||
|
With this change we're working at 5 cycles per byte, plus the loop
|
||||||
|
overhead. A full-screen FillRect will be about as fast as a Clear.
|
||||||
|
|
||||||
|
There are a couple of down sides. First, you need 192*3=576 bytes to
|
||||||
|
hold this pile of store instructions. If you're drawing a lot of filled
|
||||||
|
rectangles, though, the 2x speed improvement would make the size penalty
|
||||||
|
worthwhile. The other problem arises if you use double-buffered animation,
|
||||||
|
as the table is hard-wired to page 1. You can either spend a couple
|
||||||
|
thousand cycles when the page flips to rewrite the addresses, or you can
|
||||||
|
have a second full copy of the stores for page 2.
|
||||||
|
|
||||||
|
The current horizontally-focused implementation uses 256 bytes for its
|
||||||
|
unrolled code area, but you wouldn't be able to get rid of that by
|
||||||
|
switching to the vertical approach. The reason the code works the way
|
||||||
|
it does is that it's designed to render circles, and those are hard to do
|
||||||
|
vertically. With horizontal rasters, when you look at the left and right
|
||||||
|
edges you only need to examine the current row, and set pixels in a
|
||||||
|
single byte. With vertical strips, each byte spans seven columns of
|
||||||
|
pixels, so the top and bottom "edges" might be several bytes deep. The
|
||||||
|
code would have to iterate in "edge space" until it reached the meaty
|
||||||
|
center, and the cost of doing so would likely erase the benefit of vertical
|
||||||
|
fills until your circles got reasonably large.
|
||||||
|
|
||||||
|
It's possible that a hybrid approach, in which selected rectangles in the
|
||||||
|
center of a large circle are drawn with a fast vertical fill, could be
|
||||||
|
used, with slower code rendering the outer edges. The trick would be to
|
||||||
|
come up with an approach that doesn't leave gaps, minimizes overdraw, and
|
||||||
|
is sufficiently faster to make the effort worthwhile.
|
||||||
|
|
|
@ -0,0 +1,197 @@
|
||||||
|
My Quest for Lines
|
||||||
|
==================
|
||||||
|
|
||||||
|
As far back as I can remember, I always wanted to draw lines on the
|
||||||
|
hi-res screen.
|
||||||
|
|
||||||
|
This probably started when I saw Battlezone in the arcades in the early
|
||||||
|
1980s. I still think the game is beautiful -- a first-person shooter
|
||||||
|
reduced to the essential elements. I wanted to write something similar
|
||||||
|
for the Apple II, but I didn't know where to start. (I should probably
|
||||||
|
mention that I was 11 years old in 1980.)
|
||||||
|
|
||||||
|
Battlezone had a dedicated matrix processor (the "math box"), and a
|
||||||
|
vector display that handled the line drawing. The Apple II had neither
|
||||||
|
of those things, which meant that achieving the same level of performance
|
||||||
|
and graphical detail wasn't possible. Despite those shortcomings, Damon
|
||||||
|
Slye created a pretty solid Battlezone-ish game in 1983, called Stellar 7.
|
||||||
|
A couple of years later, Braben and Bell made another compelling wireframe
|
||||||
|
combat game, the space combat sim Elite. (The A2-FS1 flight simulator
|
||||||
|
came out much earlier, but the graphics were blinky, enemies were just
|
||||||
|
dots, and the action was much slower-paced. Of course, it loaded from
|
||||||
|
cassette tape and ran in 16KB, so they didn't have much choice.)
|
||||||
|
|
||||||
|
Seeing these games showed me that the problems could be solved. I decided
|
||||||
|
that the place to start was line drawing, because (a) line drawing is
|
||||||
|
pretty fundamental to wireframe 3D, and (b) I wasn't getting the performance
|
||||||
|
I needed out of HPLOT TO.
|
||||||
|
|
||||||
|
Somewhere in the mid-1980s -- I was in high school now -- I began by trying
|
||||||
|
to figure out how line drawing worked. Suppose, for example, you want to
|
||||||
|
HPLOT 0,0 TO 19,5. How do you decide which pixels to set?
|
||||||
|
|
||||||
|
I wrote a program (which I recently found) called "HPLOT SIMULATOR". It
|
||||||
|
computed the ratio of vertical to horizontal pixels (e.g. 6 / 20 = 0.3),
|
||||||
|
and marched horizontally across the screen, adding the fractional value to
|
||||||
|
the Y coordinate at each step. The result was a pretty good-looking line.
|
||||||
|
|
||||||
|
The trouble was that it used floating-point math and required division,
|
||||||
|
things that the 6502 is not very good at. It occurred to me that division
|
||||||
|
can be performed as a series of integer subtractions. (It probably occurred
|
||||||
|
to me because I didn't know any other way to divide on the 6502, not having
|
||||||
|
encountered the shift-and-subtract approach yet.) So if you initialize a
|
||||||
|
counter to zero, and add 6 to it each time you move horizontally, then when
|
||||||
|
it reaches 20 you know it's time to move vertically. Subtracting 20 from
|
||||||
|
the counter resets it, but retains the division remainder as the starting
|
||||||
|
point, so you retain the fractional part.
|
||||||
|
|
||||||
|
When I went to college I took a graphics class, and was introduced to
|
||||||
|
Bresenham's classic line algorithm. This was essentially the same as what
|
||||||
|
I'd figured out for myself, but with two refinements: (1) it used signed
|
||||||
|
values, allowing a slightly cheaper "< 0" comparison, and (2) it started
|
||||||
|
with the counter half full, correcting the slight lopsidedness of my lines.
|
||||||
|
|
||||||
|
The graphics class inspired me to write a 3D game library called Arc3D
|
||||||
|
in 1990. I used it to create a pair of demos: "Not Modulae", which
|
||||||
|
animated several 3D shapes on the screen, including a pair of ships from
|
||||||
|
Elite; and "Not Stellar 7", a graphics demo that let you drive around
|
||||||
|
(and, sadly, through) some tanks from Stellar 7. The Arc3D library was
|
||||||
|
written for the IIgs, in 65816 assembly, and used the super-hi-res screen.
|
||||||
|
Having a better CPU, lots more memory, and a less-quirky graphics
|
||||||
|
architecture made things easier than doing the same on a classic Apple II.
|
||||||
|
|
||||||
|
I wrote my own super-hi-res line drawing code, of course, but a year later
|
||||||
|
when I disassembled somebody else's demo I found better code. Which, it
|
||||||
|
turned out, they had also lifted from another source, an FTA demo. I
|
||||||
|
dropped mine and used theirs.
|
||||||
|
|
||||||
|
After I graduated from college, my side projects tended more toward data
|
||||||
|
compression and Netrek, so Arc3D was never improved upon.
|
||||||
|
|
||||||
|
Fifteen years later, in 2006, there was a discussion on a Usenet group
|
||||||
|
about circle rendering. Once upon a time I'd drawn circles from BASIC
|
||||||
|
with trig functions, but it was painfully slow, which made me wonder
|
||||||
|
about a part of the game Horizon V where you steer through a series of
|
||||||
|
circles. I wanted to try it for myself and see what it would take.
|
||||||
|
(Looking at a youtube video of Horizon V, the animation is more radial
|
||||||
|
than circular... I suspect it's not really drawing circles at all.)
|
||||||
|
|
||||||
|
I first announced my results in a
|
||||||
|
[comp.sys.apple2.programmer](https://groups.google.com/forum/#!msg/comp.sys.apple2.programmer/Vj_xVjMHaR0/cLU3t2TlPrMJ)
|
||||||
|
posting. I had focused on filled circles, rather than outline circles,
|
||||||
|
since that seemed like a more interesting challenge. The "fdraw" demo
|
||||||
|
supported fast rendering of filled circles, filled rectangles, and had
|
||||||
|
a very fast screen clear. A week later, after a bit of cleanup, I
|
||||||
|
[released the fdraw v0.2 sources](https://groups.google.com/d/msg/comp.sys.apple2.programmer/Un4pV5p8Elw/6qZVAPc_da0J).
|
||||||
|
|
||||||
|
It occurred to me at the time that this would be a handy place to stick
|
||||||
|
the hi-res line drawing code I'd always wanted to write. Somewhere around
|
||||||
|
this time I also sort of poked at the idea of writing a dedicated hi-res
|
||||||
|
graphics compression program.
|
||||||
|
|
||||||
|
Fast forward another nine years, to 2015. After learning about the LZ4
|
||||||
|
format, I went back to my data compression roots and wrote
|
||||||
|
[fhpack](https://github.com/fadden/fhpack) and some demos. I had so much
|
||||||
|
fun doing it that I decided it was finally time to write some hi-res
|
||||||
|
line drawing code.
|
||||||
|
|
||||||
|
Being older, wiser, and having easy access to relevant information, I
|
||||||
|
began with the appropriate chapters in Michael Abrash's _Graphics
|
||||||
|
Programming Black Book Special Edition_. This covered the standard
|
||||||
|
algorithm, but also had a chapter on a faster "run-slice" approach.
|
||||||
|
This intrigued me, because instead of the usual "step right, check if
|
||||||
|
it's time to move down, step right, check if it's time ..." logic, it
|
||||||
|
says, "figure out how long each line segment is; then, move right 3
|
||||||
|
times, step down, move right 4 times, step down, ...", saving a lot of
|
||||||
|
redundant computation. The trouble is that it requires fixed-point
|
||||||
|
division, and drawing N adjacent pixels is tricky when your graphics
|
||||||
|
architecture has 7 horizontal pixels per byte. You'd have to be a bit
|
||||||
|
crazy to try to get that to work.
|
||||||
|
|
||||||
|
So I went with a standard approach, and used the Applesoft ROM method of
|
||||||
|
coloring pixels (discussed in the fdraw docs). I carefully optimized
|
||||||
|
the code, and squeezed out as much performance as I could.
|
||||||
|
|
||||||
|
When I was done, I began looking around at what other people did to see if
|
||||||
|
there were any tricks I missed.
|
||||||
|
|
||||||
|
I looked at the Applesoft ROM code. Very clever, but very much optimized
|
||||||
|
for space over speed. Also, because it's in ROM, self-modifying code is
|
||||||
|
not possible, so they lose a cycle here and there.
|
||||||
|
|
||||||
|
Next I looked at GraFORTH. I figured out how functions were arranged,
|
||||||
|
identified the plot function, and disassembled it with CiderPress. It uses
|
||||||
|
a pretty standard algorithm, but supports multiple drawing modes and sets
|
||||||
|
two adjacent bits for better-looking colored lines. Good use of
|
||||||
|
self-modifying code, but some choices were made to reduce code size at the
|
||||||
|
expense of speed. My code was faster.
|
||||||
|
|
||||||
|
Next I looked at Elite. Digging through memory after the program had
|
||||||
|
loaded, I found a collection of purpose-built line functions. Some drew
|
||||||
|
color, most used EOR to "xdraw" monochrome lines. Standard Bresenham
|
||||||
|
approach, with a bit of variation on the Y-lookup table -- their table is
|
||||||
|
only 24 bytes (1/8th of the screen), and they use a quick "add 4 to the
|
||||||
|
high byte" 7 out of every eight lines. I tried applying this to my code,
|
||||||
|
but it turned out that just using a full lookup table was a tiny bit faster.
|
||||||
|
|
||||||
|
Next I looked at Stellar 7, one of my earliest inspirations. I scanned
|
||||||
|
through some files with CiderPress, looking for anything line-draw-esque.
|
||||||
|
(If you spend enough time drawing lines you start to see patterns.)
|
||||||
|
After about five minutes I found the code, in the same file as this
|
||||||
|
gigantic unrolled division routine. But as I started to dig into the code
|
||||||
|
I noticed that it was using a count oddly, and this one function was...
|
||||||
|
HOLY CATS he did run-slicing.
|
||||||
|
|
||||||
|
And he did it big. There are several line functions, all of them padded
|
||||||
|
out to live on a single page (so that none of the branches cross page
|
||||||
|
boundaries, which costs an extra cycle). It has the usual special cases --
|
||||||
|
simple horizontal and vertical lines -- and the usual split between
|
||||||
|
vertically-dominant and horizontally-dominant lines. But there are *three*
|
||||||
|
different functions for drawing mostly-horizontal lines, selected based on
|
||||||
|
slope, all of which try to set multiple horizontal pixels at once. The
|
||||||
|
slope of the line affects how the code is structured; for example, for
|
||||||
|
very shallow lines it expects that it will often be able to set an entire
|
||||||
|
byte at once. Color is not supported, so pixels are set with a simple
|
||||||
|
OR operation.
|
||||||
|
|
||||||
|
It's very impressive, and a wee bit terrifying. But when you're making
|
||||||
|
a game that will be spending much of its time drawing lines, you really
|
||||||
|
want to optimize those draw functions.
|
||||||
|
|
||||||
|
The tricky part is that divide. The division routine is unrolled to a
|
||||||
|
healthy 187 bytes long, and might take 240 cycles to run. For short
|
||||||
|
lines and mostly-vertical lines it might have been more efficient to skip
|
||||||
|
the division and just use a run-length implementation, but the ability to
|
||||||
|
set multiple bits at once for mostly-horizontal lines is a huge win. It's
|
||||||
|
a fair bet that the code in Stellar 7 is the fastest line drawing
|
||||||
|
implementation for the Apple II. (Of course, I haven't looked at Arcticfox,
|
||||||
|
the sequel...)
|
||||||
|
|
||||||
|
The general structure of the code was actually very similar to mine: always
|
||||||
|
draw left to right, use self-modifying code to handle up vs. down, and so on.
|
||||||
|
I didn't come away with any new ideas for optimizations to my run-length
|
||||||
|
implementation from this or the other programs I looked at... but there
|
||||||
|
are a lot of other games that I haven't disassembled.
|
||||||
|
|
||||||
|
|
||||||
|
So, 30+ years after HPLOT SIMULATOR, here I am with a bunch of code for
|
||||||
|
drawing lines on the Apple II hi-res screen.
|
||||||
|
|
||||||
|
I don't plan on writing Battlezone for the Apple II. Stellar 7 did that,
|
||||||
|
and more. My goal in developing fdraw was to scratch a very old itch.
|
||||||
|
|
||||||
|
I had forgotten how much fun this stuff is. Working in ARM assembly
|
||||||
|
language on Android offered similar challenges, but you're never entirely
|
||||||
|
sure exactly how your code will perform on the wide range of CPU
|
||||||
|
architectures (affecting instruction interleave, cache size and
|
||||||
|
replacement policy, etc.), you have to guess at cache misses and the
|
||||||
|
success rate of data prefetching, and it's difficult to measure results when
|
||||||
|
there are multiple threads running and interrupts firing. On the Apple II
|
||||||
|
you can count every cycle, and know exactly what will happen when.
|
||||||
|
|
||||||
|
I don't expect that anyone will find the code useful, but that wasn't
|
||||||
|
really the point.
|
||||||
|
|
||||||
|
Andy McFadden
|
||||||
|
August 2015
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue