mirror of
https://github.com/fadden/fdraw.git
synced 2025-03-13 05:30:39 +00:00
Initial checkin
fdraw v0.3
This commit is contained in:
parent
2ca9d4084f
commit
418e7b7191
549
AMPERFDRAW.S
Normal file
549
AMPERFDRAW.S
Normal file
@ -0,0 +1,549 @@
|
||||
********************************
|
||||
* *
|
||||
* Amper-fdraw *
|
||||
* By Andy McFadden *
|
||||
* For fdraw version 0.3 *
|
||||
* *
|
||||
* Applesoft ampersand *
|
||||
* interface for fdraw. *
|
||||
* *
|
||||
* Developed with Merlin-16 *
|
||||
* *
|
||||
********************************
|
||||
|
||||
lst off
|
||||
org $1d60
|
||||
|
||||
* All of the handler entry points can fit on a single
|
||||
* page, so it's possible to save a few bytes by
|
||||
* dropping the high jump table and just hardcoding
|
||||
* the first page into the jump. This requires that
|
||||
* the ORG be at $xx00.
|
||||
|
||||
PUT FDRAW.DEFS
|
||||
|
||||
* Applesoft BASIC tokens.
|
||||
tok_plot equ $8d
|
||||
tok_hgr2 equ $90
|
||||
tok_hgr equ $91
|
||||
tok_hcolor equ $92
|
||||
tok_hplot equ $93
|
||||
tok_draw equ $94
|
||||
tok_xdraw equ $95
|
||||
tok_inverse equ $9e
|
||||
tok_clear equ $bd
|
||||
tok_new equ $bf
|
||||
tok_to equ $c1
|
||||
tok_at equ $c5
|
||||
*tok_sgn equ $d2
|
||||
tok_scrn equ $d7
|
||||
tok_exp equ $dd
|
||||
tok_cos equ $de
|
||||
tok_sin equ $df
|
||||
|
||||
* System locations.
|
||||
PCL equ $3a ;used by monitor
|
||||
PCH equ $3b ;used by monitor
|
||||
A1L equ $3c ;used by monitor
|
||||
A1H equ $3d ;used by monitor
|
||||
LINNUM equ $50 ;50-51
|
||||
FACLO equ $a1
|
||||
CHRGET equ $b1 ;advance ptr, get next tok
|
||||
CHRGOT equ $b7 ;get next tok (no advance)
|
||||
TXTPTR equ $b8
|
||||
HPAG equ $e6 ;$20 or $40
|
||||
|
||||
AMPERV equ $3f5
|
||||
|
||||
TXTCLR equ $c050
|
||||
TXTSET equ $c051
|
||||
MIXCLR equ $c052
|
||||
MIXSET equ $c053
|
||||
LOWSCR equ $c054
|
||||
HISCR equ $c055
|
||||
LORES equ $c056
|
||||
HIRES equ $c057
|
||||
|
||||
ERROR equ $d412 ;error based on X reg
|
||||
FRMNUM equ $dd67
|
||||
SynError equ $dec9 ;throw SYNTAX ERROR
|
||||
CHKCOM equ $debe
|
||||
IllQError equ $e199 ;throw ILLEGAL QUANTITY ERROR
|
||||
GETADR equ $e752
|
||||
GETBYT equ $e6f8 ;gets byte, in X/FACLO
|
||||
HFNS equ $f6b9 ;get hi-res x/y for hplot
|
||||
|
||||
* Prepare the ampersand vector.
|
||||
*
|
||||
* Ideally we'd check to see if the existing vector is
|
||||
* different from ours, and if so, jump to it when we
|
||||
* get a token we don't recognize. Not convinced
|
||||
* there's an actual use case for this.
|
||||
init
* Install "JMP dispatch" in the three-byte ampersand
* vector. The opcode byte is rewritten too, in case
* something replaced it. Only A and flags are used.
 lda #<dispatch ;operand, low byte
 sta AMPERV+1
 lda #$4c ;JMP opcode
 sta AMPERV
 lda #>dispatch ;operand, high byte
 sta AMPERV+2
 rts
|
||||
|
||||
* Entry point from BASIC. The token is in A.
|
||||
dispatch
|
||||
ldx #:cmdend-:cmdtab-1
|
||||
]loop cmp :cmdtab,x
|
||||
beq :match
|
||||
dex
|
||||
bpl ]loop
|
||||
jmp SynError
|
||||
|
||||
:match
|
||||
lda :jmptabh,x
|
||||
* lda #>h_new ;all on first page
|
||||
pha
|
||||
lda :jmptabl,x
|
||||
pha
|
||||
jmp CHRGET ;eat token, jump
|
||||
|
||||
|
||||
:cmdtab dfb tok_new
|
||||
dfb tok_hgr
|
||||
dfb tok_hgr2
|
||||
dfb tok_scrn
|
||||
dfb tok_hcolor
|
||||
dfb tok_inverse
|
||||
dfb tok_clear
|
||||
dfb tok_hplot
|
||||
dfb tok_xdraw
|
||||
dfb tok_draw
|
||||
dfb tok_exp
|
||||
dfb tok_cos
|
||||
dfb tok_sin
|
||||
dfb tok_at
|
||||
dfb tok_plot
|
||||
:cmdend
|
||||
|
||||
:jmptabl dfb <h_new-1
|
||||
dfb <h_hgr-1
|
||||
dfb <h_hgr2-1
|
||||
dfb <h_scrn-1
|
||||
dfb <h_hcolor-1
|
||||
dfb <h_inverse-1
|
||||
dfb <h_clear-1
|
||||
dfb <h_hplot-1
|
||||
dfb <h_xdraw-1
|
||||
dfb <h_draw-1
|
||||
dfb <h_exp-1
|
||||
dfb <h_cos-1
|
||||
dfb <h_sin-1
|
||||
dfb <h_at-1
|
||||
dfb <h_plot-1
|
||||
:jmptabh dfb >h_new-1
|
||||
dfb >h_hgr-1
|
||||
dfb >h_hgr2-1
|
||||
dfb >h_scrn-1
|
||||
dfb >h_hcolor-1
|
||||
dfb >h_inverse-1
|
||||
dfb >h_clear-1
|
||||
dfb >h_hplot-1
|
||||
dfb >h_xdraw-1
|
||||
dfb >h_draw-1
|
||||
dfb >h_exp-1
|
||||
dfb >h_cos-1
|
||||
dfb >h_sin-1
|
||||
dfb >h_at-1
|
||||
dfb >h_plot-1
|
||||
|
||||
|
||||
********************************
|
||||
* &NEW - initialize
|
||||
h_new
|
||||
lda #$20 ;match Init result
|
||||
sta g_cur_page
|
||||
lda #$00
|
||||
sta g_hcolor
|
||||
tax ;init "previous hplot"
|
||||
tay ; coord to zero
|
||||
jsr storeprv
|
||||
ldx #139 ;279/2
|
||||
ldy #0
|
||||
lda #95 ;191/2
|
||||
jsr storeac
|
||||
jmp f_Init
|
||||
|
||||
********************************
|
||||
* &HGR - show page 1 with mixed text, and clear screen.
|
||||
* Clears to color 0, then restores the &HCOLOR color.
|
||||
h_hgr
|
||||
ldx #$20 ;page 1
|
||||
lda #$00 ;$c054
|
||||
beq hgr_com
|
||||
|
||||
********************************
|
||||
* &HGR2 - show page 2 with no text, and clear screen.
|
||||
* Clears to color 0, then restores the &HCOLOR color.
|
||||
h_hgr2
|
||||
ldx #$40 ;page 2
|
||||
lda #$01 ;$c055
|
||||
;fall through to hgr_com
|
||||
|
||||
* We go slightly out of our way to clear the screen
|
||||
* before tripping the softswitches. This avoids
|
||||
* flashing the previous hi-res page contents when
|
||||
* entering from text mode.
|
||||
*
|
||||
* We also want to go nomix-page2 but page1-mix
|
||||
* (note reverse order) to avoid flashing text pg 2.
|
||||
hgr_com stx f_in_arg
|
||||
stx g_cur_page
|
||||
stx HPAG ;probably useful
|
||||
pha
|
||||
jsr f_SetPage
|
||||
lda #$00
|
||||
sta f_in_arg
|
||||
jsr f_SetColor
|
||||
jsr f_Clear
|
||||
lda g_hcolor ;restore color
|
||||
sta f_in_arg
|
||||
jsr f_SetColor
|
||||
bit TXTCLR ;$c050
|
||||
bit HIRES ;$c057
|
||||
pla
|
||||
beq :pg1
|
||||
bit MIXCLR ;$c052
|
||||
bit HISCR ;$c055
|
||||
rts
|
||||
:pg1 bit LOWSCR ;$c054
|
||||
bit MIXSET ;$c053
|
||||
rts
|
||||
|
||||
********************************
|
||||
* &SCRN({1,2}) - set the current hi-res page
|
||||
h_scrn
* The SCRN( token includes the open paren, so we parse
* a byte value and then require the closing paren.
* Page 1 maps to $20 and page 2 to $40.
 jsr GETBYT ;page number -> X
 cpx #1
 beq :okay
 cpx #2
 beq :okay
 jmp IllQError
:okay jsr CHRGOT ;re-read char after value
 cmp #$29 ;must be ')'
 beq :eat
 jmp SynError ;anything else is malformed
:eat jsr CHRGET ;eat ')'
 txa ;X/Y unaltered
 asl
 asl
 asl
 asl
 asl ;multiply x32
 sta g_cur_page
 sta f_in_arg
 jmp f_SetPage
|
||||
|
||||
********************************
|
||||
* &HCOLOR={0-7} - set the current color
|
||||
h_hcolor
* Parse a byte, reject anything above 7, then record it
* in g_hcolor (hgr_com reloads it after clearing) and
* pass it to f_SetColor.
 jsr GETBYT ;color value -> X
 cpx #8
 bge :toobig ;only 0-7 are legal
 stx g_hcolor
 stx f_in_arg
 jmp f_SetColor
:toobig jmp IllQError
|
||||
|
||||
********************************
|
||||
* &INVERSE - flip pages
|
||||
*
|
||||
* If we're currently drawing on $20, we set the page
|
||||
* to $40 and hit $c054 to show $20. And vice-versa.
|
||||
* The goal is to make double-buffered animation easy.
|
||||
h_inverse
|
||||
lda g_cur_page
|
||||
eor #$60
|
||||
sta g_cur_page
|
||||
ldx #$00
|
||||
cmp #$40 ;about to start drawing on 2?
|
||||
beq :showpg1 ;yes, show page 1
|
||||
inx ;no, show page 2
|
||||
:showpg1 ldy LOWSCR,x
|
||||
sta f_in_arg
|
||||
jmp f_SetPage
|
||||
|
||||
********************************
|
||||
* &CLEAR - clear current page to current color.
|
||||
h_clear
|
||||
jmp f_Clear ;well, that was easy
|
||||
|
||||
********************************
|
||||
* &XDRAW left,top,right,bottom - draw rectangle outline
|
||||
h_xdraw
|
||||
jsr getltrb
|
||||
jmp f_DrawRect
|
||||
|
||||
********************************
|
||||
* &DRAW left,top,right,bottom - draw filled rectangle
|
||||
h_draw
|
||||
jsr getltrb
|
||||
jmp f_FillRect
|
||||
|
||||
********************************
|
||||
* &EXP {0,1} - set line draw mode
|
||||
h_exp
* Parse a byte, reject anything other than 0 or 1, and
* hand it to f_SetLineMode.
 jsr GETBYT ;mode value -> X
 cpx #2
 bge :badmode ;only 0 and 1 are legal
 stx f_in_arg
 jmp f_SetLineMode
:badmode jmp IllQError
|
||||
|
||||
********************************
|
||||
* &COS cx,cy,rad - draw outline circle
|
||||
h_cos
|
||||
jsr getcxcyr
|
||||
jmp f_DrawCircle
|
||||
|
||||
********************************
|
||||
* &SIN cx,cy,rad - draw filled circle
|
||||
h_sin
|
||||
jsr getcxcyr
|
||||
jmp f_FillCircle
|
||||
|
||||
********************************
|
||||
* &AT x,y - select center for array draw
|
||||
h_at
|
||||
jsr HFNS
|
||||
jmp storeac
|
||||
|
||||
********************************
|
||||
* &PLOT vertexAddr, indexAddr, indexCount [AT cx,cy]
|
||||
* draw lines from arrays of vertices and indices
|
||||
h_plot jmp array_draw
|
||||
|
||||
********************************
|
||||
* &HPLOT x,y - draw a point
|
||||
* &HPLOT TO x,y - draw a line from last point to x,y
|
||||
* &HPLOT x0,y0 to x1,y1 - draw a line
|
||||
lst on ;last token handler --
|
||||
h_hplot equ * ; must be on first page
|
||||
lst off ; to omit high byte table
|
||||
|
||||
jsr CHRGOT ;check next token
|
||||
lst off
|
||||
cmp #tok_to ;is this an "HPLOT TO"?
|
||||
beq :leadingto
|
||||
jsr getx1y1 ;get the first coord
|
||||
jsr copy1to0
|
||||
jsr CHRGOT ;see if single point
|
||||
cmp #tok_to
|
||||
beq :hplot_to ;nope, draw line
|
||||
jsr copy0toprev ;draw point, and save x/y
|
||||
jmp f_DrawPoint ; for subsequent HPLOT TO
|
||||
|
||||
:leadingto ;"HPLOT TO", restore the
|
||||
lda g_prevxl ; previous coord to x0/y0
|
||||
sta f_in_x0l ;(can't rely on f_in_zzz
|
||||
lda g_prevxh ; being there -- we might
|
||||
sta f_in_x0h ; have drawn a rect)
|
||||
lda g_prevy
|
||||
sta f_in_y0
|
||||
:hplot_to
|
||||
jsr CHRGET ;eat the TO
|
||||
jsr getx1y1 ;get the coords
|
||||
jsr f_DrawLine ;draw it
|
||||
jsr copy1to0 ;shift 1->0 for next round
|
||||
jsr CHRGOT
|
||||
cmp #tok_to ;another TO?
|
||||
beq :hplot_to ;yes, branch
|
||||
jmp copy0toprev ;no, save prev and bail
|
||||
|
||||
* Get coordinates and store in X1/Y1.
|
||||
getx1y1
|
||||
jsr HFNS
|
||||
store1 stx f_in_x1l ;store X/Y/A in coord1
|
||||
sty f_in_x1h
|
||||
sta f_in_y1
|
||||
rts
|
||||
|
||||
* Save x0/y0 as our "previous" coordinate.
|
||||
copy0toprev
|
||||
ldx f_in_x0l
|
||||
ldy f_in_x0h
|
||||
lda f_in_y0
|
||||
storeprv stx g_prevxl ;store X/Y/A in g_prev
|
||||
sty g_prevxh
|
||||
sta g_prevy
|
||||
rts
|
||||
|
||||
* Copy X1/Y1 into X0/Y0.
|
||||
copy1to0
|
||||
ldx f_in_x1l
|
||||
ldy f_in_x1h
|
||||
lda f_in_y1
|
||||
store0 stx f_in_x0l ;store X/Y/A in coord 0
|
||||
sty f_in_x0h
|
||||
sta f_in_y0
|
||||
rts
|
||||
|
||||
* Store X/Y/A into array-center.
|
||||
storeac stx g_ac_xl
|
||||
sty g_ac_xh
|
||||
sta g_ac_y
|
||||
rts
|
||||
|
||||
* Get left/top/right/bottom coordinates.
|
||||
getltrb
* Parses "left,top,right,bottom" from the program text.
* The first pair lands in X0/Y0, the second in X1/Y1.
 jsr HFNS ;left,top
 jsr store0 ;save as X0/Y0
 jsr CHKCOM ;a comma must separate the pairs
 jsr HFNS ;right,bottom
 jmp store1 ;save as X1/Y1; its RTS returns for us
|
||||
|
||||
* Get center coordinates and radius.
|
||||
getcxcyr
|
||||
jsr HFNS ;get CX and CY
|
||||
jsr store0 ;save as X0/Y0
|
||||
jsr CHKCOM ;eat a comma
|
||||
jsr GETBYT ;convert to 0-255
|
||||
stx f_in_rad
|
||||
rts
|
||||
|
||||
* Array-draw handler.
|
||||
*
|
||||
* We know that fdraw doesn't use LINNUM or A1L/A1H,
|
||||
* so it's safe to use them here.
|
||||
array_draw
* Parses "vertexAddr, indexAddr, indexCount [AT cx,cy]"
* and draws one line per pair of index-buffer entries.
* A pair is skipped when either vertex fails the range
* check in getvertex.
*
* ]count holds indexCount*2 (two indices per line);
* ]cur is the position in the index buffer.
]vertices equ A1L ;2b
]indices equ LINNUM ;2b
]count equ PCL
]cur equ PCH

 jsr FRMNUM ;get vertex buffer address
 jsr GETADR
 lda LINNUM ;copy to A1L
 sta ]vertices
 lda LINNUM+1
 sta ]vertices+1
 jsr CHKCOM ;eat the comma
 jsr FRMNUM ;get index buffer address
 jsr GETADR ;leave it in LINNUM
 jsr CHKCOM
 jsr GETBYT ;get the count
 cpx #128 ;range check (0-127)
 blt :countok
 jmp IllQError
:countok txa
 asl ;double it (fits: max 254)
 sta ]count ;stash it
 lda #$00
 sta ]cur

* Check for optional AT cx,cy. This is done even when
* the count is zero, so the clause is always consumed
* and the center is still updated.
 jsr CHRGOT
 cmp #tok_at
 bne :noat
 jsr CHRGET ;eat the AT
 lda LINNUM ;the code that reads the
 pha ; hi-res coordinates will
 lda LINNUM+1 ; overwrite LINNUM, so
 pha ; we have to save & restore
 jsr h_at
 pla
 sta LINNUM+1
 pla
 sta LINNUM
:noat
 lda ]count
 beq :done ;nothing to draw

]loop jsr getvertex ;first endpoint
 bcs :skip2 ;bad: also skip its partner
 jsr store0
 jsr getvertex ;second endpoint
 bcs :skip
 jsr store1
 jsr f_DrawLine
 dfb $2c ;BIT addr: swallows the 2-byte INC
:skip2 inc ]cur
:skip lda ]cur
 cmp ]count
 blt ]loop
:done rts
|
||||
|
||||
* Get the Nth vertex, specified by ]cur, and load it
|
||||
* into X/Y/A (xlo/xhi/y). Returns with carry set if
|
||||
* the vertex is invalid.
|
||||
*
|
||||
* Increments ]cur by 1.
|
||||
getvertex
|
||||
ldy ]cur
|
||||
inc ]cur
|
||||
lda (]indices),y
|
||||
bmi :badv ;must be 0-127
|
||||
jsr :calcvertex
|
||||
|
||||
ldx g_out_x
|
||||
ldy g_out_x+1
|
||||
beq :xok ;0-255, ok
|
||||
cpy #1
|
||||
bne :badv ;512+
|
||||
cpx #280-256
|
||||
bge :badv ;280-511
|
||||
:xok
|
||||
lda g_out_y+1
|
||||
bne :badv ;Y is neg or > 255
|
||||
lda g_out_y
|
||||
cmp #192
|
||||
bcc :goodv
|
||||
:badv
|
||||
sec
|
||||
:goodv rts
|
||||
|
||||
* Get VX and VY, merging with AC, and store in
|
||||
* 16-bit g_out_x and g_out_y. Range not checked
|
||||
* here. On entry, A has vertex index.
|
||||
:calcvertex
|
||||
asl
|
||||
tay
|
||||
ldx #$00 ;hi byte of vertex
|
||||
lda (]vertices),y ;x-coord
|
||||
bpl :xpos
|
||||
dex ;sign-extend hi byte
|
||||
:xpos clc
|
||||
adc g_ac_xl
|
||||
sta g_out_x
|
||||
txa
|
||||
adc g_ac_xh
|
||||
sta g_out_x+1
|
||||
|
||||
iny
|
||||
ldx #$00
|
||||
lda (]vertices),y ;y-coord
|
||||
bpl :ypos
|
||||
dex ;sign-extend hi byte
|
||||
:ypos clc
|
||||
adc g_ac_y
|
||||
sta g_out_y
|
||||
bcc :nocarry
|
||||
inx
|
||||
:nocarry stx g_out_y+1
|
||||
rts
|
||||
|
||||
|
||||
|
||||
********************************
|
||||
* Global variables
|
||||
|
||||
g_cur_page ds 1 ;$20 or $40
|
||||
g_hcolor ds 1
|
||||
g_prevxl ds 1
|
||||
g_prevxh ds 1
|
||||
g_prevy ds 1
|
||||
g_ac_xl ds 1 ;Center-point coordinates
|
||||
g_ac_xh ds 1 ; for array-based line
|
||||
g_ac_y ds 1 ; draw (&AT, &PLOT).
|
||||
g_out_x ds 2 ;16-bit coordinates for
|
||||
g_out_y ds 2 ; array-based line draw
|
||||
|
||||
|
||||
|
||||
lst on
|
||||
end equ *
|
||||
sav amperfdraw
|
||||
lst off
|
752
FDRAW.CIRCLE.S
Normal file
752
FDRAW.CIRCLE.S
Normal file
@ -0,0 +1,752 @@
|
||||
********************************
|
||||
* *
|
||||
* Fast Apple II Graphics *
|
||||
* By Andy McFadden *
|
||||
* Version 0.3, Aug 2015 *
|
||||
* *
|
||||
* Circle rendering *
|
||||
* (Included by FDRAW.S) *
|
||||
* *
|
||||
* Developed with Merlin-16 *
|
||||
* *
|
||||
********************************
|
||||
|
||||
* TODO: if USE_FAST is 0, replace the outline circle
|
||||
* plot code with calls to DrawPoint (or maybe a
|
||||
* common sub-function so we don't trash the input
|
||||
* parameters). Saves a little space.
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Draw a circle. The radius is in in_rad, and
|
||||
* the center is at in_x0l+in_x0h,in_y0.
|
||||
*
|
||||
********************************
|
||||
DrawCircle
* Outline circle. calc_circle plots the outline as it
* goes, through the _cpNN call sites; fixcplot makes
* sure those sites hold JSR rather than BIT.
*
* Radius 0 is special: calc_circle only fills in the
* raster tables for that case and never plots, so we
* draw the center pixel ourselves with DrawPoint.
 lda #$20 ;JSR
 cmp _cp08 ;configured for outline?
 beq :okay
 jsr fixcplot
:okay
 lda in_rad
 beq :point
 jmp calc_circle
:point jsr calc_circle ;still set up raster tables
 jmp DrawPoint ;plot in_x0l/in_x0h,in_y0
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Draw filled circle.
|
||||
*
|
||||
********************************
|
||||
FillCircle
|
||||
lda #$2c ;BIT
|
||||
cmp _cp08 ;configured for fill?
|
||||
beq :okay
|
||||
jsr fixcplot
|
||||
:okay
|
||||
jsr calc_circle
|
||||
jmp FillRaster
|
||||
|
||||
|
||||
* Calculate a circle, using Bresenham's algorithm. The
|
||||
* results are placed into the rasterization buffers.
|
||||
*
|
||||
* in_rad must be from 0 to 255. The x/y center
|
||||
* coordinates must be on the screen, but the circle
|
||||
* can extend off the edge.
|
||||
*
|
||||
* The computed values are stored in the rasterization
|
||||
* tables. For an outline circle, we also plot the
|
||||
* points immediately.
|
||||
|
||||
do USE_FAST ;*****
|
||||
* local storage -- not used often enough to merit DP
|
||||
circ_8bit ds 1
|
||||
circ_clip ds 1
|
||||
fin ;*****
|
||||
|
||||
calc_circle
|
||||
max_fast_rad equ 41
|
||||
]cxl equ zloc0
|
||||
]cxh equ zloc1
|
||||
]cy equ zloc2
|
||||
]dlo equ zloc3
|
||||
]dhi equ zloc4
|
||||
]xsav equ zloc5
|
||||
]ysav equ zloc6
|
||||
]min_x equ zloc7 ;min/max offsets from center
|
||||
]max_x equ zloc8 ;(min is above center, max
|
||||
]min_y equ zloc9 ; is below)
|
||||
]max_y equ zloc10
|
||||
]hitmp equ zloc11
|
||||
* only used by hplot for outline circles
|
||||
]hbasl equ zptr0
|
||||
]andmask equ zloc11 ;overlaps with ]hitmp
|
||||
]savxreg equ zloc12
|
||||
]savyreg equ zloc13
|
||||
|
||||
* Special-case radius=0. It removes an annoying
|
||||
* edge case (first y-- becomes 0xff, but 6502 cmp
|
||||
* is unsigned).
|
||||
lda in_rad
|
||||
bne :notzero
|
||||
ldy in_y0
|
||||
sty rast_top
|
||||
sty rast_bottom
|
||||
lda in_x0l
|
||||
sta rastx0l,y
|
||||
sta rastx1l,y
|
||||
lda in_x0h
|
||||
sta rastx0h,y
|
||||
sta rastx1h,y
|
||||
rts
|
||||
|
||||
* Use different version of function for small
|
||||
* circles, because we can do it all in 8 bits.
|
||||
:notzero
|
||||
do USE_FAST ;*****
|
||||
ldy #$01
|
||||
cmp #max_fast_rad ;in_rad in Acc
|
||||
blt :use_fast
|
||||
dey
|
||||
:use_fast sty circ_8bit
|
||||
fin ;*****
|
||||
|
||||
lda in_x0l ;copy center to DP for speed
|
||||
sta ]cxl
|
||||
lda in_x0h
|
||||
sta ]cxh
|
||||
lda in_y0
|
||||
sta ]cy
|
||||
|
||||
* Compute min/max values, based on offset from center.
|
||||
* These are compared against offset-from-center x/y.
|
||||
* We need tight bounds on Y because we use it to
|
||||
* compute the rast_render top/bottom. Getting tight
|
||||
* bounds on X is not so important, but we still need
|
||||
* it for the no-clip optimization.
|
||||
ldx #$04 ;count edges needing clip
|
||||
|
||||
lda #NUM_ROWS-1 ;191
|
||||
sec
|
||||
sbc ]cy ;maxY = 191-cy
|
||||
cmp in_rad
|
||||
blt :ylimok
|
||||
lda in_rad ;clamp to radius
|
||||
dex
|
||||
:ylimok sta ]max_y ;maxY = 191-cy
|
||||
|
||||
lda ]cy ;minY = cy
|
||||
cmp in_rad
|
||||
blt :ylimok2
|
||||
lda in_rad ;clamp to radius
|
||||
dex
|
||||
:ylimok2 sta ]min_y
|
||||
|
||||
lda ]cxh
|
||||
beq :xlimlo
|
||||
* Examples (note #<NUM_COLS-1 is 279-256 = 23):
|
||||
* cx=265 (cxh=1 cxl=9), 23-9=14, chk rad
|
||||
lda #<NUM_COLS-1 ;maxX = 279-cx
|
||||
sec
|
||||
sbc ]cxl
|
||||
cmp in_rad
|
||||
blt :xlimhok
|
||||
lda in_rad ;clamp to radius
|
||||
dex
|
||||
:xlimhok sta ]max_x
|
||||
|
||||
lda in_rad ;min X always out of range
|
||||
dex ; so just clamp to radius
|
||||
sta ]min_x
|
||||
jmp :xlimdone
|
||||
|
||||
* Examples:
|
||||
* For cx=0 to 24, we can never pass right edge (our
|
||||
* maximum radius is 255).
|
||||
* cx=3, 23-3=20 + carry set --> bad, must use rad
|
||||
* cx=24, 23-24=255 + carry clear --> ok, chk rad
|
||||
* cx=255, 23-255=24 + carry clear --> ok, chk rad
|
||||
:xlimlo
|
||||
lda #<NUM_COLS-1 ;maxX = 279-cx
|
||||
sec
|
||||
sbc ]cxl
|
||||
bcs :xuserad
|
||||
cmp in_rad
|
||||
blt :xlimok
|
||||
:xuserad lda in_rad ;clamp to radius
|
||||
dex
|
||||
:xlimok sta ]max_x
|
||||
|
||||
lda ]cxl ;minX = (cx > 255) ?
|
||||
cmp in_rad
|
||||
blt :xlimok2
|
||||
lda in_rad ;clamp to radius
|
||||
dex
|
||||
:xlimok2 sta ]min_x
|
||||
|
||||
:xlimdone
|
||||
|
||||
do USE_FAST ;*****
|
||||
stx circ_clip
|
||||
fin ;*****
|
||||
|
||||
* set top/bottom rows for rasterizer
|
||||
lda ]cy
|
||||
clc
|
||||
adc ]max_y
|
||||
sta rast_bottom
|
||||
lda ]cy
|
||||
sec
|
||||
sbc ]min_y
|
||||
sta rast_top
|
||||
|
||||
DO 0 ;debug debug debug
|
||||
LDA ]min_x ;save a copy where the
|
||||
STA $0380 ; monitor won't trash it
|
||||
LDA ]max_x
|
||||
STA $0381
|
||||
LDA ]min_y
|
||||
STA $0382
|
||||
LDA ]max_y
|
||||
STA $0383
|
||||
FIN
|
||||
|
||||
* Set initial conditions for Bresenham.
|
||||
ldx #0 ;:x = 0
|
||||
stx ]xsav
|
||||
ldy in_rad ;:y = rad
|
||||
sty ]ysav
|
||||
lda #1 ;:d = 1 - rad
|
||||
sec
|
||||
sbc ]ysav ;in_rad
|
||||
sta ]dlo
|
||||
bcs :hizero ;C==1 if in_rad<=1
|
||||
ldx #$ff ;C was 0, make neg
|
||||
:hizero stx ]dhi
|
||||
|
||||
*
|
||||
* Outer loop -- plot 8 points, then update values.
|
||||
*
|
||||
circ_loop
|
||||
|
||||
do USE_FAST ;*****
|
||||
lda circ_clip
|
||||
beq ncypy
|
||||
jmp with_clip
|
||||
|
||||
* Quick version, no clipping required
|
||||
* row cy+y: cx-x and cx+x
|
||||
ncypy
|
||||
lda ]ysav
|
||||
clc
|
||||
adc ]cy
|
||||
tay ;y-coord in Y-reg
|
||||
|
||||
lda ]cxl
|
||||
sec
|
||||
sbc ]xsav
|
||||
sta rastx0l,y
|
||||
lda ]cxh
|
||||
sbc #$00
|
||||
sta rastx0h,y
|
||||
_cp00 jsr cplotl
|
||||
|
||||
lda ]cxl
|
||||
clc
|
||||
adc ]xsav
|
||||
sta rastx1l,y
|
||||
lda ]cxh
|
||||
adc #$00
|
||||
sta rastx1h,y
|
||||
_cp01 jsr cplotrn
|
||||
|
||||
* row cy-y: cx-x and cx+x
|
||||
ncymy
|
||||
lda ]cy
|
||||
sec
|
||||
sbc ]ysav
|
||||
tay ;y-coord in Y-reg
|
||||
|
||||
lda ]cxl
|
||||
sec
|
||||
sbc ]xsav
|
||||
sta rastx0l,y
|
||||
lda ]cxh
|
||||
sbc #$00
|
||||
sta rastx0h,y
|
||||
_cp02 jsr cplotl
|
||||
|
||||
lda ]cxl
|
||||
clc
|
||||
adc ]xsav
|
||||
sta rastx1l,y
|
||||
lda ]cxh
|
||||
adc #$00
|
||||
sta rastx1h,y
|
||||
_cp03 jsr cplotrn
|
||||
|
||||
* row cy+x: cx-y and cx+y
|
||||
ncypx
|
||||
lda ]xsav ;off bottom?
|
||||
clc
|
||||
adc ]cy
|
||||
tay ;y-coord in Y-reg
|
||||
|
||||
lda ]cxl
|
||||
sec
|
||||
sbc ]ysav
|
||||
sta rastx0l,y
|
||||
lda ]cxh
|
||||
sbc #$00
|
||||
sta rastx0h,y
|
||||
_cp04 jsr cplotl
|
||||
|
||||
lda ]cxl
|
||||
clc
|
||||
adc ]ysav
|
||||
sta rastx1l,y
|
||||
lda ]cxh
|
||||
adc #$00
|
||||
sta rastx1h,y
|
||||
_cp05 jsr cplotrn
|
||||
|
||||
* row cy-x: cx-y and cx+y
|
||||
ncymx
|
||||
lda ]cy
|
||||
sec
|
||||
sbc ]xsav
|
||||
tay ;y-coord in Y-reg
|
||||
|
||||
lda ]cxl
|
||||
sec
|
||||
sbc ]ysav
|
||||
sta rastx0l,y
|
||||
lda ]cxh
|
||||
sbc #$00
|
||||
sta rastx0h,y
|
||||
_cp06 jsr cplotl
|
||||
|
||||
lda ]cxl
|
||||
clc
|
||||
adc ]ysav
|
||||
sta rastx1l,y
|
||||
lda ]cxh
|
||||
adc #$00
|
||||
sta rastx1h,y
|
||||
_cp07 jsr cplotrn
|
||||
|
||||
* CLICK
|
||||
jmp circ_plot_done
|
||||
|
||||
fin ;***** (USE_FAST)
|
||||
|
||||
*
|
||||
* Same thing, but this time clipping edges.
|
||||
*
|
||||
with_clip
|
||||
|
||||
* row cy+y: cx-x and cx+x
|
||||
ccypy
|
||||
lda ]ysav ;off bottom?
|
||||
cmp ]max_y
|
||||
beq :cypy_ok
|
||||
bge cypy_skip ;completely off screen
|
||||
:cypy_ok clc
|
||||
adc ]cy
|
||||
tay ;y-coord in Y-reg
|
||||
|
||||
ldx ]xsav ;handle cx-x
|
||||
cpx ]min_x
|
||||
blt :cxmx_ok
|
||||
beq :cxmx_ok
|
||||
lda #0 ;clip at 0
|
||||
sta rastx0l,y
|
||||
sta rastx0h,y
|
||||
beq cxmx_done0 ;always
|
||||
BREAK
|
||||
:cxmx_ok lda ]cxl
|
||||
sec
|
||||
sbc ]xsav
|
||||
sta rastx0l,y
|
||||
lda ]cxh
|
||||
sbc #$00
|
||||
sta rastx0h,y
|
||||
_cp08 jsr cplotl
|
||||
cxmx_done0
|
||||
|
||||
cpx ]max_x ;handle cx+x
|
||||
blt :cxpx_ok
|
||||
beq :cxpx_ok
|
||||
lda #<NUM_COLS-1
|
||||
sta rastx1l,y
|
||||
lda #>NUM_COLS-1
|
||||
sta rastx1h,y
|
||||
bne cxpx_done0 ;always
|
||||
BREAK
|
||||
:cxpx_ok lda ]cxl
|
||||
clc
|
||||
adc ]xsav
|
||||
sta rastx1l,y
|
||||
lda ]cxh
|
||||
adc #$00
|
||||
sta rastx1h,y
|
||||
_cp09 jsr cplotr
|
||||
cxpx_done0
|
||||
cypy_skip
|
||||
|
||||
* row cy-y: cx-x and cx+x
|
||||
ccymy
|
||||
lda ]ysav ;off top?
|
||||
cmp ]min_y
|
||||
beq :cymy_ok
|
||||
bge cymy_skip
|
||||
:cymy_ok lda ]cy
|
||||
sec
|
||||
sbc ]ysav
|
||||
tay ;y-coord in Y-reg
|
||||
|
||||
ldx ]xsav ;handle cx-x
|
||||
cpx ]min_x
|
||||
blt :cxmx_ok
|
||||
beq :cxmx_ok
|
||||
lda #0 ;clip at 0
|
||||
sta rastx0l,y
|
||||
sta rastx0h,y
|
||||
beq cxmx_done1 ;always
|
||||
BREAK
|
||||
:cxmx_ok lda ]cxl
|
||||
sec
|
||||
sbc ]xsav
|
||||
sta rastx0l,y
|
||||
lda ]cxh
|
||||
sbc #$00
|
||||
sta rastx0h,y
|
||||
_cp10 jsr cplotl
|
||||
cxmx_done1
|
||||
|
||||
cpx ]max_x ;handle cx+x
|
||||
blt :cxpx_ok
|
||||
beq :cxpx_ok
|
||||
lda #<NUM_COLS-1
|
||||
sta rastx1l,y
|
||||
lda #>NUM_COLS-1
|
||||
sta rastx1h,y
|
||||
bne cxpx_done1 ;always
|
||||
BREAK
|
||||
:cxpx_ok lda ]cxl
|
||||
clc
|
||||
adc ]xsav
|
||||
sta rastx1l,y
|
||||
lda ]cxh
|
||||
adc #$00
|
||||
sta rastx1h,y
|
||||
_cp11 jsr cplotr
|
||||
cxpx_done1
|
||||
cymy_skip
|
||||
|
||||
* row cy+x: cx-y and cx+y
|
||||
ccypx
|
||||
lda ]xsav ;off bottom?
|
||||
cmp ]max_y
|
||||
beq :cypx_ok
|
||||
bge cypx_skip
|
||||
:cypx_ok clc
|
||||
adc ]cy
|
||||
tay ;y-coord in Y-reg
|
||||
|
||||
ldx ]ysav ;handle cx-y
|
||||
cpx ]min_x
|
||||
blt :cxmy_ok
|
||||
beq :cxmy_ok
|
||||
lda #0 ;clip at 0
|
||||
sta rastx0l,y
|
||||
sta rastx0h,y
|
||||
beq cxmy_done2 ;always
|
||||
BREAK
|
||||
:cxmy_ok lda ]cxl
|
||||
sec
|
||||
sbc ]ysav
|
||||
sta rastx0l,y
|
||||
lda ]cxh
|
||||
sbc #$00
|
||||
sta rastx0h,y
|
||||
_cp12 jsr cplotl
|
||||
cxmy_done2
|
||||
|
||||
cpx ]max_x ;handle cx+y
|
||||
blt :cxpy_ok
|
||||
beq :cxpy_ok
|
||||
lda #<NUM_COLS-1
|
||||
sta rastx1l,y
|
||||
lda #>NUM_COLS-1
|
||||
sta rastx1h,y
|
||||
bne cxpy_done2 ;always
|
||||
BREAK
|
||||
:cxpy_ok lda ]cxl
|
||||
clc
|
||||
adc ]ysav
|
||||
sta rastx1l,y
|
||||
lda ]cxh
|
||||
adc #$00
|
||||
sta rastx1h,y
|
||||
_cp13 jsr cplotr
|
||||
cxpy_done2
|
||||
cypx_skip
|
||||
|
||||
* row cy-x: cx-y and cx+y
|
||||
ccymx
|
||||
lda ]xsav ;off top?
|
||||
cmp ]min_y
|
||||
beq :cymx_ok
|
||||
bge cymx_skip
|
||||
:cymx_ok lda ]cy
|
||||
sec
|
||||
sbc ]xsav
|
||||
tay ;y-coord in Y-reg
|
||||
|
||||
ldx ]ysav ;handle cx-y
|
||||
cpx ]min_x
|
||||
blt :cxmy_ok
|
||||
beq :cxmy_ok
|
||||
lda #0 ;clip at 0
|
||||
sta rastx0l,y
|
||||
sta rastx0h,y
|
||||
beq cxmy_done3 ;always
|
||||
BREAK
|
||||
:cxmy_ok lda ]cxl
|
||||
sec
|
||||
sbc ]ysav
|
||||
sta rastx0l,y
|
||||
lda ]cxh
|
||||
sbc #$00
|
||||
sta rastx0h,y
|
||||
_cp14 jsr cplotl
|
||||
cxmy_done3
|
||||
|
||||
cpx ]max_x ;handle cx+y
|
||||
blt :cxpy_ok
|
||||
beq :cxpy_ok
|
||||
lda #<NUM_COLS-1
|
||||
sta rastx1l,y
|
||||
lda #>NUM_COLS-1
|
||||
sta rastx1h,y
|
||||
bne cxpy_done3 ;always
|
||||
BREAK
|
||||
:cxpy_ok lda ]cxl
|
||||
clc
|
||||
adc ]ysav
|
||||
sta rastx1l,y
|
||||
lda ]cxh
|
||||
adc #$00
|
||||
sta rastx1h,y
|
||||
_cp15 jsr cplotr
|
||||
cxpy_done3
|
||||
cymx_skip
|
||||
|
||||
circ_plot_done
|
||||
* Update X/Y/D. Up to about radius=41 we can maintain
|
||||
* 'd' in an 8-bit register.
|
||||
do USE_FAST ;*****
|
||||
lda circ_8bit
|
||||
beq circ_slow
|
||||
|
||||
*
|
||||
* Bresenham update, with 8-bit 'd'.
|
||||
*
|
||||
ldx ]xsav
|
||||
lda ]dlo
|
||||
bmi :dneg
|
||||
txa ;:d = d + ((x-y)*4) +5
|
||||
sec
|
||||
sbc ]ysav ;x <= y, may be neg or 0
|
||||
asl
|
||||
asl
|
||||
clc ;can't know carry
|
||||
adc #5
|
||||
clc ;still don't want carry
|
||||
adc ]dlo
|
||||
sta ]dlo
|
||||
dec ]ysav ;:y--
|
||||
jmp :loopbot
|
||||
:dneg txa ;:d = d + (x*4) +3
|
||||
asl
|
||||
asl ;x always pos, C=0
|
||||
DO 0
|
||||
BCC :TEST ;debug
|
||||
BREAK ;debug
|
||||
:TEST ;debug
|
||||
FIN
|
||||
adc #3
|
||||
adc ]dlo
|
||||
sta ]dlo
|
||||
:loopbot
|
||||
inx ;:x++
|
||||
stx ]xsav
|
||||
cpx ]ysav
|
||||
beq :again
|
||||
bge circ_done
|
||||
:again jmp circ_loop
|
||||
|
||||
fin ;*****
|
||||
|
||||
*
|
||||
* Bresenham update, with 16-bit 'd'
|
||||
*
|
||||
circ_slow
|
||||
CLICK
|
||||
ldx ]xsav
|
||||
lda ]dhi
|
||||
bmi :dneg
|
||||
lda ]dlo
|
||||
clc
|
||||
adc #5
|
||||
sta ]dlo
|
||||
bcc :noinc
|
||||
inc ]dhi
|
||||
:noinc
|
||||
txa ;:d = d + ((x-y)*4) +5
|
||||
ldy #$00
|
||||
sty ]hitmp
|
||||
sec
|
||||
sbc ]ysav ;x <= y, may be neg or 0
|
||||
beq :xeqy ;if x==y, nothing to add
|
||||
ldy #$ff
|
||||
sty ]hitmp
|
||||
asl
|
||||
rol ]hitmp
|
||||
asl
|
||||
rol ]hitmp
|
||||
clc
|
||||
adc ]dlo
|
||||
sta ]dlo
|
||||
lda ]dhi
|
||||
adc ]hitmp
|
||||
sta ]dhi
|
||||
:xeqy
|
||||
dec ]ysav ;:y--
|
||||
jmp :loopbot
|
||||
|
||||
:dneg lda ]dlo ;:d = d + (x*4) + 3
|
||||
clc
|
||||
adc #3
|
||||
sta ]dlo
|
||||
bcc :noinc2
|
||||
inc ]dhi
|
||||
:noinc2 txa
|
||||
ldy #0 ;x always positive
|
||||
sty ]hitmp
|
||||
asl
|
||||
rol ]hitmp
|
||||
asl
|
||||
rol ]hitmp
|
||||
clc ;not needed?
|
||||
adc ]dlo
|
||||
sta ]dlo
|
||||
lda ]dhi
|
||||
adc ]hitmp
|
||||
sta ]dhi
|
||||
:loopbot
|
||||
inx ;:x++
|
||||
stx ]xsav
|
||||
cpx ]ysav
|
||||
beq :again
|
||||
bge circ_done
|
||||
:again jmp circ_loop
|
||||
|
||||
|
||||
circ_done rts
|
||||
|
||||
|
||||
* Plot a point for outline circle rendering.
|
||||
*
|
||||
* X and Y must be preserved. Y holds the current line
|
||||
* number.
|
||||
*
|
||||
* Most DP locations are in use -- see the variable
|
||||
* declarations at the start of the circle function.
|
||||
|
||||
* cplotl is the entry point for the leftmost point.
|
||||
cplotl
|
||||
stx ]savxreg
|
||||
sty ]savyreg
|
||||
|
||||
lda ylooklo,y
|
||||
sta ]hbasl
|
||||
lda ylookhi,y
|
||||
_pg_or2 ora #$20
|
||||
sta ]hbasl+1
|
||||
|
||||
* Convert the X coordinate into byte/bit.
|
||||
ldx rastx0l,y ;x coord, lo
|
||||
lda rastx0h,y ;>= 256?
|
||||
beq :lotabl ;no, use the low table
|
||||
ldy div7hi,x
|
||||
lda mod7hi,x
|
||||
bpl cplotcom ;always
|
||||
BREAK ;debug
|
||||
:lotabl ldy div7lo,x
|
||||
lda mod7lo,x
|
||||
jmp cplotcom
|
||||
|
||||
* cplotr is the entry point for the rightmost point.
|
||||
* We use rastx1 instead of rastx0.
|
||||
cplotr
|
||||
lda ylooklo,y
|
||||
sta ]hbasl
|
||||
lda ylookhi,y
|
||||
_pg_or3 ora #$20
|
||||
sta ]hbasl+1
|
||||
|
||||
* If we just plotted the left point on the same line,
|
||||
* we can skip the Y-lookup by jumping here.
|
||||
cplotrn
|
||||
stx ]savxreg
|
||||
sty ]savyreg
|
||||
|
||||
ldx rastx1l,y ;x coord, lo
|
||||
lda rastx1h,y ;>= 256?
|
||||
beq :lotabl ;no, use the low table
|
||||
ldy div7hi,x
|
||||
lda mod7hi,x
|
||||
bpl cplotcom ;always
|
||||
BREAK ;debug
|
||||
:lotabl ldy div7lo,x
|
||||
lda mod7lo,x
|
||||
|
||||
* Plot the point. The byte offset (0-39) is in Y,
|
||||
* the bit offset (0-6) is in A.
|
||||
cplotcom
|
||||
tax
|
||||
lda colorline,y ;start with color pattern
|
||||
eor (]hbasl),y ;flip all bits
|
||||
and andmask,x ;clear other bits
|
||||
eor (]hbasl),y ;restore ours, set theirs
|
||||
sta (]hbasl),y
|
||||
|
||||
ldx ]savxreg
|
||||
ldy ]savyreg
|
||||
rts
|
||||
|
||||
* Reconfigure calc_circle to either JSR to cplotl/r,
|
||||
* or just BIT the address (a 4-cycle no-op). The
|
||||
* desired instruction is in A.
|
||||
fixcplot
|
||||
do USE_FAST ;*****
|
||||
sta _cp00
|
||||
sta _cp01
|
||||
sta _cp02
|
||||
sta _cp03
|
||||
sta _cp04
|
||||
sta _cp05
|
||||
sta _cp06
|
||||
sta _cp07
|
||||
fin ;*****
|
||||
sta _cp08
|
||||
sta _cp09
|
||||
sta _cp10
|
||||
sta _cp11
|
||||
sta _cp12
|
||||
sta _cp13
|
||||
sta _cp14
|
||||
sta _cp15
|
||||
rts
|
588
FDRAW.LINE.S
Normal file
588
FDRAW.LINE.S
Normal file
@ -0,0 +1,588 @@
|
||||
********************************
|
||||
* *
|
||||
* Fast Apple II Graphics *
|
||||
* By Andy McFadden *
|
||||
* Version 0.3, Aug 2015 *
|
||||
* *
|
||||
* Point and line functions *
|
||||
* (Included by FDRAW.S) *
|
||||
* *
|
||||
* Developed with Merlin-16 *
|
||||
* *
|
||||
********************************
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Draw a single point in the current color.
|
||||
*
|
||||
********************************
|
||||
DrawPoint
|
||||
]hbasl equ zptr0
|
||||
|
||||
ldy in_y0
|
||||
lda ylooklo,y
|
||||
sta ]hbasl
|
||||
lda ylookhi,y
|
||||
ora g_page
|
||||
sta ]hbasl+1
|
||||
|
||||
ldx in_x0l ;x coord, lo
|
||||
lda in_x0h ;>= 256?
|
||||
beq :lotabl ;no, use the low table
|
||||
ldy div7hi,x
|
||||
lda mod7hi,x
|
||||
bpl :plotit ;always
|
||||
BREAK ;debug
|
||||
:lotabl ldy div7lo,x
|
||||
lda mod7lo,x
|
||||
|
||||
* Plot the point. The byte offset (0-39) is in Y,
|
||||
* the bit offset (0-6) is in A.
|
||||
:plotit
|
||||
tax
|
||||
lda colorline,y ;start with color pattern
|
||||
eor (]hbasl),y ;flip all bits
|
||||
and andmask,x ;clear other bits
|
||||
eor (]hbasl),y ;restore ours, set theirs
|
||||
sta (]hbasl),y
|
||||
rts
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Draw a line between two points.
|
||||
*
|
||||
********************************
|
||||
DrawLine
|
||||
|
||||
]hbasl equ zptr0
|
||||
]xposl equ zloc0 ;always left edge
|
||||
]xposh equ zloc1
|
||||
]ypos equ zloc2 ;top or bottom
|
||||
]deltaxl equ zloc3
|
||||
]deltaxh equ zloc4
|
||||
]deltay equ zloc5
|
||||
]count equ zloc6
|
||||
]counth equ zloc7
|
||||
]diff equ zloc8
|
||||
]diffh equ zloc9
|
||||
]andmask equ zloc10
|
||||
]wideflag equ zloc11 ;doesn't really need DP
|
||||
|
||||
* We use a traditional Bresenham run-length approach.
|
||||
* Run-slicing is possible, but the code is larger
|
||||
* and the increased cost means it's only valuable
|
||||
* for longer lines. An optimal solution would switch
|
||||
* approaches based on line length.
|
||||
*
|
||||
* Start by identifying whether x0 or x1 is on the
|
||||
* left. To make life simpler we always work from
|
||||
* left to right, flipping the coordinates if
|
||||
* needed.
|
||||
*
|
||||
* We also need to figure out if the line is more
|
||||
* than 255 pixels long -- which, because of
|
||||
* inclusive coordinates, means abs(x0-x1) > 254.
|
||||
lda in_x1l ;assume x0 on left
|
||||
sec
|
||||
sbc in_x0l
|
||||
tax
|
||||
beq checkvert ;low bytes even, check hi
|
||||
lda in_x1h
|
||||
sbc in_x0h
|
||||
bcs lx0left
|
||||
|
||||
* x1 is on the left, so the values are negative
|
||||
* (hi byte in A, lo byte in X)
|
||||
lx0right eor #$ff ;invert hi
|
||||
sta ]deltaxh ;store
|
||||
txa
|
||||
eor #$ff ;invert lo
|
||||
sta ]deltaxl
|
||||
inc ]deltaxl ;add one for 2s complement
|
||||
bne :noinchi ;rolled into high byte?
|
||||
inc ]deltaxh ;yes
|
||||
:noinchi lda in_x1l ;start with x1
|
||||
sta ]xposl
|
||||
lda in_x1h
|
||||
sta ]xposh
|
||||
lda in_y1
|
||||
sta ]ypos
|
||||
sec
|
||||
sbc in_y0 ;compute deltay
|
||||
jmp lncommon
|
||||
|
||||
checkvert
|
||||
lda in_x1h ;diff high bytes
|
||||
sbc in_x0h ;(carry still set)
|
||||
blt lx0right ;width=256, x0 right
|
||||
bne lx0left ;width=256, x0 left
|
||||
jmp vertline ;all zero, go vert
|
||||
|
||||
* (branch back from below)
|
||||
* This is a purely horizontal line. We farm the job
|
||||
* out to the raster fill code for speed. (There's
|
||||
* no problem with the line code handling it; it's just
|
||||
* more efficient to let the raster code do it.)
|
||||
phorizontal
|
||||
ldy ]ypos
|
||||
sty rast_top
|
||||
sty rast_bottom
|
||||
lda ]xposl
|
||||
sta rastx0l,y
|
||||
clc
|
||||
adc ]deltaxl ;easier to add delta back
|
||||
sta rastx1l,y ; in than sort out which
|
||||
lda ]xposh ; arg is left vs. right
|
||||
sta rastx0h,y
|
||||
adc ]deltaxh
|
||||
sta rastx1h,y
|
||||
jmp FillRaster
|
||||
|
||||
* x0 is on the left, so the values are positive
|
||||
lx0left stx ]deltaxl
|
||||
sta ]deltaxh
|
||||
lda in_x0l ;start with x0
|
||||
sta ]xposl
|
||||
lda in_x0h
|
||||
sta ]xposh
|
||||
lda in_y0 ;and y0
|
||||
sta ]ypos
|
||||
sec
|
||||
sbc in_y1 ;compute deltay
|
||||
|
||||
* Value of (starty - endy) is in A, flags still set.
|
||||
lncommon
|
||||
bcs :posy
|
||||
eor #$ff ;negative, invert
|
||||
adc #$01
|
||||
sta ]deltay
|
||||
lda #$e8 ;INX
|
||||
bne gotdy
|
||||
:posy
|
||||
_lmb beq phorizontal
|
||||
sta ]deltay
|
||||
lda #$ca ;DEX
|
||||
gotdy sta _hmody
|
||||
sta _vmody
|
||||
sta _wmody
|
||||
|
||||
do 0 ;***** for regression test
|
||||
ldx #$01
|
||||
lda ]deltaxh
|
||||
bne :iswide
|
||||
lda ]deltaxl
|
||||
cmp #$ff ;== 255?
|
||||
beq :iswide
|
||||
ldx #$00 ;notwide
|
||||
:iswide stx $300
|
||||
lda ]xposl
|
||||
sta $301
|
||||
lda ]xposh
|
||||
sta $302
|
||||
lda ]ypos
|
||||
sta $303
|
||||
ldx ]deltaxl
|
||||
stx $304
|
||||
ldx ]deltaxh
|
||||
stx $305
|
||||
ldx ]deltay
|
||||
stx $306
|
||||
lda _hmody
|
||||
and #$20 ;nonzero means inc,
|
||||
sta $307 ; zero means dec
|
||||
fin ;*****
|
||||
|
||||
* At this point we have the initial X position in
|
||||
* ]xposl/h, the initial Y position in ]ypos,
|
||||
* deltax in ]deltaxl/h, deltay in ]deltay, and we've
|
||||
* tweaked the Y-update instructions to either INX or
|
||||
* DEX depending on the direction of movement.
|
||||
*
|
||||
* The next step is to decide whether the line is
|
||||
* horizontal-dominant or vertical-dominant, and
|
||||
* branch to the appropriate handler.
|
||||
*
|
||||
* The core loops for horiz and vert take about
|
||||
* 80 cycles when moving diagonally, and about
|
||||
* 20 fewer when moving in the primary direction.
|
||||
* The wide-horiz is a bit slower.
|
||||
ldy #$01 ;set "wide" flag to 1
|
||||
lda ]deltaxl
|
||||
ldx ]deltaxh
|
||||
bne horzdom ;width >= 256
|
||||
cmp #$ff ;width == 255
|
||||
beq horzdom
|
||||
dey ;not wide
|
||||
cmp ]deltay
|
||||
bge horzdom ; for diagonal lines
|
||||
jmp vertdom
|
||||
|
||||
* We could special-case pure-diagonal lines here
|
||||
* (just BEQ a couple lines up). It does
|
||||
* represent our worst case. I'm not convinced
|
||||
* we'll see them often enough to make it worthwhile.
|
||||
|
||||
|
||||
* horizontal-dominant
|
||||
horzdom
|
||||
sty ]wideflag
|
||||
sta ]count ;:count = deltax + 1
|
||||
inc ]count
|
||||
lsr ;:diff = deltax / 2
|
||||
sta ]diff
|
||||
|
||||
* set Y to the byte offset in the line
|
||||
* load the AND mask into ]andmask
|
||||
ldx ]xposl
|
||||
lda ]xposh ;>= 256?
|
||||
beq :lotabl ;no, use the low table
|
||||
ldy div7hi,x
|
||||
lda mod7hi,x
|
||||
bpl :gottab ;always
|
||||
* BREAK ;debug
|
||||
:lotabl ldy div7lo,x
|
||||
lda mod7lo,x
|
||||
:gottab
|
||||
tax
|
||||
lda andmask,x
|
||||
sta ]andmask
|
||||
|
||||
* Set initial value for line address.
|
||||
ldx ]ypos
|
||||
lda ylooklo,x
|
||||
sta ]hbasl
|
||||
lda ylookhi,x
|
||||
ora g_page
|
||||
sta ]hbasl+1
|
||||
|
||||
lda ]wideflag ;is this a "wide" line?
|
||||
beq :notwide ;nope, stay local
|
||||
jmp widedom
|
||||
|
||||
:notwide lda colorline,y ;set initial color mask
|
||||
sta _hlcolor+1
|
||||
jmp horzloop
|
||||
|
||||
hrts rts
|
||||
|
||||
* bottom of loop, essentially
|
||||
hnoroll sta ]diff ;3
|
||||
hdecc dec ]count ;5 :count--
|
||||
beq hrts ;2 :while (count != 0)
|
||||
;= 7 or 10
|
||||
|
||||
* We keep the byte offset in the line in Y, and the
|
||||
* line index in X, for the entire loop.
|
||||
horzloop
|
||||
_hlcolor lda #$00 ;2 start with color pattern
|
||||
_lmdh eor (]hbasl),y ;5 flip all bits
|
||||
and ]andmask ;3 clear other bits
|
||||
eor (]hbasl),y ;5 restore ours, set theirs
|
||||
sta (]hbasl),y ;6 = 21
|
||||
|
||||
* Move right. We shift the bit mask that determines
|
||||
* the pixel. When we shift into bit 7, we know it's
|
||||
* time to advance another byte.
|
||||
*
|
||||
* If this is a shallow line we would benefit from
|
||||
* keeping the index in X and just doing a 4-cycle
|
||||
* indexed load to get the mask. Not having the
|
||||
* line number in X makes the line calc more
|
||||
* expensive for steeper lines though.
|
||||
lda ]andmask ;3
|
||||
asl ;2 shift, losing hi bit
|
||||
eor #$80 ;2 set the hi bit
|
||||
bne :noh8 ;3 cleared hi bit?
|
||||
* We could BEQ away and branch back in, but this
|
||||
* happens every 7 iterations, so on average it's
|
||||
* a very small improvement. If we happen to branch
|
||||
* across a page boundary the double-branch adds
|
||||
* two more cycles and we lose.
|
||||
iny ;2 advance to next byte
|
||||
lda colorline,y ;4 update color mask
|
||||
sta _hlcolor+1 ;4
|
||||
lda #$81 ;2 reset
|
||||
:noh8 sta ]andmask ;3 = 13 + ((12-1)/7) = 14
|
||||
|
||||
* Update error diff.
|
||||
lda ]diff ;3
|
||||
sec ;2
|
||||
sbc ]deltay ;3 :diff -= deltay
|
||||
bcs hnoroll ;2+ :if (diff < 0) ...
|
||||
;= 11 level, 10 up/down
|
||||
adc ]deltaxl ;3 : diff += deltax
|
||||
sta ]diff ;3
|
||||
_hmody inx ;2 : ypos++ (or --)
|
||||
lda ylooklo,x ;4 update hbasl after line
|
||||
sta ]hbasl ;3 change
|
||||
lda ylookhi,x ;4
|
||||
_pg_or4 ora #$20 ;2
|
||||
sta ]hbasl+1 ;3
|
||||
bne hdecc ;3 = +27 this path -> 37
|
||||
BREAK
|
||||
* horizontal: 10+21+14+11=56 cycles/pixel
|
||||
* diagonal: 7+21+14+37=79 cycles/pixel
|
||||
|
||||
|
||||
* Vertical-dominant line. Could go up or down.
|
||||
vertdom
|
||||
ldx in_y0
|
||||
cpx ]ypos ;did we start at y0?
|
||||
bne :endy0 ;no, started at y1: end row is y0
|
||||
ldx in_y1 ;yes: end row is y1
|
||||
:endy0 stx _vchk+1 ;end condition (operand of CPX # in vertloop)
|
||||
|
||||
lda ]deltay
|
||||
lsr
|
||||
sta ]diff ;:diff = deltay / 2
|
||||
|
||||
* set Y to the byte offset in the line
|
||||
* load the AND mask into ]andmask
|
||||
ldx ]xposl
|
||||
lda ]xposh ;>= 256?
|
||||
beq :lotabl ;no, use the low table
|
||||
ldy div7hi,x
|
||||
lda mod7hi,x
|
||||
bpl :gottab ;always
|
||||
BREAK ;debug
|
||||
:lotabl ldy div7lo,x
|
||||
lda mod7lo,x
|
||||
:gottab
|
||||
tax
|
||||
lda andmask,x ;initial pixel mask
|
||||
sta ]andmask
|
||||
|
||||
lda colorline,y ;initial color mask
|
||||
sta _vlcolor+1 ;patch operand of LDA # in vertloop
|
||||
|
||||
ldx ]ypos ;line index stays in X for the loop
|
||||
jmp vertloop
|
||||
|
||||
* We keep the byte offset in the line in Y, and the
|
||||
* line index in X, for the entire loop.
|
||||
|
||||
* Bottom of loop, essentially.
|
||||
vnoroll sta ]diff ;3
|
||||
|
||||
vertloop
|
||||
lda ylooklo,x ;4
|
||||
sta ]hbasl ;3
|
||||
lda ylookhi,x ;4
|
||||
_pg_or5 ora #$20 ;2
|
||||
sta ]hbasl+1 ;3 = 16
|
||||
|
||||
_vlcolor lda #$00 ;2 start with color pattern
|
||||
_lmdv eor (]hbasl),y ;5 flip all bits
|
||||
and ]andmask ;3 clear other bits
|
||||
eor (]hbasl),y ;5 restore ours, set theirs
|
||||
sta (]hbasl),y ;6 = 21
|
||||
|
||||
_vchk cpx #$00 ;2 was this last line?
|
||||
beq vrts ;2 yes, done
|
||||
_vmody inx ;2 :ypos++ (or --)
|
||||
|
||||
* Update error diff.
|
||||
lda ]diff ;3
|
||||
sec ;2
|
||||
sbc ]deltaxl ;3 :diff -= deltax
|
||||
bcs vnoroll ;2 :if (diff < 0) ...
|
||||
;= 10 vert, 9 move right
|
||||
|
||||
adc ]deltay ;3 : diff += deltay
|
||||
sta ]diff ;3
|
||||
* Move right. We shift the bit mask that determines
|
||||
* the pixel. When we shift into bit 7, we know it's
|
||||
* time to advance another byte.
|
||||
lda ]andmask ;3
|
||||
asl ;2 shift, losing hi bit
|
||||
eor #$80 ;2 set the hi bit
|
||||
beq :is8 ;2+ goes to zero on 8th bit
|
||||
sta ]andmask ;3
|
||||
bne vertloop ;3 = 21 + (18/7) = 24
|
||||
BREAK
|
||||
|
||||
:is8 iny ;2 advance to next byte
|
||||
lda colorline,y ;4 update color
|
||||
sta _vlcolor+1 ;4
|
||||
lda #$81 ;2 reset
|
||||
sta ]andmask ;3
|
||||
bne vertloop ;3 = 18
|
||||
BREAK
|
||||
vrts rts
|
||||
* vertical: 3 + 16 + 21 + 6 + 10 = 56 cycles
|
||||
* diagonal: 16 + 21 + 6 + 9 + 24 = 76 cycles
|
||||
|
||||
|
||||
* "Wide" horizontally-dominant loop. We have to
|
||||
* maintain error-diff and deltax as 16-bit values.
|
||||
* Most of the setup from the "narrow" version carried
|
||||
* over, but we have to re-do the count and diff.
|
||||
*
|
||||
* Normally we set count to (deltax + 1) and decrement
|
||||
* to zero, but it's actually easier to set it equal
|
||||
* to deltax and check for -1.
|
||||
widedom
|
||||
lda ]deltaxh ;:count = deltax
|
||||
sta ]counth
|
||||
ldx ]deltaxl
|
||||
stx ]count
|
||||
stx ]diff
|
||||
lsr ;:diff = deltax / 2
|
||||
ror ]diff
|
||||
sta ]diffh
|
||||
ldx ]ypos
|
||||
|
||||
lda colorline,y ;set initial color mask
|
||||
sta _wlcolor+1
|
||||
|
||||
* We keep the byte offset in the line in Y, and the
|
||||
* line index in X, for the entire loop.
|
||||
wideloop
|
||||
_wlcolor lda #$00 ;2 start with color pattern (operand is self-modified)
|
||||
_lmdw eor (]hbasl),y ;5 flip all bits
|
||||
and ]andmask ;3 clear other bits
|
||||
eor (]hbasl),y ;5 restore ours, set theirs
|
||||
sta (]hbasl),y ;6 = 21
|
||||
|
||||
* Move right. We shift the bit mask that determines
|
||||
* the pixel. When we shift into bit 7, we know it's
|
||||
* time to advance another byte.
|
||||
lda ]andmask ;3
|
||||
asl ;2 shift, losing hi bit
|
||||
eor #$80 ;2 set the hi bit
|
||||
bne :not7 ;3 goes to zero on 8th bit
|
||||
iny ; 2 advance to next byte
|
||||
lda colorline,y ; 4 update color mask
|
||||
sta _wlcolor+1 ; 4 patch this loop's LDA (was _hlcolor: wrong loop)
|
||||
lda #$81 ; 2 reset
|
||||
:not7 sta ]andmask ;3 = 13 usually, 25 every 7
|
||||
|
||||
* Update error diff, which is a positive number. If
|
||||
* it goes negative ("if (diff < 0)") we act.
|
||||
lda ]diff
|
||||
sec
|
||||
sbc ]deltay ;:diff -= deltay
|
||||
bcs wnoroll ;didn't even roll low byte
|
||||
dec ]diffh ;borrow from hi byte (carry stays clear)
|
||||
bpl wnoroll ;went 1->0, keep going
|
||||
|
||||
adc ]deltaxl ;: diff += deltax (carry is clear here)
|
||||
sta ]diff
|
||||
lda ]diffh
|
||||
adc ]deltaxh
|
||||
sta ]diffh
|
||||
_wmody inx ;: ypos++ (or --)
|
||||
lda ylooklo,x ;update hbasl after line
|
||||
sta ]hbasl ; change
|
||||
lda ylookhi,x
|
||||
_pg_or6 ora #$20
|
||||
sta ]hbasl+1
|
||||
bne wdecc ;always (ORA result is nonzero)
|
||||
BREAK
|
||||
|
||||
wnoroll sta ]diff
|
||||
|
||||
wdecc dec ]count ;5 :count--
|
||||
lda ]count ;3
|
||||
cmp #$ff ;2
|
||||
bne wideloop ;3 :while (count > -1)
|
||||
dec ]counth ;low rolled, decr high
|
||||
beq wideloop ;went 1->0, keep going
|
||||
rts
|
||||
|
||||
|
||||
* Pure-vertical line. These are common in certain
|
||||
* applications, and checking for it only adds two
|
||||
* cycles to the general case.
|
||||
vertline
|
||||
ldx in_y0
|
||||
ldy in_y1
|
||||
cpx in_y1 ;y0 < y1?
|
||||
blt :usey0 ;yes, go from y0 to y1
|
||||
txa ;swap X/A
|
||||
tay
|
||||
ldx in_y1
|
||||
:usey0 stx ]ypos
|
||||
iny
|
||||
sty _pvytest+1
|
||||
|
||||
ldx in_x0l ;xc lo
|
||||
lda in_x0h ;>= 256?
|
||||
beq :lotabl
|
||||
ldy div7hi,x
|
||||
lda mod7hi,x
|
||||
bpl :gotit ;always
|
||||
:lotabl ldy div7lo,x
|
||||
lda mod7lo,x
|
||||
|
||||
* Byte offset is in Y, mod-7 value is in A.
|
||||
:gotit tax
|
||||
lda andmask,x
|
||||
sta _pvand+1 ;this doesn't change
|
||||
|
||||
lda colorline,y
|
||||
sta _pvcolor+1 ;nor does this
|
||||
|
||||
ldx ]ypos ;top line
|
||||
|
||||
* There's a trick where, when (linenum & 0x07) is
|
||||
* nonzero, you just add 4 to hbasl+1 instead of
|
||||
* re-doing the lookup. However, TXA+AND+BEQ
|
||||
* followed by LDA+CLC+ADC+STA is 16 cycles, the same
|
||||
* as our self-modified lookup, so it's not a win.
|
||||
* (And if we used a second ylookhi and self-modded
|
||||
* the table address, we could shave off another 2.)
|
||||
|
||||
* Main pure-vertical loop
|
||||
pverloop
|
||||
lda ylooklo,x ;4
|
||||
sta ]hbasl ;3
|
||||
lda ylookhi,x ;4
|
||||
_pg_or7 ora #$20 ;2
|
||||
sta ]hbasl+1 ;3 (= 16)
|
||||
|
||||
_pvcolor lda #$00 ;2 start with color pattern
|
||||
_lmdpv eor (]hbasl),y ;5 flip all bits
|
||||
_pvand and #$00 ;2 clear other bits
|
||||
eor (]hbasl),y ;5
|
||||
sta (]hbasl),y ;6 (= 20)
|
||||
|
||||
inx ;2
|
||||
_pvytest cpx #$00 ;2 done?
|
||||
bne pverloop ;3 = 7
|
||||
rts
|
||||
* 43 cycles/pixel
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Set the line mode according to in_arg
|
||||
*
|
||||
* A slightly silly feature to get xdraw lines
|
||||
* without really working for it.
|
||||
*
|
||||
********************************
|
||||
SetLineMode
|
||||
lda in_arg
|
||||
beq :standard
|
||||
|
||||
* configure for xdraw
|
||||
lda #$24 ;BIT dp
|
||||
sta _lmb
|
||||
sta _lmdh
|
||||
sta _lmdv
|
||||
sta _lmdw
|
||||
sta _lmdpv
|
||||
rts
|
||||
|
||||
* configure for standard drawing
|
||||
:standard lda #$f0 ;BEQ
|
||||
sta _lmb
|
||||
lda #$51 ;EOR (dp),y
|
||||
sta _lmdh
|
||||
sta _lmdv
|
||||
sta _lmdw
|
||||
sta _lmdpv
|
||||
rts
|
805
FDRAW.S
Normal file
805
FDRAW.S
Normal file
@ -0,0 +1,805 @@
|
||||
********************************
|
||||
* *
|
||||
* Fast Apple II Graphics *
|
||||
* By Andy McFadden *
|
||||
* Version 0.3, Aug 2015 *
|
||||
* *
|
||||
* Main source file *
|
||||
* *
|
||||
* Developed with Merlin-16 *
|
||||
* *
|
||||
********************************
|
||||
|
||||
* Set to 1 to build FDRAW.FAST, set to zero to
|
||||
* build FDRAW.SMALL.
|
||||
USE_FAST equ 1
|
||||
|
||||
* Set to 1 to turn on beeps/clicks for debugging.
|
||||
NOISE_ON equ 0
|
||||
|
||||
|
||||
lst off
|
||||
org $6000
|
||||
|
||||
*
|
||||
* Macros.
|
||||
*
|
||||
spkr equ $c030
|
||||
bell equ $ff3a
|
||||
|
||||
* If enabled, click the speaker (changes flags only).
|
||||
CLICK mac
|
||||
do NOISE_ON
|
||||
bit spkr
|
||||
fin
|
||||
<<<
|
||||
* If enabled, beep the speaker (scrambles regs).
|
||||
BEEP mac
|
||||
do NOISE_ON
|
||||
jsr bell
|
||||
fin
|
||||
<<<
|
||||
* If enabled, insert a BRK.
|
||||
BREAK mac
|
||||
do NOISE_ON
|
||||
brk $99
|
||||
fin
|
||||
<<<
|
||||
|
||||
* In "fast" mode, we align tables on page boundaries so we
|
||||
* don't take a 1-cycle hit when the indexing crosses a page.
|
||||
* In "small" mode, we skip the alignment.
|
||||
PG_ALIGN mac
|
||||
do USE_FAST
|
||||
ds \
|
||||
fin
|
||||
<<<
|
||||
|
||||
*
|
||||
* Hi-res screen constants.
|
||||
*
|
||||
BYTES_PER_ROW = 40
|
||||
NUM_ROWS = 192
|
||||
NUM_COLS = 280
|
||||
|
||||
*
|
||||
* Variable storage. We assign generic names to
|
||||
* zero-page scratch locations, then assign variables
|
||||
* with real names to these.
|
||||
*
|
||||
* 06-09 are unused (except by SWEET-16)
|
||||
* 1a-1d are Applesoft hi-res scratch
|
||||
* cc-cf are only used by INTBASIC
|
||||
* eb-ef and ff appear totally unused by ROM routines
|
||||
*
|
||||
zptr0 equ $1a ;2b
|
||||
zloc0 equ $06
|
||||
zloc1 equ $07
|
||||
zloc2 equ $08
|
||||
zloc3 equ $09
|
||||
zloc4 equ $1c
|
||||
zloc5 equ $1d
|
||||
zloc6 equ $cc
|
||||
zloc7 equ $cd
|
||||
zloc8 equ $ce
|
||||
zloc9 equ $cf
|
||||
zloc10 equ $eb
|
||||
zloc11 equ $ec
|
||||
zloc12 equ $ed
|
||||
zloc13 equ $ee
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Entry points for external programs.
|
||||
*
|
||||
********************************
|
||||
Entry
|
||||
jmp Init ;initialize data tables
|
||||
dfb 0,3 ;version number
|
||||
|
||||
*
|
||||
* Parameters passed from external programs.
|
||||
*
|
||||
in_arg ds 1 ;generic argument
|
||||
in_x0l ds 1 ;X coordinate 0, low part
|
||||
in_x0h ds 1 ;X coordinate 0, high part
|
||||
in_y0 ds 1 ;Y coordinate 0
|
||||
in_x1l ds 1
|
||||
in_x1h ds 1
|
||||
in_y1 ds 1
|
||||
in_rad ds 1 ;radius for circles
|
||||
|
||||
ds 3 ;pad to 16 bytes
|
||||
|
||||
jmp SetColor
|
||||
jmp SetPage
|
||||
jmp Clear
|
||||
jmp DrawPoint
|
||||
jmp DrawLine
|
||||
jmp DrawRect
|
||||
jmp FillRect
|
||||
jmp DrawCircle
|
||||
jmp FillCircle
|
||||
jmp SetLineMode
|
||||
jmp noimpl ;reserved2
|
||||
jmp FillRaster
|
||||
|
||||
* Raster fill values. Top, bottom, and pointers to tables
|
||||
* for the benefit of external callers.
|
||||
rast_top ds 1
|
||||
rast_bottom ds 1
|
||||
da rastx0l
|
||||
da rastx0h
|
||||
da rastx1l
|
||||
da rastx1h
|
||||
|
||||
noimpl rts
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Global variables.
|
||||
*
|
||||
********************************
|
||||
|
||||
g_inited dfb 0 ;initialized?
|
||||
g_color dfb 0 ;hi-res color (0-7)
|
||||
g_page dfb $20 ;hi-res page ($20 or $40)
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Initialize.
|
||||
*
|
||||
********************************
|
||||
Init
|
||||
lda #$00
|
||||
sta in_arg
|
||||
jsr SetColor ;set color to zero
|
||||
jsr SetLineMode ;set normal lines
|
||||
lda #$20
|
||||
sta in_arg
|
||||
sta g_inited
|
||||
jmp SetPage ;set hi-res page 1
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Set the color.
|
||||
*
|
||||
********************************
|
||||
SetColor
|
||||
lda in_arg
|
||||
cmp g_color ;same as the old color?
|
||||
beq :done
|
||||
|
||||
and #$07 ;safety first
|
||||
sta g_color
|
||||
|
||||
* Update the "colorline" table, which provides a quick color
|
||||
* lookup for odd/even bytes. We could also have one table
|
||||
* per color and self-mod the "LDA addr,y" instructions to
|
||||
* point to the current one, but that uses a bunch of memory
|
||||
* and is kind of ugly. Takes 16 + (12 * 40) = 496 cycles.
|
||||
tax ;2
|
||||
lda xormask,x ;4
|
||||
sta :_xormsk+1 ;4
|
||||
|
||||
lda oddcolor,x ;4
|
||||
ldy #BYTES_PER_ROW-1 ;2
|
||||
]loop sta colorline,y ;5
|
||||
:_xormsk eor #$00 ;2
|
||||
dey ;2
|
||||
bpl ]loop ;3
|
||||
|
||||
:done rts
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Set the page.
|
||||
*
|
||||
********************************
|
||||
SetPage
|
||||
lda g_inited ;let's just check this
|
||||
beq noinit ; (not called too often)
|
||||
|
||||
lda in_arg
|
||||
cmp #$20
|
||||
beq :good
|
||||
cmp #$40
|
||||
beq :good
|
||||
jmp bell
|
||||
:good
|
||||
sta g_page
|
||||
|
||||
do 0 ;*****
|
||||
cmp ylookhi
|
||||
beq :tabok
|
||||
* Check to see if the values currently in the Y-lookup table
|
||||
* match our current page setting. If they don't, we need to
|
||||
* adjust the code that does lookups.
|
||||
|
||||
* This approach modifies the table itself, paying a large
|
||||
* cost now so we don't have to pay it on every lookup.
|
||||
* However, this costs 2+(16*192)=3074 cycles, while an
|
||||
* "ORA imm" only adds two to each lookup, so we'd have
|
||||
* to do a lot of drawing to make this worthwhile.
|
||||
* (Note: assumes ylookhi is based at $2000 not $0000)
|
||||
ldy #NUM_ROWS ;2
|
||||
]loop lda ylookhi-1,y ;4
|
||||
eor #$60 ;2 $20 <--> $40
|
||||
sta ylookhi-1,y ;5
|
||||
dey ;2
|
||||
bne ]loop ;3
|
||||
|
||||
else ;*****
|
||||
|
||||
* This approach uses self-modifying code to update the
|
||||
* relevant instructions. It's a bit messy to have it
|
||||
* here, but it saves us from having to do it on
|
||||
* every call.
|
||||
*
|
||||
* We could also have a second y-lookup table and
|
||||
* use this to update the pointers. That would let
|
||||
* us drop the "ORA imm" entirely, without the cost
|
||||
* of the rewrite above, but eating up another 192 bytes.
|
||||
sta _pg_or1+1 ;rastfill
|
||||
sta _pg_or2+1 ;circle hplot
|
||||
sta _pg_or3+1 ;circle hplot
|
||||
sta _pg_or4+1 ;drawline
|
||||
sta _pg_or5+1 ;drawline
|
||||
sta _pg_or6+1 ;drawline
|
||||
sta _pg_or7+1 ;drawline
|
||||
|
||||
fin ;*****
|
||||
|
||||
:tabok rts
|
||||
|
||||
noinit ldy #$00
|
||||
]loop lda :initmsg,y
|
||||
beq :done
|
||||
jsr $fded ;cout
|
||||
iny
|
||||
bne ]loop
|
||||
:done rts
|
||||
|
||||
:initmsg asc "FDRAW NOT INITIALIZED",87,87,00
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Clear the screen to the current color.
|
||||
*
|
||||
********************************
|
||||
Clear
|
||||
|
||||
do USE_FAST ;*****
|
||||
* This performs a "visually linear" clear, erasing the screen
|
||||
* from left to right and top to bottom. To reduce the amount
|
||||
* of code required we erase in thirds (top/middle/bottom).
|
||||
*
|
||||
* Compare to a "venetian blind" clear, which is what you get
|
||||
* if you erase memory linearly.
|
||||
*
|
||||
* The docs discuss different approaches. This version
|
||||
* requires ((2 + 5*64 + 11) * 40 + 14) * 3 = 40002 cycles.
|
||||
* If we didn't divide it into thirds to keep the top-down
|
||||
* look, we'd need (5*64 + 9) * 120 = 39480 cycles, so
|
||||
* we're spending 522 cycles to avoid the venetian look.
|
||||
lda :clrloop+2
|
||||
cmp g_page
|
||||
beq :pageok
|
||||
|
||||
* We're on the wrong hi-res page. Flip to the other one.
|
||||
* 4 + (20*64) = 1284 cycles to do the flip (+ a few more
|
||||
* because we're probably crossing a page boundary).
|
||||
BEEP
|
||||
ldy #NUM_ROWS ;2
|
||||
]loop lda :clrloop-3+2,y ;4
|
||||
eor #$60 ;2
|
||||
sta :clrloop-3+2,y ;5
|
||||
dey ;2
|
||||
dey ;2
|
||||
dey ;2
|
||||
bne ]loop ;3
|
||||
|
||||
:pageok ldx g_color ;grab the current color
|
||||
lda xormask,x
|
||||
sta :_xormsk+1
|
||||
lda evencolor,x
|
||||
|
||||
ldy #0
|
||||
jsr :clearthird
|
||||
ldy #BYTES_PER_ROW
|
||||
jsr :clearthird
|
||||
ldy #BYTES_PER_ROW*2
|
||||
* fall through into :clearthird for final pass
|
||||
|
||||
:clearthird
|
||||
ldx #BYTES_PER_ROW-1 ;2
|
||||
:clrloop sta $2000,y ;5 (* 64)
|
||||
sta $2400,y ;this could probably be
|
||||
sta $2800,y ; done with LUP math
|
||||
sta $2c00,y
|
||||
sta $3000,y
|
||||
sta $3400,y
|
||||
sta $3800,y
|
||||
sta $3c00,y
|
||||
sta $2080,y
|
||||
sta $2480,y
|
||||
sta $2880,y
|
||||
sta $2c80,y
|
||||
sta $3080,y
|
||||
sta $3480,y
|
||||
sta $3880,y
|
||||
sta $3c80,y
|
||||
sta $2100,y
|
||||
sta $2500,y
|
||||
sta $2900,y
|
||||
sta $2d00,y
|
||||
sta $3100,y
|
||||
sta $3500,y
|
||||
sta $3900,y
|
||||
sta $3d00,y
|
||||
sta $2180,y
|
||||
sta $2580,y
|
||||
sta $2980,y
|
||||
sta $2d80,y
|
||||
sta $3180,y
|
||||
sta $3580,y
|
||||
sta $3980,y
|
||||
sta $3d80,y
|
||||
sta $2200,y
|
||||
sta $2600,y
|
||||
sta $2a00,y
|
||||
sta $2e00,y
|
||||
sta $3200,y
|
||||
sta $3600,y
|
||||
sta $3a00,y
|
||||
sta $3e00,y
|
||||
sta $2280,y
|
||||
sta $2680,y
|
||||
sta $2a80,y
|
||||
sta $2e80,y
|
||||
sta $3280,y
|
||||
sta $3680,y
|
||||
sta $3a80,y
|
||||
sta $3e80,y
|
||||
sta $2300,y
|
||||
sta $2700,y
|
||||
sta $2b00,y
|
||||
sta $2f00,y
|
||||
sta $3300,y
|
||||
sta $3700,y
|
||||
sta $3b00,y
|
||||
sta $3f00,y
|
||||
sta $2380,y
|
||||
sta $2780,y
|
||||
sta $2b80,y
|
||||
sta $2f80,y
|
||||
sta $3380,y
|
||||
sta $3780,y
|
||||
sta $3b80,y
|
||||
sta $3f80,y
|
||||
:_xormsk eor #$00 ;2 flip odd/even bits
|
||||
iny ;2
|
||||
dex ;2
|
||||
bmi :done ;2
|
||||
jmp :clrloop ;3
|
||||
:done rts
|
||||
|
||||
else ;***** not USE_FAST
|
||||
|
||||
* This version was suggested by Marcus Heuser on
|
||||
* comp.sys.apple2.programmer. It does a "venetian blind"
|
||||
* clear, and takes (5 * 32 + 7) * 248 = 41416 cycles.
|
||||
* It overwrites half of the screen holes.
|
||||
lda :clrloop+5
|
||||
cmp g_page
|
||||
beq :pageok
|
||||
|
||||
* We're on the wrong hi-res page. Flip to the other one.
|
||||
* 12 + (20*31) = 632 cycles to do the flip. We have to
|
||||
* single out the first entry because it's $1f not $20.
|
||||
BEEP
|
||||
lda :clrloop+2 ;4
|
||||
eor #$20 ;2 $1f <-> $3f
|
||||
sta :clrloop+2 ;4
|
||||
ldy #31*3 ;2
|
||||
]loop lda :clrloop+2,y ;4
|
||||
eor #$60 ;2 $20 <-> $40
|
||||
sta :clrloop+2,y ;5
|
||||
dey ;2
|
||||
dey ;2
|
||||
dey ;2
|
||||
bne ]loop ;3
|
||||
|
||||
:pageok ldx g_color
|
||||
lda xormask,x
|
||||
sta :_xormsk+1
|
||||
lda oddcolor,x
|
||||
ldy #248 ;120 + 8 + 120
|
||||
:clrloop
|
||||
]addr = $1fff
|
||||
lup 32 ;begin a loop in assembler
|
||||
sta ]addr,y ;5
|
||||
]addr = ]addr+$100 ;sta 20ff,21ff,...
|
||||
--^
|
||||
:_xormsk eor #$00 ;2
|
||||
dey ;2
|
||||
bne :clrloop ;3
|
||||
rts
|
||||
|
||||
fin ;***** not USE_FAST
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Draw rectangle outline.
|
||||
*
|
||||
********************************
|
||||
DrawRect
|
||||
* We could just issue 4 line draw calls here, maybe
|
||||
* adjusting the vertical lines by 1 pixel up/down to
|
||||
* avoid overdraw. But if the user wanted 4 lines,
|
||||
* they could just draw 4 lines. Instead, we're going
|
||||
* to draw a double line on each edge to ensure that
|
||||
* the outline rectangle always has the correct color.
|
||||
*
|
||||
* Rather than draw two vertical lines, we draw a
|
||||
* two-pixel-wide filled rectangle on each side.
|
||||
*
|
||||
* We don't want to double-up if the rect is only one
|
||||
* pixel wide, so we have to check for that.
|
||||
*
|
||||
* If the rect is one pixel high, it's just a line.
|
||||
* If it's two pixels high, we don't need to draw
|
||||
* the left/right edges, just the top/bottom lines.
|
||||
* If it's more than two tall, we don't need to draw
|
||||
* the left/right edges on the top and bottom lines,
|
||||
* so we save a few cycles by skipping those.
|
||||
|
||||
lda in_y1 ;copy top/bottom to local
|
||||
sta rast_bottom
|
||||
dec rast_bottom ;move up one
|
||||
sec
|
||||
sbc in_y0
|
||||
beq :isline ;1 pixel high, just draw line
|
||||
cmp #1
|
||||
beq :twolines ;2 pixels high, lines only
|
||||
ldy in_y0
|
||||
iny ;start down a line
|
||||
sty rast_top
|
||||
|
||||
lda in_x0h ;check to see if left/right
|
||||
cmp in_x1h ; coords are the same; if
|
||||
bne :notline ; so, going +1/-1 at edge
|
||||
lda in_x0l ; will overdraw.
|
||||
cmp in_x1l
|
||||
bne :notlin1
|
||||
|
||||
:isline jmp DrawLine ;just treat like line
|
||||
|
||||
* Set up left edge. Top line is in Y.
|
||||
:notline lda in_x0l
|
||||
:notlin1 sta rastx0l,y
|
||||
clc
|
||||
adc #1
|
||||
sta rastx1l,y
|
||||
lda in_x0h
|
||||
ora #$80 ;"repeat" flag
|
||||
sta rastx0h,y
|
||||
and #$7f
|
||||
adc #0
|
||||
sta rastx1h,y
|
||||
jsr FillRaster
|
||||
|
||||
ldy rast_top
|
||||
lda in_x1l ;now set up right edge
|
||||
sta rastx1l,y
|
||||
sec
|
||||
sbc #1
|
||||
sta rastx0l,y
|
||||
lda in_x1h
|
||||
sta rastx1h,y
|
||||
sbc #0
|
||||
ora #$80 ;"repeat" flag
|
||||
sta rastx0h,y
|
||||
jsr FillRaster
|
||||
|
||||
* Now the top/bottom lines.
|
||||
:twolines
|
||||
ldy in_y0
|
||||
jsr :drawline
|
||||
ldy in_y1
|
||||
|
||||
:drawline
|
||||
sty rast_top
|
||||
sty rast_bottom
|
||||
lda in_x0l ;copy left/right to the
|
||||
sta rastx0l,y ; table entry for the
|
||||
lda in_x0h ; appropriate line
|
||||
sta rastx0h,y
|
||||
lda in_x1l
|
||||
sta rastx1l,y
|
||||
lda in_x1h
|
||||
sta rastx1h,y
|
||||
jmp FillRaster
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Draw filled rectangle.
|
||||
*
|
||||
********************************
|
||||
FillRect
|
||||
* Just fill out the raster table and call the fill routine.
|
||||
* We require y0=top, y1=bottom, x0=left, x1=right.
|
||||
ldy in_y0
|
||||
sty rast_top
|
||||
lda in_y1
|
||||
sta rast_bottom
|
||||
|
||||
lda in_x0l
|
||||
sta rastx0l,y
|
||||
lda in_x0h
|
||||
ora #$80 ;"repeat" flag
|
||||
sta rastx0h,y
|
||||
lda in_x1l
|
||||
sta rastx1l,y
|
||||
lda in_x1h
|
||||
sta rastx1h,y
|
||||
|
||||
jmp FillRaster
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Fill an area defined by the raster tables.
|
||||
*
|
||||
********************************
|
||||
FillRaster

* Paint every line from rast_top through rast_bottom,
* using the left edge stored in rastx0l/rastx0h and the
* right edge stored in rastx1l/rastx1h for that line.
*
* Any convex polygon can be drawn this way once its
* edges have been rasterized into the tables.
*
* Setting the high bit of a line's rastx0h entry turns
* on "repeat" mode: the edges worked out for that line
* are reused for the lines that follow.  Rectangles
* avoid about 40 cycles of per-line overhead this way,
* and only one raster table entry has to be filled in.
* The flag test costs everything else about 3 cycles
* per line.
*
* An external caller that already knows a line is
* vertical could draw it with the repeat flag.  DrawLine
* doesn't try: it is pretty good with vertical lines,
* and deciding whether to come here would add setup
* time to every vertical-dominant line.

]hbasl  equ   zptr0
]hbash  equ   zptr0+1
]lftbyte equ  zloc0
]lftbit equ   zloc1
]rgtbyte equ  zloc2
]rgtbit equ   zloc3
]line   equ   zloc4
]andmask equ  zloc5
]cur_line equ zloc6
]repting equ  zloc7

* Select the XOR byte for the current color and make
* sure the fill loop is using it.
        ldx   g_color
        lda   xormask,x
        do    USE_FAST-1    ;***** i.e. not USE_FAST
        sta   _xorcolor+1   ;patch the slow loop's EOR
        else                ;***** USE_FAST
        cmp   rast_unroll+3 ;unrolled code already patched?
        beq   :goodmask
        jsr   fixrastxor    ;no, rewrite its EOR operands
:goodmask
        fin                 ;*****

        lda   #0            ;not in repeat mode yet
        sta   ]repting

        ldy   rast_top      ;first line to draw
|
||||
|
||||
* Main rasterization loop. Y holds the line number.
|
||||
rastloop ;Y = line to draw; rastlinedone jumps back here for each further line
|
||||
sty ]cur_line ;3
|
||||
ldx ylooklo,y ;4 low byte of the line's base address
|
||||
stx ]hbasl ;3
|
||||
lda ylookhi,y ;4 high byte, stored without page bits
|
||||
_pg_or1 ora #$20 ;2 will be $20 or $40
|
||||
sta ]hbash ;3 = 19 cycles
|
||||
do USE_FAST-1 ;***** i.e. not USE_FAST
|
||||
stx _wrhires+1 ;patch the line address into the slow loop's STA
|
||||
sta _wrhires+2
|
||||
fin ;*****
|
||||
|
||||
* divide left edge by 7
|
||||
ldx rastx0l,y ;4 line num in Y
|
||||
lda rastx0h,y ;4 bit 7 is the "repeat" flag
|
||||
bpl :noflag ;2 no flag; Z from the LDA is still valid at :noflag
|
||||
sta rastx0h+1,y ;4 propagate flag to the next line's entry (table has one spare byte)
|
||||
lda ]repting ;3 first time through?
|
||||
beq :firstre ;2 yup, finish calculations
|
||||
lda ]rgtbyte ;3 need this in A
|
||||
bpl :repeat ;3 always (div7 table values never exceed $27)
|
||||
:firstre lda rastx0h,y ;reload
|
||||
sta ]repting ;any nonzero will do
|
||||
and #$7f ;strip repeat flag
|
||||
:noflag beq :lotabl ;high byte zero: use the "lo" tables
|
||||
lda mod7hi,x ;high byte nonzero: "hi" tables, indexed by the low byte
|
||||
sta ]lftbit
|
||||
lda div7hi,x
|
||||
sta ]lftbyte
|
||||
bpl :gotlft ;always
|
||||
BREAK ;debug
|
||||
:lotabl lda mod7lo,x
|
||||
sta ]lftbit
|
||||
lda div7lo,x
|
||||
sta ]lftbyte
|
||||
:gotlft
|
||||
|
||||
* divide right edge by 7
|
||||
ldx rastx1l,y ;4 line num in Y
|
||||
lda rastx1h,y ;4
|
||||
beq :lotabr ;3
|
||||
lda mod7hi,x
|
||||
sta ]rgtbit
|
||||
lda div7hi,x
|
||||
sta ]rgtbyte
|
||||
bpl :gotrgt ;always
|
||||
BREAK ;debug
|
||||
:lotabr lda mod7lo,x ;4
|
||||
sta ]rgtbit ;3
|
||||
lda div7lo,x ;4
|
||||
sta ]rgtbyte ;3 = 25 for X1 < 256
|
||||
:gotrgt ;A = ]rgtbyte on both paths
|
||||
|
||||
:repeat ;A = ]rgtbyte on every path that gets here
|
||||
cmp ]lftbyte ;3
|
||||
bne :not1byte ;3
|
||||
|
||||
* The left and right edges are in the same byte. We
|
||||
* need to set up the mask differently, so we deal with
|
||||
* it as a special case.
|
||||
ldy ]lftbit
|
||||
lda leftmask,y ;create the AND mask
|
||||
ldx ]rgtbit
|
||||
and rightmask,x ;strip out bits on right
|
||||
sta ]andmask
|
||||
|
||||
ldy ]lftbyte
|
||||
lda colorline,y ;get color bits
|
||||
eor (]hbasl),y ;combine w/screen
|
||||
and ]andmask ;remove not-ours
|
||||
eor (]hbasl),y ;combine again
|
||||
sta (]hbasl),y ;= (color AND mask) OR (screen AND NOT mask)
|
||||
jmp rastlinedone
|
||||
|
||||
* This is the more general case. We special-case the
|
||||
* left and right edges, then byte-stomp the middle.
|
||||
* On entry, ]rgtbyte is in A
|
||||
:not1byte
|
||||
sec ;2 compute number of full
|
||||
sbc ]lftbyte ;3 and partial bytes to
|
||||
tax ;2 draw
|
||||
inx ;2 X = ]rgtbyte - ]lftbyte + 1
|
||||
|
||||
ldy ]rgtbit ;3
|
||||
cpy #6 ;2 rightmask entry 6 is $ff: whole byte is ours
|
||||
beq :rgtnospcl ;3 so leave it for the byte-stomp loop
|
||||
lda rightmask,y ;handle partial-byte right
|
||||
sta ]andmask
|
||||
ldy ]rgtbyte
|
||||
lda colorline,y
|
||||
eor (]hbasl),y
|
||||
and ]andmask
|
||||
eor (]hbasl),y
|
||||
sta (]hbasl),y ;same masked merge as the one-byte case
|
||||
dex ;adjust count
|
||||
:rgtnospcl
|
||||
|
||||
ldy ]lftbit ;3 check left for partial
|
||||
beq :lftnospcl ;3 leftmask entry 0 is $ff: whole byte is ours
|
||||
lda leftmask,y ;handle partial-byte left
|
||||
sta ]andmask
|
||||
ldy ]lftbyte
|
||||
lda colorline,y
|
||||
eor (]hbasl),y
|
||||
and ]andmask
|
||||
eor (]hbasl),y
|
||||
sta (]hbasl),y
|
||||
dex ;adjust count
|
||||
beq rastlinedone ;bail if all done
|
||||
iny ;advance start position
|
||||
bne :liny ;always
|
||||
BREAK
|
||||
:lftnospcl
|
||||
|
||||
ldy ]lftbyte ;3
|
||||
:liny ;Y = first full byte, X = number of full bytes
|
||||
|
||||
do USE_FAST ;***** "fast" loop
|
||||
* Instead of looping, jump into an unrolled loop.
|
||||
* Cost is 10 cycles per byte with an extra 14 cycles
|
||||
* of overhead, so we start to win at 4 bytes.
|
||||
lda rastunidx,x ;4 entry offset for X bytes
|
||||
sta :_rastun+1 ;4 patch low byte of the JMP (rast_unroll is page-aligned)
|
||||
lda colorline,y ;4 get odd/even color val
|
||||
:_rastun jmp rast_unroll ;3 unrolled code ends with jmp rastlinedone
|
||||
|
||||
else ;***** "slow" loop
|
||||
* Inner loop of the renderer. This runs 0-40x.
|
||||
* Cost is 14 cycles/byte.
|
||||
lda colorline,y ;get appropriate odd/even val
|
||||
_wrhires sta $2000,y ;5 replaced with line addr
|
||||
_xorcolor eor #$00 ;2 replaced with $00/$7f
|
||||
iny ;2
|
||||
dex ;2
|
||||
bne _wrhires ;3 falls out into rastlinedone when X reaches 0
|
||||
|
||||
fin ;*****
|
||||
|
||||
rastlinedone ;every per-line drawing path ends up here
|
||||
ldy ]cur_line ;3 more lines to go?
|
||||
cpy rast_bottom ;4
|
||||
bge :done ;2 stop once line rast_bottom itself has been drawn
|
||||
iny ;2
|
||||
jmp rastloop ;3 must have line in Y
|
||||
|
||||
:done rts
|
||||
|
||||
fixrastxor ;no body unless USE_FAST; the call in FillRaster is conditional too
|
||||
do USE_FAST ;*****
|
||||
* Update the EOR statements in the unrolled rastfill code.
|
||||
* Doing this with a loop takes ~600 cycles, doing it with
|
||||
* unrolled stores takes 160. We only do this when we
|
||||
* need to, so changing the color from green to blue won't
|
||||
* cause this to run.
|
||||
*
|
||||
* Call with the XOR value in A.
|
||||
]offset = 0
|
||||
lup BYTES_PER_ROW
|
||||
sta rast_unroll+3+]offset ;+3 = operand byte of the "eor #" in each step
|
||||
]offset = ]offset+5 ;each unrolled step is 5 bytes
|
||||
--^
|
||||
BEEP ;NOTE(review): macro not defined in this part of the file; looks like a debug aid on a live path -- confirm it is intended
|
||||
rts
|
||||
fin ;*****
|
||||
|
||||
|
||||
* include the line functions
|
||||
put FDRAW.LINE
|
||||
|
||||
* include the circle functions
|
||||
put FDRAW.CIRCLE
|
||||
|
||||
lst on
|
||||
CODE_END equ * ;end of code section
|
||||
lst off
|
||||
|
||||
* include the data tables
|
||||
put FDRAW.TABLES
|
||||
|
||||
lst on
|
||||
DAT_END equ * ;end of data / BSS
|
||||
lst off
|
||||
|
||||
* Save the appropriate object file.
|
||||
do USE_FAST
|
||||
sav FDRAW.FAST
|
||||
else
|
||||
sav FDRAW.SMALL
|
||||
fin
|
339
FDRAW.TABLES.S
Normal file
339
FDRAW.TABLES.S
Normal file
@ -0,0 +1,339 @@
|
||||
********************************
|
||||
* *
|
||||
* Fast Apple II Graphics *
|
||||
* By Andy McFadden *
|
||||
* Version 0.3, Aug 2015 *
|
||||
* *
|
||||
* Pre-computed data and *
|
||||
* large internal buffers. *
|
||||
* (Included by FDRAW.S) *
|
||||
* *
|
||||
* Developed with Merlin-16 *
|
||||
* *
|
||||
********************************
|
||||
|
||||
* Expected layout with alignment:
|
||||
*
|
||||
* P1 ylooklo, misc tables
|
||||
* P2 ylookhi, colorline
|
||||
* P3 rastx0l
|
||||
* P4 rastx0h
|
||||
* P5 rastx1l
|
||||
* P6 rastx1h, div7hi, mod7hi
|
||||
* P7 div7lo
|
||||
* P8 mod7lo
|
||||
* P9 rast_unroll, rastunidx
|
||||
*
|
||||
* Tables should be just under $900 bytes.
|
||||
|
||||
PG_ALIGN
|
||||
|
||||
* Hi-res Y lookup, low part (192 bytes).
|
||||
ylooklo HEX 0000000000000000
|
||||
HEX 8080808080808080
|
||||
HEX 0000000000000000
|
||||
HEX 8080808080808080
|
||||
HEX 0000000000000000
|
||||
HEX 8080808080808080
|
||||
HEX 0000000000000000
|
||||
HEX 8080808080808080
|
||||
HEX 2828282828282828
|
||||
HEX a8a8a8a8a8a8a8a8
|
||||
HEX 2828282828282828
|
||||
HEX a8a8a8a8a8a8a8a8
|
||||
HEX 2828282828282828
|
||||
HEX a8a8a8a8a8a8a8a8
|
||||
HEX 2828282828282828
|
||||
HEX a8a8a8a8a8a8a8a8
|
||||
HEX 5050505050505050
|
||||
HEX d0d0d0d0d0d0d0d0
|
||||
HEX 5050505050505050
|
||||
HEX d0d0d0d0d0d0d0d0
|
||||
HEX 5050505050505050
|
||||
HEX d0d0d0d0d0d0d0d0
|
||||
HEX 5050505050505050
|
||||
HEX d0d0d0d0d0d0d0d0
|
||||
|
||||
* Color masks for odd/even bytes, colors 0-7.
|
||||
evencolor dfb $00,$2a,$55,$7f,$80,$aa,$d5,$ff
|
||||
oddcolor dfb $00,$55,$2a,$7f,$80,$d5,$aa,$ff
|
||||
|
||||
* XOR mask for colors 0-7 - non-BW flip on odd/even.
|
||||
xormask dfb $00,$7f,$7f,$00,$00,$7f,$7f,$00
|
||||
|
||||
* AND mask for the 7 pixel positions, high bit set
|
||||
* for the color shift.
|
||||
andmask dfb $81,$82,$84,$88,$90,$a0,$c0
|
||||
|
||||
* These are pixel AND masks, used with the modulo 7
|
||||
* result. Entry #2 in leftmask means we're touching
|
||||
* the rightmost 5 pixels, and entry #2 in rightmask
|
||||
* means we're touching the 3 leftmost pixels.
|
||||
*
|
||||
* The high bit is always set, because we want to
|
||||
* keep the color's high bit.
|
||||
leftmask dfb $ff,$fe,$fc,$f8,$f0,$e0,$c0
|
||||
rightmask dfb $81,$83,$87,$8f,$9f,$bf,$ff
|
||||
|
||||
PG_ALIGN
|
||||
|
||||
* Hi-res Y lookup, high part (192 bytes).
|
||||
* OR with $20 or $40.
|
||||
ylookhi HEX 0004080c1014181c
|
||||
HEX 0004080c1014181c
|
||||
HEX 0105090d1115191d
|
||||
HEX 0105090d1115191d
|
||||
HEX 02060a0e12161a1e
|
||||
HEX 02060a0e12161a1e
|
||||
HEX 03070b0f13171b1f
|
||||
HEX 03070b0f13171b1f
|
||||
HEX 0004080c1014181c
|
||||
HEX 0004080c1014181c
|
||||
HEX 0105090d1115191d
|
||||
HEX 0105090d1115191d
|
||||
HEX 02060a0e12161a1e
|
||||
HEX 02060a0e12161a1e
|
||||
HEX 03070b0f13171b1f
|
||||
HEX 03070b0f13171b1f
|
||||
HEX 0004080c1014181c
|
||||
HEX 0004080c1014181c
|
||||
HEX 0105090d1115191d
|
||||
HEX 0105090d1115191d
|
||||
HEX 02060a0e12161a1e
|
||||
HEX 02060a0e12161a1e
|
||||
HEX 03070b0f13171b1f
|
||||
HEX 03070b0f13171b1f
|
||||
|
||||
* Masks for current color (even/odd), e.g. 55 2a 55 2a ...
|
||||
* Updated whenever the color changes.
|
||||
colorline ds 40
|
||||
|
||||
PG_ALIGN
|
||||
rastx0l ds NUM_ROWS
|
||||
PG_ALIGN
|
||||
rastx0h ds NUM_ROWS
|
||||
ds 1 ;repeat mode can overstep
|
||||
PG_ALIGN
|
||||
rastx1l ds NUM_ROWS
|
||||
PG_ALIGN
|
||||
rastx1h ds NUM_ROWS
|
||||
|
||||
* Lookup tables for dividing 0-279 by 7. The "hi"
|
||||
* parts are 24 bytes each, so they fit inside
|
||||
* the rest of rastx1h's page (192 bytes used). The "lo" parts
|
||||
* each fill a page.
|
||||
div7hi HEX 2424242525252525
|
||||
HEX 2525262626262626
|
||||
HEX 2627272727272727
|
||||
mod7hi HEX 0405060001020304
|
||||
HEX 0506000102030405
|
||||
HEX 0600010203040506
|
||||
|
||||
PG_ALIGN
|
||||
|
||||
div7lo HEX 0000000000000001
|
||||
HEX 0101010101010202
|
||||
HEX 0202020202030303
|
||||
HEX 0303030304040404
|
||||
HEX 0404040505050505
|
||||
HEX 0505060606060606
|
||||
HEX 0607070707070707
|
||||
HEX 0808080808080809
|
||||
HEX 0909090909090a0a
|
||||
HEX 0a0a0a0a0a0b0b0b
|
||||
HEX 0b0b0b0b0c0c0c0c
|
||||
HEX 0c0c0c0d0d0d0d0d
|
||||
HEX 0d0d0e0e0e0e0e0e
|
||||
HEX 0e0f0f0f0f0f0f0f
|
||||
HEX 1010101010101011
|
||||
HEX 1111111111111212
|
||||
HEX 1212121212131313
|
||||
HEX 1313131314141414
|
||||
HEX 1414141515151515
|
||||
HEX 1515161616161616
|
||||
HEX 1617171717171717
|
||||
HEX 1818181818181819
|
||||
HEX 1919191919191a1a
|
||||
HEX 1a1a1a1a1a1b1b1b
|
||||
HEX 1b1b1b1b1c1c1c1c
|
||||
HEX 1c1c1c1d1d1d1d1d
|
||||
HEX 1d1d1e1e1e1e1e1e
|
||||
HEX 1e1f1f1f1f1f1f1f
|
||||
HEX 2020202020202021
|
||||
HEX 2121212121212222
|
||||
HEX 2222222222232323
|
||||
HEX 2323232324242424
|
||||
mod7lo HEX 0001020304050600
|
||||
HEX 0102030405060001
|
||||
HEX 0203040506000102
|
||||
HEX 0304050600010203
|
||||
HEX 0405060001020304
|
||||
HEX 0506000102030405
|
||||
HEX 0600010203040506
|
||||
HEX 0001020304050600
|
||||
HEX 0102030405060001
|
||||
HEX 0203040506000102
|
||||
HEX 0304050600010203
|
||||
HEX 0405060001020304
|
||||
HEX 0506000102030405
|
||||
HEX 0600010203040506
|
||||
HEX 0001020304050600
|
||||
HEX 0102030405060001
|
||||
HEX 0203040506000102
|
||||
HEX 0304050600010203
|
||||
HEX 0405060001020304
|
||||
HEX 0506000102030405
|
||||
HEX 0600010203040506
|
||||
HEX 0001020304050600
|
||||
HEX 0102030405060001
|
||||
HEX 0203040506000102
|
||||
HEX 0304050600010203
|
||||
HEX 0405060001020304
|
||||
HEX 0506000102030405
|
||||
HEX 0600010203040506
|
||||
HEX 0001020304050600
|
||||
HEX 0102030405060001
|
||||
HEX 0203040506000102
|
||||
HEX 0304050600010203
|
||||
|
||||
|
||||
* RastFill unrolled loop. At each step we store the current
|
||||
* color value, XOR it to flip the bits if needed, and advance.
|
||||
* The caller needs to set the appropriate initial value based
|
||||
* on whether the address is odd or even.
|
||||
*
|
||||
* We can use a 3-cycle "EOR dp" or a 2-cycle "EOR imm". The
|
||||
* former is one cycle slower, the latter requires us to
|
||||
* self-mod 40 instructions when the color changes.
|
||||
*
|
||||
* This must be page-aligned so that we can take the value
|
||||
* from the rastunidx table and self-mod a JMP without having
|
||||
* to do a 16-bit add. We have just enough room for the
|
||||
* unrolled loop (40*5+3) and x5 table (41) = 244 bytes, fits
|
||||
* on a single page.
|
||||
|
||||
do USE_FAST ;*****
|
||||
ds \
|
||||
]hbasl equ zptr0 ;must match FillRaster
|
||||
rast_unroll equ *
|
||||
lst off
|
||||
lup BYTES_PER_ROW
|
||||
sta (]hbasl),y ;6
|
||||
eor #$00 ;2 operand is rewritten by fixrastxor
|
||||
iny ;2 10 cycles, 5 bytes
|
||||
--^
|
||||
jmp rastlinedone ;3 bytes, reached after the last unrolled step
|
||||
|
||||
* Index into rast_unroll. If we need to output N bytes,
|
||||
* we want to jump to (rast_unroll + (40 - N) * 5) (where
|
||||
* 5 is the number of bytes per iteration).
|
||||
rastunidx
|
||||
]offset = BYTES_PER_ROW*5 ;entry 0 points at the final jmp
|
||||
lup BYTES_PER_ROW+1 ;0-40
|
||||
dfb ]offset ;low byte only; rast_unroll is page-aligned
|
||||
]offset = ]offset-5
|
||||
--^
|
||||
|
||||
fin ;*****
|
||||
|
||||
|
||||
********************************
|
||||
*
|
||||
* Code used to generate tables above. If you want to
|
||||
* decrease load size, use these functions to generate
|
||||
* the data into empty memory, then discard the code.
|
||||
* (Maybe use a negative DS and overlap with rastx0l?)
|
||||
*
|
||||
********************************
|
||||
DO 0 ;*****
|
||||
|
||||
init_ylook
]hbasl  equ   zptr1
]hbash  equ   zptr1+1

* Fill ylooklo/ylookhi with the hi-res base address of
* every line by calling bascalc for each one.
*
* The high bytes are stored relative to $0000, like the
* pre-computed ylookhi table: the drawing code ORs in
* the hi-res page ($20 or $40) when it uses the table,
* so the page bits must not be baked in here.
*
* A, X, and Y are modified.
        ldx   #NUM_ROWS     ;X = number of lines left to do
        ldy   #NUM_ROWS-1   ;Y = line number, last to first
]loop   tya
        jsr   bascalc       ;result in ]hbasl/]hbash; X/Y kept
        lda   ]hbasl        ;read what bascalc just stored
        sta   ylooklo,y
        lda   ]hbash        ;$00-$1f, no page bits
        sta   ylookhi,y
        dey
        dex
        bne   ]loop
        rts
|
||||
|
||||
* Hi-res base address calculation. This is based on the
|
||||
* HPOSN routine at $F411.
|
||||
*
|
||||
* Call with the line in A. The results are placed into
|
||||
* zptr1. X and Y are not disturbed.
|
||||
*
|
||||
* The value is in the $0000-1fff range, so you must OR
|
||||
* the desired hi-res page in.
|
||||
*
|
||||
bascalc ;line bits are written abcdefgh below (a = bit 7), "." = zero
|
||||
pha ;keep the line number for the second half
|
||||
and #$c0 ;ab......
|
||||
sta ]hbasl
|
||||
lsr
|
||||
lsr ;..ab....
|
||||
ora ]hbasl ;abab....
|
||||
sta ]hbasl
|
||||
pla
|
||||
sta ]hbash ;abcdefgh
|
||||
asl
|
||||
asl
|
||||
asl ;A = defgh..., carry = c
|
||||
rol ]hbash ;bcdefghc
|
||||
asl ;carry = d
|
||||
rol ]hbash ;cdefghcd
|
||||
asl ;carry = e
|
||||
ror ]hbasl ;eabab... = final low byte
|
||||
lda ]hbash
|
||||
and #$1f ;...fghcd = final high byte, no page bits
|
||||
sta ]hbash
|
||||
rts
|
||||
|
||||
*
|
||||
* Create divide-by-7 tables.
|
||||
*
|
||||
mkdivtab
|
||||
]val equ zloc0 ;current quotient (index / 7)
|
||||
|
||||
ldy #0 ;Y = table index
|
||||
sty ]val
|
||||
ldx #0 ;X = current remainder (index mod 7)
|
||||
]loop lda ]val
|
||||
sta div7lo,y
|
||||
txa
|
||||
sta mod7lo,y
|
||||
inx
|
||||
iny
|
||||
beq :lodone ;Y wrapped: all 256 "lo" entries written
|
||||
cpx #7
|
||||
bne ]loop
|
||||
inc ]val ;remainder wrapped, bump the quotient
|
||||
ldx #0
|
||||
beq ]loop ;always
|
||||
:lodone ;safe to ignore ]val update: X is 4 here (256 mod 7), not 7
|
||||
]loop lda ]val ;carry on for 256-279; Y restarts at 0
|
||||
sta div7hi,y
|
||||
txa
|
||||
sta mod7hi,y
|
||||
iny
|
||||
cpy #280-256 ;24 "hi" entries
|
||||
beq :hidone
|
||||
inx
|
||||
cpx #7
|
||||
bne ]loop
|
||||
inc ]val
|
||||
ldx #0
|
||||
beq ]loop ;always
|
||||
:hidone rts
|
||||
|
||||
FIN ;*****
|
61
README.md
61
README.md
@ -1,2 +1,59 @@
|
||||
# fdraw
|
||||
Fast Apple II graphics
|
||||
fdraw
|
||||
=====
|
||||
|
||||
Fast graphics routines for the Apple II
|
||||
By Andy McFadden
|
||||
Version 0.3, August 2015
|
||||
|
||||
## Overview ##
|
||||
|
||||
The fdraw library provides fast rendering of points, lines, rectangles,
|
||||
and circles, as well as high-speed screen clears, for Apple II hi-res
|
||||
graphics. It can be used from Applesoft or 6502 assembly language.
|
||||
|
||||
Two disk images are available in the [fdraw-disks.zip](fdraw-disks.zip) zip
|
||||
archive. `fdrawdemo.do` is a 140K disk image with the demos that will
|
||||
run on an Apple ][+ or later. `fdrawdev.po` is an 800K disk image with
|
||||
the source code, demos, and a few extras.
|
||||
|
||||
A video of the demos running in the AppleWin emulator
|
||||
[is available](https://www.youtube.com/watch?v=z2RFGVoaROE).
|
||||
|
||||
Learn more about how fdraw works in the
|
||||
[library documentation](docs/manual.md).
|
||||
|
||||
Learn about the demos in the [demo documentation](docs/demos.md).
|
||||
|
||||
Learn more about what possessed me to write a graphics library for the
|
||||
Apple II more than 20 years after the platform was discontinued in the
|
||||
[fadden's brain documentation](docs/personal-notes.md).
|
||||
|
||||
The main bits of source code are accessible from git for easy viewing,
|
||||
but the "official" home is on `fdrawdev.po`.
|
||||
|
||||
All code is copyright 2015 by Andy McFadden. All rights reserved. The
|
||||
source code is available under the Apache 2 license (a very friendly
|
||||
open-source license).
|
||||
|
||||
|
||||
### Version History ###
|
||||
|
||||
##### v0.1 March 13, 2006
|
||||
|
||||
No source code, just a demo with fast filled circles and screen clears.
|
||||
|
||||
##### v0.2 March 20, 2006
|
||||
|
||||
Polished up the sources and published. This version implemented Clear,
|
||||
FillRect, FillCircle, and FillRaster.
|
||||
|
||||
##### v0.3 August 21, 2015
|
||||
|
||||
Added DrawPoint, DrawLine, DrawRect, DrawCircle, and SetLineMode. Various
|
||||
size and performance improvements.
|
||||
|
||||
Added Amperfdraw to make Applesoft BASIC programming easier.
|
||||
|
||||
Added several more demos and tests.
|
||||
|
||||
Added documentation.
|
||||
|
167
docs/demos.md
Normal file
167
docs/demos.md
Normal file
@ -0,0 +1,167 @@
|
||||
fdraw Demo README
|
||||
=================
|
||||
|
||||
The fdraw distribution comes with a handful of demonstration programs.
|
||||
Most of them are written in Applesoft BASIC, and use the amperfdraw
|
||||
interface. This is a somewhat poor way to demonstrate animation
|
||||
performance, as Applesoft adds a tremendous amount of overhead, but it
|
||||
is the only way to show what you *can* do with Applesoft.
|
||||
|
||||
The easiest way to run them is with the "DEMO" program, which scans the
|
||||
DEMOS directory for BASIC programs and presents a list. You can also
|
||||
just run them directly.
|
||||
|
||||
* INTRO : Sort of a "hello, world" for fdraw. Mix of single- and
|
||||
double-buffered animation.
|
||||
|
||||
* CIRCULAR : Draws lots of circles.
|
||||
|
||||
* RECTSPLAT : Draws lots of rectangles.
|
||||
|
||||
* CUBIC : Draws a spinning wireframe 3D cube. (The 3D coordinates are
|
||||
pre-computed -- fdraw doesn't do matrix transforms.)
|
||||
|
||||
* TUNNEL : Animates circles to simulate driving through a tunnel.
|
||||
|
||||
* LINEAR : Draws lots of lines. The wipes show speed differences for
|
||||
horizontal and vertical special cases, while the circular spinner
|
||||
shows HPLOT is not as fast as &HPLOT which is not as fast as &PLOT for
|
||||
a set of lines at a variety of angles.
|
||||
|
||||
* LINE.DIFF : Draws several lines with the ROM routines and fdraw
|
||||
side-by-side to illustrate the difference in line style.
|
||||
|
||||
* CLEARLY : Clears the screen 32 times, 4 sets in each of the 8 colors.
|
||||
The first round is done with the Applesoft ROM routine ("CALL 62454"),
|
||||
the second round uses the fdraw &CLEAR function.
|
||||
|
||||
* HRFAN : A simple line-art demo, using "xdraw" DrawLine with lines in
|
||||
different colors. Not a great demo, as the Applesoft code driving it
|
||||
is rather slow, but it looks pretty good if you bump up the emulation
|
||||
speed or switch to IIgs "fast" mode. (This deserves a conversion to
|
||||
assembly language.)
|
||||
|
||||
* BRIAN.THEME.ORI : The Brian's Theme demo from the DOS 3.3 System
|
||||
Master. Unmodified except for integration with the demo menu
|
||||
system, and with the bug on line 31112 fixed.
|
||||
|
||||
* BRIAN.THEME.NEW : The Brian's Theme demo with '&' placed in front of
|
||||
the various draw calls. There isn't a huge difference in speed, as
|
||||
there's a lot of overhead from Applesoft, but it's interesting to note
|
||||
the change in the appearance of the lines.
|
||||
|
||||
* WIGGLE : Sample program that shows direct use of rasterization tables.
|
||||
|
||||
When the demos are launched from the menu, they will assume that fdraw
|
||||
is already loaded and won't try to load it again. If you run the demo
|
||||
program directly, it will try to load FDRAW.FAST and AMPERFDRAW from the
|
||||
parent directory before doing any drawing.
|
||||
|
||||
|
||||
## Extras ##
|
||||
|
||||
The EXTRAS directory has some additional software that isn't "officially"
|
||||
part of fdraw, but may be of use.
|
||||
|
||||
NOTE: some of these assume fdraw and amperfdraw are already loaded, and
|
||||
will hang if not. Run DEMO and hit <esc> before running these.
|
||||
|
||||
* ARRAY.EXAMPLE : The &PLOT example from the documentation.
|
||||
|
||||
* XDRAW.ANIM : A demonstration of line animation using "xdraw" mode and
|
||||
a simple shape that is drawn twice by a single &PLOT call. One copy
|
||||
is offset by 2 pixels, so each &PLOT call erases the previous copy and
|
||||
draws a new copy 2 pixels to the right. The animation is shown twice,
|
||||
once with "erase all, draw all", and once with the erase and draw calls
|
||||
interleaved for every line.
|
||||
|
||||
* LINEFONT : Program for creating draw-array tables for text phrases. Used
|
||||
to create data files for the "intro" demo. See the "LINEFONT Details"
|
||||
section for more information.
|
||||
|
||||
* DAVIEWER: Views the contents of .DA files created by LINEFONT.
|
||||
|
||||
* BENCHCLEAR : Calls the "clear" function 256 times from a small
|
||||
assembly-language program. Handy for benchmarks, but slightly silly
|
||||
since it's relatively easy to calculate the exact cycle cost.
|
||||
|
||||
|
||||
## LINEFONT Details ##
|
||||
|
||||
NOTE: this program is an unfinished rough cut ("pre alpha"), used for
|
||||
preparing data for demos.
|
||||
|
||||
The program includes a font definition, routines for displaying
|
||||
characters, and code for generating and exporting pre-rendered strings.
|
||||
|
||||
Character vertices are expressed as floating-point values. The baseline
|
||||
is at zero, the peak ascent is at 1.0, the lowest descent is -1.0. The
|
||||
leftmost pixel is at zero, the maximum value for the rightmost pixel is 1.0.
|
||||
Characters don't have to fill out the entire cell -- proportionally-spaced
|
||||
fonts are supported -- but they are expected to start at the left edge.
|
||||
|
||||
So a capital 'M' might look like this:
|
||||
|
||||
0.0,0.0 -> 0.0,1.0 -> 0.5,0.7 -> 1.0,1.0 -> 1.0,0.0
|
||||
|
||||
There is currently no "user interface", unless the "user" can program in
|
||||
Applesoft BASIC. To generate strings, add a series of statements that set
|
||||
variables and call 20000 to add rendered strings to the set. The relevant
|
||||
variables are:
|
||||
|
||||
S$ - string to add
|
||||
DW - desired width, in pixels, of a cell 1.0 units wide
|
||||
DH - desired height, in pixels, of a cell 2.0 units high (ascent + descent)
|
||||
IS% - inter-character spacing, in pixels
|
||||
SW% - width of the space character (usually same as DW)
|
||||
MO% - monospace flag; if nonzero, all chars are treated as 1.0 units wide
|
||||
|
||||
Remove the REM from the start of line 1010 to enable the character viewer.
|
||||
At present only a couple of lower-case letters are defined.
|
||||
|
||||
|
||||
#### LINEFONT Output ####
|
||||
|
||||
The LINEFONT program outputs a binary blob that can be passed to
|
||||
the &PLOT array-draw function. The file structure is:
|
||||
|
||||
+0 byte - number of array sets in the list.
|
||||
+1 2 bytes * N - table of offsets to individual array sets. One of
|
||||
these per array set. The value is the offset from the start of the
|
||||
file.
|
||||
|
||||
(2N+1) array set #1:
|
||||
+0 byte - number of vertices (0-127)
|
||||
+1 byte - number of index pairs (0-127)
|
||||
+2 2 bytes * V - vertices (values are signed X/Y)
|
||||
+X 2 bytes * I - index pairs (values are 0-127)
|
||||
|
||||
To display phrase #3, you would get the 16-bit value from the offset
|
||||
table with PEEK(start + 1 + 3 * 2) + PEEK(start + 2 + 3 * 2) * 256.
|
||||
You get the number of vertices from PEEK(start + offset), and the number
|
||||
of index pairs from PEEK(start + offset + 1). Finally, call the array-draw
|
||||
function with:
|
||||
|
||||
VA = start + offset + 2
|
||||
IA = VA + num_vertices * 2
|
||||
&PLOT va, ia, num_index_pairs
|
||||
|
||||
The 0,0 point in the blob is in the center of the phrase horizontally
|
||||
(which allows a maximum width of 255 pixels), and at the font baseline
|
||||
vertically (so most of the font will appear above the zero point, but
|
||||
descenders will extend below).
|
||||
|
||||
|
||||
#### Future Enhancements ####
|
||||
|
||||
Right now the font definition is embedded in the program. This takes up
|
||||
a lot of space -- before too long the BASIC program is going to intrude
|
||||
on the hi-res page -- and is unnecessarily restrictive. The font should be
|
||||
defined by a separate program, and BSAVEd into a line-font file that
|
||||
LINEFONT can load.
|
||||
|
||||
Generating strings should be menu-driven and interactive, rather than
|
||||
requiring manual changes to the code to fiddle with sizes and spacing.
|
||||
DAVIEWER should be folded into the generation program (though it's kind
|
||||
of handy as a simple example of how to unpack and access content).
|
||||
|
990
docs/manual.md
Normal file
990
docs/manual.md
Normal file
@ -0,0 +1,990 @@
|
||||
fdraw Library Documentation
|
||||
===========================
|
||||
|
||||
Fast graphics primitives for the Apple II
|
||||
By Andy McFadden
|
||||
Version 0.3, August 2015
|
||||
|
||||
## Overview ##
|
||||
|
||||
The fdraw library provides fast rendering of points, lines, rectangles,
|
||||
and circles, as well as high-speed screen clears, for Apple II hi-res
|
||||
graphics. It can be used from Applesoft or assembly language.
|
||||
|
||||
The Applesoft ROM routines were designed to be as compact as possible,
|
||||
and were unable to use self-modifying code techniques, so their speed is
|
||||
less than what the Apple II is capable of. The fdraw routines pick a
|
||||
different point in the speed/space trade-off continuum, providing fast
|
||||
speeds at a reasonable size. Not everyone agrees on what "reasonable"
|
||||
means, so the fdraw code can be built in two modes, one that favors
|
||||
speed, one that reduces size.
|
||||
|
||||
**Contents:**
|
||||
|
||||
- [Applesoft BASIC Ampersand API](#amperapi)
|
||||
- [Raw API](#rawapi)
|
||||
- [Building the Code](#building)
|
||||
- [Apple II Hi-res in a Nutshell](#nutshell)
|
||||
- [Notes on the Drawing Functions](#notes)
|
||||
- [General Notes](#additional-notes)
|
||||
- [Enhancement Ideas](#ideas)
|
||||
- [My Quest for Lines](#history)
|
||||
|
||||
|
||||
<div id='amperapi'/>
|
||||
## Applesoft BASIC Ampersand API (Amperfdraw) ##
|
||||
|
||||
The ampersand API acts as a bridge between Applesoft BASIC and fdraw.
|
||||
It's more convenient and has less overhead than POKE and CALL, though
|
||||
you are not prevented from using that approach if you prefer. It's
|
||||
best to use one or the other though, not mix and match.
|
||||
|
||||
All arguments are checked for validity. An appropriate Applesoft
|
||||
error is thrown if invalid syntax or arguments are discovered.
|
||||
|
||||
This is not intended to be compatible with, nor a replacement for, the
|
||||
ampersand utilities in Beagle Graphics.
|
||||
|
||||
* &NEW - calls the fdraw Init function (which sets the color to 0 and
|
||||
selects hi-res page 1). You must do this once, at the start of
|
||||
your program, after fdraw has been loaded. This also resets internal
|
||||
amperfdraw state, setting the "HPLOT TO" origin to (0,0) and the "AT"
|
||||
point to (139,95).
|
||||
* &HGR - does what HGR does, only faster. Equivalent to executing
|
||||
`&HCOLOR=0:&SCRN(1):&CLEAR:&HCOLOR=[prevcolor]`, and then setting the
|
||||
display softswitches to display hi-res page 1 in mixed mode. Also sets
|
||||
$e6 (HPAG) for convenience in case you want to mix & match with ROM
|
||||
routines.
|
||||
* &HGR2 - like &HGR, but for page 2. Like HGR2, this turns off
|
||||
mixed-text mode.
|
||||
* &SCRN({1,2}) - sets the hi-res page that will be used for drawing. Does
|
||||
not change which page is displayed. (Use the softswitches, or call
|
||||
&INVERSE.)
|
||||
* &INVERSE - flips the render page to the other page, and hits the
|
||||
display softswitches to show the page that was just rendered. Intended
|
||||
for double-buffered animation.
|
||||
* &HCOLOR={0-7} - sets color, using the same numbering scheme as Applesoft.
|
||||
Does not affect the color used by the ROM routines.
|
||||
* &CLEAR - clears screen to current color.
|
||||
* &HPLOT [TO] x,y [TO x,y ...] - draws a point or a line. Works the same as
|
||||
Applesoft, e.g. "&HPLOT TO" starts from the end of the previously
|
||||
drawn line, and you can chain multiple "TO x,y" in a single statement.
|
||||
* &EXP {0,1} - set line mode. 0 is normal, 1 is "xdraw".
|
||||
* &XDRAW left,top,right,bottom - draws outline rectangle.
|
||||
* &DRAW left,top,right,bottom - draws filled rectangle.
|
||||
* &COS cx,cy,r - draws outline circle.
|
||||
* &SIN cx,cy,r - draws filled circle.
|
||||
|
||||
* &AT cx,cy - sets center offset for array-based rendering. Position must
|
||||
be on the hi-res screen (0-279, 0-191).
|
||||
* &PLOT vertexAddr, indexAddr, indexCount [AT cx,cy] - draws from the
|
||||
specified byte-arrays. See the "Drawing Lines with Indexed Byte-Arrays"
|
||||
section for the full explanation.
|
||||
|
||||
|
||||
<div id='rawapi'/>
|
||||
## Raw API ##
|
||||
|
||||
The code is assembled at $6000 by default. The program's length includes
|
||||
all data tables and work areas, and no memory outside of the program,
|
||||
zero page, and the current hi-res page is modified.
|
||||
|
||||
Input parameters and the function jump table are located near the start
|
||||
of the program. The API description below describes the addresses in
|
||||
relative terms.
|
||||
|
||||
Input parameters are not checked for validity. They must be in the range
|
||||
specified by the API, or undefined (but probably bad) behavior will result.
|
||||
The values will not be modified by fdraw functions.
|
||||
|
||||
All drawing operations use the current color.
|
||||
|
||||
* +0 Init - call this when the library is first loaded. It must be
|
||||
called before any other functions are used. It initializes the
|
||||
color to zero and the page to $20.
|
||||
* +3 (major version number, currently 0)
|
||||
* +4 (minor version number, currently 3)
|
||||
* +5 Input parameter area:
|
||||
* +5 arg - used for misc functions, e.g. SetColor and SetPage
|
||||
* +6 x0l - low part of the X0 coordinate (0-279)
|
||||
* +7 x0h - high part of X0
|
||||
* +8 y0 - Y0 coordinate (0-191)
|
||||
* +9 x1l - low part of X1 (0-279)
|
||||
* +10 x1h - high part of X1
|
||||
* +11 y1 - Y1 coordinate (0-191)
|
||||
* +12 rad - circle radius (0-255)
|
||||
* +13 (reserved)
|
||||
* +16 SetColor - set the color used for drawing (0-7) to the value in "arg".
|
||||
The numbering is the same as the Applesoft hi-res colors.
|
||||
* +19 SetPage - set the hi-res page used for drawing to the value in "arg",
|
||||
which must be $20 or $40. Does not change the page that is displayed.
|
||||
(Because a bad value can cause memory corruption, this value *is*
|
||||
checked, and bad values rejected.)
|
||||
* +22 Clear - erase the current hi-res page to the current color.
|
||||
* +25 DrawPoint - plot a single point at x0,y0.
|
||||
* +28 DrawLine - draw a line from x0,y0 to x1,y1 (inclusive).
|
||||
* +31 DrawRect - draw a rectangle with corners at x0,y0 and x1,y1 (inclusive).
|
||||
x0,y0 is the top-left, x1,y1 is the bottom-right. The left and
|
||||
right edges will be drawn two bits wide to ensure that the edges
|
||||
are visible (drawn at x0+1, x1-1).
|
||||
* +34 FillRect - draw a filled rectangle with corners at x0,y0 and x1,y1
|
||||
(inclusive).
|
||||
* +37 DrawCircle - draw a circle with center at x0,y0 and radius=rad.
|
||||
* +40 FillCircle - draw a filled circle with center at x0,y0 and radius=rad.
|
||||
* +43 SetLineMode - set the DrawLine mode to the value in "arg", which can
|
||||
be 0 (normal) or 1 (xdraw).
|
||||
* +46 (reserved)
|
||||
|
||||
* +49 FillRaster - draw an arbitrary shape from the rasterization tables.
|
||||
For each line from top to bottom, the left and right edges will
|
||||
be read from rastx1/rastx2 and a raster drawn in the current color.
|
||||
* +52 (byte) topmost line to rasterize (0-191)
|
||||
* +53 (byte) bottom-most line to rasterize (0-191), inclusive
|
||||
* +54 (2 bytes) address of rastx1l table
|
||||
* +56 (2 bytes) address of rastx1h table
|
||||
* +58 (2 bytes) address of rastx2l table
|
||||
* +60 (2 bytes) address of rastx2h table
|
||||
|
||||
The rasterization table addresses are read-only; changing them will have
|
||||
no effect.
|
||||
|
||||
fdraw uses a fair number of zero page locations. The exact set can be
|
||||
determined by looking at FDRAW.S. The locations were chosen to not
|
||||
interfere with DOS, ProDOS, Applesoft, or the Monitor. They may
|
||||
interfere with Integer BASIC, SWEET16, or your own application code.
|
||||
Remapping them to different locations is straightforward: just change
|
||||
the assignment of zptr/zloc values near the top of FDRAW.S to use
|
||||
different addresses. fdraw does not expect any zero page value to be
|
||||
preserved across calls, so you're welcome to use those locations in your
|
||||
own code, but understand that fdraw functions will overwrite them.
|
||||
|
||||
|
||||
<div id='nutshell'/>
|
||||
## Apple II Hi-res in a Nutshell ##
|
||||
|
||||
This is a quick overview of the Apple II hi-res graphics architecture
|
||||
for anyone not recently acquainted.
|
||||
|
||||
The Apple II hi-res graphics screen is a quirky beast. The typical
|
||||
API treats it as 280x192 with 6 colors (black, white, green, purple,
|
||||
orange, blue), though the reality is more complicated than that.
|
||||
|
||||
There are two hi-res screens, occupying 8K each, at $2000 and $4000.
|
||||
You turn them on and flip between them by accessing softswitches in
|
||||
memory-mapped I/O space.
|
||||
|
||||
Each byte determines the color of seven adjacent pixels, so it takes
|
||||
(280 / 7) = 40 bytes to store each line. The lines are organized into
|
||||
groups of three (120 bytes), which are interleaved across thirds of
|
||||
the screen. To speed the computation used to find the start of a
|
||||
line in memory, the group is padded out to 128 bytes; this means
|
||||
((192 / 3) * 8) = 512 of the 8192 bytes are part of invisible
|
||||
"screen holes". The interleaving is responsible for the characteristic
|
||||
"venetian blind" effect when clearing the screen.
|
||||
|
||||
Now imagine 280 bits in a row. If two consecutive bits are on, you
|
||||
get white. If they're both off, you get black. If they alternate
|
||||
on and off, you get color. The color depends on the position of the bit;
|
||||
for example, if even-numbered bits are on, you get purple, while
|
||||
odd-numbered bits yield green. The high bit in each byte adjusts the
|
||||
position of bits within that byte by half a pixel, changing purple and
|
||||
green to blue and orange.
|
||||
|
||||
This arrangement has some curious consequences. If you have green and
|
||||
purple next to each other, there will be a color glitch where they meet.
|
||||
The reason is obvious if you look at the bit patterns when odd/even meet:
|
||||
`...010101101010...` or `...101010010101...`. The first pattern has two
|
||||
adjacent 1 bits (white), the latter two adjacent 0 bits (black). Things
|
||||
get even weirder if the split occurs at a byte boundary and the high bit is
|
||||
different, as the half-pixel shift can make the "glitch" pixel wider or
|
||||
narrower by half a pixel.
|
||||
|
||||
The Applesoft ROM routines draw lines that are 1 bit wide. If you execute
|
||||
a command like `HGR : HCOLOR=1 : HPLOT 0,0 to 0,10`, you won't see
|
||||
anything happen. That's because HCOLOR=1 sets the color to green,
|
||||
which means it only draws on odd pixels, but the HPLOT command we gave
|
||||
drew a vertical line on even pixels. It set 11 bits to zero, but since
|
||||
the screen was already zeroed out there was no apparent effect.
|
||||
|
||||
If you execute `HGR : HCOLOR=3 : HPLOT 1,0 to 1,10`, you would expect a
|
||||
white line to appear. However, drawing in "white" just means that no
|
||||
bit positions are excluded. So it drew a vertical column of pixels at
|
||||
X=1, which appears as a green line.
|
||||
|
||||
If (without clearing the screen after the previous command) you execute
|
||||
`HCOLOR=4 : HPLOT 5,0 to 5,10`, something curious happens: the green line
|
||||
turns orange. HCOLOR=4 is black with the high-bit set. So we drew a
|
||||
line of black in column 5 (which we won't see, because that part of the
|
||||
screen is already black), and set the high bit in that byte. The same
|
||||
byte holds columns 0 through 6, so drawing in column 5 also affected
|
||||
column 1. We can put it back to green with "HCOLOR=0 : HPLOT 5,0 to 5,10".
|
||||
|
||||
It's important to keep the structure in mind while drawing to avoid
|
||||
surprises.
|
||||
|
||||
Note that the Applesoft ROM routines treat 0,0 as the top-left corner,
|
||||
with positive coordinates moving right and down, and lines are drawn
|
||||
with inclusive end coordinates. This is different from many modern
|
||||
systems. fdraw follows the Applesoft conventions to avoid confusion.
|
||||
|
||||
Handy table of graphics softswitches:
|
||||
|
||||
name | addr | decimal | purpose
|
||||
------ | ----- | ------- | ------------------
|
||||
TXTCLR | $c050 | -16304 | enable graphics
|
||||
TXTSET | $c051 | -16303 | text-only
|
||||
MIXCLR | $c052 | -16302 | disable mixed mode
|
||||
MIXSET | $c053 | -16301 | enable mixed mode (4 lines of text)
|
||||
LOWSCR | $c054 | -16300 | display page 1
|
||||
HISCR | $c055 | -16299 | display page 2
|
||||
LORES | $c056 | -16298 | show lo-res screen
|
||||
HIRES | $c057 | -16297 | show hi-res screen
|
||||
|
||||
|
||||
<div id='building'/>
|
||||
## Building the Code ##
|
||||
|
||||
The main fdraw code is written for the Merlin assembler (specifically
|
||||
Merlin-16 3.40, though other versions should work). It uses plain 6502
|
||||
code, and is expected to run on an Apple ][+.
|
||||
|
||||
For convenience when editing the files on an Apple II, and to allow the
|
||||
code to be compiled by Merlin-16 running under ProDOS 8, the code is
|
||||
broken into four files. The main file, FDRAW.S, includes the other
|
||||
three with PUT directives. FDRAW.S holds the API entry points and some
|
||||
of the drawing code. FDRAW.LINE.S has the code for drawing points and
|
||||
lines, while FDRAW.CIRCLE.S has the code for drawing circles.
|
||||
FDRAW.TABLE.S holds the data tables, as well as empty space for work
|
||||
areas. The empty space is included in the binary so you can determine
|
||||
the full memory footprint by looking at the length of the file.
|
||||
|
||||
Near the top of FDRAW.S is a constant, `USE_FAST`, which may be set
|
||||
to 0 or 1. If set to 0, some code optimizations are disabled,
|
||||
reducing the size of the code and data areas. Further, the page
|
||||
alignment on data tables is disabled, reducing the internal fragmentation
|
||||
of the data area.
|
||||
|
||||
The USE_FAST setting also determines which file receives the assembler
|
||||
output: FDRAW.FAST or FDRAW.SMALL. To generate both, it is necessary to
|
||||
assemble the file, change the constant, and then assemble the file again.
|
||||
|
||||
Tests and demos are written in Applesoft BASIC, with a couple of
|
||||
exceptions.
|
||||
|
||||
|
||||
### Why So Big? ###
|
||||
|
||||
The fdraw code weighs in at a hefty 5KB (or 4KB for the "small" build).
|
||||
That doesn't sound like much in the age of multi-gigabyte mobile phones,
|
||||
but it's a sizeable fraction of the space available on an Apple ][+.
|
||||
|
||||
If you want to modify individual pixels quickly, you need two things:
|
||||
a line base-address table, and a divide-by-7 table. Computing base
|
||||
addresses and dividing by 7 aren't hugely expensive, but we're going
|
||||
to be doing them often, so they need to be as fast as possible.
|
||||
|
||||
The line address table has 192 entries, one for each line, 2 bytes per
|
||||
entry. The divide-by-7 table has 280 entries, one for each horizontal
|
||||
pixel position, with one byte for the quotient and one for the remainder.
|
||||
(The remainder can be expressed as a numeric value from 0 to 6, or as
|
||||
a byte with a specific bit set.)
|
||||
|
||||
That's 944 bytes. For optimum performance, each table must fit on a
|
||||
single page of memory. We can split the division table into two pieces,
|
||||
one for 0-255 and one for 256-279, and put the smaller half on the same
|
||||
page as the Y table, along with 16 bytes of padding. The final size is
|
||||
256 + 256 + (192+24+24+pad) + 192 = 960. So you can write off 1K of
|
||||
memory before you've written any code.
|
||||
|
||||
(There's a clever way to reduce the size of the y-lookup table to 24
|
||||
entries, but it's slightly faster and much easier to use full tables.)
|
||||
|
||||
For the FillRaster function, fdraw needs to record the left and right
|
||||
X coordinates on each line (2 bytes each), so that's 192 * 4 = 768 bytes.
|
||||
Again, for optimum performance, each table needs to be on its own page,
|
||||
so for USE_FAST=1 that expands to 1024 bytes.
|
||||
|
||||
Add to that another full page of unrolled rasterization code, and you've
|
||||
got 2304 bytes of tables.
|
||||
|
||||
The rest is code, most of which was written with a flagrant disregard
|
||||
for size. Many common code fragments are repeated inline, rather than
|
||||
called as a subroutine, because a subroutine call (JSR+RTS) costs 12
|
||||
cycles. Calling a common "plot a point" function from the line-drawing
|
||||
code would increase the per-pixel cost by 15-20%.
|
||||
|
||||
|
||||
<div id='notes'/>
|
||||
## Notes on the Drawing Functions ##
|
||||
|
||||
### Screen Clear ###
|
||||
|
||||
The Clear function erases the current hi-res page to the current color.
|
||||
It's several times faster than the version built into the ROM.
|
||||
|
||||
#### Performance ####
|
||||
|
||||
The fastest possible way to clear the screen to a specific color on a
|
||||
6502 is to write to every visible location with an absolute store
|
||||
instruction. Subtracting the screen holes, that's 7680 addresses *
|
||||
4 cycles = 30720 cycles. The code to do that would be 23,040 bytes long,
|
||||
making it impractical.
|
||||
|
||||
A slower but more memory-efficient approach has one store statement for
|
||||
each line, and iterates through 40 times (280 / 7 = 40). Factoring in the
|
||||
loop overhead, that comes out to 40 * (192 * 5 + 9) = 38760 cycles.
|
||||
192 sets of store instructions fill 576 bytes, which is much better
|
||||
than 23K, but still quite a lot.
|
||||
|
||||
We can reduce the size further by taking the lines 3 at a time, erasing
|
||||
the first 120 bytes in each 128-byte group (the last 8 bytes are the
|
||||
screen hole). We'd need to use 7680/120 = 64 store instructions, for a
|
||||
total of 120 * (64 * 5 + 9) = 39480 cycles, with 192 bytes for the main
|
||||
part of the erase loop. We're not quite 2% slower, but 384 bytes
|
||||
smaller, which seems a fair trade-off. Because we're accessing memory
|
||||
linearly we now have a "venetian blind" clear, which is something of an
|
||||
Apple II trademark, but we can fix that by spending an additional 522
|
||||
cycles to erase the screen in thirds (top/middle/bottom).
|
||||
|
||||
Any further changes that make the code smaller also increase the execution
|
||||
time. When built with USE_FAST=0, the code will use a different loop
|
||||
with 32 stores that write 248 bytes each, and takes 41416 cycles. It's
|
||||
half the size, but nearly 2000 cycles slower, and overwrites half of the
|
||||
screen holes.
|
||||
|
||||
At the extreme end of space over speed is the Applesoft ROM routine -- HGR
|
||||
or "CALL 62454" -- which only needs about 30 bytes for its main loop, but
|
||||
takes (8192*33)+(12*64)+17 = 271121 cycles for black or white, or
|
||||
(8192*40)+(12*64)+17 = 328465 cycles for green/purple/blue/orange --
|
||||
7-8x slower than our preferred implementation.
|
||||
|
||||
The screen clear is wired to a specific hi-res page, so the SetPage
|
||||
function must rewrite the store instructions when the page changes (or
|
||||
we need to keep two full copies of the function around). For an
|
||||
application that is constantly doing flip-erase, the overhead must be
|
||||
factored into the efficiency of the approach -- for example, rewriting
|
||||
stores with indexed LDA/EOR/STA in a loop will take 20 cycles per iteration,
|
||||
1280 cycles for the full set of 64. The "slow" clear has half the
|
||||
number of store instructions, so takes half the time to fix up after
|
||||
a page flip.
|
||||
|
||||
|
||||
### Raster Fill ###
|
||||
|
||||
Drawing an outline of a rectangle or circle can be done efficiently by
|
||||
drawing lines or plotting points. Drawing a filled shape is more
|
||||
expensive if one point is plotted at a time, especially on the Apple II
|
||||
where every byte affects 7 pixels.
|
||||
|
||||
For filled shapes, fdraw populates a rasterization table. The table has
|
||||
192 entries, each of which holds the left and right edges of the shape
|
||||
on that line. The code fills in the pixels one line at a time, using
|
||||
a simple byte store for the middle parts, and bit masks at the edges.
|
||||
|
||||
External applications can use the raster renderer directly by filling
|
||||
out the rasterization table and calling FillRaster.
|
||||
|
||||
While the FillRaster function itself will not modify the contents of the
|
||||
raster tables, other fdraw calls will, sometimes unexpectedly. For
|
||||
example, drawing a horizontal line is performed with a single-line
|
||||
fill call. Filled rectangles might populate the table in the way you'd
|
||||
expect, or might use some internal shortcut that only fills out one line
|
||||
and sets a "repeat" flag. Don't make assumptions about what will be in
|
||||
the table after a call to one of the drawing functions. You *can* count
|
||||
on whatever you wrote there yourself to be unmodified after calls to
|
||||
FillRaster, SetColor, or SetPage, so you can do page-flipping and
|
||||
color-cycling without having to repopulate the tables.
|
||||
|
||||
#### Performance ####
|
||||
|
||||
The fill code needs about 100 cycles to set up each line when drawing
|
||||
a rectangle, more if the line doesn't start and end on byte boundaries.
|
||||
The inner loop costs 10 cycles per byte. To clear the screen with the
|
||||
raster fill code, it would take (192 * (100 + 40 * 10)) = 96000 cycles,
|
||||
or nearly 2.5x the time required for the dedicated clear code. Which is
|
||||
about what you'd expect, as the screen erase needs 4 cycles per byte, and
|
||||
has lower per-line overhead. (This can be improved significantly; see
|
||||
the notes in the "enhancements" section.)
|
||||
|
||||
Non-rectangular shapes take slightly longer to set up, as the edges must
|
||||
be recomputed for each line.
|
||||
|
||||
|
||||
### Lines ###
|
||||
|
||||
The goal is to provide a replacement for Applesoft's HPLOT function
|
||||
that is faster and more consistent in appearance. Lines are drawn using
|
||||
Bresenham's run-length algorithm.
|
||||
|
||||
Internally, there are five separate functions. Horizontal and vertical
|
||||
lines each get a special-case handler. There's another for mostly-vertical
|
||||
lines, one for mostly-horizontal lines, and one for wide mostly-horizontal
|
||||
lines (255 pixels or wider). The latter requires 16-bit math, and is
|
||||
slightly slower.
|
||||
|
||||
The Applesoft routine isn't quite the same as the standard Bresenham
|
||||
algorithm, because it doesn't move diagonally. Consider a line from
|
||||
(0,0) to (50,10) -- gently sloping down and to the right. The standard
|
||||
algorithm would plot exactly 51 pixels, one in each horizontal position.
|
||||
The "pen" always moves one pixel right, but sometimes also moves down.
|
||||
|
||||
In Applesoft, the "pen" can move either right or down, but can't do
|
||||
both at once. This results in lines that feel thin when near horizontal
|
||||
or vertical, but become thicker as they approach 45 degrees. This
|
||||
reduces performance, because Applesoft draws twice as many pixels for a
|
||||
diagonal line as the basic algorithm. It can also be visually jarring
|
||||
when animated, because lines get very thick when near diagonal.
|
||||
|
||||
Different applications have used different styles; for example:
|
||||
|
||||
- Stellar 7 and Elite for the Apple II use Bresenham-style lines. If
|
||||
you look at near-diagonal lines on a color monitor you can see the
|
||||
pixels alternating green and purple.
|
||||
- A2-FS1 Flight Simulator appears to be using Bresenham lines but with
|
||||
doubled bits, effectively treating the screen as having 140 pixels. This
|
||||
gives solid white lines with a fairly consistent feel.
|
||||
- GraFORTH doubles the bits, but treats the screen as 256 pixels wide
|
||||
(not 280... it gives up 24 pixels to improve performance). White
|
||||
lines are thick like Flight Simulator, but feel less jagged because
|
||||
each step can move left or right by one bit rather than two.
|
||||
|
||||
The SetLineMode function lets you choose between "draw" and "xdraw". The
|
||||
former draws color pixels, setting and clearing bits as needed, while
|
||||
the latter inverts whatever is currently on the screen. This can have
|
||||
some unusual effects. Drawing the same line twice erases the line.
|
||||
Drawing a green line over a purple line gives you a white line. Drawing
|
||||
with colors 5 and 6 can produce odd results, because the high bit inverts
|
||||
every time you touch a byte -- which means the ends of a horizontal line
|
||||
will be a different color if the byte holds an even number of affected
|
||||
pixels. It's best to draw with colors 0-3 when in xdraw mode. Clearing
|
||||
the background to color 4, rather than 0, will cause drawing in colors
|
||||
0-3 to actually be 4-7.
|
||||
|
||||
#### Performance ####
|
||||
|
||||
Mostly-horizontal lines step horizontally each iteration, and sometimes
|
||||
step vertically. Mostly-vertical lines step vertically each iteration,
|
||||
and sometimes step horizontally. Each part of the operation has a cost,
|
||||
so the fastest lines are the ones drawn primarily in a single direction.
|
||||
Diagonal lines are the worst case for performance.
|
||||
|
||||
The current code requires just under 80 cycles per pixel for diagonal
|
||||
movement, and about 56 for single-direction movement. There's another
|
||||
150 cycles or so per line for the initial setup.
|
||||
|
||||
Vertical lines cost about 43 cycles per pixel. Horizontal lines are
|
||||
handled as a trivial FillRaster call, which at peak performance can write
|
||||
7 pixels in 10 cycles.
|
||||
|
||||
This is about as fast as you can get with the Bresenham run-length
|
||||
algorithm and Applesoft-style color handling. It's possible to go faster
|
||||
by switching to a different pixel style, or using a run-slice approach.
|
||||
|
||||
|
||||
### Rectangles ###
|
||||
|
||||
Filled rectangles are currently implemented by putting the left and
|
||||
right edges into the rasterization table, and calling FillRaster.
|
||||
|
||||
Outline rectangles could be drawn as four lines, but that doesn't look
|
||||
very good in color unless you get the lines on the right columns. To
|
||||
ensure that the edges are in the correct color, outline rectangles are
|
||||
drawn as four separate items: a two-pixel-wide left edge, a two-pixel-wide
|
||||
right edge, and horizontal lines at the top and bottom. FillRaster does
|
||||
the actual work.
|
||||
|
||||
#### Performance ####
|
||||
|
||||
FillRaster is suboptimal for rectangles, because it works by rows rather
|
||||
than by columns (see "Vertically-Challenged Rasterization" later in this
|
||||
document). Rectangles could be drawn 2.5x faster with dedicated code,
|
||||
but at a cost of hundreds of bytes of memory.
|
||||
|
||||
The advantage of using FillRaster is that we need it for filled circles,
|
||||
so adding support for rectangles was nearly free. And it's still pretty
|
||||
fast.
|
||||
|
||||
|
||||
### Circles ###
|
||||
|
||||
Circles are computed with Bresenham's algorithm. The idea is to compute
|
||||
one octant of the circle with this bit of magic:
|
||||
|
||||
void drawOutline(int cx, int cy, int rad) {
|
||||
int x, y, d;
|
||||
|
||||
d = 1 - rad;
|
||||
x = 0;
|
||||
y = rad;
|
||||
|
||||
while (x <= y) {
|
||||
plot(cx, cy, x, y);
|
||||
|
||||
if (d < 0) {
|
||||
d = d + (x * 4) + 3;
|
||||
} else {
|
||||
d = d + ((x - y) * 4) + 5;
|
||||
y--;
|
||||
}
|
||||
x++;
|
||||
}
|
||||
}
|
||||
|
||||
Then each X/Y coordinate is plotted eight times:
|
||||
|
||||
(cx+x, cy+y) (cx-x, cy+y) (cx+x, cy-y) (cx-x, cy-y)
|
||||
(cx+y, cy+x) (cx-y, cy+x) (cx+y, cy-x) (cx-y, cy-x)
|
||||
|
||||
For an outline circle, we plot every point. For a filled circle, we add
|
||||
each point to a rasterization table. Near the top and bottom of the
|
||||
circle there will be multiple updates to the same line, with each update
|
||||
replacing the previous one (which works, as we are moving "outward").
|
||||
|
||||
The center point of the circle must be on screen, but it's not necessary
|
||||
for the entire circle to fit. Coordinates outside screen space are clipped.
|
||||
|
||||
#### Performance ####
|
||||
|
||||
The implementation of Bresenham's algorithm is straightforward, and is
|
||||
about as fast as it's going to get. There are actually two versions of
|
||||
the core computation. If the radius is less than 41, we can keep all of
|
||||
the variables in 8 bits. For circles with radius 41 and larger, we need
|
||||
to use 16 bits, slowing each step slightly.
|
||||
|
||||
There are also two versions of the octant plot. If the circle fits entirely
|
||||
on-screen, we use a simple version. If it doesn't, we use a version that
|
||||
clips values. For rasterization that means clamping X to the left or
|
||||
right edge, and skipping updates that are off the screen in the Y dimension.
|
||||
For an outline circle we simply don't plot any clipped points.
|
||||
|
||||
The rendering of filled circles is very fast, though there is a possibility
|
||||
of optimizing the center-fill of large circles. Outline circles were
|
||||
added by inserting JSR PLOT at key points, and could perhaps be faster.
|
||||
|
||||
|
||||
### Drawing Lines with Indexed Byte-Arrays ###
|
||||
|
||||
The &PLOT command allows a BASIC program to execute a series of line-draw
|
||||
commands with a single statement. Think of it like shape-table animation
|
||||
with lines instead of plotted points.
|
||||
|
||||
Suppose you want to draw a rectangle with an X through the middle. We'll
|
||||
make it 11 units wide and 21 units high. To draw that in the middle of
|
||||
the screen, we'd set CX=139 and CY=95, then draw lines offset from that
|
||||
by +/- 5 in X and +/- 10 in Y:
|
||||
|
||||
HPLOT CX-5,CY-10 TO CX-5,CY+10 : REM LEFT
|
||||
HPLOT CX-5,CY-10 TO CX+5,CY-10 : REM TOP
|
||||
HPLOT CX+5,CY-10 TO CX+5,CY+10 : REM RIGHT
|
||||
HPLOT CX-5,CY+10 TO CX+5,CY+10 : REM BOTTOM
|
||||
HPLOT CX-5,CY-10 to CX+5,CY+10 : REM SLASH
|
||||
HPLOT CX+5,CY-10 to CX-5,CY+10 : REM BACKSLASH
|
||||
|
||||
Six lines, each of which needs four coordinates. We'd need 24 bytes
|
||||
to store that in an integer array.
|
||||
|
||||
Suppose instead we identified the four vertices, and numbered them:
|
||||
|
||||
#0 CX-5,CY-10
|
||||
#1 CX+5,CY-10
|
||||
#2 CX-5,CY+10
|
||||
#3 CX+5,CY+10
|
||||
|
||||
and then created a list of line segments using the vertex indices:
|
||||
|
||||
HPLOT #0 TO #2
|
||||
HPLOT #0 to #1
|
||||
HPLOT #1 TO #3
|
||||
HPLOT #2 TO #3
|
||||
HPLOT #0 TO #3
|
||||
HPLOT #1 TO #2
|
||||
|
||||
This requires (4*2) + (6*2) = 20 bytes, for a small savings. The real
|
||||
value in the approach is that it separates the description of the shape
|
||||
from the placement of the points. For example, if you want to change
|
||||
vertex #0 to (CX-7,CY-12), you don't have to make changes to three
|
||||
separate HPLOT calls. (This is particularly useful when you have code
|
||||
that scales and rotates the vertices.)
|
||||
|
||||
For the current release of fdraw, the only built-in transform is
|
||||
translation. Using "&AT cx,cy", you can place the center point anywhere
|
||||
on the screen. This allows you to animate movement of the shape by
|
||||
simply calling &AT to change the position, and &PLOT to draw.
|
||||
|
||||
The &PLOT command takes three arguments: the address of a vertex array,
|
||||
the address of an index array, and the number of line segments to draw.
|
||||
These are referred to as "byte arrays" because they are arbitrary
|
||||
locations in memory where you have BLOADed or POKEd your shape data, not
|
||||
Applesoft arrays. The count can be from 0 to 127. You can optionally
|
||||
add an AT to the end; if not present, the coordinates of the previous AT
|
||||
are used. The initial value is the center of the screen (x=139 y=95).
|
||||
|
||||
The vertex array uses two signed bytes per vertex (-128 to 127), one for
|
||||
the X coordinate and one for the Y coordinate.
|
||||
|
||||
The index array uses two bytes per line segment. Each byte is an index
|
||||
into the vertex array, from 0 to 127.
|
||||
|
||||
Here's an Applesoft program that implements the above example. (The DATA
|
||||
statements use negative numbers for clarity; if you replace the negative
|
||||
values with 256+value, e.g. -5 becomes 251, then you can avoid the IF
|
||||
statement and just poke the value directly.)
|
||||
|
||||
100 TEXT : NORMAL : HOME
|
||||
200 & NEW : & HGR : VTAB 21
|
||||
210 & HCOLOR= 3
|
||||
500 REM ARRAY TEST
|
||||
510 AD = 768: REM $300
|
||||
520 READ D: IF D = 1000 THEN 560
|
||||
530 IF D < 0 THEN D = 256 + D
|
||||
540 POKE AD,D:AD = AD + 1: GOTO 520
|
||||
560 & PLOT 768,776,6: & AT 50,50: & PLOT 768,776,6
|
||||
570 POKE 768,256 - 10: POKE 769,256 - 20: & PLOT 768,776,6 AT 100,50
|
||||
600 DATA -5,-10, 5,-10, -5,10, 5,10
|
||||
610 DATA 0,2, 0,1, 1,3, 2,3, 0,3, 1,2, 1000
|
||||
|
||||
This draws the shape twice, once at the middle of the screen, once centered
|
||||
at 50,50. It then adjusts the top-left coordinate, and draws the shape
|
||||
centered at 100,50. Looking at the output, you can see that the top-left
|
||||
corner of the third instance has moved, and all three lines from that
|
||||
point have moved with it.
|
||||
|
||||
If a vertex ends up off-screen, lines that use that vertex are omitted
|
||||
(not clipped). If you tried to draw the example shape at (0,0), nothing
|
||||
would happen, because every line has at least one point that would be
|
||||
off-screen -- only point #3 is still visible, and all of the lines that
|
||||
use that point extend off screen.
|
||||
|
||||
You can specify a maximum of 128 vertices and 128 index pairs for a
|
||||
single call. If none of the line segments share vertices, you'll need
|
||||
two vertices per line, which means a cap of 64 lines.
|
||||
|
||||
#### Performance ####
|
||||
|
||||
There isn't a whole lot to it -- it just feeds the lines to DrawLine.
|
||||
The key speed advantage is the removal of the Applesoft overhead.
|
||||
|
||||
|
||||
<div id='ideas'/>
|
||||
## Enhancement Ideas ##
|
||||
|
||||
Some ideas for future versions of fdraw.
|
||||
|
||||
### fdraw ###
|
||||
|
||||
Line clipping would make the array-draw function more useful for
|
||||
animation projects. If we accepted signed 16-bit values as input to
|
||||
the clip function, we could specify an AT point outside the screen bounds.
|
||||
That could be extended to circles, which could have off-screen centers.
|
||||
|
||||
A "game line" function or line mode that restricts coordinates to 0-255
|
||||
and ignores color might be worth an experiment.
|
||||
|
||||
Triangle rasterization is possible, but perhaps a bit silly.
|
||||
|
||||
We could handle ellipses, but they're more complicated than circles, and
|
||||
are slower to compute -- you need a couple of multiplications during
|
||||
setup, and the asymmetry means you have to compute a quadrant rather
|
||||
than an octant. If the goal is fast animation rather than general-purpose
|
||||
picture painting then there's little value in supporting ellipses.
|
||||
|
||||
Some of the inner loops are almost certainly paying an extra cycle to
|
||||
cross a page boundary. That's not easy to fix without adding absurd
|
||||
amounts of padding.
|
||||
|
||||
"USE_FAST" could be applied more aggressively to reduce the size.
|
||||
|
||||
Having "fast" vs. "small" builds was mostly an experiment to see how
|
||||
much of a difference in size and speed we'd get by dropping some of
|
||||
the more expensive operations. Another way to reduce size would be to
|
||||
make the build modular, so you could (say) omit circle drawing or only
|
||||
include line drawing. Some trade-offs would have to be made, e.g. if
|
||||
you only wanted line drawing then it makes sense to disable (or replace)
|
||||
the horizontal-line optimization that calls FillRaster, as that requires
|
||||
some sizeable tables that would otherwise be unused.
|
||||
|
||||
### Amperfdraw ###
|
||||
|
||||
The Amperfdraw API is somewhat minimal and could be improved. Taking a
|
||||
cue from Beagle Graphics, the rect and circle calls should probably look
|
||||
more like:
|
||||
|
||||
&DRAW width,height [AT left,top]
|
||||
&COS radius [AT left,top]
|
||||
|
||||
The "&AT" coordinate, currently only used by &PLOT, should be more
|
||||
widely used. Not only is it more convenient, it's also slightly faster,
|
||||
since we don't have to parse the left/top coordinates each time.
|
||||
|
||||
The existing code is (somewhat lazily) using the Applesoft routines to
|
||||
parse coordinates, which includes the range check. We wouldn't be able
|
||||
to use them for width/height, because we would need to take values in the
|
||||
range (0-280, 0-192), where width/height of zero means "draw nothing".
|
||||
|
||||
I deliberately used Applesoft tokens, rather than arbitrary words, to
|
||||
make commands simpler to parse. Some of them don't fit that well. COS
|
||||
and SIN are circle-related, but it's not obvious which is outline and
|
||||
which is filled. DRAW and XDRAW don't really sound like rectangle-draw
|
||||
calls, and would be much more appropriate if used to set the line draw
|
||||
mode. Spending a few bytes & cycles to get better names might be
|
||||
worthwhile.
|
||||
|
||||
It's possible to store &PLOT arrays in actual BASIC integer arrays,
|
||||
which might make them easier to code for. The fact that arrays are
|
||||
DIM()ed once, cannot be resized, and cannot be discarded makes them
|
||||
difficult to use for dynamic data.
|
||||
|
||||
Currently &PLOT takes a list of vertices and a list of line segments.
|
||||
We could also support "continuous line" mode, where it just plays
|
||||
connect-the-dots (saves space, doesn't really affect speed). Being
|
||||
able to embed color changes could be handy.
|
||||
|
||||
&PLOT handles lines and vertices the way Applesoft does, with inclusive
|
||||
coordinates. This results in overdraw when vertices are shared. This
|
||||
is a (small) performance hit, and causes graphical glitches when connected
|
||||
lines are drawn in "xdraw" mode.
|
||||
|
||||
|
||||
<div id='additional-notes'/>
|
||||
# Additional Notes #
|
||||
|
||||
Getting into the gory details here.
|
||||
|
||||
## Setting a pixel ##
|
||||
|
||||
Hi-res pixels are curious creatures.
|
||||
|
||||
Pixel color values are determined by adjacent bits. The various drawing
|
||||
routines only set one bit at a time, so "drawing" in green (hcolor=1) will
|
||||
cause bits to be set in odd columns, cleared in even columns. We don't
|
||||
touch adjacent bits, so drawing purple (hcolor=2) in column 0 and green
|
||||
in column 1 will produce a white line, while drawing them with the columns
|
||||
reversed will produce a black line.
|
||||
|
||||
Making life more complicated is the use of the high bit in each byte, which
|
||||
affects the color. If you draw a purple line in column 0, and a black1
|
||||
line with hcolor=4 in column 6, the purple line turns blue, because the
|
||||
black1 line sets the high bit.
|
||||
|
||||
To set a bit at an arbitrary X offset, we need to do the following:
|
||||
|
||||
(1) Determine which byte to change (xc / 7) and which bit (xc mod 7).
|
||||
(2) Determine the color mask for that byte. For green, it's 0x2a
|
||||
(00101010) in even columns, 0x55 (01010101) in odd columns.
|
||||
(3) Set or clear the target bit and the high bit, leaving the others
|
||||
intact.
|
||||
|
||||
One way to do this is illustrated below. Assume we're drawing a green
|
||||
line at X=17. There's already a green dot at X=15, which gives us a
|
||||
bit pattern of 00000010. (Bits are "backwards", i.e. the bit on the
|
||||
right is the pixel on the left.)
|
||||
|
||||
LDY byteoffset Y=2
|
||||
LDX bitoffset X=3
|
||||
LDA bitmask,x A=0x88 (10001000)
|
||||
STA <andmask
|
||||
LDA oddevencolor,y 4 cyc A=0x2a (00101010)
|
||||
EOR (hbasl),y 5 cyc A=0x28 (00101010 ^ 00000010 = 00101000)
|
||||
AND <andmask 3 cyc A=0x08 (00101000 & 10001000 = 00001000)
|
||||
EOR (hbasl),y 5 cyc A=0x0a (00001000 ^ 00000010 = 00001010)
|
||||
STA (hbasl),y 6 cyc
|
||||
|
||||
As a second example, here's how we plot a black1 (hcolor=4) point at X=6
|
||||
when there's a purple point (hcolor=2) at X=0 (00000001).
|
||||
|
||||
LDA bitmask,x A=0xc0 (11000000)
|
||||
STA <andmask
|
||||
LDA oddevencolor,y 4 cyc A=0x80 (10000000)
|
||||
EOR (hbasl),y 5 cyc A=0x81 (10000000 ^ 00000001 = 10000001)
|
||||
AND <andmask 3 cyc A=0x80 (10000001 & 11000000 = 10000000)
|
||||
EOR (hbasl),y 5 cyc A=0x81 (10000000 ^ 00000001 = 10000001)
|
||||
STA (hbasl),y 6 cyc
|
||||
|
||||
Note the purple pixel is still set, but now the high bit is as well,
|
||||
changing it to blue.
|
||||
|
||||
The trick is to start with the color pattern, which specifies how we want
|
||||
the bits to be set or cleared. We EOR in the screen, which causes the
|
||||
bits in A to be inverted wherever they were set on the screen. Next we
|
||||
use the AND mask to zero out the bits we don't want to update on-screen.
|
||||
When we do the second EOR from the screen, the bits we just zeroed will
|
||||
take on the values from the screen, while the bits we didn't zero will
|
||||
return to their original values from the color pattern (because EORing
|
||||
twice with the same value restores the original).
|
||||
|
||||
It might look a little nicer if we always set two adjacent bits. That
|
||||
would avoid the phenomenon where drawing from 0,0 to 0,10 in green doesn't
|
||||
appear to do anything. For 6 out of 7 pixels this is easy, a simple
|
||||
adjustment to the bitmask, but for the 7th pixel we'll need to update an
|
||||
adjacent byte... unless it's the rightmost byte, which would cause us to
|
||||
overflow and wrap around (or write into a screen hole). GraFORTH
|
||||
renders lines this way, avoiding the overflow issue by limiting the X
|
||||
coordinate range to (0,255).
|
||||
|
||||
To implement "xdraw" mode, where instead of setting pixels we invert
|
||||
the current value, we can just omit (or NOP out) the first EOR.
|
||||
|
||||
We could draw faster if we simply set the new bits, rather than setting
|
||||
some and clearing others according to the color mask. This could result
|
||||
in some odd behavior, e.g. drawing a horizontal green line over a
|
||||
horizontal purple line would result in a white line. Given how strange
|
||||
things are in general this might not be an issue.
|
||||
|
||||
For 3D games like Stellar 7 or Elite, which essentially draw thin
|
||||
monochromatic lines, we can drop the color mask and just set the bit on
|
||||
the screen. Plotting a pixel is then simply:
|
||||
|
||||
LDA (hbasl),y 5 cyc
|
||||
ORA <bitmask 3 cyc
|
||||
STA (hbasl),y 6 cyc
|
||||
|
||||
This cuts the cycle count from 23 to 14. It's also not necessary to
|
||||
worry about the high bit, which can save a few cycles when shifting
|
||||
the bitmask. Most games are also able to limit the "active" part of
|
||||
the screen to fewer than 255 pixels, which eliminates some 16-bit math
|
||||
during setup.
|
||||
|
||||
For "xdraw" mode, the "ORA <bitmask" becomes "EOR <bitmask".
|
||||
|
||||
|
||||
## Single- or Double-Buffered Animation ##
|
||||
|
||||
Because the Apple II has two hi-res graphics pages, it's possible to
|
||||
double-buffer the animation to reduce or eliminate flicker. The
|
||||
application displays one page while erasing and redrawing the other.
|
||||
|
||||
In most cases it's faster to erase the entire screen with the Clear
|
||||
function than it is to draw over with black. For example, consider four
|
||||
diagonal lines in a diamond shape, 100 pixels on a side. Diagonal
|
||||
lines are the most expensive, as each step requires advancing in
|
||||
both vertical and horizontal directions. The current implementation
|
||||
needs about 80 cycles per diagonal pixel, or 100 * 4 * 80 = 32,000 cycles
|
||||
to draw four medium-length lines (ignoring the setup cost for each line).
|
||||
If you assume that the average cost to draw a pixel is about 70 cycles,
|
||||
you can draw 570 pixels in the time it takes to erase the full screen.
|
||||
|
||||
We can clear the entire screen in about 40,000 cycles. If the drawing
|
||||
area is smaller, a custom clear routine could do it in even less.
|
||||
(Imagine your drawing routines keep track of the highest and lowest
|
||||
line that anything touches, and then just erase the "dirty" lines.) So
|
||||
unless you're doing relatively light rendering, you'll get the best
|
||||
performance by wiping all or part of the screen rather than drawing over the
|
||||
previous contents.
|
||||
|
||||
The &INVERSE command is intended to make double-buffered animation
|
||||
easier from BASIC. Use &HGR2 to switch to full-screen mode, then call
|
||||
`&SCRN(1):&HCOLOR=0:&CLEAR` to select page 1 and clear it. Draw your
|
||||
first frame, then call &INVERSE to display page 1 and select page 2
|
||||
for drawing.
|
||||
|
||||
|
||||
An alternative approach is exemplified by Elite. The game only uses
|
||||
one hi-res page, but doesn't noticeably flicker (though distant objects
|
||||
sort of "sparkle"). Suppose you're writing a similarly line-oriented
|
||||
game, and your rendering cycle looks like this:
|
||||
|
||||
- Step 1: draw over previous content with black
|
||||
- Step 2: draw new content with white
|
||||
|
||||
Your game will flicker badly without double-buffering, because there will
|
||||
be a few display refresh periods where most of the lines have been erased.
|
||||
Suppose instead you did this:
|
||||
|
||||
- For each line in the shape, erase the old line, then draw the line in
|
||||
its new position
|
||||
|
||||
Now you might get some flickering on certain lines if the beam crosses
|
||||
them while they're black, but the shape as a whole will be visible most
|
||||
of the time. The trouble with this approach is that, if your shape is
|
||||
moving across the screen, you'll be drawing black over some recent white
|
||||
lines, causing some distracting artifacts.
|
||||
|
||||
The way to make this work is to use "xdraw" mode, where bits are toggled
|
||||
rather than set or cleared. If you draw a new line across an old line that
|
||||
will soon be erased, the crossing point is cleared. When the old line
|
||||
is erased, the crossing point is set white again, so your new line
|
||||
appears unbroken.
|
||||
|
||||
It should be noted that this works well for Elite because they use backface
|
||||
elimination, so lines within a single shape don't cross. It's also
|
||||
important to avoid re-drawing points at shared vertices, or your corners
|
||||
will disappear unless there are an odd number of lines.
|
||||
|
||||
If there's very little on screen, this could be faster than a full clear.
|
||||
Mostly it's of value if you need the 8KB occupied by the second hi-res
|
||||
page for something other than output.
|
||||
|
||||
|
||||
## Vertically-Challenged Rasterization ##
|
||||
|
||||
As noted earlier, we can clear the screen in about 40,000 cycles with
|
||||
the Clear function, but drawing a screen-sized filled rectangle takes
|
||||
about 96,000. Why the difference?
|
||||
|
||||
The FillRaster function handles one horizontal line at a time. For
|
||||
each line it sets any pixels sticking out on the left and right edges,
|
||||
and then it jumps into an unrolled byte-stomp function that blasts
|
||||
its way through the middle at 10 cycles per byte. Compare this to the
|
||||
Clear function, which only needs 5 cycles per byte.
|
||||
|
||||
The trick to improving the speed at which we draw filled rectangles
|
||||
is to make it more like the Clear function, which operates on columns
|
||||
rather than rows.
|
||||
|
||||
Suppose, for example, we figured out which bits we need to set on the
|
||||
left edge, and then applied them to every row. Then we did the same
|
||||
for the right edge. The set-up cost for each edge went from
|
||||
(N cycles * Y rows) to (N cycles). Can we apply this to the middle
|
||||
byte as well?
|
||||
|
||||
It turns out we can. The fundamental problem with setting bytes
|
||||
horizontally is that we have to index off of a direct page register,
|
||||
e.g. "STA (hbasl),y". The only ways around this either add too much
|
||||
loop overhead, too much setup overhead, or require too much memory.
|
||||
For any given line, we need to find the base address, and issue a
|
||||
6-cycle indirect store, followed immediately by an increment of the Y
|
||||
register. If we're drawing in color it's worse than that, because we
|
||||
also have to exclusive-OR the color because the bit pattern flips for
|
||||
odd/even columns.
|
||||
|
||||
We're much better off unrolling vertically. Suppose you have 192
|
||||
"STA abs,y" instructions, one for each row, one after the other. You
|
||||
no longer need the base address lookup, because it's baked into the
|
||||
code, and since we're only touching one column we don't need to worry
|
||||
about odd/even color values here. To use this to draw rows 50-100, you
|
||||
would replace the STA in row 101 with an RTS, and then JSR to the 50th
|
||||
STA instruction. After the column is painted, you increment Y, exclusive-OR
|
||||
the color value, and jump through again. (You can make this a little
|
||||
faster by JMPing in and out instead, but you pay a bit more for setup
|
||||
and cleanup, especially when you have to restore the base address that
|
||||
got overwritten by the JMP.)
|
||||
|
||||
With this change we're working at 5 cycles per byte, plus the loop
|
||||
overhead. A full-screen FillRect will be about as fast as a Clear.
|
||||
|
||||
There are a couple of down sides. First, you need 192*3=576 bytes to
|
||||
hold this pile of store instructions. If you're drawing a lot of filled
|
||||
rectangles, though, the 2x speed improvement would make the size penalty
|
||||
worthwhile. The other problem arises if you use double-buffered animation,
|
||||
as the table is hard-wired to page 1. You can either spend a couple
|
||||
thousand cycles when the page flips to rewrite the addresses, or you can
|
||||
have a second full copy of the stores for page 2.
|
||||
|
||||
The current horizontally-focused implementation uses 256 bytes for its
|
||||
unrolled code area, but you wouldn't be able to get rid of that by
|
||||
switching to the vertical approach. The reason the code works the way
|
||||
it does is that it's designed to render circles, and those are hard to do
|
||||
vertically. With horizontal rasters, when you look at the left and right
|
||||
edges you only need to examine the current row, and set pixels in a
|
||||
single byte. With vertical strips, each byte spans seven columns of
|
||||
pixels, so the top and bottom "edges" might be several bytes deep. The
|
||||
code would have to iterate in "edge space" until it reached the meaty
|
||||
center, and the cost of doing so would likely erase the benefit of vertical
|
||||
fills until your circles got reasonably large.
|
||||
|
||||
It's possible that a hybrid approach, in which selected rectangles in the
|
||||
center of a large circle are drawn with a fast vertical fill, could be
|
||||
used, with slower code rendering the outer edges. The trick would be to
|
||||
come up with an approach that doesn't leave gaps, minimizes overdraw, and
|
||||
is sufficiently faster to make the effort worthwhile.
|
||||
|
197
docs/personal-notes.md
Normal file
197
docs/personal-notes.md
Normal file
@ -0,0 +1,197 @@
|
||||
My Quest for Lines
|
||||
==================
|
||||
|
||||
As far back as I can remember, I always wanted to draw lines on the
|
||||
hi-res screen.
|
||||
|
||||
This probably started when I saw Battlezone in the arcades in the early
|
||||
1980s. I still think the game is beautiful -- a first-person shooter
|
||||
reduced to the essential elements. I wanted to write something similar
|
||||
for the Apple II, but I didn't know where to start. (I should probably
|
||||
mention that I was 11 years old in 1980.)
|
||||
|
||||
Battlezone had a dedicated matrix processor (the "math box"), and a
|
||||
vector display that handled the line drawing. The Apple II had neither
|
||||
of those things, which meant that achieving the same level of performance
|
||||
and graphical detail wasn't possible. Despite those shortcomings, Damon
|
||||
Slye created a pretty solid Battlezone-ish game in 1983, called Stellar 7.
|
||||
A couple of years later, Braben and Bell made another compelling wireframe
|
||||
combat game, the space combat sim Elite. (The A2-FS1 flight simulator
|
||||
came out much earlier, but the graphics were blinky, enemies were just
|
||||
dots, and the action was much slower-paced. Of course, it loaded from
|
||||
cassette tape and ran in 16KB, so they didn't have much choice.)
|
||||
|
||||
Seeing these games showed me that the problems could be solved. I decided
|
||||
that the place to start was line drawing, because (a) line drawing is
|
||||
pretty fundamental to wireframe 3D, and (b) I wasn't getting the performance
|
||||
I needed out of HPLOT TO.
|
||||
|
||||
Somewhere in the mid-1980s -- I was in high school now -- I began by trying
|
||||
to figure out how line drawing worked. Suppose, for example, you want to
|
||||
HPLOT 0,0 TO 19,5. How do you decide which pixels to set?
|
||||
|
||||
I wrote a program (which I recently found) called "HPLOT SIMULATOR". It
|
||||
computed the ratio of vertical to horizontal pixels (e.g. 6 / 20 = 0.3),
|
||||
and marched horizontally across the screen, adding the fractional value to
|
||||
the Y coordinate at each step. The result was a pretty good-looking line.
|
||||
|
||||
The trouble was that it used floating-point math and required division,
|
||||
things that the 6502 is not very good at. It occurred to me that division
|
||||
can be performed as a series of integer subtractions. (It probably occurred
|
||||
to me because I didn't know any other way to divide on the 6502, not having
|
||||
encountered the shift-and-subtract approach yet.) So if you initialize a
|
||||
counter to zero, and add 6 to it each time you move horizontally, then when
|
||||
it reaches 20 you know it's time to move vertically. Subtracting 20 from
|
||||
the counter resets it, but retains the division remainder as the starting
|
||||
point, so you retain the fractional part.
|
||||
|
||||
When I went to college I took a graphics class, and was introduced to
|
||||
Bresenham's classic line algorithm. This was essentially the same as what
|
||||
I'd figured out for myself, but with two refinements: (1) it used signed
|
||||
values, allowing a slightly cheaper "< 0" comparison, and (2) it started
|
||||
with the counter half full, correcting the slight lopsidedness of my lines.
|
||||
|
||||
The graphics class inspired me to write a 3D game library called Arc3D
|
||||
in 1990. I used it to create a pair of demos: "Not Modulae", which
|
||||
animated several 3D shapes on the screen, including a pair of ships from
|
||||
Elite; and "Not Stellar 7", a graphics demo that let you drive around
|
||||
(and, sadly, through) some tanks from Stellar 7. The Arc3D library was
|
||||
written for the IIgs, in 65816 assembly, and used the super-hi-res screen.
|
||||
Having a better CPU, lots more memory, and a less-quirky graphics
|
||||
architecture made things easier than doing the same on a classic Apple II.
|
||||
|
||||
I wrote my own super-hi-res line drawing code, of course, but a year later
|
||||
when I disassembled somebody else's demo I found better code. Which, it
|
||||
turned out, they had also lifted from another source, an FTA demo. I
|
||||
dropped mine and used theirs.
|
||||
|
||||
After I graduated from college, my side projects tended more toward data
|
||||
compression and Netrek, so Arc3D was never improved upon.
|
||||
|
||||
Fifteen years later, in 2006, there was a discussion on a Usenet group
|
||||
about circle rendering. Once upon a time I'd drawn circles from BASIC
|
||||
with trig functions, but it was painfully slow, which made me wonder
|
||||
about a part of the game Horizon V where you steer through a series of
|
||||
circles. I wanted to try it for myself and see what it would take.
|
||||
(Looking at a youtube video of Horizon V, the animation is more radial
|
||||
than circular... I suspect it's not really drawing circles at all.)
|
||||
|
||||
I first announced my results in a
|
||||
[comp.sys.apple2.programmer](https://groups.google.com/forum/#!msg/comp.sys.apple2.programmer/Vj_xVjMHaR0/cLU3t2TlPrMJ)
|
||||
posting. I had focused on filled circles, rather than outline circles,
|
||||
since that seemed like a more interesting challenge. The "fdraw" demo
|
||||
supported fast rendering of filled circles, filled rectangles, and had
|
||||
a very fast screen clear. A week later, after a bit of cleanup, I
|
||||
[released the fdraw v0.2 sources](https://groups.google.com/d/msg/comp.sys.apple2.programmer/Un4pV5p8Elw/6qZVAPc_da0J).
|
||||
|
||||
It occurred to me at the time that this would be a handy place to stick
|
||||
the hi-res line drawing code I'd always wanted to write. Somewhere around
|
||||
this time I also sort of poked at the idea of writing a dedicated hi-res
|
||||
graphics compression program.
|
||||
|
||||
Fast forward another nine years, to 2015. After learning about the LZ4
|
||||
format, I went back to my data compression roots and wrote
|
||||
[fhpack](https://github.com/fadden/fhpack) and some demos. I had so much
|
||||
fun doing it that I decided it was finally time to write some hi-res
|
||||
line drawing code.
|
||||
|
||||
Being older, wiser, and having easy access to relevant information, I
|
||||
began with the appropriate chapters in Michael Abrash's _Graphics
|
||||
Programming Black Book Special Edition_. This covered the standard
|
||||
algorithm, but also had a chapter on a faster "run-slice" approach.
|
||||
This intrigued me, because instead of the usual "step right, check if
|
||||
it's time to move down, step right, check if it's time ..." logic, it
|
||||
says, "figure out how long each line segment is; then, move right 3
|
||||
times, step down, move right 4 times, step down, ...", saving a lot of
|
||||
redundant computation. The trouble is that it requires fixed-point
|
||||
division, and drawing N adjacent pixels is tricky when your graphics
|
||||
architecture has 7 horizontal pixels per byte. You'd have to be a bit
|
||||
crazy to try to get that to work.
|
||||
|
||||
So I went with a standard approach, and used the Applesoft ROM method of
|
||||
coloring pixels (discussed in the fdraw docs). I carefully optimized
|
||||
the code, and squeezed out as much performance as I could.
|
||||
|
||||
When I was done, I began looking around at what other people did to see if
|
||||
there were any tricks I missed.
|
||||
|
||||
I looked at the Applesoft ROM code. Very clever, but very much optimized
|
||||
for space over speed. Also, because it's in ROM, self-modifying code is
|
||||
not possible, so they lose a cycle here and there.
|
||||
|
||||
Next I looked at GraFORTH. I figured out how functions were arranged,
|
||||
identified the plot function, and disassembled it with CiderPress. It uses
|
||||
a pretty standard algorithm, but supports multiple drawing modes and sets
|
||||
two adjacent bits for better-looking colored lines. Good use of
|
||||
self-modifying code, but some choices were made to reduce code size at the
|
||||
expense of speed. My code was faster.
|
||||
|
||||
Next I looked at Elite. Digging through memory after the program had
|
||||
loaded, I found a collection of purpose-built line functions. Some drew
|
||||
color, most used EOR to "xdraw" monochrome lines. Standard Bresenham
|
||||
approach, with a bit of variation on the Y-lookup table -- their table is
|
||||
only 24 bytes (1/8th of the screen), and they use a quick "add 4 to the
|
||||
high byte" 7 out of every 8 lines. I tried applying this to my code,
|
||||
but it turned out that just using a full lookup table was a tiny bit faster.
|
||||
|
||||
Next I looked at Stellar 7, one of my earliest inspirations. I scanned
|
||||
through some files with CiderPress, looking for anything line-draw-esque.
|
||||
(If you spend enough time drawing lines you start to see patterns.)
|
||||
After about five minutes I found the code, in the same file as this
|
||||
gigantic unrolled division routine. But as I started to dig into the code
|
||||
I noticed that it was using a count oddly, and this one function was...
|
||||
HOLY CATS he did run-slicing.
|
||||
|
||||
And he did it big. There are several line functions, all of them padded
|
||||
out to live on a single page (so that none of the branches cross page
|
||||
boundaries, which costs an extra cycle). It has the usual special cases --
|
||||
simple horizontal and vertical lines -- and the usual split between
|
||||
vertically-dominant and horizontally-dominant lines. But there are *three*
|
||||
different functions for drawing mostly-horizontal lines, selected based on
|
||||
slope, all of which try to set multiple horizontal pixels at once. The
|
||||
slope of the line affects how the code is structured; for example, for
|
||||
very shallow lines it expects that it will often be able to set an entire
|
||||
byte at once. Color is not supported, so pixels are set with a simple
|
||||
OR operation.
|
||||
|
||||
It's very impressive, and a wee bit terrifying. But when you're making
|
||||
a game that will be spending much of its time drawing lines, you really
|
||||
want to optimize those draw functions.
|
||||
|
||||
The tricky part is that divide. The division routine is unrolled to a
|
||||
healthy 187 bytes long, and might take 240 cycles to run. For short
|
||||
lines and mostly-vertical lines it might have been more efficient to skip
|
||||
the division and just use a run-length implementation, but the ability to
|
||||
set multiple bits at once for mostly-horizontal lines is a huge win. It's
|
||||
a fair bet that the code in Stellar 7 is the fastest line drawing
|
||||
implementation for the Apple II. (Of course, I haven't looked at Arcticfox,
|
||||
the sequel...)
|
||||
|
||||
The general structure of the code was actually very similar to mine: always
|
||||
draw left to right, use self-modifying code to handle up vs. down, and so on.
|
||||
I didn't come away with any new ideas for optimizations to my run-length
|
||||
implementation from this or the other programs I looked at... but there
|
||||
are a lot of other games that I haven't disassembled.
|
||||
|
||||
|
||||
So, 30+ years after HPLOT SIMULATOR, here I am with a bunch of code for
|
||||
drawing lines on the Apple II hi-res screen.
|
||||
|
||||
I don't plan on writing Battlezone for the Apple II. Stellar 7 did that,
|
||||
and more. My goal in developing fdraw was to scratch a very old itch.
|
||||
|
||||
I had forgotten how much fun this stuff is. Working in ARM assembly
|
||||
language on Android offered similar challenges, but you're never entirely
|
||||
sure exactly how your code will perform on the wide range of CPU
|
||||
architectures (affecting instruction interleave, cache size and
|
||||
replacement policy, etc.), you have to guess at cache misses and the
|
||||
success rate of data prefetching, and it's difficult to measure results when
|
||||
there are multiple threads running and interrupts firing. On the Apple II
|
||||
you can count every cycle, and know exactly what will happen when.
|
||||
|
||||
I don't expect that anyone will find the code useful, but that wasn't
|
||||
really the point.
|
||||
|
||||
Andy McFadden
|
||||
August 2015
|
||||
|
BIN
fdraw-disks.zip
Normal file
BIN
fdraw-disks.zip
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user