updated from 8bitworkshop.com

This commit is contained in:
tilleul 2021-01-18 16:46:27 +01:00
parent 27cf5eba4e
commit 78b282a573
5 changed files with 381 additions and 79 deletions

View File

@ -25,8 +25,14 @@ hires equ $c057
Start
jsr test_blitSHR_orig
; original blitSHR
;jsr test_blitSHR_orig
; new blitSHR_2 (no output)
; jsr t_blitSHR_n2
; blitSHR_IO (screen 2 output and input 2 screen)
jsr t_blitSHR_IO
end jmp end
@ -34,6 +40,8 @@ end jmp end
include "blitSHR_orig.asm"
include "hires.asm"
include "blitSHR_new.asm"
org $2000
incbin "marioluigi-apple2.hires.bin"

Binary file not shown.

279
blitSHR_new.asm Normal file
View File

@ -0,0 +1,279 @@
; Zero-page locations shared by the blit routines and their test drivers.
hiresXZP equ $ec ; hires X offset (xcol): loaded into X as the starting byte column of every row
hiresYZP equ $ed ; hires Y offset (yrow): first line, used to index hiresLinesLO / hiresLinesHI
hiresAddrZP equ $fc ; 16 bit address of hires line (from LUT); not referenced by blitSHR_I / blitSHR_IO
inputAddrZP equ $fa ; 16 bit input address (bitmap to draw); not referenced by blitSHR_I / blitSHR_IO
outputAddrZP equ $ee ; 16 bit output address (buffer to save); not referenced by blitSHR_I / blitSHR_IO
scr2outRowsZP equ $ce ; number of rows (output) (height); written by the test drivers, not read by blitSHR_I / blitSHR_IO
inp2scrRowsZP equ $eb ; number of rows (input) (height); the row count blitSHR_I and blitSHR_IO actually loop on
bytesPerRowZP equ $e3 ; bytes per row (width); also the per-row step added to the patched buffer addresses
toggleMainAuxZP equ $42 ; not used here
ycounter equ $08 ; rows still to process (counts down to 0)
xcounter equ $09 ; bytes still to copy in the current row (counts down to 0)
blitSHR_I       subroutine
; ---------------------------------------------------------------------
; blitSHR_I : copy a bitmap to the hires screen (input -> screen only,
;             nothing is saved from the screen).
;
; In:   hiresYZP        first hires line to write
;       hiresXZP        first byte column written on each line
;       inp2scrRowsZP   number of rows to copy
;       bytesPerRowZP   bytes copied per row
;       inp_addr2+1/+2  operand of the patched load below. The caller
;                       must store (source address - hiresXZP) there,
;                       because the load is indexed by X = byte column.
; Out:  the operand of inp_addr2 has been advanced by bytesPerRowZP
;       once per row, so it must be re-seeded before the next call.
; Uses: A, X, Y, flags, ycounter, xcounter; self-modifying code.
;
; A row count or width of 0 is not handled: the counters are
; decremented before they are tested.
;
; Author's estimate for a 3x21 sprite: 2372 + 11 = 2383 cycles
; (-19% vs 2942 for blitSHR_orig without output).
; NOTE(review): that estimate counts dec xcounter / dec ycounter as
; 6 cycles each; both are zero-page operands (5 cycles on a 6502),
; so the real total is probably a little lower - re-verify.
; ---------------------------------------------------------------------
ycounter        equ $08
xcounter        equ $09
        clc                     ; carry must be clear for the adc in the row epilogue
        lda inp2scrRowsZP
        sta ycounter            ; rows left to draw
        ldy hiresYZP            ; Y = current hires line
.row    lda hiresLinesLO,y      ; patch the store operand with this line's base address
        sta .put+1
        lda hiresLinesHI,y
        sta .put+2
        ldx hiresXZP            ; X = byte column, restarted for every row
        lda bytesPerRowZP
        sta xcounter            ; bytes left in this row
inp_addr2 lda $1000,x           ; operand patched by the caller and by the row epilogue
.put    sta $2000,x             ; operand patched at .row
        inx
        dec xcounter
        bne inp_addr2
        lda inp_addr2+1         ; source pointer += bytesPerRowZP (carry is clear here)
        adc bytesPerRowZP
        sta inp_addr2+1
        bcc .row_done
        inc inp_addr2+2
        clc                     ; restore the carry-clear invariant
.row_done iny                   ; next hires line
        dec ycounter
        bne .row
        rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
blitSHR_IO      subroutine
; ---------------------------------------------------------------------
; blitSHR_IO : for every byte of the target rectangle, first save the
;              screen byte to an output buffer (screen -> output), then
;              overwrite it with the input bitmap (input -> screen).
;
; In:   hiresYZP        first hires line
;       hiresXZP        first byte column on each line
;       inp2scrRowsZP   number of rows (used for both directions;
;                       scr2outRowsZP is not read by this routine)
;       bytesPerRowZP   bytes per row
;       inp_addr3+1/+2  operand of the patched bitmap load
;       out_addr3+1/+2  operand of the patched buffer store
;                       Both must be seeded by the caller with
;                       (address - hiresXZP), since they are indexed
;                       by X = byte column.
; Out:  both patched operands have been advanced by bytesPerRowZP once
;       per row and must be re-seeded before the next call.
; Uses: A, X, Y, flags, ycounter, xcounter; self-modifying code.
;
; A row count or width of 0 is not handled: the counters are
; decremented before they are tested.
;
; Author's estimate for a 3x21 sprite: 3401 + 11 = 3412 cycles
; (-17% vs 4097 for blitSHR_orig).
; NOTE(review): that estimate counts dec xcounter / dec ycounter as
; 6 cycles each; both are zero-page operands (5 cycles on a 6502),
; so the real total is probably a little lower - re-verify.
; ---------------------------------------------------------------------
ycounter        equ $08
xcounter        equ $09
        clc                     ; carry must be clear for the adc pair in the row epilogue
        lda inp2scrRowsZP
        sta ycounter            ; rows left to process
        ldy hiresYZP            ; Y = current hires line
.row    lda hiresLinesLO,y      ; same line base goes into the screen load and screen store
        sta .get+1
        sta .put+1
        lda hiresLinesHI,y
        sta .get+2
        sta .put+2
        ldx hiresXZP            ; X = byte column, restarted for every row
        lda bytesPerRowZP
        sta xcounter            ; bytes left in this row
.get    lda $2000,x             ; read the current screen byte (operand patched at .row)
out_addr3 sta $1000,x           ; ... and save it to the output buffer
inp_addr3 lda $1000,x           ; fetch the bitmap byte
.put    sta $2000,x             ; ... and write it to the screen (operand patched at .row)
        inx
        dec xcounter
        bne .get
        lda inp_addr3+1         ; bitmap pointer += bytesPerRowZP (carry is clear here)
        adc bytesPerRowZP
        sta inp_addr3+1
        bcc .step_out
        inc inp_addr3+2
        clc                     ; restore the carry-clear invariant
.step_out lda out_addr3+1       ; buffer pointer += bytesPerRowZP
        adc bytesPerRowZP
        sta out_addr3+1
        bcc .row_done
        inc out_addr3+2
        clc                     ; restore the carry-clear invariant
.row_done iny                   ; next hires line
        dec ycounter
        bne .row
        rts
;;;;;;;;;;;;;;;;;;;;;;;;; test routines
; test input 2 screen only
t_blitSHR_I     subroutine
; ---------------------------------------------------------------------
; Test driver for blitSHR_I: draws the 3x21 "mario" bitmap at every
; position, byte column 0..37 on each starting line 0..171.
; Uses: A, X, Y, flags, xcol, yrow, plus everything blitSHR_I touches.
; NOTE(review): the limits 38 and 172 keep a 3-byte x 21-line sprite
; within columns 0..39 and lines 0..191 - presumably the hires screen
; size; confirm.
; ---------------------------------------------------------------------
xcol            equ $06
yrow            equ $07
        ldx #0                  ; start in the top-left corner; X mirrors xcol
        stx xcol
        stx yrow
        lda #21                 ; height
        sta inp2scrRowsZP
        sta scr2outRowsZP
        lda #3
        sta bytesPerRowZP       ; width
.next_pos
        stx hiresXZP            ; X == xcol on every entry to this label
        lda yrow
        sta hiresYZP
        lda #<mario             ; seed the patched load with (mario - xcol):
        sec                     ; blitSHR_I indexes it by X = byte column
        sbc xcol
        sta inp_addr2+1
        lda #>mario
        sbc #0                  ; propagate the borrow
        sta inp_addr2+2
        jsr blitSHR_I
        inc xcol                ; next byte column
        ldx xcol
        cpx #38
        bcc .next_pos
        ldx #0                  ; column wrapped: back to column 0 ...
        stx xcol
        inc yrow                ; ... on the next line
        ldy yrow
        cpy #172
        bcc .next_pos
.done   rts
t_blitSHR_IO    subroutine
; ---------------------------------------------------------------------
; Test driver for blitSHR_IO + blitSHR_I. At every position (byte
; column 0..37, starting line 0..171):
;   1. blitSHR_IO saves the screen rectangle into "buffer" and draws
;      the 3x21 "mario" bitmap over it;
;   2. blitSHR_I copies "buffer" back to the same rectangle.
; Uses: A, X, Y, flags, xcol, yrow, plus everything the two blit
; routines touch.
; NOTE(review): the limits 38 and 172 keep a 3-byte x 21-line sprite
; within columns 0..39 and lines 0..191 - presumably the hires screen
; size; confirm.
; ---------------------------------------------------------------------
xcol            equ $06
yrow            equ $07
        ldx #0                  ; start in the top-left corner; X mirrors xcol
        stx xcol
        stx yrow
        lda #21                 ; height
        sta inp2scrRowsZP
        sta scr2outRowsZP
        lda #3
        sta bytesPerRowZP       ; width
.next_pos
        stx hiresXZP            ; X == xcol on every entry to this label
        lda yrow
        sta hiresYZP
        ; All patched operands are indexed by X = byte column inside the
        ; blit routines, so each one is seeded with (address - xcol).
        lda #<buffer            ; (buffer - xcol) is both the save target of
        sec                     ; blitSHR_IO and the source of blitSHR_I
        sbc xcol
        sta inp_addr2+1
        sta out_addr3+1
        lda #>buffer
        sbc #0                  ; propagate the borrow
        sta inp_addr2+2
        sta out_addr3+2
        lda #<mario             ; (mario - xcol) is the bitmap source of blitSHR_IO
        sec
        sbc xcol
        sta inp_addr3+1
        lda #>mario
        sbc #0                  ; propagate the borrow
        sta inp_addr3+2
        jsr blitSHR_IO          ; save background, draw sprite
        jsr blitSHR_I           ; put the saved background back
        inc xcol                ; next byte column
        ldx xcol
        cpx #38
        bcc .next_pos
        ldx #0                  ; column wrapped: back to column 0 ...
        stx xcol
        inc yrow                ; ... on the next line
        ldy yrow
        cpy #172
        bcc .next_pos
.done   rts

View File

@ -13,70 +13,73 @@ toggleMainAuxZP equ $42 ; not used here
;; blitSHR original code
blitSHR_orig subroutine
blitSHR_orig subroutine ; 4095 + 2 = 4097
; w/o output = 2940 + 2 = 2942
ldx #0
ldx #0 ; +2
; Copy Screen Address from Hires Tables (using Line Offset Y and Byte Offset X)
.loopRow
ldy hiresYZP ; Y-Offset to Hires Line (ytop)
lda hiresLinesHI,y
sta hiresAddrZP+1
lda hiresLinesLO,y
adc hiresXZP ; X-Offset to Hires Byte (xcol)
sta hiresAddrZP
.loopRow ; loopRow = 20 + 61 + 65 + 18 + 18 + 13 = 195 x height = 195 x 21 = 4095
; w/o outp= 20+ 3+3 + 65 + 18 + 18 + 13 = 140 x height = 2940
ldy hiresYZP ; +3 Y-Offset to Hires Line (ytop)
lda hiresLinesHI,y ; +4
sta hiresAddrZP+1 ; +3
lda hiresLinesLO,y ; +4
adc hiresXZP ; +3 X-Offset to Hires Byte (xcol)
sta hiresAddrZP ; +3
; Copy bytes from SHR buffer to output
.screen2output
lda outputAddrZP+1
beq .input2screen ; If high-byte is zero, then skip
ldy #0 ; Y loop: Copy xxx bytes per row
.loopCopy1 ; Copy 1 byte
lda (hiresAddrZP),y
sta (outputAddrZP),y
iny
cpy bytesPerRowZP
bne .loopCopy1 ; Iterate Y loop
.screen2output ; = 7 + loopCopy1 + 3 + 2 = 61
lda outputAddrZP+1 ; +3
beq .input2screen ; +2(|3) If high-byte is zero, then skip
ldy #0 ; +2 Y loop: Copy xxx bytes per row
.loopCopy1 ; loopCopy1 = 19 x width - 1 = 19 x 3 - 1 = 56
lda (hiresAddrZP),y ; +5
sta (outputAddrZP),y ; +6
iny ; +2
cpy bytesPerRowZP ; +3
bne .loopCopy1 ; +(2|)3 Iterate Y loop
; Copy bytes from input to SHR buffer
cpx inp2scrRowsZP ; Check number of input rows (for cropped sprites)
bcs .incAddress1
.input2screen
clc
lda inputAddrZP+1
beq .incAddress1 ; If high-byte is zero, then skip
ldy #0 ; Y loop: Copy xxx bytes per row
.loopCopy2
lda (inputAddrZP),y ; Copy 1 byte
sta (hiresAddrZP),y
iny
cpy bytesPerRowZP ; Iterate Y loop
bne .loopCopy2
cpx inp2scrRowsZP ; +3 Check number of input rows (for cropped sprites)
bcs .incAddress1 ; +(2|)3
.input2screen ; = 9 + loopCopy2 = 65
clc ; +2
lda inputAddrZP+1 ; +3
beq .incAddress1 ; +2(|3) If high-byte is zero, then skip
ldy #0 ; +2 Y loop: Copy xxx bytes per row
.loopCopy2 ; loopCopy2 = 19 x width - 1 = 19 x 3 - 1 = 56
lda (inputAddrZP),y ; +5 Copy 1 byte
sta (hiresAddrZP),y ; +6
iny ; +2
cpy bytesPerRowZP ; +3 Iterate Y loop
bne .loopCopy2 ; +(2|)3
.incAddress1
clc ; Increment address of output block
lda outputAddrZP
adc bytesPerRowZP ; Move by xxx bytes
sta outputAddrZP
bcc .nocarry1 ; Check if carry to high-byte
inc outputAddrZP+1
.incAddress1 ; = 18
clc ; +2 Increment address of output block
lda outputAddrZP ; +3
adc bytesPerRowZP ; +3 Move by xxx bytes
sta outputAddrZP ; +3
bcc .nocarry1 ; +2(|3) Check if carry to high-byte
inc outputAddrZP+1 ; +5
.nocarry1
.incAddress2
clc ; Increment address of input block
lda inputAddrZP
adc bytesPerRowZP ; Move by xxx bytes
sta inputAddrZP
bcc .nocarry2 ; Check if carry to high byte
inc inputAddrZP+1
.incAddress2 ; = 18
clc ; +2 Increment address of input block
lda inputAddrZP ; +3
adc bytesPerRowZP ; +3 Move by xxx bytes
sta inputAddrZP ; +3
bcc .nocarry2 ; +2(|3) Check if carry to high byte
inc inputAddrZP+1 ; +5
.nocarry2
.nextRow
; Move to next row
inc hiresYZP ; Increment Hires Line offset
inx
cpx scr2outRowsZP
bcc .loopRow ; Iterate X loop (rows)
; Move to next row ; = 13
inc hiresYZP ; +5 Increment Hires Line offset
inx ; +2
cpx scr2outRowsZP ; +3
bcc .loopRow ; +(2|)3 Iterate X loop (rows)
rts
@ -102,40 +105,48 @@ yrow equ $07
clc ; needed because not in routine !
.loop_mario_x
lda #<buffer
sta outputAddrZP
lda #>buffer
sta outputAddrZP+1
lda #<buffer ; 2
sta outputAddrZP ; 3
lda #>buffer ; 2
sta outputAddrZP+1 ; 3
; = 10
lda #<mario
sta inputAddrZP
lda #>mario
sta inputAddrZP+1
lda #<mario ; 2
sta inputAddrZP ; 3
lda #>mario ; 2
sta inputAddrZP+1 ; 3
; = 10
lda xcol
sta hiresXZP
lda yrow
sta hiresYZP
lda xcol ; 3
sta hiresXZP ; 3
lda yrow ; 3
sta hiresYZP ; 3
; = 12
jsr blitSHR_orig
jsr blitSHR_orig ; 10 + 10 + 12 + 4097 = 4129
lda #<buffer
sta inputAddrZP
lda #>buffer
sta inputAddrZP+1
lda #<buffer ; 2
sta inputAddrZP ; 3
lda #>buffer ; 2
sta inputAddrZP+1 ; 3
; = 10
lda #0
sta outputAddrZP+1
lda #0 ; 2
sta outputAddrZP+1 ; 3
; = 5
lda xcol
sta hiresXZP
lda yrow
sta hiresYZP
lda xcol ; 3
sta hiresXZP ; 3
lda yrow ; 3
sta hiresYZP ; 3
; = 12
clc ; needed because not in routine !
jsr blitSHR_orig
clc ; +2 needed because not in routine !
jsr blitSHR_orig ; 10 + 5 + 12 + 2 + 2942 = 2971
; 2971 + 4129 = 7100
ldx xcol
inx
@ -148,7 +159,7 @@ yrow equ $07
ldx yrow
inx
stx yrow
cpx #191-21
cpx #172
bcc .loop_mario_x
.rts rts

View File

@ -49,7 +49,7 @@ clear_hgr1 subroutine
bpl .loop
rts
org $0900
hiresLinesHI
hex 2024282C3034383C
hex 2024282C3034383C
@ -75,6 +75,8 @@ hiresLinesHI
hex 22262A2E32363A3E
hex 23272B2F33373B3F
hex 23272B2F33373B3F
org $0a00
hiresLinesLO
hex 0000000000000000
@ -126,5 +128,7 @@ mario ; 3x21
hex A8808A
hex AA80AA
org $b00
buffer ; 3x21
ds.b 63,00