; source: 4cade/src/fx/fx.hgr.precomputed.3bit.a (444 lines, 9.9 KiB, plaintext)


;license:MIT
;(c) 2019-2022 by 4am
;
;
; Fixed addresses used by the 3-bit effect macros, in ascending address order.
;
; --- low-memory scratch ---
copymasks          = $0200     ; $100 bytes but sparse: the index 0..7 sits in the high 3 bits, so only $00, $20, $40... are used
extra_cols         = $02F8     ; $08 bytes
tmplo              = $0300     ; $C0 bytes
; --- dither tables ---
dither1_lo         = $6900     ; $80 bytes
dither2_lo         = $6980     ; $80 bytes
dither3_lo         = $6A00     ; $80 bytes
dithermasks        = $6A80     ; $58 bytes
evenrow_masks      = dithermasks
oddrow_masks       = dithermasks+2
no_masks           = dithermasks+44
; --- coordinate data ---
Coordinates3Bit    = $6B00     ; pixel data is $5001 bytes ($5000 on disk + 1 byte as EOF marker)
                               ; and for optimization we only support loading external files page-aligned
                               ; and $BC00+ is used by the 3bit macros for data tables
                               ; so we load at $6B00 and waste $FF bytes at $BB01+ but it's okay
EndCoordinates3Bit = Coordinates3Bit + $5000
; --- low/high address bytes for the three rows (a, b, c) of each block ---
hgrlo3a            = $BC00     ; $80 bytes
hgrlo3b            = $BC80     ; $80 bytes
hgrlo3c            = $BD00     ; $80 bytes
hgrhi3a            = $BD80     ; $80 bytes
hgrhi3b            = $BE00     ; $80 bytes
hgrhi3c            = $BE80     ; $80 bytes
tmphi              = $BF40     ; $C0 bytes
!source "src/fx/macros.a"
!macro BUILD_EXTRA_COLS {
; Fill the 8-byte extra_cols table so that extra_cols[n] = $20+n for n in $00..$07.
; X only counts the 8 entries; Y is both the value stored and the store index,
; which is why the store base is biased by -$20.
; Clobbers A, X, Y and flags.
         ldx   #7                    ; 8 entries to write
         ldy   #$20+7                ; last value / last index
-        tya
         sta   extra_cols-$20, y     ; lands on extra_cols+(Y-$20)
         dey
         dex
         bpl   -
}
!macro BUILD_SPARSE_BITMASKS_3BIT {
; Write the eight copy masks into the sparse copymasks table.
; Mask n lives at copymasks + n*$20, so the table can be indexed directly
; with a byte whose high 3 bits hold n and whose low 5 bits are zero.
; Bit 7 is set in every mask; the remaining set bits are noted per entry.
; Clobbers A and flags.
         lda   #%10000011            ; mask 0: bits 0-1
         sta   copymasks + 0*$20
         lda   #%10001100            ; mask 1: bits 2-3
         sta   copymasks + 1*$20
         lda   #%10110000            ; mask 2: bits 4-5
         sta   copymasks + 2*$20
         lda   #%11000000            ; mask 3: bit 6
         sta   copymasks + 3*$20
         lda   #%10000001            ; mask 4: bit 0
         sta   copymasks + 4*$20
         lda   #%10000110            ; mask 5: bits 1-2
         sta   copymasks + 5*$20
         lda   #%10011000            ; mask 6: bits 3-4
         sta   copymasks + 6*$20
         lda   #%11100000            ; mask 7: bits 5-6
         sta   copymasks + 7*$20
}
!macro BUILD_3BIT_HGR_LOOKUP_TABLES {
; De-interleave the $C0-entry tmplo/tmphi tables into three low/high table
; pairs of $40 entries each: entry n of the "a" tables is tmp entry 3n,
; "b" is 3n+1 and "c" is 3n+2.
; Every entry is written twice, at n and at n+$40, so each table fills its
; full $80 bytes and can be indexed with $00..$7F.
; tmplo/tmphi are passed to +BUILD_HGR_LOOKUP_TABLES (defined elsewhere),
; which presumably fills them first.
; Clobbers A, X, Y and flags.
         +BUILD_HGR_LOOKUP_TABLES tmplo, tmphi
         ldx   #$C0-1                ; X walks tmplo/tmphi from the last entry down
         ldy   #$40-1                ; Y walks the split tables from the last entry down
-        lda   tmplo, x              ; tmp entry 3Y+2 -> "c" tables
         sta   hgrlo3c, y
         sta   hgrlo3c+$40, y
         lda   tmphi, x
         sta   hgrhi3c, y
         sta   hgrhi3c+$40, y
         dex
         lda   tmplo, x              ; tmp entry 3Y+1 -> "b" tables
         sta   hgrlo3b, y
         sta   hgrlo3b+$40, y
         lda   tmphi, x
         sta   hgrhi3b, y
         sta   hgrhi3b+$40, y
         dex
         lda   tmplo, x              ; tmp entry 3Y -> "a" tables
         sta   hgrlo3a, y
         sta   hgrlo3a+$40, y
         lda   tmphi, x
         sta   hgrhi3a, y
         sta   hgrhi3a+$40, y
         dex
         dey
         bpl   -
}
!macro ROW_X_TO_3BIT_BASE_ADDRESSES {
; in:    X = $00..$3F, mapping to row 0, 3, 6, 9, 12, ... 189
;        (the tables are $80 bytes with entries $40..$7F repeating $00..$3F,
;        so X = $40..$7F selects the same rows)
; out:   dest1/dest2/dest3 = two-byte addresses from the a/b/c tables
;        src1/src2/src3    = same low byte, high byte EOR $60
; dest*/src* are zero-page locations supplied by the code that uses this macro.
; NOTE(review): EOR #$60 maps $20..$3F to $40..$5F, which looks like a switch
; between the two hi-res pages -- confirm against BUILD_HGR_LOOKUP_TABLES.
; Clobbers A and flags; X and Y are preserved.
         ; first row of the block ("a" tables)
         lda   hgrlo3a, x
         sta   <dest1
         sta   <src1
         lda   hgrhi3a, x
         sta   <dest1+1
         eor   #$60
         sta   <src1+1
         ; second row ("b" tables)
         lda   hgrlo3b, x
         sta   <dest2
         sta   <src2
         lda   hgrhi3b, x
         sta   <dest2+1
         eor   #$60
         sta   <src2+1
         ; third row ("c" tables)
         lda   hgrlo3c, x
         sta   <dest3
         sta   <src3
         lda   hgrhi3c, x
         sta   <dest3+1
         eor   #$60
         sta   <src3+1
}
!macro FX_INITONCE_3BIT .CoordinatesFile, .Start {
; One-time setup: load .CoordinatesFile at Coordinates3Bit and plant the EOF marker.
; .CoordinatesFile = file argument handed to +LOAD_FILE_AT
; .Start           = address to continue at on every later entry
;
; On first entry the BIT at InitOnce is executed for its side effects only;
; its opcode byte is then overwritten with $4C (JMP absolute), so later
; entries through InitOnce jump straight to .Start and skip the load.
; NOTE(review): this relies on "bit .Start" assembling in its 3-byte absolute
; form, i.e. .Start not being a zero-page address -- confirm at call sites.
; The RAM-bank macros and LOAD_FILE_AT are defined elsewhere.
InitOnce
         bit   .Start                ; becomes "jmp .Start" after the patch below
         lda   #$4C                  ; JMP absolute opcode
         sta   InitOnce
         +READ_RAM2_WRITE_RAM2
         +LOAD_FILE_AT .CoordinatesFile, Coordinates3Bit
         +READ_RAM1_WRITE_RAM1
         sec
         ror   EndCoordinates3Bit    ; rotate carry into bit 7 of the byte after the data = EOF marker
}
!macro FX_REVERSE_3BIT {
; Reverse, in place, the order of the 2-byte entries stored from
; Coordinates3Bit up to (but not including) Coordinates3Bit+$5000.
;   $f0/$f1 = pointer walking up from the first entry
;   $f2/$f3 = pointer walking down from the last entry
;   X       = pages the front pointer still has to cross ($28 = half of $50)
; The carry flag doubles as a "second pass" flag. Each !byte $24 is the opcode
; of BIT zero-page, which swallows the following one-byte instruction as its
; operand, so that instruction is skipped when execution falls into it.
; Clobbers A, X, Y, flags and $f0-$f3.
ldy #0 ; <Coordinates3Bit
sty $f0
lda #>Coordinates3Bit
sta $f1
lda #<(Coordinates3Bit + $5000 - 2)
sta $f2
lda #>(Coordinates3Bit + $5000 - 2)
sta $f3
ldx #$28 ; #$50/2
clc
; fall-through entry: skip the SEC so carry stays clear for the first byte
!byte $24
- sec
; swap the byte at ($f0),y with the byte at ($f2),y
-- lda ($f0), y
pha
lda ($f2), y
sta ($f0), y
pla
sta ($f2), y
iny
; carry clear = only byte 0 swapped so far: set carry and swap byte 1
bcc -
ldy #0
; carry is still set here: skip the CLC on the first pointer step
!byte $24
- clc
; front pointer += 1
inc $f0
bne +
inc $f1
; front pointer crossed into a new page: count it and stop after $28 pages
dex
beq ++
; back pointer -= 1, borrowing from $f3 when the low byte is 0
+ lda $f2
bne +
dec $f3
+ dec $f2
; carry set = first step done: clear carry and step both pointers once more
bcs -
bcc -- ; always branches
++
}
!macro FX_RIPPLE_3BIT {
; Rearrange the coordinate data at Coordinates3Bit in place by repeatedly
; swapping 2-byte entries between six zero-page slots and six moving
; positions inside the data.
;
; Zero-page layout after the initial copy of the 28 ripplezp bytes to $e0-$fb:
;   $e0-$eb  six 2-byte holding slots
;   $ec-$ed  pointer to the entry being swapped (rebuilt before every use)
;   $ee-$ef  16-bit loop counter, starts at $06AA
;   $f0-$fb  six 16-bit offsets (low, high), start at $0002, $0004, $0006,
;            $000C, $0016, $001A
; Clobbers A, X, Y, flags and $e0-$fb.
ldx #$1B
- lda ripplezp, x
sta $e0, x
dex
bpl -
; outer loop: one pass over all six offsets/slots, X = $0C, $0A, ... $02
--- ldx #$0c
; load offset (Y = low, A = high), advance it with aslmod and store it back
-- ldy $ee, x
lda $ef, x
jsr aslmod
sty $ee, x
sta $ef, x
; $ec/$ed = Coordinates3Bit + offset (low byte of Coordinates3Bit is 0)
sty $ec
clc
adc #>Coordinates3Bit
sta $ed
ldy #0
; $24 = BIT zero-page opcode: skips the SEC on the first pass, so carry stays
; as left by the ADC above (clear unless that addition overflowed)
!byte $24
- sec
; swap the byte at ($ec),y with the matching holding-slot byte at $de+X
lda ($ec), y
pha
lda $de, x
sta ($ec), y
pla
sta $de, x
inx
iny
; carry clear = first byte done: set carry and swap the second byte
bcc -
; X was advanced by 2 during the swap; net effect of the four DEX is X -= 2
dex
dex
dex
dex
bne --
; 16-bit countdown in $ee/$ef; finish when the high byte goes negative
dec $ee
bne ---
dec $ef
bpl ---
bmi ++ ; always branches
; aslmod: advance the 16-bit value Y (low) / A (high).
; The JSR to the next line makes the step below run twice per call:
;   if A >= $28, increment Y
;   shift Y/A left by one bit as a 16-bit value
;   if A >= $50, subtract $50 from A (carry is set by the CMP, so SBC is exact)
aslmod
jsr +
+ cmp #$28
bcc +
iny
+ pha
tya
asl
tay
pla
rol
cmp #$50
bcc +
sbc #$50
+ rts
; initial contents for $e0-$fb, in the order described at the top of the macro
ripplezp
!byte $1F,$F3,$20,$F3,$20,$14,$20,$D3
!byte $1E,$F3,$1F,$54,$00,$00,$AA,$06
!byte $02,$00,$04,$00,$06,$00,$0C,$00
!byte $16,$00,$1A,$00
++
}
!macro FX_PRECOMPUTED_3BIT .coords {
; Build the lookup tables, then run the draw loop over the 2-byte entries
; starting at .coords.
; .coords = address of the first entry to process
;
; Entry format, as decoded by the loop:
;   byte 0: bit 7 set = end of data. Otherwise it is the X index passed to
;           ROW_X_TO_3BIT_BASE_ADDRESSES; values >= $40 also send the column
;           through the extra_cols table.
;   byte 1: high 3 bits = index into copymasks, low 5 bits = column
;           (or index into extra_cols when byte 0 >= $40; extra_cols holds
;           8 entries, so presumably that index is 0..7 -- TODO confirm)
;
; The loop between start and end is assembled to run at address 0
; (!pseudopc 0); +COPY_TO_0 is defined elsewhere and presumably copies it
; there. The operands named input, src1-3 and dest1-3 are rewritten at run time.
; The loop returns via the RTS at Exit3Bit, either on the end marker or when
; KBD has bit 7 set at the moment the low byte of input wraps to 0.
; NOTE(review): by my count the final "bne InputLoop" sits at the maximum
; backward branch distance (-128); adding any byte inside the loop would put
; it out of range -- confirm before editing.
+BUILD_3BIT_HGR_LOOKUP_TABLES
+BUILD_EXTRA_COLS
+BUILD_SPARSE_BITMASKS_3BIT
+COPY_TO_0 start, end
jmp InputLoop
start
!pseudopc 0 {
Exit3Bit rts
InputLoop
ldy #0
; input = the 16-bit operand of this LDA, used as the read pointer
input=*+1
lda .coords
bmi Exit3Bit ; if high bit is 1 then we're done
; carry = (byte 0 >= $40); saved on the stack because the macro below changes flags
cmp #$40
php
tax
+ROW_X_TO_3BIT_BASE_ADDRESSES
; step to byte 1 of the entry (Y is still 0)
inc <input
lda (<input), y
; X = mask index in the high 3 bits, A = low 5 bits
and #%11100000
tax
eor (<input), y
plp
bcc +
; byte 0 was >= $40: translate the low 5 bits through extra_cols
tay
lda extra_cols, y
; Y = column
+ tay
; 2x3 block
; for each of the three rows:
;   dest = dest EOR ((src EOR dest) AND mask)
; i.e. the bits selected by copymasks,x are copied from src to dest
src1=*+1
lda $FDFD, y
eor (<dest1), y
and copymasks, x
eor (<dest1), y
dest1=*+1
sta $FDFD, y
src2=*+1
lda $FDFD, y
eor (<dest2), y
and copymasks, x
eor (<dest2), y
dest2=*+1
sta $FDFD, y
src3=*+1
lda $FDFD, y
eor (<dest3), y
and copymasks, x
eor (<dest3), y
dest3=*+1
sta $FDFD, y
; advance to the next entry; on low-byte wrap, check KBD and bump the high byte
inc <input
bne InputLoop
bit KBD
bmi Exit3Bit
inc <input+1
bne InputLoop ; always branches
}
end
}
!macro FX_PRECOMPUTED_3BIT_DITHER .coords, .endcoords {
; Two-phase variant of the 3-bit draw loop.
; .coords    = address of the first 2-byte entry
; .endcoords = address just past the last entry (phase 1 starts at .endcoords-2)
;
; Phase 1 walks the entries from last to first and ANDs an extra per-column
; dither mask into the copy mask. Phase 2 walks them first to last with all
; three dither pointers aimed at no_masks.
; Entry format, as decoded by the loop:
;   byte 0: bit 7 set = end of data; otherwise index into the row and
;           dither tables, with values >= $40 sending the column through extra_cols
;   byte 1: high 3 bits = index into copymasks, low 5 bits = column
;           (or index into extra_cols when byte 0 >= $40)
; The loop between start and end is assembled to run at address 0
; (!pseudopc 0); +COPY_TO_0, +LDADDR and +BUILD_DITHER_MASKS are defined
; elsewhere. Writes one byte at .coords-2 as the phase 1 end marker.
+BUILD_DITHER_MASKS dithermasks
+BUILD_3BIT_HGR_LOOKUP_TABLES
+BUILD_EXTRA_COLS
+BUILD_SPARSE_BITMASKS_3BIT
; phase 1 - in reverse, with additional masking
+COPY_TO_0 start, end
; set up starting coordinate for reading coordinates in reverse order
; (presumably LDADDR returns the low byte in A and the high byte in Y -- TODO confirm)
+LDADDR .endcoords-2
sta <input
sty <input+1
; set up EOF marker to stop reading coordinates in reverse order
lda #$80
sta .coords-2
; set up logic to advance to next coordinates in reverse order
; (overwrites the code at "next" in zero page with the next2_start..next2_end bytes)
ldx #(next2_end-next2_start-1)
- lda next2_start, x
sta <next, x
dex
bpl -
; set up pointers to dither mask pointers so even rows do even-row dithering and odd rows do odd-row dithering
; X runs over the odd indices $7F..$01; each pass fills index X and index X-1:
;   odd index:  dither1 = evenrow, dither2 = oddrow,  dither3 = evenrow
;   even index: dither1 = oddrow,  dither2 = evenrow, dither3 = oddrow
ldx #$7F
- lda #<evenrow_masks
sta dither1_lo, x
sta dither2_lo-1, x
sta dither3_lo, x
lda #<oddrow_masks
sta dither1_lo-1, x
sta dither2_lo, x
sta dither3_lo-1, x
dex
dex
bpl -
jsr InputLoop
; if KBD has bit 7 set, branch to "start": the byte there is the RTS of
; Exit3Bit in the not-yet-copied image, so this leaves without running phase 2
bit KBD
bmi start
; phase 2 - in order, without additional masking
; (the fresh copy also restores input to .coords and the forward "next" code)
+COPY_TO_0 start, end
; set up pointers to dither mask pointers so all rows do no dithering
ldx #$7F
- lda #<no_masks
sta dither1_lo, x
sta dither2_lo, x
sta dither3_lo, x
dex
bpl -
jmp InputLoop
start
!pseudopc 0 {
Exit3Bit rts
; zero-page pointers to the dither mask rows; only their low bytes are
; rewritten per entry, the high bytes stay as assembled here
dither1_ptr
!word evenrow_masks
dither2_ptr
!word oddrow_masks
dither3_ptr
!word evenrow_masks
InputLoop
ldy #0
; input = the 16-bit operand of this LDA, used as the read pointer
input=*+1
lda .coords
bmi Exit3Bit ; if high bit is 1 then we're done
; carry = (byte 0 >= $40); saved on the stack until the column is decoded
cmp #$40
php
tax
; select the dither mask row for each of the three rows of this block
lda dither1_lo, x
sta dither1_ptr
lda dither2_lo, x
sta dither2_ptr
lda dither3_lo, x
sta dither3_ptr
+ROW_X_TO_3BIT_BASE_ADDRESSES
; read byte 1 of the entry through Y=1 (input itself is not advanced here)
iny
lda (<input), y
; X = mask index in the high 3 bits, A = low 5 bits
and #%11100000
tax
eor (<input), y
plp
bcc +
; byte 0 was >= $40: translate the low 5 bits through extra_cols
tay
lda extra_cols, y
; Y = column
+ tay
; 2x3 block
; for each of the three rows:
;   dest = dest EOR ((src EOR dest) AND copymask AND dithermask[column])
; i.e. only bits selected by both masks are copied from src to dest
src1=*+1
lda $FDFD, y
eor (<dest1), y
and copymasks, x
and (dither1_ptr), y
eor (<dest1), y
dest1=*+1
sta $FDFD, y
src2=*+1
lda $FDFD, y
eor (<dest2), y
and copymasks, x
and (dither2_ptr), y
eor (<dest2), y
dest2=*+1
sta $FDFD, y
src3=*+1
lda $FDFD, y
eor (<dest3), y
and copymasks, x
and (dither3_ptr), y
eor (<dest3), y
dest3=*+1
sta $FDFD, y
; forward stepping: input += 2; on low-byte wrap, return if KBD has bit 7
; set, otherwise bump the high byte
next
inc <input
inc <input
bne +
bit KBD
bmi ++
inc <input+1
+ jmp InputLoop
++ rts
}
end
; reverse stepping, copied over "next" for phase 1: input -= 2; when the low
; byte was 0 before the decrement, borrow from the high byte and return if
; KBD has bit 7 set
next2_start
!pseudopc next {
; PHP/PLP carry the zero test of the old low byte across the two DECs
lda <input
php
dec <input
dec <input
plp
bne +
dec <input+1
bit KBD
bmi ++
+ jmp InputLoop
++ rts
}
next2_end
}