shave 60481 cycles from all precomputed 1-bit HGR transitions (~3% faster)

This commit is contained in:
4am 2020-09-06 14:02:11 -04:00
parent 0f86f9671b
commit 735d057736
4 changed files with 105 additions and 51 deletions

View File

@ -1,17 +1,6 @@
;license:MIT
;(c) 2019 by 4am
;
; Fixed addresses used by the effect code.
; $E8..$FF are zero-page locations; each one marked "word" occupies two
; consecutive bytes (low byte first, as the macros store lo then +1 for hi).
; NOTE(review): this listing is a rendered diff -- the zero-page equates below
; may be the lines that the commit removed from this file; confirm against the
; real revision before relying on them.
mirror_src1 = $E8 ; word
mirror_dest1 = $EA ; word
mirror_src2 = $EC ; word
mirror_dest2 = $EE ; word
src1 = $F0 ; word
dest1 = $F2 ; word
src2 = $F4 ; word
dest2 = $F6 ; word
reverse_input = $FC ; word
input = $FE ; word
; Lookup tables at fixed addresses outside zero page.
copymasks = $0200 ; $100 bytes but sparse, index is 0..6 but in high 3 bits, so $00, $20, $40, $60, $80, $A0, $C0
; mirror_copymasks starts one byte after copymasks, so both sparse tables
; interleave within the same $100-byte page.
mirror_copymasks = $0201
hgrlo = $0300 ; $C0 bytes
@ -27,7 +16,7 @@ hgr1himirror = $BF40 ; $C0 bytes
ldx #$27
ldy #$00
- tya
sta .mirror_cols,x
sta .mirror_cols, x
iny
dex
bpl -
@ -65,53 +54,53 @@ hgr1himirror = $BF40 ; $C0 bytes
}
; ROW_X_TO_BASE_ADDRESSES
; in:       X = index into the hgrlo / hgr1hi tables
; out:      src1/dest1 = pointers built from table entry X
;           src2/dest2 = pointers built from table entry X+1
;           Each src/dest pair shares its low byte; the src high byte is the
;           dest high byte with bits 5 and 6 toggled (eor #$60).
;           Presumably this turns an address in $2000-$3FFF into the matching
;           address in $4000-$5FFF -- TODO confirm against the table contents.
; clobbers: A, flags (X is preserved)
; NOTE(review): this text comes from a rendered diff, so every store appears
; twice: once plain and once with the "<" operand prefix. Presumably only one
; form exists in any real revision; the "<" form looks intended to force a
; one-byte (zero-page) operand -- confirm against the assembler docs.
!macro ROW_X_TO_BASE_ADDRESSES {
lda hgrlo,x
sta dest1
sta src1
lda hgr1hi,x
sta dest1+1
lda hgrlo, x
sta <dest1
sta <src1
lda hgr1hi, x
sta <dest1+1
eor #$60
sta src1+1
lda hgrlo+1,x
sta dest2
sta src2
lda hgr1hi+1,x
sta dest2+1
sta <src1+1
lda hgrlo+1, x
sta <dest2
sta <src2
lda hgr1hi+1, x
sta <dest2+1
eor #$60
sta src2+1
sta <src2+1
}
; ROW_X_TO_MIRROR_ADDRESSES
; in:       X = index into the hgrlomirror / hgr1himirror tables
; out:      mirror_src1/mirror_dest1 = pointers built from table entry X
;           mirror_src2/mirror_dest2 = pointers built from table entry X+1
;           Each src/dest pair shares its low byte; the src high byte is the
;           dest high byte with bits 5 and 6 toggled (eor #$60).
; clobbers: A, flags (X is preserved)
; NOTE(review): this text comes from a rendered diff, so every store appears
; twice: once plain and once with the "<" operand prefix. Presumably only one
; form exists in any real revision of the file -- confirm.
!macro ROW_X_TO_MIRROR_ADDRESSES {
lda hgrlomirror,x
sta mirror_dest1
sta mirror_src1
lda hgr1himirror,x
sta mirror_dest1+1
lda hgrlomirror, x
sta <mirror_dest1
sta <mirror_src1
lda hgr1himirror, x
sta <mirror_dest1+1
eor #$60
sta mirror_src1+1
lda hgrlomirror+1,x
sta mirror_dest2
sta mirror_src2
lda hgr1himirror+1,x
sta mirror_dest2+1
sta <mirror_src1+1
lda hgrlomirror+1, x
sta <mirror_dest2
sta <mirror_src2
lda hgr1himirror+1, x
sta <mirror_dest2+1
eor #$60
sta mirror_src2+1
sta <mirror_src2+1
}
; HIGH_3_LOW_5 .input
; Split one byte into a table index (X) and a byte offset (Y).
; in:       A = the byte at (.input),y -- the macro assumes the caller has just
;           loaded it, because the eor below re-reads the same location
;           Y = offset used for that load
; out:      X = A with the low 5 bits cleared (value & %11100000)
;           Y = low 5 bits of the byte: (v & $E0) eor v == v & $1F
; clobbers: A, flags
; NOTE(review): the two eor lines look like the before/after forms of a single
; instruction in a rendered diff; executing both would undo the masking.
!macro HIGH_3_LOW_5 .input {
and #%11100000 ; second value: high 3 bits = index into tables to find bitmasks
tax
eor (.input),y ; second value: low 5 bits = byte offset within the row (implicitly "and #%00011111")
eor (<.input), y ; second value: low 5 bits = byte offset within the row (implicitly "and #%00011111")
tay
}
; INC_INPUT_AND_LOOP .loop
; Advance the 16-bit pointer stored at input by one byte and jump to .loop.
; Fast path: the low byte did not wrap to 0, so jump straight back.
; Slow path (low byte wrapped): test bit 7 of $C000 first; if it is set the
; macro executes rts instead of looping, otherwise it bumps the high byte and
; jumps back.
; NOTE(review): $C000 is presumably the Apple II keyboard register (bit 7 set
; when a key is waiting), making this a "stop on keypress" check that only
; runs once every 256 input bytes -- confirm.
; NOTE(review): the paired plain / "<"-prefixed inc lines look like the
; before/after forms of one instruction in a rendered diff.
!macro INC_INPUT_AND_LOOP .loop {
inc input
inc <input
beq +
jmp .loop
+ bit $c000
bmi +
inc input+1
inc <input+1
jmp .loop
+ rts
}
@ -136,44 +125,77 @@ hgr1himirror = $BF40 ; $C0 bytes
; One-time setup before the main loop. The three BUILD_* macros are defined
; outside this view; they are invoked with the table addresses to fill.
+BUILD_HGR_MIRROR_LOOKUP_TABLES hgrlomirror, hgr1himirror
+BUILD_MIRROR_COLS mirror_cols
+BUILD_SPARSE_BITMASKS copymasks, mirror_copymasks
; NOTE(review): the next three lines (point input at .coords, then jump to
; InputLoop) and the copy loop after them look like the before/after versions
; of the loop entry in a rendered diff -- presumably only one of the two
; exists in any real revision of the file.
+LDADDR .coords
+ST16 input
jmp InputLoop
; Copy the (end-start) bytes between the start and end labels, last byte first.
; X runs from (end-start) down to 1, reading start-1+X and writing $FF+X.
; NOTE(review): this relies on the store being encoded as zero-page,X so that
; $FF+X wraps around and X=1 lands on address $00 -- confirm the encoding.
ldx #(end-start) ; copy InputLoop code to zero page
- lda start-1, x
sta $FF, x
dex
bne -
; X is 0 here. Push two zero bytes, then fall through into the rts at the
; start label: rts pulls $0000 and resumes at $0001, which is where InputLoop
; sits in the copied code (the rts itself occupies address $00).
txa
pha
pha
start
; Main loop, assembled as if located at address 0 (it is copied to zero page
; before it runs). Each pass reads two bytes from the input stream -- a row
; and a packed mask-index/byte-offset -- and copies the masked bits from the
; src to the dest pointers at four positions (see the quadrant comments below).
;
; Labels of the form "name=*+1" point at the two operand bytes of the
; instruction that follows. The ROW_X_TO_* macros store into those names, so
; they patch the lda/sta operands directly; "(<name), y" reads the same two
; bytes as an indirect pointer. $FDFD is only a placeholder operand that is
; overwritten before the instruction executes.
; NOTE(review): the placeholder is presumably 16-bit so the assembler picks
; absolute,Y addressing -- confirm.
;
; Each inlined copy computes dest = dest eor ((src eor dest) and mask), i.e.
; it takes the bits selected by the mask from src and keeps the rest of dest.
;
; NOTE(review): this text comes from a rendered diff. Adjacent near-duplicate
; lines (with/without a space after the comma, "(input),y" next to
; ".coords, y", a +COPY_BIT pair next to its inlined expansion) look like the
; before/after forms of the same code; presumably only one of each exists in
; any real revision of the file.
!pseudopc 0 {
; Exit1Bit sits at address $00 so the setup code can enter the loop via rts.
Exit1Bit rts
InputLoop
ldy #0
lda (input),y ; first value: HGR row (only 0..95 will be in input array)
; input names the operand of the load below, so the input pointer lives
; inside the instruction itself.
input=*+1
lda .coords, y ; first value: HGR row (only 0..95 will be in input array)
bmi Exit1Bit ; if > 127 then we're done
tax
+ROW_X_TO_BASE_ADDRESSES
+ROW_X_TO_MIRROR_ADDRESSES
; Step to the second byte of the pair; only the low byte is incremented here.
inc input
lda (input),y
lda (input), y
+HIGH_3_LOW_5 input
; main 1x2 block in top-left quadrant
+COPY_BIT src1, dest1, copymasks
+COPY_BIT src2, dest2, copymasks
src1=*+1
lda $FDFD, y
eor (<dest1), y
and copymasks, x
eor (<dest1), y
dest1=*+1
sta $FDFD, y
src2=*+1
lda $FDFD, y
eor (<dest2), y
and copymasks, x
eor (<dest2), y
dest2=*+1
sta $FDFD, y
; corresponding 1x2 block in top-right quadrant (same row, opposite column)
; Y is replaced by its entry in mirror_cols.
lda mirror_cols,y
lda mirror_cols, y
tay
+COPY_BIT src1, dest1, mirror_copymasks
+COPY_BIT src2, dest2, mirror_copymasks
; corresponding 1x2 block in bottom-right quadrant (opposite row, opposite column)
+COPY_BIT mirror_src1, mirror_dest1, mirror_copymasks
+COPY_BIT mirror_src2, mirror_dest2, mirror_copymasks
mirror_src1=*+1
lda $FDFD, y
eor (<mirror_dest1), y
and mirror_copymasks, x
eor (<mirror_dest1), y
mirror_dest1=*+1
sta $FDFD, y
mirror_src2=*+1
lda $FDFD, y
eor (<mirror_dest2), y
and mirror_copymasks, x
eor (<mirror_dest2), y
mirror_dest2=*+1
sta $FDFD, y
; corresponding 1x2 block in bottom-left quadrant (opposite row, original column)
; Y goes through mirror_cols a second time here.
lda mirror_cols,y
lda mirror_cols, y
tay
+COPY_BIT mirror_src1, mirror_dest1, copymasks
+COPY_BIT mirror_src2, mirror_dest2, copymasks
+INC_INPUT_AND_LOOP InputLoop
; Every path through INC_INPUT_AND_LOOP ends in jmp or rts, so nothing falls
; through to this rts.
rts
}
end
; Emit one zero byte only when the current assembly address is odd, so that
; whatever follows starts on an even address.
!if * and 1 {
!byte 0 ;align 2 but avoids the fake allocation bug if it was aligned already
}

View File

@ -5,6 +5,16 @@
!to "build/FX/RADIAL",plain
*=$6000
; Zero-page pointer slots, two bytes each, defined before the !source line
; below -- presumably because the shared code in that file expects the
; including file to supply them; confirm.
mirror_src1 = $E8 ; word
mirror_dest1 = $EA ; word
mirror_src2 = $EC ; word
mirror_dest2 = $EE ; word
src1 = $F0 ; word
dest1 = $F2 ; word
src2 = $F4 ; word
dest2 = $F6 ; word
input = $FE ; word
!source "src/fx/fx.hgr.precomputed.1bit.a"
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi

View File

@ -1,3 +1,14 @@
; Zero-page pointer slots, two bytes each (all addresses are below $100).
; NOTE(review): the name of the file these lines belong to is not visible in
; this listing.
mirror_src1 = $E8 ; word
mirror_dest1 = $EA ; word
mirror_src2 = $EC ; word
mirror_dest2 = $EE ; word
src1 = $F0 ; word
dest1 = $F2 ; word
src2 = $F4 ; word
dest2 = $F6 ; word
reverse_input = $FC ; word
input = $FE ; word
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi
+BUILD_HGR_MIRROR_LOOKUP_TABLES hgrlomirror, hgr1himirror
+BUILD_MIRROR_COLS mirror_cols

View File

@ -5,6 +5,17 @@
!to "build/FX/RADIAL3",plain
*=$6000
; Zero-page pointer slots, two bytes each, defined before the !source line
; below -- presumably because the shared code in that file expects the
; including file to supply them; confirm.
mirror_src1 = $E8 ; word
mirror_dest1 = $EA ; word
mirror_src2 = $EC ; word
mirror_dest2 = $EE ; word
src1 = $F0 ; word
dest1 = $F2 ; word
src2 = $F4 ; word
dest2 = $F6 ; word
reverse_input = $FC ; word
input = $FE ; word
!source "src/fx/fx.hgr.precomputed.1bit.a"
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi