mirror of
https://github.com/a2-4am/4cade.git
synced 2024-11-28 00:52:00 +00:00
shave 60481 cycles from all precomputed 1-bit HGR transitions (~3% faster)
This commit is contained in:
parent
0f86f9671b
commit
735d057736
@ -1,17 +1,6 @@
|
|||||||
;license:MIT
|
;license:MIT
|
||||||
;(c) 2019 by 4am
|
;(c) 2019 by 4am
|
||||||
;
|
;
|
||||||
mirror_src1 = $E8 ; word
|
|
||||||
mirror_dest1 = $EA ; word
|
|
||||||
mirror_src2 = $EC ; word
|
|
||||||
mirror_dest2 = $EE ; word
|
|
||||||
src1 = $F0 ; word
|
|
||||||
dest1 = $F2 ; word
|
|
||||||
src2 = $F4 ; word
|
|
||||||
dest2 = $F6 ; word
|
|
||||||
reverse_input = $FC ; word
|
|
||||||
input = $FE ; word
|
|
||||||
|
|
||||||
copymasks = $0200 ; $100 bytes but sparse, index is 0..6 but in high 3 bits, so $00, $20, $40, $60, $80, $A0, $C0
|
copymasks = $0200 ; $100 bytes but sparse, index is 0..6 but in high 3 bits, so $00, $20, $40, $60, $80, $A0, $C0
|
||||||
mirror_copymasks = $0201
|
mirror_copymasks = $0201
|
||||||
hgrlo = $0300 ; $C0 bytes
|
hgrlo = $0300 ; $C0 bytes
|
||||||
@ -27,7 +16,7 @@ hgr1himirror = $BF40 ; $C0 bytes
|
|||||||
ldx #$27
|
ldx #$27
|
||||||
ldy #$00
|
ldy #$00
|
||||||
- tya
|
- tya
|
||||||
sta .mirror_cols,x
|
sta .mirror_cols, x
|
||||||
iny
|
iny
|
||||||
dex
|
dex
|
||||||
bpl -
|
bpl -
|
||||||
@ -65,53 +54,53 @@ hgr1himirror = $BF40 ; $C0 bytes
|
|||||||
}
|
}
|
||||||
|
|
||||||
; ROW_X_TO_BASE_ADDRESSES
; Looks up two consecutive entries (X and X+1) in the hgrlo / hgr1hi tables
; and stores them as four 16-bit pointers.
; in:  X = index into hgrlo / hgr1hi
; out: dest1, src1 = pointers built from table entry X
;      dest2, src2 = pointers built from table entry X+1
;      A is overwritten; X and Y are not modified
; destN receives the table address unchanged. srcN receives the same low
; byte, but its high byte has bits 5 and 6 toggled by EOR #$60 (for example
; $20 becomes $40).
; NOTE(review): presumably destN is on hi-res page 1 and srcN is the same
; offset on hi-res page 2 -- confirm against the table builder.
; In this listing each instruction appears twice: the version before the
; change, then the version after it (operands written with a leading `<`).
!macro ROW_X_TO_BASE_ADDRESSES {
|
!macro ROW_X_TO_BASE_ADDRESSES {
|
||||||
lda hgrlo,x
|
lda hgrlo, x
|
||||||
sta dest1
|
sta <dest1
|
||||||
sta src1
|
sta <src1
|
||||||
lda hgr1hi,x
|
lda hgr1hi, x
|
||||||
sta dest1+1
|
sta <dest1+1
|
||||||
eor #$60
|
eor #$60
|
||||||
sta src1+1
|
sta <src1+1
|
||||||
lda hgrlo+1,x
|
lda hgrlo+1, x
|
||||||
sta dest2
|
sta <dest2
|
||||||
sta src2
|
sta <src2
|
||||||
lda hgr1hi+1,x
|
lda hgr1hi+1, x
|
||||||
sta dest2+1
|
sta <dest2+1
|
||||||
eor #$60
|
eor #$60
|
||||||
sta src2+1
|
sta <src2+1
|
||||||
}
|
}
|
||||||
|
|
||||||
; ROW_X_TO_MIRROR_ADDRESSES
; Same lookup as ROW_X_TO_BASE_ADDRESSES, but reads the hgrlomirror /
; hgr1himirror tables and fills the mirror_* pointers.
; in:  X = index into hgrlomirror / hgr1himirror
; out: mirror_dest1, mirror_src1 = pointers built from table entry X
;      mirror_dest2, mirror_src2 = pointers built from table entry X+1
;      A is overwritten; X and Y are not modified
; mirror_srcN differs from mirror_destN only in the high byte, which has
; bits 5 and 6 toggled by EOR #$60.
; NOTE(review): the mirror tables presumably map a row to its vertically
; opposite row -- confirm against BUILD_HGR_MIRROR_LOOKUP_TABLES.
; In this listing each instruction appears twice: the version before the
; change, then the version after it (operands written with a leading `<`).
!macro ROW_X_TO_MIRROR_ADDRESSES {
|
!macro ROW_X_TO_MIRROR_ADDRESSES {
|
||||||
lda hgrlomirror,x
|
lda hgrlomirror, x
|
||||||
sta mirror_dest1
|
sta <mirror_dest1
|
||||||
sta mirror_src1
|
sta <mirror_src1
|
||||||
lda hgr1himirror,x
|
lda hgr1himirror, x
|
||||||
sta mirror_dest1+1
|
sta <mirror_dest1+1
|
||||||
eor #$60
|
eor #$60
|
||||||
sta mirror_src1+1
|
sta <mirror_src1+1
|
||||||
lda hgrlomirror+1,x
|
lda hgrlomirror+1, x
|
||||||
sta mirror_dest2
|
sta <mirror_dest2
|
||||||
sta mirror_src2
|
sta <mirror_src2
|
||||||
lda hgr1himirror+1,x
|
lda hgr1himirror+1, x
|
||||||
sta mirror_dest2+1
|
sta <mirror_dest2+1
|
||||||
eor #$60
|
eor #$60
|
||||||
sta mirror_src2+1
|
sta <mirror_src2+1
|
||||||
}
|
}
|
||||||
|
|
||||||
; HIGH_3_LOW_5 .input
; Splits one packed byte into its two fields.
; in:  A = the byte at (.input),y -- the caller must have just loaded it,
;      and Y must still hold the offset used for that load
; out: X = high 3 bits of the byte, left in place (A and %11100000)
;      Y = low 5 bits of the byte
;      A = same value as Y
; The EOR re-reads the byte from memory: XORing the full byte with its own
; high 3 bits cancels them, leaving only the low 5 bits without having to
; save the original value first.
!macro HIGH_3_LOW_5 .input {
|
!macro HIGH_3_LOW_5 .input {
|
||||||
and #%11100000 ; second value: high 3 bits = index into tables to find bitmasks
|
and #%11100000 ; second value: high 3 bits = index into tables to find bitmasks
|
||||||
tax
|
tax
|
||||||
eor (.input),y ; second value: low 5 bits = byte offset within the row (implicitly "and #%00011111")
|
eor (<.input), y ; second value: low 5 bits = byte offset within the row (implicitly "and #%00011111")
|
||||||
tay
|
tay
|
||||||
}
|
}
|
||||||
|
|
||||||
; INC_INPUT_AND_LOOP .loop
; Advances the 16-bit pointer at `input` by one byte, then either jumps to
; .loop or returns to the caller.
; - low byte did not wrap to 0: jump straight to .loop
; - low byte wrapped to 0: test $c000; if bit 7 is set, RTS (the high byte
;   is left unincremented); otherwise increment input+1 and jump to .loop
; $c000 is therefore only tested when the low byte of `input` wraps,
; not on every iteration.
; NOTE(review): $c000 is presumably the Apple II keyboard register (bit 7 =
; key waiting), which would make this an abort-on-keypress check -- confirm.
!macro INC_INPUT_AND_LOOP .loop {
|
!macro INC_INPUT_AND_LOOP .loop {
|
||||||
inc input
|
inc <input
|
||||||
beq +
|
beq +
|
||||||
jmp .loop
|
jmp .loop
|
||||||
+ bit $c000
|
+ bit $c000
|
||||||
bmi +
|
bmi +
|
||||||
inc input+1
|
inc <input+1
|
||||||
jmp .loop
|
jmp .loop
|
||||||
+ rts
|
+ rts
|
||||||
}
|
}
|
||||||
@ -136,44 +125,77 @@ hgr1himirror = $BF40 ; $C0 bytes
|
|||||||
+BUILD_HGR_MIRROR_LOOKUP_TABLES hgrlomirror, hgr1himirror
|
+BUILD_HGR_MIRROR_LOOKUP_TABLES hgrlomirror, hgr1himirror
|
||||||
+BUILD_MIRROR_COLS mirror_cols
|
+BUILD_MIRROR_COLS mirror_cols
|
||||||
+BUILD_SPARSE_BITMASKS copymasks, mirror_copymasks
|
+BUILD_SPARSE_BITMASKS copymasks, mirror_copymasks
|
||||||
; Entry sequence and main loop (old and new versions shown side by side).
; Old version: set up `input` via +LDADDR / +ST16 (presumably pointing it at
; .coords -- those macros are defined elsewhere) and jump to InputLoop.
; New version: everything between `start` and `end` is assembled as if it
; lived at address 0 (!pseudopc 0), copied there at run time, and executed
; from zero page. The pointer names (input, src1, dest1, src2, dest2 and
; the mirror_* equivalents) are set to *+1, i.e. to the operand bytes of
; the instruction that follows each label, so storing into a pointer
; patches that instruction's address directly.
+LDADDR .coords
|
ldx #(end-start) ; copy InputLoop code to zero page
|
||||||
+ST16 input
|
; X counts down from end-start to 1, so this reads the byte at start+(X-1)
- lda start-1, x
|
||||||
jmp InputLoop
|
; NOTE(review): relies on this assembling as zero-page,X so that $FF+X wraps
; around and the byte lands at $00+(X-1) -- confirm the assembler picks the
; zero-page form here
sta $FF, x
|
||||||

dex
|
||||||

bne -
|
||||||

; X is 0 once the loop exits; push two zero bytes as a return address of $0000
txa
|
||||||

pha
|
||||||

pha
|
||||||

; execution falls through into the in-place copy of the RTS at Exit1Bit,
; which pops $0000 and resumes at $0001 -- the zero-page copy of InputLoop
start
|
||||||

!pseudopc 0 {
|
||||||
Exit1Bit rts
|
Exit1Bit rts
|
||||||
InputLoop
|
InputLoop
|
||||||
ldy #0
|
ldy #0
|
||||||
lda (input),y ; first value: HGR row (only 0..95 will be in input array)
|
; `input` names the operand of the LDA below, so that operand IS the input
; pointer: incrementing `input` moves where this load reads from
input=*+1
|
||||||

lda .coords, y ; first value: HGR row (only 0..95 will be in input array)
|
||||||
bmi Exit1Bit ; if > 127 then we're done
|
bmi Exit1Bit ; if > 127 then we're done
|
||||||
tax
|
tax
|
||||||
+ROW_X_TO_BASE_ADDRESSES
|
+ROW_X_TO_BASE_ADDRESSES
|
||||||
+ROW_X_TO_MIRROR_ADDRESSES
|
+ROW_X_TO_MIRROR_ADDRESSES
|
||||||

|
||||||
inc input
|
inc input
|
||||||
lda (input),y
|
lda (input), y
|
||||||
+HIGH_3_LOW_5 input
|
+HIGH_3_LOW_5 input
|
||||||

|
||||||
; main 1x2 block in top-left quadrant
|
; main 1x2 block in top-left quadrant
|
||||||
+COPY_BIT src1, dest1, copymasks
|
; new version inlines the copy: dest = dest eor ((src eor dest) and mask),
; i.e. take the masked bits from src and keep the remaining bits of dest.
; src1/dest1 name the operand bytes of the LDA/STA; $FDFD is a placeholder
; that ROW_X_TO_BASE_ADDRESSES overwrites earlier in every pass.
src1=*+1
|
||||||
+COPY_BIT src2, dest2, copymasks
|
lda $FDFD, y
|
||||||

eor (<dest1), y
|
||||||

and copymasks, x
|
||||||

eor (<dest1), y
|
||||||

dest1=*+1
|
||||||

sta $FDFD, y
|
||||||

src2=*+1
|
||||||

lda $FDFD, y
|
||||||

eor (<dest2), y
|
||||||

and copymasks, x
|
||||||

eor (<dest2), y
|
||||||

dest2=*+1
|
||||||

sta $FDFD, y
|
||||||

|
||||||
; corresponding 1x2 block in top-right quadrant (same row, opposite column)
|
; corresponding 1x2 block in top-right quadrant (same row, opposite column)
|
||||||
lda mirror_cols,y
|
lda mirror_cols, y
|
||||||
tay
|
tay
|
||||||
+COPY_BIT src1, dest1, mirror_copymasks
|
+COPY_BIT src1, dest1, mirror_copymasks
|
||||||
+COPY_BIT src2, dest2, mirror_copymasks
|
+COPY_BIT src2, dest2, mirror_copymasks
|
||||||

|
||||||
; corresponding 1x2 block in bottom-right quadrant (opposite row, opposite column)
|
; corresponding 1x2 block in bottom-right quadrant (opposite row, opposite column)
|
||||||
+COPY_BIT mirror_src1, mirror_dest1, mirror_copymasks
|
; same inlined copy as the top-left block, using the mirror_* operands that
; ROW_X_TO_MIRROR_ADDRESSES patches earlier in every pass
mirror_src1=*+1
|
||||||
+COPY_BIT mirror_src2, mirror_dest2, mirror_copymasks
|
lda $FDFD, y
|
||||||

eor (<mirror_dest1), y
|
||||||

and mirror_copymasks, x
|
||||||

eor (<mirror_dest1), y
|
||||||

mirror_dest1=*+1
|
||||||

sta $FDFD, y
|
||||||

mirror_src2=*+1
|
||||||

lda $FDFD, y
|
||||||

eor (<mirror_dest2), y
|
||||||

and mirror_copymasks, x
|
||||||

eor (<mirror_dest2), y
|
||||||

mirror_dest2=*+1
|
||||||

sta $FDFD, y
|
||||||

|
||||||
; corresponding 1x2 block in bottom-left quadrant (opposite row, original column)
|
; corresponding 1x2 block in bottom-left quadrant (opposite row, original column)
|
||||||
lda mirror_cols,y
|
lda mirror_cols, y
|
||||||
tay
|
tay
|
||||||
+COPY_BIT mirror_src1, mirror_dest1, copymasks
|
+COPY_BIT mirror_src1, mirror_dest1, copymasks
|
||||||
+COPY_BIT mirror_src2, mirror_dest2, copymasks
|
+COPY_BIT mirror_src2, mirror_dest2, copymasks
|
||||||

|
||||||
+INC_INPUT_AND_LOOP InputLoop
|
+INC_INPUT_AND_LOOP InputLoop
|
||||||
rts
|
}
|
||||||

end
|
||||||
!if * and 1 {
|
!if * and 1 {
|
||||||
!byte 0 ;align 2 but avoids the fake allocation bug if it was aligned already
|
!byte 0 ;align 2 but avoids the fake allocation bug if it was aligned already
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,16 @@
|
|||||||
!to "build/FX/RADIAL",plain
|
!to "build/FX/RADIAL",plain
|
||||||
*=$6000
|
*=$6000
|
||||||
|
|
||||||
|
mirror_src1 = $E8 ; word
|
||||||
|
mirror_dest1 = $EA ; word
|
||||||
|
mirror_src2 = $EC ; word
|
||||||
|
mirror_dest2 = $EE ; word
|
||||||
|
src1 = $F0 ; word
|
||||||
|
dest1 = $F2 ; word
|
||||||
|
src2 = $F4 ; word
|
||||||
|
dest2 = $F6 ; word
|
||||||
|
input = $FE ; word
|
||||||
|
|
||||||
!source "src/fx/fx.hgr.precomputed.1bit.a"
|
!source "src/fx/fx.hgr.precomputed.1bit.a"
|
||||||
|
|
||||||
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi
|
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi
|
||||||
|
@ -1,3 +1,14 @@
|
|||||||
|
mirror_src1 = $E8 ; word
|
||||||
|
mirror_dest1 = $EA ; word
|
||||||
|
mirror_src2 = $EC ; word
|
||||||
|
mirror_dest2 = $EE ; word
|
||||||
|
src1 = $F0 ; word
|
||||||
|
dest1 = $F2 ; word
|
||||||
|
src2 = $F4 ; word
|
||||||
|
dest2 = $F6 ; word
|
||||||
|
reverse_input = $FC ; word
|
||||||
|
input = $FE ; word
|
||||||
|
|
||||||
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi
|
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi
|
||||||
+BUILD_HGR_MIRROR_LOOKUP_TABLES hgrlomirror, hgr1himirror
|
+BUILD_HGR_MIRROR_LOOKUP_TABLES hgrlomirror, hgr1himirror
|
||||||
+BUILD_MIRROR_COLS mirror_cols
|
+BUILD_MIRROR_COLS mirror_cols
|
||||||
|
@ -5,6 +5,17 @@
|
|||||||
!to "build/FX/RADIAL3",plain
|
!to "build/FX/RADIAL3",plain
|
||||||
*=$6000
|
*=$6000
|
||||||
|
|
||||||
|
mirror_src1 = $E8 ; word
|
||||||
|
mirror_dest1 = $EA ; word
|
||||||
|
mirror_src2 = $EC ; word
|
||||||
|
mirror_dest2 = $EE ; word
|
||||||
|
src1 = $F0 ; word
|
||||||
|
dest1 = $F2 ; word
|
||||||
|
src2 = $F4 ; word
|
||||||
|
dest2 = $F6 ; word
|
||||||
|
reverse_input = $FC ; word
|
||||||
|
input = $FE ; word
|
||||||
|
|
||||||
!source "src/fx/fx.hgr.precomputed.1bit.a"
|
!source "src/fx/fx.hgr.precomputed.1bit.a"
|
||||||
|
|
||||||
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi
|
+BUILD_HGR_LOOKUP_TABLES hgrlo, hgr1hi
|
||||||
|
Loading…
Reference in New Issue
Block a user