Merge pull request #59 from jbrandwood/master

Reorganize 6502 decompress_faster depackers for smaller size and grea…
Emmanuel Marty 2021-03-26 14:22:42 +01:00 committed by GitHub
commit 42aad36b4d
2 changed files with 175 additions and 454 deletions

File 1 of 2: the 6502 LZSA1 "decompress_faster" depacker (ACME assembler)

@@ -7,12 +7,9 @@
 ;
 ; This code is written for the ACME assembler.
 ;
-; Optional code is presented for one minor 6502 optimization that breaks
-; compatibility with the current LZSA1 format standard.
-;
-; The code is 168 bytes for the small version, and 205 bytes for the normal.
-;
-; Copyright John Brandwood 2019.
+; The code is 167 bytes for the small version, and 192 bytes for the normal.
+;
+; Copyright John Brandwood 2021.
 ;
 ; Distributed under the Boost Software License, Version 1.0.
 ; (See accompanying file LICENSE_1_0.txt or copy at
@@ -30,80 +27,11 @@
 ;
 ;
-; Choose size over space (within sane limits)?
+; Choose size over decompression speed (within sane limits)?
 ;
 LZSA_SMALL_SIZE = 0
-;
-; Remove code inlining to save space?
-;
-; This saves 15 bytes of code at the cost of 7% speed.
-;
-!if LZSA_SMALL_SIZE {
-LZSA_NO_INLINE = 1
-} else {
-LZSA_NO_INLINE = 0
-}
-;
-; Use smaller code for copying literals?
-;
-; This saves 11 bytes of code at the cost of 15% speed.
-;
-!if LZSA_SMALL_SIZE {
-LZSA_SHORT_CP = 1
-} else {
-LZSA_SHORT_CP = 0
-}
-;
-; Use smaller code for copying literals?
-;
-; This saves 11 bytes of code at the cost of 30% speed.
-;
-!if LZSA_SMALL_SIZE {
-LZSA_SHORT_LZ = 1
-} else {
-LZSA_SHORT_LZ = 0
-}
-;
-; Macro to increment the source pointer to the next page.
-;
-; This should call a subroutine to determine if a bank
-; has been crossed, and a new bank should be paged in.
-;
-!macro LZSA_INC_PAGE {
-                inc <lzsa_srcptr + 1
-}
-;
-; Macro to read a byte from the compressed source data.
-;
-!if LZSA_NO_INLINE {
-!macro LZSA_GET_SRC {
-                jsr lzsa1_get_byte
-}
-} else {
-!macro LZSA_GET_SRC {
-                lda (lzsa_srcptr),y
-                inc <lzsa_srcptr + 0
-                bne .skip
-                +LZSA_INC_PAGE
-.skip:
-}
-}
 ; ***************************************************************************
@@ -112,10 +40,7 @@ LZSA_SHORT_LZ = 0
 ; Data usage is last 8 bytes of zero-page.
 ;
-!if (LZSA_SHORT_CP | LZSA_SHORT_LZ) {
 lzsa_length = $F8 ; 1 byte.
-}
 lzsa_cmdbuf = $F9 ; 1 byte.
 lzsa_winptr = $FA ; 1 word.
 lzsa_srcptr = $FC ; 1 word.
@@ -127,6 +52,7 @@ LZSA_DST_LO = $FE
 LZSA_DST_HI = $FF
+
 ; ***************************************************************************
 ; ***************************************************************************
 ;
@@ -147,118 +73,114 @@ lzsa1_unpack: ldy #0 ; Initialize source index.
 ; N.B. X=0 is expected and guaranteed when we get here.
 ;
-.cp_length:     +LZSA_GET_SRC
-                sta <lzsa_cmdbuf        ; Preserve this for later.
+.cp_length:     !if LZSA_SMALL_SIZE {
+                jsr .get_byte
+                } else {
+                lda (lzsa_srcptr),y
+                inc <lzsa_srcptr + 0
+                bne .cp_skip0
+                inc <lzsa_srcptr + 1
+                }
+.cp_skip0:      sta <lzsa_cmdbuf        ; Preserve this for later.
                 and #$70                ; Extract literal length.
+                lsr                     ; Set CC before ...
                 beq .lz_offset          ; Skip directly to match?
                 lsr                     ; Get 3-bit literal length.
                 lsr
                 lsr
-                lsr
                 cmp #$07                ; Extended length?
-                bne .got_cp_len
+                bcc .inc_cp_len
                 jsr .get_length         ; CS from CMP, X=0.
-!if LZSA_SHORT_CP {
-.got_cp_len:    cmp #0                  ; Check the lo-byte of length.
-                beq .put_cp_len
-                inx                     ; Increment # of pages to copy.
+                ora #0                  ; Check the lo-byte of length
+                beq .put_cp_len         ; without effecting CC.
+.inc_cp_len:    inx                     ; Increment # of pages to copy.
 .put_cp_len:    stx <lzsa_length
                 tax
-.cp_page:       lda (lzsa_srcptr),y
-                sta (lzsa_dstptr),y
+.cp_page:       lda (lzsa_srcptr),y     ; CC throughout the execution of
+                sta (lzsa_dstptr),y     ; of this .cp_page loop.
                 inc <lzsa_srcptr + 0
-                bne .skip1
+                bne .cp_skip1
                 inc <lzsa_srcptr + 1
-.skip1:         inc <lzsa_dstptr + 0
-                bne .skip2
+.cp_skip1:      inc <lzsa_dstptr + 0
+                bne .cp_skip2
                 inc <lzsa_dstptr + 1
-.skip2:         dex
+.cp_skip2:      dex
                 bne .cp_page
                 dec <lzsa_length        ; Any full pages left to copy?
                 bne .cp_page
-} else {
-.got_cp_len:    tay                     ; Check the lo-byte of length.
-                beq .cp_page
-                inx                     ; Increment # of pages to copy.
-.get_cp_src:    clc                     ; Calc address of partial page.
-                adc <lzsa_srcptr + 0
-                sta <lzsa_srcptr + 0
-                bcs .get_cp_dst
-                dec <lzsa_srcptr + 1
-.get_cp_dst:    tya
-                clc                     ; Calc address of partial page.
-                adc <lzsa_dstptr + 0
-                sta <lzsa_dstptr + 0
-                bcs .get_cp_idx
-                dec <lzsa_dstptr + 1
-.get_cp_idx:    tya                     ; Negate the lo-byte of length.
-                eor #$FF
-                tay
-                iny
-.cp_page:       lda (lzsa_srcptr),y
-                sta (lzsa_dstptr),y
-                iny
-                bne .cp_page
-                inc <lzsa_srcptr + 1
-                inc <lzsa_dstptr + 1
-                dex                     ; Any full pages left to copy?
-                bne .cp_page
-}
 ;
 ; Copy bytes from decompressed window.
 ;
 ; N.B. X=0 is expected and guaranteed when we get here.
 ;
-.lz_offset:     +LZSA_GET_SRC
-                clc
-                adc <lzsa_dstptr + 0
+.lz_offset:     !if LZSA_SMALL_SIZE {
+                jsr .get_byte
+                } else {
+                lda (lzsa_srcptr),y
+                inc <lzsa_srcptr + 0
+                bne .offset_lo
+                inc <lzsa_srcptr + 1
+                }
+.offset_lo:     adc <lzsa_dstptr + 0    ; Always CC from .cp_page loop.
                 sta <lzsa_winptr + 0
                 lda #$FF
                 bit <lzsa_cmdbuf
-                bpl .hi_offset
-                +LZSA_GET_SRC
-.hi_offset:     adc <lzsa_dstptr + 1
-                sta <lzsa_winptr + 1
+                bpl .offset_hi
+                !if LZSA_SMALL_SIZE {
+                jsr .get_byte
+                } else {
+                lda (lzsa_srcptr),y
+                inc <lzsa_srcptr + 0
+                bne .offset_hi
+                inc <lzsa_srcptr + 1
+                }
+.offset_hi:     adc <lzsa_dstptr + 1    ; lzsa_winptr < lzsa_dstptr, so
+                sta <lzsa_winptr + 1    ; always leaves CS.
+!if LZSA_SMALL_SIZE {
 .lz_length:     lda <lzsa_cmdbuf        ; X=0 from previous loop.
                 and #$0F
                 adc #$03 - 1            ; CS from previous ADC.
                 cmp #$12                ; Extended length?
-                bne .got_lz_len
-                jsr .get_length         ; CS from CMP, X=0.
-!if LZSA_SHORT_LZ {
-.got_lz_len:    cmp #0                  ; Check the lo-byte of length.
-                beq .put_lz_len
-                inx                     ; Increment # of pages to copy.
+                bcc .inc_lz_len
+                jsr .get_length         ; CS from CMP, X=0, returns CC.
+                ora #0                  ; Check the lo-byte of length
+                beq .put_lz_len         ; without effecting CC.
+.inc_lz_len:    inx                     ; Increment # of pages to copy.
 .put_lz_len:    stx <lzsa_length
                 tax
-.lz_page:       lda (lzsa_winptr),y
-                sta (lzsa_dstptr),y
+.lz_page:       lda (lzsa_winptr),y     ; CC throughout the execution of
+                sta (lzsa_dstptr),y     ; of this .lz_page loop.
                 inc <lzsa_winptr + 0
                 bne .skip3
                 inc <lzsa_winptr + 1
@@ -274,19 +196,26 @@ lzsa1_unpack: ldy #0 ; Initialize source index.
 } else {
+.lz_length:     lda <lzsa_cmdbuf        ; X=0 from previous loop.
+                and #$0F
+                adc #$03 - 1            ; CS from previous ADC.
+                cmp #$12                ; Extended length?
+                bcc .got_lz_len
+                jsr .get_length         ; CS from CMP, X=0, returns CC.
 .got_lz_len:    tay                     ; Check the lo-byte of length.
                 beq .lz_page
                 inx                     ; Increment # of pages to copy.
-.get_lz_win:    clc                     ; Calc address of partial page.
-                adc <lzsa_winptr + 0
-                sta <lzsa_winptr + 0
+.get_lz_win:    adc <lzsa_winptr + 0    ; Calc address of partial page.
+                sta <lzsa_winptr + 0    ; Always CC from previous CMP.
                 bcs .get_lz_dst
                 dec <lzsa_winptr + 1
-.get_lz_dst:    tya
-                clc                     ; Calc address of partial page.
+.get_lz_dst:    tya                     ; Calc address of partial page.
+                clc
                 adc <lzsa_dstptr + 0
                 sta <lzsa_dstptr + 0
                 bcs .get_lz_idx
@@ -311,7 +240,7 @@ lzsa1_unpack: ldy #0 ; Initialize source index.
 }
 ;
-; Get 16-bit length in X:A register pair.
+; Get 16-bit length in X:A register pair, return with CC.
 ;
 ; N.B. X=0 is expected and guaranteed when we get here.
 ;
@@ -320,31 +249,30 @@ lzsa1_unpack: ldy #0 ; Initialize source index.
                 adc (lzsa_srcptr),y     ; the length.
                 inc <lzsa_srcptr + 0
                 bne .skip_inc
-                +LZSA_INC_PAGE
+                inc <lzsa_srcptr + 1
 .skip_inc:      bcc .got_length         ; No overflow means done.
-                cmp #$00                ; Overflow to 256 or 257?
-                beq .extra_word
-.extra_byte:    inx
-                jmp lzsa1_get_byte      ; So rare, this can be slow!
-.extra_word:    jsr lzsa1_get_byte      ; So rare, this can be slow!
+                cmp #$01                ; Overflow to 256 or 257?
+                bcc .extra_word
+.extra_byte:    clc                     ; MUST return CC!
+                inx
+                bne .get_byte           ; Always NZ from previous INX.
+.extra_word:    jsr .get_byte           ; So rare, this can be slow!
                 pha
-                jsr lzsa1_get_byte      ; So rare, this can be slow!
+                jsr .get_byte           ; So rare, this can be slow!
                 tax
                 beq .finished           ; Length-hi == 0 at EOF.
                 pla                     ; Length-lo.
                 rts
-lzsa1_get_byte:
-                lda (lzsa_srcptr),y     ; Subroutine version for when
+.get_byte:      lda (lzsa_srcptr),y     ; Subroutine version for when
                 inc <lzsa_srcptr + 0    ; inlining isn't advantageous.
-                beq lzsa1_next_page
+                beq .next_page
 .got_length:    rts
-lzsa1_next_page:
-                inc <lzsa_srcptr + 1    ; Inc & test for bank overflow.
+.next_page:     inc <lzsa_srcptr + 1    ; Inc & test for bank overflow.
                 rts
 .finished:      pla                     ; Length-lo.
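
Aside for readers following the diff: both the old and the new code walk the same LZSA1 block format — a command byte O|LLL|MMMM, literal bytes, then a negative match offset and a match length. The C sketch below models that stream; lzsa1_depack and get_length are names invented for this note, the escape values fall straight out of the 6502 arithmetic above (256-7=249 for literals, 256-18=238 for matches), and this is not the lzsa project's reference decoder.

    #include <stddef.h>
    #include <stdint.h>

    static const uint8_t *gp;               /* plays the role of lzsa_srcptr */

    /* Mirrors .get_length: add one byte to the running total ('base' is 7
     * for literals, 18 for matches). Overflow to exactly 256 means a 16-bit
     * length follows (hi-byte 0 = end of data, see .finished); any larger
     * overflow (257 in valid streams) means 256 plus one more byte. */
    static size_t get_length(unsigned base)
    {
        unsigned sum = base + *gp++;
        if (sum < 256)  return sum;         /* no overflow: done */
        if (sum > 256)  return 256 + *gp++; /* the .extra_byte path */
        sum = gp[0] | (gp[1] << 8);         /* little-endian 16-bit length */
        gp += 2;
        return (sum & 0xFF00) ? sum : 0;    /* hi-byte 0 signals EOD */
    }

    size_t lzsa1_depack(uint8_t *dst, const uint8_t *src)
    {
        uint8_t *out = dst;
        gp = src;
        for (;;) {
            uint8_t token = *gp++;          /* O|LLL|MMMM command byte */
            size_t len = (token >> 4) & 7;  /* the AND #$70 + LSRs above */
            if (len == 7 && !(len = get_length(7)))
                break;
            while (len--) *out++ = *gp++;   /* the .cp_page copy loop */

            unsigned neg = *gp++;           /* lo-byte of negative offset */
            neg |= (token & 0x80) ? (unsigned)*gp++ << 8 : 0xFF00u;
            uint8_t *win = out - (0x10000 - neg);   /* lzsa_winptr */

            len = (token & 15) + 3;         /* the AND #$0F, +3 above */
            if (len == 18 && !(len = get_length(18)))
                break;
            while (len--) *out++ = *win++;  /* the .lz_page copy loop */
        }
        return (size_t)(out - dst);
    }

The hi-byte of a 16-bit extended length doubling as the end-of-data test is why .get_length can fall into .finished without any separate EOF check.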

File 2 of 2: the 6502 LZSA2 "decompress_faster" depacker (ACME assembler)

@@ -7,12 +7,9 @@
 ;
 ; This code is written for the ACME assembler.
 ;
-; Optional code is presented for two minor 6502 optimizations that break
-; compatibility with the current LZSA2 format standard.
-;
-; The code is 241 bytes for the small version, and 267 bytes for the normal.
-;
-; Copyright John Brandwood 2019.
+; The code is 240 bytes for the small version, and 255 bytes for the normal.
+;
+; Copyright John Brandwood 2021.
 ;
 ; Distributed under the Boost Software License, Version 1.0.
 ; (See accompanying file LICENSE_1_0.txt or copy at
@@ -30,99 +27,11 @@
 ;
 ;
-; Choose size over space (within sane limits)?
+; Choose size over decompression speed (within sane limits)?
 ;
 LZSA_SMALL_SIZE = 0
-;
-; Remove code inlining to save space?
-;
-; This saves 15 bytes of code at the cost of 7% speed.
-;
-!if LZSA_SMALL_SIZE {
-LZSA_NO_INLINE = 1
-} else {
-LZSA_NO_INLINE = 0
-}
-;
-; Use smaller code for copying literals?
-;
-; This saves 11 bytes of code at the cost of 5% speed.
-;
-!if LZSA_SMALL_SIZE {
-LZSA_SHORT_CP = 1
-} else {
-LZSA_SHORT_CP = 0
-}
-;
-; We will read from or write to $FFFF. This prevents the
-; use of the "INC ptrhi / BNE" trick and reduces speed.
-;
-LZSA_USE_FFFF = 0
-;
-; Macro to increment the source pointer to the next page.
-;
-!macro LZSA_INC_PAGE {
-                inc <lzsa_srcptr + 1
-}
-;
-; Macro to read a byte from the compressed source data.
-;
-!if LZSA_NO_INLINE {
-!macro LZSA_GET_SRC {
-                jsr lzsa2_get_byte
-}
-} else {
-!macro LZSA_GET_SRC {
-                lda (lzsa_srcptr),y
-                inc <lzsa_srcptr + 0
-                bne .skip
-                +LZSA_INC_PAGE
-.skip:
-}
-}
-;
-; Macro to speed up reading 50% of nibbles.
-;
-; This seems to save very few cycles compared to the
-; increase in code size, and it isn't recommended.
-;
-LZSA_SLOW_NIBL = 1
-!if (LZSA_SLOW_NIBL + LZSA_SMALL_SIZE) {
-!macro LZSA_GET_NIBL {
-                jsr lzsa2_get_nibble    ; Always call a function.
-}
-} else {
-!macro LZSA_GET_NIBL {
-                lsr <lzsa_nibflg        ; Is there a nibble waiting?
-                lda <lzsa_nibble        ; Extract the lo-nibble.
-                bcs .skip
-                jsr lzsa2_new_nibble    ; Extract the hi-nibble.
-.skip:          ora #$F0
-}
-}
 ; ***************************************************************************
@@ -131,6 +40,8 @@ LZSA_SLOW_NIBL = 1
 ; Data usage is last 11 bytes of zero-page.
 ;
+lzsa_length = lzsa_winptr ; 1 word.
+
 lzsa_cmdbuf = $F5 ; 1 byte.
 lzsa_nibflg = $F6 ; 1 byte.
 lzsa_nibble = $F7 ; 1 byte.
@@ -159,49 +70,30 @@ LZSA_DST_HI = $FF
 ;
 DECOMPRESS_LZSA2_FAST:
-lzsa2_unpack:   ldy #0                  ; Initialize source index.
+lzsa2_unpack:   ldx #$00                ; Hi-byte of length or offset.
+                ldy #$00                ; Initialize source index.
                 sty <lzsa_nibflg        ; Initialize nibble buffer.
-!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
-                beq .cp_length          ; always taken
-.incsrc1:
-                inc <lzsa_srcptr + 1
-                bne .resume_src1        ; always taken
-!if LZSA_SHORT_CP {
-.incsrc2:
-                inc <lzsa_srcptr + 1
-                bne .resume_src2        ; always taken
-.incdst:
-                inc <lzsa_dstptr + 1
-                bne .resume_dst         ; always taken
-}
-}
 ;
 ; Copy bytes from compressed source data.
 ;
-.cp_length:     ldx #$00                ; Hi-byte of length or offset.
-!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
-                +LZSA_GET_SRC
+; N.B. X=0 is expected and guaranteed when we get here.
+;
+.cp_length:     !if LZSA_SMALL_SIZE {
+                jsr .get_byte
                 } else {
                 lda (lzsa_srcptr),y
                 inc <lzsa_srcptr + 0
-                beq .incsrc1
+                bne .cp_skip0
+                inc <lzsa_srcptr + 1
                 }
-.resume_src1:
-                sta <lzsa_cmdbuf        ; Preserve this for later.
+.cp_skip0:      sta <lzsa_cmdbuf        ; Preserve this for later.
                 and #$18                ; Extract literal length.
                 beq .lz_offset          ; Skip directly to match?
@@ -209,91 +101,48 @@ lzsa2_unpack: ldy #0 ; Initialize source index.
                 lsr
                 lsr
                 cmp #$03                ; Extended length?
-                bne .got_cp_len
-                jsr .get_length         ; X=0 table index for literals.
-!if LZSA_SHORT_CP {
-.got_cp_len:    cmp #0                  ; Check the lo-byte of length.
-                beq .put_cp_len
-                inx                     ; Increment # of pages to copy.
+                bcc .inc_cp_len
+                inx
+                jsr .get_length         ; X=1 for literals, returns CC.
+                ora #0                  ; Check the lo-byte of length
+                beq .put_cp_len         ; without effecting CC.
+.inc_cp_len:    inx                     ; Increment # of pages to copy.
 .put_cp_len:    stx <lzsa_length
                 tax
-.cp_page:       lda (lzsa_srcptr),y
-                sta (lzsa_dstptr),y
+.cp_page:       lda (lzsa_srcptr),y     ; CC throughout the execution of
+                sta (lzsa_dstptr),y     ; of this .cp_page loop.
                 inc <lzsa_srcptr + 0
-!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
-                bne .skip1
+                bne .cp_skip1
                 inc <lzsa_srcptr + 1
-.skip1:         inc <lzsa_dstptr + 0
-                bne .skip2
+.cp_skip1:      inc <lzsa_dstptr + 0
+                bne .cp_skip2
                 inc <lzsa_dstptr + 1
-.skip2:
-} else {
-                beq .incsrc2
-.resume_src2:
-                inc <lzsa_dstptr + 0
-                beq .incdst
-.resume_dst:
-}
-                dex
+.cp_skip2:      dex
                 bne .cp_page
                 dec <lzsa_length        ; Any full pages left to copy?
                 bne .cp_page
-} else {
-.got_cp_len:    tay                     ; Check the lo-byte of length.
-                beq .cp_page
-                inx                     ; Increment # of pages to copy.
-.get_cp_src:    clc                     ; Calc address of partial page.
-                adc <lzsa_srcptr + 0
-                sta <lzsa_srcptr + 0
-                bcs .get_cp_dst
-                dec <lzsa_srcptr + 1
-.get_cp_dst:    tya
-                clc                     ; Calc address of partial page.
-                adc <lzsa_dstptr + 0
-                sta <lzsa_dstptr + 0
-                bcs .get_cp_idx
-                dec <lzsa_dstptr + 1
-.get_cp_idx:    tya                     ; Negate the lo-byte of length.
-                eor #$FF
-                tay
-                iny
-.cp_page:       lda (lzsa_srcptr),y
-                sta (lzsa_dstptr),y
-                iny
-                bne .cp_page
-                inc <lzsa_srcptr + 1
-                inc <lzsa_dstptr + 1
-                dex                     ; Any full pages left to copy?
-                bne .cp_page
-}
+;
+; Copy bytes from decompressed window.
+;
+; N.B. X=0 is expected and guaranteed when we get here.
+;
-;  ================================
 ;   xyz
+;  ===========================
 ;   00z   5-bit offset
 ;   01z   9-bit offset
 ;   10z   13-bit offset
 ;   110   16-bit offset
 ;   111   repeat offset
 ;
 .lz_offset:     lda <lzsa_cmdbuf
                 asl
@@ -304,48 +153,40 @@ lzsa2_unpack: ldy #0 ; Initialize source index.
 .get_5_bits:    dex                     ; X=$FF
 .get_13_bits:   asl
                 php
-                +LZSA_GET_NIBL          ; Always returns with CS.
+                jsr .get_nibble
                 plp
-                rol                     ; Shift into position, set C.
-                eor #$01
+                rol                     ; Shift into position, clr C.
+                eor #$E1
                 cpx #$00                ; X=$FF for a 5-bit offset.
                 bne .set_offset
-                sbc #2                  ; Subtract 512 because 13-bit
-                                        ; offset starts at $FE00.
-                bne .get_low8x          ; Always NZ from previous SBC.
+                sbc #2                  ; 13-bit offset from $FE00.
+                bne .set_hi_8           ; Always NZ from previous SBC.
 .get_9_bits:    dex                     ; X=$FF if CS, X=$FE if CC.
                 asl
-                bcc .get_low8
+                bcc .get_lo_8
                 dex
-                bcs .get_low8           ; Always VS from previous BIT.
+                bcs .get_lo_8           ; Always VS from previous BIT.
 .get_13_16_rep: asl
                 bcc .get_13_bits        ; Shares code with 5-bit path.
 .get_16_rep:    bmi .lz_length          ; Repeat previous offset.
-;
-; Copy bytes from decompressed window.
-;
-; N.B. X=0 is expected and guaranteed when we get here.
-;
-.get_16_bits:   jsr lzsa2_get_byte      ; Get hi-byte of offset.
-.get_low8x:     tax
-.get_low8:
-!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
-                +LZSA_GET_SRC           ; Get lo-byte of offset.
+.get_16_bits:   jsr .get_byte           ; Get hi-byte of offset.
+.set_hi_8:      tax
+.get_lo_8:      !if LZSA_SMALL_SIZE {
+                jsr .get_byte           ; Get lo-byte of offset.
                 } else {
-                lda (lzsa_srcptr),y
+                lda (lzsa_srcptr),y     ; Get lo-byte of offset.
                 inc <lzsa_srcptr + 0
-                beq .incsrc3
-.resume_src3:
+                bne .set_offset
+                inc <lzsa_srcptr + 1
                 }
@@ -359,26 +200,24 @@ lzsa2_unpack: ldy #0 ; Initialize source index.
                 clc
                 adc #$02
                 cmp #$09                ; Extended length?
-                bne .got_lz_len
-                inx
-                jsr .get_length         ; X=1 table index for match.
+                bcc .got_lz_len
+                jsr .get_length         ; X=0 for match, returns CC.
 .got_lz_len:    eor #$FF                ; Negate the lo-byte of length
                 tay                     ; and check for zero.
                 iny
-                beq .calc_lz_addr
+                beq .get_lz_win
                 eor #$FF
                 inx                     ; Increment # of pages to copy.
-                clc                     ; Calc destination for partial
-                adc <lzsa_dstptr + 0    ; page.
-                sta <lzsa_dstptr + 0
-                bcs .calc_lz_addr
+.get_lz_dst:    adc <lzsa_dstptr + 0    ; Calc address of partial page.
+                sta <lzsa_dstptr + 0    ; Always CC from previous CMP.
+                bcs .get_lz_win
                 dec <lzsa_dstptr + 1
-.calc_lz_addr:  clc                     ; Calc address of match.
+.get_lz_win:    clc                     ; Calc address of match.
                 lda <lzsa_dstptr + 0    ; N.B. Offset is negative!
                 adc <lzsa_offset + 0
                 sta <lzsa_winptr + 0
@@ -397,56 +236,47 @@ lzsa2_unpack: ldy #0 ; Initialize source index.
                 jmp .cp_length          ; Loop around to the beginning.
-!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
-.incsrc3:
-                inc <lzsa_srcptr + 1
-                bne .resume_src3        ; always taken
-}
 ;
 ; Lookup tables to differentiate literal and match lengths.
 ;
-.nibl_len_tbl:  !byte 3 + $10           ; 0+3 (for literal).
-                !byte 9 + $10           ; 2+7 (for match).
-.byte_len_tbl:  !byte 18 - 1            ; 0+3+15 - CS (for literal).
-                !byte 24 - 1            ; 2+7+15 - CS (for match).
+.nibl_len_tbl:  !byte 9                 ; 2+7 (for match).
+                !byte 3                 ; 0+3 (for literal).
+.byte_len_tbl:  !byte 24 - 1            ; 2+7+15 - CS (for match).
+                !byte 18 - 1            ; 0+3+15 - CS (for literal).
 ;
-; Get 16-bit length in X:A register pair.
+; Get 16-bit length in X:A register pair, return with CC.
 ;
-.get_length:    +LZSA_GET_NIBL
-                cmp #$FF                ; Extended length?
+.get_length:    jsr .get_nibble
+                cmp #$0F                ; Extended length?
                 bcs .byte_length
                 adc .nibl_len_tbl,x     ; Always CC from previous CMP.
 .got_length:    ldx #$00                ; Set hi-byte of 4 & 8 bit
                 rts                     ; lengths.
-.byte_length:   jsr lzsa2_get_byte      ; So rare, this can be slow!
+.byte_length:   jsr .get_byte           ; So rare, this can be slow!
                 adc .byte_len_tbl,x     ; Always CS from previous CMP.
                 bcc .got_length
                 beq .finished
-.word_length:   jsr lzsa2_get_byte      ; So rare, this can be slow!
+.word_length:   jsr .get_byte           ; So rare, this can be slow!
                 pha
-                jsr lzsa2_get_byte      ; So rare, this can be slow!
+                jsr .get_byte           ; So rare, this can be slow!
                 tax
                 pla
+                clc                     ; MUST return CC!
                 rts
-lzsa2_get_byte:
-                lda (lzsa_srcptr),y     ; Subroutine version for when
+.get_byte:      lda (lzsa_srcptr),y     ; Subroutine version for when
                 inc <lzsa_srcptr + 0    ; inlining isn't advantageous.
-                beq lzsa2_next_page
+                beq .next_page
                 rts
-lzsa2_next_page:
-                inc <lzsa_srcptr + 1    ; Inc & test for bank overflow.
+.next_page:     inc <lzsa_srcptr + 1
                 rts
 .finished:      pla                     ; Decompression completed, pop
@@ -457,66 +287,29 @@ lzsa2_next_page:
 ; Get a nibble value from compressed data in A.
 ;
-!if (LZSA_SLOW_NIBL | LZSA_SMALL_SIZE) {
-lzsa2_get_nibble:
-                lsr <lzsa_nibflg        ; Is there a nibble waiting?
+.get_nibble:    lsr <lzsa_nibflg        ; Is there a nibble waiting?
                 lda <lzsa_nibble        ; Extract the lo-nibble.
                 bcs .got_nibble
                 inc <lzsa_nibflg        ; Reset the flag.
-!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
-                +LZSA_GET_SRC
+                !if LZSA_SMALL_SIZE {
+                jsr .get_byte
                 } else {
                 lda (lzsa_srcptr),y
                 inc <lzsa_srcptr + 0
-                beq .incsrc4
-.resume_src4:
-}
-                sta <lzsa_nibble        ; Preserve for next time.
-                lsr                     ; Extract the hi-nibble.
-                lsr
-                lsr
-                lsr
-.got_nibble:    ora #$F0
-                rts
-} else {
-lzsa2_new_nibble:
-                inc <lzsa_nibflg        ; Reset the flag.
-!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
-                +LZSA_GET_SRC
-} else {
-                lda (lzsa_srcptr),y
-                inc <lzsa_srcptr + 0
-                beq .incsrc4
-.resume_src4:
-}
-                sta <lzsa_nibble        ; Preserve for next time.
-                lsr                     ; Extract the hi-nibble.
-                lsr
-                lsr
-                lsr
-                rts
-}
-!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
-.incsrc4:
-                inc <lzsa_srcptr + 1
-                bne .resume_src4        ; always taken
-}
+                bne .set_nibble
+                inc <lzsa_srcptr + 1
+                }
+.set_nibble:    sta <lzsa_nibble        ; Preserve for next time.
+                lsr                     ; Extract the hi-nibble.
+                lsr
+                lsr
+                lsr
+.got_nibble:    and #$0F
+                rts
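
A companion sketch for LZSA2's offset modes, matching the xyz table in the diff above. get_nibble() and get_byte() stand in for the .get_nibble/.get_byte subroutines, decode_offset is a name invented for this note, and two's-complement int16_t behaviour is assumed; consult the published LZSA2 block-format description rather than this sketch for authoritative details.

    #include <stdint.h>

    extern unsigned get_nibble(void);   /* next nibble, 0..15 */
    extern unsigned get_byte(void);     /* next source byte, 0..255 */

    /* Offsets come out as negative values, exactly how the depacker
     * keeps them in lzsa_offset. The EOR #$E1 above is this in one
     * instruction: set the implied high bits and invert the Z bit. */
    static int decode_offset(unsigned token, int last_offset)
    {
        unsigned xyz = token >> 5;      /* top three bits of the token */
        unsigned z   = xyz & 1;         /* the Z bit, used inverted */
        switch (xyz) {
        case 0: case 1:                 /* 00z: 5-bit offset, -32..-1 */
            return (int16_t)(0xFFE0 | (get_nibble() << 1) | (z ^ 1));
        case 2: case 3:                 /* 01z: 9-bit offset, -512..-1 */
            return (int16_t)(0xFE00 | ((z ^ 1) << 8) | get_byte());
        case 4: case 5: {               /* 10z: 13-bit offset from $FE00 */
            unsigned hi = 0xE0 | (get_nibble() << 1) | (z ^ 1);
            return (int16_t)(((hi - 2) << 8) | get_byte());  /* the SBC #2 */
        }
        case 6: {                       /* 110: 16-bit offset, hi-byte first */
            unsigned hi = get_byte();
            return (int16_t)((hi << 8) | get_byte());
        }
        default:                        /* 111: repeat previous offset */
            return last_offset;
        }
    }

Keeping the offset negative is what lets the 6502 code form lzsa_winptr with a plain 16-bit add of lzsa_offset to lzsa_dstptr, and mode 111 simply reuses whatever the previous match used.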