; zlib6502/inflate.asx
;
; inflate - uncompress data stored in the DEFLATE format
; by Piotr Fusik <fox@scene.pl>
; Last modified: 2017-11-07

; Compile with xasm (http://xasm.atari.org/), for example:
; xasm inflate.asx /l /d:inflate=$b700 /d:inflate_data=$b900 /d:inflate_zp=$f0
; inflate is 508 bytes of code and constants
; inflate_data is 765 bytes of uninitialized data
; inflate_zp is 10 bytes on page zero
; Zero-page workspace: 10 bytes starting at inflate_zp.
; Several names below resolve to the same address; they are aliases for
; one byte that is used under different names by different parts of the
; decoder.

; Pointer to compressed data
inputPointer                    equ     inflate_zp      ; 2 bytes
; Pointer to uncompressed data
outputPointer                   equ     inflate_zp+2    ; 2 bytes
; Local variables
getBit_buffer                   equ     inflate_zp+4    ; 1 byte
getBits_base                    equ     inflate_zp+5    ; 1 byte
inflateStored_pageCounter       equ     inflate_zp+5    ; 1 byte
inflateCodes_sourcePointer      equ     inflate_zp+6    ; 2 bytes
inflateDynamic_symbol           equ     inflate_zp+6    ; 1 byte
inflateDynamic_lastLength       equ     inflate_zp+7    ; 1 byte
inflateDynamic_tempCodes        equ     inflate_zp+7    ; 1 byte
inflateCodes_lengthMinus2       equ     inflate_zp+8    ; 1 byte
inflateDynamic_allCodes         equ     inflate_zp+8    ; 1 byte
inflateDynamic_primaryCodes     equ     inflate_zp+9    ; 1 byte
; Argument values for getBits
; GET_n_BITS is $80 plus a single stop bit at position n-1.  getBits_loop
; rotates A right once per input bit, so the stop bit drops into carry
; after exactly n bits; the $80 bit then travels down through A and ends
; the normalizing shift in getBits.
GET_1_BIT                       equ     $81
GET_2_BITS                      equ     $82
GET_3_BITS                      equ     $84
GET_4_BITS                      equ     $88
GET_5_BITS                      equ     $90
GET_6_BITS                      equ     $a0
GET_7_BITS                      equ     $c0

; Huffman trees
; PRIMARY_TREE / DISTANCE_TREE are the starting X values passed to fetchCode;
; DISTANCE_TREE is also added to distance code lengths so that they index
; the second half of the nBitCode_control* arrays.
TREE_SIZE                       equ     16
PRIMARY_TREE                    equ     0
DISTANCE_TREE                   equ     TREE_SIZE

; Alphabet
; LENGTH_SYMBOLS: 1 + 29 + 2 (the last two are the "meaningless codes" of
; the fixed Huffman table in inflateCompressed)
LENGTH_SYMBOLS                  equ     1+29+2
DISTANCE_SYMBOLS                equ     30
CONTROL_SYMBOLS                 equ     LENGTH_SYMBOLS+DISTANCE_SYMBOLS
; Uncompress DEFLATE stream starting from the address stored in inputPointer
; to the memory starting from the address stored in outputPointer
;
; In:   inputPointer  = address of the first byte of the DEFLATE stream
;       outputPointer = address that receives the first uncompressed byte
; Out:  both pointers have been advanced past the bytes read / written
; Note: Y is expected to be zero at the points marked with a commented-out
;       "ldy #0"; those lines document the assumption, they are not code.
        org     inflate
        mvy     #0 getBit_buffer        ; empty bit buffer: first getBit loads a byte
inflate_blockLoop
; Get a bit of EOF and two bits of block type
;       ldy     #0
        sty     getBits_base
        lda     #GET_3_BITS
        jsr     getBits
        lsr     @                       ; C = final-block bit, A = block type
        php                             ; final-block bit is tested in inflate_nextBlock
        bne     inflateCompressed       ; type 0 is a stored block, handled below

; Copy uncompressed block
;       ldy     #0
        sty     getBit_buffer           ; ignore bits until byte boundary
        jsr     getWord                 ; skip the length we don't need
        jsr     getWord                 ; get the one's complement length
        sta     inflateStored_pageCounter ; high byte; the low byte is in X
;       jmp     inflateStored_firstByte
        bcs     inflateStored_firstByte ; always taken: getWord returns with C set
inflateStored_copyByte
        jsr     getByte
        jsr     storeByte
inflateStored_firstByte
; count the complemented length in pageCounter:X up to zero
        inx
        bne     inflateStored_copyByte
        inc     inflateStored_pageCounter
        bne     inflateStored_copyByte

inflate_nextBlock
        plp                             ; C = final-block bit saved above
        bcc     inflate_blockLoop
        rts
inflateCompressed
; Entered from inflate_blockLoop with the block type in A and Y = 0.
; Fills literalSymbolCodeLength[0..255] and
; controlSymbolCodeLength[0..CONTROL_SYMBOLS-1], then continues either to
; inflateCodes (fixed block) or to the dynamic-block header code that
; follows this loop.
; A=1: fixed block, initialize with fixed codes
; A=2: dynamic block, start by clearing all code lengths
; A=3: invalid compressed data, not handled in this routine
        eor     #2                      ; dynamic -> 0, fixed -> non-zero

;       ldy     #0
inflateCompressed_setCodeLengths
        tax
        beq     inflateCompressed_setLiteralCodeLength ; dynamic: store the zero
; fixed Huffman literal codes:
; :144  dta     8
; :112  dta     9
        lda     #4
        cpy     #144
        rol     @                       ; A = 8 for Y < 144, 9 otherwise
inflateCompressed_setLiteralCodeLength
        sta     literalSymbolCodeLength,y
; Z still comes from the tax / rol above (sta leaves flags alone)
        beq     inflateCompressed_setControlCodeLength
; fixed Huffman control codes:
; :24   dta     7
; :6    dta     8
; :2    dta     8 ; meaningless codes
; :30   dta     5+DISTANCE_TREE
        lda     #5+DISTANCE_TREE
        cpy     #LENGTH_SYMBOLS
        bcs     inflateCompressed_setControlCodeLength
        cpy     #24
        adc     #2-DISTANCE_TREE        ; A = 7 for Y < 24, 8 for 24..31
inflateCompressed_setControlCodeLength
        cpy     #CONTROL_SYMBOLS
        scs:sta controlSymbolCodeLength,y ; store only while Y < CONTROL_SYMBOLS
        iny
        bne     inflateCompressed_setCodeLengths

        tax
        bne     inflateCodes            ; fixed block: tables are ready
; Decompress a block reading Huffman trees first
; (reached by falling through from the code-length loop of
; inflateCompressed when the block is dynamic; execution continues into
; inflateCodes once all code lengths have been read)

; Build the tree for temporary codes
        jsr     buildTempHuffmanTree

; Use temporary codes to get lengths of literal/length and distance codes
;       ldx     #0
;       sec
inflateDynamic_decodeLength
; C=1: literal codes
; C=0: control codes
        stx     inflateDynamic_symbol   ; index of the next length to store
        php                             ; keep the literal/control selector
; Fetch a temporary code
        jsr     fetchPrimaryCode
; Temporary code 0..15: put this length
        bpl     inflateDynamic_verbatimLength
; Temporary code 16: repeat last length 3 + getBits(2) times
; Temporary code 17: put zero length 3 + getBits(3) times
; Temporary code 18: put zero length 11 + getBits(7) times
; These three codes decode to GET_2_BITS / GET_3_BITS / GET_7_BITS (the
; first entries of inflateDynamic_tempSymbols), so A is already the
; argument for getBits and has bit 7 set.
        tax
        jsr     getBits
        cpx     #GET_3_BITS
        bcc     inflateDynamic_repeatLast ; code 16
        seq:adc #7                      ; code 18 only; C is set, so this adds 8
;       ldy     #0
        sty     inflateDynamic_lastLength ; codes 17 and 18 repeat a zero length
inflateDynamic_repeatLast
        tay
        lda     inflateDynamic_lastLength
        iny:iny
inflateDynamic_verbatimLength
        iny                             ; Y = number of lengths to store
        plp
        ldx     inflateDynamic_symbol
inflateDynamic_storeLength
        bcc     inflateDynamic_controlSymbolCodeLength
        sta     literalSymbolCodeLength,x+
        cpx     #1                      ; C clear only when X wrapped to 0
inflateDynamic_storeNext
        dey
        bne     inflateDynamic_storeLength
        sta     inflateDynamic_lastLength
;       jmp     inflateDynamic_decodeLength
        beq     inflateDynamic_decodeLength ; always taken: Z is set by the dey
inflateDynamic_controlSymbolCodeLength
        cpx     inflateDynamic_primaryCodes
        bcc     inflateDynamic_storeControl
; the code lengths we skip here were zero-initialized
; in inflateCompressed_setControlCodeLength
        sne:ldx #LENGTH_SYMBOLS         ; X = primaryCodes: jump to the distance symbols
        ora     #DISTANCE_TREE          ; distance lengths are kept with DISTANCE_TREE added
inflateDynamic_storeControl
        sta     controlSymbolCodeLength,x+
        cpx     inflateDynamic_allCodes
        bcc     inflateDynamic_storeNext
        dey
;       ldy     #0
; Decompress a block
; Builds the trees from the *SymbolCodeLength arrays, then decodes symbols
; until the end-of-block symbol and leaves through inflate_nextBlock.
inflateCodes
        jsr     buildHuffmanTree
;       jmp     inflateCodes_loop
        beq     inflateCodes_loop       ; always taken: buildHuffmanTree returns with Z set
inflateCodes_literal
        jsr     storeByte
inflateCodes_loop
        jsr     fetchPrimaryCode
        bcc     inflateCodes_literal    ; C clear: A is a literal byte
        beq     inflate_nextBlock       ; control symbol 0 ends the block
; Copy sequence from look-behind buffer
; A = X = control symbol (non-zero here)
;       ldy     #0
        sty     getBits_base
        cmp     #9
        bcc     inflateCodes_setSequenceLength ; symbols 1..8: no extra bits
        tya
;       lda     #0
        cpx     #1+28
        bcs     inflateCodes_setSequenceLength ; last length symbol: A = 0
; remaining symbols: move the two low bits of X-1 into getBits_base and
; read the extra bits on top of that base
        dex
        txa
        lsr     @
        ror     getBits_base
        inc     getBits_base
        lsr     @
        rol     getBits_base
        jsr     getAMinus1BitsMax8
;       sec
        adc     #0                      ; adds the carry left set by getBits
inflateCodes_setSequenceLength
        sta     inflateCodes_lengthMinus2
; Decode the distance; the source pointer is set up so that
; (inflateCodes_sourcePointer),Y with Y = low byte of outputPointer
; addresses the byte to copy (see copyByte).
        ldx     #DISTANCE_TREE
        jsr     fetchCode
        cmp     #4
        bcc     inflateCodes_setOffsetLowByte ; symbols 0..3: no extra bits
        inc     getBits_base
        lsr     @
        jsr     getAMinus1BitsMax8
inflateCodes_setOffsetLowByte
        eor     #$ff
        sta     inflateCodes_sourcePointer
        lda     getBits_base
        cpx     #10
        bcc     inflateCodes_setOffsetHighByte ; C is clear on this path
        lda     getNPlus1Bits_mask-10,x
        jsr     getBits
        clc
inflateCodes_setOffsetHighByte
        eor     #$ff
;       clc
        adc     outputPointer+1
        sta     inflateCodes_sourcePointer+1
; two unconditional copies plus inflateCodes_lengthMinus2 more
; (a stored value of 0 runs the loop 256 times)
        jsr     copyByte
        jsr     copyByte
inflateCodes_copyByte
        jsr     copyByte
        dec     inflateCodes_lengthMinus2
        bne     inflateCodes_copyByte
;       jmp     inflateCodes_loop
        beq     inflateCodes_loop
; Get dynamic block header and use it to build the temporary tree
; Expects Y = 0.  Stores the three header counts in
; inflateDynamic_tempCodes, inflateDynamic_allCodes and
; inflateDynamic_primaryCodes, reads the temporary code lengths into
; literalSymbolCodeLength, then falls through into buildHuffmanTree.
buildTempHuffmanTree
;       ldy     #0
; numberOfPrimaryCodes = 257 + getBits(5)
; numberOfDistanceCodes = 1 + getBits(5)
; numberOfTemporaryCodes = 4 + getBits(4)
        ldx     #3
inflateDynamic_getHeader
        lda     inflateDynamic_headerBits-1,x
        jsr     getBits
;       sec
        adc     inflateDynamic_headerBase-1,x ; carry set by getBits adds 1 more
        sta     inflateDynamic_tempCodes-1,x ; X=3,2,1 -> primaryCodes, allCodes, tempCodes
        dex
        bne     inflateDynamic_getHeader

; Get lengths of temporary codes in the order stored in inflateDynamic_tempSymbols
;       ldx     #0
inflateDynamic_getTempCodeLengths
        lda     #GET_3_BITS
        jsr     getBits
        ldy     inflateDynamic_tempSymbols,x
        sta     literalSymbolCodeLength,y
        ldy     #0                      ; restore Y = 0 for getBits / buildHuffmanTree
        inx
        cpx     inflateDynamic_tempCodes
        bcc     inflateDynamic_getTempCodeLengths
; Build Huffman trees basing on code lengths (in bits)
; stored in the *SymbolCodeLength arrays
; In:   Y = 0
; Out:  nBitCode_*Count hold the number of codes per length,
;       nBitCode_*Offset hold, per length, the index just past that
;       length's symbols in codeTo*Symbol, codeTo*Symbol hold the symbols
;       ordered by code length; Y = 0 and Z set on return
buildHuffmanTree
; Clear nBitCode_literalCount, nBitCode_controlCount
        tya
;       lda     #0
        sta:rne nBitCode_clearFrom,y+   ; zeroes 256 bytes from nBitCode_clearFrom
; Count number of codes of each length
;       ldy     #0
buildHuffmanTree_countCodeLengths
        ldx     literalSymbolCodeLength,y
        inc     nBitCode_literalCount,x
        sne:stx allLiteralsCodeLength   ; count wrapped to 0: all 256 literals have length X
        cpy     #CONTROL_SYMBOLS
        bcs     buildHuffmanTree_noControlSymbol
        ldx     controlSymbolCodeLength,y
        inc     nBitCode_controlCount,x
buildHuffmanTree_noControlSymbol
        iny
        bne     buildHuffmanTree_countCodeLengths
; Calculate offsets of symbols sorted by code length
; (one running sum over the literal counts and then the control counts)
;       lda     #0
        ldx     #-4*TREE_SIZE
buildHuffmanTree_calculateOffsets
        sta     nBitCode_literalOffset+4*TREE_SIZE-$100,x
        add     nBitCode_literalCount+4*TREE_SIZE-$100,x
        inx
        bne     buildHuffmanTree_calculateOffsets
; Put symbols in their place in the sorted array
;       ldy     #0
buildHuffmanTree_assignCode
        tya                             ; A = symbol being placed
        ldx     literalSymbolCodeLength,y
        ldy:inc nBitCode_literalOffset,x ; Y = slot for this symbol, then advance the slot
        sta     codeToLiteralSymbol,y
        tay                             ; Y = symbol again
        cpy     #CONTROL_SYMBOLS
        bcs     buildHuffmanTree_noControlSymbol2
        ldx     controlSymbolCodeLength,y
        ldy:inc nBitCode_controlOffset,x
        sta     codeToControlSymbol,y
        tay
buildHuffmanTree_noControlSymbol2
        iny
        bne     buildHuffmanTree_assignCode
        rts
; Read Huffman code using the primary tree
fetchPrimaryCode
        ldx     #PRIMARY_TREE
; Read a code from input using the tree specified in X,
; return low byte of this code in A,
; return C flag reset for literal code, set for length code
; In:   X = PRIMARY_TREE or DISTANCE_TREE, Y = 0
; Out:  literal: A = symbol from codeToLiteralSymbol, C clear
;       control: A = X = symbol from codeToControlSymbol masked with $1f,
;                C set, Z set when that value is zero
fetchCode
;       ldy     #0
        tya
fetchCode_nextBit
        jsr     getBit
        rol     @                       ; append the bit to the code in A
        inx                             ; X = tree base + code length so far
        bcs     fetchCode_ge256
; are all 256 literal codes of this length?
        cpx     allLiteralsCodeLength
        beq     fetchCode_allLiterals
; is it literal code of length X?
        sub     nBitCode_literalCount,x
        bcs     fetchCode_notLiteral
; literal code
;       clc
        adc     nBitCode_literalOffset,x
        tax
        lda     codeToLiteralSymbol,x
fetchCode_allLiterals
        clc
        rts
; code >= 256, must be control
fetchCode_ge256
;       sec
        sbc     nBitCode_literalCount,x
        sec
; is it control code of length X?
fetchCode_notLiteral
;       sec
        sbc     nBitCode_controlCount,x
        bcs     fetchCode_nextBit       ; not a code of this length: read another bit
; control code
;       clc
        adc     nBitCode_controlOffset,x
        tax
        lda     codeToControlSymbol,x
        and     #$1f                    ; make distance symbols zero-based
        tax
;       sec
        rts
; Read A minus 1 bits, but no more than 8
; In:   A = bit count plus one (at least 2), C = bit to shift into
;       getBits_base, Y = 0
; Out:  X = the A passed in; A = bits read.  For A >= 9 a whole byte is
;       read through getByte and getBits_base is left holding the base.
getAMinus1BitsMax8
        rol     getBits_base
        tax
        cmp     #9
        bcs     getByte
        lda     getNPlus1Bits_mask-2,x
; Read the number of bits selected by a GET_n_BITS value in A and return
; them in A with getBits_base shifted in above them; C is set on return.
getBits
        jsr     getBits_loop
getBits_normalizeLoop
; shift right until the marker bit from GET_n_BITS falls out of A
        lsr     getBits_base
        ror     @
        bcc     getBits_normalizeLoop
        rts
; Read 16 bits
; Out:  X = first byte read, A = second byte read, C set
getWord
        jsr     getByte
        tax
; Read 8 bits
; Out:  A = the 8 bits read (first bit read in bit 0), C set
getByte
        lda     #$80                    ; stop bit reaches carry after 8 rotations
; Rotate input bits into A from bit 7 until a set bit rotates out
getBits_loop
        jsr     getBit
        ror     @
        bcc     getBits_loop
        rts
; Read one bit, return in the C flag
; In:   Y = 0
; Out:  C = next input bit; A is preserved
; getBit_buffer holds the unread bits of the current byte below a marker
; bit; when the shift leaves zero the marker is gone and a new byte is
; loaded from (inputPointer).
getBit
        lsr     getBit_buffer
        bne     getBit_return
        pha
;       ldy     #0
        lda     (inputPointer),y
        inw     inputPointer
        sec                             ; marker bit enters at bit 7
        ror     @                       ; C = first bit of the new byte
        sta     getBit_buffer
        pla
getBit_return
        rts
; Copy a previously written byte
; Reads (inflateCodes_sourcePointer) indexed by the low byte of
; outputPointer, then falls through into storeByte with Y = 0.
copyByte
        ldy     outputPointer
        lda     (inflateCodes_sourcePointer),y
        ldy     #0
; Write a byte
; In:   A = byte to write, Y = 0
; Out:  outputPointer advanced by one; when its low byte wraps, the high
;       bytes of outputPointer and inflateCodes_sourcePointer are both
;       incremented
storeByte
;       ldy     #0
        sta     (outputPointer),y
        inc     outputPointer
        bne     storeByte_return
        inc     outputPointer+1
        inc     inflateCodes_sourcePointer+1
storeByte_return
        rts
getNPlus1Bits_mask
; getBits arguments for 1..7 bits; callers index this table with a
; negative bias (getNPlus1Bits_mask-2,x and getNPlus1Bits_mask-10,x)
        dta     GET_1_BIT,GET_2_BITS,GET_3_BITS,GET_4_BITS,GET_5_BITS,GET_6_BITS,GET_7_BITS

inflateDynamic_tempSymbols
; literalSymbolCodeLength index for each temporary code length, in the
; order the lengths are read from the stream.  The first three entries are
; getBits arguments rather than small numbers; inflateDynamic_decodeLength
; passes such a decoded symbol straight to getBits.
        dta     GET_2_BITS,GET_3_BITS,GET_7_BITS,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15

; Field widths and base values for the three dynamic-block header fields,
; read by inflateDynamic_getHeader from the last entry to the first
inflateDynamic_headerBits
        dta     GET_4_BITS,GET_5_BITS,GET_5_BITS
inflateDynamic_headerBase
        dta     3,LENGTH_SYMBOLS,0
; Work area at inflate_data.  Space is reserved with "org *+N"; no bytes
; are defined here.
        org     inflate_data

; Data for building trees

literalSymbolCodeLength
        org     *+256
controlSymbolCodeLength
        org     *+CONTROL_SYMBOLS

; Huffman trees
; The two *Count arrays and the two *Offset arrays must stay adjacent and
; in this order: buildHuffmanTree walks each pair as one 4*TREE_SIZE run.
; nBitCode_clearFrom marks the start of the 256 bytes it zeroes.

nBitCode_clearFrom
nBitCode_literalCount
        org     *+2*TREE_SIZE
nBitCode_controlCount
        org     *+2*TREE_SIZE
nBitCode_literalOffset
        org     *+2*TREE_SIZE
nBitCode_controlOffset
        org     *+2*TREE_SIZE
allLiteralsCodeLength
        org     *+1

codeToLiteralSymbol
        org     *+256
codeToControlSymbol
        org     *+CONTROL_SYMBOLS

        end