fc8-compression/fc8-decompress-68000.c

1 line
6.1 KiB
C

/*
* FC8 compression by Steve Chamberlin
* Derived from liblzg by Marcus Geelnard
* 68000 decompressor by Steve Chamberlin
*/
// FC8 stream header layout, as consumed by fc8_decode below:
//   bytes 0-3: magic ID 'F','C','8','_'
//   bytes 4-7: decoded (uncompressed) size, stored most-significant byte first
// Byte offset of the decoded-size field from the start of the input buffer.
#define FC8_DECODED_SIZE_OFFSET 4
// Total header length; the first compressed token starts at this offset.
#define FC8_HEADER_SIZE 8
//-------------------------------------------------------------------------------
// fc8_decode - Decode a compressed memory block
//
// In:
//   a0 = in buffer  (FC8 stream: 8-byte header followed by tokens)
//   a1 = out buffer
//   d1 = outsize    (capacity of the out buffer, in bytes)
// Out:
//   d0 = result (1 if decompression was successful, or 0 upon failure)
//
// Returns 0 when the magic ID is not 'FC8_' or when the decoded size stored in
// the header is larger than outsize. d1-d7/a0-a6 are saved on entry and
// restored on exit.
//
// NOTE: only the header is validated. The token stream itself is trusted:
// there is no per-token bounds check on either the input or the output pointer,
// so a corrupt stream can read or write outside the supplied buffers.
//
// Token formats (first byte = t0):
//   LIT 00aaaaaa                            copy aaaaaa+1 literal bytes from input
//   BR0 01baaaaa                            copy b+3 bytes from backref offset aaaaa
//                                           (offset 0 marks end of stream)
//   BR1 10bbbaaa'aaaaaaaa                   copy bbb+3 bytes from an 11-bit offset
//   BR2 11bbbbba'aaaaaaaa'aaaaaaaa          copy LUT[bbbbb]+1 bytes from a 17-bit offset
//
// Register use inside the main loop:
//   a0 = next input byte            a1 = next output byte
//   a4 = source pointer for copies  a5 = base of the length lookup table
//   d1 = 0x1F, d2 = 0x07, d3 = 0x01, d4 = 0x3F (mask constants)
//   d5 = backref offset             d6 = current token, then copy length-1
//   d7 = scratch (result of _GetUINT32)
//-------------------------------------------------------------------------------
#pragma parameter __D0 fc8_decode(__A0, __A1, __D1)
asm unsigned short fc8_decode(unsigned char* in, unsigned char* out, unsigned long outsize)
{
    machine 68020
    movem.l d1-d7/a0-a6,-(sp)
    bra _Init_Decode

    // lookup table for decoding the copy length-1 parameter of BR2 tokens
_FC8_LENGTH_DECODE_LUT:
    dc.b 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
    dc.b 18,19,20,21,22,23,24,25,26,27,28,34,47,71,127,255

    //-------------------------------------------------------------------------------
    // _GetUINT32 - alignment independent reader for 32-bit integers
    // Assembles the value one byte at a time, most-significant byte first.
    // a0 = in
    // d6 = offset
    // d7 = result
    //-------------------------------------------------------------------------------
_GetUINT32:
    move.b (a0,d6.w),d7
    asl.w #8,d7
    move.b 1(a0,d6.w),d7
    swap d7                     // first two bytes become the high word
    move.b 2(a0,d6.w),d7
    asl.w #8,d7
    move.b 3(a0,d6.w),d7
    rts

_Init_Decode:
    // check magic ID
    cmp.b #'F',(a0)
    bne _fail
    cmp.b #'C',1(a0)
    bne _fail
    cmp.b #'8',2(a0)
    bne _fail
    cmp.b #'_',3(a0)
    bne _fail

    // check decoded size - enough space in the output buffer?
    moveq #FC8_DECODED_SIZE_OFFSET,d6
    bsr.s _GetUINT32
    // cmp.l d7,d1 sets the flags from d1 - d7 (outsize - decoded size).
    // bcs is taken on an unsigned borrow, i.e. when outsize < decoded size,
    // which is the only case in which the output would not fit.
    cmp.l d7,d1
    bcs _fail

    // advance a0 to start of compressed data
    lea FC8_HEADER_SIZE(a0), a0

    // a5 = base of length decode lookup table
    // The displacement is hard-coded, so it MUST be re-derived whenever the
    // encoded size of anything between _FC8_LENGTH_DECODE_LUT and this
    // instruction changes. With the standard 68k encodings (and word-sized
    // branches to _fail) it is: 32 bytes of table + 24 bytes of _GetUINT32
    // + 52 bytes of init code above + 2 (PC points at the extension word).
    //lea _FC8_LENGTH_DECODE_LUT(pc),a5
    lea -110(pc),a5 // fix PC offset manually after assembly

    // helpful constants
    move.l #0x0000001F, d1
    move.l #0x00000007, d2
    move.l #0x00000001, d3
    move.l #0x0000003F, d4

    // Main decompression loop
_mainloop:
    move.b (a0)+, d6            // d6 = next token
    bmi.s _BR1_BR2              // BR1 and BR2 tokens have the high bit set
    btst #6, d6                 // check bit 6 to see if this is a BR0 or LIT token
    bne.s _BR0

    // LIT 00aaaaaa - copy literal string of aaaaaa+1 bytes from input to output
_LIT:
    move.l a0, a4               // a4 = source ptr for copy
    and.w d4, d6                // AND with 0x3F, d6 = length-1 word for copy
    lea 1(a0,d6.w), a0          // advance a0 to the end of the literal string
    bra.s _copyLoop

    // BR0 01baaaaa - copy b+3 bytes from output backref aaaaa to output
_BR0:
    move.b d6, d5
    and.l d1, d5                // AND with 0x1F, d5 = offset for copy = (long)(t0 & 0x1F)
    beq _done                   // BR0 with 0 offset means EOF
    move.l a1, a4
    sub.l d5, a4                // a4 = source ptr for copy
    move.b (a4)+, (a1)+         // copy 3 bytes, can't use move.w. or move.l because src and dest may overlap
    move.b (a4)+, (a1)+
    move.b (a4)+, (a1)+
    btst #5, d6                 // check b bit
    beq.s _mainloop
    move.b (a4)+, (a1)+         // copy 1 more byte
    bra.s _mainloop

_BR1_BR2:
    btst #6, d6                 // check bit 6 to see if this is a BR1 or BR2 token
    bne.s _BR2

    // BR1 10bbbaaa'aaaaaaaa - copy bbb+3 bytes from output backref aaa'aaaaaaaa to output
_BR1:
    move.b d6,d5
    and.l d2, d5                // AND with 0x07
    lsl.l #8, d5
    move.b (a0)+, d5            // d5 = offset for copy = ((long)(t0 & 0x07) << 8) | t1
    lsr.b #3, d6
    and.w d2, d6                // AND with 0x07
    addq.w #2, d6               // d6 = length-1 word for copy = ((word)(t0 >> 3) & 0x7) + 2
    bra.s _copyBackref

    // BR2 11bbbbba'aaaaaaaa'aaaaaaaa - copy lookup_table[bbbbb] bytes from output backref a'aaaaaaaa'aaaaaaaa to output
_BR2:
    move.b d6,d5
    and.w d3, d5                // AND with 0x01
    swap d5                     // offset bit 16 moves into the high word
    move.w (a0)+, d5            // d5 = offset for copy = ((long)(t0 & 0x01) << 16) | (t1 << 8) | t2
    lsr.b #1, d6
    and.w d1, d6                // AND with 0x1F
    move.b (a5,d6.w),d6         // d6 = length-1 word for copy = lookup_table[(t0 >> 1) & 0x1F]
    // fall through to _copyBackref

    // copy data from a previous location in the output buffer
    // d5 = offset from current buffer position
_copyBackref:
    move.l a1, a4
    sub.l d5, a4                // a4 = source ptr for copy
    cmpi.l #4, d5
    blt.s _nearCopy             // must copy byte-by-byte if offset < 4, to avoid overlapping long copies

    // Partially unrolled block copy. Requires 68020 or better.
    // Uses move.l and move.w where possible, even though both source and dest may be unaligned.
    // It's still faster than multiple move.b instructions
    // d6 = length-1
_copyLoop:
    cmpi.w #16, d6
    bge.s _copy17orMore
    // Each table entry below is a 2-byte bra.s, hence the index scale of 2.
    jmp _copy16orFewer(d6.w*2)
_copy16orFewer:
    bra.s _copy1
    bra.s _copy2
    bra.s _copy3
    bra.s _copy4
    bra.s _copy5
    bra.s _copy6
    bra.s _copy7
    bra.s _copy8
    bra.s _copy9
    bra.s _copy10
    bra.s _copy11
    bra.s _copy12
    bra.s _copy13
    bra.s _copy14
    bra.s _copy15
    bra.s _copy16

    // Each chain falls through, so entering at _copyN copies exactly N bytes.
_copy15: move.l (a4)+, (a1)+
_copy11: move.l (a4)+, (a1)+
_copy7: move.l (a4)+, (a1)+
_copy3: move.w (a4)+, (a1)+
_copy1: move.b (a4)+, (a1)+
    bra _mainloop
_copy14: move.l (a4)+, (a1)+
_copy10: move.l (a4)+, (a1)+
_copy6: move.l (a4)+, (a1)+
_copy2: move.w (a4)+, (a1)+
    bra _mainloop
_copy13: move.l (a4)+, (a1)+
_copy9: move.l (a4)+, (a1)+
_copy5: move.l (a4)+, (a1)+
    move.b (a4)+, (a1)+
    bra _mainloop
_copy16: move.l (a4)+, (a1)+
_copy12: move.l (a4)+, (a1)+
_copy8: move.l (a4)+, (a1)+
_copy4: move.l (a4)+, (a1)+
    bra _mainloop

    // Copy 16 bytes per pass until 16 or fewer remain, then finish via the jump table.
_copy17orMore:
    move.l (a4)+, (a1)+
    move.l (a4)+, (a1)+
    move.l (a4)+, (a1)+
    move.l (a4)+, (a1)+
    subi.w #16, d6
    cmpi.w #16, d6
    bge.s _copy17orMore
    jmp _copy16orFewer(d6.w*2)

    // Backref offset < 4: source and destination overlap too closely for long moves.
_nearCopy:
    cmpi.l #1,d5
    beq.s _copyRLE
_nearLoop:
    move.b (a4)+, (a1)+
    dbf d6, _nearLoop           // d6 is length-1, so this copies exactly length bytes
    bra _mainloop

    // Offset 1: the copy is a run of one repeated byte, written two bytes at a time.
_copyRLE:
    // assumes length is at least 3
    move.b (a4), (a1)+          // copy first byte
    btst #0, d6
    beq.s _doRLE                // branch if copy length is odd (because d6 is length-1)
    move.b (a4), (a1)+          // copy second byte
_doRLE:
    subq.w #2, d6
    lsr.w #1, d6                // length = (length-2) / 2
    move.w (a4), d5             // d5 = the run byte twice (byte at a4 and the copy just written after it)
_rleLoop:
    move.w d5, (a1)+
    dbf d6, _rleLoop
    bra _mainloop

_fail:
    moveq #0, d0                // result = 0
    bra _exit
_done:
    moveq #1, d0                // result = 1
_exit:
    movem.l (sp)+, d1-d7/a0-a6
    rts
}