Finalize lzsa1 compressed format, speed up and simplify decompression

2025-04-01 16:32:28 +00:00 · 2019-04-24 09:47:40 +02:00 · 2019-04-24 09:47:40 +02:00 · 2b9780bd65
commit 2b9780bd65
parent 593110ae5d
7 changed files with 180 additions and 177 deletions
--- a/asm/6502/decompress.asm
+++ b/asm/6502/decompress.asm
@ -44,18 +44,16 @@ DECODE_TOKEN
   BNE EMBEDDED_LITERALS                ; if not, count is directly embedded in token

   JSR GETSRC                           ; get extra byte of variable literals count
-   CMP #$FF                             ; FF <low 8 bits> <high 8 bits>?
-   BEQ LARGE_VARLEN_LITERALS            ; yes, go grab it
-
-   JSR GETSINGLEVARLENSRC               ; handle single extra byte of variable literals count
-
   CLC                                  ; add extra byte to len from token
   ADC #$07                             ; (LITERALS_RUN_LEN)
   BCC PREPARE_COPY_LITERALS
-   INY
+   BEQ LARGE_VARLEN_LITERALS            ; if adding up to zero, go grab 16-bit count
+
+   JSR GETSRC                           ; get single extended byte of variable literals count
+   INY                                  ; add 256 to literals count
   JMP PREPARE_COPY_LITERALS

-LARGE_VARLEN_LITERALS                   ; FF <low 8 bits> <high 8 bits>
+LARGE_VARLEN_LITERALS                   ; handle 16 bits literals count
                                        ; literals count = directly these 16 bits
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
@ -85,37 +83,46 @@ NO_LITERALS
   BMI GET_LONG_OFFSET                  ; $80: 16 bit offset

   JSR GETSRC                           ; get 8 bit offset from stream in A
-                                        ; Y (high 8 bits) is already set to 0 here
-   JMP FIX_OFFSET                       ; go increase offset
+
+   STA OFFSLO                           ; store final match offset
+
+   CLC                                  ; add dest + match offset
+   LDA PUTDST+1                         ; low 8 bits
+   ADC OFFSLO
+   STA COPY_MATCH_LOOP+1                ; store back reference address
+   LDA #$0FF                            ; high 8 bits
+   JMP GOT_OFFSET                       ; go prepare match

 GET_LONG_OFFSET                         ; handle 16 bit offset:
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
-   TAY                                  ; put high 8 bits in Y
-   TXA                                  ; put low 8 bits in A

-FIX_OFFSET
-   CLC                                  ; add 1 to offset
-   ADC #$01
-   BCC OFFSET_FIXED
+   STX OFFSLO                           ; store final match offset
+   STA OFFSHI

-   INY
-
-OFFSET_FIXED
-   STA OFFSLO                           ; store final match offset
-   STY OFFSHI
-   LDY #$00                             ; reset Y again
+   CLC                                  ; add dest + match offset
+   LDA PUTDST+1                         ; low 8 bits
+   ADC OFFSLO
+   STA COPY_MATCH_LOOP+1                ; store back reference address
+   LDA OFFSHI                           ; high 8 bits

+GOT_OFFSET
+   ADC PUTDST+2
+   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
+   
   PLA                                  ; retrieve token from stack again
   AND #$0F                             ; isolate match len (MMMM)
-   CMP #$0F                             ; MATCH_RUN_LEN?
+   CLC
+   ADC #$03
+   CMP #$12                             ; MATCH_RUN_LEN?
   BNE PREPARE_COPY_MATCH               ; if not, count is directly embedded in token

   JSR GETSRC                           ; get extra byte of variable match length
-   CMP #$FF                             ; FF <low 8 bits> <high 8 bits>?
-   BNE SHORT_VARLEN_MATCHLEN            ; if not, handle <8 bits> or <FE> <8 extra bits> sequence
+   CLC
+   ADC #$12                             ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
+   BCC PREPARE_COPY_MATCH
+   BNE SHORT_VARLEN_MATCHLEN

-                                        ; Handle FF <low 8 bits> <high 8 bits>:
-                                        ; match length = directly these 16 bits
+                                        ; Handle 16 bits match length
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
                                        ; large match length with zero high byte?
@ -123,33 +130,14 @@ OFFSET_FIXED
   JMP PREPARE_COPY_MATCH_Y

 SHORT_VARLEN_MATCHLEN
-   JSR GETSINGLEVARLENSRC               ; handle single extra byte of variable match len
-
-   CLC                                  ; add extra byte to len from token
-   ADC #$0F                             ; (MATCH_RUN_LEN)
-   BCC PREPARE_COPY_MATCH
-   INY
+   JSR GETSRC                           ; get single extended byte of variable match len
+   INY                                  ; add 256 to match length

 PREPARE_COPY_MATCH
-   CLC                                  ; add MIN_MATCH_SIZE to match length
-   ADC #$03
-   BCC MIN_MATCH_SIZE_ADDED
-   INY
-
-MIN_MATCH_SIZE_ADDED
   TAX
 PREPARE_COPY_MATCH_Y
   INY

-COPY_MATCH
-   SEC                                  ; substract dest - match offset
-   LDA PUTDST+1                         ; low 8 bits
-   SBC OFFSLO
-   STA COPY_MATCH_LOOP+1                ; store back reference address
-   LDA PUTDST+2                         ; high 8 bits
-   SBC OFFSHI
-   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
-   
 COPY_MATCH_LOOP
   LDA $AAAA                            ; get one byte of backreference
   INC COPY_MATCH_LOOP+1
@ -190,16 +178,3 @@ LZSA_SRC_HI = *+2
   INC GETSRC+2
 GETSRC_DONE
   RTS
-
-GETSINGLEVARLENSRC
-   CMP #$FE                             ; FE <extra 8 bits>?
-   BNE SINGLE_VARLEN_ADDED              ; no, just this byte
-
-   JSR GETSRC                           ; get extra byte of variable literals count
-   CLC                                  ; add $FE to len from token
-   ADC #$FE
-   BCC SINGLE_VARLEN_ADDED
-   INY
-
-SINGLE_VARLEN_ADDED
-   RTS
--- a/asm/8088/decompress_small.S
+++ b/asm/8088/decompress_small.S
@ -45,13 +45,23 @@ lzsa_decompress:
   mov cl,4
   shr al,cl               ; shift literals length into place

-   mov cx,ax               ; copy literals length into cx
   cmp al,07H              ; LITERALS_RUN_LEN?
-   jne .copy_literals      ; no, we have the full literals count from the token, go copy
+   jne .got_literals       ; no, we have the full literals count from the token, go copy

-   call .get_varlen        ; get complete literals length
+   lodsb                   ; grab extra length byte
+   add al,07H              ; add LITERALS_RUN_LEN
+   jnc .got_literals       ; if no overflow, we have the full literals count, go copy
+   jne .mid_literals

-.copy_literals:
+   lodsw                   ; grab 16-bit extra length
+   jmp short .got_literals
+
+.mid_literals:
+   lodsb                   ; grab single extra length byte
+   inc ah                  ; add 256
+
+.got_literals:
+   xchg cx,ax
   rep movsb               ; copy cx literals from ds:si to es:di

   test dl,dl              ; check match offset size in token (O bit)
@ -59,32 +69,42 @@ lzsa_decompress:

   xchg ax,cx              ; clear ah - cx is zero from the rep movsb above
   lodsb
+   dec ah
   jmp short .get_match_length

 .get_long_offset:
   lodsw                   ; Get 2-byte match offset

 .get_match_length:
-   inc ax                  ; the match offset is stored off-by-1, increase it
   xchg ax,dx              ; dx: match offset  ax: original token
   and al,0FH              ; isolate match length in token (MMMM)
+   add al,3                ; add MIN_MATCH_SIZE

-   mov cx,ax               ; copy match length into cx
-   cmp al,0FH              ; MATCH_RUN_LEN?
-   jne .copy_match         ; no, we have the full match length from the token, go copy
-   
-   call .get_varlen        ; get complete match length
-   test cx,cx
-   je short .done_decompressing ; bail if we hit EOD
+   cmp al,012H             ; MATCH_RUN_LEN?
+   jne .got_matchlen       ; no, we have the full match length from the token, go copy

-.copy_match:
-   add cx,3                ; add MIN_MATCH_SIZE to get the final match length to copy
+   lodsb                   ; grab extra length byte
+   add al,012H             ; add MIN_MATCH_SIZE + MATCH_RUN_LEN
+   jnc .got_matchlen       ; if no overflow, we have the entire length
+   jne .mid_matchlen       
+
+   lodsw                   ; grab 16-bit length
+   test ax,ax              ; bail if we hit EOD
+   je short .done_decompressing 
+   jmp short .got_matchlen
+
+.mid_matchlen:
+   lodsb                   ; grab single extra length byte
+   inc ah                  ; add 256
+
+.got_matchlen:
+   xchg cx,ax              ; copy match length into cx
   push ds                 ; save ds:si (current pointer to compressed data)
   xchg si,ax          
   push es
   pop ds
   mov si,di               ; ds:si now points at back reference in output data
-   sub si,dx
+   add si,dx
   rep movsb               ; copy match
   xchg si,ax              ; restore ds:si
   pop ds
@ -95,23 +115,3 @@ lzsa_decompress:
   xchg ax,di              ; compute decompressed size
   sub ax,di
   ret                     ; done
-
-.get_varlen:
-   lodsb                   ; grab extra length byte
-
-   cmp al,0FFH             ; 3-byte extra length?
-   je .large_varlen        ; yes, go grab it
-
-   add cx,ax               ; add extra length byte to length from token
-   cmp al,0FEH             ; 2-byte extra length?
-   jne .varlen_done        ; no, we have the full length now, bail
-
-   lodsb                   ; grab extra length byte
-   add cx,ax               ; add to length from token
-.varlen_done:
-   ret                     ; bail
-
-.large_varlen:
-   lodsw                   ; grab 16-bit extra length
-   xchg cx,ax
-   ret
--- a/asm/z80/unlzsa_fast_v1.asm
+++ b/asm/z80/unlzsa_fast_v1.asm
@ -1,5 +1,5 @@
 ;
-;  Speed-optimized LZSA decompressor by spke (v.1 03-05/04/2019, 122 bytes)
+;  Speed-optimized LZSA decompressor by spke (v.1 23-24/04/2019, 134 bytes)
 ;
 ;  The data must be comressed using the command line compressor by Emmanuel Marty
 ;  The compression is done as follows:
@ -19,6 +19,7 @@
 ;
 ;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
 ;
+;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
@ -40,64 +41,69 @@
 		ld b,0 : jr ReadToken

 MoreLiterals:	; there are three possible situations here
-		; 1) a byte 0..253 is added to LLL and that is it or
-		; 2) a byte 254 is followed by another byte to add or
-		; 3) a byte 255 is followed by a word to be used
-		ld a,7 : add (hl) : inc hl : jp nc,CopyLiterals
+		xor (hl) : inc hl : exa
+		ld a,7 : add (hl) : inc hl : jr c,ManyLiterals

-.Overflow	; we get here if the literals length byte plus 7 is greater than 255
-		inc b : cp 5 : jr c,CopyLiterals : jr nz,.Code255		; 5 is 7+254 modulo 256
-.Code254	add (hl) : inc hl : jr nc,CopyLiterals : inc b : jr CopyLiterals
-.Code255	ld c,(hl) : inc hl : ld b,(hl) : inc hl : jr CopyLiterals.UseC
+CopyLiterals:	ld c,a
+.UseC		ldir
+
+		push de : ld e,(hl) : inc hl : exa : jp m,LongOffset
+		ld d,#FF : add 3 : cp 15+3 : jp c,CopyMatch
+		jr LongerMatch
+
+ManyLiterals:
+.code1		ld b,a : ld c,(hl) : inc hl : jr nz,CopyLiterals.UseC
+.code0		ld b,(hl) : inc hl : jr CopyLiterals.UseC
+		
+NoLiterals:	xor (hl) : inc hl
+		push de : ld e,(hl) : inc hl : jp m,LongOffset
+		ld d,#FF : add 3 : cp 15+3 : jr nc,LongerMatch

 		; placed here this saves a JP per iteration
-CopyMatchNC	scf								; flag C for SBC HL,DE below must be set!!!
 CopyMatch:	ld c,a
 .UseC		ex (sp),hl : push hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
-		sbc hl,de : pop de						; BC = len, DE = dest, HL = dest-offset, SP->[src]
+		add hl,de : pop de						; BC = len, DE = dest, HL = dest-offset, SP->[src]
 		ldir : pop hl							; BC = 0, DE = dest, HL = src
 	
 ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,
 		; where LLL is the number of literals and MMMM is
 		; a length of the match that follows after the literals
-		ld a,(hl) : exa : ld a,(hl) : inc hl				; token is read twice to be re-used later
-		and #70 : jr z,NoLiterals
+		ld a,(hl) : and #70 : jr z,NoLiterals

 		cp #70 : jr z,MoreLiterals					; LLL=7 means 7+ literals...
 		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...

-CopyLiterals:	ld c,a
-.UseC		ldir
+		ld c,a : ld a,(hl) : inc hl
+		ldir

-NoLiterals:	; next we read the first byte of the offset
+		; next we read the first byte of the offset
 		push de : ld e,(hl) : inc hl
 		; the top bit of token is set if the offset contains two bytes
-		exa : and #8F : jp m,LongOffset
+		and #8F : jp m,LongOffset

-ShortOffset:	ld d,b								; we keep B=0 for situations like this
+ShortOffset:	ld d,#FF

 		; short matches have length 0+3..14+3
 ReadMatchLen:	add 3 : cp 15+3 : jp c,CopyMatch

 		; MMMM=15 indicates a multi-byte number of literals
-		; there are three possible situations here
-		; 1) a byte 0..253 is added to MMMM and that is it or
-		; 2) a byte 254 is followed by another byte to add or
-		; 3) a byte 255 is followed by a word to be used
-LongerMatch:	add (hl) : inc hl : jp nc,CopyMatchNC
+LongerMatch:	add (hl) : inc hl : jr nc,CopyMatch

-.Overflow	; we get here if the match length byte plus 15+3 is greater than 255
-		inc b : cp 16 : jr c,CopyMatch : jr nz,.Code255			; 16 is 15+3+254 modulo 256
-.Code254	add (hl) : inc hl : jr nc,CopyMatchNC : inc b : jr CopyMatch
-.Code255	ld c,(hl) : inc hl : ld b,(hl) : inc hl
-		; two-byte match length that is equal to zero is the marker for End-of-Data (EOD)
-.CheckEOD	ld a,b : or c : jr nz,CopyMatch.UseC
+		; the codes are designed to overflow;
+		; the overflow value 1 means read 1 extra byte
+		; and overflow value 0 means read 2 extra bytes
+.code1		ld b,a : ld c,(hl) : inc hl : jr nz,CopyMatch.UseC
+.code0		ld b,(hl) : inc hl
+
+		; the two-byte match length equal to zero
+		; designates the end-of-data marker
+		ld a,b : or c : jr nz,CopyMatch.UseC
 		pop de : ret

 LongOffset:	; read second byte of the offset
 		ld d,(hl) : inc hl
 		add -128+3 : cp 15+3 : jp c,CopyMatch
-		add (hl) : inc hl : jp nc,CopyMatchNC
-		jr LongerMatch.Overflow
+		add (hl) : inc hl : jr nc,CopyMatch
+		jr LongerMatch.code1


--- a/asm/z80/unlzsa_small_v1.asm
+++ b/asm/z80/unlzsa_small_v1.asm
@ -1,5 +1,5 @@
 ;
-;  Size-optimized LZSA decompressor by spke (v.1 19/04/2019, 81 bytes)
+;  Size-optimized LZSA decompressor by spke (v.1 23/04/2019, 69 bytes)
 ;
 ;  The data must be comressed using the command line compressor by Emmanuel Marty
 ;  The compression is done as follows:
@ -19,14 +19,30 @@
 ;
 ;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
 ;
+;  This software is provided 'as-is', without any express or implied
+;  warranty.  In no event will the authors be held liable for any damages
+;  arising from the use of this software.
+;
+;  Permission is granted to anyone to use this software for any purpose,
+;  including commercial applications, and to alter it and redistribute it
+;  freely, subject to the following restrictions:
+;
+;  1. The origin of this software must not be misrepresented; you must not
+;     claim that you wrote the original software. If you use this software
+;     in a product, an acknowledgment in the product documentation would be
+;     appreciated but is not required.
+;  2. Altered source versions must be plainly marked as such, and must not be
+;     misrepresented as being the original software.
+;  3. This notice may not be removed or altered from any source distribution.
+;

@DecompressLZSA:
 		ld b,0

-ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,
+		; first a byte token "O|LLL|MMMM" is read from the stream,
 		; where LLL is the number of literals and MMMM is
 		; a length of the match that follows after the literals
-		ld a,(hl) : exa : ld a,(hl) : inc hl
+ReadToken:	ld a,(hl) : exa : ld a,(hl) : inc hl
 		and #70 : jr z,NoLiterals

 		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...
@ -34,29 +50,36 @@ ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,

 		ld c,a : ldir

-		; next we read the first byte of the offset
-NoLiterals:	push de : ld e,(hl) : inc hl : ld d,b
-		; the top bit of token is set if the offset contains two bytes
+		; next we read the low byte of the -offset
+NoLiterals:	push de : ld e,(hl) : inc hl : ld d,#FF
+		; the top bit of token is set if
+		; the offset contains the high byte as well
 		exa : or a : jp p,ShortOffset

 LongOffset:	ld d,(hl) : inc hl

-ShortOffset:	and #0F : add 3 : cp 15+3 : call z,ReadLongBA
-
+		; last but not least, the match length is read
+ShortOffset:	and #0F : add 3							; MMMM<15 means match lengths 0+3..14+3
+		cp 15+3 : call z,ReadLongBA					; MMMM=15 means lengths 14+3+
 		ld c,a
-		ex (sp),hl : push hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
-		scf : sbc hl,de : pop de					; BC = len, DE = dest, HL = dest-offset, SP->[src]
+
+		ex (sp),hl : push hl						; BC = len, DE = -offset, HL = dest, SP ->[dest,src]
+		add hl,de : pop de						; BC = len, DE = dest, HL = dest+(-offset), SP->[src]
 		ldir : pop hl							; BC = 0, DE = dest, HL = src
 		jr ReadToken

-ReadLongBA:	ld c,(hl) : inc hl
-		add c : jr nc,$+3 : inc b
-		inc c : jr z,.Code255
-		inc c : ret nz
+		; a standard routine to read extended codes
+		; into registers B (higher byte) and A (lower byte).
+ReadLongBA:	add (hl) : inc hl : ret nc

-.Code254	add (hl) : inc hl : ret nc : inc b : ret
+		; the codes are designed to overflow;
+		; the overflow value 1 means read 1 extra byte
+		; and overflow value 0 means read 2 extra bytes
+.code1:		ld b,a : ld a,(hl) : inc hl : ret nz
+.code0:		ld c,a : ld b,(hl) : inc hl

-.Code255	ld c,(hl) : inc hl : ld b,(hl) : inc hl
-
-		ld a,b : or c : ld a,c : ret nz
+		; the two-byte match length equal to zero
+		; designates the end-of-data marker
+		or b : ld a,c : ret nz
 		pop de : pop de : ret
+
--- a/src/expand.c
+++ b/src/expand.c
@ -43,15 +43,15 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p
         nByte = *pInBlock++;
         nLiterals += ((unsigned int)nByte);

-         if (nByte == 254) {
+         if (nByte == 250) {
            if (pInBlock < pInBlockEnd) {
-               nLiterals += ((unsigned int)*pInBlock++);
+               nLiterals = 256 + ((unsigned int)*pInBlock++);
            }
            else {
               return -1;
            }
         }
-         else if (nByte == 255) {
+         else if (nByte == 249) {
            if ((pInBlock + 1) < pInBlockEnd) {
               nLiterals = ((unsigned int)*pInBlock++);
               nLiterals |= (((unsigned int)*pInBlock++) << 8);
@ -87,22 +87,23 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
   const unsigned char *pInBlock = *ppInBlock;
   unsigned char *pCurOutData = *ppCurOutData;

-   if (nMatchLen == MATCH_RUN_LEN) {
+   nMatchLen += MIN_MATCH_SIZE;
+   if (nMatchLen == (MATCH_RUN_LEN + MIN_MATCH_SIZE)) {
      unsigned char nByte;

      if (pInBlock < pInBlockEnd) {
         nByte = *pInBlock++;
         nMatchLen += ((unsigned int)nByte);

-         if (nByte == 254) {
+         if (nByte == 239) {
            if (pInBlock < pInBlockEnd) {
-               nMatchLen += ((unsigned int)*pInBlock++);
+               nMatchLen = 256 + ((unsigned int)*pInBlock++);
            }
            else {
               return -1;
            }
         }
-         else if (nByte == 255) {
+         else if (nByte == 238) {
            if ((pInBlock + 1) < pInBlockEnd) {
               nMatchLen = ((unsigned int)*pInBlock++);
               nMatchLen |= (((unsigned int)*pInBlock++) << 8);
@ -117,8 +118,6 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
      }
   }

-   nMatchLen += MIN_MATCH_SIZE;
-
   if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
      /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */

@ -196,9 +195,9 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
         int nMatchOffset;

-         nMatchOffset = ((unsigned int)*pInBlock++);
+         nMatchOffset = ((unsigned int)(*pInBlock++ ^ 0xff));
         if (token & 0x80) {
-            nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
+            nMatchOffset |= (((unsigned int)(*pInBlock++ ^ 0xff)) << 8);
         }
         nMatchOffset++;

@ -234,9 +233,9 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
         int nMatchOffset;

-         nMatchOffset = ((unsigned int)*pInBlock++);
+         nMatchOffset = ((unsigned int)(*pInBlock++ ^ 0xff));
         if (token & 0x80) {
-            nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
+            nMatchOffset |= (((unsigned int)(*pInBlock++ ^ 0xff)) << 8);
         }
         nMatchOffset++;

--- a/src/main.c
+++ b/src/main.c
@ -229,7 +229,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,

   if ((nOptions & OPT_RAW) != 0) {
      cFooter[0] = 0x00;         /* EOD marker for raw block */
-      cFooter[1] = 0xff;         
+      cFooter[1] = 0xee;
      cFooter[2] = 0x00;
      cFooter[3] = 0x00;
      nFooterSize = 4;
--- a/src/shrink.c
+++ b/src/shrink.c
@ -400,10 +400,10 @@ static inline int lzsa_get_literals_varlen_size(const int nLength) {
      return 0;
   }
   else {
-      if (nLength < (LITERALS_RUN_LEN + 254))
+      if (nLength < 256)
         return 1;
      else {
-         if (nLength < (LITERALS_RUN_LEN + 510))
+         if (nLength < 512)
            return 2;
         else
            return 3;
@ -421,15 +421,15 @@ static inline int lzsa_get_literals_varlen_size(const int nLength) {
 */
 static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
   if (nLength >= LITERALS_RUN_LEN) {
-      if (nLength < (LITERALS_RUN_LEN + 254))
+      if (nLength < 256)
         pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN;
      else {
-         if (nLength < (LITERALS_RUN_LEN + 510)) {
-            pOutData[nOutOffset++] = 254;
-            pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN - 254;
+         if (nLength < 512) {
+            pOutData[nOutOffset++] = 250;
+            pOutData[nOutOffset++] = nLength - 256;
         }
         else {
-            pOutData[nOutOffset++] = 255;
+            pOutData[nOutOffset++] = 249;
            pOutData[nOutOffset++] = nLength & 0xff;
            pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
         }
@ -451,10 +451,10 @@ static inline int lzsa_get_match_varlen_size(const int nLength) {
      return 0;
   }
   else {
-      if (nLength < (MATCH_RUN_LEN + 254))
+      if ((nLength + MIN_MATCH_SIZE) < 256)
         return 1;
      else {
-         if (nLength < (MATCH_RUN_LEN + 510))
+         if ((nLength + MIN_MATCH_SIZE) < 512)
            return 2;
         else
            return 3;
@ -472,17 +472,17 @@ static inline int lzsa_get_match_varlen_size(const int nLength) {
 */
 static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
   if (nLength >= MATCH_RUN_LEN) {
-      if (nLength < (MATCH_RUN_LEN + 254))
+      if ((nLength + MIN_MATCH_SIZE) < 256)
         pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN;
      else {
-         if (nLength < (MATCH_RUN_LEN + 510)) {
-            pOutData[nOutOffset++] = 254;
-            pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN - 254;
+         if ((nLength + MIN_MATCH_SIZE) < 512) {
+            pOutData[nOutOffset++] = 239;
+            pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE - 256;
         }
         else {
-            pOutData[nOutOffset++] = 255;
-            pOutData[nOutOffset++] = nLength & 0xff;
-            pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
+            pOutData[nOutOffset++] = 238;
+            pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE) & 0xff;
+            pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE) >> 8) & 0xff;
         }
      }
   }
@ -511,7 +511,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart

      int nLiteralsLen = nLastLiteralsOffset - i;
      nBestCost = 1 + cost[i + 1];
-      if (nLiteralsLen == LITERALS_RUN_LEN || nLiteralsLen == (LITERALS_RUN_LEN + 254) || nLiteralsLen == (LITERALS_RUN_LEN + 510)) {
+      if (nLiteralsLen == LITERALS_RUN_LEN || nLiteralsLen == 256 || nLiteralsLen == 512) {
         /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
          * The cost automatically accumulates down the chain. */
         nBestCost++;
@ -722,9 +722,9 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
            nNumLiterals = 0;
         }

-         pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
+         pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
         if (nTokenLongOffset) {
-            pOutData[nOutOffset++] = (nMatchOffset - 1) >> 8;
+            pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
         }
         nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen);
         i += nMatchLen;