tbxi/tbxi/slow_lzss.py

# This file is adapted from LZSS.C by Haruhiko Okumura 4/6/1989

# Decompression is pretty quick
# Compression is pretty slow:
# about 50s to compress a 4 MB rom on my machine

from warnings import warn
have_warned_about_slowness = False

N = 0x1000
F = 18
THRESHOLD = 2
NIL = N


def memset(buf, start, stop, to):
    for i in range(start, stop):
        buf[i] = to


def decompress(lzss):
    lzss = iter(lzss)
    plain = bytearray()

    lzdict = bytearray(b' ' * N)

    dict_i = N - F
    def push(byte):
        nonlocal dict_i
        lzdict[dict_i % N] = byte
        dict_i += 1

        plain.append(byte)

    # Iterate through byte-headed "runs"
    try:
        for headerbyte in lzss:
            for bitnum in range(8):
                if (headerbyte >> bitnum) & 1:
                    # Copy a single byte verbatim
                    push(next(lzss))
                else:
                    # Copy 3-18 bytes from the dictionary
                    byte1 = next(lzss)
                    byte2 = next(lzss)
                    lookup_i = (byte2 << 4) & 0xf00 | byte1
                    lookup_len = (byte2 & 0x0f) + 3

                    for i in range(lookup_i, lookup_i+lookup_len):
                        push(lzdict[i % N])

    except StopIteration:
        # Means the last header had <8 real bits, no problem
        pass

    return bytes(plain)


def compress(plain):
    global have_warned_about_slowness

    if not have_warned_about_slowness:
        have_warned_about_slowness = True
        warn('Using slow pure-Python LZSS compression')

    if not plain: return b''

    # Init the variables that get shared with the two closures below
    lchild = [0] * (N + 1)
    rchild = [0] * (N + 257); memset(rchild, N + 1, N + 256 + 1, NIL)
    parent = [0] * (N + 1); memset(parent, 0, N, NIL)
    text_buf = bytearray(N + F - 1); memset(text_buf, 0, N - F, ord(' '))
    match_length = match_position = 0


    # Inserts string of length F, text_buf[r..r+F-1], into one of the trees
    # (text_buf[r]'th tree) and returns the longest-match position and length
    # via the global variables match_position and match_length.
    # If match_length = F, then removes the old node in favor of the new one,
    # because the old one will be deleted sooner. Note r plays double role,
    # as tree node and position in buffer.
    def insert_node(r):
        nonlocal lchild, rchild, parent, text_buf, match_length, match_position

        cmp = 1
        key = text_buf[r:]
        p = N + 1 + key[0]
        rchild[r] = lchild[r] = NIL

        match_length = 0

        while 1:
            if cmp >= 0:
                if rchild[p] != NIL:
                    p = rchild[p]
                else:
                    rchild[p] = r
                    parent[r] = p
                    return
            else:
                if lchild[p] != NIL:
                    p = lchild[p]
                else:
                    lchild[p] = r
                    parent[r] = p
                    return

            i = 1
            while i < F:
                cmp = key[i] - text_buf[p + i]
                if cmp != 0: break
                i += 1

            if i > match_length:
                match_position = p
                match_length = i
                if match_length >= F: break # out of while loop

        parent[r] = parent[p]
        lchild[r] = lchild[p]
        rchild[r] = rchild[p]
        parent[lchild[p]] = r
        parent[rchild[p]] = r

        if rchild[parent[p]] == p:
            rchild[parent[p]] = r
        else:
            lchild[parent[p]] = r

        parent[p] = NIL;


    # deletes node p from tree
    def delete_node(p):
        nonlocal lchild, rchild, parent

        if parent[p] == NIL: return

        if rchild[p] == NIL:
            q = lchild[p]
        elif lchild[p] == NIL:
            q = rchild[p]
        else:
            q = lchild[p]
            if rchild[q] != NIL:
                while 1:
                    q = rchild[q]
                    if rchild[q] == NIL: break

                rchild[parent[q]] = lchild[q]
                parent[lchild[q]] = parent[q]
                lchild[q] = lchild[p]
                parent[lchild[p]] = q

            rchild[q] = rchild[p]
            parent[rchild[p]] = q

        parent[q] = parent[p]

        if rchild[parent[p]] == p:
            rchild[parent[p]] = q
        else:
            lchild[parent[p]] = q

        parent[p] = NIL


    # End of function defs, now onto the main attraction
    plain_len = len(plain)
    plain_i = 0

    # code_buf[1..16] saves eight units of code, and code_buf[0] works
    # as eight flags, "1" representing that the unit is an unencoded
    # letter (1 byte), "" a position-and-length pair (2 bytes).
    # Thus, eight units require at most 16 bytes of code.
    code_buf = bytearray(1)
    code_buf_list = [code_buf]
    mask = 1

    # Clear the buffer with any character that will appear often.
    s = 0;  r = N - F

    # Read F bytes into the last F bytes of the buffer
    tblen = 0
    while tblen < F and plain_i < plain_len:
        text_buf[r + tblen] = plain[plain_i]
        tblen += 1
        plain_i += 1

    # Insert the F strings, each of which begins with one or more
    # 'space' characters.  Note the order in which these strings are
    # inserted.  This way, degenerate trees will be less likely to occur.
    for i in range(1, F+1):
        insert_node(r - i)

    # Finally, insert the whole string just read.
    # The global variables match_length and match_position are set.
    insert_node(r)
    while 1:
        match_length = min(match_length, tblen)

        if match_length <= THRESHOLD:
            # Not long enough match.  Send one byte.
            match_length = 1
            code_buf[0] |= mask # 'send one byte' flag
            code_buf.append(text_buf[r]) # Send uncoded.
        else:
            # Send position and length pair. Note match_length > THRESHOLD.
            byte1 = match_position & 0xFF
            byte2 = (match_position >> 4 & 0xF0) | (match_length - THRESHOLD - 1)
            code_buf.append(byte1)
            code_buf.append(byte2)

        # Shift mask left one bit.
        mask = (mask << 1) & 0xFF
        # Send at most 8 units of code together
        if mask == 0:
            code_buf = bytearray(1)
            code_buf_list.append(code_buf)
            mask = 1

        last_match_length = match_length
        i = 0
        while i < last_match_length and plain_i < plain_len:
            delete_node(s) # Delete old strings and
            c = plain[plain_i]; plain_i += 1
            text_buf[s] = c # read new bytes

            # If the position is near the end of buffer, extend the buffer
            # to make string comparison easier.
            if s < F - 1:
                text_buf[s + N] = c

            # Since this is a ring buffer, increment the position modulo N.
            s = (s + 1) % N
            r = (r + 1) % N

            # Register the string in text_buf[r..r+F-1]
            insert_node(r)

            i += 1

        while i < last_match_length:
            delete_node(s)

            # After the end of text, no need to read,
            s = (s + 1) % N
            r = (r + 1) % N

            # but buffer may not be empty.
            tblen -= 1
            if tblen:
                insert_node(r)

            i += 1

        # until length of string to be processed is zero
        if tblen == 0: break

    if len(code_buf_list[-1]) == 1:
        code_buf_list.pop()

    return b''.join(code_buf_list)