Split code, add automated tests, update LZSA2

2025-04-06 20:37:12 +00:00 · 2019-06-07 23:15:40 +02:00 · 2019-06-07 23:15:40 +02:00 · b4e3c07d3a
commit b4e3c07d3a
parent 45cb124c4d
28 changed files with 4002 additions and 216 deletions
--- a/src/dictionary.c
+++ b/src/dictionary.c
@ -0,0 +1,101 @@
+/*
+ * dictionary.c - dictionary implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "format.h"
+#include "lib.h"
+
+/**
+ * Load dictionary contents
+ *
+ * @param pszDictionaryFilename name of dictionary file, or NULL for none
+ * @param pDictionaryData pointer to returned dictionary contents, or NULL for none
+ * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) {
+   unsigned char *pDictionaryData = NULL;
+   int nDictionaryDataSize = 0;
+
+   if (pszDictionaryFilename) {
+      pDictionaryData = (unsigned char *)malloc(BLOCK_SIZE);
+      if (!pDictionaryData) {
+         return LZSA_ERROR_MEMORY;
+      }
+
+      FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
+      if (!pDictionaryFile) {
+         free(pDictionaryData);
+         pDictionaryData = NULL;
+         return LZSA_ERROR_DICTIONARY;
+      }
+
+      fseek(pDictionaryFile, 0, SEEK_END);
+#ifdef _WIN32
+      __int64 nDictionaryFileSize = _ftelli64(pDictionaryFile);
+#else
+      off_t nDictionaryFileSize = ftello(pDictionaryFile);
+#endif
+      if (nDictionaryFileSize > BLOCK_SIZE) {
+         /* Use the last BLOCK_SIZE bytes of the dictionary */
+         fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END);
+      }
+      else {
+         fseek(pDictionaryFile, 0, SEEK_SET);
+      }
+
+      nDictionaryDataSize = (int)fread(pDictionaryData, 1, BLOCK_SIZE, pDictionaryFile);
+      if (nDictionaryDataSize < 0)
+         nDictionaryDataSize = 0;
+
+      fclose(pDictionaryFile);
+      pDictionaryFile = NULL;
+   }
+
+   *ppDictionaryData = pDictionaryData;
+   *pDictionaryDataSize = nDictionaryDataSize;
+   return LZSA_OK;
+}
+
+/**
+ * Free dictionary contents
+ *
+ * @param pDictionaryData pointer to pointer to dictionary contents
+ */
+void lzsa_dictionary_free(void **ppDictionaryData) {
+   if (*ppDictionaryData) {
+      free(*ppDictionaryData);
+      ppDictionaryData = NULL;
+   }
+}
--- a/src/dictionary.h
+++ b/src/dictionary.h
@ -0,0 +1,56 @@
+/*
+ * dictionary.h - dictionary definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _DICTIONARY_H
+#define _DICTIONARY_H
+
+#include <stdlib.h>
+
+/**
+ * Load dictionary contents
+ *
+ * @param pszDictionaryFilename name of dictionary file, or NULL for none
+ * @param pDictionaryData pointer to returned dictionary contents, or NULL for none
+ * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
+
+/**
+ * Free dictionary contents
+ *
+ * @param pDictionaryData pointer to pointer to dictionary contents
+ */
+void lzsa_dictionary_free(void **ppDictionaryData);
+
+#endif /* _DICTIONARY_H */
--- a/src/expand_block_v1.c
+++ b/src/expand_block_v1.c
@ -0,0 +1,217 @@
+/*
+ * expand_v1.c - LZSA1 block decompressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "format.h"
+#include "expand_block_v1.h"
+
+#ifdef _MSC_VER
+#define FORCE_INLINE __forceinline
+#else /* _MSC_VER */
+#define FORCE_INLINE __attribute__((always_inline))
+#endif /* _MSC_VER */
+
+static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) {
+   unsigned int nByte;
+   const unsigned char *pInBlock = *ppInBlock;
+
+   if (pInBlock < pInBlockEnd) {
+      nByte = *pInBlock++;
+      (*nLiterals) += nByte;
+
+      if (nByte == 250) {
+         if (pInBlock < pInBlockEnd) {
+            (*nLiterals) = 256 + ((unsigned int)*pInBlock++);
+         }
+         else {
+            return -1;
+         }
+      }
+      else if (nByte == 249) {
+         if ((pInBlock + 1) < pInBlockEnd) {
+            (*nLiterals) = ((unsigned int)*pInBlock++);
+            (*nLiterals) |= (((unsigned int)*pInBlock++) << 8);
+         }
+         else {
+            return -1;
+         }
+      }
+
+      *ppInBlock = pInBlock;
+      return 0;
+   }
+   else {
+      return -1;
+   }
+}
+
+static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) {
+   unsigned int nByte;
+   const unsigned char *pInBlock = *ppInBlock;
+
+   if (pInBlock < pInBlockEnd) {
+      nByte = *pInBlock++;
+      (*nMatchLen) += nByte;
+
+      if (nByte == 239) {
+         if (pInBlock < pInBlockEnd) {
+            (*nMatchLen) = 256 + ((unsigned int)*pInBlock++);
+         }
+         else {
+            return -1;
+         }
+      }
+      else if (nByte == 238) {
+         if ((pInBlock + 1) < pInBlockEnd) {
+            (*nMatchLen) = ((unsigned int)*pInBlock++);
+            (*nMatchLen) |= (((unsigned int)*pInBlock++) << 8);
+         }
+         else {
+            return -1;
+         }
+      }
+
+      *ppInBlock = pInBlock;
+      return 0;
+   }
+   else {
+      return -1;
+   }
+}
+
+/**
+ * Decompress one LZSA1 data block
+ *
+ * @param pInBlock pointer to compressed data
+ * @param nInBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
+   const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
+   unsigned char *pCurOutData = pOutData + nOutDataOffset;
+   const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
+   const unsigned char *pOutDataFastEnd = pOutDataEnd - 18;
+
+   while (pInBlock < pInBlockEnd) {
+      const unsigned char token = *pInBlock++;
+      unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
+
+      if (nLiterals != LITERALS_RUN_LEN_V1 && (pInBlock + 8) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
+         memcpy(pCurOutData, pInBlock, 8);
+         pInBlock += nLiterals;
+         pCurOutData += nLiterals;
+      }
+      else {
+         if (nLiterals == LITERALS_RUN_LEN_V1) {
+            if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals))
+               return -1;
+         }
+
+         if (nLiterals != 0) {
+            if ((pInBlock + nLiterals) <= pInBlockEnd &&
+               (pCurOutData + nLiterals) <= pOutDataEnd) {
+               memcpy(pCurOutData, pInBlock, nLiterals);
+               pInBlock += nLiterals;
+               pCurOutData += nLiterals;
+            }
+            else {
+               return -1;
+            }
+         }
+      }
+
+      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
+         unsigned int nMatchOffset;
+
+         nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff;
+         if (token & 0x80) {
+            nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00;
+         }
+         nMatchOffset++;
+
+         const unsigned char *pSrc = pCurOutData - nMatchOffset;
+         if (pSrc >= pOutData) {
+            unsigned int nMatchLen = (unsigned int)(token & 0x0f);
+            if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
+               memcpy(pCurOutData, pSrc, 8);
+               memcpy(pCurOutData + 8, pSrc + 8, 8);
+               memcpy(pCurOutData + 16, pSrc + 16, 2);
+               pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
+            }
+            else {
+               nMatchLen += MIN_MATCH_SIZE_V1;
+               if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
+                  if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen))
+                     return -1;
+               }
+
+               if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
+                  /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
+
+                  if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
+                     const unsigned char *pCopySrc = pSrc;
+                     unsigned char *pCopyDst = pCurOutData;
+                     const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
+
+                     do {
+                        memcpy(pCopyDst, pCopySrc, 16);
+                        pCopySrc += 16;
+                        pCopyDst += 16;
+                     } while (pCopyDst < pCopyEndDst);
+
+                     pCurOutData += nMatchLen;
+                  }
+                  else {
+                     while (nMatchLen) {
+                        *pCurOutData++ = *pSrc++;
+                        nMatchLen--;
+                     }
+                  }
+               }
+               else {
+                  return -1;
+               }
+            }
+         }
+         else {
+            return -1;
+         }
+      }
+   }
+
+   return (int)(pCurOutData - (pOutData + nOutDataOffset));
+}
--- a/src/expand_block_v1.h
+++ b/src/expand_block_v1.h
@ -0,0 +1,49 @@
+/*
+ * expand_v1.h - LZSA1 block decompressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _EXPAND_V1_H
+#define _EXPAND_V1_H
+
+/**
+ * Decompress one LZSA1 data block
+ *
+ * @param pInBlock pointer to compressed data
+ * @param nInBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
+
+#endif /* _EXPAND_V1_H */
--- a/src/expand_block_v2.c
+++ b/src/expand_block_v2.c
@ -0,0 +1,242 @@
+/*
+ * expand_v2.c - LZSA2 block decompressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "format.h"
+#include "expand_block_v2.h"
+
+#ifdef _MSC_VER
+#define FORCE_INLINE __forceinline
+#else /* _MSC_VER */
+#define FORCE_INLINE __attribute__((always_inline))
+#endif /* _MSC_VER */
+
+static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) {
+   if ((*nCurNibbles ^= 1) != 0) {
+      const unsigned char *pInBlock = *ppInBlock;
+      if (pInBlock < pInBlockEnd) {
+         (*nibbles) = *pInBlock++;
+         *ppInBlock = pInBlock;
+         (*nValue) = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
+         return 0;
+      }
+      else {
+         return -1;
+      }
+   }
+
+   (*nValue) = (unsigned int)((*nibbles) & 0x0f);
+   return 0;
+}
+
+static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) {
+   unsigned int nValue;
+
+   if (!lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nValue)) {
+      (*nLength) += nValue;
+
+      if (nValue == 15) {
+         const unsigned char *pInBlock = *ppInBlock;
+
+         if (pInBlock < pInBlockEnd) {
+            (*nLength) += ((unsigned int)*pInBlock++);
+
+            if ((*nLength) == 257) {
+               if ((pInBlock + 1) < pInBlockEnd) {
+                  (*nLength) = ((unsigned int)*pInBlock++);
+                  (*nLength) |= (((unsigned int)*pInBlock++) << 8);
+               }
+               else {
+                  return -1;
+               }
+            }
+         }
+         else {
+            return -1;
+         }
+
+         *ppInBlock = pInBlock;
+      }
+
+      return 0;
+   }
+   else {
+      return -1;
+   }
+}
+
+/**
+ * Decompress one LZSA2 data block
+ *
+ * @param pInBlock pointer to compressed data
+ * @param nInBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
+   const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
+   unsigned char *pCurOutData = pOutData + nOutDataOffset;
+   const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
+   const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
+   int nCurNibbles = 0;
+   unsigned char nibbles;
+   int nMatchOffset = 0;
+
+   while (pInBlock < pInBlockEnd) {
+      const unsigned char token = *pInBlock++;
+      unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
+
+      if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
+         memcpy(pCurOutData, pInBlock, 4);
+         pInBlock += nLiterals;
+         pCurOutData += nLiterals;
+      }
+      else {
+         if (nLiterals == LITERALS_RUN_LEN_V2) {
+            if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals))
+               return -1;
+         }
+
+         if (nLiterals != 0) {
+            if ((pInBlock + nLiterals) <= pInBlockEnd &&
+               (pCurOutData + nLiterals) <= pOutDataEnd) {
+               memcpy(pCurOutData, pInBlock, nLiterals);
+               pInBlock += nLiterals;
+               pCurOutData += nLiterals;
+            }
+            else {
+               return -1;
+            }
+         }
+      }
+
+      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
+         unsigned char nOffsetMode = token & 0xc0;
+         unsigned int nValue;
+
+         switch (nOffsetMode) {
+         case 0x00:
+            /* 5 bit offset */
+            if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
+               return -1;
+            nMatchOffset = nValue;
+            nMatchOffset |= ((token & 0x20) >> 1);
+            nMatchOffset ^= 0x1f;
+            nMatchOffset++;
+            break;
+
+         case 0x40:
+            /* 9 bit offset */
+            nMatchOffset = (unsigned int)(*pInBlock++);
+            nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
+            nMatchOffset ^= 0x1ff;
+            nMatchOffset++;
+            break;
+
+         case 0x80:
+            /* 13 bit offset */
+            if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
+               return -1;
+            nMatchOffset = (unsigned int)(*pInBlock++);
+            nMatchOffset |= (nValue << 8);
+            nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
+            nMatchOffset ^= 0x1fff;
+            nMatchOffset += (512 + 1);
+            break;
+
+         default:
+            /* Check if this is a 16 bit offset or a rep-match */
+            if ((token & 0x20) == 0) {
+               /* 16 bit offset */
+               nMatchOffset = (((unsigned int)(*pInBlock++)) << 8);
+               nMatchOffset |= (unsigned int)(*pInBlock++);
+               nMatchOffset ^= 0xffff;
+               nMatchOffset++;
+            }
+            break;
+         }
+
+         const unsigned char *pSrc = pCurOutData - nMatchOffset;
+         if (pSrc >= pOutData) {
+            unsigned int nMatchLen = (unsigned int)(token & 0x07);
+            if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
+               memcpy(pCurOutData, pSrc, 8);
+               memcpy(pCurOutData + 8, pSrc + 8, 2);
+               pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
+            }
+            else {
+               nMatchLen += MIN_MATCH_SIZE_V2;
+               if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
+                  if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen))
+                     return -1;
+               }
+
+               if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
+                  /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
+
+                  if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
+                     const unsigned char *pCopySrc = pSrc;
+                     unsigned char *pCopyDst = pCurOutData;
+                     const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
+
+                     do {
+                        memcpy(pCopyDst, pCopySrc, 16);
+                        pCopySrc += 16;
+                        pCopyDst += 16;
+                     } while (pCopyDst < pCopyEndDst);
+
+                     pCurOutData += nMatchLen;
+                  }
+                  else {
+                     while (nMatchLen) {
+                        *pCurOutData++ = *pSrc++;
+                        nMatchLen--;
+                     }
+                  }
+               }
+               else {
+                  return -1;
+               }
+            }
+         }
+         else {
+            return -1;
+         }
+      }
+   }
+
+   return (int)(pCurOutData - (pOutData + nOutDataOffset));
+}
--- a/src/expand_block_v2.h
+++ b/src/expand_block_v2.h
@ -0,0 +1,49 @@
+/*
+ * expand_v2.h - LZSA2 block decompressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _EXPAND_V2_H
+#define _EXPAND_V2_H
+
+/**
+ * Decompress one LZSA2 data block
+ *
+ * @param pInBlock pointer to compressed data
+ * @param nInBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
+
+#endif /* _EXPAND_V2_H */
--- a/src/expand_context.c
+++ b/src/expand_context.c
@ -0,0 +1,57 @@
+/*
+ * expand_context.h - decompressor context definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "expand_context.h"
+#include "expand_block_v1.h"
+#include "expand_block_v2.h"
+
+/**
+ * Decompress one data block
+ *
+ * @param pInBlock pointer to compressed data
+ * @param nInBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
+   if (nFormatVersion == 1)
+      return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
+   else if (nFormatVersion == 2)
+      return lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
+   else
+      return -1;
+}
--- a/src/expand_context.h
+++ b/src/expand_context.h
@ -0,0 +1,51 @@
+/*
+ * expand_context.h - decompressor context definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _EXPAND_CONTEXT_H
+#define _EXPAND_CONTEXT_H
+
+#include <stdlib.h>
+
+/**
+ * Decompress one data block
+ *
+ * @param pInBlock pointer to compressed data
+ * @param nInBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
+
+#endif /* _EXPAND_CONTEXT_H */
--- a/src/expand_inmem.c
+++ b/src/expand_inmem.c
@ -0,0 +1,160 @@
+/*
+ * expand_inmem.c - in-memory decompression implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "expand_inmem.h"
+#include "lib.h"
+#include "frame.h"
+
+#define BLOCK_SIZE 65536
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * @param pFileData compressed data
+ * @param nFileSize compressed size in bytes
+ *
+ * @return maximum decompressed size
+ */
+size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize) {
+   const unsigned char *pCurFileData = pFileData;
+   const unsigned char *pEndFileData = pCurFileData + nFileSize;
+   int nFormatVersion = 0;
+   size_t nMaxDecompressedSize = 0;
+   const int nHeaderSize = lzsa_get_header_size();
+
+   /* Check header */
+   if ((pCurFileData + nHeaderSize) > pEndFileData ||
+       lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
+      return -1;
+
+   pCurFileData += nHeaderSize;
+
+   while (pCurFileData < pEndFileData) {
+      unsigned int nBlockDataSize = 0;
+      int nIsUncompressed = 0;
+      const int nFrameSize = lzsa_get_frame_size();
+
+      /* Decode frame header */
+      if ((pCurFileData + nFrameSize) > pEndFileData ||
+          lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
+         return -1;
+      pCurFileData += nFrameSize;
+
+      if (!nBlockDataSize)
+         break;
+
+      /* Add one potentially full block to the decompressed size */
+      nMaxDecompressedSize += BLOCK_SIZE;
+
+      if ((pCurFileData + nBlockDataSize) > pEndFileData)
+         return -1;
+
+      pCurFileData += nBlockDataSize;
+   }
+
+   return nMaxDecompressedSize;
+}
+
+/**
+ * Decompress data in memory
+ *
+ * @param pFileData compressed data
+ * @param pOutBuffer buffer for decompressed data
+ * @param nFileSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer
+ * @param pFormatVersion pointer to format version, updated if this function is successful
+ *
+ * @return actual decompressed size, or -1 for error
+ */
+size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion) {
+   const unsigned char *pCurFileData = pFileData;
+   const unsigned char *pEndFileData = pCurFileData + nFileSize;
+   unsigned char *pCurOutBuffer = pOutBuffer;
+   const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
+   int nFormatVersion = 0;
+   int nPreviousBlockSize;
+   const int nHeaderSize = lzsa_get_header_size();
+
+   /* Check header */
+   if ((pCurFileData + nHeaderSize) > pEndFileData ||
+      lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
+      return -1;
+
+   pCurFileData += nHeaderSize;
+   nPreviousBlockSize = 0;
+
+   while (pCurFileData < pEndFileData) {
+      unsigned int nBlockDataSize = 0;
+      int nIsUncompressed = 0;
+      const int nFrameSize = lzsa_get_frame_size();
+
+      /* Decode frame header */
+      if ((pCurFileData + nFrameSize) > pEndFileData ||
+          lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
+         return -1;
+      pCurFileData += nFrameSize;
+
+      if (!nBlockDataSize)
+         break;
+
+      if (!nIsUncompressed) {
+         int nDecompressedSize;
+
+         /* Decompress block */
+         if ((pCurFileData + nBlockDataSize) > pEndFileData)
+            return -1;
+
+         nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize));
+         if (nDecompressedSize < 0)
+            return -1;
+
+         pCurOutBuffer += nDecompressedSize;
+         nPreviousBlockSize = nDecompressedSize;
+      }
+      else {
+         /* Copy uncompressed block */
+         if ((pCurFileData + nBlockDataSize) > pEndFileData)
+            return -1;
+         if ((pCurOutBuffer + nBlockDataSize) > pEndOutBuffer)
+            return -1;
+         memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize);
+         pCurOutBuffer += nBlockDataSize;
+      }
+
+      pCurFileData += nBlockDataSize;
+   }
+
+   *pFormatVersion = nFormatVersion;
+   return (int)(pCurOutBuffer - pOutBuffer);
+}
--- a/src/expand_inmem.h
+++ b/src/expand_inmem.h
@ -0,0 +1,61 @@
+/*
+ * expand_inmem.h - in-memory decompression definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _EXPAND_INMEM_H
+#define _EXPAND_INMEM_H
+
+#include <stdlib.h>
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * @param pFileData compressed data
+ * @param nFileSize compressed size in bytes
+ *
+ * @return maximum decompressed size
+ */
+size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize);
+
+/**
+ * Decompress data in memory
+ *
+ * @param pFileData compressed data
+ * @param pOutBuffer buffer for decompressed data
+ * @param nFileSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer
+ * @param pFormatVersion pointer to format version, updated if this function is successful
+ *
+ * @return actual decompressed size, or -1 for error
+ */
+size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion);
+
+#endif /* _EXPAND_INMEM_H */
--- a/src/expand_streaming.c
+++ b/src/expand_streaming.c
@ -0,0 +1,243 @@
+/*
+ * expand_streaming.c - streaming decompression definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include "expand_streaming.h"
+#include "format.h"
+#include "frame.h"
+#include "lib.h"
+
+/*-------------- File API -------------- */
+
+/**
+ * Decompress file
+ *
+ * @param pszInFilename name of input(compressed) file to decompress
+ * @param pszOutFilename name of output(decompressed) file to generate
+ * @param pszDictionaryFilename name of dictionary file, or NULL for none
+ * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
+ * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
+ * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
+ * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
+                                   long long *pOriginalSize, long long *pCompressedSize) {
+   lzsa_stream_t inStream, outStream;
+   void *pDictionaryData = NULL;
+   int nDictionaryDataSize = 0;
+   lzsa_status_t nStatus;
+
+   if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) {
+      return LZSA_ERROR_SRC;
+   }
+
+   if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
+      inStream.close(&inStream);
+      return LZSA_ERROR_DST;
+   }
+
+   nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
+   if (nStatus) {
+      outStream.close(&outStream);
+      inStream.close(&inStream);
+      return nStatus;
+   }
+
+   nStatus = lzsa_decompress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, pOriginalSize, pCompressedSize);
+
+   lzsa_dictionary_free(&pDictionaryData);
+   outStream.close(&outStream);
+   inStream.close(&inStream);
+
+   return nStatus;
+}
+
+/*-------------- Streaming API -------------- */
+
+/**
+ * Decompress stream
+ *
+ * @param pInStream input(compressed) stream to decompress
+ * @param pOutStream output(decompressed) stream to write to
+ * @param pDictionaryData dictionary contents, or NULL for none
+ * @param nDictionaryDataSize size of dictionary contents, or 0
+ * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
+ * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
+ * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
+ * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
+      long long *pOriginalSize, long long *pCompressedSize) {
+   long long nStartTime = 0LL, nEndTime = 0LL;
+   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
+   unsigned char cFrameData[16];
+   unsigned char *pInBlock;
+   unsigned char *pOutData;
+
+   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
+      const int nHeaderSize = lzsa_get_header_size();
+
+      memset(cFrameData, 0, 16);
+      if (pInStream->read(pInStream, cFrameData, nHeaderSize) != nHeaderSize) {
+         return LZSA_ERROR_SRC;
+      }
+
+      if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
+         return LZSA_ERROR_FORMAT;
+      }
+
+      nCompressedSize += (long long)nHeaderSize;
+   }
+
+   pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
+   if (!pInBlock) {
+      return LZSA_ERROR_MEMORY;
+   }
+
+   pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
+   if (!pOutData) {
+      free(pInBlock);
+      pInBlock = NULL;
+
+      return LZSA_ERROR_MEMORY;
+   }
+
+   int nDecompressionError = 0;
+   int nPrevDecompressedSize = 0;
+   int nNumBlocks = 0;
+
+   while (!pInStream->eof(pInStream) && !nDecompressionError) {
+      unsigned int nBlockSize = 0;
+      int nIsUncompressed = 0;
+
+      if (nPrevDecompressedSize != 0) {
+         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
+      }
+      else if (nDictionaryDataSize && pDictionaryData) {
+         nPrevDecompressedSize = nDictionaryDataSize;
+         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pDictionaryData, nPrevDecompressedSize);
+      }
+
+      if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
+         const int nFrameSize = lzsa_get_frame_size();
+
+         memset(cFrameData, 0, 16);
+         if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) {
+            if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
+               nDecompressionError = LZSA_ERROR_FORMAT;
+               nBlockSize = 0;
+            }
+
+            nCompressedSize += (long long)nFrameSize;
+         }
+         else {
+            nDecompressionError = LZSA_ERROR_SRC;
+            nBlockSize = 0;
+         }
+      }
+      else {
+         if (!nNumBlocks)
+            nBlockSize = BLOCK_SIZE;
+         else
+            nBlockSize = 0;
+      }
+
+      if (nBlockSize != 0) {
+         int nDecompressedSize = 0;
+
+         if ((int)nBlockSize > BLOCK_SIZE) {
+            nDecompressionError = LZSA_ERROR_FORMAT;
+            break;
+         }
+         size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize);
+         if (nFlags & LZSA_FLAG_RAW_BLOCK) {
+            if (nReadBytes > 2)
+               nReadBytes -= 2;
+            else
+               nReadBytes = 0;
+            nBlockSize = (unsigned int)nReadBytes;
+         }
+
+         if (nReadBytes == nBlockSize) {
+            nCompressedSize += (long long)nReadBytes;
+
+            if (nIsUncompressed) {
+               memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
+               nDecompressedSize = nBlockSize;
+            }
+            else {
+               unsigned int nBlockOffs = 0;
+
+               nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
+               if (nDecompressedSize < 0) {
+                  nDecompressionError = LZSA_ERROR_DECOMPRESSION;
+                  break;
+               }
+            }
+
+            if (nDecompressedSize != 0) {
+               nOriginalSize += (long long)nDecompressedSize;
+
+               if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize)
+                  nDecompressionError = LZSA_ERROR_DST;
+               nPrevDecompressedSize = nDecompressedSize;
+               nDecompressedSize = 0;
+            }
+         }
+         else {
+            break;
+         }
+
+         nNumBlocks++;
+      }
+      else {
+         break;
+      }
+   }
+
+   free(pOutData);
+   pOutData = NULL;
+
+   free(pInBlock);
+   pInBlock = NULL;
+
+   *pOriginalSize = nOriginalSize;
+   *pCompressedSize = nCompressedSize;
+   return nDecompressionError;
+}
+
--- a/src/expand_streaming.h
+++ b/src/expand_streaming.h
@ -0,0 +1,78 @@
+/*
+ * expand_streaming.h - streaming decompression definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _EXPAND_STREAMING_H
+#define _EXPAND_STREAMING_H
+
+#include "stream.h"
+
+/* Forward declaration */
+typedef enum _lzsa_status_t lzsa_status_t;
+
+/*-------------- File API -------------- */
+
+/**
+ * Decompress file
+ *
+ * @param pszInFilename name of input(compressed) file to decompress
+ * @param pszOutFilename name of output(decompressed) file to generate
+ * @param pszDictionaryFilename name of dictionary file, or NULL for none
+ * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
+ * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
+ * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
+ * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
+   long long *pOriginalSize, long long *pCompressedSize);
+
+/*-------------- Streaming API -------------- */
+
+/**
+ * Decompress stream
+ *
+ * @param pInStream input(compressed) stream to decompress
+ * @param pOutStream output(decompressed) stream to write to
+ * @param pDictionaryData dictionary contents, or NULL for none
+ * @param nDictionaryDataSize size of dictionary contents, or 0
+ * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
+ * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
+ * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
+ * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
+   long long *pOriginalSize, long long *pCompressedSize);
+
+#endif /* _EXPAND_STREAMING_H */
--- a/src/format.h
+++ b/src/format.h
@ -38,6 +38,8 @@

 #define MAX_VARLEN 0xffff

+#define BLOCK_SIZE 65536
+
 #define MIN_MATCH_SIZE_V1 3
 #define LITERALS_RUN_LEN_V1 7
 #define MATCH_RUN_LEN_V1 15
--- a/src/frame.c
+++ b/src/frame.c
@ -31,6 +31,7 @@
 */

 #include <stdlib.h>
+#include <string.h>
 #include "frame.h"

 #define LZSA_ID_0   0x7b
--- a/src/lib.h
+++ b/src/lib.h
@ -33,11 +33,19 @@
 #ifndef _LIB_H
 #define _LIB_H

-#include "divsufsort.h"
 #include "stream.h"
+#include "dictionary.h"
+#include "frame.h"
+#include "format.h"
+#include "shrink_context.h"
+#include "shrink_streaming.h"
+#include "shrink_inmem.h"
+#include "expand_context.h"
+#include "expand_streaming.h"
+#include "expand_inmem.h"

 /** High level status for compression and decompression */
-typedef enum {
+typedef enum _lzsa_status_t {
   LZSA_OK = 0,                           /**< Success */
   LZSA_ERROR_SRC,                        /**< Error reading input */
   LZSA_ERROR_DST,                        /**< Error reading output */
@ -58,200 +66,4 @@ typedef enum {
 #define LZSA_FLAG_FAVOR_RATIO    (1<<0)      /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
 #define LZSA_FLAG_RAW_BLOCK      (1<<1)      /**< 1 to emit raw block */

-/*-------------- Top level API -------------- */
-
-/**
- * Compress file
- *
- * @param pszInFilename name of input(source) file to compress
- * @param pszOutFilename name of output(compressed) file to generate
- * @param pszDictionaryFilename name of dictionary file, or NULL for none
- * @param nFlags compression flags (LZSA_FLAG_xxx)
- * @param nMinMatchSize minimum match size
- * @param nFormatVersion version of format to use (1-2)
- * @param progress progress function, called after compressing each block, or NULL for none
- * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
- * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
- * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
- *
- * @return LZSA_OK for success, or an error value from lzsa_status_t
- */
-lzsa_status_t lsza_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
-   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
-   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
-
-/**
- * Decompress file
- *
- * @param pszInFilename name of input(compressed) file to decompress
- * @param pszOutFilename name of output(decompressed) file to generate
- * @param pszDictionaryFilename name of dictionary file, or NULL for none
- * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
- * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
- * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
- * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
- *
- * @return LZSA_OK for success, or an error value from lzsa_status_t
- */
-lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
-   long long *pOriginalSize, long long *pCompressedSize);
-
-/*-------------- Streaming API -------------- */
-
-/**
- * Load dictionary contents
- *
- * @param pszDictionaryFilename name of dictionary file, or NULL for none
- * @param pDictionaryData pointer to returned dictionary contents, or NULL for none
- * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
- *
- * @return LZSA_OK for success, or an error value from lzsa_status_t
- */
-int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
-
-/**
- * Free dictionary contents
- *
- * @param pDictionaryData pointer to pointer to dictionary contents
- */
-void lzsa_dictionary_free(void **ppDictionaryData);
-
-/**
- * Compress stream
- *
- * @param pInStream input(source) stream to compress
- * @param pOutStream output(compressed) stream to write to
- * @param pDictionaryData dictionary contents, or NULL for none
- * @param nDictionaryDataSize size of dictionary contents, or 0
- * @param nFlags compression flags (LZSA_FLAG_xxx)
- * @param nMinMatchSize minimum match size
- * @param nFormatVersion version of format to use (1-2)
- * @param progress progress function, called after compressing each block, or NULL for none
- * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
- * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
- * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
- *
- * @return LZSA_OK for success, or an error value from lzsa_status_t
- */
-lzsa_status_t lsza_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
-   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
-   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
-
-/**
- * Decompress stream
- *
- * @param pInStream input(compressed) stream to decompress
- * @param pOutStream output(decompressed) stream to write to
- * @param pDictionaryData dictionary contents, or NULL for none
- * @param nDictionaryDataSize size of dictionary contents, or 0
- * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
- * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
- * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
- * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
- *
- * @return LZSA_OK for success, or an error value from lzsa_status_t
- */
-lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
-   long long *pOriginalSize, long long *pCompressedSize);
-
-/*-------------- Block compression API --------------*/
-
-#define LCP_BITS 15
-#define LCP_MAX (1<<(LCP_BITS - 1))
-#define LCP_SHIFT (32-LCP_BITS)
-#define LCP_MASK (((1<<LCP_BITS) - 1) << LCP_SHIFT)
-#define POS_MASK ((1<<LCP_SHIFT) - 1)
-
-#define NMATCHES_PER_OFFSET 8
-#define MATCHES_PER_OFFSET_SHIFT 3
-
-#define LEAVE_ALONE_MATCH_SIZE 1000
-
-#define LAST_MATCH_OFFSET 4
-#define LAST_LITERALS 1
-
-#define MODESWITCH_PENALTY 1
-
-/** One match */
-typedef struct _lzsa_match {
-   unsigned short length;
-   unsigned short offset;
-} lzsa_match;
-
-/** One rep-match slot (for LZSA2) */
-typedef struct _lzsa_repmatch_opt {
-   int incoming_offset;
-   short best_slot_for_incoming;
-   short expected_repmatch;
-} lzsa_repmatch_opt;
-
-/** Compression context */
-typedef struct _lsza_compressor {
-   divsufsort_ctx_t divsufsort_context;
-   unsigned int *intervals;
-   unsigned int *pos_data;
-   unsigned int *open_intervals;
-   lzsa_match *match;
-   lzsa_match *best_match;
-   int *slot_cost;
-   lzsa_repmatch_opt *repmatch_opt;
-   int min_match_size;
-   int format_version;
-   int flags;
-   int num_commands;
-} lsza_compressor;
-
-/**
- * Initialize compression context
- *
- * @param pCompressor compression context to initialize
- * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
- * @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
- * @param nFlags compression flags
- *
- * @return 0 for success, non-zero for failure
- */
-int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
-
-/**
- * Clean up compression context and free up any associated resources
- *
- * @param pCompressor compression context to clean up
- */
-void lzsa_compressor_destroy(lsza_compressor *pCompressor);
-
-/**
- * Compress one block of data
- *
- * @param pCompressor compression context
- * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
- * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
- * @param nInDataSize number of input bytes to compress
- * @param pOutData pointer to output buffer
- * @param nMaxOutDataSize maximum size of output buffer, in bytes
- *
- * @return size of compressed data in output buffer, or -1 if the data is uncompressible
- */
-int lzsa_compressor_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
-
-/**
- * Get the number of compression commands issued in compressed data blocks
- *
- * @return number of commands
- */
-int lzsa_compressor_get_command_count(lsza_compressor *pCompressor);
-
-/**
- * Decompress one data block
- *
- * @param pInBlock pointer to compressed data
- * @param nInBlockSize size of compressed data, in bytes
- * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
- * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
- * @param nBlockMaxSize total size of output decompression buffer, in bytes
- *
- * @return size of decompressed data in bytes, or -1 for error
- */
-int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
-
 #endif /* _LIB_H */
--- a/src/lzsa.c
+++ b/src/lzsa.c
@ -41,7 +41,6 @@
 #include <sys/time.h>
 #endif
 #include "lib.h"
-#include "inmem.h"

 #define OPT_VERBOSE     1
 #define OPT_RAW         2
@ -115,7 +114,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
      nStartTime = do_get_time();
   }

-   nStatus = lsza_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount);
+   nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount);

   if ((nOptions & OPT_VERBOSE)) {
      nEndTime = do_get_time();
@ -345,7 +344,369 @@ static int do_compare(const char *pszInFilename, const char *pszOutFilename, con

 /*---------------------------------------------------------------------------*/

-static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
+static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, int nMinMatchSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) {
+   size_t nIndex = 0;
+   int nMatchProbability = (int)(fMatchProbability * 1023.0f);
+
+   srand(nSeed);
+   
+   if (nIndex >= nBufferSize) return;
+   pBuffer[nIndex++] = rand() % nNumLiteralValues;
+
+   while (nIndex < nBufferSize) {
+      if ((rand() & 1023) >= nMatchProbability) {
+         size_t nLiteralCount = rand() & 127;
+         if (nLiteralCount > (nBufferSize - nIndex))
+            nLiteralCount = nBufferSize - nIndex;
+
+         while (nLiteralCount--)
+            pBuffer[nIndex++] = rand() % nNumLiteralValues;
+      }
+      else {
+         size_t nMatchLength = nMinMatchSize + (rand() & 1023);
+         size_t nMatchOffset;
+
+         if (nMatchLength > (nBufferSize - nIndex))
+            nMatchLength = nBufferSize - nIndex;
+         if (nMatchLength > nIndex)
+            nMatchLength = nIndex;
+
+         if (nMatchLength < nIndex)
+            nMatchOffset = rand() % (nIndex - nMatchLength);
+         else
+            nMatchOffset = 0;
+
+         while (nMatchLength--) {
+            pBuffer[nIndex] = pBuffer[nIndex - nMatchOffset];
+            nIndex++;
+         }
+      }
+   }
+}
+
+static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) {
+   size_t nIndex = 0;
+   int nXorProbability = (int)(fXorProbability * 1023.0f);
+
+   srand(nSeed);
+
+   if (nIndex >= nBufferSize) return;
+
+   while (nIndex < nBufferSize) {
+      if ((rand() & 1023) < nXorProbability) {
+         pBuffer[nIndex] ^= 0xff;
+      }
+      nIndex++;
+   }
+}
+
+static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) {
+   unsigned char *pGeneratedData;
+   unsigned char *pCompressedData;
+   unsigned char *pTmpCompressedData;
+   unsigned char *pTmpDecompressedData;
+   size_t nGeneratedDataSize;
+   size_t nMaxCompressedDataSize;
+   unsigned int nSeed = 123;
+   int nFlags;
+   int i;
+
+   nFlags = 0;
+   if (nOptions & OPT_FAVOR_RATIO)
+      nFlags |= LZSA_FLAG_FAVOR_RATIO;
+   if (nOptions & OPT_RAW)
+      nFlags |= LZSA_FLAG_RAW_BLOCK;
+
+   pGeneratedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
+   if (!pGeneratedData) {
+      fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
+      return 100;
+   }
+
+   nMaxCompressedDataSize = lzsa_get_max_compressed_size_inmem(4 * BLOCK_SIZE);
+   pCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
+   if (!pCompressedData) {
+      free(pGeneratedData);
+      pGeneratedData = NULL;
+
+      fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
+      return 100;
+   }
+
+   pTmpCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
+   if (!pTmpCompressedData) {
+      free(pCompressedData);
+      pCompressedData = NULL;
+      free(pGeneratedData);
+      pGeneratedData = NULL;
+
+      fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
+      return 100;
+   }
+
+   pTmpDecompressedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
+   if (!pTmpDecompressedData) {
+      free(pTmpCompressedData);
+      pTmpCompressedData = NULL;
+      free(pCompressedData);
+      pCompressedData = NULL;
+      free(pGeneratedData);
+      pGeneratedData = NULL;
+
+      fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
+      return 100;
+   }
+
+   memset(pGeneratedData, 0, 4 * BLOCK_SIZE);
+   memset(pCompressedData, 0, nMaxCompressedDataSize);
+   memset(pTmpCompressedData, 0, nMaxCompressedDataSize);
+
+   /* Test compressing with a too small buffer to do anything, expect to fail cleanly */
+   for (i = 0; i < 12; i++) {
+      generate_compressible_data(pGeneratedData, i, nMinMatchSize, nSeed, 256, 0.5f);
+      lzsa_compress_inmem(pGeneratedData, pCompressedData, i, i, nFlags, nMinMatchSize, nFormatVersion);
+   }
+
+   size_t nDataSizeStep = 128;
+   float fProbabilitySizeStep = 0.0005f;
+
+   for (nGeneratedDataSize = 1024; nGeneratedDataSize <= (4 * BLOCK_SIZE); nGeneratedDataSize += nDataSizeStep) {
+      float fMatchProbability;
+
+      fprintf(stdout, "size %zd", nGeneratedDataSize);
+      for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
+         int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 };
+         float fXorProbability;
+
+         fputc('.', stdout);
+         fflush(stdout);
+
+         for (i = 0; i < 12; i++) {
+            /* Generate data to compress */
+            generate_compressible_data(pGeneratedData, nGeneratedDataSize, nMinMatchSize, nSeed, nNumLiteralValues[i], fMatchProbability);
+
+            /* Try to compress it, expected to succeed */
+            size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
+               nFlags, nMinMatchSize, nFormatVersion);
+            if (nActualCompressedSize == -1 || nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
+               free(pTmpDecompressedData);
+               pTmpDecompressedData = NULL;
+               free(pTmpCompressedData);
+               pTmpCompressedData = NULL;
+               free(pCompressedData);
+               pCompressedData = NULL;
+               free(pGeneratedData);
+               pGeneratedData = NULL;
+
+               fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+               return 100;
+            }
+
+            /* Try to decompress it, expected to succeed */
+            size_t nActualDecompressedSize;
+            int nDecFormatVersion = 0;
+            nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, &nDecFormatVersion);
+            if (nActualDecompressedSize == -1) {
+               free(pTmpDecompressedData);
+               pTmpDecompressedData = NULL;
+               free(pTmpCompressedData);
+               pTmpCompressedData = NULL;
+               free(pCompressedData);
+               pCompressedData = NULL;
+               free(pGeneratedData);
+               pGeneratedData = NULL;
+
+               fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+               return 100;
+            }
+
+            if (memcmp(pGeneratedData, pTmpDecompressedData, nGeneratedDataSize)) {
+               free(pTmpDecompressedData);
+               pTmpDecompressedData = NULL;
+               free(pTmpCompressedData);
+               pTmpCompressedData = NULL;
+               free(pCompressedData);
+               pCompressedData = NULL;
+               free(pGeneratedData);
+               pGeneratedData = NULL;
+
+               fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+               return 100;
+            }
+
+            /* Try to decompress corrupted data, expected to fail cleanly, without crashing or corrupting memory outside the output buffer */
+            for (fXorProbability = 0.05f; fXorProbability <= 0.5f; fXorProbability += 0.05f) {
+               memcpy(pTmpCompressedData, pCompressedData, nActualCompressedSize);
+               xor_data(pTmpCompressedData + lzsa_get_header_size() + lzsa_get_frame_size(), nActualCompressedSize - lzsa_get_header_size() - lzsa_get_frame_size() - lzsa_get_frame_size() /* footer */, nSeed, fXorProbability);
+               lzsa_decompress_inmem(pTmpCompressedData, pGeneratedData, nActualCompressedSize, nGeneratedDataSize, &nDecFormatVersion);
+            }
+         }
+
+         nSeed++;
+      }
+
+      fputc(10, stdout);
+      fflush(stdout);
+
+      nDataSizeStep <<= 1;
+      if (nDataSizeStep > (128 * 4096))
+         nDataSizeStep = 128 * 4096;
+      fProbabilitySizeStep *= 1.25;
+      if (fProbabilitySizeStep > (0.0005f * 4096))
+         fProbabilitySizeStep = 0.0005f * 4096;
+   }
+
+   free(pTmpDecompressedData);
+   pTmpDecompressedData = NULL;
+
+   free(pTmpCompressedData);
+   pTmpCompressedData = NULL;
+
+   free(pCompressedData);
+   pCompressedData = NULL;
+
+   free(pGeneratedData);
+   pGeneratedData = NULL;
+
+   fprintf(stdout, "All tests passed.\n");
+   return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) {
+   size_t nFileSize, nMaxCompressedSize;
+   unsigned char *pFileData;
+   unsigned char *pCompressedData;
+   int nFlags;
+   int i;
+
+   nFlags = 0;
+   if (nOptions & OPT_FAVOR_RATIO)
+      nFlags |= LZSA_FLAG_FAVOR_RATIO;
+   if (nOptions & OPT_RAW)
+      nFlags |= LZSA_FLAG_RAW_BLOCK;
+
+   if (pszDictionaryFilename) {
+      fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
+      return 100;
+   }
+
+   /* Read the whole original file in memory */
+
+   FILE *f_in = fopen(pszInFilename, "rb");
+   if (!f_in) {
+      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+      return 100;
+   }
+
+   fseek(f_in, 0, SEEK_END);
+   nFileSize = (size_t)ftell(f_in);
+   fseek(f_in, 0, SEEK_SET);
+
+   pFileData = (unsigned char*)malloc(nFileSize);
+   if (!pFileData) {
+      fclose(f_in);
+      fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
+      return 100;
+   }
+
+   if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) {
+      free(pFileData);
+      fclose(f_in);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   fclose(f_in);
+
+   /* Allocate max compressed size */
+
+   nMaxCompressedSize = lzsa_get_max_compressed_size_inmem(nFileSize);
+
+   pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048);
+   if (!pCompressedData) {
+      free(pFileData);
+      fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize);
+      return 100;
+   }
+
+   memset(pCompressedData + 1024, 0, nMaxCompressedSize);
+
+   long long nBestCompTime = -1;
+
+   size_t nActualCompressedSize = 0;
+   size_t nRightGuardPos = nMaxCompressedSize;
+
+   for (i = 0; i < 5; i++) {
+      unsigned char nGuard = 0x33 + i;
+      int j;
+
+      /* Write guard bytes around the output buffer, to help check for writes outside of it by the compressor */
+      memset(pCompressedData, nGuard, 1024);
+      memset(pCompressedData + 1024 + nRightGuardPos, nGuard, 1024);
+
+      long long t0 = do_get_time();
+      nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion);
+      long long t1 = do_get_time();
+      if (nActualCompressedSize == -1) {
+         free(pCompressedData);
+         free(pFileData);
+         fprintf(stderr, "compression error\n");
+         return 100;
+      }
+
+      long long nCurDecTime = t1 - t0;
+      if (nBestCompTime == -1 || nBestCompTime > nCurDecTime)
+         nBestCompTime = nCurDecTime;
+
+      /* Check guard bytes before the output buffer */
+      for (j = 0; j < 1024; j++) {
+         if (pCompressedData[j] != nGuard) {
+            free(pCompressedData);
+            free(pFileData);
+            fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j - 1024);
+            return 100;
+         }
+      }
+
+      /* Check guard bytes after the output buffer */
+      for (j = 0; j < 1024; j++) {
+         if (pCompressedData[1024 + nRightGuardPos + j] != nGuard) {
+            free(pCompressedData);
+            free(pFileData);
+            fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j);
+            return 100;
+         }
+      }
+
+      nRightGuardPos = nActualCompressedSize;
+   }
+
+   if (pszOutFilename) {
+      FILE *f_out;
+
+      /* Write whole compressed file out */
+
+      f_out = fopen(pszOutFilename, "wb");
+      if (f_out) {
+         fwrite(pCompressedData + 1024, 1, nActualCompressedSize, f_out);
+         fclose(f_out);
+      }
+   }
+
+   free(pCompressedData);
+   free(pFileData);
+
+   fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize);
+   fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0));
+
+   return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
   size_t nFileSize, nMaxDecompressedSize;
   unsigned char *pFileData;
   unsigned char *pDecompressedData;
@ -389,7 +750,7 @@ static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, c
   if (nOptions & OPT_RAW)
      nMaxDecompressedSize = 65536;
   else
-      nMaxDecompressedSize = lzsa_inmem_get_max_decompressed_size(pFileData, nFileSize);
+      nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
   if (nMaxDecompressedSize == -1) {
      free(pFileData);
      fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
@ -413,7 +774,7 @@ static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, c
      if (nOptions & OPT_RAW)
         nActualDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize);
      else
-         nActualDecompressedSize = lzsa_inmem_decompress_stream(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
+         nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
      long long t1 = do_get_time();
      if (nActualDecompressedSize == -1) {
         free(pDecompressedData);
@ -490,7 +851,15 @@ int main(int argc, char **argv) {
         else
            bArgsError = true;
      }
-      else if (!strcmp(argv[i], "-bench")) {
+      else if (!strcmp(argv[i], "-cbench")) {
+         if (!bCommandDefined) {
+            bCommandDefined = true;
+            cCommand = 'B';
+         }
+         else
+            bArgsError = true;
+      }
+      else if (!strcmp(argv[i], "-dbench")) {
         if (!bCommandDefined) {
            bCommandDefined = true;
            cCommand = 'b';
@ -498,6 +867,14 @@ int main(int argc, char **argv) {
         else
            bArgsError = true;
      }
+      else if (!strcmp(argv[i], "-test")) {
+         if (!bCommandDefined) {
+            bCommandDefined = true;
+            cCommand = 't';
+         }
+         else
+            bArgsError = true;
+      }
      else if (!strcmp(argv[i], "-D")) {
         if (!pszDictionaryFilename && (i + 1) < argc) {
            pszDictionaryFilename = argv[i + 1];
@ -616,12 +993,18 @@ int main(int argc, char **argv) {
      }
   }

+   if (!bArgsError && cCommand == 't') {
+      return do_self_test(nOptions, nMinMatchSize, nFormatVersion);
+   }
+
   if (bArgsError || !pszInFilename || !pszOutFilename) {
      fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n");
      fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]);
      fprintf(stderr, "       -c: check resulting stream after compressing\n");
      fprintf(stderr, "       -d: decompress (default: compress)\n");
-      fprintf(stderr, "   -bench: benchmary in-memory decompression\n");
+      fprintf(stderr, "  -cbench: benchmary in-memory compression\n");
+      fprintf(stderr, "  -dbench: benchmary in-memory decompression\n");
+      fprintf(stderr, "    -test: run automated self-tests\n");
      fprintf(stderr, "       -v: be verbose\n");
      fprintf(stderr, "       -f <value>: LZSA compression format (1-2)\n");
      fprintf(stderr, "       -r: raw block format (max. 64 Kb files)\n");
@ -643,8 +1026,11 @@ int main(int argc, char **argv) {
   else if (cCommand == 'd') {
      return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
   }
+   else if (cCommand == 'B') {
+      return do_compr_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion);
+   }
   else if (cCommand == 'b') {
-      return do_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
+      return do_dec_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
   }
   else {
      return 100;
--- a/src/matchfinder.c
+++ b/src/matchfinder.c
@ -29,7 +29,6 @@
 *
 */

-#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "matchfinder.h"
@ -45,7 +44,7 @@
 *
 * @return 0 for success, non-zero for failure
 */
-int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
+int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
   unsigned int *intervals = pCompressor->intervals;

   /* Build suffix array from input data */
@ -170,7 +169,7 @@ int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *p
 *
 * @return number of matches
 */
-int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
+int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
   unsigned int *intervals = pCompressor->intervals;
   unsigned int *pos_data = pCompressor->pos_data;
   unsigned int ref;
@ -248,7 +247,7 @@ int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_m
 * @param nStartOffset current offset in input window (typically 0)
 * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
 */
-void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
   lzsa_match match;
   int i;

@ -267,7 +266,7 @@ void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, con
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
 */
-void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
   lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
   int i;

--- a/src/matchfinder.h
+++ b/src/matchfinder.h
@ -35,7 +35,7 @@

 /* Forward declarations */
 typedef struct _lzsa_match lzsa_match;
-typedef struct _lsza_compressor lsza_compressor;
+typedef struct _lzsa_compressor lzsa_compressor;

 /**
 * Parse input data, build suffix array and overlaid data structures to speed up match finding
@ -46,7 +46,7 @@ typedef struct _lsza_compressor lsza_compressor;
 *
 * @return 0 for success, non-zero for failure
 */
-int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
+int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);

 /**
 * Find matches at the specified offset in the input window
@ -58,7 +58,7 @@ int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *p
 *
 * @return number of matches
 */
-int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);
+int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);

 /**
 * Skip previously compressed bytes
@ -67,7 +67,7 @@ int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_m
 * @param nStartOffset current offset in input window (typically 0)
 * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
 */
-void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
+void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);

 /**
 * Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
@ -77,6 +77,6 @@ void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, con
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
 */
-void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
+void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);

 #endif /* _MATCHFINDER_H */
--- a/src/shrink_block_v1.c
+++ b/src/shrink_block_v1.c
@ -0,0 +1,459 @@
+/*
+ * shrink_v1.c - LZSA1 block compressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "lib.h"
+#include "shrink_block_v1.h"
+#include "format.h"
+
+/**
+ * Get the number of extra bits required to represent a literals length
+ *
+ * @param nLength literals length
+ *
+ * @return number of extra bits required
+ */
+static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
+   if (nLength < LITERALS_RUN_LEN_V1) {
+      return 0;
+   }
+   else {
+      if (nLength < 256)
+         return 8;
+      else {
+         if (nLength < 512)
+            return 16;
+         else
+            return 24;
+      }
+   }
+}
+
+/**
+ * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
+ * room to write the bytes.
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nLength literals length
+ */
+static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
+   if (nLength >= LITERALS_RUN_LEN_V1) {
+      if (nLength < 256)
+         pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
+      else {
+         if (nLength < 512) {
+            pOutData[nOutOffset++] = 250;
+            pOutData[nOutOffset++] = nLength - 256;
+         }
+         else {
+            pOutData[nOutOffset++] = 249;
+            pOutData[nOutOffset++] = nLength & 0xff;
+            pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
+         }
+      }
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Get the number of extra bits required to represent an encoded match length
+ *
+ * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
+ *
+ * @return number of extra bits required
+ */
+static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
+   if (nLength < MATCH_RUN_LEN_V1) {
+      return 0;
+   }
+   else {
+      if ((nLength + MIN_MATCH_SIZE_V1) < 256)
+         return 8;
+      else {
+         if ((nLength + MIN_MATCH_SIZE_V1) < 512)
+            return 16;
+         else
+            return 24;
+      }
+   }
+}
+
+/**
+ * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
+ * room to write the bytes.
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
+ */
+static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
+   if (nLength >= MATCH_RUN_LEN_V1) {
+      if ((nLength + MIN_MATCH_SIZE_V1) < 256)
+         pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
+      else {
+         if ((nLength + MIN_MATCH_SIZE_V1) < 512) {
+            pOutData[nOutOffset++] = 239;
+            pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V1 - 256;
+         }
+         else {
+            pOutData[nOutOffset++] = 238;
+            pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V1) & 0xff;
+            pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V1) >> 8) & 0xff;
+         }
+      }
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ */
+static void lzsa_optimize_matches_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+   int *cost = (int*)pCompressor->pos_data;  /* Reuse */
+   int nLastLiteralsOffset;
+   int nMinMatchSize = pCompressor->min_match_size;
+   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
+   int i;
+
+   cost[nEndOffset - 1] = 8;
+   nLastLiteralsOffset = nEndOffset;
+
+   for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
+      int nBestCost, nBestMatchLen, nBestMatchOffset;
+
+      int nLiteralsLen = nLastLiteralsOffset - i;
+      nBestCost = 8 + cost[i + 1];
+      if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) {
+         /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
+          * The cost automatically accumulates down the chain. */
+         nBestCost += 8;
+      }
+      if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
+         nBestCost += MODESWITCH_PENALTY;
+      nBestMatchLen = 0;
+      nBestMatchOffset = 0;
+
+      lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
+      int m;
+
+      for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
+         int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
+
+         if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
+            int nCurCost;
+            int nMatchLen = pMatch[m].length;
+
+            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
+               nMatchLen = nEndOffset - LAST_LITERALS - i;
+
+            nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);
+            nCurCost += cost[i + nMatchLen];
+            if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
+               nCurCost += MODESWITCH_PENALTY;
+
+            if (nBestCost > (nCurCost - nFavorRatio)) {
+               nBestCost = nCurCost;
+               nBestMatchLen = nMatchLen;
+               nBestMatchOffset = pMatch[m].offset;
+            }
+         }
+         else {
+            int nMatchLen = pMatch[m].length;
+            int k, nMatchRunLen;
+
+            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
+               nMatchLen = nEndOffset - LAST_LITERALS - i;
+
+            nMatchRunLen = nMatchLen;
+            if (nMatchRunLen > MATCH_RUN_LEN_V1)
+               nMatchRunLen = MATCH_RUN_LEN_V1;
+
+            for (k = nMinMatchSize; k < nMatchRunLen; k++) {
+               int nCurCost;
+
+               nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
+               nCurCost += cost[i + k];
+               if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
+                  nCurCost += MODESWITCH_PENALTY;
+
+               if (nBestCost > (nCurCost - nFavorRatio)) {
+                  nBestCost = nCurCost;
+                  nBestMatchLen = k;
+                  nBestMatchOffset = pMatch[m].offset;
+               }
+            }
+
+            for (; k <= nMatchLen; k++) {
+               int nCurCost;
+
+               nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
+               nCurCost += cost[i + k];
+               if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
+                  nCurCost += MODESWITCH_PENALTY;
+
+               if (nBestCost > (nCurCost - nFavorRatio)) {
+                  nBestCost = nCurCost;
+                  nBestMatchLen = k;
+                  nBestMatchOffset = pMatch[m].offset;
+               }
+            }
+         }
+      }
+
+      if (nBestMatchLen >= MIN_MATCH_SIZE_V1)
+         nLastLiteralsOffset = i;
+
+      cost[i] = nBestCost;
+      pMatch->length = nBestMatchLen;
+      pMatch->offset = nBestMatchOffset;
+   }
+}
+
+/**
+ * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
+ * impacting the compression ratio
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ *
+ * @return non-zero if the number of tokens was reduced, 0 if it wasn't
+ */
+static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+   int i;
+   int nNumLiterals = 0;
+   int nDidReduce = 0;
+
+   for (i = nStartOffset; i < nEndOffset; ) {
+      lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
+
+      if (pMatch->length >= MIN_MATCH_SIZE_V1) {
+         int nMatchLen = pMatch->length;
+         int nReduce = 0;
+
+         if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
+            int nMatchOffset = pMatch->offset;
+            int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
+            int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
+
+            if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) {
+               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) {
+                  /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
+                   * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
+                   * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
+                   * match command by literals, the output size will not increase and it will remove one command. */
+                  nReduce = 1;
+               }
+            }
+            else {
+               int nCurIndex = i + nMatchLen;
+               int nNextNumLiterals = 0;
+
+               do {
+                  nCurIndex++;
+                  nNextNumLiterals++;
+               } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1);
+
+               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) {
+                  /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
+                   * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
+                  nReduce = 1;
+               }
+            }
+         }
+
+         if (nReduce) {
+            int j;
+
+            for (j = 0; j < nMatchLen; j++) {
+               pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
+            }
+            nNumLiterals += nMatchLen;
+            i += nMatchLen;
+
+            nDidReduce = 1;
+         }
+         else {
+            if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
+               pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
+               (nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) {
+               /* Join */
+
+               pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
+               pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
+               pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
+               continue;
+            }
+
+            nNumLiterals = 0;
+            i += nMatchLen;
+         }
+      }
+      else {
+         nNumLiterals++;
+         i++;
+      }
+   }
+
+   return nDidReduce;
+}
+
+/**
+ * Emit block of compressed data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
+   int i;
+   int nNumLiterals = 0;
+   int nInFirstLiteralOffset = 0;
+   int nOutOffset = 0;
+
+   for (i = nStartOffset; i < nEndOffset; ) {
+      lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
+
+      if (pMatch->length >= MIN_MATCH_SIZE_V1) {
+         int nMatchOffset = pMatch->offset;
+         int nMatchLen = pMatch->length;
+         int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
+         int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
+         int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
+         int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
+         int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
+
+         if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
+            return -1;
+         if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
+            return -1;
+
+         pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
+         nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
+
+         if (nNumLiterals != 0) {
+            memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+            nOutOffset += nNumLiterals;
+            nNumLiterals = 0;
+         }
+
+         pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
+         if (nTokenLongOffset) {
+            pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
+         }
+         nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
+         i += nMatchLen;
+
+         pCompressor->num_commands++;
+      }
+      else {
+         if (nNumLiterals == 0)
+            nInFirstLiteralOffset = i;
+         nNumLiterals++;
+         i++;
+      }
+   }
+
+   {
+      int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
+      int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
+
+      if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
+         return -1;
+
+      if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
+         pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
+      else
+         pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
+      nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
+
+      if (nNumLiterals != 0) {
+         memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+         nOutOffset += nNumLiterals;
+         nNumLiterals = 0;
+      }
+
+      pCompressor->num_commands++;
+   }
+
+   if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
+      /* Emit EOD marker for raw block */
+
+      if ((nOutOffset + 4) > nMaxOutDataSize)
+         return -1;
+
+      pOutData[nOutOffset++] = 0;
+      pOutData[nOutOffset++] = 238;
+      pOutData[nOutOffset++] = 0;
+      pOutData[nOutOffset++] = 0;
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
+   lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+
+   int nDidReduce;
+   int nPasses = 0;
+   do {
+      nDidReduce = lzsa_optimize_command_count_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+      nPasses++;
+   } while (nDidReduce && nPasses < 20);
+
+   return lzsa_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
+}
--- a/src/shrink_block_v1.h
+++ b/src/shrink_block_v1.h
@ -0,0 +1,53 @@
+/*
+ * shrink_v1.h - LZSA1 block compressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_BLOCK_V1_H
+#define _SHRINK_BLOCK_V1_H
+
+/* Forward declarations */
+typedef struct _lzsa_compressor lzsa_compressor;
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
+
+#endif /* _SHRINK_BLOCK_V1_H */
--- a/src/shrink_block_v2.c
+++ b/src/shrink_block_v2.c
@ -0,0 +1,727 @@
+/*
+ * shrink_v2.c - LZSA2 block compressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "lib.h"
+#include "shrink_block_v2.h"
+#include "format.h"
+
+/**
+ * Write 4-bit nibble to output (compressed) buffer
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles
+ * @param nCurFreeNibbles current number of free nibbles in byte
+ * @param nNibbleValue value to write (0..15)
+ */
+static int lzsa_write_nibble_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nNibbleValue) {
+   if (nOutOffset < 0) return -1;
+
+   if ((*nCurNibbleOffset) == -1) {
+      if (nOutOffset >= nMaxOutDataSize) return -1;
+      (*nCurNibbleOffset) = nOutOffset;
+      (*nCurFreeNibbles) = 2;
+      pOutData[nOutOffset++] = 0;
+   }
+
+   pOutData[*nCurNibbleOffset] = (pOutData[*nCurNibbleOffset] << 4) | (nNibbleValue & 0x0f);
+   (*nCurFreeNibbles)--;
+   if ((*nCurFreeNibbles) == 0) {
+      (*nCurNibbleOffset) = -1;
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Get the number of extra bits required to represent a literals length
+ *
+ * @param nLength literals length
+ *
+ * @return number of extra bits required
+ */
+static inline int lzsa_get_literals_varlen_size_v2(const int nLength) {
+   if (nLength < LITERALS_RUN_LEN_V2) {
+      return 0;
+   }
+   else {
+      if (nLength < (LITERALS_RUN_LEN_V2 + 15)) {
+         return 4;
+      }
+      else {
+         if (nLength < 256)
+            return 4+8;
+         else {
+            return 4+24;
+         }
+      }
+   }
+}
+
+/**
+ * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
+ * room to write the bytes.
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nLength literals length
+ */
+static inline int lzsa_write_literals_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
+   if (nLength >= LITERALS_RUN_LEN_V2) {
+      if (nLength < (LITERALS_RUN_LEN_V2 + 15)) {
+         nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - LITERALS_RUN_LEN_V2);
+      }
+      else {
+         nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
+         if (nOutOffset < 0) return -1;
+
+         if (nLength < 256)
+            pOutData[nOutOffset++] = nLength - 18;
+         else {
+            pOutData[nOutOffset++] = 239;
+            pOutData[nOutOffset++] = nLength & 0xff;
+            pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
+         }
+      }
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Get the number of extra bits required to represent an encoded match length
+ *
+ * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
+ *
+ * @return number of extra bits required
+ */
+static inline int lzsa_get_match_varlen_size_v2(const int nLength) {
+   if (nLength < MATCH_RUN_LEN_V2) {
+      return 0;
+   }
+   else {
+      if (nLength < (MATCH_RUN_LEN_V2 + 15))
+         return 4;
+      else {
+         if ((nLength + MIN_MATCH_SIZE_V2) < 256)
+            return 4+8;
+         else {
+            return 4 + 24;
+         }
+      }
+   }
+}
+
+/**
+ * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
+ * room to write the bytes.
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
+ */
+static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
+   if (nLength >= MATCH_RUN_LEN_V2) {
+      if (nLength < (MATCH_RUN_LEN_V2 + 15)) {
+         nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - MATCH_RUN_LEN_V2);
+      }
+      else {
+         nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
+         if (nOutOffset < 0) return -1;
+
+         if ((nLength + MIN_MATCH_SIZE_V2) < 256)
+            pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V2 - 24;
+         else {
+            pOutData[nOutOffset++] = 233;
+            pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V2) & 0xff;
+            pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V2) >> 8) & 0xff;
+         }
+      }
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ */
+static void lzsa_optimize_matches_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+   int *cost = (int*)pCompressor->pos_data;  /* Reuse */
+   int *prev_match = (int*)pCompressor->intervals; /* Reuse */
+   lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
+   lzsa_match *pBestMatch = pCompressor->best_match;
+   int nLastLiteralsOffset;
+   int nMinMatchSize = pCompressor->min_match_size;
+   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
+   int i;
+
+   cost[nEndOffset - 1] = 8;
+   prev_match[nEndOffset - 1] = nEndOffset;
+   nLastLiteralsOffset = nEndOffset;
+
+   pCompressor->best_match[nEndOffset - 1].length = 0;
+   pCompressor->best_match[nEndOffset - 1].offset = 0;
+
+   repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1;
+   repmatch_opt[nEndOffset - 1].incoming_offset = -1;
+   repmatch_opt[nEndOffset - 1].expected_repmatch = 0;
+
+   for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
+      int nLiteralsCost;
+
+      int nLiteralsLen = nLastLiteralsOffset - i;
+      nLiteralsCost = 8 + cost[i + 1];
+
+      /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
+       * The cost automatically accumulates down the chain. */
+      if (nLiteralsLen == LITERALS_RUN_LEN_V2) {
+         nLiteralsCost += 4;
+      }
+      else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) {
+         nLiteralsCost += 8;
+      }
+      else if (nLiteralsLen == 256) {
+         nLiteralsCost += 16;
+      }
+      if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2)
+         nLiteralsCost += MODESWITCH_PENALTY;
+
+      lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
+      int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT);
+      int m;
+
+      cost[i] = nLiteralsCost;
+      pCompressor->best_match[i].length = 0;
+      pCompressor->best_match[i].offset = 0;
+
+      repmatch_opt[i].best_slot_for_incoming = -1;
+      repmatch_opt[i].incoming_offset = -1;
+      repmatch_opt[i].expected_repmatch = 0;
+
+      for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
+         int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch;
+
+         nBestCost = nLiteralsCost;
+         nBestMatchLen = 0;
+         nBestMatchOffset = 0;
+         nBestUpdatedSlot = -1;
+         nBestUpdatedIndex = -1;
+         nBestExpectedRepMatch = 0;
+
+         if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
+            int nCurCost;
+            int nMatchLen = pMatch[m].length;
+
+            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
+               nMatchLen = nEndOffset - LAST_LITERALS - i;
+
+            int nCurIndex = prev_match[i + nMatchLen];
+
+            int nMatchOffsetSize = 0;
+            int nCurExpectedRepMatch = 1;
+            if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
+                pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
+               nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
+               nCurExpectedRepMatch = 0;
+            }
+
+            nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2);
+            nCurCost += cost[i + nMatchLen];
+            if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2)
+               nCurCost += MODESWITCH_PENALTY;
+
+            if (nBestCost > (nCurCost - nFavorRatio)) {
+               nBestCost = nCurCost;
+               nBestMatchLen = nMatchLen;
+               nBestMatchOffset = pMatch[m].offset;
+               nBestUpdatedSlot = -1;
+               nBestUpdatedIndex = -1;
+               nBestExpectedRepMatch = nCurExpectedRepMatch;
+            }
+         }
+         else {
+            int nMatchLen = pMatch[m].length;
+            int k, nMatchRunLen;
+
+            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
+               nMatchLen = nEndOffset - LAST_LITERALS - i;
+
+            nMatchRunLen = nMatchLen;
+            if (nMatchRunLen > MATCH_RUN_LEN_V2)
+               nMatchRunLen = MATCH_RUN_LEN_V2;
+
+            for (k = nMinMatchSize; k < nMatchRunLen; k++) {
+               int nCurCost;
+
+               int nCurIndex = prev_match[i + k];
+               int nMatchOffsetSize = 0;
+               int nCurExpectedRepMatch = 1;
+               if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
+                  pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
+                  nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
+                  nCurExpectedRepMatch = 0;
+               }
+
+               nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
+               nCurCost += cost[i + k];
+               if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
+                  nCurCost += MODESWITCH_PENALTY;
+
+               int nCurUpdatedSlot = -1;
+               int nCurUpdatedIndex = -1;
+
+               if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
+                  int r;
+
+                  for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
+                     if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
+                        int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
+
+                        if (nAltCost <= nCurCost) {
+                           nCurUpdatedSlot = r;
+                           nCurUpdatedIndex = nCurIndex;
+                           nCurCost = nAltCost;
+                           nCurExpectedRepMatch = 2;
+                        }
+                     }
+                  }
+               }
+
+               if (nBestCost > (nCurCost - nFavorRatio)) {
+                  nBestCost = nCurCost;
+                  nBestMatchLen = k;
+                  nBestMatchOffset = pMatch[m].offset;
+                  nBestUpdatedSlot = nCurUpdatedSlot;
+                  nBestUpdatedIndex = nCurUpdatedIndex;
+                  nBestExpectedRepMatch = nCurExpectedRepMatch;
+               }
+            }
+
+            for (; k <= nMatchLen; k++) {
+               int nCurCost;
+
+               int nCurIndex = prev_match[i + k];
+               int nMatchOffsetSize = 0;
+               int nCurExpectedRepMatch = 1;
+               if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
+                  pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
+                  nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
+                  nCurExpectedRepMatch = 0;
+               }
+
+               nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
+               nCurCost += cost[i + k];
+               if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
+                  nCurCost += MODESWITCH_PENALTY;
+
+               int nCurUpdatedSlot = -1;
+               int nCurUpdatedIndex = -1;
+
+               if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
+                  int r;
+
+                  for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
+                     if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
+                        int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
+
+                        if (nAltCost <= nCurCost) {
+                           nCurUpdatedSlot = r;
+                           nCurUpdatedIndex = nCurIndex;
+                           nCurCost = nAltCost;
+                           nCurExpectedRepMatch = 2;
+                        }
+                     }
+                  }
+               }
+
+               if (nBestCost > (nCurCost - nFavorRatio)) {
+                  nBestCost = nCurCost;
+                  nBestMatchLen = k;
+                  nBestMatchOffset = pMatch[m].offset;
+                  nBestUpdatedSlot = nCurUpdatedSlot;
+                  nBestUpdatedIndex = nCurUpdatedIndex;
+                  nBestExpectedRepMatch = nCurExpectedRepMatch;
+               }
+            }
+         }
+
+         pSlotCost[m] = nBestCost;
+         pMatch[m].length = nBestMatchLen;
+         pMatch[m].offset = nBestMatchOffset; /* not necessary */
+
+         if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) {
+            cost[i] = nBestCost;
+            pCompressor->best_match[i].length = nBestMatchLen;
+            pCompressor->best_match[i].offset = nBestMatchOffset;
+
+            repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch;
+
+            if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) {
+               repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot;
+               repmatch_opt[nBestUpdatedIndex].incoming_offset = i;
+            }
+         }
+      }
+      for (; m < NMATCHES_PER_OFFSET; m++) {
+         pSlotCost[m] = 0;
+      }
+
+      if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2)
+         nLastLiteralsOffset = i;
+
+      prev_match[i] = nLastLiteralsOffset;
+   }
+
+   int nIncomingOffset = -1;
+   for (i = nStartOffset; i < nEndOffset; ) {
+      if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) {
+         if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) {
+            lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
+            int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
+
+            pCompressor->best_match[i].length = pMatch->length;
+            pCompressor->best_match[i].offset = pMatch->offset;
+            cost[i] = *pSlotCost;
+
+            if (repmatch_opt[i].expected_repmatch == 2)
+               repmatch_opt[i].expected_repmatch = 1;
+         }
+         else {
+            if (repmatch_opt[i].expected_repmatch == 2)
+               repmatch_opt[i].expected_repmatch = 0;
+         }
+
+         nIncomingOffset = i;
+         i += pCompressor->best_match[i].length;
+      }
+      else {
+         i++;
+      }
+   }
+}
+
+/**
+ * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
+ * impacting the compression ratio
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ *
+ * @return non-zero if the number of tokens was reduced, 0 if it wasn't
+ */
+static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+   int i;
+   int nNumLiterals = 0;
+   int nDidReduce = 0;
+   int nPreviousMatchOffset = -1;
+   lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
+
+   for (i = nStartOffset; i < nEndOffset; ) {
+      lzsa_match *pMatch = pCompressor->best_match + i;
+
+      if (pMatch->length >= MIN_MATCH_SIZE_V2) {
+         int nMatchLen = pMatch->length;
+         int nReduce = 0;
+         int nCurrentMatchOffset = i;
+
+         if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
+            int nMatchOffset = pMatch->offset;
+            int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
+            int nUndoRepMatchCost = (nPreviousMatchOffset < 0 || !repmatch_opt[nPreviousMatchOffset].expected_repmatch) ? 0 : 16;
+
+            if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) {
+               int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;
+
+               if (pCompressor->best_match[i + nMatchLen].offset != nMatchOffset) {
+                  nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
+               }
+
+               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen))) {
+                  /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
+                   * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
+                   * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
+                   * match command by literals, the output size will not increase and it will remove one command. */
+                  nReduce = 1;
+               }
+            }
+            else {
+               int nCurIndex = i + nMatchLen;
+               int nNextNumLiterals = 0;
+               int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;;
+
+               do {
+                  nCurIndex++;
+                  nNextNumLiterals++;
+               } while (nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2);
+
+               if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
+                  pCompressor->best_match[nCurIndex].offset != nMatchOffset) {
+                  nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
+               }
+
+               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals))) {
+                  /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
+                   * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
+                  nReduce = 1;
+               }
+            }
+         }
+
+         if (nReduce) {
+            int j;
+
+            for (j = 0; j < nMatchLen; j++) {
+               pCompressor->best_match[i + j].length = 0;
+            }
+            nNumLiterals += nMatchLen;
+            i += nMatchLen;
+
+            nDidReduce = 1;
+
+            if (nPreviousMatchOffset >= 0) {
+               repmatch_opt[nPreviousMatchOffset].expected_repmatch = 0;
+               nPreviousMatchOffset = -1;
+            }
+         }
+         else {
+            if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
+               pMatch->offset && pMatch->offset <= 32 && pCompressor->best_match[i + nMatchLen].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
+               (nMatchLen + pCompressor->best_match[i + nMatchLen].length) <= MAX_VARLEN) {
+               /* Join */
+
+               pMatch->length += pCompressor->best_match[i + nMatchLen].length;
+               pCompressor->best_match[i + nMatchLen].offset = 0;
+               pCompressor->best_match[i + nMatchLen].length = -1;
+               continue;
+            }
+
+            nNumLiterals = 0;
+            i += nMatchLen;
+         }
+
+         nPreviousMatchOffset = nCurrentMatchOffset;
+      }
+      else {
+         nNumLiterals++;
+         i++;
+      }
+   }
+
+   return nDidReduce;
+}
+
+/**
+ * Emit block of compressed data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
+   int i;
+   int nNumLiterals = 0;
+   int nInFirstLiteralOffset = 0;
+   int nOutOffset = 0;
+   int nCurNibbleOffset = -1, nCurFreeNibbles = 0;
+   int nRepMatchOffset = 0;
+   lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
+
+   for (i = nStartOffset; i < nEndOffset; ) {
+      lzsa_match *pMatch = pCompressor->best_match + i;
+
+      if (pMatch->length >= MIN_MATCH_SIZE_V2) {
+         int nMatchOffset = pMatch->offset;
+         int nMatchLen = pMatch->length;
+         int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
+         int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
+         int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V2) ? MATCH_RUN_LEN_V2 : nEncodedMatchLen;
+         int nTokenOffsetMode;
+         int nOffsetSize;
+
+         if (nMatchOffset == nRepMatchOffset) {
+            nTokenOffsetMode = 0xe0;
+            nOffsetSize = 0;
+         }
+         else {
+            if (nMatchOffset <= 32) {
+               nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1);
+               nOffsetSize = 4;
+            }
+            else if (nMatchOffset <= 512) {
+               nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3);
+               nOffsetSize = 8;
+            }
+            else if (nMatchOffset <= (8192 + 512)) {
+               nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7);
+               nOffsetSize = 12;
+            }
+            else {
+               nTokenOffsetMode = 0xc0;
+               nOffsetSize = 16;
+            }
+         }
+
+         int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2(nEncodedMatchLen);
+
+         if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
+            return -1;
+         if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
+            return -1;
+
+         pOutData[nOutOffset++] = nTokenOffsetMode | (nTokenLiteralsLen << 3) | nTokenMatchLen;
+         nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
+         if (nOutOffset < 0) return -1;
+
+         if (nNumLiterals != 0) {
+            memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+            nOutOffset += nNumLiterals;
+            nNumLiterals = 0;
+         }
+
+         if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) {
+            nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f);
+            if (nOutOffset < 0) return -1;
+         }
+         else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) {
+            pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
+         }
+         else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) {
+            nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f);
+            if (nOutOffset < 0) return -1;
+            pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff;
+         }
+         else if (nTokenOffsetMode == 0xc0) {
+            pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
+            pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
+         }
+         nRepMatchOffset = nMatchOffset;
+
+         nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen);
+         if (nOutOffset < 0) return -1;
+
+         i += nMatchLen;
+
+         pCompressor->num_commands++;
+      }
+      else {
+         if (nNumLiterals == 0)
+            nInFirstLiteralOffset = i;
+         nNumLiterals++;
+         i++;
+      }
+   }
+
+   {
+      int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
+      int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3);
+
+      if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
+         return -1;
+
+      if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
+         pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x47;
+      else
+         pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x00;
+      nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
+      if (nOutOffset < 0) return -1;
+
+      if (nNumLiterals != 0) {
+         memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+         nOutOffset += nNumLiterals;
+         nNumLiterals = 0;
+      }
+
+      pCompressor->num_commands++;
+   }
+
+   if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
+      /* Emit EOD marker for raw block */
+
+      if (nOutOffset >= nMaxOutDataSize)
+         return -1;
+      pOutData[nOutOffset++] = 0;      /* Match offset */
+
+      nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 15);   /* Extended match length nibble */
+      if (nOutOffset < 0) return -1;
+
+      if ((nOutOffset + 1) > nMaxOutDataSize)
+         return -1;
+
+      pOutData[nOutOffset++] = 232;    /* EOD match length byte */
+   }
+
+   if (nCurNibbleOffset != -1) {
+      nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 0);
+      if (nOutOffset < 0 || nCurNibbleOffset != -1)
+         return -1;
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
+   lzsa_optimize_matches_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+
+   int nDidReduce;
+   int nPasses = 0;
+   do {
+      nDidReduce = lzsa_optimize_command_count_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+      nPasses++;
+   } while (nDidReduce && nPasses < 20);
+
+   return lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
+}
--- a/src/shrink_block_v2.h
+++ b/src/shrink_block_v2.h
@ -0,0 +1,53 @@
+/*
+ * shrink_v2.h - LZSA2 block compressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_BLOCK_V2_H
+#define _SHRINK_BLOCK_V2_H
+
+/* Forward declarations */
+typedef struct _lzsa_compressor lzsa_compressor;
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
+
+#endif /* _SHRINK_BLOCK_V2_H */
--- a/src/shrink_context.c
+++ b/src/shrink_context.c
@ -0,0 +1,194 @@
+/*
+ * shrink_context.c - compression context implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "shrink_context.h"
+#include "shrink_block_v1.h"
+#include "shrink_block_v2.h"
+#include "format.h"
+#include "matchfinder.h"
+
+/**
+ * Initialize compression context
+ *
+ * @param pCompressor compression context to initialize
+ * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
+ * @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
+ * @param nFlags compression flags
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
+   int nResult;
+   int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
+
+   nResult = divsufsort_init(&pCompressor->divsufsort_context);
+   pCompressor->intervals = NULL;
+   pCompressor->pos_data = NULL;
+   pCompressor->open_intervals = NULL;
+   pCompressor->match = NULL;
+   pCompressor->best_match = NULL;
+   pCompressor->slot_cost = NULL;
+   pCompressor->repmatch_opt = NULL;
+   pCompressor->min_match_size = nMinMatchSize;
+   if (pCompressor->min_match_size < nMinMatchSizeForFormat)
+      pCompressor->min_match_size = nMinMatchSizeForFormat;
+   else if (pCompressor->min_match_size > 5)
+      pCompressor->min_match_size = 5;
+   pCompressor->format_version = nFormatVersion;
+   pCompressor->flags = nFlags;
+   pCompressor->num_commands = 0;
+
+   if (!nResult) {
+      pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
+
+      if (pCompressor->intervals) {
+         pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
+
+         if (pCompressor->pos_data) {
+            pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
+
+            if (pCompressor->open_intervals) {
+               pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
+
+               if (pCompressor->match) {
+                  if (pCompressor->format_version == 2) {
+                     pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
+
+                     if (pCompressor->best_match) {
+                        pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
+
+                        if (pCompressor->slot_cost) {
+                           pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
+
+                           if (pCompressor->repmatch_opt)
+                              return 0;
+                        }
+                     }
+                  }
+                  else {
+                     return 0;
+                  }
+               }
+            }
+         }
+      }
+   }
+
+   lzsa_compressor_destroy(pCompressor);
+   return 100;
+}
+
+/**
+ * Clean up compression context and free up any associated resources
+ *
+ * @param pCompressor compression context to clean up
+ */
+void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
+   divsufsort_destroy(&pCompressor->divsufsort_context);
+
+   if (pCompressor->repmatch_opt) {
+      free(pCompressor->repmatch_opt);
+      pCompressor->repmatch_opt = NULL;
+   }
+
+   if (pCompressor->slot_cost) {
+      free(pCompressor->slot_cost);
+      pCompressor->slot_cost = NULL;
+   }
+
+   if (pCompressor->best_match) {
+      free(pCompressor->best_match);
+      pCompressor->best_match = NULL;
+   }
+
+   if (pCompressor->match) {
+      free(pCompressor->match);
+      pCompressor->match = NULL;
+   }
+
+   if (pCompressor->open_intervals) {
+      free(pCompressor->open_intervals);
+      pCompressor->open_intervals = NULL;
+   }
+
+   if (pCompressor->pos_data) {
+      free(pCompressor->pos_data);
+      pCompressor->pos_data = NULL;
+   }
+
+   if (pCompressor->intervals) {
+      free(pCompressor->intervals);
+      pCompressor->intervals = NULL;
+   }
+}
+
+/**
+ * Compress one block of data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
+ * @param nInDataSize number of input bytes to compress
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
+   if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
+      return -1;
+   if (nPreviousBlockSize) {
+      lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
+   }
+   lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+
+   if (pCompressor->format_version == 1) {
+      return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
+   }
+   else if (pCompressor->format_version == 2) {
+      return lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
+   }
+   else {
+      return -1;
+   }
+}
+
+/**
+ * Get the number of compression commands issued in compressed data blocks
+ *
+ * @return number of commands
+ */
+int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor) {
+   return pCompressor->num_commands;
+}
--- a/src/shrink_context.h
+++ b/src/shrink_context.h
@ -0,0 +1,123 @@
+/*
+ * shrink_context.h - compression context definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_CONTEXT_H
+#define _SHRINK_CONTEXT_H
+
+#include "divsufsort.h"
+
+#define LCP_BITS 15
+#define LCP_MAX (1U<<(LCP_BITS - 1))
+#define LCP_SHIFT (32-LCP_BITS)
+#define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
+#define POS_MASK ((1U<<LCP_SHIFT) - 1)
+
+#define NMATCHES_PER_OFFSET 8
+#define MATCHES_PER_OFFSET_SHIFT 3
+
+#define LEAVE_ALONE_MATCH_SIZE 1000
+
+#define LAST_MATCH_OFFSET 4
+#define LAST_LITERALS 1
+
+#define MODESWITCH_PENALTY 1
+
+/** One match */
+typedef struct _lzsa_match {
+   unsigned short length;
+   unsigned short offset;
+} lzsa_match;
+
+/** One rep-match slot (for LZSA2) */
+typedef struct _lzsa_repmatch_opt {
+   int incoming_offset;
+   short best_slot_for_incoming;
+   short expected_repmatch;
+} lzsa_repmatch_opt;
+
+/** Compression context */
+typedef struct _lzsa_compressor {
+   divsufsort_ctx_t divsufsort_context;
+   unsigned int *intervals;
+   unsigned int *pos_data;
+   unsigned int *open_intervals;
+   lzsa_match *match;
+   lzsa_match *best_match;
+   int *slot_cost;
+   lzsa_repmatch_opt *repmatch_opt;
+   int min_match_size;
+   int format_version;
+   int flags;
+   int num_commands;
+} lzsa_compressor;
+
+/**
+ * Initialize compression context
+ *
+ * @param pCompressor compression context to initialize
+ * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
+ * @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
+ * @param nFlags compression flags
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
+
+/**
+ * Clean up compression context and free up any associated resources
+ *
+ * @param pCompressor compression context to clean up
+ */
+void lzsa_compressor_destroy(lzsa_compressor *pCompressor);
+
+/**
+ * Compress one block of data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
+ * @param nInDataSize number of input bytes to compress
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
+
+/**
+ * Get the number of compression commands issued in compressed data blocks
+ *
+ * @return number of commands
+ */
+int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
+
+#endif /* _SHRINK_CONTEXT_H */
--- a/src/shrink_inmem.c
+++ b/src/shrink_inmem.c
@ -0,0 +1,178 @@
+/*
+ * shrink_inmem.c - in-memory compression implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "shrink_inmem.h"
+#include "shrink_context.h"
+#include "frame.h"
+#include "format.h"
+#include "lib.h"
+
+/**
+ * Get maximum compressed size of input(source) data
+ *
+ * @param pFileData pointer to input(source) data
+ * @param nFileSize input(source) size in bytes
+ *
+ * @return maximum compressed size
+ */
+size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize) {
+   return lzsa_get_header_size() + ((nInputSize + (BLOCK_SIZE - 1)) >> 16) * lzsa_get_frame_size() + nInputSize + lzsa_get_frame_size() /* footer */;
+}
+
+/**
+ * Compress memory
+ *
+ * @param pInputData pointer to input(source) data to compress
+ * @param pOutBuffer buffer for compressed data
+ * @param nInputSize input(source) size in bytes
+ * @param nMaxOutBufferSize maximum capacity of compression buffer
+ * @param nFlags compression flags (LZSA_FLAG_xxx)
+ * @param nMinMatchSize minimum match size
+ * @param nFormatVersion version of format to use (1-2)
+ *
+ * @return actual compressed size, or -1 for error
+ */
+size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
+                             const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion) {
+   lzsa_compressor compressor;
+   size_t nOriginalSize = 0;
+   size_t nCompressedSize = 0L;
+   int nResult;
+   int nError = 0;
+
+   nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
+   if (nResult != 0) {
+      return -1;
+   }
+
+   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
+      int nHeaderSize = lzsa_encode_header(pOutBuffer, (int)nMaxOutBufferSize, nFormatVersion);
+      if (nHeaderSize < 0)
+         nError = LZSA_ERROR_COMPRESSION;
+      else {
+         nCompressedSize += nHeaderSize;
+      }
+   }
+
+   int nPreviousBlockSize = 0;
+   int nNumBlocks = 0;
+
+   while (nOriginalSize < nInputSize && !nError) {
+      int nInDataSize;
+
+      nInDataSize = (int)(nInputSize - nOriginalSize);
+      if (nInDataSize > BLOCK_SIZE)
+         nInDataSize = BLOCK_SIZE;
+
+      if (nInDataSize > 0) {
+         if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
+            nError = LZSA_ERROR_RAW_TOOLARGE;
+            break;
+         }
+
+         int nOutDataSize;
+         int nOutDataEnd = (int)(nMaxOutBufferSize - (lzsa_get_frame_size() + nCompressedSize + lzsa_get_frame_size() /* footer */));
+
+         if (nOutDataEnd > BLOCK_SIZE)
+            nOutDataEnd = BLOCK_SIZE;
+
+         nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + lzsa_get_frame_size() + nCompressedSize, nOutDataEnd);
+         if (nOutDataSize >= 0) {
+            /* Write compressed block */
+
+            if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
+               int nBlockheaderSize = lzsa_encode_compressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nOutDataSize);
+               if (nBlockheaderSize < 0)
+                  nError = LZSA_ERROR_COMPRESSION;
+               else {
+                  nCompressedSize += nBlockheaderSize;
+
+                  nOriginalSize += nInDataSize;
+                  nCompressedSize += nOutDataSize;
+               }
+            }
+         }
+         else {
+            /* Write uncompressible, literal block */
+
+            if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
+               nError = LZSA_ERROR_RAW_UNCOMPRESSED;
+               break;
+            }
+
+            int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nInDataSize);
+            if (nBlockheaderSize < 0)
+               nError = LZSA_ERROR_COMPRESSION;
+            else {
+               if (nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
+                  nError = LZSA_ERROR_DST;
+               else {
+                  memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);
+
+                  nOriginalSize += nInDataSize;
+                  nCompressedSize += nBlockheaderSize + nInDataSize;
+               }
+            }
+         }
+
+         nPreviousBlockSize = nInDataSize;
+         nNumBlocks++;
+      }
+   }
+
+   if (!nError) {
+      int nFooterSize;
+
+      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
+         nFooterSize = 0;
+      }
+      else {
+         nFooterSize = lzsa_encode_footer_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize));
+         if (nFooterSize < 0)
+            nError = LZSA_ERROR_COMPRESSION;
+      }
+
+      nCompressedSize += nFooterSize;
+   }
+
+   lzsa_compressor_destroy(&compressor);
+
+   if (nError) {
+      return -1;
+   }
+   else {
+      return nCompressedSize;
+   }
+}
+
--- a/src/shrink_inmem.h
+++ b/src/shrink_inmem.h
@ -0,0 +1,64 @@
+/*
+ * shrink_inmem.h - in-memory compression definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_INMEM_H
+#define _SHRINK_INMEM_H
+
+#include <stdlib.h>
+
+/**
+ * Get maximum compressed size of input(source) data
+ *
+ * @param pFileData pointer to input(source) data
+ * @param nFileSize input(source) size in bytes
+ *
+ * @return maximum compressed size
+ */
+size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
+
+/**
+ * Compress memory
+ *
+ * @param pInputData pointer to input(source) data to compress
+ * @param pOutBuffer buffer for compressed data
+ * @param nInputSize input(source) size in bytes
+ * @param nMaxOutBufferSize maximum capacity of compression buffer
+ * @param nFlags compression flags (LZSA_FLAG_xxx)
+ * @param nMinMatchSize minimum match size
+ * @param nFormatVersion version of format to use (1-2)
+ *
+ * @return actual compressed size, or -1 for error
+ */
+size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
+   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
+
+#endif /* _SHRINK_INMEM_H */
--- a/src/shrink_streaming.c
+++ b/src/shrink_streaming.c
@ -0,0 +1,285 @@
+/*
+ * shrink_streaming.h - streaming compression definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "shrink_streaming.h"
+#include "format.h"
+#include "frame.h"
+#include "lib.h"
+
+/*-------------- File API -------------- */
+
+/**
+ * Compress file
+ *
+ * @param pszInFilename name of input(source) file to compress
+ * @param pszOutFilename name of output(compressed) file to generate
+ * @param pszDictionaryFilename name of dictionary file, or NULL for none
+ * @param nFlags compression flags (LZSA_FLAG_xxx)
+ * @param nMinMatchSize minimum match size
+ * @param nFormatVersion version of format to use (1-2)
+ * @param progress progress function, called after compressing each block, or NULL for none
+ * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
+ * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
+ * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
+      void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
+   lzsa_stream_t inStream, outStream;
+   void *pDictionaryData = NULL;
+   int nDictionaryDataSize = 0;
+   lzsa_status_t nStatus;
+
+   if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) {
+      return LZSA_ERROR_SRC;
+   }
+
+   if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
+      inStream.close(&inStream);
+      return LZSA_ERROR_DST;
+   }
+
+   nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
+
+   if (nStatus) {
+      outStream.close(&outStream);
+      inStream.close(&inStream);
+      return nStatus;
+   }
+
+   nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount);
+
+   lzsa_dictionary_free(&pDictionaryData);
+   outStream.close(&outStream);
+   inStream.close(&inStream);
+   return nStatus;
+}
+
+/*-------------- Streaming API -------------- */
+
+/**
+ * Compress stream
+ *
+ * @param pInStream input(source) stream to compress
+ * @param pOutStream output(compressed) stream to write to
+ * @param pDictionaryData dictionary contents, or NULL for none
+ * @param nDictionaryDataSize size of dictionary contents, or 0
+ * @param nFlags compression flags (LZSA_FLAG_xxx)
+ * @param nMinMatchSize minimum match size
+ * @param nFormatVersion version of format to use (1-2)
+ * @param progress progress function, called after compressing each block, or NULL for none
+ * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
+ * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
+ * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
+                                   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
+                                   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
+   unsigned char *pInData, *pOutData;
+   lzsa_compressor compressor;
+   long long nStartTime = 0LL, nEndTime = 0LL;
+   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
+   int nResult;
+   unsigned char cFrameData[16];
+   int nError = 0;
+
+   pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
+   if (!pInData) {
+      return LZSA_ERROR_MEMORY;
+   }
+   memset(pInData, 0, BLOCK_SIZE * 2);
+
+   pOutData = (unsigned char*)malloc(BLOCK_SIZE);
+   if (!pOutData) {
+      free(pInData);
+      pInData = NULL;
+
+      return LZSA_ERROR_MEMORY;
+   }
+   memset(pOutData, 0, BLOCK_SIZE);
+
+   nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
+   if (nResult != 0) {
+      free(pOutData);
+      pOutData = NULL;
+
+      free(pInData);
+      pInData = NULL;
+
+      return LZSA_ERROR_MEMORY;
+   }
+
+   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
+      int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
+      if (nHeaderSize < 0)
+         nError = LZSA_ERROR_COMPRESSION;
+      else {
+         if (pOutStream->write(pOutStream, cFrameData, nHeaderSize) != nHeaderSize)
+            nError = LZSA_ERROR_DST;
+         nCompressedSize += (long long)nHeaderSize;
+      }
+   }
+
+   int nPreviousBlockSize = 0;
+   int nNumBlocks = 0;
+
+   while (!pInStream->eof(pInStream) && !nError) {
+      int nInDataSize;
+
+      if (nPreviousBlockSize) {
+         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
+      }
+      else if (nDictionaryDataSize && pDictionaryData) {
+         nPreviousBlockSize = nDictionaryDataSize;
+         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryData, nPreviousBlockSize);
+      }
+
+      nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE);
+      if (nInDataSize > 0) {
+         if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
+            nError = LZSA_ERROR_RAW_TOOLARGE;
+            break;
+         }
+         nDictionaryDataSize = 0;
+
+         int nOutDataSize;
+
+         nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? BLOCK_SIZE : nInDataSize);
+         if (nOutDataSize >= 0) {
+            /* Write compressed block */
+
+            if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
+               int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize);
+               if (nBlockheaderSize < 0)
+                  nError = LZSA_ERROR_COMPRESSION;
+               else {
+                  nCompressedSize += (long long)nBlockheaderSize;
+                  if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
+                     nError = LZSA_ERROR_DST;
+                  }
+               }
+            }
+
+            if (!nError) {
+               if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) {
+                  nError = LZSA_ERROR_DST;
+               }
+               else {
+                  nOriginalSize += (long long)nInDataSize;
+                  nCompressedSize += (long long)nOutDataSize;
+               }
+            }
+         }
+         else {
+            /* Write uncompressible, literal block */
+
+            if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
+               nError = LZSA_ERROR_RAW_UNCOMPRESSED;
+               break;
+            }
+
+            int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize);
+            if (nBlockheaderSize < 0)
+               nError = LZSA_ERROR_COMPRESSION;
+            else {
+               if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
+                  nError = LZSA_ERROR_DST;
+               }
+               else {
+                  if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) {
+                     nError = LZSA_ERROR_DST;
+                  }
+                  else {
+                     nOriginalSize += (long long)nInDataSize;
+                     nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize;
+                  }
+               }
+            }
+         }
+
+         nPreviousBlockSize = nInDataSize;
+         nNumBlocks++;
+      }
+
+      if (!nError && !pInStream->eof(pInStream)) {
+         if (progress)
+            progress(nOriginalSize, nCompressedSize);
+      }
+   }
+
+   if (!nError) {
+      int nFooterSize;
+
+      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
+         nFooterSize = 0;
+      }
+      else {
+         nFooterSize = lzsa_encode_footer_frame(cFrameData, 16);
+         if (nFooterSize < 0)
+            nError = LZSA_ERROR_COMPRESSION;
+      }
+
+      if (pOutStream->write(pOutStream, cFrameData, nFooterSize) != nFooterSize)
+         nError = LZSA_ERROR_DST;
+      nCompressedSize += (long long)nFooterSize;
+   }
+
+   if (progress)
+      progress(nOriginalSize, nCompressedSize);
+
+   int nCommandCount = lzsa_compressor_get_command_count(&compressor);
+   lzsa_compressor_destroy(&compressor);
+
+   free(pOutData);
+   pOutData = NULL;
+
+   free(pInData);
+   pInData = NULL;
+
+   if (nError) {
+      return nError;
+   }
+   else {
+      if (pOriginalSize)
+         *pOriginalSize = nOriginalSize;
+      if (pCompressedSize)
+         *pCompressedSize = nCompressedSize;
+      if (pCommandCount)
+         *pCommandCount = nCommandCount;
+      return LZSA_OK;
+   }
+}
--- a/src/shrink_streaming.h
+++ b/src/shrink_streaming.h
@ -0,0 +1,86 @@
+/*
+ * shrink_streaming.h - streaming compression definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_STREAMING_H
+#define _SHRINK_STREAMING_H
+
+#include "stream.h"
+
+/* Forward declaration */
+typedef enum _lzsa_status_t lzsa_status_t;
+
+/*-------------- File API -------------- */
+
+/**
+ * Compress file
+ *
+ * @param pszInFilename name of input(source) file to compress
+ * @param pszOutFilename name of output(compressed) file to generate
+ * @param pszDictionaryFilename name of dictionary file, or NULL for none
+ * @param nFlags compression flags (LZSA_FLAG_xxx)
+ * @param nMinMatchSize minimum match size
+ * @param nFormatVersion version of format to use (1-2)
+ * @param progress progress function, called after compressing each block, or NULL for none
+ * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
+ * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
+ * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
+   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
+   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
+
+/*-------------- Streaming API -------------- */
+
+/**
+ * Compress stream
+ *
+ * @param pInStream input(source) stream to compress
+ * @param pOutStream output(compressed) stream to write to
+ * @param pDictionaryData dictionary contents, or NULL for none
+ * @param nDictionaryDataSize size of dictionary contents, or 0
+ * @param nFlags compression flags (LZSA_FLAG_xxx)
+ * @param nMinMatchSize minimum match size
+ * @param nFormatVersion version of format to use (1-2)
+ * @param progress progress function, called after compressing each block, or NULL for none
+ * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
+ * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
+ * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
+ *
+ * @return LZSA_OK for success, or an error value from lzsa_status_t
+ */
+lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
+   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
+   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
+
+#endif /* _SHRINK_STREAMING_H */