Simplify and speed up in-tool decompressors; add in-memory benchmark

2025-04-05 14:37:05 +00:00 · 2019-05-17 08:57:01 +02:00 · 2019-05-17 08:57:01 +02:00 · 45cb124c4d
commit 45cb124c4d
parent 055a80abfd
8 changed files with 601 additions and 359 deletions
--- a/1
+++ b/1
@ -12,6 +12,7 @@ APP := lzsa

 OBJS := $(OBJDIR)/src/lzsa.o
 OBJS += $(OBJDIR)/src/lib.o
+OBJS += $(OBJDIR)/src/inmem.o
 OBJS += $(OBJDIR)/src/stream.o
 OBJS += $(OBJDIR)/src/frame.o
 OBJS += $(OBJDIR)/src/matchfinder.o
--- a/VS2017/lzsa.vcxproj
+++ b/VS2017/lzsa.vcxproj
@ -181,6 +181,7 @@
    <ClInclude Include="..\src\expand_v2.h" />
    <ClInclude Include="..\src\format.h" />
    <ClInclude Include="..\src\frame.h" />
+    <ClInclude Include="..\src\inmem.h" />
    <ClInclude Include="..\src\lib.h" />
    <ClInclude Include="..\src\libdivsufsort\include\config.h" />
    <ClInclude Include="..\src\libdivsufsort\include\divsufsort.h" />
@ -195,6 +196,7 @@
    <ClCompile Include="..\src\expand_v1.c" />
    <ClCompile Include="..\src\expand_v2.c" />
    <ClCompile Include="..\src\frame.c" />
+    <ClCompile Include="..\src\inmem.c" />
    <ClCompile Include="..\src\lib.c" />
    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c" />
    <ClCompile Include="..\src\libdivsufsort\lib\sssort.c" />
--- a/VS2017/lzsa.vcxproj.filters
+++ b/VS2017/lzsa.vcxproj.filters
@ -63,6 +63,9 @@
    <ClInclude Include="..\src\stream.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
+    <ClInclude Include="..\src\inmem.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\src\libdivsufsort\lib\utils.c">
@ -104,5 +107,8 @@
    <ClCompile Include="..\src\stream.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
+    <ClCompile Include="..\src\inmem.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
  </ItemGroup>
 </Project>
--- a/src/expand_v1.c
+++ b/src/expand_v1.c
@ -42,130 +42,72 @@
 #define FORCE_INLINE __attribute__((always_inline))
 #endif /* _MSC_VER */

-static inline FORCE_INLINE int lzsa_expand_literals_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
+static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) {
+   unsigned int nByte;
   const unsigned char *pInBlock = *ppInBlock;
-   unsigned char *pCurOutData = *ppCurOutData;

-   if (nLiterals == LITERALS_RUN_LEN_V1) {
-      unsigned char nByte;
+   if (pInBlock < pInBlockEnd) {
+      nByte = *pInBlock++;
+      (*nLiterals) += nByte;

-      if (pInBlock < pInBlockEnd) {
-         nByte = *pInBlock++;
-         nLiterals += ((unsigned int)nByte);
-
-         if (nByte == 250) {
-            if (pInBlock < pInBlockEnd) {
-               nLiterals = 256 + ((unsigned int)*pInBlock++);
-            }
-            else {
-               return -1;
-            }
+      if (nByte == 250) {
+         if (pInBlock < pInBlockEnd) {
+            (*nLiterals) = 256 + ((unsigned int)*pInBlock++);
         }
-         else if (nByte == 249) {
-            if ((pInBlock + 1) < pInBlockEnd) {
-               nLiterals = ((unsigned int)*pInBlock++);
-               nLiterals |= (((unsigned int)*pInBlock++) << 8);
-            }
-            else {
-               return -1;
-            }
+         else {
+            return -1;
         }
      }
-      else {
-         return -1;
-      }
-   }
-
-   if (nLiterals != 0) {
-      if ((pInBlock + nLiterals) <= pInBlockEnd &&
-         (pCurOutData + nLiterals) <= pOutDataEnd) {
-         memcpy(pCurOutData, pInBlock, nLiterals);
-         pInBlock += nLiterals;
-         pCurOutData += nLiterals;
-      }
-      else {
-         return -1;
-      }
-   }
-
-   *ppInBlock = pInBlock;
-   *ppCurOutData = pCurOutData;
-   return 0;
-}
-
-static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
-   const unsigned char *pInBlock = *ppInBlock;
-   unsigned char *pCurOutData = *ppCurOutData;
-
-   nMatchLen += MIN_MATCH_SIZE_V1;
-   if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
-      unsigned char nByte;
-
-      if (pInBlock < pInBlockEnd) {
-         nByte = *pInBlock++;
-         nMatchLen += ((unsigned int)nByte);
-
-         if (nByte == 239) {
-            if (pInBlock < pInBlockEnd) {
-               nMatchLen = 256 + ((unsigned int)*pInBlock++);
-            }
-            else {
-               return -1;
-            }
+      else if (nByte == 249) {
+         if ((pInBlock + 1) < pInBlockEnd) {
+            (*nLiterals) = ((unsigned int)*pInBlock++);
+            (*nLiterals) |= (((unsigned int)*pInBlock++) << 8);
         }
-         else if (nByte == 238) {
-            if ((pInBlock + 1) < pInBlockEnd) {
-               nMatchLen = ((unsigned int)*pInBlock++);
-               nMatchLen |= (((unsigned int)*pInBlock++) << 8);
-            }
-            else {
-               return -1;
-            }
+         else {
+            return -1;
         }
      }
-      else {
-         return -1;
-      }
-   }

-   if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
-      /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
-
-      if ((pCurOutData - pSrc) >= 8 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
-         const unsigned char *pCopySrc = pSrc;
-         unsigned char *pCopyDst = pCurOutData;
-         const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
-
-         do {
-            memcpy(pCopyDst, pCopySrc, 8);
-            memcpy(pCopyDst + 8, pCopySrc + 8, 8);
-            pCopySrc += 16;
-            pCopyDst += 16;
-         } while (pCopyDst < pCopyEndDst);
-
-         pCurOutData += nMatchLen;
-      }
-      else {
-         while (nMatchLen >= 4) {
-            *pCurOutData++ = *pSrc++;
-            *pCurOutData++ = *pSrc++;
-            *pCurOutData++ = *pSrc++;
-            *pCurOutData++ = *pSrc++;
-            nMatchLen -= 4;
-         }
-         while (nMatchLen) {
-            *pCurOutData++ = *pSrc++;
-            nMatchLen--;
-         }
-      }
+      *ppInBlock = pInBlock;
+      return 0;
   }
   else {
      return -1;
   }
+}

-   *ppInBlock = pInBlock;
-   *ppCurOutData = pCurOutData;
-   return 0;
+static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) {
+   unsigned int nByte;
+   const unsigned char *pInBlock = *ppInBlock;
+
+   if (pInBlock < pInBlockEnd) {
+      nByte = *pInBlock++;
+      (*nMatchLen) += nByte;
+
+      if (nByte == 239) {
+         if (pInBlock < pInBlockEnd) {
+            (*nMatchLen) = 256 + ((unsigned int)*pInBlock++);
+         }
+         else {
+            return -1;
+         }
+      }
+      else if (nByte == 238) {
+         if ((pInBlock + 1) < pInBlockEnd) {
+            (*nMatchLen) = ((unsigned int)*pInBlock++);
+            (*nMatchLen) |= (((unsigned int)*pInBlock++) << 8);
+         }
+         else {
+            return -1;
+         }
+      }
+
+      *ppInBlock = pInBlock;
+      return 0;
+   }
+   else {
+      return -1;
+   }
 }

 /**
@ -181,77 +123,90 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **p
 */
 int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
   const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
-   const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
   unsigned char *pCurOutData = pOutData + nOutDataOffset;
   const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
-   const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
-
-   /* Fast loop */
-
-   while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) {
-      const unsigned char token = *pInBlock++;
-      unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
-
-      if (nLiterals < LITERALS_RUN_LEN_V1) {
-         memcpy(pCurOutData, pInBlock, 8);
-         pInBlock += nLiterals;
-         pCurOutData += nLiterals;
-      }
-      else {
-         if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
-            return -1;
-      }
-
-      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
-         int nMatchOffset;
-
-         nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00;
-         if (token & 0x80) {
-            nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8);
-         }
-
-         const unsigned char *pSrc = pCurOutData + nMatchOffset;
-         if (pSrc >= pOutData) {
-            unsigned int nMatchLen = (unsigned int)(token & 0x0f);
-            if (nMatchLen < MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
-               memcpy(pCurOutData, pSrc, 8);
-               memcpy(pCurOutData + 8, pSrc + 8, 8);
-               memcpy(pCurOutData + 16, pSrc + 16, 4);
-               pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
-            }
-            else {
-               if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
-                  return -1;
-            }
-         }
-         else {
-            return -1;
-         }
-      }
-   }
-
-   /* Slow loop for the remainder of the buffer */
+   const unsigned char *pOutDataFastEnd = pOutDataEnd - 18;

   while (pInBlock < pInBlockEnd) {
      const unsigned char token = *pInBlock++;
      unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);

-      if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
-         return -1;
-
-      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
-         int nMatchOffset;
-
-         nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00;
-         if (token & 0x80) {
-            nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8);
+      if (nLiterals != LITERALS_RUN_LEN_V1 && (pInBlock + 8) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
+         memcpy(pCurOutData, pInBlock, 8);
+         pInBlock += nLiterals;
+         pCurOutData += nLiterals;
+      }
+      else {
+         if (nLiterals == LITERALS_RUN_LEN_V1) {
+            if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals))
+               return -1;
         }

-         const unsigned char *pSrc = pCurOutData + nMatchOffset;
+         if (nLiterals != 0) {
+            if ((pInBlock + nLiterals) <= pInBlockEnd &&
+               (pCurOutData + nLiterals) <= pOutDataEnd) {
+               memcpy(pCurOutData, pInBlock, nLiterals);
+               pInBlock += nLiterals;
+               pCurOutData += nLiterals;
+            }
+            else {
+               return -1;
+            }
+         }
+      }
+
+      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
+         unsigned int nMatchOffset;
+
+         nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff;
+         if (token & 0x80) {
+            nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00;
+         }
+         nMatchOffset++;
+
+         const unsigned char *pSrc = pCurOutData - nMatchOffset;
         if (pSrc >= pOutData) {
            unsigned int nMatchLen = (unsigned int)(token & 0x0f);
-            if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
-               return -1;
+            if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
+               memcpy(pCurOutData, pSrc, 8);
+               memcpy(pCurOutData + 8, pSrc + 8, 8);
+               memcpy(pCurOutData + 16, pSrc + 16, 2);
+               pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
+            }
+            else {
+               nMatchLen += MIN_MATCH_SIZE_V1;
+               if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
+                  if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen))
+                     return -1;
+               }
+
+               if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
+                  /* Copy left to right so that overlapping matches replicate correctly: 16-byte memcpy() chunks when the offset is at least 16 and the output has enough slack, a byte-by-byte copy otherwise */
+
+                  if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
+                     const unsigned char *pCopySrc = pSrc;
+                     unsigned char *pCopyDst = pCurOutData;
+                     const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
+
+                     do {
+                        memcpy(pCopyDst, pCopySrc, 16);
+                        pCopySrc += 16;
+                        pCopyDst += 16;
+                     } while (pCopyDst < pCopyEndDst);
+
+                     pCurOutData += nMatchLen;
+                  }
+                  else {
+                     while (nMatchLen) {
+                        *pCurOutData++ = *pSrc++;
+                        nMatchLen--;
+                     }
+                  }
+               }
+               else {
+                  return -1;
+               }
+            }
         }
         else {
            return -1;
--- a/src/expand_v2.c
+++ b/src/expand_v2.c
@ -42,85 +42,40 @@
 #define FORCE_INLINE __attribute__((always_inline))
 #endif /* _MSC_VER */

-static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles) {
-   unsigned int nValue;
-
+static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) {
   if ((*nCurNibbles ^= 1) != 0) {
      const unsigned char *pInBlock = *ppInBlock;
-      if (pInBlock >= pInBlockEnd) return -1;
-      (*nibbles) = *pInBlock++;
-      *ppInBlock = pInBlock;
-   }
-
-   nValue = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
-
-   (*nibbles) <<= 4;
-
-   return nValue;
-}
-
-static inline FORCE_INLINE int lzsa_expand_literals_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, int *nCurNibbles, unsigned char *nibbles,
-      unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
-   const unsigned char *pInBlock = *ppInBlock;
-   unsigned char *pCurOutData = *ppCurOutData;
-
-   if (nLiterals == LITERALS_RUN_LEN_V2) {
-      nLiterals += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles);
-
-      if (nLiterals == (LITERALS_RUN_LEN_V2 + 15)) {
-         if (pInBlock < pInBlockEnd) {
-            nLiterals = ((unsigned int)*pInBlock++);
-
-            if (nLiterals == 0) {
-               if ((pInBlock + 1) < pInBlockEnd) {
-                  nLiterals = ((unsigned int)*pInBlock++);
-                  nLiterals |= (((unsigned int)*pInBlock++) << 8);
-               }
-               else {
-                  return -1;
-               }
-            }
-         }
-         else {
-            return -1;
-         }
-      }
-   }
-
-   if (nLiterals != 0) {
-      if ((pInBlock + nLiterals) <= pInBlockEnd &&
-         (pCurOutData + nLiterals) <= pOutDataEnd) {
-         memcpy(pCurOutData, pInBlock, nLiterals);
-         pInBlock += nLiterals;
-         pCurOutData += nLiterals;
+      if (pInBlock < pInBlockEnd) {
+         (*nibbles) = *pInBlock++;
+         *ppInBlock = pInBlock;
+         (*nValue) = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
+         return 0;
      }
      else {
         return -1;
      }
   }

-   *ppInBlock = pInBlock;
-   *ppCurOutData = pCurOutData;
+   (*nValue) = (unsigned int)((*nibbles) & 0x0f);
   return 0;
 }

-static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, int *nCurNibbles, unsigned char *nibbles,
-      unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
-   const unsigned char *pInBlock = *ppInBlock;
-   unsigned char *pCurOutData = *ppCurOutData;
+static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) {
+   unsigned int nValue;

-   nMatchLen += MIN_MATCH_SIZE_V2;
-   if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
-      nMatchLen += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles);
+   if (!lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nValue)) {
+      (*nLength) += nValue;
+
+      if (nValue == 15) {
+         const unsigned char *pInBlock = *ppInBlock;

-      if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2 + 15)) {
         if (pInBlock < pInBlockEnd) {
-            nMatchLen = ((unsigned int)*pInBlock++);
+            (*nLength) = ((unsigned int)*pInBlock++);

-            if (nMatchLen == 0) {
+            if ((*nLength) == 0) {
               if ((pInBlock + 1) < pInBlockEnd) {
-                  nMatchLen = ((unsigned int)*pInBlock++);
-                  nMatchLen |= (((unsigned int)*pInBlock++) << 8);
+                  (*nLength) = ((unsigned int)*pInBlock++);
+                  (*nLength) |= (((unsigned int)*pInBlock++) << 8);
               }
               else {
                  return -1;
@ -130,47 +85,15 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **p
         else {
            return -1;
         }
+
+         *ppInBlock = pInBlock;
      }
-   }

-   if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
-      /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
-
-      if ((pCurOutData - pSrc) >= 8 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
-         const unsigned char *pCopySrc = pSrc;
-         unsigned char *pCopyDst = pCurOutData;
-         const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
-
-         do {
-            memcpy(pCopyDst, pCopySrc, 8);
-            memcpy(pCopyDst + 8, pCopySrc + 8, 8);
-            pCopySrc += 16;
-            pCopyDst += 16;
-         } while (pCopyDst < pCopyEndDst);
-
-         pCurOutData += nMatchLen;
-      }
-      else {
-         while (nMatchLen >= 4) {
-            *pCurOutData++ = *pSrc++;
-            *pCurOutData++ = *pSrc++;
-            *pCurOutData++ = *pSrc++;
-            *pCurOutData++ = *pSrc++;
-            nMatchLen -= 4;
-         }
-         while (nMatchLen) {
-            *pCurOutData++ = *pSrc++;
-            nMatchLen--;
-         }
-      }
+      return 0;
   }
   else {
      return -1;
   }
-
-   *ppInBlock = pInBlock;
-   *ppCurOutData = pCurOutData;
-   return 0;
 }

 /**
@ -186,7 +109,6 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **p
 */
 int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
   const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
-   const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
   unsigned char *pCurOutData = pOutData + nOutDataOffset;
   const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
   const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
@ -194,113 +116,66 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
   unsigned char nibbles;
   int nMatchOffset = 0;

-   /* Fast loop */
-
-   while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) {
-      const unsigned char token = *pInBlock++;
-      unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
-
-      if (nLiterals < LITERALS_RUN_LEN_V2) {
-         memcpy(pCurOutData, pInBlock, 8);
-         pInBlock += nLiterals;
-         pCurOutData += nLiterals;
-      }
-      else {
-         if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd))
-            return -1;
-      }
-
-      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
-         unsigned char nOffsetMode = token & 0xc0;
-
-         switch (nOffsetMode) {
-         case 0x00:
-            /* 5 bit offset */
-            nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles);
-            nMatchOffset |= ((token & 0x20) >> 1);
-            nMatchOffset |= 0xffffffe0;
-            break;
-
-         case 0x40:
-            /* 9 bit offset */
-            nMatchOffset = (unsigned int)(*pInBlock++);
-            nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
-            nMatchOffset |= 0xfffffe00;
-            break;
-
-         case 0x80:
-            /* 13 bit offset */
-            nMatchOffset = (unsigned int)(*pInBlock++);
-            nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8);
-            nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
-            nMatchOffset |= 0xffffe000;
-            nMatchOffset -= 512;
-            break;
-
-         default:
-            /* Check if this is a 16 bit offset or a rep-match */
-            if ((token & 0x20) == 0) {
-               /* 16 bit offset */
-               nMatchOffset = (unsigned int)(*pInBlock++);
-               nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8);
-               nMatchOffset |= 0xffff0000;
-            }
-            break;
-         }
-
-         const unsigned char *pSrc = pCurOutData + nMatchOffset;
-         if (pSrc >= pOutData) {
-            unsigned int nMatchLen = (unsigned int)(token & 0x07);
-            if (nMatchLen < MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
-               memcpy(pCurOutData, pSrc, 8);
-               memcpy(pCurOutData + 8, pSrc + 8, 4);
-               pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
-            }
-            else {
-               if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
-                  return -1;
-            }
-         }
-         else {
-            return -1;
-         }
-      }
-   }
-
-   /* Slow loop for the remainder of the buffer */
-
   while (pInBlock < pInBlockEnd) {
      const unsigned char token = *pInBlock++;
      unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);

-      if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd))
-         return -1;
+      if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
+         memcpy(pCurOutData, pInBlock, 4);
+         pInBlock += nLiterals;
+         pCurOutData += nLiterals;
+      }
+      else {
+         if (nLiterals == LITERALS_RUN_LEN_V2) {
+            if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals))
+               return -1;
+         }
+
+         if (nLiterals != 0) {
+            if ((pInBlock + nLiterals) <= pInBlockEnd &&
+               (pCurOutData + nLiterals) <= pOutDataEnd) {
+               memcpy(pCurOutData, pInBlock, nLiterals);
+               pInBlock += nLiterals;
+               pCurOutData += nLiterals;
+            }
+            else {
+               return -1;
+            }
+         }
+      }

      if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
         unsigned char nOffsetMode = token & 0xc0;
+         unsigned int nValue;

         switch (nOffsetMode) {
         case 0x00:
            /* 5 bit offset */
-            nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles);
+            if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
+               return -1;
+            nMatchOffset = nValue;
            nMatchOffset |= ((token & 0x20) >> 1);
-            nMatchOffset |= 0xffffffe0;
+            nMatchOffset ^= 0x1f;
+            nMatchOffset++;
            break;

         case 0x40:
            /* 9 bit offset */
            nMatchOffset = (unsigned int)(*pInBlock++);
            nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
-            nMatchOffset |= 0xfffffe00;
+            nMatchOffset ^= 0x1ff;
+            nMatchOffset++;
            break;

         case 0x80:
            /* 13 bit offset */
            nMatchOffset = (unsigned int)(*pInBlock++);
-            nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8);
+            if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
+               return -1;
+            nMatchOffset |= (nValue << 8);
            nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
-            nMatchOffset |= 0xffffe000;
-            nMatchOffset -= 512;
+            nMatchOffset ^= 0x1fff;
+            nMatchOffset += (512 + 1);
            break;

         default:
@ -309,16 +184,54 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
               /* 16 bit offset */
               nMatchOffset = (unsigned int)(*pInBlock++);
               nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8);
-               nMatchOffset |= 0xffff0000;
+               nMatchOffset ^= 0xffff;
+               nMatchOffset++;
            }
            break;
         }

-         const unsigned char *pSrc = pCurOutData + nMatchOffset;
+         const unsigned char *pSrc = pCurOutData - nMatchOffset;
         if (pSrc >= pOutData) {
            unsigned int nMatchLen = (unsigned int)(token & 0x07);
-            if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
-               return -1;
+            if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
+               memcpy(pCurOutData, pSrc, 8);
+               memcpy(pCurOutData + 8, pSrc + 8, 2);
+               pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
+            }
+            else {
+               nMatchLen += MIN_MATCH_SIZE_V2;
+               if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
+                  if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen))
+                     return -1;
+               }
+
+               if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
+                  /* Copy left to right so that overlapping matches replicate correctly: 16-byte memcpy() chunks when the offset is at least 16 and the output has enough slack, a byte-by-byte copy otherwise */
+
+                  if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
+                     const unsigned char *pCopySrc = pSrc;
+                     unsigned char *pCopyDst = pCurOutData;
+                     const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
+
+                     do {
+                        memcpy(pCopyDst, pCopySrc, 16);
+                        pCopySrc += 16;
+                        pCopyDst += 16;
+                     } while (pCopyDst < pCopyEndDst);
+
+                     pCurOutData += nMatchLen;
+                  }
+                  else {
+                     while (nMatchLen) {
+                        *pCurOutData++ = *pSrc++;
+                        nMatchLen--;
+                     }
+                  }
+               }
+               else {
+                  return -1;
+               }
+            }
         }
         else {
            return -1;
--- a/src/inmem.c
+++ b/src/inmem.c
@ -0,0 +1,161 @@
+/*
+ * inmem.c - in-memory decompression for benchmarks
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "inmem.h"
+#include "lib.h"
+#include "frame.h"
+
+#define BLOCK_SIZE 65536
+
+/**
+ * Get maximum decompressed size of compressed data, by walking the header and frames without decompressing anything
+ *
+ * @param pFileData compressed data
+ * @param nFileSize compressed size in bytes
+ *
+ * @return upper bound on the decompressed size (BLOCK_SIZE bytes per frame with a non-zero data size), or (size_t)-1 for error
+ */
+size_t lzsa_inmem_get_max_decompressed_size(const unsigned char *pFileData, size_t nFileSize) {
+   const unsigned char *pCurFileData = pFileData;
+   const unsigned char *pEndFileData = pCurFileData + nFileSize;
+   int nFormatVersion = 0;
+   size_t nMaxDecompressedSize = 0;
+   const int nHeaderSize = lzsa_get_header_size();
+
+   /* Check header: it must fit inside the input and decode successfully */
+   if ((pCurFileData + nHeaderSize) > pEndFileData ||
+       lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
+      return -1;
+
+   pCurFileData += nHeaderSize;
+
+   while (pCurFileData < pEndFileData) {
+      unsigned int nBlockDataSize = 0;
+      int nIsUncompressed = 0;
+      const int nFrameSize = lzsa_get_frame_size();
+
+      /* Decode frame header to obtain the size of the block data that follows; a size of zero ends the walk */
+      if ((pCurFileData + nFrameSize) > pEndFileData ||
+          lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
+         return -1;
+      pCurFileData += nFrameSize;
+
+      if (!nBlockDataSize)
+         break;
+
+      /* Add one potentially full block to the decompressed size; nIsUncompressed is not consulted, every frame counts as BLOCK_SIZE */
+      nMaxDecompressedSize += BLOCK_SIZE;
+
+      if ((pCurFileData + nBlockDataSize) > pEndFileData)
+         return -1;
+
+      pCurFileData += nBlockDataSize;
+   }
+
+   return nMaxDecompressedSize;
+}
+
+/**
+ * Decompress a complete compressed stream (header followed by frames) held in memory
+ *
+ * @param pFileData compressed data
+ * @param pOutBuffer buffer for decompressed data
+ * @param nFileSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer
+ * @param pFormatVersion pointer to format version, updated if this function is successful
+ *
+ * @return actual decompressed size, or -1 for error (the -1 is converted to size_t, so compare against (size_t)-1)
+ */
+size_t lzsa_inmem_decompress_stream(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion) {
+   const unsigned char *pCurFileData = pFileData;
+   const unsigned char *pEndFileData = pCurFileData + nFileSize;
+   unsigned char *pCurOutBuffer = pOutBuffer;
+   const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
+   int nFormatVersion = 0;
+   int nPreviousBlockSize;
+   const int nHeaderSize = lzsa_get_header_size();
+
+   /* Check header: it must fit inside the input and decode successfully */
+   if ((pCurFileData + nHeaderSize) > pEndFileData ||
+      lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
+      return -1;
+
+   pCurFileData += nHeaderSize;
+   nPreviousBlockSize = 0;
+
+   while (pCurFileData < pEndFileData) {
+      unsigned int nBlockDataSize = 0;
+      int nIsUncompressed = 0;
+      const int nFrameSize = lzsa_get_frame_size();
+
+      /* Decode frame header to obtain the block data size and the uncompressed flag; a size of zero ends the loop */
+      if ((pCurFileData + nFrameSize) > pEndFileData ||
+          lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
+         return -1;
+      pCurFileData += nFrameSize;
+
+      if (!nBlockDataSize)
+         break;
+
+      if (!nIsUncompressed) {
+         int nDecompressedSize;
+
+         /* Decompress block; the output window handed to the callee starts nPreviousBlockSize bytes before the write position. NOTE(review): the capacity argument also adds nPreviousBlockSize -- confirm the callee cannot write past pEndOutBuffer */
+         if ((pCurFileData + nBlockDataSize) > pEndFileData)
+            return -1;
+
+         nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize));
+         if (nDecompressedSize < 0)
+            return -1;
+
+         pCurOutBuffer += nDecompressedSize;
+         nPreviousBlockSize = nDecompressedSize;
+      }
+      else {
+         /* Copy uncompressed block verbatim after bounds-checking input and output. NOTE(review): nPreviousBlockSize is not updated on this path -- confirm this is intended */
+         if ((pCurFileData + nBlockDataSize) > pEndFileData)
+            return -1;
+         if ((pCurOutBuffer + nBlockDataSize) > pEndOutBuffer)
+            return -1;
+         memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize);
+         pCurOutBuffer += nBlockDataSize;
+      }
+
+      pCurFileData += nBlockDataSize;
+   }
+
+   *pFormatVersion = nFormatVersion;
+   return (int)(pCurOutBuffer - pOutBuffer);
+}
--- a/src/inmem.h
+++ b/src/inmem.h
@ -0,0 +1,61 @@
+/*
+ * inmem.h - in-memory decompression for benchmarks
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _INMEM_H
+#define _INMEM_H
+
+#include <stdio.h>
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * @param pFileData compressed data
+ * @param nFileSize compressed size in bytes
+ *
+ * @return maximum decompressed size.
+ *         NOTE(review): the benchmark caller in lzsa.c compares the result with -1 to detect an
+ *         invalid stream, so an error is presumably reported as (size_t)-1 -- confirm against inmem.c
+ */
+size_t lzsa_inmem_get_max_decompressed_size(const unsigned char *pFileData, size_t nFileSize);
+
+/**
+ * Decompress data in memory
+ *
+ * @param pFileData compressed data
+ * @param pOutBuffer buffer for decompressed data
+ * @param nFileSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer
+ * @param pFormatVersion pointer to format version, updated if this function is successful
+ *
+ * @return actual decompressed size, or -1 for error. The return type is unsigned, so the error
+ *         value reaches the caller as (size_t)-1: compare against that value, a "< 0" test
+ *         can never be true
+ */
+size_t lzsa_inmem_decompress_stream(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion);
+
+#endif /* _INMEM_H */
--- a/src/lzsa.c
+++ b/src/lzsa.c
@ -35,11 +35,13 @@
 #include <stdlib.h>
 #include <string.h>
 #ifdef _WIN32
+#include <windows.h>
 #include <sys/timeb.h>
 #else
 #include <sys/time.h>
 #endif
 #include "lib.h"
+#include "inmem.h"

 #define OPT_VERBOSE     1
 #define OPT_RAW         2
@ -47,16 +49,37 @@

 #define TOOL_VERSION "0.6.0"

-/*---------------------------------------------------------------------------*/
+ /*---------------------------------------------------------------------------*/
+
+#ifdef _WIN32
+/* Frequency of the Windows high-resolution performance counter, in counts per second; set by do_init_time() */
+static LARGE_INTEGER hpc_frequency;
+/* TRUE when the high-resolution performance counter can be used; set by do_init_time() */
+static BOOL hpc_available = FALSE;
+#endif
+
+/**
+ * Initialize the timing state. Call once before taking any time measurement.
+ *
+ * On Windows, this queries the frequency of the high-resolution performance counter and
+ * records whether the query succeeded. On other platforms it does nothing.
+ */
+static void do_init_time(void) {
+#ifdef _WIN32
+   hpc_frequency.QuadPart = 0;
+   hpc_available = QueryPerformanceFrequency(&hpc_frequency);
+#endif
+}

 static long long do_get_time() {
   long long nTime;

 #ifdef _WIN32
-   struct _timeb tb;
-   _ftime(&tb);
+   if (hpc_available) {
+      LARGE_INTEGER nCurTime;

-   nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL;
+      /* Use HPC hardware for best precision */
+      QueryPerformanceCounter(&nCurTime);
+      nTime = (long long)(nCurTime.QuadPart * 1000000LL / hpc_frequency.QuadPart);
+   }
+   else {
+      struct _timeb tb;
+      _ftime(&tb);
+
+      nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL;
+   }
 #else
   struct timeval tm;
   gettimeofday(&tm, NULL);
@ -322,6 +345,112 @@ static int do_compare(const char *pszInFilename, const char *pszOutFilename, con

 /*---------------------------------------------------------------------------*/

+/**
+ * Benchmark in-memory decompression
+ *
+ * Loads the whole compressed file in memory, decompresses it 50 times and prints the format,
+ * the decompressed size and the best decompression time to stdout.
+ *
+ * @param pszInFilename name of compressed file to benchmark
+ * @param pszOutFilename name of file to write the decompressed data to, or NULL to not write it
+ * @param pszDictionaryFilename dictionary filename; must be NULL, dictionaries are not supported here
+ * @param nOptions set of OPT_* flags; OPT_RAW decodes the input as a single raw block
+ * @param nFormatVersion format version used to decode raw blocks; for streams it is replaced by
+ *                       the version reported by the stream decompressor
+ *
+ * @return 0 for success, 100 for error
+ */
+static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
+   size_t nFileSize, nMaxDecompressedSize;
+   unsigned char *pFileData;
+   unsigned char *pDecompressedData;
+   long nFileEndPos;
+   int i;
+
+   if (pszDictionaryFilename) {
+      fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
+      return 100;
+   }
+
+   /* Read the whole compressed file in memory */
+
+   FILE *f_in = fopen(pszInFilename, "rb");
+   if (!f_in) {
+      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+      return 100;
+   }
+
+   /* ftell() returns -1 on failure; don't let that turn into a huge allocation request */
+   nFileEndPos = -1;
+   if (fseek(f_in, 0, SEEK_END) == 0)
+      nFileEndPos = ftell(f_in);
+   if (nFileEndPos < 0 || fseek(f_in, 0, SEEK_SET) != 0) {
+      fclose(f_in);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      return 100;
+   }
+   nFileSize = (size_t)nFileEndPos;
+
+   /* Request at least one byte: malloc(0) may return NULL, which would be misreported as out of memory */
+   pFileData = (unsigned char*)malloc(nFileSize ? nFileSize : 1);
+   if (!pFileData) {
+      fclose(f_in);
+      fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nFileSize);
+      return 100;
+   }
+
+   if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) {
+      free(pFileData);
+      fclose(f_in);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   fclose(f_in);
+
+   /* A raw block ends with a 4 byte EOD marker that is excluded from the size passed to the block decompressor */
+   if ((nOptions & OPT_RAW) && nFileSize < 4) {
+      free(pFileData);
+      fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   /* Allocate max decompressed size */
+
+   if (nOptions & OPT_RAW)
+      nMaxDecompressedSize = 65536;
+   else
+      nMaxDecompressedSize = lzsa_inmem_get_max_decompressed_size(pFileData, nFileSize);
+   if (nMaxDecompressedSize == (size_t)-1) {
+      free(pFileData);
+      fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   pDecompressedData = (unsigned char*)malloc(nMaxDecompressedSize ? nMaxDecompressedSize : 1);
+   if (!pDecompressedData) {
+      free(pFileData);
+      fprintf(stderr, "out of memory for decompressing '%s', %zu bytes needed\n", pszInFilename, nMaxDecompressedSize);
+      return 100;
+   }
+
+   memset(pDecompressedData, 0, nMaxDecompressedSize);
+
+   /* Decompress several times and keep the fastest run */
+
+   long long nBestDecTime = -1;
+
+   size_t nActualDecompressedSize = 0;
+   for (i = 0; i < 50; i++) {
+      int nFailed;
+
+      long long t0 = do_get_time();
+      if (nOptions & OPT_RAW) {
+         /* The block decompressor returns an int; any negative value is an error */
+         int nRawDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize);
+
+         nFailed = (nRawDecompressedSize < 0);
+         if (!nFailed)
+            nActualDecompressedSize = (size_t)nRawDecompressedSize;
+      }
+      else {
+         /* The stream decompressor returns a size_t; errors are reported as (size_t)-1 */
+         nActualDecompressedSize = lzsa_inmem_decompress_stream(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
+         nFailed = (nActualDecompressedSize == (size_t)-1);
+      }
+      long long t1 = do_get_time();
+
+      if (nFailed) {
+         free(pDecompressedData);
+         free(pFileData);
+         fprintf(stderr, "decompression error\n");
+         return 100;
+      }
+
+      long long nCurDecTime = t1 - t0;
+      if (nBestDecTime == -1 || nBestDecTime > nCurDecTime)
+         nBestDecTime = nCurDecTime;
+   }
+
+   if (pszOutFilename) {
+      FILE *f_out;
+      int nWriteFailed;
+
+      /* Write whole decompressed file out */
+
+      f_out = fopen(pszOutFilename, "wb");
+      if (!f_out) {
+         free(pDecompressedData);
+         free(pFileData);
+         fprintf(stderr, "error opening '%s' for writing\n", pszOutFilename);
+         return 100;
+      }
+
+      nWriteFailed = (fwrite(pDecompressedData, 1, nActualDecompressedSize, f_out) != nActualDecompressedSize);
+      /* fclose() flushes buffered data, so its result matters for written files */
+      if (fclose(f_out) != 0)
+         nWriteFailed = 1;
+
+      if (nWriteFailed) {
+         free(pDecompressedData);
+         free(pFileData);
+         fprintf(stderr, "I/O error while writing '%s'\n", pszOutFilename);
+         return 100;
+      }
+   }
+
+   free(pDecompressedData);
+   free(pFileData);
+
+   fprintf(stdout, "format: LZSA%d\n", nFormatVersion);
+   fprintf(stdout, "decompressed size: %zu bytes\n", nActualDecompressedSize);
+   /* Report a throughput of 0 rather than inf/nan when the best run is below the timer resolution */
+   fprintf(stdout, "decompression time: %lld microseconds (%g Mb/s)\n", nBestDecTime,
+      (nBestDecTime > 0) ? (((double)nActualDecompressedSize / 1024.0) / ((double)nBestDecTime / 1000.0)) : 0.0);
+
+   return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
 int main(int argc, char **argv) {
   int i;
   const char *pszInFilename = NULL;
@ -361,6 +490,14 @@ int main(int argc, char **argv) {
         else
            bArgsError = true;
      }
+      else if (!strcmp(argv[i], "-bench")) {
+         if (!bCommandDefined) {
+            bCommandDefined = true;
+            cCommand = 'b';
+         }
+         else
+            bArgsError = true;
+      }
      else if (!strcmp(argv[i], "-D")) {
         if (!pszDictionaryFilename && (i + 1) < argc) {
            pszDictionaryFilename = argv[i + 1];
@ -484,6 +621,7 @@ int main(int argc, char **argv) {
      fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]);
      fprintf(stderr, "       -c: check resulting stream after compressing\n");
      fprintf(stderr, "       -d: decompress (default: compress)\n");
+      fprintf(stderr, "   -bench: benchmary in-memory decompression\n");
      fprintf(stderr, "       -v: be verbose\n");
      fprintf(stderr, "       -f <value>: LZSA compression format (1-2)\n");
      fprintf(stderr, "       -r: raw block format (max. 64 Kb files)\n");
@ -494,6 +632,8 @@ int main(int argc, char **argv) {
      return 100;
   }

+   do_init_time();
+
   if (cCommand == 'z') {
      int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion);
      if (nResult == 0 && bVerifyCompression) {
@ -503,6 +643,9 @@ int main(int argc, char **argv) {
   else if (cCommand == 'd') {
      return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
   }
+   else if (cCommand == 'b') {
+      return do_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
+   }
   else {
      return 100;
   }