diff --git a/Makefile b/Makefile index ebfaf6d..1411861 100755 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ APP := lzsa OBJS := $(OBJDIR)/src/lzsa.o OBJS += $(OBJDIR)/src/lib.o +OBJS += $(OBJDIR)/src/inmem.o OBJS += $(OBJDIR)/src/stream.o OBJS += $(OBJDIR)/src/frame.o OBJS += $(OBJDIR)/src/matchfinder.o diff --git a/VS2017/lzsa.vcxproj b/VS2017/lzsa.vcxproj index 3304589..122d826 100755 --- a/VS2017/lzsa.vcxproj +++ b/VS2017/lzsa.vcxproj @@ -181,6 +181,7 @@ + @@ -195,6 +196,7 @@ + diff --git a/VS2017/lzsa.vcxproj.filters b/VS2017/lzsa.vcxproj.filters index e00658a..332d097 100755 --- a/VS2017/lzsa.vcxproj.filters +++ b/VS2017/lzsa.vcxproj.filters @@ -63,6 +63,9 @@ Fichiers sources + + Fichiers sources + @@ -104,5 +107,8 @@ Fichiers sources + + Fichiers sources + \ No newline at end of file diff --git a/src/expand_v1.c b/src/expand_v1.c index eaae559..ae212fe 100644 --- a/src/expand_v1.c +++ b/src/expand_v1.c @@ -42,130 +42,72 @@ #define FORCE_INLINE __attribute__((always_inline)) #endif /* _MSC_VER */ -static inline FORCE_INLINE int lzsa_expand_literals_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) { +static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) { + unsigned int nByte; const unsigned char *pInBlock = *ppInBlock; - unsigned char *pCurOutData = *ppCurOutData; - if (nLiterals == LITERALS_RUN_LEN_V1) { - unsigned char nByte; + if (pInBlock < pInBlockEnd) { + nByte = *pInBlock++; + (*nLiterals) += nByte; - if (pInBlock < pInBlockEnd) { - nByte = *pInBlock++; - nLiterals += ((unsigned int)nByte); - - if (nByte == 250) { - if (pInBlock < pInBlockEnd) { - nLiterals = 256 + ((unsigned int)*pInBlock++); - } - else { - return -1; - } + if (nByte == 250) { + if (pInBlock < pInBlockEnd) { + (*nLiterals) = 256 + ((unsigned int)*pInBlock++); } - else if 
(nByte == 249) { - if ((pInBlock + 1) < pInBlockEnd) { - nLiterals = ((unsigned int)*pInBlock++); - nLiterals |= (((unsigned int)*pInBlock++) << 8); - } - else { - return -1; - } + else { + return -1; } } - else { - return -1; - } - } - - if (nLiterals != 0) { - if ((pInBlock + nLiterals) <= pInBlockEnd && - (pCurOutData + nLiterals) <= pOutDataEnd) { - memcpy(pCurOutData, pInBlock, nLiterals); - pInBlock += nLiterals; - pCurOutData += nLiterals; - } - else { - return -1; - } - } - - *ppInBlock = pInBlock; - *ppCurOutData = pCurOutData; - return 0; -} - -static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) { - const unsigned char *pInBlock = *ppInBlock; - unsigned char *pCurOutData = *ppCurOutData; - - nMatchLen += MIN_MATCH_SIZE_V1; - if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) { - unsigned char nByte; - - if (pInBlock < pInBlockEnd) { - nByte = *pInBlock++; - nMatchLen += ((unsigned int)nByte); - - if (nByte == 239) { - if (pInBlock < pInBlockEnd) { - nMatchLen = 256 + ((unsigned int)*pInBlock++); - } - else { - return -1; - } + else if (nByte == 249) { + if ((pInBlock + 1) < pInBlockEnd) { + (*nLiterals) = ((unsigned int)*pInBlock++); + (*nLiterals) |= (((unsigned int)*pInBlock++) << 8); } - else if (nByte == 238) { - if ((pInBlock + 1) < pInBlockEnd) { - nMatchLen = ((unsigned int)*pInBlock++); - nMatchLen |= (((unsigned int)*pInBlock++) << 8); - } - else { - return -1; - } + else { + return -1; } } - else { - return -1; - } - } - if ((pCurOutData + nMatchLen) <= pOutDataEnd) { - /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ - - if ((pCurOutData - pSrc) >= 8 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) { - const unsigned char *pCopySrc = pSrc; - unsigned char *pCopyDst = 
pCurOutData; - const unsigned char *pCopyEndDst = pCurOutData + nMatchLen; - - do { - memcpy(pCopyDst, pCopySrc, 8); - memcpy(pCopyDst + 8, pCopySrc + 8, 8); - pCopySrc += 16; - pCopyDst += 16; - } while (pCopyDst < pCopyEndDst); - - pCurOutData += nMatchLen; - } - else { - while (nMatchLen >= 4) { - *pCurOutData++ = *pSrc++; - *pCurOutData++ = *pSrc++; - *pCurOutData++ = *pSrc++; - *pCurOutData++ = *pSrc++; - nMatchLen -= 4; - } - while (nMatchLen) { - *pCurOutData++ = *pSrc++; - nMatchLen--; - } - } + *ppInBlock = pInBlock; + return 0; } else { return -1; } +} - *ppInBlock = pInBlock; - *ppCurOutData = pCurOutData; - return 0; +static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) { + unsigned int nByte; + const unsigned char *pInBlock = *ppInBlock; + + if (pInBlock < pInBlockEnd) { + nByte = *pInBlock++; + (*nMatchLen) += nByte; + + if (nByte == 239) { + if (pInBlock < pInBlockEnd) { + (*nMatchLen) = 256 + ((unsigned int)*pInBlock++); + } + else { + return -1; + } + } + else if (nByte == 238) { + if ((pInBlock + 1) < pInBlockEnd) { + (*nMatchLen) = ((unsigned int)*pInBlock++); + (*nMatchLen) |= (((unsigned int)*pInBlock++) << 8); + } + else { + return -1; + } + } + + *ppInBlock = pInBlock; + return 0; + } + else { + return -1; + } } /** @@ -181,77 +123,90 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **p */ int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { const unsigned char *pInBlockEnd = pInBlock + nBlockSize; - const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8; unsigned char *pCurOutData = pOutData + nOutDataOffset; const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize; - const unsigned char *pOutDataFastEnd = pOutDataEnd - 20; - - /* Fast loop */ - - while (pInBlock < pInBlockFastEnd && pCurOutData < 
pOutDataFastEnd) { - const unsigned char token = *pInBlock++; - unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4); - - if (nLiterals < LITERALS_RUN_LEN_V1) { - memcpy(pCurOutData, pInBlock, 8); - pInBlock += nLiterals; - pCurOutData += nLiterals; - } - else { - if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) - return -1; - } - - if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ - int nMatchOffset; - - nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00; - if (token & 0x80) { - nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8); - } - - const unsigned char *pSrc = pCurOutData + nMatchOffset; - if (pSrc >= pOutData) { - unsigned int nMatchLen = (unsigned int)(token & 0x0f); - if (nMatchLen < MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { - memcpy(pCurOutData, pSrc, 8); - memcpy(pCurOutData + 8, pSrc + 8, 8); - memcpy(pCurOutData + 16, pSrc + 16, 4); - pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen); - } - else { - if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) - return -1; - } - } - else { - return -1; - } - } - } - - /* Slow loop for the remainder of the buffer */ + const unsigned char *pOutDataFastEnd = pOutDataEnd - 18; while (pInBlock < pInBlockEnd) { const unsigned char token = *pInBlock++; unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4); - if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) - return -1; - - if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ - int nMatchOffset; - - nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00; - if (token & 0x80) { - nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8); + if (nLiterals != LITERALS_RUN_LEN_V1 && (pInBlock + 8) <= 
pInBlockEnd && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pInBlock, 8); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + if (nLiterals == LITERALS_RUN_LEN_V1) { + if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals)) + return -1; } - const unsigned char *pSrc = pCurOutData + nMatchOffset; + if (nLiterals != 0) { + if ((pInBlock + nLiterals) <= pInBlockEnd && + (pCurOutData + nLiterals) <= pOutDataEnd) { + memcpy(pCurOutData, pInBlock, nLiterals); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + return -1; + } + } + } + + if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ + unsigned int nMatchOffset; + + nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff; + if (token & 0x80) { + nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00; + } + nMatchOffset++; + + const unsigned char *pSrc = pCurOutData - nMatchOffset; if (pSrc >= pOutData) { unsigned int nMatchLen = (unsigned int)(token & 0x0f); - if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) - return -1; + if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pSrc, 8); + memcpy(pCurOutData + 8, pSrc + 8, 8); + memcpy(pCurOutData + 16, pSrc + 16, 2); + pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen); + } + else { + nMatchLen += MIN_MATCH_SIZE_V1; + if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) { + if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen)) + return -1; + } + + if ((pCurOutData + nMatchLen) <= pOutDataEnd) { + /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ + + if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) { + const unsigned char *pCopySrc = pSrc; + unsigned char *pCopyDst = pCurOutData; + const unsigned char *pCopyEndDst = pCurOutData + nMatchLen; + + do { + 
memcpy(pCopyDst, pCopySrc, 16); + pCopySrc += 16; + pCopyDst += 16; + } while (pCopyDst < pCopyEndDst); + + pCurOutData += nMatchLen; + } + else { + while (nMatchLen) { + *pCurOutData++ = *pSrc++; + nMatchLen--; + } + } + } + else { + return -1; + } + } } else { return -1; diff --git a/src/expand_v2.c b/src/expand_v2.c index f2aaa46..af11a97 100644 --- a/src/expand_v2.c +++ b/src/expand_v2.c @@ -42,85 +42,40 @@ #define FORCE_INLINE __attribute__((always_inline)) #endif /* _MSC_VER */ -static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles) { - unsigned int nValue; - +static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) { if ((*nCurNibbles ^= 1) != 0) { const unsigned char *pInBlock = *ppInBlock; - if (pInBlock >= pInBlockEnd) return -1; - (*nibbles) = *pInBlock++; - *ppInBlock = pInBlock; - } - - nValue = ((unsigned int)((*nibbles) & 0xf0)) >> 4; - - (*nibbles) <<= 4; - - return nValue; -} - -static inline FORCE_INLINE int lzsa_expand_literals_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, int *nCurNibbles, unsigned char *nibbles, - unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) { - const unsigned char *pInBlock = *ppInBlock; - unsigned char *pCurOutData = *ppCurOutData; - - if (nLiterals == LITERALS_RUN_LEN_V2) { - nLiterals += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles); - - if (nLiterals == (LITERALS_RUN_LEN_V2 + 15)) { - if (pInBlock < pInBlockEnd) { - nLiterals = ((unsigned int)*pInBlock++); - - if (nLiterals == 0) { - if ((pInBlock + 1) < pInBlockEnd) { - nLiterals = ((unsigned int)*pInBlock++); - nLiterals |= (((unsigned int)*pInBlock++) << 8); - } - else { - return -1; - } - } - } - else { - return -1; - } - } - } - - if 
(nLiterals != 0) { - if ((pInBlock + nLiterals) <= pInBlockEnd && - (pCurOutData + nLiterals) <= pOutDataEnd) { - memcpy(pCurOutData, pInBlock, nLiterals); - pInBlock += nLiterals; - pCurOutData += nLiterals; + if (pInBlock < pInBlockEnd) { + (*nibbles) = *pInBlock++; + *ppInBlock = pInBlock; + (*nValue) = ((unsigned int)((*nibbles) & 0xf0)) >> 4; + return 0; } else { return -1; } } - *ppInBlock = pInBlock; - *ppCurOutData = pCurOutData; + (*nValue) = (unsigned int)((*nibbles) & 0x0f); return 0; } -static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, int *nCurNibbles, unsigned char *nibbles, - unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) { - const unsigned char *pInBlock = *ppInBlock; - unsigned char *pCurOutData = *ppCurOutData; +static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) { + unsigned int nValue; - nMatchLen += MIN_MATCH_SIZE_V2; - if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) { - nMatchLen += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles); + if (!lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nValue)) { + (*nLength) += nValue; + + if (nValue == 15) { + const unsigned char *pInBlock = *ppInBlock; - if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2 + 15)) { if (pInBlock < pInBlockEnd) { - nMatchLen = ((unsigned int)*pInBlock++); + (*nLength) = ((unsigned int)*pInBlock++); - if (nMatchLen == 0) { + if ((*nLength) == 0) { if ((pInBlock + 1) < pInBlockEnd) { - nMatchLen = ((unsigned int)*pInBlock++); - nMatchLen |= (((unsigned int)*pInBlock++) << 8); + (*nLength) = ((unsigned int)*pInBlock++); + (*nLength) |= (((unsigned int)*pInBlock++) << 8); } else { return -1; @@ -130,47 +85,15 @@ static inline 
FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **p else { return -1; } + + *ppInBlock = pInBlock; } - } - if ((pCurOutData + nMatchLen) <= pOutDataEnd) { - /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ - - if ((pCurOutData - pSrc) >= 8 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) { - const unsigned char *pCopySrc = pSrc; - unsigned char *pCopyDst = pCurOutData; - const unsigned char *pCopyEndDst = pCurOutData + nMatchLen; - - do { - memcpy(pCopyDst, pCopySrc, 8); - memcpy(pCopyDst + 8, pCopySrc + 8, 8); - pCopySrc += 16; - pCopyDst += 16; - } while (pCopyDst < pCopyEndDst); - - pCurOutData += nMatchLen; - } - else { - while (nMatchLen >= 4) { - *pCurOutData++ = *pSrc++; - *pCurOutData++ = *pSrc++; - *pCurOutData++ = *pSrc++; - *pCurOutData++ = *pSrc++; - nMatchLen -= 4; - } - while (nMatchLen) { - *pCurOutData++ = *pSrc++; - nMatchLen--; - } - } + return 0; } else { return -1; } - - *ppInBlock = pInBlock; - *ppCurOutData = pCurOutData; - return 0; } /** @@ -186,7 +109,6 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **p */ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { const unsigned char *pInBlockEnd = pInBlock + nBlockSize; - const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8; unsigned char *pCurOutData = pOutData + nOutDataOffset; const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize; const unsigned char *pOutDataFastEnd = pOutDataEnd - 20; @@ -194,113 +116,66 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS unsigned char nibbles; int nMatchOffset = 0; - /* Fast loop */ - - while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) { - const unsigned char token = *pInBlock++; - unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3); - - if (nLiterals < LITERALS_RUN_LEN_V2) { - 
memcpy(pCurOutData, pInBlock, 8); - pInBlock += nLiterals; - pCurOutData += nLiterals; - } - else { - if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd)) - return -1; - } - - if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ - unsigned char nOffsetMode = token & 0xc0; - - switch (nOffsetMode) { - case 0x00: - /* 5 bit offset */ - nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles); - nMatchOffset |= ((token & 0x20) >> 1); - nMatchOffset |= 0xffffffe0; - break; - - case 0x40: - /* 9 bit offset */ - nMatchOffset = (unsigned int)(*pInBlock++); - nMatchOffset |= (((unsigned int)(token & 0x20)) << 3); - nMatchOffset |= 0xfffffe00; - break; - - case 0x80: - /* 13 bit offset */ - nMatchOffset = (unsigned int)(*pInBlock++); - nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8); - nMatchOffset |= (((unsigned int)(token & 0x20)) << 7); - nMatchOffset |= 0xffffe000; - nMatchOffset -= 512; - break; - - default: - /* Check if this is a 16 bit offset or a rep-match */ - if ((token & 0x20) == 0) { - /* 16 bit offset */ - nMatchOffset = (unsigned int)(*pInBlock++); - nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8); - nMatchOffset |= 0xffff0000; - } - break; - } - - const unsigned char *pSrc = pCurOutData + nMatchOffset; - if (pSrc >= pOutData) { - unsigned int nMatchLen = (unsigned int)(token & 0x07); - if (nMatchLen < MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { - memcpy(pCurOutData, pSrc, 8); - memcpy(pCurOutData + 8, pSrc + 8, 4); - pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen); - } - else { - if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) - return -1; - } - } - else { - return -1; - } - } - } - - /* Slow loop for the remainder of the buffer */ - 
while (pInBlock < pInBlockEnd) { const unsigned char token = *pInBlock++; unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3); - if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd)) - return -1; + if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pInBlock, 4); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + if (nLiterals == LITERALS_RUN_LEN_V2) { + if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals)) + return -1; + } + + if (nLiterals != 0) { + if ((pInBlock + nLiterals) <= pInBlockEnd && + (pCurOutData + nLiterals) <= pOutDataEnd) { + memcpy(pCurOutData, pInBlock, nLiterals); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + return -1; + } + } + } if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ unsigned char nOffsetMode = token & 0xc0; + unsigned int nValue; switch (nOffsetMode) { case 0x00: /* 5 bit offset */ - nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles); + if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue)) + return -1; + nMatchOffset = nValue; nMatchOffset |= ((token & 0x20) >> 1); - nMatchOffset |= 0xffffffe0; + nMatchOffset ^= 0x1f; + nMatchOffset++; break; case 0x40: /* 9 bit offset */ nMatchOffset = (unsigned int)(*pInBlock++); nMatchOffset |= (((unsigned int)(token & 0x20)) << 3); - nMatchOffset |= 0xfffffe00; + nMatchOffset ^= 0x1ff; + nMatchOffset++; break; case 0x80: /* 13 bit offset */ nMatchOffset = (unsigned int)(*pInBlock++); - nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8); + if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue)) + return -1; + nMatchOffset |= (nValue << 8); nMatchOffset |= (((unsigned int)(token & 0x20)) << 
7); - nMatchOffset |= 0xffffe000; - nMatchOffset -= 512; + nMatchOffset ^= 0x1fff; + nMatchOffset += (512 + 1); break; default: @@ -309,16 +184,54 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS /* 16 bit offset */ nMatchOffset = (unsigned int)(*pInBlock++); nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8); - nMatchOffset |= 0xffff0000; + nMatchOffset ^= 0xffff; + nMatchOffset++; } break; } - const unsigned char *pSrc = pCurOutData + nMatchOffset; + const unsigned char *pSrc = pCurOutData - nMatchOffset; if (pSrc >= pOutData) { unsigned int nMatchLen = (unsigned int)(token & 0x07); - if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) - return -1; + if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pSrc, 8); + memcpy(pCurOutData + 8, pSrc + 8, 2); + pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen); + } + else { + nMatchLen += MIN_MATCH_SIZE_V2; + if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) { + if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen)) + return -1; + } + + if ((pCurOutData + nMatchLen) <= pOutDataEnd) { + /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ + + if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) { + const unsigned char *pCopySrc = pSrc; + unsigned char *pCopyDst = pCurOutData; + const unsigned char *pCopyEndDst = pCurOutData + nMatchLen; + + do { + memcpy(pCopyDst, pCopySrc, 16); + pCopySrc += 16; + pCopyDst += 16; + } while (pCopyDst < pCopyEndDst); + + pCurOutData += nMatchLen; + } + else { + while (nMatchLen) { + *pCurOutData++ = *pSrc++; + nMatchLen--; + } + } + } + else { + return -1; + } + } } else { return -1; diff --git a/src/inmem.c b/src/inmem.c new file mode 100644 index 0000000..b6f00a4 --- /dev/null +++ b/src/inmem.c @@ -0,0 +1,161 
@@ +/* + * inmem.c - in-memory decompression for benchmarks + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include +#include "inmem.h" +#include "lib.h" +#include "frame.h" + +#define BLOCK_SIZE 65536 + +/** + * Get maximum decompressed size of compressed data + * + * @param pFileData compressed data + * @param nFileSize compressed size in bytes + * + * @return maximum decompressed size, or -1 for error + */ +size_t lzsa_inmem_get_max_decompressed_size(const unsigned char *pFileData, size_t nFileSize) { + const unsigned char *pCurFileData = pFileData; + const unsigned char *pEndFileData = pCurFileData + nFileSize; + int nFormatVersion = 0; + size_t nMaxDecompressedSize = 0; + const int nHeaderSize = lzsa_get_header_size(); + + /* Check header */ + if ((pCurFileData + nHeaderSize) > pEndFileData || + lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0) + return -1; + + pCurFileData += nHeaderSize; + + while (pCurFileData < pEndFileData) { + unsigned int nBlockDataSize = 0; + int nIsUncompressed = 0; + const int nFrameSize = lzsa_get_frame_size(); + + /* Decode frame header */ + if ((pCurFileData + nFrameSize) > pEndFileData || + lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0) + return -1; + pCurFileData += nFrameSize; + + if (!nBlockDataSize) + break; + + /* Add one potentially full block to the decompressed size */ + nMaxDecompressedSize += BLOCK_SIZE; + + if ((pCurFileData + nBlockDataSize) > pEndFileData) + return -1; + + pCurFileData += nBlockDataSize; + } + + return nMaxDecompressedSize; +} + +/** + * Decompress data in memory + * + * @param pFileData compressed data + * @param pOutBuffer buffer for decompressed data + * @param nFileSize compressed size in bytes + * @param nMaxOutBufferSize maximum capacity of decompression buffer + * @param pFormatVersion pointer to format version, updated if this function is successful + * + * @return actual decompressed size, or -1 for error + */ +size_t lzsa_inmem_decompress_stream(const unsigned char *pFileData, 
unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion) { + const unsigned char *pCurFileData = pFileData; + const unsigned char *pEndFileData = pCurFileData + nFileSize; + unsigned char *pCurOutBuffer = pOutBuffer; + const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize; + int nFormatVersion = 0; + int nPreviousBlockSize; + const int nHeaderSize = lzsa_get_header_size(); + + /* Check header */ + if ((pCurFileData + nHeaderSize) > pEndFileData || + lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0) + return -1; + + pCurFileData += nHeaderSize; + nPreviousBlockSize = 0; + + while (pCurFileData < pEndFileData) { + unsigned int nBlockDataSize = 0; + int nIsUncompressed = 0; + const int nFrameSize = lzsa_get_frame_size(); + + /* Decode frame header */ + if ((pCurFileData + nFrameSize) > pEndFileData || + lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0) + return -1; + pCurFileData += nFrameSize; + + if (!nBlockDataSize) + break; + + if (!nIsUncompressed) { + int nDecompressedSize; + + /* Decompress block */ + if ((pCurFileData + nBlockDataSize) > pEndFileData) + return -1; + + nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize)); + if (nDecompressedSize < 0) + return -1; + + pCurOutBuffer += nDecompressedSize; + nPreviousBlockSize = nDecompressedSize; + } + else { + /* Copy uncompressed block */ + if ((pCurFileData + nBlockDataSize) > pEndFileData) + return -1; + if ((pCurOutBuffer + nBlockDataSize) > pEndOutBuffer) + return -1; + memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize); + pCurOutBuffer += nBlockDataSize; + } + + pCurFileData += nBlockDataSize; + } + + *pFormatVersion = nFormatVersion; + return (int)(pCurOutBuffer - pOutBuffer); +} diff --git a/src/inmem.h b/src/inmem.h new file mode 100644 index 
0000000..12a5709 --- /dev/null +++ b/src/inmem.h @@ -0,0 +1,61 @@ +/* + * inmem.h - in-memory decompression for benchmarks + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _INMEM_H +#define _INMEM_H + +#include + +/** + * Get maximum decompressed size of compressed data + * + * @param pFileData compressed data + * @param nFileSize compressed size in bytes + * + * @return maximum decompressed size, or -1 for error + */ +size_t lzsa_inmem_get_max_decompressed_size(const unsigned char *pFileData, size_t nFileSize); + +/** + * Decompress data in memory + * + * @param pFileData compressed data + * @param pOutBuffer buffer for decompressed data + * @param nFileSize compressed size in bytes + * @param nMaxOutBufferSize maximum capacity of decompression buffer + * @param pFormatVersion pointer to format version, updated if this function is successful + * + * @return actual decompressed size, or -1 for error + */ +size_t lzsa_inmem_decompress_stream(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion); + +#endif /* _INMEM_H */ diff --git a/src/lzsa.c b/src/lzsa.c index d84d4d7..9b1eb21 100755 --- a/src/lzsa.c +++ b/src/lzsa.c @@ -35,11 +35,13 @@ #include #include #ifdef _WIN32 +#include #include #else #include #endif #include "lib.h" +#include "inmem.h" #define OPT_VERBOSE 1 #define OPT_RAW 2 @@ -47,16 +49,37 @@ #define TOOL_VERSION "0.6.0" -/*---------------------------------------------------------------------------*/ + /*---------------------------------------------------------------------------*/ + +#ifdef _WIN32 +LARGE_INTEGER hpc_frequency; +BOOL hpc_available = FALSE; +#endif + +static void do_init_time() { +#ifdef _WIN32 + hpc_frequency.QuadPart = 0; + hpc_available = QueryPerformanceFrequency(&hpc_frequency); +#endif +} static long long do_get_time() { long long nTime; #ifdef _WIN32 - struct _timeb tb; - _ftime(&tb); + if (hpc_available) { + LARGE_INTEGER nCurTime; - nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL; + /* Use HPC hardware for best precision */ + QueryPerformanceCounter(&nCurTime); 
+ nTime = (long long)(nCurTime.QuadPart * 1000000LL / hpc_frequency.QuadPart); + } + else { + struct _timeb tb; + _ftime(&tb); + + nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL; + } #else struct timeval tm; gettimeofday(&tm, NULL); @@ -322,6 +345,112 @@ static int do_compare(const char *pszInFilename, const char *pszOutFilename, con /*---------------------------------------------------------------------------*/ +static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) { + size_t nFileSize, nMaxDecompressedSize; + unsigned char *pFileData; + unsigned char *pDecompressedData; + int i; + + if (pszDictionaryFilename) { + fprintf(stderr, "in-memory benchmarking does not support dictionaries\n"); + return 100; + } + + /* Read the whole compressed file in memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if (!f_in) { + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nFileSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pFileData = (unsigned char*)malloc(nFileSize); + if (!pFileData) { + fclose(f_in); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize); + return 100; + } + + if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) { + free(pFileData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + /* Allocate max decompressed size */ + + if (nOptions & OPT_RAW) + nMaxDecompressedSize = 65536; + else + nMaxDecompressedSize = lzsa_inmem_get_max_decompressed_size(pFileData, nFileSize); + if (nMaxDecompressedSize == -1) { + free(pFileData); + fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename); + return 100; + } + + pDecompressedData = (unsigned char*)malloc(nMaxDecompressedSize); + if (!pDecompressedData) { + 
free(pFileData); + fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize); + return 100; + } + + memset(pDecompressedData, 0, nMaxDecompressedSize); + + long long nBestDecTime = -1; + + size_t nActualDecompressedSize = 0; + for (i = 0; i < 50; i++) { + long long t0 = do_get_time(); + if (nOptions & OPT_RAW) + nActualDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize); + else + nActualDecompressedSize = lzsa_inmem_decompress_stream(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion); + long long t1 = do_get_time(); + if (nActualDecompressedSize == -1) { + free(pDecompressedData); + free(pFileData); + fprintf(stderr, "decompression error\n"); + return 100; + } + + long long nCurDecTime = t1 - t0; + if (nBestDecTime == -1 || nBestDecTime > nCurDecTime) + nBestDecTime = nCurDecTime; + } + + if (pszOutFilename) { + FILE *f_out; + + /* Write whole decompressed file out */ + + f_out = fopen(pszOutFilename, "wb"); + if (f_out) { + fwrite(pDecompressedData, 1, nActualDecompressedSize, f_out); + fclose(f_out); + } + } + + free(pDecompressedData); + free(pFileData); + + fprintf(stdout, "format: LZSA%d\n", nFormatVersion); + fprintf(stdout, "decompressed size: %zd bytes\n", nActualDecompressedSize); + fprintf(stdout, "decompression time: %lld microseconds (%g Mb/s)\n", nBestDecTime, ((double)nActualDecompressedSize / 1024.0) / ((double)nBestDecTime / 1000.0)); + + return 0; +} + +/*---------------------------------------------------------------------------*/ + int main(int argc, char **argv) { int i; const char *pszInFilename = NULL; @@ -361,6 +490,14 @@ int main(int argc, char **argv) { else bArgsError = true; } + else if (!strcmp(argv[i], "-bench")) { + if (!bCommandDefined) { + bCommandDefined = true; + cCommand = 'b'; + } + else + bArgsError = true; + } else if (!strcmp(argv[i], 
"-D")) { if (!pszDictionaryFilename && (i + 1) < argc) { pszDictionaryFilename = argv[i + 1]; @@ -484,6 +621,7 @@ int main(int argc, char **argv) { fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] \n", argv[0]); fprintf(stderr, " -c: check resulting stream after compressing\n"); fprintf(stderr, " -d: decompress (default: compress)\n"); + fprintf(stderr, " -bench: benchmark in-memory decompression\n"); fprintf(stderr, " -v: be verbose\n"); fprintf(stderr, " -f : LZSA compression format (1-2)\n"); fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n"); @@ -494,6 +632,8 @@ int main(int argc, char **argv) { return 100; } + do_init_time(); + if (cCommand == 'z') { int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion); if (nResult == 0 && bVerifyCompression) { @@ -503,6 +643,9 @@ int main(int argc, char **argv) { else if (cCommand == 'd') { return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion); } + else if (cCommand == 'b') { + return do_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion); + } else { return 100; }