Speed up portable decompressor a bit

This commit is contained in:
emmanuel-marty 2019-04-20 10:27:24 +02:00
parent 3e5639afaf
commit fb79d319cb

View File

@ -32,37 +32,50 @@
#define FORCE_INLINE __attribute__((always_inline)) #define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */ #endif /* _MSC_VER */
static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) { static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
const unsigned char *pInBlock = *ppInBlock; const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData; unsigned char *pCurOutData = *ppCurOutData;
if (nLiterals == LITERALS_RUN_LEN) { if (nLiterals == LITERALS_RUN_LEN) {
unsigned char nByte; unsigned char nByte;
if (pInBlock >= pInBlockEnd) return -1; if (pInBlock < pInBlockEnd) {
nByte = *pInBlock++; nByte = *pInBlock++;
nLiterals += (int)((unsigned int)nByte); nLiterals += ((unsigned int)nByte);
if (nByte == 254) { if (nByte == 254) {
if (pInBlock >= pInBlockEnd) return -1; if (pInBlock < pInBlockEnd) {
nLiterals += (int)((unsigned int)*pInBlock++); nLiterals += ((unsigned int)*pInBlock++);
}
else {
return -1;
}
}
else if (nByte == 255) {
if ((pInBlock + 1) < pInBlockEnd) {
nLiterals = ((unsigned int)*pInBlock++);
nLiterals |= (((unsigned int)*pInBlock++) << 8);
}
else {
return -1;
}
}
} }
else if (nByte == 255) { else {
if ((pInBlock + 1) >= pInBlockEnd) return -1; return -1;
nLiterals = ((unsigned int)*pInBlock++);
nLiterals |= (((unsigned int)*pInBlock++) << 8);
} }
} }
if (nLiterals != 0) { if (nLiterals != 0) {
if ((pInBlock + nLiterals) > pInBlockEnd || if ((pInBlock + nLiterals) <= pInBlockEnd &&
(pCurOutData + nLiterals) > pOutDataEnd) { (pCurOutData + nLiterals) <= pOutDataEnd) {
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
return -1; return -1;
} }
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
} }
*ppInBlock = pInBlock; *ppInBlock = pInBlock;
@ -70,76 +83,76 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p
return 0; return 0;
} }
static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) { static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
const unsigned char *pInBlock = *ppInBlock; const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData; unsigned char *pCurOutData = *ppCurOutData;
if (nMatchLen == MATCH_RUN_LEN) { if (nMatchLen == MATCH_RUN_LEN) {
unsigned char nByte; unsigned char nByte;
if (pInBlock >= pInBlockEnd) return -1; if (pInBlock < pInBlockEnd) {
nByte = *pInBlock++; nByte = *pInBlock++;
nMatchLen += (int)((unsigned int)nByte); nMatchLen += ((unsigned int)nByte);
if (nByte == 254) { if (nByte == 254) {
if (pInBlock >= pInBlockEnd) return -1; if (pInBlock < pInBlockEnd) {
nMatchLen += (int)((unsigned int)*pInBlock++); nMatchLen += ((unsigned int)*pInBlock++);
}
else {
return -1;
}
}
else if (nByte == 255) {
if ((pInBlock + 1) < pInBlockEnd) {
nMatchLen = ((unsigned int)*pInBlock++);
nMatchLen |= (((unsigned int)*pInBlock++) << 8);
}
else {
return -1;
}
}
} }
else if (nByte == 255) { else {
if ((pInBlock + 1) >= pInBlockEnd) return -1; return -1;
nMatchLen = ((unsigned int)*pInBlock++);
nMatchLen |= (((unsigned int)*pInBlock++) << 8);
} }
} }
nMatchLen += MIN_MATCH_SIZE; nMatchLen += MIN_MATCH_SIZE;
if ((pCurOutData + nMatchLen) > pOutDataEnd) { if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
return -1;
}
if ((pSrc + 1) == pCurOutData && nMatchLen >= 16) {
/* One-byte RLE */
memset(pCurOutData, *pSrc, nMatchLen);
pCurOutData += nMatchLen;
}
else {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
int nMaxFast = nMatchLen; if ((pCurOutData - pSrc) >= 8 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
if (nMaxFast > (pCurOutData - pSrc))
nMaxFast = (int)(pCurOutData - pSrc);
if ((pCurOutData + nMaxFast) > (pOutDataFastEnd - 15))
nMaxFast = (int)(pOutDataFastEnd - 15 - pCurOutData);
if (nMaxFast > 0) {
const unsigned char *pCopySrc = pSrc; const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData; unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMaxFast; const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do { do {
memcpy(pCopyDst, pCopySrc, 16); memcpy(pCopyDst, pCopySrc, 8);
memcpy(pCopyDst + 8, pCopySrc + 8, 8);
pCopySrc += 16; pCopySrc += 16;
pCopyDst += 16; pCopyDst += 16;
} while (pCopyDst < pCopyEndDst); } while (pCopyDst < pCopyEndDst);
pCurOutData += nMaxFast; pCurOutData += nMatchLen;
pSrc += nMaxFast;
nMatchLen -= nMaxFast;
} }
else {
while (nMatchLen >= 4) { while (nMatchLen >= 4) {
*pCurOutData++ = *pSrc++; *pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++; *pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++; *pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++; *pCurOutData++ = *pSrc++;
nMatchLen -= 4; nMatchLen -= 4;
} }
while (nMatchLen > 0) { while (nMatchLen) {
*pCurOutData++ = *pSrc++; *pCurOutData++ = *pSrc++;
nMatchLen--; nMatchLen--;
}
} }
} }
else {
return -1;
}
*ppInBlock = pInBlock; *ppInBlock = pInBlock;
*ppCurOutData = pCurOutData; *ppCurOutData = pCurOutData;
@ -159,16 +172,16 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
*/ */
int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize; const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 16; const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
unsigned char *pCurOutData = pOutData + nOutDataOffset; unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize; const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
const unsigned char *pOutDataFastEnd = pOutDataEnd - 16; const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
/* Fast loop */ /* Fast loop */
while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) { while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) {
const unsigned char token = *pInBlock++; const unsigned char token = *pInBlock++;
int nLiterals = (int)((unsigned int)((token & 0x70) >> 4)); unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
if (nLiterals < LITERALS_RUN_LEN) { if (nLiterals < LITERALS_RUN_LEN) {
memcpy(pCurOutData, pInBlock, 8); memcpy(pCurOutData, pInBlock, 8);
@ -180,28 +193,31 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
return -1; return -1;
} }
if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */ if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
int nMatchOffset; int nMatchOffset;
nMatchOffset = ((unsigned int)*pInBlock++); nMatchOffset = ((unsigned int)*pInBlock++);
if (token & 0x80) { if (token & 0x80) {
if (pInBlock >= pInBlockEnd) return -1;
nMatchOffset |= (((unsigned int)*pInBlock++) << 8); nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
} }
nMatchOffset++; nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset; const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc < pOutData) if (pSrc >= pOutData) {
return -1; unsigned int nMatchLen = (unsigned int)(token & 0x0f);
if (nMatchLen < MATCH_RUN_LEN && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
int nMatchLen = (int)((unsigned int)(token & 0x0f)); memcpy(pCurOutData, pSrc, 8);
if (nMatchLen < (16 - MIN_MATCH_SIZE + 1) && (pSrc + MIN_MATCH_SIZE + nMatchLen) < pCurOutData && pCurOutData < pOutDataFastEnd) { memcpy(pCurOutData + 8, pSrc + 8, 8);
memcpy(pCurOutData, pSrc, 16); memcpy(pCurOutData + 16, pSrc + 16, 4);
pCurOutData += (MIN_MATCH_SIZE + nMatchLen); pCurOutData += (MIN_MATCH_SIZE + nMatchLen);
}
else {
if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1;
}
} }
else { else {
if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) return -1;
return -1;
} }
} }
} }
@ -210,28 +226,29 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
while (pInBlock < pInBlockEnd) { while (pInBlock < pInBlockEnd) {
const unsigned char token = *pInBlock++; const unsigned char token = *pInBlock++;
int nLiterals = (int)((unsigned int)((token & 0x70) >> 4)); unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
return -1; return -1;
if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */ if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
int nMatchOffset; int nMatchOffset;
nMatchOffset = ((unsigned int)*pInBlock++); nMatchOffset = ((unsigned int)*pInBlock++);
if (token & 0x80) { if (token & 0x80) {
if (pInBlock >= pInBlockEnd) return -1;
nMatchOffset |= (((unsigned int)*pInBlock++) << 8); nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
} }
nMatchOffset++; nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset; const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc < pOutData) if (pSrc >= pOutData) {
return -1; unsigned int nMatchLen = (unsigned int)(token & 0x0f);
if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
int nMatchLen = (int)((unsigned int)(token & 0x0f)); return -1;
if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) }
else {
return -1; return -1;
}
} }
} }