From f80008ca2110e681f1d9b27fb7e32af4990f0d45 Mon Sep 17 00:00:00 2001 From: "JASON-6700K\\jandersen" Date: Thu, 16 Jul 2020 19:05:36 -0400 Subject: [PATCH] faster compressor: WIP --- source/lzb.cpp | 54 +++++++++++++++++++++++++++++++++++-------------- source/lzb.h | 1 + source/main.cpp | 6 ++++++ 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/source/lzb.cpp b/source/lzb.cpp index 281b02c..87212ef 100644 --- a/source/lzb.cpp +++ b/source/lzb.cpp @@ -9,6 +9,7 @@ #include "bctypes.h" #define MAX_DICTIONARY_SIZE (32 * 1024) +#define MAX_STRING_SIZE (16383) // // Yes This is a 32K Buffer, of bytes, with no structure to it // @@ -98,7 +99,7 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) // This works, but it's stupidly slow, because it uses brute force, and // because the brute force starts over everytime I grow the data string // -int old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) +int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) { printf("LZB_Compress %d bytes\n", sourceSize); @@ -257,7 +258,6 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary) for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size) { - bool bMatch = true; int pattern_start = dictionary.size - pattern_size; for (int dataIndex = 0; dataIndex < data.size; ++dataIndex) @@ -269,12 +269,11 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary) continue; } - bMatch = false; break; } - if (candidate.size < pattern_size) - break; + //if (candidate.size < pattern_size) + // break; if (candidate.size > result.size) { @@ -284,34 +283,42 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary) } // As an optimization - int dictionarySize = dictionary.size - 1; // This last string has already been checked by, the + int dictionarySize = dictionary.size; // - 1; // This last string has already been checked by, the // run-length matcher above - if (dictionarySize >= candidate.size) + // As the size grows, we're missing potential matches in here + // I think the best way to counter this is to attempt somthing + // like KMP + + if (dictionarySize > candidate.size) { // Check the dictionary for a match, brute force - for (int idx = 0; idx <= (dictionarySize-candidate.size); ++idx) + for (int dictionaryIndex = 0; dictionaryIndex <= (dictionarySize-candidate.size); ++dictionaryIndex) { - bool bMatch = true; - for (int dataIdx = 0; dataIdx < data.size; ++dataIdx) + int sizeAvailable = dictionarySize - dictionaryIndex; + + if (sizeAvailable > data.size) sizeAvailable = data.size; + + // this could index off the end of the dictionary!!! FIX ME + for (int dataIndex = 0; dataIndex < sizeAvailable; ++dataIndex) { - if (data.pData[ dataIdx ] == dictionary.pData[ idx + dataIdx ]) + if (data.pData[ dataIndex ] == dictionary.pData[ dictionaryIndex + dataIndex ]) { - if (dataIdx > (candidate.size-1)) + if (dataIndex >= candidate.size) { - candidate.pData = dictionary.pData + idx; - candidate.size = dataIdx - 1; + candidate.pData = dictionary.pData + dictionaryIndex; + candidate.size = dataIndex + 1; } continue; } - bMatch = false; break; } if (candidate.size > result.size) { result = candidate; + //dictionaryIndex = -1; break; } } @@ -504,11 +511,16 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize) { int decompressedBytes = 0; + unsigned char *pOriginalSource = pSource; + while (decompressedBytes < destSize) { u16 opcode = *pSource++; opcode |= ((u16)(*pSource++))<<8; + //printf("%04X:", (unsigned int)(pSource-pOriginalSource)); + + if (opcode & 0x8000) { // Dictionary @@ -518,8 +530,18 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize) u16 offset = *pSource++; offset |= ((u16)(*pSource++))<<8; + const char* overlapped = ""; + + if ((&pDest[ decompressedBytes ] - &pDest[ offset ]) < opcode) + { + overlapped = "pattern"; + } + my_memcpy(&pDest[ decompressedBytes ], &pDest[ offset ], opcode); decompressedBytes += opcode; + + + //printf("%04X:Dic %04X %s\n",decompressedBytes, (unsigned int)opcode, overlapped); } else { @@ -527,6 +549,8 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize) memcpy(&pDest[ decompressedBytes ], pSource, opcode); decompressedBytes += opcode; pSource += opcode; + + //printf("%04X:Lit %04X\n",decompressedBytes, (unsigned int)opcode); } } } diff --git a/source/lzb.h b/source/lzb.h index 8284aa2..015ba4b 100644 --- a/source/lzb.h +++ b/source/lzb.h @@ -8,6 +8,7 @@ // returns the size of data saved into the pDest Buffer // int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize); +int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize); void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize); #endif // LZB_H diff --git a/source/main.cpp b/source/main.cpp index fcb0d00..f62b9d4 100644 --- a/source/main.cpp +++ b/source/main.cpp @@ -114,14 +114,20 @@ int main(int argc, char* argv[]) const std::vector& c1Datas = c2data.GetPixelMaps(); unsigned char workbuffer[64*1024]; + unsigned char workbuffer2[64*1024]; for (int idx = 0; idx < frameCount; ++idx) { + int oldCompressedSize = Old_LZB_Compress(workbuffer2, c1Datas[ idx ], 32 * 1024); + printf("old compressedSize = %d\n", oldCompressedSize); int compressedSize = LZB_Compress(workbuffer, c1Datas[ idx ], 32 * 1024); printf("compressedSize = %d\n", compressedSize); unsigned char validationBuffer[ 32 * 1024 ]; + printf("Decompress OLD\n"); + LZB_Decompress(validationBuffer, workbuffer2, 32 * 1024); + printf("Decompress NEW\n"); LZB_Decompress(validationBuffer, workbuffer, 32 * 1024); if (0 == memcmp(c1Datas[ idx ], validationBuffer, 32*1024))