faster compressor: WIP

This commit is contained in:
JASON-6700K\jandersen 2020-07-16 19:05:36 -04:00
parent 955e8cf1d5
commit f80008ca21
3 changed files with 46 additions and 15 deletions

View File

@ -9,6 +9,7 @@
#include "bctypes.h" #include "bctypes.h"
#define MAX_DICTIONARY_SIZE (32 * 1024) #define MAX_DICTIONARY_SIZE (32 * 1024)
#define MAX_STRING_SIZE (16383)
// //
// Yes This is a 32K Buffer, of bytes, with no structure to it // Yes This is a 32K Buffer, of bytes, with no structure to it
// //
@ -98,7 +99,7 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
// This works, but it's stupidly slow, because it uses brute force, and // This works, but it's stupidly slow, because it uses brute force, and
// because the brute force starts over every time I grow the data string // because the brute force starts over every time I grow the data string
// //
int old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
{ {
printf("LZB_Compress %d bytes\n", sourceSize); printf("LZB_Compress %d bytes\n", sourceSize);
@ -257,7 +258,6 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary)
for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size) for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size)
{ {
bool bMatch = true;
int pattern_start = dictionary.size - pattern_size; int pattern_start = dictionary.size - pattern_size;
for (int dataIndex = 0; dataIndex < data.size; ++dataIndex) for (int dataIndex = 0; dataIndex < data.size; ++dataIndex)
@ -269,12 +269,11 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary)
continue; continue;
} }
bMatch = false;
break; break;
} }
if (candidate.size < pattern_size) //if (candidate.size < pattern_size)
break; // break;
if (candidate.size > result.size) if (candidate.size > result.size)
{ {
@ -284,34 +283,42 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary)
} }
// As an optimization // As an optimization
int dictionarySize = dictionary.size - 1; // This last string has already been checked by, the int dictionarySize = dictionary.size; // - 1; // This last string has already been checked by, the
// run-length matcher above // run-length matcher above
if (dictionarySize >= candidate.size) // As the size grows, we're missing potential matches in here
// I think the best way to counter this is to attempt something
// like KMP
if (dictionarySize > candidate.size)
{ {
// Check the dictionary for a match, brute force // Check the dictionary for a match, brute force
for (int idx = 0; idx <= (dictionarySize-candidate.size); ++idx) for (int dictionaryIndex = 0; dictionaryIndex <= (dictionarySize-candidate.size); ++dictionaryIndex)
{ {
bool bMatch = true; int sizeAvailable = dictionarySize - dictionaryIndex;
for (int dataIdx = 0; dataIdx < data.size; ++dataIdx)
if (sizeAvailable > data.size) sizeAvailable = data.size;
// this could index off the end of the dictionary!!! FIX ME
for (int dataIndex = 0; dataIndex < sizeAvailable; ++dataIndex)
{ {
if (data.pData[ dataIdx ] == dictionary.pData[ idx + dataIdx ]) if (data.pData[ dataIndex ] == dictionary.pData[ dictionaryIndex + dataIndex ])
{ {
if (dataIdx > (candidate.size-1)) if (dataIndex >= candidate.size)
{ {
candidate.pData = dictionary.pData + idx; candidate.pData = dictionary.pData + dictionaryIndex;
candidate.size = dataIdx - 1; candidate.size = dataIndex + 1;
} }
continue; continue;
} }
bMatch = false;
break; break;
} }
if (candidate.size > result.size) if (candidate.size > result.size)
{ {
result = candidate; result = candidate;
//dictionaryIndex = -1;
break; break;
} }
} }
@ -504,11 +511,16 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
{ {
int decompressedBytes = 0; int decompressedBytes = 0;
unsigned char *pOriginalSource = pSource;
while (decompressedBytes < destSize) while (decompressedBytes < destSize)
{ {
u16 opcode = *pSource++; u16 opcode = *pSource++;
opcode |= ((u16)(*pSource++))<<8; opcode |= ((u16)(*pSource++))<<8;
//printf("%04X:", (unsigned int)(pSource-pOriginalSource));
if (opcode & 0x8000) if (opcode & 0x8000)
{ {
// Dictionary // Dictionary
@ -518,8 +530,18 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
u16 offset = *pSource++; u16 offset = *pSource++;
offset |= ((u16)(*pSource++))<<8; offset |= ((u16)(*pSource++))<<8;
const char* overlapped = "";
if ((&pDest[ decompressedBytes ] - &pDest[ offset ]) < opcode)
{
overlapped = "pattern";
}
my_memcpy(&pDest[ decompressedBytes ], &pDest[ offset ], opcode); my_memcpy(&pDest[ decompressedBytes ], &pDest[ offset ], opcode);
decompressedBytes += opcode; decompressedBytes += opcode;
//printf("%04X:Dic %04X %s\n",decompressedBytes, (unsigned int)opcode, overlapped);
} }
else else
{ {
@ -527,6 +549,8 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
memcpy(&pDest[ decompressedBytes ], pSource, opcode); memcpy(&pDest[ decompressedBytes ], pSource, opcode);
decompressedBytes += opcode; decompressedBytes += opcode;
pSource += opcode; pSource += opcode;
//printf("%04X:Lit %04X\n",decompressedBytes, (unsigned int)opcode);
} }
} }
} }

View File

@ -8,6 +8,7 @@
// returns the size of data saved into the pDest Buffer // returns the size of data saved into the pDest Buffer
// //
int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize); int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize);
int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize);
void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize); void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize);
#endif // LZB_H #endif // LZB_H

View File

@ -114,14 +114,20 @@ int main(int argc, char* argv[])
const std::vector<unsigned char*>& c1Datas = c2data.GetPixelMaps(); const std::vector<unsigned char*>& c1Datas = c2data.GetPixelMaps();
unsigned char workbuffer[64*1024]; unsigned char workbuffer[64*1024];
unsigned char workbuffer2[64*1024];
for (int idx = 0; idx < frameCount; ++idx) for (int idx = 0; idx < frameCount; ++idx)
{ {
int oldCompressedSize = Old_LZB_Compress(workbuffer2, c1Datas[ idx ], 32 * 1024);
printf("old compressedSize = %d\n", oldCompressedSize);
int compressedSize = LZB_Compress(workbuffer, c1Datas[ idx ], 32 * 1024); int compressedSize = LZB_Compress(workbuffer, c1Datas[ idx ], 32 * 1024);
printf("compressedSize = %d\n", compressedSize); printf("compressedSize = %d\n", compressedSize);
unsigned char validationBuffer[ 32 * 1024 ]; unsigned char validationBuffer[ 32 * 1024 ];
printf("Decompress OLD\n");
LZB_Decompress(validationBuffer, workbuffer2, 32 * 1024);
printf("Decompress NEW\n");
LZB_Decompress(validationBuffer, workbuffer, 32 * 1024); LZB_Decompress(validationBuffer, workbuffer, 32 * 1024);
if (0 == memcmp(c1Datas[ idx ], validationBuffer, 32*1024)) if (0 == memcmp(c1Datas[ idx ], validationBuffer, 32*1024))