From d28efbc390279d642382022d36e9fe8109f45b02 Mon Sep 17 00:00:00 2001 From: "JASON-6700K\\jandersen" Date: Wed, 15 Jul 2020 13:14:27 -0400 Subject: [PATCH] remove use of vector template, and stop copying data around that doesn't need copied (substantially speeds up the code), even though we're still brute force --- source/lzb.cpp | 130 +++++++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 57 deletions(-) diff --git a/source/lzb.cpp b/source/lzb.cpp index 5ca0566..3138aef 100644 --- a/source/lzb.cpp +++ b/source/lzb.cpp @@ -4,42 +4,52 @@ #include "lzb.h" #include -#include +#include #include "bctypes.h" -#define DICTIONARY_SIZE (32 * 1024) +#define MAX_DICTIONARY_SIZE (32 * 1024) // // Yes This is a 32K Buffer, of bytes, with no structure to it // -static unsigned char Dictionary[ DICTIONARY_SIZE ]; +static unsigned char *pDictionary = nullptr; + +struct DataString { + int size; + unsigned char *pData; +}; + +static int AddDictionary(const DataString& data, int dictionarySize); +static int EmitLiteral(unsigned char *pDest, DataString& data); +static int ConcatLiteral(unsigned char *pDest, DataString& data); +static int EmitReference(unsigned char *pDest, int dictionaryOffset, DataString& data); +static int DictionaryMatch(const DataString& data, int dictionarySize); -static int AddDictionary(const std::vector&data, int dictionarySize); -static int EmitLiteral(unsigned char *pDest, std::vector& data); -static int ConcatLiteral(unsigned char *pDest, std::vector& data); -static int EmitReference(unsigned char *pDest, int dictionaryOffset, std::vector& data); -static int DictionaryMatch(const std::vector& data, int dictionarySize); int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) { printf("LZB_Compress %d bytes\n", sourceSize); - // anything less than 3 bytes, is going to be a literal match + // Initialize Dictionary + int bytesInDictionary = 0; // eventually add the ability to start with the dictionary filled + pDictionary = pSource; int processedBytes = 0; - int bytesInDictionary = 0; int bytesEmitted = 0; // dumb last emit is a literal stuff bool bLastEmitIsLiteral = false; int lastEmittedLiteralOffset = 0; - std::vector candidate_data; + DataString candidate_data; + candidate_data.pData = pSource; + candidate_data.size = 0; while (processedBytes < sourceSize) { - unsigned char byte_data = pSource[ processedBytes++ ]; - candidate_data.push_back(byte_data); + // Add a byte to the candidate_data, also tally number of processed + processedBytes++; + candidate_data.size++; // Basic Flow Idea Here // If there's a match, then add to the candidate data, and see if @@ -49,9 +59,6 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) // else emit what we have as a literal - - - // (KMP is probably the last planned optmization here) @@ -60,21 +67,23 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) if (DictionaryMatch(candidate_data, bytesInDictionary) < 0) { // Was there a dictionary match - std::vector prev_data = candidate_data; - prev_data.pop_back(); - int MatchOffset = DictionaryMatch(prev_data, bytesInDictionary); + // Previous Data, we can't get here with candidate_data.size == 0 + // this is an opportunity to use an assert + candidate_data.size--; - if ((MatchOffset >= 0) && prev_data.size() > 3) + int MatchOffset = DictionaryMatch(candidate_data, bytesInDictionary); + + if ((MatchOffset >= 0) && candidate_data.size > 3) { - bytesInDictionary = AddDictionary(prev_data, bytesInDictionary); - bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, prev_data); - candidate_data[0] = candidate_data[ candidate_data.size() - 1 ]; - candidate_data.resize(1); + processedBytes--; + bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary); + bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, candidate_data); bLastEmitIsLiteral = false; } else { + candidate_data.size++; // Add Dictionary bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary); @@ -94,12 +103,12 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) } } - if (candidate_data.size() > 0) + if (candidate_data.size > 0) { int MatchOffset = DictionaryMatch(candidate_data, bytesInDictionary); - if ((MatchOffset >=0) && candidate_data.size() > 2) + if ((MatchOffset >=0) && candidate_data.size > 2) { bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary); bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, candidate_data); @@ -127,14 +136,15 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize) //------------------------------------------------------------------------------ // Return new dictionarySize -static int AddDictionary(const std::vector&data, int dictionarySize) +static int AddDictionary(const DataString& data, int dictionarySize) { - int dataIndex = 0; + //int dataIndex = 0; + //while ((dictionarySize < MAX_DICTIONARY_SIZE) && (dataIndex < data.size)) + //{ + // pDictionary[ dictionarySize++ ] = data.pData[ dataIndex++ ]; + //} - while ((dictionarySize < DICTIONARY_SIZE) && (dataIndex < data.size())) - { - Dictionary[ dictionarySize++ ] = data[ dataIndex++ ]; - } + dictionarySize += data.size; return dictionarySize; } @@ -145,11 +155,11 @@ static int AddDictionary(const std::vector&data, int dictionarySi // // -1 means, no match // -static int DictionaryMatch(const std::vector& data, int dictionarySize) +static int DictionaryMatch(const DataString& data, int dictionarySize) { if( (0 == dictionarySize ) || - (0 == data.size()) || - (data.size() > 16384) ) // 16384 is largest string copy we can encode + (0 == data.size) || + (data.size > 16384) ) // 16384 is largest string copy we can encode { return -1; } @@ -168,9 +178,9 @@ static int DictionaryMatch(const std::vector& data, int dictionar bool bMatch = true; int pattern_start = dictionarySize - pattern_size; - for (int dataIndex = 0; dataIndex < data.size(); ++dataIndex) + for (int dataIndex = 0; dataIndex < data.size; ++dataIndex) { - if (data[ dataIndex ] == Dictionary[ pattern_start + (dataIndex % pattern_size) ]) + if (data.pData[ dataIndex ] == pDictionary[ pattern_start + (dataIndex % pattern_size) ]) continue; bMatch = false; @@ -185,7 +195,7 @@ static int DictionaryMatch(const std::vector& data, int dictionar } } - if (dictionarySize < data.size()) + if (dictionarySize < data.size) { return -1; } @@ -195,12 +205,12 @@ static int DictionaryMatch(const std::vector& data, int dictionar int result = -1; // Check the dictionary for a match, brute force - for (int idx = 0; idx <= (dictionarySize-data.size()); ++idx) + for (int idx = 0; idx <= (dictionarySize-data.size); ++idx) { bool bMatch = true; - for (int dataIdx = 0; dataIdx < data.size(); ++dataIdx) + for (int dataIdx = 0; dataIdx < data.size; ++dataIdx) { - if (data[ dataIdx ] == Dictionary[ idx + dataIdx ]) + if (data.pData[ dataIdx ] == pDictionary[ idx + dataIdx ]) continue; bMatch = false; @@ -221,10 +231,10 @@ static int DictionaryMatch(const std::vector& data, int dictionar // // Emit a literal, that appends itself to an existing literal // -static int ConcatLiteral(unsigned char *pDest, std::vector& data) +static int ConcatLiteral(unsigned char *pDest, DataString& data) { // Return Size - int outSize = (int)data.size(); + int outSize = (int)data.size; int opCode = pDest[0]; opCode |= (int)(((pDest[1])&0x7F)<<8); @@ -239,53 +249,59 @@ static int ConcatLiteral(unsigned char *pDest, std::vector& data) pDest += skip; // Literal Data - for (int idx = 0; idx < data.size(); ++idx) + for (int idx = 0; idx < data.size; ++idx) { - *pDest++ = data[ idx ]; + *pDest++ = data.pData[ idx ]; } - data.clear(); + // Clear + data.pData += data.size; + data.size = 0; return outSize; } //------------------------------------------------------------------------------ -static int EmitLiteral(unsigned char *pDest, std::vector& data) +static int EmitLiteral(unsigned char *pDest, DataString& data) { // Return Size - int outSize = 2 + (int)data.size(); + int outSize = 2 + (int)data.size; // Opcode - *pDest++ = (unsigned char)(data.size() & 0xFF); - *pDest++ = (unsigned char)((data.size() >> 8) & 0x7F); + *pDest++ = (unsigned char)(data.size & 0xFF); + *pDest++ = (unsigned char)((data.size >> 8) & 0x7F); // Literal Data - for (int idx = 0; idx < data.size(); ++idx) + for (int idx = 0; idx < data.size; ++idx) { - *pDest++ = data[ idx ]; + *pDest++ = data.pData[ idx ]; } - data.clear(); + // Clear + data.pData += data.size; + data.size = 0; return outSize; } //------------------------------------------------------------------------------ -static int EmitReference(unsigned char *pDest, int dictionaryOffset, std::vector& data) +static int EmitReference(unsigned char *pDest, int dictionaryOffset, DataString& data) { // Return Size int outSize = 2 + 2; // Opcode - *pDest++ = (unsigned char)(data.size() & 0xFF); - *pDest++ = (unsigned char)((data.size() >> 8) & 0x7F) | 0x80; + *pDest++ = (unsigned char)(data.size & 0xFF); + *pDest++ = (unsigned char)((data.size >> 8) & 0x7F) | 0x80; *pDest++ = (unsigned char)(dictionaryOffset & 0xFF); *pDest++ = (unsigned char)((dictionaryOffset>>8) & 0xFF); - data.clear(); + // Clear + data.pData += data.size; + data.size = 0; return outSize; }