mirror of https://github.com/dwsJason/gsla.git
remove use of vector template, and stop copying data around that doesn't need to be copied (substantially speeds up the code), even though we're still brute force
This commit is contained in:
parent
553c5775a9
commit
d28efbc390
130
source/lzb.cpp
130
source/lzb.cpp
|
@ -4,42 +4,52 @@
|
||||||
#include "lzb.h"
|
#include "lzb.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <vector>
|
#include <string.h>
|
||||||
|
|
||||||
#include "bctypes.h"
|
#include "bctypes.h"
|
||||||
|
|
||||||
#define DICTIONARY_SIZE (32 * 1024)
|
#define MAX_DICTIONARY_SIZE (32 * 1024)
|
||||||
//
|
//
|
||||||
// Yes This is a 32K Buffer, of bytes, with no structure to it
|
// Yes This is a 32K Buffer, of bytes, with no structure to it
|
||||||
//
|
//
|
||||||
static unsigned char Dictionary[ DICTIONARY_SIZE ];
|
static unsigned char *pDictionary = nullptr;
|
||||||
|
|
||||||
|
struct DataString {
|
||||||
|
int size;
|
||||||
|
unsigned char *pData;
|
||||||
|
};
|
||||||
|
|
||||||
|
static int AddDictionary(const DataString& data, int dictionarySize);
|
||||||
|
static int EmitLiteral(unsigned char *pDest, DataString& data);
|
||||||
|
static int ConcatLiteral(unsigned char *pDest, DataString& data);
|
||||||
|
static int EmitReference(unsigned char *pDest, int dictionaryOffset, DataString& data);
|
||||||
|
static int DictionaryMatch(const DataString& data, int dictionarySize);
|
||||||
|
|
||||||
static int AddDictionary(const std::vector<unsigned char>&data, int dictionarySize);
|
|
||||||
static int EmitLiteral(unsigned char *pDest, std::vector<unsigned char>& data);
|
|
||||||
static int ConcatLiteral(unsigned char *pDest, std::vector<unsigned char>& data);
|
|
||||||
static int EmitReference(unsigned char *pDest, int dictionaryOffset, std::vector<unsigned char>& data);
|
|
||||||
static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionarySize);
|
|
||||||
|
|
||||||
int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
|
int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
|
||||||
{
|
{
|
||||||
printf("LZB_Compress %d bytes\n", sourceSize);
|
printf("LZB_Compress %d bytes\n", sourceSize);
|
||||||
|
|
||||||
// anything less than 3 bytes, is going to be a literal match
|
// Initialize Dictionary
|
||||||
|
int bytesInDictionary = 0; // eventually add the ability to start with the dictionary filled
|
||||||
|
pDictionary = pSource;
|
||||||
|
|
||||||
int processedBytes = 0;
|
int processedBytes = 0;
|
||||||
int bytesInDictionary = 0;
|
|
||||||
int bytesEmitted = 0;
|
int bytesEmitted = 0;
|
||||||
|
|
||||||
// dumb last emit is a literal stuff
|
// dumb last emit is a literal stuff
|
||||||
bool bLastEmitIsLiteral = false;
|
bool bLastEmitIsLiteral = false;
|
||||||
int lastEmittedLiteralOffset = 0;
|
int lastEmittedLiteralOffset = 0;
|
||||||
|
|
||||||
std::vector<unsigned char> candidate_data;
|
DataString candidate_data;
|
||||||
|
candidate_data.pData = pSource;
|
||||||
|
candidate_data.size = 0;
|
||||||
|
|
||||||
while (processedBytes < sourceSize)
|
while (processedBytes < sourceSize)
|
||||||
{
|
{
|
||||||
unsigned char byte_data = pSource[ processedBytes++ ];
|
// Add a byte to the candidate_data, and also tally the number of processed bytes
|
||||||
candidate_data.push_back(byte_data);
|
processedBytes++;
|
||||||
|
candidate_data.size++;
|
||||||
|
|
||||||
// Basic Flow Idea Here
|
// Basic Flow Idea Here
|
||||||
// If there's a match, then add to the candidate data, and see if
|
// If there's a match, then add to the candidate data, and see if
|
||||||
|
@ -49,9 +59,6 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
|
||||||
// else emit what we have as a literal
|
// else emit what we have as a literal
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// (KMP is probably the last planned optimization here)
|
// (KMP is probably the last planned optimization here)
|
||||||
|
|
||||||
|
|
||||||
|
@ -60,21 +67,23 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
|
||||||
if (DictionaryMatch(candidate_data, bytesInDictionary) < 0)
|
if (DictionaryMatch(candidate_data, bytesInDictionary) < 0)
|
||||||
{
|
{
|
||||||
// Was there a dictionary match
|
// Was there a dictionary match
|
||||||
std::vector<unsigned char> prev_data = candidate_data;
|
|
||||||
prev_data.pop_back();
|
|
||||||
|
|
||||||
int MatchOffset = DictionaryMatch(prev_data, bytesInDictionary);
|
// Previous Data, we can't get here with candidate_data.size == 0
|
||||||
|
// this is an opportunity to use an assert
|
||||||
|
candidate_data.size--;
|
||||||
|
|
||||||
if ((MatchOffset >= 0) && prev_data.size() > 3)
|
int MatchOffset = DictionaryMatch(candidate_data, bytesInDictionary);
|
||||||
|
|
||||||
|
if ((MatchOffset >= 0) && candidate_data.size > 3)
|
||||||
{
|
{
|
||||||
bytesInDictionary = AddDictionary(prev_data, bytesInDictionary);
|
processedBytes--;
|
||||||
bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, prev_data);
|
bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
|
||||||
candidate_data[0] = candidate_data[ candidate_data.size() - 1 ];
|
bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, candidate_data);
|
||||||
candidate_data.resize(1);
|
|
||||||
bLastEmitIsLiteral = false;
|
bLastEmitIsLiteral = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
candidate_data.size++;
|
||||||
// Add Dictionary
|
// Add Dictionary
|
||||||
bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
|
bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
|
||||||
|
|
||||||
|
@ -94,12 +103,12 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (candidate_data.size() > 0)
|
if (candidate_data.size > 0)
|
||||||
{
|
{
|
||||||
|
|
||||||
int MatchOffset = DictionaryMatch(candidate_data, bytesInDictionary);
|
int MatchOffset = DictionaryMatch(candidate_data, bytesInDictionary);
|
||||||
|
|
||||||
if ((MatchOffset >=0) && candidate_data.size() > 2)
|
if ((MatchOffset >=0) && candidate_data.size > 2)
|
||||||
{
|
{
|
||||||
bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
|
bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
|
||||||
bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, candidate_data);
|
bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, candidate_data);
|
||||||
|
@ -127,14 +136,15 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Return new dictionarySize
|
// Return new dictionarySize
|
||||||
static int AddDictionary(const std::vector<unsigned char>&data, int dictionarySize)
|
static int AddDictionary(const DataString& data, int dictionarySize)
|
||||||
{
|
{
|
||||||
int dataIndex = 0;
|
//int dataIndex = 0;
|
||||||
|
//while ((dictionarySize < MAX_DICTIONARY_SIZE) && (dataIndex < data.size))
|
||||||
|
//{
|
||||||
|
// pDictionary[ dictionarySize++ ] = data.pData[ dataIndex++ ];
|
||||||
|
//}
|
||||||
|
|
||||||
while ((dictionarySize < DICTIONARY_SIZE) && (dataIndex < data.size()))
|
dictionarySize += data.size;
|
||||||
{
|
|
||||||
Dictionary[ dictionarySize++ ] = data[ dataIndex++ ];
|
|
||||||
}
|
|
||||||
|
|
||||||
return dictionarySize;
|
return dictionarySize;
|
||||||
}
|
}
|
||||||
|
@ -145,11 +155,11 @@ static int AddDictionary(const std::vector<unsigned char>&data, int dictionarySi
|
||||||
//
|
//
|
||||||
// -1 means, no match
|
// -1 means, no match
|
||||||
//
|
//
|
||||||
static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionarySize)
|
static int DictionaryMatch(const DataString& data, int dictionarySize)
|
||||||
{
|
{
|
||||||
if( (0 == dictionarySize ) ||
|
if( (0 == dictionarySize ) ||
|
||||||
(0 == data.size()) ||
|
(0 == data.size) ||
|
||||||
(data.size() > 16384) ) // 16384 is largest string copy we can encode
|
(data.size > 16384) ) // 16384 is largest string copy we can encode
|
||||||
{
|
{
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -168,9 +178,9 @@ static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionar
|
||||||
bool bMatch = true;
|
bool bMatch = true;
|
||||||
int pattern_start = dictionarySize - pattern_size;
|
int pattern_start = dictionarySize - pattern_size;
|
||||||
|
|
||||||
for (int dataIndex = 0; dataIndex < data.size(); ++dataIndex)
|
for (int dataIndex = 0; dataIndex < data.size; ++dataIndex)
|
||||||
{
|
{
|
||||||
if (data[ dataIndex ] == Dictionary[ pattern_start + (dataIndex % pattern_size) ])
|
if (data.pData[ dataIndex ] == pDictionary[ pattern_start + (dataIndex % pattern_size) ])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
bMatch = false;
|
bMatch = false;
|
||||||
|
@ -185,7 +195,7 @@ static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictionarySize < data.size())
|
if (dictionarySize < data.size)
|
||||||
{
|
{
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -195,12 +205,12 @@ static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionar
|
||||||
int result = -1;
|
int result = -1;
|
||||||
|
|
||||||
// Check the dictionary for a match, brute force
|
// Check the dictionary for a match, brute force
|
||||||
for (int idx = 0; idx <= (dictionarySize-data.size()); ++idx)
|
for (int idx = 0; idx <= (dictionarySize-data.size); ++idx)
|
||||||
{
|
{
|
||||||
bool bMatch = true;
|
bool bMatch = true;
|
||||||
for (int dataIdx = 0; dataIdx < data.size(); ++dataIdx)
|
for (int dataIdx = 0; dataIdx < data.size; ++dataIdx)
|
||||||
{
|
{
|
||||||
if (data[ dataIdx ] == Dictionary[ idx + dataIdx ])
|
if (data.pData[ dataIdx ] == pDictionary[ idx + dataIdx ])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
bMatch = false;
|
bMatch = false;
|
||||||
|
@ -221,10 +231,10 @@ static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionar
|
||||||
//
|
//
|
||||||
// Emit a literal, that appends itself to an existing literal
|
// Emit a literal, that appends itself to an existing literal
|
||||||
//
|
//
|
||||||
static int ConcatLiteral(unsigned char *pDest, std::vector<unsigned char>& data)
|
static int ConcatLiteral(unsigned char *pDest, DataString& data)
|
||||||
{
|
{
|
||||||
// Return Size
|
// Return Size
|
||||||
int outSize = (int)data.size();
|
int outSize = (int)data.size;
|
||||||
|
|
||||||
int opCode = pDest[0];
|
int opCode = pDest[0];
|
||||||
opCode |= (int)(((pDest[1])&0x7F)<<8);
|
opCode |= (int)(((pDest[1])&0x7F)<<8);
|
||||||
|
@ -239,53 +249,59 @@ static int ConcatLiteral(unsigned char *pDest, std::vector<unsigned char>& data)
|
||||||
pDest += skip;
|
pDest += skip;
|
||||||
|
|
||||||
// Literal Data
|
// Literal Data
|
||||||
for (int idx = 0; idx < data.size(); ++idx)
|
for (int idx = 0; idx < data.size; ++idx)
|
||||||
{
|
{
|
||||||
*pDest++ = data[ idx ];
|
*pDest++ = data.pData[ idx ];
|
||||||
}
|
}
|
||||||
|
|
||||||
data.clear();
|
// Clear
|
||||||
|
data.pData += data.size;
|
||||||
|
data.size = 0;
|
||||||
|
|
||||||
return outSize;
|
return outSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
static int EmitLiteral(unsigned char *pDest, std::vector<unsigned char>& data)
|
static int EmitLiteral(unsigned char *pDest, DataString& data)
|
||||||
{
|
{
|
||||||
// Return Size
|
// Return Size
|
||||||
int outSize = 2 + (int)data.size();
|
int outSize = 2 + (int)data.size;
|
||||||
|
|
||||||
// Opcode
|
// Opcode
|
||||||
*pDest++ = (unsigned char)(data.size() & 0xFF);
|
*pDest++ = (unsigned char)(data.size & 0xFF);
|
||||||
*pDest++ = (unsigned char)((data.size() >> 8) & 0x7F);
|
*pDest++ = (unsigned char)((data.size >> 8) & 0x7F);
|
||||||
|
|
||||||
// Literal Data
|
// Literal Data
|
||||||
for (int idx = 0; idx < data.size(); ++idx)
|
for (int idx = 0; idx < data.size; ++idx)
|
||||||
{
|
{
|
||||||
*pDest++ = data[ idx ];
|
*pDest++ = data.pData[ idx ];
|
||||||
}
|
}
|
||||||
|
|
||||||
data.clear();
|
// Clear
|
||||||
|
data.pData += data.size;
|
||||||
|
data.size = 0;
|
||||||
|
|
||||||
return outSize;
|
return outSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
static int EmitReference(unsigned char *pDest, int dictionaryOffset, std::vector<unsigned char>& data)
|
static int EmitReference(unsigned char *pDest, int dictionaryOffset, DataString& data)
|
||||||
{
|
{
|
||||||
// Return Size
|
// Return Size
|
||||||
int outSize = 2 + 2;
|
int outSize = 2 + 2;
|
||||||
|
|
||||||
// Opcode
|
// Opcode
|
||||||
*pDest++ = (unsigned char)(data.size() & 0xFF);
|
*pDest++ = (unsigned char)(data.size & 0xFF);
|
||||||
*pDest++ = (unsigned char)((data.size() >> 8) & 0x7F) | 0x80;
|
*pDest++ = (unsigned char)((data.size >> 8) & 0x7F) | 0x80;
|
||||||
|
|
||||||
*pDest++ = (unsigned char)(dictionaryOffset & 0xFF);
|
*pDest++ = (unsigned char)(dictionaryOffset & 0xFF);
|
||||||
*pDest++ = (unsigned char)((dictionaryOffset>>8) & 0xFF);
|
*pDest++ = (unsigned char)((dictionaryOffset>>8) & 0xFF);
|
||||||
|
|
||||||
data.clear();
|
// Clear
|
||||||
|
data.pData += data.size;
|
||||||
|
data.size = 0;
|
||||||
|
|
||||||
return outSize;
|
return outSize;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue