Mirror of https://github.com/dwsJason/gsla.git (synced 2025-01-14 07:29:48 +00:00)
LZB Encoder: produces better compression (actually generates run/length pattern encoding)
commit 0c5c466f64
parent e2676ed7bd

source/lzb.cpp: 139 changes
@@ -16,6 +16,7 @@ static unsigned char Dictionary[ DICTIONARY_SIZE ];
 
 static int AddDictionary(const std::vector<unsigned char>&data, int dictionarySize);
 static int EmitLiteral(unsigned char *pDest, std::vector<unsigned char>& data);
+static int ConcatLiteral(unsigned char *pDest, std::vector<unsigned char>& data);
 static int EmitReference(unsigned char *pDest, int dictionaryOffset, std::vector<unsigned char>& data);
 static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionarySize);
 
@@ -29,6 +30,10 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
     int bytesInDictionary = 0;
     int bytesEmitted = 0;
 
+    // dumb last emit is a literal stuff
+    bool bLastEmitIsLiteral = false;
+    int lastEmittedLiteralOffset = 0;
+
     std::vector<unsigned char> candidate_data;
 
     while (processedBytes < sourceSize)
@@ -39,8 +44,6 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
         // The dictionary only contains bytes that have been emitted, so we
         // can't add this byte until we've emitted it?
 
-        if (candidate_data.size() < 3) continue;
-
         if (DictionaryMatch(candidate_data, bytesInDictionary) < 0)
         {
             // Was there a dictionary match
@@ -49,28 +52,61 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
 
             int MatchOffset = DictionaryMatch(prev_data, bytesInDictionary);
 
-            if ((MatchOffset >= 0) && prev_data.size() > 2)
+            if ((MatchOffset >= 0) && prev_data.size() > 3)
             {
                 bytesInDictionary = AddDictionary(prev_data, bytesInDictionary);
                 bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, prev_data);
                 candidate_data[0] = candidate_data[ candidate_data.size() - 1 ];
                 candidate_data.resize(1);
+                bLastEmitIsLiteral = false;
             }
             else
             {
                 // Add Dictionary
                 bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
-                bytesEmitted += EmitLiteral(pDest + bytesEmitted, candidate_data);
+
+                if (bLastEmitIsLiteral)
+                {
+                    // If the last emit was a literal, I want to concatenate
+                    // this literal into the previous opcode, to save space
+                    bytesEmitted += ConcatLiteral(pDest + lastEmittedLiteralOffset, candidate_data);
+                }
+                else
+                {
+                    lastEmittedLiteralOffset = bytesEmitted;
+                    bytesEmitted += EmitLiteral(pDest + bytesEmitted, candidate_data);
+                }
+
+                bLastEmitIsLiteral = true;
             }
         }
     }
 
     if (candidate_data.size() > 0)
     {
-        // Emit as a literal? (we have 1 more chance here for a match
-        // Add Dictionary
-        bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
-        bytesEmitted += EmitLiteral(pDest + bytesEmitted, candidate_data);
+        int MatchOffset = DictionaryMatch(candidate_data, bytesInDictionary);
+
+        if ((MatchOffset >=0) && candidate_data.size() > 2)
+        {
+            bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
+            bytesEmitted += EmitReference(pDest + bytesEmitted, MatchOffset, candidate_data);
+        }
+        else
+        {
+            // Add Dictionary
+            bytesInDictionary = AddDictionary(candidate_data, bytesInDictionary);
+
+            if (bLastEmitIsLiteral)
+            {
+                // If the last emit was a literal, I want to concatenate
+                // this literal into the previous opcode, to save space
+                bytesEmitted += ConcatLiteral(pDest + lastEmittedLiteralOffset, candidate_data);
+            }
+            else
+            {
+                bytesEmitted += EmitLiteral(pDest + bytesEmitted, candidate_data);
+            }
+        }
     }
 
     return bytesEmitted;
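Note on the hunk above (not part of the diff): the encoder now remembers whether the previous emit was a literal (bLastEmitIsLiteral) and where that literal's opcode starts (lastEmittedLiteralOffset), so back-to-back literals are folded into one opcode via ConcatLiteral instead of each paying their own opcode header. A minimal sketch of the saving, assuming the 2-byte literal header implied by ConcatLiteral later in this diff; encodedSize and the counts are illustrative only:

#include <cstdio>

// Hypothetical cost model: each literal opcode = 2 header bytes + its payload.
static int encodedSize(int literalOpcodes, int payloadBytes)
{
    return literalOpcodes * 2 + payloadBytes;
}

int main()
{
    // Ten 4-byte literals emitted separately vs. merged into one opcode.
    std::printf("separate: %d bytes\n", encodedSize(10, 40)); // 60
    std::printf("merged:   %d bytes\n", encodedSize(1, 40));  // 42
    return 0;
}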
@@ -98,7 +134,45 @@ static int AddDictionary(const std::vector<unsigned char>&data, int dictionarySize)
 //
 static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionarySize)
 {
-    if(dictionarySize < data.size())
+    if( (0 == dictionarySize ) ||
+        (0 == data.size()) ||
+        (data.size() > 16384) ) // 16384 is largest string copy we can encode
+    {
+        return -1;
+    }
+
+    // Check the end of the dictionary, to see if this data could be a
+    // pattern "run" (where we can repeat a pattern for X many times for free
+    // using the memcpy with overlapping source/dest buffers)
+    // (This is a dictionary based pattern run/length)
+
+    {
+        // Check for pattern sizes, start small
+        int max_pattern_size = dictionarySize;
+
+        for (int pattern_size = 1; pattern_size <= max_pattern_size; ++pattern_size)
+        {
+            bool bMatch = true;
+            int pattern_start = dictionarySize - pattern_size;
+
+            for (int dataIndex = 0; dataIndex < data.size(); ++dataIndex)
+            {
+                if (data[ dataIndex ] == Dictionary[ pattern_start + (dataIndex % pattern_size) ])
+                    continue;
+
+                bMatch = false;
+                break;
+            }
+
+            if (bMatch)
+            {
+                // Return a RLE Style match result
+                return pattern_start;
+            }
+        }
+    }
+
+    if (dictionarySize < data.size())
     {
         return -1;
     }
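Not part of the diff: a standalone sketch of the pattern-run scan added above, run against a local buffer instead of the encoder's static Dictionary[] (FindPatternRun and the sample data are illustrative names). It shows how a candidate that merely repeats the tail of the dictionary still reports a match offset, which the decoder can later expand with an overlapping copy:

#include <cstdio>
#include <vector>

// Same scan as the new DictionaryMatch block: try pattern sizes from 1 up, each
// anchored at the end of the dictionary, and see if the candidate is that pattern repeated.
static int FindPatternRun(const unsigned char* dict, int dictSize,
                          const std::vector<unsigned char>& data)
{
    for (int pattern_size = 1; pattern_size <= dictSize; ++pattern_size)
    {
        int pattern_start = dictSize - pattern_size;
        bool bMatch = true;

        for (size_t i = 0; i < data.size(); ++i)
        {
            if (data[i] != dict[pattern_start + (i % pattern_size)])
            {
                bMatch = false;
                break;
            }
        }
        if (bMatch)
            return pattern_start;   // data is just the dictionary tail repeated
    }
    return -1;
}

int main()
{
    unsigned char dict[] = { 'x', 'A', 'B' };                       // dictionary ends in "AB"
    std::vector<unsigned char> data = { 'A','B','A','B','A','B' };  // candidate bytes
    std::printf("match offset = %d\n", FindPatternRun(dict, 3, data)); // prints 1
    return 0;
}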
@@ -130,6 +204,38 @@ static int DictionaryMatch(const std::vector<unsigned char>& data, int dictionarySize)
     return result;
 }
 
+//------------------------------------------------------------------------------
+//
+// Emit a literal, that appends itself to an existing literal
+//
+static int ConcatLiteral(unsigned char *pDest, std::vector<unsigned char>& data)
+{
+    // Return Size
+    int outSize = (int)data.size();
+
+    int opCode = pDest[0];
+    opCode |= (int)(((pDest[1])&0x7F)<<8);
+
+    int skip = opCode;
+    opCode += outSize;
+
+    // Opcode
+    *pDest++ = (unsigned char)(opCode & 0xFF);
+    *pDest++ = (unsigned char)((opCode >> 8) & 0x7F);
+
+    pDest += skip;
+
+    // Literal Data
+    for (int idx = 0; idx < data.size(); ++idx)
+    {
+        *pDest++ = data[ idx ];
+    }
+
+    data.clear();
+
+    return outSize;
+}
+
 //------------------------------------------------------------------------------
 
 static int EmitLiteral(unsigned char *pDest, std::vector<unsigned char>& data)
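Reading ConcatLiteral above: a literal opcode evidently stores its payload length in the low 15 bits of its first two bytes (low byte first), with the top bit of the second byte masked off, presumably reserved to flag dictionary-copy opcodes; that last point is an inference, not something this hunk shows. ConcatLiteral re-reads that length, skips over the existing payload, appends the new bytes, and rewrites the header with the combined length. A minimal sketch of just the header math:

#include <cstdio>

int main()
{
    unsigned char op[2];
    int oldLen = 5, addLen = 3;

    // Existing literal opcode header for a 5-byte payload (15-bit length, little-endian).
    op[0] = (unsigned char)(oldLen & 0xFF);
    op[1] = (unsigned char)((oldLen >> 8) & 0x7F);

    // ConcatLiteral re-reads the length, extends it, and rewrites the header in place.
    int len = op[0] | ((op[1] & 0x7F) << 8);
    len += addLen;
    op[0] = (unsigned char)(len & 0xFF);
    op[1] = (unsigned char)((len >> 8) & 0x7F);

    std::printf("patched literal length = %d\n", op[0] | ((op[1] & 0x7F) << 8)); // prints 8
    return 0;
}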
@@ -171,6 +277,19 @@ static int EmitReference(unsigned char *pDest, int dictionaryOffset, std::vector<unsigned char>& data)
     return outSize;
 }
 
+//------------------------------------------------------------------------------
+//
+// Std C memcpy seems to be stopping this from happening
+// probably for my protection
+//
+void mymemcpy(u8* pDest, u8* pSrc, int length)
+{
+    while (length-- > 0)
+    {
+        *pDest++ = *pSrc++;
+    }
+}
+
 //------------------------------------------------------------------------------
 //
 // Simple Decompress, for validation
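Why a hand-rolled byte copy: std::memcpy has undefined behavior when source and destination overlap, and memmove deliberately copies as if through a temporary buffer, which defeats the trick. Copying one byte at a time, front to back, means bytes just written are immediately re-read as source, so a short pattern expands into a run. A tiny standalone demo mirroring the mymemcpy added above:

#include <cstdio>

typedef unsigned char u8;

static void mymemcpy(u8* pDest, u8* pSrc, int length)
{
    while (length-- > 0)
        *pDest++ = *pSrc++;    // each written byte becomes source for later bytes
}

int main()
{
    u8 out[16] = { 'A', 'B' };         // already-decompressed output so far
    mymemcpy(&out[2], &out[0], 6);     // overlapping copy: 6 bytes from offset 0 to offset 2

    out[8] = 0;                        // NUL-terminate for printing
    std::printf("%s\n", (const char*)out);   // prints "ABABABAB"
    return 0;
}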
@@ -193,7 +312,7 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
             u16 offset = *pSource++;
             offset |= ((u16)(*pSource++))<<8;
 
-            memcpy(&pDest[ decompressedBytes ], &pDest[ offset ], opcode);
+            mymemcpy(&pDest[ decompressedBytes ], &pDest[ offset ], opcode);
             decompressedBytes += opcode;
         }
         else
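A round-trip smoke test sketch for the new encoder path. The two declarations match the functions in this file, and the highly repetitive input is the case the new pattern run/length matching targets; the generous output-buffer sizing and declaring the functions directly (rather than including a particular header) are assumptions for illustration only:

#include <cstdio>
#include <cstring>
#include <vector>

int  LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize);
void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize);

int main()
{
    // Highly repetitive input, the case the run/length pattern encoding is meant to shrink.
    std::vector<unsigned char> src(4096);
    for (size_t i = 0; i < src.size(); ++i)
        src[i] = (unsigned char)("ABC"[i % 3]);

    std::vector<unsigned char> packed(src.size() * 2);   // assumed large enough for worst case
    std::vector<unsigned char> unpacked(src.size());

    int packedSize = LZB_Compress(packed.data(), src.data(), (int)src.size());
    LZB_Decompress(unpacked.data(), packed.data(), (int)unpacked.size());

    std::printf("%d -> %d bytes, round-trip %s\n",
                (int)src.size(), packedSize,
                std::memcmp(src.data(), unpacked.data(), src.size()) == 0 ? "ok" : "FAILED");
    return 0;
}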