GSLA: encoder, work in progress

This commit is contained in:
JASON-6700K\jandersen 2020-07-19 14:19:42 -04:00
parent bb916c15e2
commit 0764a030e6
3 changed files with 312 additions and 37 deletions

View File

@ -242,7 +242,7 @@ void GSLAFile::LoadFromFile(const char* pFilePath)
if (pINIT->IsValid())
{
// We have an initial frame chunk
UnpackInitialFrame(pINIT);
UnpackInitialFrame(pINIT, pHeader);
}
else if (pANIM->IsValid())
{
@ -263,13 +263,13 @@ void GSLAFile::LoadFromFile(const char* pFilePath)
// Unpack the initial frame, that's been packed with an empty initial dictionary
// So every byte of the buffer will be written out (no skip opcodes)
//
void GSLAFile::UnpackInitialFrame(GSLA_INIT* pINIT)
void GSLAFile::UnpackInitialFrame(GSLA_INIT* pINIT, GSLA_Header* pHeader)
{
unsigned char *pData = ((unsigned char*)pINIT) + sizeof(GSLA_INIT);
unsigned char *pTargetBuffer = m_pC1PixelMaps[ 0 ]; // Data needs to be pre allocated
//DecompressFrame(pTargetBuffer, pData);
DecompressFrame(pTargetBuffer, pData, (unsigned char*)pHeader);
}
//------------------------------------------------------------------------------
@ -359,9 +359,8 @@ void GSLAFile::SaveToFile(const char* pFilenamePath)
unsigned char* pInitialFrame = m_pC1PixelMaps[ 0 ];
// We're not worried about bank wrap on the first frame, and we don't have a pre-populated
// dictionary
int compressedSize = LZBA_Compress(pWorkBuffer, pInitialFrame, m_frameSize,
pInitialFrame, pInitialFrame);
// dictionary - Also use the best compression we can get here
int compressedSize = Old_LZB_Compress(pWorkBuffer, pInitialFrame, m_frameSize);
for (int compressedIndex = 0; compressedIndex < compressedSize; ++compressedIndex)
{
@ -458,4 +457,122 @@ void GSLAFile::SaveToFile(const char* pFilenamePath)
}
//------------------------------------------------------------------------------
//
// Forward, byte-at-a-time copy that is safe to use when the destination
// overlaps (and sits after) the source.
//
// Standard C memcpy has undefined behavior for overlapping buffers, so it
// can't be relied on here.  The decoder deliberately overlaps the buffers to
// get a pattern run copy: bytes written early in the copy are read back
// later in the same copy, which repeats the pattern.
//
// A length of zero or less copies nothing.
//
static void my_memcpy(unsigned char* pDest, unsigned char* pSrc, int length)
{
    for (int index = 0; index < length; ++index)
    {
        pDest[index] = pSrc[index];
    }
}
//------------------------------------------------------------------------------
//
// Decode the opcode stream for one Frame into the Target Frame Buffer
//
// pTarget is the Target Frame Buffer, dictionary copies also read from it
// pData is the source data for a Frame
//
// pDataBaseAddress is the base address where the animation file was loaded,
// this is used so we can properly interpret bank-skip opcodes (data is broken
// into 64K chunks for the IIgs/65816)
//
// Each opcode is a 16 bit little endian word, bit 15 and bit 0 pick the type
//
//   bit15=1, bit0=1  Cursor Skip Forward, bits 1-14 hold (count-1)
//   bit15=1, bit0=0  Dictionary Copy, bits 1-14 hold (length-1), followed by
//                    a 16 bit source position that is biased by 0x2000
//   bit15=0, bit0=1  Literal Copy, bits 1-14 hold (length-1), followed by
//                    that many literal bytes
//   bit15=0, bit0=0  Control code in bits 1-2: 0 = source bank skip,
//                    1 = end of frame, 2 = end of animation, 3 = reserved
//
// There is no bounds checking on pTarget or pData in here
//
// returns the number of bytes that have been processed in the pData
//
int GSLAFile::DecompressFrame(unsigned char* pTarget, unsigned char* pData, unsigned char* pDataBaseAddress)
{
    unsigned char* const pFrameStart = pData;
    int cursor = 0;

    for (;;)
    {
        const unsigned short opcode = (unsigned short)(pData[0] | (((unsigned short)pData[1]) << 8));
        const bool bHighBit = (opcode & 0x8000) != 0;
        const bool bLowBit = (opcode & 0x0001) != 0;

        if (bHighBit && bLowBit)
        {
            // Cursor Skip Forward
            cursor += ((opcode >> 1) & 0x3FFF) + 1;
            pData += 2;
        }
        else if (bHighBit)
        {
            // Dictionary Copy, the source position is stored biased by 0x2000
            // to help the GS decode it quicker, the subtraction wraps at 16 bits
            const unsigned short dictionaryPosition =
                (unsigned short)((pData[2] | (((unsigned short)pData[3]) << 8)) - 0x2000);
            const unsigned short length = (unsigned short)(((opcode >> 1) & 0x3FFF) + 1);

            // Source and destination may overlap here (pattern runs)
            my_memcpy(pTarget + cursor, pTarget + dictionaryPosition, (int)length);

            pData += 4;
            cursor += length;
        }
        else if (bLowBit)
        {
            // Literal Copy Bytes, the bytes directly follow the opcode
            const unsigned short length = (unsigned short)(((opcode >> 1) & 0x3FFF) + 1);

            pData += 2;
            my_memcpy(pTarget + cursor, pData, (int)length);

            pData += length;
            cursor += length;
        }
        else
        {
            const int control = (opcode >> 1) & 3;

            if (0 == control)
            {
                // Source bank Skip, continue at the start of the next
                // 64K bank, measured from the file base address
                int bankOffset = (int)(pData - pDataBaseAddress);
                bankOffset &= 0xFFFF0000;
                pData = pDataBaseAddress + (bankOffset + 0x00010000);
            }
            else
            {
                // 1 = End of frame, step past the opcode
                // 2 = End of Animation, intentionally leave pData alone
                // 3 = Reserved / Illegal
                if (1 == control)
                {
                    pData += 2;
                }
                break;
            }
        }
    }

    return (int)(pData - pFrameStart);
}
//------------------------------------------------------------------------------

View File

@ -110,7 +110,7 @@ typedef struct GSLA_CHUNK
class GSLAFile
{
public:
// Load in a C2 File
// Load in a GSLA File
GSLAFile(const char *pFilePath);
~GSLAFile();
@ -127,9 +127,11 @@ public:
const std::vector<unsigned char*>& GetPixelMaps() { return m_pC1PixelMaps; }
int DecompressFrame(unsigned char* pTarget, unsigned char* pData, unsigned char* pDataBaseAddress);
private:
void UnpackInitialFrame(GSLA_INIT* pINIT);
void UnpackInitialFrame(GSLA_INIT* pINIT, GSLA_Header* pHeader);
void UnpackAnimation(GSLA_ANIM* pANIM, GSLA_Header* pHeader);

View File

@ -29,13 +29,14 @@ static int DictionaryMatch(const DataString& data, int dictionarySize);
// Stuff I need for a faster version
static DataString LongestMatch(const DataString& data, const DataString& dictionary);
static DataString LongestMatch(const DataString& data, const DataString& dictionary, int cursorPosition);
//
// New Version, still Brute Force, but not as many times
//
int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
{
printf("LZB Compress %d bytes\n", sourceSize);
//printf("LZB Compress %d bytes\n", sourceSize);
unsigned char *pOriginalDest = pDest;
@ -103,7 +104,7 @@ int LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
//
int Old_LZB_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize)
{
printf("LZB_Compress %d bytes\n", sourceSize);
//printf("LZB_Compress %d bytes\n", sourceSize);
// Initialize Dictionary
int bytesInDictionary = 0; // eventually add the ability to start with the dictionary filled
@ -345,7 +346,102 @@ DataString LongestMatch(const DataString& data, const DataString& dictionary)
return result;
}
//------------------------------------------------------------------------------
//
// Look in the dictionary for a string that matches the start of data
//
// Two passes are made:
//   1. Pattern / run-length style match against the tail of the dictionary,
//      the match is allowed to be longer than the pattern (the decoder
//      repeats the pattern using a copy with overlapping source/dest buffers)
//   2. Brute force scan of the dictionary from the front, this pass stops
//      at the first position that beats the best match found so far
//
// cursorPosition is not used in here yet
//
// returns the match as a pointer into the dictionary + size, or a
// nullptr / 0 size result when there's no match (or either input is empty)
//
DataString LongestMatch(const DataString& data, const DataString& dictionary, int cursorPosition)
{
    DataString best;
    best.pData = nullptr;
    best.size = 0;

    // Nothing to match, or nothing to match against
    if ((dictionary.size <= 0) || (data.size <= 0))
    {
        return best;
    }

    // Working match, this is carried from one attempt to the next, and from
    // the pattern pass into the brute force pass
    DataString current;
    current.pData = data.pData;
    current.size = 0;

    // Pass 1: pattern run check against the end of the dictionary
    // Check for pattern sizes, start small
    int patternLimit = 4096;
    if (dictionary.size < patternLimit) patternLimit = dictionary.size;
    if (data.size < patternLimit) patternLimit = data.size;

    for (int patternLength = 1; patternLength <= patternLimit; ++patternLength)
    {
        const int patternStart = dictionary.size - patternLength;

        // Count how many bytes of data match the pattern, repeated
        int matched = 0;
        while ((matched < data.size) &&
               (data.pData[ matched ] == dictionary.pData[ patternStart + (matched % patternLength) ]))
        {
            ++matched;
        }

        if (matched > 0)
        {
            current.pData = dictionary.pData + patternStart;
            current.size = matched;
        }

        if (current.size > best.size)
        {
            best = current;
        }
    }

    // Pass 2: brute force
    // The whole dictionary is scanned, even though the tail has already been
    // checked by the run-length matcher above
    // As the size grows, we're missing potential matches in here
    // I think the best way to counter this is to attempt something
    // like KMP
    const int dictionarySize = dictionary.size;

    if (dictionarySize > current.size)
    {
        for (int startIndex = 0; startIndex <= (dictionarySize - current.size); ++startIndex)
        {
            // Clamp, so the compare can't index off the end of the
            // dictionary, or off the end of the data
            int available = dictionarySize - startIndex;
            if (available > data.size) available = data.size;

            int matched = 0;
            while ((matched < available) &&
                   (data.pData[ matched ] == dictionary.pData[ startIndex + matched ]))
            {
                ++matched;
            }

            // Only take it, if it's longer than what we're carrying
            if (matched > current.size)
            {
                current.pData = dictionary.pData + startIndex;
                current.size = matched;
            }

            if (current.size > best.size)
            {
                // First improvement wins, stop looking
                best = current;
                break;
            }
        }
    }

    return best;
}
//------------------------------------------------------------------------------
//
@ -577,12 +673,21 @@ void LZB_Decompress(unsigned char* pDest, unsigned char* pSource, int destSize)
//
// Compress a Frame in the GSLA LZB Format
//
// The dictionary is also the canvas, so when we're finished the dictionary
// buffer will match the original pSource buffer
//
// If they both match to begin with, we just crap out an End of Frame opcode
//
int LZBA_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize,
unsigned char* pDataStart, unsigned char* pDictionary,
int dictionarySize)
{
// printf("LZBA Compress %d bytes\n", sourceSize);
// Used for bank skip opcode emission
int bankOffset = (int)((pDest - pDataStart) & 0xFFFF);
// So we can track how big our compressed data ends up being
unsigned char *pOriginalDest = pDest;
DataString sourceData;
@ -601,9 +706,47 @@ int LZBA_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize,
bool bLastEmitIsLiteral = false;
unsigned char* pLastLiteralDest = nullptr;
int lastEmittedCursorPosition = 0; // This is the default for each frame
for (int cursorPosition = 0; cursorPosition < dictionarySize;)
{
if (pSource[ cursorPosition ] != pDictionary[ cursorPosition ])
{
// Here is some data that has to be processed, so let's decide
// how large of a chunk of data we're looking at here
// Do we need to emit a Skip opcode?, compare cursor to last emit
// and emit a skip command if we need it (I'm going want a gap of
// at least 3 bytes? before we call it the end
int tempCursorPosition = cursorPosition;
int gapCount = 0;
for (; tempCursorPosition < dictionarySize; ++tempCursorPosition)
{
if (pSource[ cursorPosition ] != pDictionary[ cursorPosition ])
{
gapCount = 0;
}
else
{
gapCount++;
if (gapCount >= 3)
break;
}
}
tempCursorPosition -= gapCount;
// Now we know from cursorPosition to tempCursorPosition is data
// that we want to encode, we either literally copy it, or look
// to see if this data is already in the dictionary (so we can copy
// it from one part of the frame buffer to another part)
sourceData.pData = &pSource[ cursorPosition ];
sourceData.size = tempCursorPosition - cursorPosition;
while (sourceData.size > 0)
{
candidateData = LongestMatch(sourceData, dictionaryData);
candidateData = LongestMatch(sourceData, dictionaryData, cursorPosition);
// If no match, or the match is too small, then take the next byte
// and emit as literal
@ -617,7 +760,9 @@ int LZBA_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize,
sourceData.pData += candidateData.size;
sourceData.size -= candidateData.size;
dictionaryData.size = AddDictionary(candidateData, dictionaryData.size);
// Modify the dictionary
cursorPosition = AddDictionary(candidateData, cursorPosition);
lastEmittedCursorPosition = cursorPosition;
if (candidateData.size > 3)
{
@ -638,6 +783,17 @@ int LZBA_Compress(unsigned char* pDest, unsigned char* pSource, int sourceSize,
pDest += EmitLiteral(pDest, candidateData);
}
}
}
else
{
// no change
cursorPosition++;
}
}
// Emit the End of Frame Opcode
*pDest++ = 0x02;
*pDest++ = 0x00;
return (int)(pDest - pOriginalDest);